All reimplemented, lexer still needs to be finished

This commit is contained in:
Kyler Olsen 2025-12-01 23:37:10 -07:00
parent d17687e5a6
commit ae077ef433
14 changed files with 528 additions and 6756 deletions

View File

@ -4,3 +4,8 @@ version = "0.1.0"
edition = "2021"
[dependencies]
[build-dependencies]
rustc_version = "0.4"
chrono = "0.4"
vergen = { version = "8", features = ["build"] }

47
SLS_Rust/sls/build.rs Normal file
View File

@ -0,0 +1,47 @@
use std::process::Command;
use vergen::EmitBuilder;
/// Runs `cmd` to completion and returns its standard output with surrounding
/// whitespace trimmed.
///
/// Yields `None` when the process cannot be started or when it exits with a
/// non-success status. Bytes that are not valid UTF-8 are replaced lossily.
fn try_cmd(cmd: &mut Command) -> Option<String> {
    cmd.output()
        .ok()
        .filter(|finished| finished.status.success())
        .map(|finished| String::from_utf8_lossy(&finished.stdout).trim().to_owned())
}
/// Build-script entry point.
///
/// Publishes build metadata to the crate as compile-time environment
/// variables: `GIT_COMMIT_HASH` (git describe output plus commit date),
/// `COMPILER_NAME` and `COMPILER_VER`, in addition to whatever the vergen
/// builder emits. Every external command is best-effort; a failure degrades
/// the corresponding value to `"unknown"` instead of failing the build.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // NOTE(review): the crate reads `BUILD_DATE` / `BUILD_TIME` via `env!`;
    // confirm that this vergen call really emits variables under those names.
    EmitBuilder::builder().build()?;

    // "<describe> <committer date>". The date is only queried when the
    // describe call succeeded; if either call fails the whole value is
    // "unknown".
    let commit_info = try_cmd(Command::new("git").args([
        "describe",
        "--always",
        "--dirty",
        "--abbrev=7",
    ]))
    .and_then(|hash| {
        try_cmd(Command::new("git").args(["show", "-s", "--format=%ci"]))
            .map(|date| format!("{} {}", hash, date))
    })
    .unwrap_or_else(|| String::from("unknown"));
    println!("cargo:rustc-env=GIT_COMMIT_HASH={}", commit_info);

    // Compiler name is fixed; the version is whatever `rustc --version` prints.
    println!("cargo:rustc-env=COMPILER_NAME=rustc");
    let rustc_ver =
        try_cmd(Command::new("rustc").arg("--version")).unwrap_or_else(|| String::from("unknown"));
    println!("cargo:rustc-env=COMPILER_VER={}", rustc_ver);

    Ok(())
}

View File

@ -1,6 +0,0 @@
use crate::types::Value;
/// Placeholder builtin: returns a fixed greeting wrapped in `Value::Str`.
pub fn hello() -> Value {
    let greeting = String::from("hello from builtin");
    Value::Str(greeting)
}

View File

@ -1,20 +0,0 @@
use std::fmt;
/// Error categories, each carrying a human-readable message.
#[derive(Debug)]
pub enum SlsError {
    /// Failure while executing a program.
    Runtime(String),
    /// Failure while tokenising source text.
    Lexer(String),
    /// Failure while reading or writing.
    Io(String),
}

impl std::error::Error for SlsError {}

impl fmt::Display for SlsError {
    /// Renders the error as `<category>: <message>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let (category, detail) = match self {
            SlsError::Runtime(detail) => ("Runtime", detail),
            SlsError::Lexer(detail) => ("Lexer", detail),
            SlsError::Io(detail) => ("IO", detail),
        };
        write!(f, "{}: {}", category, detail)
    }
}

View File

@ -1,16 +1,71 @@
use crate::errors::SlsError;
use std::fs;
use std::io;
/// Names a script file by its path; constructing one does not touch the
/// file system.
pub struct SlsFile {
/// Path string handed to `fs::read_to_string` when the file is read.
pub path: String,
}
use crate::interpreter::InterpreterState;
use crate::lexer::{LexerInfo, lexical_analysis};
use crate::string::SlsStr;
use crate::errors::{LexerResult, LexerTokenResult};
impl SlsFile {
pub fn open(path: impl Into<String>) -> Self {
SlsFile { path: path.into() }
/// Execute the contents of a script file.
///
/// Reads `filename` into memory, runs `lexical_analysis` over the text and
/// hands the resulting tokens to `interpreter`. Returns `true` on success.
/// On a read failure, a lexer error or a runtime error a message is written
/// to stderr and `false` is returned.
pub fn exec_file(interpreter: &mut InterpreterState, filename: SlsStr) -> bool {
// Read the whole file
let source = match fs::read_to_string(&filename.str) {
Ok(s) => s,
Err(e) => {
eprintln!("Cannot read file: {} ({})", filename.str, e);
return false;
}
};
// Wrap source in SlsStr
let code = SlsStr::from_string(source);
let mut lexer_info = LexerInfo::new(filename.clone(), code);
let result = lexical_analysis(&mut lexer_info);
match result {
// The lexer failed as a whole: report and give up.
LexerResult::Error(err) => {
eprintln!("{}", err.message);
false
}
// NOTE(review): the next two lines use `self` and `SlsError`, which do not
// fit this free function; they look like remnants of a removed
// `SlsFile::read_all` method interleaved by the diff view -- confirm.
pub fn read_all(&self) -> Result<String, SlsError> {
fs::read_to_string(&self.path).map_err(|e| SlsError::Io(e.to_string()))
LexerResult::Ok(mut head) => {
// Walk the result chain starting at `head` until a node ends the walk.
let mut node = Some(head.as_mut());
while let Some(tok) = node {
match tok {
LexerTokenResult::Error(err) => {
eprintln!("{}", err.message);
return false;
}
LexerTokenResult::Token(token) => {
if !interpreter.execute(token) {
eprintln!("A runtime error occurred!");
return false;
}
node = None; // No linked list, next handled below
}
// NOTE(review): the first field of `Next` is discarded, so only a
// `Token` node is ever executed -- confirm this is intended.
LexerTokenResult::Next(_, next) => {
node = next.as_deref_mut();
}
}
}
true
}
}
}
/// Stand-alone entry point for running a script file.
///
/// Announces the file on stdout, creates an interpreter and executes the file
/// through `exec_file`. The return value is a process-style status: `0` on
/// success, `1` when the interpreter could not be created or execution
/// reported a failure.
pub fn run_file(filename: SlsStr) -> i32 {
    println!("Executing file: {}", filename.str);

    // NOTE(review): this treats `InterpreterState::new()` as returning an
    // `Option`; confirm that matches the interpreter module.
    let Some(mut interpreter) = InterpreterState::new() else {
        return 1;
    };

    let succeeded = exec_file(&mut interpreter, filename);
    if succeeded {
        0
    } else {
        1
    }
}

View File

@ -1,17 +1,137 @@
use crate::types::Value;
use std::collections::HashMap;
pub struct Interpreter {
pub globals: HashMap<String, Value>,
use crate::lexer::*; // Identifier, Token, TokenString, etc.
/// Signature of a builtin function: it receives the interpreter state and
/// returns a flag that `execute_func` passes straight back to its caller.
pub type BuiltinFn = fn(&mut InterpreterState) -> bool;
/// A value stored on the interpreter stack (`InterpreterState::stack`).
///
/// Most variants are filled from the `Token` variant of the same name by
/// `push_token`.
#[derive(Debug, Clone)]
pub enum StackValue {
/// An identifier pushed as data rather than executed.
Identifier(Identifier),
// Signed integers, widest first.
I64(i64),
I32(i32),
I16(i16),
I8(i8),
// Unsigned integers, widest first.
U64(u64),
U32(u32),
U16(u16),
U8(u8),
/// Single-precision float.
Float(f32),
/// Double-precision float.
Double(f64),
/// A character stored as a single byte.
Character(u8),
Boolean(bool),
/// A sequence of tokens held as one value.
TokenString(TokenString),
/// A function item; `push_token` never produces this variant.
Callable(FunctionItem),
}
impl Interpreter {
/// Something `execute_func` can run when it is looked up by name.
#[derive(Debug, Clone)]
pub enum FunctionItem {
/// A token sequence, run through `execute_token_string`.
TokenString(TokenString),
/// A native function pointer, called directly with the interpreter state.
Builtin(BuiltinFn),
}
/// Mutable interpreter state: the value stack plus the table of named
/// functions.
#[derive(Debug)]
pub struct InterpreterState {
/// Value stack; `push_token` appends to its end.
pub stack: Vec<StackValue>,
/// Functions keyed by name, consulted by `execute_func`.
pub functions: HashMap<String, FunctionItem>,
}
// NOTE(review): this span mixes the new `InterpreterState::new` with lines
// that appear to belong to the removed `Interpreter` impl (the `Interpreter {
// globals: .. }` literal and the `eval` stub) -- an artifact of the diff view.
impl InterpreterState {
/// Creates a state with an empty stack and no registered functions.
pub fn new() -> Self {
Interpreter { globals: HashMap::new() }
Self {
stack: Vec::new(),
functions: HashMap::new(),
}
/// Evaluate source and return a Value. This is a stubbed placeholder.
pub fn eval(&mut self, _src: &str) -> Result<Value, Box<dyn std::error::Error>> {
Ok(Value::Nil)
}
}
/// Converts `token` into a [`StackValue`] and pushes it onto the stack.
///
/// Returns `true` when a value was pushed, and also for [`Token::Eof`], which
/// is accepted without touching the stack. String literals, arrays and type
/// tuples have no stack representation here and yield `false`.
pub fn push_token(state: &mut InterpreterState, token: Token) -> bool {
    let converted = match token {
        // End of input: nothing to push, but not a failure.
        Token::Eof => return true,
        // Token kinds that cannot be placed on the stack.
        Token::StringLiteral(_) | Token::Array(_) | Token::TypeTuple(_) => return false,

        Token::Identifier(ident) => StackValue::Identifier(ident),
        Token::TokenString(body) => StackValue::TokenString(body),
        Token::Boolean(flag) => StackValue::Boolean(flag),
        Token::Character(byte) => StackValue::Character(byte),

        Token::Float(x) => StackValue::Float(x),
        Token::Double(x) => StackValue::Double(x),

        Token::I8(n) => StackValue::I8(n),
        Token::I16(n) => StackValue::I16(n),
        Token::I32(n) => StackValue::I32(n),
        Token::I64(n) => StackValue::I64(n),

        Token::U8(n) => StackValue::U8(n),
        Token::U16(n) => StackValue::U16(n),
        Token::U32(n) => StackValue::U32(n),
        Token::U64(n) => StackValue::U64(n),
    };
    state.stack.push(converted);
    true
}
/// Looks up `key` in the function table and runs the entry.
///
/// Returns `false` when no function is registered under `key`; otherwise
/// returns whatever the builtin or the token-string execution reports.
pub fn execute_func(state: &mut InterpreterState, key: &str) -> bool {
    // Clone the entry so the table is no longer borrowed while the function
    // runs with mutable access to the whole state.
    let Some(entry) = state.functions.get(key).cloned() else {
        return false;
    };
    match entry {
        FunctionItem::TokenString(body) => execute_token_string(state, body),
        FunctionItem::Builtin(native) => native(state),
    }
}
pub fn execute_token_string(state: &mut InterpreterState, ts: TokenString) -> bool {
for token in ts.tokens {
if let Token::Identifier(id) = &token {
if !id.is_literal {
if !execute_func(state, &id.name) {
return false;
}
continue;
}
}
if !push_token(state, token) {
return false;
}
}
true
}
/// Executes a single token.
///
/// A non-literal identifier is treated as a function name and dispatched via
/// [`execute_func`]; every other token is pushed with [`push_token`].
pub fn execute(state: &mut InterpreterState, token: Token) -> bool {
    if let Token::Identifier(ident) = &token {
        if !ident.is_literal {
            return execute_func(state, &ident.name);
        }
    }
    push_token(state, token)
}
/// Builds a fresh interpreter state and lets `load_builtins` populate it.
///
/// Returns `None` when `load_builtins` reports failure, otherwise the
/// initialised state.
pub fn interpreter_create(
    load_builtins: fn(&mut InterpreterState) -> bool,
) -> Option<InterpreterState> {
    let mut fresh = InterpreterState::new();
    let loaded = load_builtins(&mut fresh);
    loaded.then_some(fresh)
}

View File

@ -1,409 +1,127 @@
#[derive(Debug, Clone, PartialEq)]
pub enum TokenType {
Illegal,
/// Lexer cursor over one source text.
#[derive(Debug, Clone)]
pub struct LexerInfo {
    /// Name of the input the source came from.
    pub filename: String,
    /// Full source text.
    pub source: String,
    /// Current position; starts at 0.
    pub pos: usize,
    /// Current column; starts at 1.
    pub column: usize,
    /// Current line; starts at 1.
    pub line: usize,
}

impl LexerInfo {
    /// Creates a cursor at the very start of `source` (position 0, line 1,
    /// column 1).
    pub fn new(filename: impl Into<String>, source: impl Into<String>) -> Self {
        let filename: String = filename.into();
        let source: String = source.into();
        Self {
            filename,
            source,
            pos: 0,
            column: 1,
            line: 1,
        }
    }
}
/// A named identifier token.
#[derive(Debug, Clone)]
pub struct Identifier {
/// Identifier text; used as the lookup key when the identifier is executed.
pub name: String,
/// When `true` the interpreter pushes the identifier as a value instead of
/// calling the function of that name.
pub is_literal: bool,
}
/// Element storage of an array literal, one variant per element type.
#[derive(Debug, Clone)]
pub enum ArrayLiteral {
Identifiers(Vec<Identifier>),
// Signed integer elements.
I64(Vec<i64>),
I32(Vec<i32>),
I16(Vec<i16>),
I8(Vec<i8>),
// Unsigned integer elements.
U64(Vec<u64>),
U32(Vec<u32>),
U16(Vec<u16>),
U8(Vec<u8>),
// Floating-point elements.
Float(Vec<f32>),
Double(Vec<f64>),
/// Characters, one byte each.
Character(Vec<u8>),
Strings(Vec<String>),
Boolean(Vec<bool>),
TokenStrings(Vec<TokenString>),
TypeTuples(Vec<TypeTuple>),
/// Unlike the other variants this holds a single `StructInline`, not a `Vec`.
StructInline(StructInline),
}
/// An array literal together with its shape.
#[derive(Debug, Clone)]
pub struct ShapedArray {
/// Element data.
pub array: ArrayLiteral,
/// Shape of the array; presumably one extent per dimension -- TODO confirm.
pub shape: Vec<usize>,
}
/// An ordered sequence of tokens treated as a single value; the interpreter
/// runs it token by token in `execute_token_string`.
#[derive(Debug, Clone)]
pub struct TokenString {
/// The contained tokens, in execution order.
pub tokens: Vec<Token>,
}
/// A pair of identifier lists labelled inputs and outputs.
#[derive(Debug, Clone)]
pub struct TypeTuple {
/// Identifiers on the input side.
pub inputs: Vec<Identifier>,
/// Identifiers on the output side.
pub outputs: Vec<Identifier>,
}
/// An inline struct literal: a name plus a list of values.
#[derive(Debug, Clone)]
pub struct StructInline {
/// Name attached to the literal.
pub name: String,
/// The values, in the order stored.
pub values: Vec<StructValue>,
}
/// One value inside a `StructInline`.
#[derive(Debug, Clone)]
pub enum StructValue {
/// Any integer, stored as `i64`.
Integer(i64),
Float(f32),
Double(f64),
Boolean(bool),
/// A character stored as a single byte.
Character(u8),
String(String),
/// Any other token, stored as-is.
Token(Token),
}
/// A lexical token. Variants carrying a payload are the ones the interpreter
/// matches on in `push_token`.
// NOTE(review): the payload-free variants below (`Ident`, `Int`, `Float`,
// `Str`, the operators and the delimiters) are referenced elsewhere only as
// `TokenType::...`; they look like variants of the removed `TokenType` enum
// interleaved by the diff view -- confirm before relying on them.
#[derive(Debug, Clone)]
pub enum Token {
/// End of input.
Eof,
Ident,
Int,
Float,
Str,
/// An identifier, literal or callable (see `Identifier::is_literal`).
Identifier(Identifier),
// Operators
Assign,
Plus,
Minus,
Asterisk,
Slash,
Bang,
Lt,
Gt,
Eq,
NotEq,
// All integer sizes
I64(i64),
I32(i32),
I16(i16),
I8(i8),
U64(u64),
U32(u32),
U16(u16),
U8(u8),
// Delimiters
Comma,
Semicolon,
Colon,
LParen,
RParen,
LBrace,
RBrace,
LBracket,
RBracket,
/// Single-precision float literal.
Float(f32),
/// Double-precision float literal.
Double(f64),
/// Character literal stored as a single byte.
Character(u8),
/// String literal; `push_token` rejects it.
StringLiteral(String),
Boolean(bool),
/// Array literal with shape; `push_token` rejects it.
Array(ShapedArray),
/// Nested token sequence.
TokenString(TokenString),
/// Type tuple; `push_token` rejects it.
TypeTuple(TypeTuple),
}
// NOTE(review): this span interleaves several declarations from the diff
// view: the head of a `Token` struct (`ttype`, `lexeme`, and the trailing
// `numeric` / `float` fields), the `LexError` struct, a `Lexer` struct and the
// `LexResult` alias. Comments below describe each field as written; which
// declaration each line belongs to should be confirmed against the real file.
#[derive(Debug, Clone, PartialEq)]
pub struct Token {
pub ttype: TokenType,
pub lexeme: String,
/// Error raised by the lexer, with a message and a source location.
#[derive(Debug, Clone)]
pub struct LexError {
/// Human-readable description.
pub message: String,
/// Name of the file being lexed.
pub file: String,
/// Line of the reported location.
pub line: usize,
/// Column of the reported location.
pub column: usize,
// Parsed integer value, when one is available.
pub numeric: Option<i128>,
// Parsed floating-point value, when one is available.
pub float: Option<f64>,
}
/// Character-at-a-time lexer over an in-memory buffer.
pub struct Lexer {
// Source text split into characters.
input: Vec<char>,
pos: usize, // current position in input (points to current char)
read_pos: usize, // current reading position in input (after current char)
// Character at `pos`, or `None` once the input is exhausted.
ch: Option<char>,
// Line counter; incremented by `read_char` on every '\n'.
line: usize,
// Column counter; reset to 0 by `read_char` on every '\n'.
column: usize,
}
/// Result type used by lexer functions; the error side is `LexError`.
pub type LexResult<T> = Result<T, LexError>;
impl Lexer {
/// Creates a lexer over `input`, positioned on its first character (or at
/// end of input when `input` is empty).
pub fn new(input: impl Into<String>) -> Self {
    let text: String = input.into();
    let chars: Vec<char> = text.chars().collect();
    let mut fresh = Lexer {
        input: chars,
        pos: 0,
        read_pos: 0,
        ch: None,
        line: 1,
        column: 0,
    };
    // Prime `ch` so the first `next_token` call sees the first character.
    fresh.read_char();
    fresh
}
/// Advances to the next character, updating `ch`, `pos`, `read_pos` and the
/// line/column counters. Past the end of input `ch` becomes `None` while the
/// positions keep advancing.
fn read_char(&mut self) {
    self.ch = self.input.get(self.read_pos).copied();
    self.pos = self.read_pos;
    self.read_pos += 1;

    match self.ch {
        // A newline starts a fresh line with the column reset.
        Some('\n') => {
            self.line += 1;
            self.column = 0;
        }
        Some(_) => self.column += 1,
        None => {}
    }
}
// NOTE(review): two definitions are interleaved here by the diff view: a
// `peek_char` method that returns the character at `read_pos` without
// consuming it (`None` at end of input), and a `get_token` stub that ignores
// its argument and returns `None`. Confirm which lines belong to which.
fn peek_char(&self) -> Option<char> {
if self.read_pos >= self.input.len() {
pub fn get_token(_lexer: &mut LexerInfo) -> Option<Token> {
None
} else {
Some(self.input[self.read_pos])
}
}
/// Consumes characters until the current one is not whitespace or the input
/// is exhausted.
fn skip_whitespace(&mut self) {
    while matches!(self.ch, Some(c) if c.is_whitespace()) {
        self.read_char();
    }
}
fn read_identifier(&mut self) -> String {
let start = self.pos;
while let Some(c) = self.ch {
if is_identifier_char(c) {
self.read_char();
} else {
break;
}
}
self.input[start..self.pos].iter().collect()
}
/// Consumes a run of number-like characters and returns it unparsed.
///
/// Accepted characters are hexadecimal digits (which covers decimal digits
/// and the letter `b`), `.`, `_`, `x` and `o`. No validation happens here;
/// suffixes and interpretation are left to the caller.
fn read_number(&mut self) -> String {
    let begin = self.pos;
    while self
        .ch
        .map_or(false, |c| matches!(c, '.' | '_' | 'x' | 'o') || c.is_ascii_hexdigit())
    {
        self.read_char();
    }
    self.input[begin..self.pos].iter().collect()
}
/// Reads a double-quoted string whose opening quote is the current character
/// and returns the text between the quotes.
///
/// Escape sequences are not interpreted. If the input ends before a closing
/// quote, everything up to the end of input is returned.
fn read_string(&mut self) -> String {
    // Step over the opening quote.
    self.read_char();
    let begin = self.pos;
    while !matches!(self.ch, None | Some('"')) {
        self.read_char();
    }
    let contents: String = self.input[begin..self.pos].iter().collect();
    // Step over the closing quote.
    self.read_char();
    contents
}
/// Scans and returns the next token, skipping leading whitespace first.
///
/// Recognises one- and two-character operators, delimiters, double-quoted
/// strings, identifiers and numeric literals (decimal, `0x`/`0b`/`0o`
/// prefixed, floats, `_` separators and an optional `:type` suffix). At end
/// of input an `Eof` token is returned; anything unrecognised, and any
/// numeric literal that fails to parse or overflows its type, becomes an
/// `Illegal` token.
///
/// The `line`/`column` stored in a token are the lexer's counters at the
/// moment the token value is constructed, which for multi-character tokens
/// is after their characters have been consumed.
pub fn next_token(&mut self) -> Token {
self.skip_whitespace();
let token = match self.ch {
Some('=') => {
if self.peek_char() == Some('=') {
self.read_char();
let lex = "==".to_string();
Token { ttype: TokenType::Eq, lexeme: lex, line: self.line, column: self.column, numeric: None, float: None }
} else {
Token { ttype: TokenType::Assign, lexeme: "=".to_string(), line: self.line, column: self.column, numeric: None, float: None }
}
}
Some('+') => { Token { ttype: TokenType::Plus, lexeme: "+".to_string(), line: self.line, column: self.column, numeric: None, float: None } }
Some('*') => { Token { ttype: TokenType::Asterisk, lexeme: "*".to_string(), line: self.line, column: self.column, numeric: None, float: None } }
Some('/') => { Token { ttype: TokenType::Slash, lexeme: "/".to_string(), line: self.line, column: self.column, numeric: None, float: None } }
Some('!') => {
if self.peek_char() == Some('=') {
self.read_char();
Token { ttype: TokenType::NotEq, lexeme: "!=".to_string(), line: self.line, column: self.column, numeric: None, float: None }
} else {
Token { ttype: TokenType::Bang, lexeme: "!".to_string(), line: self.line, column: self.column, numeric: None, float: None }
}
}
Some('<') => { Token { ttype: TokenType::Lt, lexeme: "<".to_string(), line: self.line, column: self.column, numeric: None, float: None } }
Some('>') => { Token { ttype: TokenType::Gt, lexeme: ">".to_string(), line: self.line, column: self.column, numeric: None, float: None } }
Some(',') => { Token { ttype: TokenType::Comma, lexeme: ",".to_string(), line: self.line, column: self.column, numeric: None, float: None } }
Some(';') => { Token { ttype: TokenType::Semicolon, lexeme: ";".to_string(), line: self.line, column: self.column, numeric: None, float: None } }
Some(':') => { Token { ttype: TokenType::Colon, lexeme: ":".to_string(), line: self.line, column: self.column, numeric: None, float: None } }
Some('(') => { Token { ttype: TokenType::LParen, lexeme: "(".to_string(), line: self.line, column: self.column, numeric: None, float: None } }
Some(')') => { Token { ttype: TokenType::RParen, lexeme: ")".to_string(), line: self.line, column: self.column, numeric: None, float: None } }
Some('{') => { Token { ttype: TokenType::LBrace, lexeme: "{".to_string(), line: self.line, column: self.column, numeric: None, float: None } }
Some('}') => { Token { ttype: TokenType::RBrace, lexeme: "}".to_string(), line: self.line, column: self.column, numeric: None, float: None } }
Some('[') => { Token { ttype: TokenType::LBracket, lexeme: "[".to_string(), line: self.line, column: self.column, numeric: None, float: None } }
Some(']') => { Token { ttype: TokenType::RBracket, lexeme: "]".to_string(), line: self.line, column: self.column, numeric: None, float: None } }
Some('"') => {
let s = self.read_string();
Token { ttype: TokenType::Str, lexeme: s, line: self.line, column: self.column, numeric: None, float: None }
}
Some(c) if is_letter(c) => {
let ident = self.read_identifier();
Token { ttype: TokenType::Ident, lexeme: ident, line: self.line, column: self.column, numeric: None, float: None }
}
// A '-' directly followed by a digit always starts a negative literal here,
// whatever token came before it; a lone '-' is handled further down.
Some(c) if c.is_ascii_digit() || (c == '-' && self.peek_char().map_or(false, |pc| pc.is_ascii_digit())) => {
// Handle optional leading '-' as part of number
let mut sign = 1i128;
if c == '-' {
sign = -1;
self.read_char();
}
// Determine base and read digits with underscores and optional suffix
let start_pos = self.pos;
// If starting with '0' and next is x/b/o, handle prefixes
let mut base = 10u32;
let mut raw_digits = String::new();
if self.ch == Some('0') && self.peek_char().map_or(false, |pc| pc == 'x' || pc == 'X' || pc == 'b' || pc == 'o') {
// consume '0'
self.read_char();
if let Some(prefix) = self.ch {
match prefix {
'x' | 'X' => base = 16,
'b' => base = 2,
'o' => base = 8,
_ => {}
}
// consume prefix
self.read_char();
// read digits appropriate for base (allow underscores)
while let Some(d) = self.ch {
if d == '_' { self.read_char(); continue; }
if base == 16 && d.is_ascii_hexdigit() { raw_digits.push(d); self.read_char(); continue; }
if base == 10 && d.is_ascii_digit() { raw_digits.push(d); self.read_char(); continue; }
if base == 8 && ('0'..='7').contains(&d) { raw_digits.push(d); self.read_char(); continue; }
if base == 2 && (d == '0' || d == '1') { raw_digits.push(d); self.read_char(); continue; }
break;
}
}
} else {
// Decimal or float — detect invalid characters inside the literal
let mut seen_dot = false;
let mut invalid_start = false;
while let Some(d) = self.ch {
if d == '_' { self.read_char(); continue; }
if d == '.' {
if seen_dot { break; }
seen_dot = true;
raw_digits.push('.');
self.read_char();
continue;
}
if d.is_ascii_digit() {
raw_digits.push(d);
self.read_char();
continue;
}
// if we encounter a comma or alphabetic character inside a decimal
// treat the whole sequence as an invalid literal
if d == ',' || d.is_ascii_alphabetic() {
invalid_start = true;
break;
}
break;
}
if invalid_start {
// consume until whitespace or a clear delimiter to form a single Illegal token
while let Some(ch) = self.ch {
if ch.is_whitespace() { break; }
match ch {
'+' | '*' | '/' | '!' | '<' | '>' | '=' | ';' | '(' | ')' | '{' | '}' | '[' | ']' | '"' | '\'' => break,
_ => { self.read_char(); }
}
}
// The raw text (letters, commas and all) replaces the collected digits, so
// the integer parse below fails and the token ends up Illegal.
raw_digits = self.input[start_pos..self.pos].iter().collect();
}
}
// After digits, check for optional type suffix like ':i8'
let mut suffix: Option<String> = None;
if self.ch == Some(':') {
// consume ':'
self.read_char();
let mut sstart = self.pos;
while let Some(sc) = self.ch {
if sc.is_ascii_alphanumeric() || sc == '_' { self.read_char(); } else { break; }
}
suffix = Some(self.input[sstart..self.pos].iter().collect());
}
// Now attempt to parse numeric value
let mut token = Token { ttype: TokenType::Int, lexeme: String::new(), line: self.line, column: self.column, numeric: None, float: None };
// reconstruct lexeme (include sign and any prefix)
let lexeme: String = self.input[start_pos..self.pos].iter().collect();
let full_lex = if sign < 0 { format!("-{}", lexeme) } else { lexeme.clone() };
token.lexeme = full_lex.clone();
// parse according to base
if raw_digits.contains('.') {
// float
// NOTE(review): `full_lex` still contains any ':suffix' text at this point,
// so a float written with a type suffix would fail to parse -- confirm.
match full_lex.replace("_", "").parse::<f64>() {
Ok(f) => { token.ttype = TokenType::Float; token.float = Some(f); }
Err(_) => { token.ttype = TokenType::Illegal; }
}
} else {
// integer: need to remove underscores and handle base
let digits = raw_digits.replace("_", "");
if digits.is_empty() {
token.ttype = TokenType::Illegal;
} else {
// if prefix was used, adjust parsing
let parsed = if base == 10 {
digits.parse::<i128>().ok()
} else {
i128::from_str_radix(&digits, base).ok()
};
if let Some(mut v) = parsed {
v *= sign;
// If base was non-decimal and a signed suffix exists, interpret
// the parsed unsigned bits as a two's-complement signed value
if base != 10 {
if let Some(ref sfx) = suffix {
match sfx.as_str() {
"i8" => {
let bits = 8u32;
let max_unsigned = (1i128 << bits) - 1;
let signed_max = i8::MAX as i128;
if v >= 0 && v <= max_unsigned && v > signed_max {
v = v - (1i128 << bits);
}
}
"i16" => {
let bits = 16u32;
let max_unsigned = (1i128 << bits) - 1;
let signed_max = i16::MAX as i128;
if v >= 0 && v <= max_unsigned && v > signed_max {
v = v - (1i128 << bits);
}
}
"i32" => {
let bits = 32u32;
let max_unsigned = (1i128 << bits) - 1;
let signed_max = i32::MAX as i128;
if v >= 0 && v <= max_unsigned && v > signed_max {
v = v - (1i128 << bits);
}
}
"i64" => {
let bits = 64u32;
let max_unsigned = (1i128 << bits) - 1;
let signed_max = i64::MAX as i128;
if v >= 0 && v <= max_unsigned && v > signed_max {
v = v - (1i128 << bits);
}
}
_ => {}
}
}
}
// validate suffix ranges if present
let mut overflow = false;
if let Some(ref s) = suffix {
match s.as_str() {
"i8" => if v < i8::MIN as i128 || v > i8::MAX as i128 { overflow = true; }
"i16" => if v < i16::MIN as i128 || v > i16::MAX as i128 { overflow = true; }
"i32" => if v < i32::MIN as i128 || v > i32::MAX as i128 { overflow = true; }
"i64" => if v < i64::MIN as i128 || v > i64::MAX as i128 { overflow = true; }
"u8" => if v < 0 || v > u8::MAX as i128 { overflow = true; }
"u16" => if v < 0 || v > u16::MAX as i128 { overflow = true; }
"u32" => if v < 0 || v > u32::MAX as i128 { overflow = true; }
"u64" => if v < 0 || v > u64::MAX as i128 { overflow = true; }
_ => {}
}
} else {
// default type is i64 per tests: check i64 range
if v < i64::MIN as i128 || v > i64::MAX as i128 { overflow = true; }
}
if overflow {
token.ttype = TokenType::Illegal;
} else {
token.numeric = Some(v);
token.ttype = TokenType::Int;
}
} else {
token.ttype = TokenType::Illegal;
}
}
}
token
}
Some('-') => { Token { ttype: TokenType::Minus, lexeme: "-".to_string(), line: self.line, column: self.column, numeric: None, float: None } }
None => { Token { ttype: TokenType::Eof, lexeme: "".to_string(), line: self.line, column: self.column, numeric: None, float: None } }
Some(_) => { Token { ttype: TokenType::Illegal, lexeme: self.ch.unwrap().to_string(), line: self.line, column: self.column, numeric: None, float: None } }
};
// advance to next char if not EOF and we didn't already advance inside readers
if self.ch.is_some() {
// For cases where read_identifier/read_number/read_string already moved position
// we don't want to skip an extra char. The read_* helpers leave `ch` at the
// character after the token. To keep behavior consistent, only call read_char
// when the token was produced from a single-char branch.
// NOTE(review): an `Illegal` token produced by the numeric branch also falls
// into the `_` arm below, so one character after the bad literal is consumed
// as well -- confirm that this is intended.
match token.ttype {
TokenType::Ident | TokenType::Int | TokenType::Float | TokenType::Str => {}
TokenType::Eof => {}
_ => { self.read_char(); }
}
}
token
}
}
// NOTE(review): two definitions are interleaved here by the diff view: an
// `is_letter` predicate (ASCII letter or underscore) and a `lexical_analysis`
// stub that ignores its argument and returns an empty token vector. Callers
// elsewhere match the result against `LexerResult::...` variants rather than
// a `Result` -- confirm which signature is current.
fn is_letter(c: char) -> bool {
c.is_ascii_alphabetic() || c == '_'
pub fn lexical_analysis(_lexer: &mut LexerInfo) -> LexResult<Vec<Token>> {
Ok(Vec::new())
}
/// Returns `true` for characters allowed inside an identifier: ASCII letters,
/// ASCII digits and the underscore.
fn is_identifier_char(c: char) -> bool {
    matches!(c, '_' | '0'..='9' | 'a'..='z' | 'A'..='Z')
}

View File

@ -1,13 +0,0 @@
pub mod types;
pub mod errors;
pub mod file;
pub mod lexer;
pub mod interpreter;
pub mod repl;
pub mod meta;
pub mod builtin;
// Re-export commonly used items for tests and external users
pub use crate::lexer::{Lexer, Token, TokenType};
pub use crate::types::Value;
pub use crate::interpreter::Interpreter;

View File

@ -1,15 +1,69 @@
use sls::interpreter::Interpreter;
use sls::meta;
use sls::repl;
use std::env;
use std::process;
use crate::file::run_file;
use crate::repl::repl;
use crate::string::SlsStr;
// These mirror the C macros.
const SLS_NAME: &str = "SLS_RUST";
const SLS_VER: &str = "a.0.0";

// Build metadata exported by build.rs through `cargo:rustc-env`.
//
// `option_env!` is used instead of `env!` because the second argument of
// `env!` is a custom compile-error message, not a default value. With
// `env!` a missing variable aborts compilation, whereas these constants are
// meant to fall back to a placeholder.
const GIT_COMMIT_HASH: &str = match option_env!("GIT_COMMIT_HASH") {
    Some(value) => value,
    None => "UNKNOWN",
};
const COMPILER_NAME: &str = match option_env!("COMPILER_NAME") {
    Some(value) => value,
    None => "Unknown",
};
const COMPILER_VER: &str = match option_env!("COMPILER_VER") {
    Some(value) => value,
    None => "0",
};
/// Prints the two-line version banner to stdout: product name, version and
/// commit information, followed by the compiler and the build date/time.
fn print_version() {
    // NOTE(review): both variables must be provided at compile time by the
    // build script under exactly these names; confirm that it emits them.
    let build_date = env!("BUILD_DATE");
    let build_time = env!("BUILD_TIME");

    println!("YREA SLS ({}) {} ({})", SLS_NAME, SLS_VER, GIT_COMMIT_HASH);
    println!(
        "Compiled with {} {} on {} {}",
        COMPILER_NAME, COMPILER_VER, build_date, build_time
    );
}
/// Command-line entry point.
///
/// With no arguments the REPL is started. A single argument is either
/// `--version` / `-v` (print the version banner and exit with status 0) or
/// the path of a script to run. More than one argument is an error and exits
/// with status 1. The process exit status is whatever the chosen mode
/// returns.
// NOTE(review): the lines that use `meta::`, `Interpreter::new()`,
// `interp.eval("")` and `repl::run_repl()` look like remnants of the previous
// `main` interleaved by the diff view -- confirm against the real file.
fn main() {
println!("Starting sls (Rust) - {} v{}", meta::NAME, meta::VERSION);
let mut interp = Interpreter::new();
// placeholder: evaluate empty program to ensure basic startup
match interp.eval("") {
Ok(_) => println!("Interpreter initialized."),
Err(e) => eprintln!("Interpreter failed to initialize: {}", e),
// Skip the program name; only user-supplied arguments remain.
let mut args = env::args().skip(1);
let mut version_flag = false;
let mut filename: Option<String> = None;
match args.len() {
0 => {}
1 => {
let arg = args.next().unwrap();
if arg == "--version" || arg == "-v" {
version_flag = true;
} else {
filename = Some(arg);
}
// start REPL stub
repl::run_repl();
}
_ => {
eprintln!("Too many arguments!");
process::exit(1);
}
}
if version_flag {
print_version();
process::exit(0);
}
if let Some(file) = filename {
let sls_filename = SlsStr::from_string(file);
let status = run_file(sls_filename);
process::exit(status);
}
// Default to REPL
let status = repl();
process::exit(status);
}

View File

@ -1,2 +0,0 @@
pub const VERSION: &str = "0.1.0";
pub const NAME: &str = "sls_rust";

View File

@ -1,8 +1,104 @@
use crate::interpreter::Interpreter;
use std::io::{self, Write};
pub fn run_repl() {
let mut interp = Interpreter::new();
println!("SLS REPL (stub). Type Ctrl+C to exit.");
// For now just call eval once as a smoke test
let _ = interp.eval("");
use crate::lexer::{Lexer, LexerInfo, lexical_analysis};
use crate::meta::print_version;
use crate::interpreter::{InterpreterState, StackType};
use crate::string::{SlsStr};
use crate::errors::{LexerTokenResult, LexerResult};
static REPL_FILE_NAME: &str = "<STDIN>";
/// Prints the top stack entry as a single `#0: ...` line, or a placeholder
/// when the stack is empty.
///
/// Every type except `I64` and `F64` is printed with a `:type` suffix;
/// booleans print as `TRUE` / `FALSE`; token strings and callables print as
/// fixed markers.
fn print_top_of_stack(state: &InterpreterState) {
    let rendered = match state.stack_top() {
        None => String::from("<STACK IS EMPTY>"),
        Some(item) => match &item.value {
            StackType::Identifier(id) => format!("::{}", id.name),
            StackType::I64(v) => format!("{}", v),
            StackType::I32(v) => format!("{}:i32", v),
            StackType::I16(v) => format!("{}:i16", v),
            StackType::I8(v) => format!("{}:i8", v),
            StackType::U64(v) => format!("{}:u64", v),
            StackType::U32(v) => format!("{}:u32", v),
            StackType::U16(v) => format!("{}:u16", v),
            StackType::U8(v) => format!("{}:u8", v),
            StackType::F32(v) => format!("{}:f32", v),
            StackType::F64(v) => format!("{}", v),
            StackType::Character(ch) => format!("{}", ch),
            StackType::Boolean(b) => String::from(if *b { "TRUE" } else { "FALSE" }),
            StackType::TokenString(_) => String::from("<TOKEN STRING>"),
            StackType::Callable(_) => String::from("<CALLABLE>"),
        },
    };
    println!("#0: {}", rendered);
}
/// Runs the interactive read-eval-print loop on stdin/stdout.
///
/// Each input line is lexed and executed against one interpreter that lives
/// for the whole session; after every successfully lexed line the top of the
/// stack is printed. Lexer and runtime errors are reported and the loop
/// continues with the next line.
///
/// Returns a process-style status: `0` when the user types `#exit` or stdin
/// reaches end of input, `1` when the interpreter cannot be created or
/// stdin/stdout fails.
pub fn repl() -> i32 {
    print_version();
    println!("===== YREA SLS REPL =====");
    println!("Type `#exit` to exit.");
    // A broken stdout is reported through the status instead of panicking.
    if io::stdout().flush().is_err() {
        return 1;
    }

    let mut interpreter = match InterpreterState::new() {
        Some(i) => i,
        None => return 1,
    };

    let stdin = io::stdin();
    let mut buf = String::new();
    loop {
        buf.clear();
        match stdin.read_line(&mut buf) {
            Err(_) => return 1,
            // `read_line` yields Ok(0) at end of input (Ctrl-D, closed pipe).
            // Without this arm the loop would spin forever on an empty buffer.
            Ok(0) => return 0,
            Ok(_) => {}
        }
        if buf.trim_end() == "#exit" {
            return 0;
        }

        // `buf` is reused for the next line, so the lexer gets its own copy.
        let code = SlsStr::from_string(buf.clone());
        let mut lexer_info = LexerInfo::new(REPL_FILE_NAME.into(), code);
        let result = lexical_analysis(&mut lexer_info);
        match result {
            LexerResult::Error(err) => {
                println!("{}", err.message);
            }
            LexerResult::Ok(mut head) => {
                let mut node = Some(head.as_mut());
                while let Some(tok_node) = node {
                    match tok_node {
                        LexerTokenResult::Error(err) => {
                            println!("{}", err.message);
                            break;
                        }
                        LexerTokenResult::Token(tok) => {
                            if !interpreter.execute(tok) {
                                println!("A runtime error occurred!");
                                break;
                            }
                        }
                        LexerTokenResult::Next(_, next) => {
                            // Follow the chain without ending the walk.
                            node = next.as_deref_mut();
                            continue;
                        }
                    }
                    // A `Token` node ends the walk.
                    node = None;
                }
                print_top_of_stack(&interpreter);
            }
        }
    }
}

View File

@ -1,26 +0,0 @@
use std::collections::HashMap;
/// A dynamically typed value.
#[derive(Debug, Clone)]
pub enum Value {
    /// Absence of a value.
    Nil,
    Bool(bool),
    Int(i64),
    Float(f64),
    Str(String),
    /// String-keyed map of nested values.
    Object(HashMap<String, Value>),
    /// A function, carried as a string.
    Function(String),
}

impl Value {
    /// Returns the lowercase name of the variant, e.g. `"int"` for
    /// `Value::Int`.
    pub fn type_name(&self) -> &'static str {
        match *self {
            Self::Function(_) => "function",
            Self::Object(_) => "object",
            Self::Str(_) => "str",
            Self::Float(_) => "float",
            Self::Int(_) => "int",
            Self::Bool(_) => "bool",
            Self::Nil => "nil",
        }
    }
}

File diff suppressed because it is too large Load Diff

View File

@ -1,127 +0,0 @@
import yaml
import re
from pathlib import Path
"""
Convert YAML test cases to Rust integration tests for the `sls` crate.
Usage:
python3 SLS_Tests/yaml_to_rust_tests.py SLS_Tests/cases.yaml SLS_Rust/sls/tests/lexer_tests_generated.rs
This generator produces simple `#[test]` functions that run the lexer and
verify token kinds and values (basic checks). It's intentionally conservative:
it compares token types and lexemes/numeric values where applicable.
"""
def sanitize_name(name: str) -> str:
    """Turn an arbitrary test title into a Rust test function name.

    Every character outside ``[a-zA-Z0-9_]`` becomes ``_``, runs of
    underscores collapse to one, leading/trailing underscores are dropped and
    the result is lower-cased. An empty result becomes ``unnamed``. The name
    is always prefixed with ``test_``.
    """
    replaced = re.sub(r"[^a-zA-Z0-9_]", "_", name)
    collapsed = re.sub(r"_+", "_", replaced)
    slug = collapsed.strip("_").lower() or "unnamed"
    return f"test_{slug}"
def rust_string_literal(s: str) -> str:
    """Escape ``s`` for use between double quotes in a Rust string literal.

    Backslashes and double quotes are escaped, and line feeds, carriage
    returns and tabs are written as ``\\n``, ``\\r`` and ``\\t``. Escaping
    the control characters matters because a bare carriage return is rejected
    inside a Rust string literal and a raw CRLF pair is read back as a single
    line feed, so the generated test would not see the original text.
    """
    # Backslash must be handled first so later escapes are not doubled.
    escaped = s.replace('\\', '\\\\').replace('"', '\\"')
    return escaped.replace('\n', '\\n').replace('\r', '\\r').replace('\t', '\\t')
def token_match_expectation(token_var: str, expected: dict) -> str:
    """Return Rust assertion source checking ``token_var`` against ``expected``.

    ``expected`` is one token entry from the YAML case file, with a ``type``
    and usually a ``value``. The returned text may span several lines joined
    by newlines; the caller is responsible for indenting it.
    """
    ttype = expected.get('type')
    val = expected.get('value')
    if ttype in ('i64','i32','i16','i8','u64','u32','u16','u8'):
        # check numeric field produced by the lexer
        # allow constant names like INT64_MIN to be used directly in generated code
        if isinstance(val, str) and re.match(r"^[A-Za-z_][A-Za-z0-9_]*$", val):
            vexpr = val
        else:
            vexpr = str(val)
        return f"assert_eq!({token_var}.ttype, sls::lexer::TokenType::Int);\n assert!({token_var}.numeric.is_some(), \"expected numeric value\");\n assert_eq!({token_var}.numeric.unwrap(), {vexpr});"
    elif ttype in ('f64','f32'):
        # A whole-number value such as `1` would otherwise be emitted as an
        # integer literal, and `f64 - 1` does not type-check in Rust. Render
        # real numbers as floats; anything else is passed through unchanged.
        if isinstance(val, (int, float)) and not isinstance(val, bool):
            fexpr = repr(float(val))
        else:
            fexpr = str(val)
        return f"assert_eq!({token_var}.ttype, sls::lexer::TokenType::Float);\n assert!({token_var}.float.is_some(), \"expected float value\");\n assert!(({token_var}.float.unwrap() - {fexpr}).abs() < 1e-12);"
    elif ttype == 'string':
        return f"assert_eq!({token_var}.ttype, sls::lexer::TokenType::Str);\n assert_eq!({token_var}.lexeme, \"{rust_string_literal(str(val))}\");"
    elif ttype in ('identifier', 'identifier_literal'):
        return f"assert_eq!({token_var}.ttype, sls::lexer::TokenType::Ident);\n assert_eq!({token_var}.lexeme, \"{rust_string_literal(str(val))}\");"
    elif ttype == 'char':
        # Single-character strings are compared by code point.
        codepoint = ord(val) if isinstance(val, str) and len(val) == 1 else val
        return f"assert_eq!({token_var}.ttype, sls::lexer::TokenType::Int);\n assert!({token_var}.numeric.is_some(), \"expected numeric char code\");\n assert_eq!({token_var}.numeric.unwrap(), {codepoint});"
    elif ttype == 'bool':
        return f"assert_eq!({token_var}.ttype, sls::lexer::TokenType::Ident);\n assert_eq!({token_var}.lexeme, \"{'true' if val else 'false'}\");"
    elif ttype == 'error':
        # For now, assert that we got an Illegal token
        return f"assert_eq!({token_var}.ttype, sls::lexer::TokenType::Illegal);"
    elif ttype == 'token_string':
        # Complex nested token strings are not handled by this simple generator
        return f"// token_string check not implemented; received token: {{:#?}}\n // TODO: implement nested expectations\n // for now just assert we got an Ident or similar\n assert!(!{token_var}.lexeme.is_empty());"
    else:
        return f"// Unhandled expected token type: {ttype}\n assert!(!{token_var}.lexeme.is_empty());"
def generate_rust_test(test: dict) -> str:
    """Render one YAML test case as the source of a Rust ``#[test]`` function.

    The generated function lexes the case's ``code`` until ``Eof``, checks the
    number of tokens (or that there are none), then emits one group of
    assertions per expected token.
    """
    fn_name = sanitize_name(test.get('name','unnamed'))
    source = rust_string_literal(str(test.get('code','')))
    expected = test.get('tokens', [])

    # Basic assertion count vs expected (allow zero expected -> empty)
    if expected:
        count_check = f" assert_eq!(got.len(), {len(expected)}usize, \"token count mismatch\");"
    else:
        count_check = " assert!(got.is_empty());"

    body = [
        "#[test]",
        f"fn {fn_name}() {{",
        f" let src = \"{source}\";",
        " let mut lexer = sls::lexer::Lexer::new(src);",
        " let mut got = vec![];",
        " loop {",
        " let t = lexer.next_token();",
        " if t.ttype == sls::lexer::TokenType::Eof { break; }",
        " got.push(t);",
        " }",
        "",
        count_check,
    ]
    for index, token in enumerate(expected):
        checks = token_match_expectation(f"got[{index}]", token)
        # Each expectation may span several lines; indent every one of them.
        body.extend(f" {line}" for line in checks.split('\n'))
    body += ["}", ""]
    return "\n".join(body)
def yaml_to_rust_tests(yaml_path: str, output_path: str):
    """Convert the YAML case file at ``yaml_path`` into a Rust test file.

    The YAML document must be a list of test cases; anything else raises
    ``ValueError``. The generated source is written to ``output_path`` as
    UTF-8 and a one-line summary is printed.
    """
    cases = yaml.safe_load(Path(yaml_path).read_text(encoding='utf-8'))
    if not isinstance(cases, list):
        raise ValueError('Expected YAML to be a list of tests')

    rendered = [generate_rust_test(case) for case in cases]

    # Fixed preamble of the generated file.
    header = (
        "// Generated tests - do not edit by hand\n"
        "// Use: run `python3 SLS_Tests/yaml_to_rust_tests.py SLS_Tests/cases.yaml tests/lexer_tests_generated.rs`\n"
        "use sls; // crate under test\n"
        "const INT64_MIN: i128 = i64::MIN as i128;\n"
        "const UINT64_MAX: i128 = u64::MAX as i128;\n"
    )
    Path(output_path).write_text(header + "\n".join(rendered), encoding='utf-8')
    print(f"Generated {len(rendered)} Rust tests -> {output_path}")
# Command-line entry point: two positional arguments, the YAML case file and
# the Rust file to generate.
if __name__ == '__main__':
    import argparse

    cli = argparse.ArgumentParser()
    for positional in ('input', 'output'):
        cli.add_argument(positional)
    parsed = cli.parse_args()
    yaml_to_rust_tests(parsed.input, parsed.output)