Compare commits

...

4 Commits

Author SHA1 Message Date
Kyler Olsen 6f4be5a929 Small fixes 2025-12-02 11:31:46 -07:00
Kyler Olsen a154741176 Added numeric type annotations 2025-12-02 11:31:27 -07:00
Kyler Olsen a26a1c0d4a Merge branch 'rust' into rust-claude 2025-12-02 10:45:31 -07:00
Kyler Olsen 08a8aadf16 Claude attempt at lexer.rs 2025-12-01 23:46:46 -07:00
3 changed files with 623 additions and 12 deletions

View File

@ -124,7 +124,7 @@ impl InterpreterState {
} }
pub fn stack_top(&self) -> Option<&StackValue> { pub fn stack_top(&self) -> Option<&StackValue> {
self.stack.get(0) self.stack.get(self.stack.len() - 1)
} }
} }

View File

@ -17,6 +17,47 @@ impl LexerInfo {
line: 1, line: 1,
} }
} }
fn peek(&self) -> char {
self.source.chars().nth(self.pos).unwrap_or('\0')
}
fn far_peek(&self, offset: usize) -> char {
self.source.chars().nth(self.pos + offset).unwrap_or('\0')
}
fn advance(&mut self) -> char {
if self.peek() == '\n' {
self.line += 1;
self.column = 1;
} else {
self.column += 1;
}
self.pos += 1;
self.peek()
}
fn skip_comments_and_whitespace(&mut self) {
loop {
let c = self.peek();
// Skip comments
if (c == '/' && self.far_peek(1) == '/') || c == '#' {
while self.peek() != '\n' && self.peek() != '\0' {
self.advance();
}
}
// Skip whitespace
if self.peek().is_whitespace() {
while self.peek().is_whitespace() {
self.advance();
}
} else {
break;
}
}
}
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
@ -83,10 +124,7 @@ pub enum StructValue {
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub enum Token { pub enum Token {
Eof, Eof,
Identifier(Identifier), Identifier(Identifier),
// All integer sizes
I64(i64), I64(i64),
I32(i32), I32(i32),
I16(i16), I16(i16),
@ -95,14 +133,11 @@ pub enum Token {
U32(u32), U32(u32),
U16(u16), U16(u16),
U8(u8), U8(u8),
Float(f32), Float(f32),
Double(f64), Double(f64),
Character(u8), Character(u8),
StringLiteral(String), StringLiteral(String),
Boolean(bool), Boolean(bool),
Array(ShapedArray), Array(ShapedArray),
TokenString(TokenString), TokenString(TokenString),
TypeTuple(TypeTuple), TypeTuple(TypeTuple),
@ -118,10 +153,585 @@ pub struct LexError {
pub type LexResult<T> = Result<T, LexError>; pub type LexResult<T> = Result<T, LexError>;
pub fn get_token(_lexer: &mut LexerInfo) -> Option<Token> { #[derive(Debug, Clone, Copy)]
None enum NumericLiteralType {
Binary,
Octal,
Decimal,
Hexadecimal,
Float,
} }
pub fn lexical_analysis(_lexer: &mut LexerInfo) -> LexResult<Vec<Token>> { impl LexerInfo {
Ok(Vec::new()) fn make_error(&self, message: impl Into<String>, start_line: usize, start_col: usize) -> LexError {
LexError {
message: message.into(),
file: self.filename.clone(),
line: start_line,
column: start_col,
}
}
fn is_identifier_continue(&self, c: char) -> bool {
if !c.is_ascii() || !c.is_ascii_graphic() {
return false;
}
if c == '/' && self.far_peek(1) == '/' {
return false;
}
!matches!(c, '{' | '}' | '[' | ']' | '(' | ')' | '\'' | '"' | '#') && !c.is_whitespace()
}
fn is_identifier_start(&self) -> bool {
let mut c = self.peek();
if c == ':' && self.far_peek(1) == ':' {
c = self.far_peek(2);
}
!c.is_ascii_digit() && self.is_identifier_continue(c)
}
fn parse_identifiers_and_booleans(&mut self, start: usize, start_line: usize, start_col: usize) -> LexResult<Token> {
let mut c = self.peek();
let mut literal = false;
// Skip leading `::` for identifier literals
if c == ':' && self.far_peek(1) == ':' {
literal = true;
self.advance();
c = self.advance();
}
// Read the name
let name_start = self.pos;
while self.is_identifier_continue(c) {
if c == ':' {
return Err(self.make_error("Invalid identifier: ':' is not allowed in identifiers.", start_line, start_col));
}
if c == '.' {
return Err(self.make_error("Invalid identifier: '.' is not allowed in identifiers.", start_line, start_col));
}
c = self.advance();
}
let name = self.source[name_start..self.pos].to_string();
// Check for booleans
match name.as_str() {
"false" => Ok(Token::Boolean(false)),
"true" => Ok(Token::Boolean(true)),
_ => Ok(Token::Identifier(Identifier { name, is_literal: literal })),
}
}
fn parse_character_literal(&mut self, start_line: usize, start_col: usize) -> LexResult<Token> {
let mut c = self.peek();
if c == '\'' {
return Err(self.make_error("Invalid character literal: empty character literal.", start_line, start_col));
}
let value = if c == '\\' {
c = self.advance();
match c {
'n' => b'\n',
'r' => b'\r',
't' => b'\t',
'\\' => b'\\',
'\'' => b'\'',
'0' => b'\0',
_ => return Err(self.make_error(format!("Invalid character literal: unknown escape sequence '\\{}'.", c), start_line, start_col)),
}
} else if c == '\n' || c == '\r' {
return Err(self.make_error("Invalid character literal: unclosed character literal.", start_line, start_col));
} else {
c as u8
};
c = self.advance();
if c.is_whitespace() || c == '/' || c == '\0' {
return Err(self.make_error("Invalid character literal: unclosed character literal.", start_line, start_col));
} else if c != '\'' {
return Err(self.make_error(format!("Invalid character literal: unexpected '{}' in character.", c), start_line, start_col));
}
self.advance();
Ok(Token::Character(value))
}
fn parse_token_string(&mut self, start: usize, start_line: usize, start_col: usize) -> LexResult<Token> {
let mut tokens = Vec::new();
self.advance(); // skip '{'
loop {
self.skip_comments_and_whitespace();
let c = self.peek();
if c == '}' {
self.advance();
return Ok(Token::TokenString(TokenString { tokens }));
}
if c == '\0' {
return Err(self.make_error("Unclosed token string: missing closing brace '}'.", start_line, start_col));
}
match get_token(self) {
Some(token) => {
if matches!(token, Token::Eof) {
break;
}
tokens.push(token);
}
None => return Err(self.make_error("Failed to parse token in token string.", start_line, start_col)),
}
}
Err(self.make_error("Unclosed token string: missing closing brace '}'.", start_line, start_col))
}
fn parse_numeric_literal(&mut self, start: usize, start_line: usize, start_col: usize) -> LexResult<Token> {
let mut c = self.peek();
if c == '-' {
c = self.advance();
}
if c == '0' {
c = self.advance();
match c {
'b' | 'B' => {
self.advance();
return self.parse_binary_integer(start, start_line, start_col);
}
'o' | 'O' => {
self.advance();
return self.parse_octal_integer(start, start_line, start_col);
}
'x' | 'X' => {
self.advance();
return self.parse_hexadecimal_integer(start, start_line, start_col);
}
_ => {}
}
}
self.parse_decimal_integer(start, start_line, start_col)
}
fn parse_binary_integer(&mut self, start: usize, start_line: usize, start_col: usize) -> LexResult<Token> {
let mut c = self.peek();
while c == '0' || c == '1' || c == '_' {
c = self.advance();
}
if c == ':' {
return self.parse_numeric_type(start, start_line, start_col, NumericLiteralType::Binary);
}
let value = self.create_binary_integer(start);
Ok(Token::I64(value as i64))
}
fn parse_octal_integer(&mut self, start: usize, start_line: usize, start_col: usize) -> LexResult<Token> {
let mut c = self.peek();
while c.is_ascii_digit() && c != '8' && c != '9' || c == '_' {
c = self.advance();
}
if c == ':' {
return self.parse_numeric_type(start, start_line, start_col, NumericLiteralType::Octal);
}
let value = self.create_octal_integer(start);
Ok(Token::I64(value as i64))
}
fn parse_decimal_integer(&mut self, start: usize, start_line: usize, start_col: usize) -> LexResult<Token> {
let mut c = self.peek();
while c.is_ascii_digit() || c == '_' {
c = self.advance();
}
if c == '.' {
self.advance();
return self.parse_float(start, start_line, start_col);
}
if c == ':' {
return self.parse_numeric_type(start, start_line, start_col, NumericLiteralType::Decimal);
}
let value = self.create_decimal_integer(start);
Ok(Token::I64(value as i64))
}
fn parse_hexadecimal_integer(&mut self, start: usize, start_line: usize, start_col: usize) -> LexResult<Token> {
let mut c = self.peek();
while c.is_ascii_hexdigit() || c == '_' {
c = self.advance();
}
if c == ':' {
return self.parse_numeric_type(start, start_line, start_col, NumericLiteralType::Hexadecimal);
}
let value = self.create_hexadecimal_integer(start);
Ok(Token::I64(value as i64))
}
fn parse_float(&mut self, start: usize, start_line: usize, start_col: usize) -> LexResult<Token> {
let mut c = self.peek();
while c.is_ascii_digit() || c == '_' {
c = self.advance();
}
if c == ':' {
return self.parse_numeric_type(start, start_line, start_col, NumericLiteralType::Float);
}
let value = self.create_float(start);
Ok(Token::Double(value))
}
fn parse_numeric_type(&mut self, start: usize, start_line: usize, start_col: usize, literal_type: NumericLiteralType) -> LexResult<Token> {
let mut c = self.advance(); // skip ':'
let mut is_float = false;
let mut is_unsigned = false;
let mut bit_size = 64; // default
if c == 'f' {
is_float = true;
if !matches!(literal_type, NumericLiteralType::Decimal | NumericLiteralType::Float) {
return Err(self.make_error("Invalid numeric literal: float type not allowed.", start_line, start_col));
}
c = self.advance();
if c == '6' && self.far_peek(1) == '4' {
bit_size = 64;
self.advance();
self.advance();
} else if c == '3' && self.far_peek(1) == '2' {
bit_size = 32;
self.advance();
self.advance();
} else {
return Err(self.make_error("Invalid float type: must be of type 'f64' or 'f32'.", start_line, start_col));
}
} else if c == 'i' || c == 'u' {
if matches!(literal_type, NumericLiteralType::Float) {
return Err(self.make_error("Invalid float type: must be of type 'f64' or 'f32'.", start_line, start_col));
}
is_unsigned = c == 'u';
c = self.advance();
if c == '6' && self.far_peek(1) == '4' {
bit_size = 64;
self.advance();
self.advance();
} else if c == '3' && self.far_peek(1) == '2' {
bit_size = 32;
self.advance();
self.advance();
} else if c == '1' && self.far_peek(1) == '6' {
bit_size = 16;
self.advance();
self.advance();
} else if c == '8' {
bit_size = 8;
self.advance();
} else {
let type_name = if is_unsigned { "unsigned" } else { "signed" };
return Err(self.make_error(
format!("Invalid {} integer type: must be of type '{}64', '{}32', '{}16', or '{}8'.",
type_name, if is_unsigned { "u" } else { "i" },
if is_unsigned { "u" } else { "i" },
if is_unsigned { "u" } else { "i" },
if is_unsigned { "u" } else { "i" }),
start_line, start_col));
}
} else {
return Err(self.make_error("Invalid numeric type: type must start with 'f', 'i', or 'u'.", start_line, start_col));
}
// Create the token based on the parsed type
if is_float {
let value = self.create_float(start);
match bit_size {
32 => Ok(Token::Float(value as f32)),
64 => Ok(Token::Double(value)),
_ => unreachable!()
}
} else {
let value = match literal_type {
NumericLiteralType::Binary => self.create_binary_integer(start),
NumericLiteralType::Octal => self.create_octal_integer(start),
NumericLiteralType::Decimal => self.create_decimal_integer(start),
NumericLiteralType::Hexadecimal => self.create_hexadecimal_integer(start),
NumericLiteralType::Float => return Err(self.make_error("Internal error: float literal in integer path", start_line, start_col)),
};
self.create_integer_token(value, is_unsigned, bit_size, start, start_line, start_col)
}
}
fn create_integer_token(&self, value: u64, is_unsigned: bool, bit_size: u32, start: usize, start_line: usize, start_col: usize) -> LexResult<Token> {
let is_negative = self.source[start..].starts_with('-');
match (is_unsigned, bit_size) {
(false, 64) => Ok(Token::I64(value as i64)),
(false, 32) => {
let signed = value as i64;
if signed < i32::MIN as i64 || signed > i32::MAX as i64 {
return Err(self.make_error("Integer overflow: value exceeds range for i32.", start_line, start_col));
}
Ok(Token::I32(value as i32))
}
(false, 16) => {
let signed = value as i64;
if signed < i16::MIN as i64 || signed > i16::MAX as i64 {
return Err(self.make_error("Integer overflow: value exceeds range for i16.", start_line, start_col));
}
Ok(Token::I16(value as i16))
}
(false, 8) => {
let signed = value as i64;
if signed < i8::MIN as i64 || signed > i8::MAX as i64 {
return Err(self.make_error("Integer overflow: value exceeds range for i8.", start_line, start_col));
}
Ok(Token::I8(value as i8))
}
(true, 64) => {
if is_negative {
return Err(self.make_error("Integer overflow: value exceeds range for u64.", start_line, start_col));
}
Ok(Token::U64(value))
}
(true, 32) => {
if is_negative {
return Err(self.make_error("Integer overflow: value exceeds range for u32.", start_line, start_col));
}
if value > u32::MAX as u64 {
return Err(self.make_error("Integer overflow: value exceeds range for u32.", start_line, start_col));
}
Ok(Token::U32(value as u32))
}
(true, 16) => {
if is_negative {
return Err(self.make_error("Integer overflow: value exceeds range for u16.", start_line, start_col));
}
if value > u16::MAX as u64 {
return Err(self.make_error("Integer overflow: value exceeds range for u16.", start_line, start_col));
}
Ok(Token::U16(value as u16))
}
(true, 8) => {
if is_negative {
return Err(self.make_error("Integer overflow: value exceeds range for u8.", start_line, start_col));
}
if value > u8::MAX as u64 {
return Err(self.make_error("Integer overflow: value exceeds range for u8.", start_line, start_col));
}
Ok(Token::U8(value as u8))
}
_ => Err(self.make_error("Invalid bit size for integer type.", start_line, start_col))
}
}
fn create_binary_integer(&self, start: usize) -> u64 {
let token = &self.source[start..self.pos];
let mut value = 0u64;
let mut i = 2;
if token.starts_with('-') {
i += 1;
}
for c in token[i..].chars() {
if c == '_' || c == '.' {
continue;
}
if c.is_whitespace() || c == '/' || c == ':' {
break;
}
value *= 2;
if c == '1' {
value += 1;
}
}
if token.starts_with('-') {
(!value).wrapping_add(1)
} else {
value
}
}
fn create_octal_integer(&self, start: usize) -> u64 {
let token = &self.source[start..self.pos];
let mut value = 0u64;
let mut i = 2;
if token.starts_with('-') {
i += 1;
}
for c in token[i..].chars() {
if c == '_' || c == '.' {
continue;
}
if c.is_whitespace() || c == '/' || c == ':' {
break;
}
value *= 8;
value += c.to_digit(8).unwrap_or(0) as u64;
}
if token.starts_with('-') {
(!value).wrapping_add(1)
} else {
value
}
}
fn create_decimal_integer(&self, start: usize) -> u64 {
let token = &self.source[start..self.pos];
let mut value = 0u64;
let mut i = 0;
if token.starts_with('-') {
i += 1;
}
for c in token[i..].chars() {
if c == '_' {
continue;
}
if c.is_whitespace() || c == '/' || c == ':' {
break;
}
value *= 10;
value += c.to_digit(10).unwrap_or(0) as u64;
}
if token.starts_with('-') {
(!value).wrapping_add(1)
} else {
value
}
}
fn create_hexadecimal_integer(&self, start: usize) -> u64 {
let token = &self.source[start..self.pos];
let mut value = 0u64;
let mut i = 2;
if token.starts_with('-') {
i += 1;
}
for c in token[i..].chars() {
if c == '_' || c == '.' {
continue;
}
if c.is_whitespace() || c == '/' || c == ':' {
break;
}
value *= 16;
value += c.to_digit(16).unwrap_or(0) as u64;
}
if token.starts_with('-') {
(!value).wrapping_add(1)
} else {
value
}
}
fn create_float(&self, start: usize) -> f64 {
let token = &self.source[start..self.pos];
let mut value = 0.0;
let mut fractional = 0u64;
let mut i = 0;
if token.starts_with('-') {
i += 1;
}
for c in token[i..].chars() {
if c == '_' {
continue;
}
if c.is_whitespace() || c == '/' || c == ':' {
break;
}
if c == '.' {
fractional = 1;
continue;
}
if fractional == 0 {
value *= 10.0;
} else {
fractional *= 10;
}
let digit = c.to_digit(10).unwrap_or(0) as f64;
if fractional == 0 {
value += digit;
} else {
value += digit / fractional as f64;
}
}
if token.starts_with('-') {
-value
} else {
value
}
}
}
pub fn get_token(lexer: &mut LexerInfo) -> Option<Token> {
lexer.skip_comments_and_whitespace();
let c = lexer.peek();
let start = lexer.pos;
let start_line = lexer.line;
let start_col = lexer.column;
if c == '\0' {
return Some(Token::Eof);
}
let result = if c.is_ascii_digit() || (c == '.' && lexer.far_peek(1).is_ascii_digit()) || (c == '-' && lexer.far_peek(1).is_ascii_digit()) {
lexer.parse_numeric_literal(start, start_line, start_col)
} else if c == '\'' {
lexer.advance();
lexer.parse_character_literal(start_line, start_col)
} else if c == '{' {
lexer.parse_token_string(start, start_line, start_col)
} else if lexer.is_identifier_start() {
lexer.parse_identifiers_and_booleans(start, start_line, start_col)
} else {
Err(lexer.make_error(format!("Unexpected character: '{}'", c), start_line, start_col))
};
result.ok()
}
pub fn lexical_analysis(lexer: &mut LexerInfo) -> LexResult<Vec<Token>> {
let mut tokens = Vec::new();
loop {
match get_token(lexer) {
Some(Token::Eof) => {
tokens.push(Token::Eof);
break;
}
Some(token) => tokens.push(token),
None => break,
}
}
Ok(tokens)
} }

View File

@ -67,7 +67,8 @@ pub fn repl() -> i32 {
LexResult::Ok(tokens) => { LexResult::Ok(tokens) => {
for token in tokens { for token in tokens {
if !interpreter.execute(token) { if !interpreter.execute(token) {
return 1; println!("A runtime error occured!");
break;
} }
} }