// Lexer: turns source text into a flat stream of `Token`s.
//
// Comments run from `//` or `#` to the end of the line. Numeric literals accept
// `0b` / `0o` / `0x` prefixes, `_` digit separators and an optional `:<type>`
// suffix such as `:i32`, `:u8` or `:f64`.

/// Cursor over one source file, tracking the read position and the
/// line/column used for diagnostics.
#[derive(Debug, Clone)]
pub struct LexerInfo {
    /// File name copied into every [`LexError`].
    pub filename: String,
    /// Complete source text being lexed.
    pub source: String,
    /// Byte offset into `source` of the next unread character.
    pub pos: usize,
    /// 1-based column of the next unread character, counted in characters.
    pub column: usize,
    /// 1-based line of the next unread character.
    pub line: usize,
}

impl LexerInfo {
    /// Creates a lexer positioned at the start of `source` (line 1, column 1).
    pub fn new(filename: impl Into<String>, source: impl Into<String>) -> Self {
        Self {
            filename: filename.into(),
            source: source.into(),
            pos: 0,
            column: 1,
            line: 1,
        }
    }

    /// Unread remainder of the source; empty when `pos` is out of range or not
    /// on a character boundary, so such a position reads as end of input.
    fn rest(&self) -> &str {
        self.source.get(self.pos..).unwrap_or("")
    }

    /// Returns the next unread character, or `'\0'` at end of input.
    fn peek(&self) -> char {
        self.rest().chars().next().unwrap_or('\0')
    }

    /// Returns the character `offset` characters ahead of the cursor, or
    /// `'\0'` when that is past the end of input.
    fn far_peek(&self, offset: usize) -> char {
        self.rest().chars().nth(offset).unwrap_or('\0')
    }

    /// Consumes one character, updates line/column, and returns the character
    /// that is now under the cursor. Does nothing at end of input.
    fn advance(&mut self) -> char {
        if let Some(current) = self.rest().chars().next() {
            if current == '\n' {
                self.line += 1;
                self.column = 1;
            } else {
                self.column += 1;
            }
            // Step by the encoded width so `pos` stays a valid slice index.
            self.pos += current.len_utf8();
        }
        self.peek()
    }

    /// Skips any run of whitespace and `//` / `#` line comments.
    fn skip_comments_and_whitespace(&mut self) {
        loop {
            let c = self.peek();
            if (c == '/' && self.far_peek(1) == '/') || c == '#' {
                while !matches!(self.peek(), '\n' | '\0') {
                    self.advance();
                }
            }
            if !self.peek().is_whitespace() {
                break;
            }
            while self.peek().is_whitespace() {
                self.advance();
            }
        }
    }
}

/// A name token; `is_literal` is set when the name was written with a leading `::`.
#[derive(Debug, Clone)]
pub struct Identifier {
    pub name: String,
    pub is_literal: bool,
}

/// Homogeneous array payloads. Not produced by the lexer code in this file.
#[derive(Debug, Clone)]
pub enum ArrayLiteral {
    _Identifiers(Vec<Identifier>),
    _I64(Vec<i64>),
    _I32(Vec<i32>),
    _I16(Vec<i16>),
    _I8(Vec<i8>),
    _U64(Vec<u64>),
    _U32(Vec<u32>),
    _U16(Vec<u16>),
    _U8(Vec<u8>),
    _Float(Vec<f32>),
    _Double(Vec<f64>),
    _Character(Vec<u8>),
    _Strings(Vec<String>),
    _Boolean(Vec<bool>),
    _TokenStrings(Vec<TokenString>),
    _TypeTuples(Vec<TypeTuple>),
    _StructInline(StructInline),
}

/// An array literal together with its shape.
#[derive(Debug, Clone)]
pub struct ShapedArray {
    pub _array: ArrayLiteral,
    // NOTE(review): element type reconstructed as `usize` - confirm against callers.
    pub _shape: Vec<usize>,
}

/// The tokens found between `{` and `}`.
#[derive(Debug, Clone)]
pub struct TokenString {
    pub tokens: Vec<Token>,
}

/// Input/output type lists. Not produced by the lexer code in this file.
#[derive(Debug, Clone)]
pub struct TypeTuple {
    // NOTE(review): element type reconstructed as `String` - confirm against callers.
    pub _inputs: Vec<String>,
    // NOTE(review): element type reconstructed as `String` - confirm against callers.
    pub _outputs: Vec<String>,
}

/// A named inline struct value. Not produced by the lexer code in this file.
#[derive(Debug, Clone)]
pub struct StructInline {
    pub _name: String,
    pub _values: Vec<StructValue>,
}

/// One field value of a [`StructInline`].
#[derive(Debug, Clone)]
pub enum StructValue {
    _Integer(i64),
    _Float(f32),
    _Double(f64),
    _Boolean(bool),
    _Character(u8),
    _String(String),
    _Token(Token),
}

/// A lexical token.
#[derive(Debug, Clone)]
pub enum Token {
    Eof,
    Identifier(Identifier),
    I64(i64),
    I32(i32),
    I16(i16),
    I8(i8),
    U64(u64),
    U32(u32),
    U16(u16),
    U8(u8),
    Float(f32),
    Double(f64),
    Character(u8),
    StringLiteral(String),
    Boolean(bool),
    Array(ShapedArray),
    TokenString(TokenString),
    TypeTuple(TypeTuple),
}

/// A lexical error and the position of the token that caused it.
#[derive(Debug, Clone)]
pub struct LexError {
    pub message: String,
    pub file: String,
    pub line: usize,
    pub column: usize,
}

/// Result alias used by every fallible lexer routine.
pub type LexResult<T> = Result<T, LexError>;

/// Spelling of a numeric literal; decides the radix and which type suffixes are legal.
#[derive(Debug, Clone, Copy)]
enum NumericLiteralType {
    Binary,
    Octal,
    Decimal,
    Hexadecimal,
    Float,
}

impl NumericLiteralType {
    /// Radix of the digits of this kind of literal.
    fn radix(self) -> u32 {
        match self {
            NumericLiteralType::Binary => 2,
            NumericLiteralType::Octal => 8,
            NumericLiteralType::Decimal | NumericLiteralType::Float => 10,
            NumericLiteralType::Hexadecimal => 16,
        }
    }
}

impl LexerInfo {
    /// Builds a [`LexError`] for this file at the given token start position.
    fn make_error(&self, message: impl Into<String>, start_line: usize, start_col: usize) -> LexError {
        LexError {
            message: message.into(),
            file: self.filename.clone(),
            line: start_line,
            column: start_col,
        }
    }

    /// Whether `c` may appear inside an identifier. A `/` is rejected when the
    /// character after the cursor is also `/`, i.e. when it would open a comment.
    fn is_identifier_continue(&self, c: char) -> bool {
        // `is_ascii_graphic` already excludes whitespace and non-ASCII characters.
        if !c.is_ascii_graphic() {
            return false;
        }
        if c == '/' && self.far_peek(1) == '/' {
            return false;
        }
        !matches!(c, '{' | '}' | '[' | ']' | '(' | ')' | '\'' | '"' | '#')
    }

    /// Whether an identifier (optionally prefixed with `::`) starts at the cursor.
    fn is_identifier_start(&self) -> bool {
        let mut c = self.peek();
        if c == ':' && self.far_peek(1) == ':' {
            c = self.far_peek(2);
        }
        !c.is_ascii_digit() && self.is_identifier_continue(c)
    }

    /// Lexes an identifier, or the keywords `true` / `false`.
    ///
    /// # Errors
    /// Fails when the name contains `:` or `.`.
    fn parse_identifiers_and_booleans(&mut self, _start: usize, start_line: usize, start_col: usize) -> LexResult<Token> {
        let mut c = self.peek();
        let mut literal = false;

        // Skip leading `::` for identifier literals
        if c == ':' && self.far_peek(1) == ':' {
            literal = true;
            self.advance();
            c = self.advance();
        }

        // Read the name
        let name_start = self.pos;
        while self.is_identifier_continue(c) {
            if c == ':' {
                return Err(self.make_error("Invalid identifier: ':' is not allowed in identifiers.", start_line, start_col));
            }
            if c == '.' {
                return Err(self.make_error("Invalid identifier: '.' is not allowed in identifiers.", start_line, start_col));
            }
            c = self.advance();
        }
        let name = self.source[name_start..self.pos].to_string();

        // Check for booleans
        match name.as_str() {
            "false" => Ok(Token::Boolean(false)),
            "true" => Ok(Token::Boolean(true)),
            _ => Ok(Token::Identifier(Identifier { name, is_literal: literal })),
        }
    }

    /// Lexes the remainder of a character literal; the opening quote has
    /// already been consumed by the caller.
    ///
    /// # Errors
    /// Fails on an empty or unclosed literal, an unknown escape, extra
    /// characters before the closing quote, or a character above U+00FF
    /// (which cannot be stored in the single byte a `Character` token holds).
    fn parse_character_literal(&mut self, start_line: usize, start_col: usize) -> LexResult<Token> {
        use std::convert::TryFrom;

        let mut c = self.peek();
        if c == '\'' {
            return Err(self.make_error("Invalid character literal: empty character literal.", start_line, start_col));
        }

        let value = if c == '\\' {
            c = self.advance();
            match c {
                'n' => b'\n',
                'r' => b'\r',
                't' => b'\t',
                '\\' => b'\\',
                '\'' => b'\'',
                '0' => b'\0',
                _ => {
                    return Err(self.make_error(
                        format!("Invalid character literal: unknown escape sequence '\\{}'.", c),
                        start_line,
                        start_col,
                    ))
                }
            }
        } else if c == '\n' || c == '\r' {
            return Err(self.make_error("Invalid character literal: unclosed character literal.", start_line, start_col));
        } else {
            // Refuse to truncate: a plain `as u8` would silently keep only the low byte.
            match u8::try_from(u32::from(c)) {
                Ok(byte) => byte,
                Err(_) => {
                    return Err(self.make_error(
                        format!("Invalid character literal: '{}' does not fit in a single byte.", c),
                        start_line,
                        start_col,
                    ))
                }
            }
        };

        c = self.advance();
        if c.is_whitespace() || c == '/' || c == '\0' {
            return Err(self.make_error("Invalid character literal: unclosed character literal.", start_line, start_col));
        } else if c != '\'' {
            return Err(self.make_error(
                format!("Invalid character literal: unexpected '{}' in character.", c),
                start_line,
                start_col,
            ));
        }
        self.advance();
        Ok(Token::Character(value))
    }

    /// Lexes a `{ ... }` token string, recursively lexing the tokens inside.
    ///
    /// # Errors
    /// Fails when the closing `}` is missing, or with the error of the first
    /// nested token that fails to lex.
    fn parse_token_string(&mut self, _start: usize, start_line: usize, start_col: usize) -> LexResult<Token> {
        let mut tokens = Vec::new();
        self.advance(); // skip '{'
        loop {
            self.skip_comments_and_whitespace();
            let c = self.peek();
            if c == '}' {
                self.advance();
                return Ok(Token::TokenString(TokenString { tokens }));
            }
            if c == '\0' {
                break;
            }
            // Propagate the nested error so the caller sees the real cause.
            let token = self.next_token()?;
            if matches!(token, Token::Eof) {
                break;
            }
            tokens.push(token);
        }
        Err(self.make_error("Unclosed token string: missing closing brace '}'.", start_line, start_col))
    }

    /// Lexes a numeric literal starting at the cursor (optional `-`, optional
    /// radix prefix, digits, optional fraction, optional `:<type>` suffix).
    fn parse_numeric_literal(&mut self, start: usize, start_line: usize, start_col: usize) -> LexResult<Token> {
        let mut c = self.peek();
        if c == '-' {
            c = self.advance();
        }
        if c == '0' {
            let prefixed = match self.far_peek(1) {
                'b' | 'B' => Some(NumericLiteralType::Binary),
                'o' | 'O' => Some(NumericLiteralType::Octal),
                'x' | 'X' => Some(NumericLiteralType::Hexadecimal),
                _ => None,
            };
            if let Some(literal_type) = prefixed {
                self.advance(); // '0'
                self.advance(); // radix letter
                return self.parse_prefixed_integer(start, start_line, start_col, literal_type);
            }
        }
        self.parse_decimal_integer(start, start_line, start_col)
    }

    /// Consumes digits of the given radix and `_` separators; returns the
    /// first character that is neither.
    fn skip_digits(&mut self, radix: u32) -> char {
        let mut c = self.peek();
        while c.is_digit(radix) || c == '_' {
            c = self.advance();
        }
        c
    }

    /// Lexes the digits (and optional type suffix) of a `0b` / `0o` / `0x` literal.
    fn parse_prefixed_integer(&mut self, start: usize, start_line: usize, start_col: usize, literal_type: NumericLiteralType) -> LexResult<Token> {
        if self.skip_digits(literal_type.radix()) == ':' {
            return self.parse_numeric_type(start, start_line, start_col, literal_type);
        }
        self.create_integer_token(start, literal_type, false, 64, start_line, start_col)
    }

    /// Lexes decimal digits, handing over to the float or type-suffix path on
    /// `.` or `:`. An unsuffixed integer becomes [`Token::I64`].
    fn parse_decimal_integer(&mut self, start: usize, start_line: usize, start_col: usize) -> LexResult<Token> {
        match self.skip_digits(10) {
            '.' => {
                self.advance();
                self.parse_float(start, start_line, start_col)
            }
            ':' => self.parse_numeric_type(start, start_line, start_col, NumericLiteralType::Decimal),
            _ => self.create_integer_token(start, NumericLiteralType::Decimal, false, 64, start_line, start_col),
        }
    }

    /// Lexes the fractional digits of a float; the `.` has been consumed.
    /// An unsuffixed float becomes [`Token::Double`].
    fn parse_float(&mut self, start: usize, start_line: usize, start_col: usize) -> LexResult<Token> {
        if self.skip_digits(10) == ':' {
            return self.parse_numeric_type(start, start_line, start_col, NumericLiteralType::Float);
        }
        let value = self.create_float(start, start_line, start_col)?;
        Ok(Token::Double(value))
    }

    /// If the unread text starts with one of the candidate spellings, consumes
    /// it and returns the associated bit width.
    fn take_bit_width(&mut self, candidates: &[(&str, u32)]) -> Option<u32> {
        let (text, bits) = candidates
            .iter()
            .copied()
            .find(|&(text, _)| self.rest().starts_with(text))?;
        for _ in 0..text.len() {
            self.advance();
        }
        Some(bits)
    }

    /// Lexes a `:<type>` suffix (cursor on the `:`) and builds the typed token.
    ///
    /// # Errors
    /// Fails on an unknown type, a float type on a non-decimal literal, an
    /// integer type on a float literal, or a value outside the type's range.
    fn parse_numeric_type(&mut self, start: usize, start_line: usize, start_col: usize, literal_type: NumericLiteralType) -> LexResult<Token> {
        let kind = self.advance(); // skip ':'
        match kind {
            'f' => {
                if !matches!(literal_type, NumericLiteralType::Decimal | NumericLiteralType::Float) {
                    return Err(self.make_error("Invalid numeric literal: float type not allowed.", start_line, start_col));
                }
                self.advance();
                let bit_size = match self.take_bit_width(&[("64", 64), ("32", 32)]) {
                    Some(bits) => bits,
                    None => {
                        return Err(self.make_error("Invalid float type: must be of type 'f64' or 'f32'.", start_line, start_col))
                    }
                };
                let value = self.create_float(start, start_line, start_col)?;
                if bit_size == 32 {
                    Ok(Token::Float(value as f32))
                } else {
                    Ok(Token::Double(value))
                }
            }
            'i' | 'u' => {
                if matches!(literal_type, NumericLiteralType::Float) {
                    return Err(self.make_error("Invalid float type: must be of type 'f64' or 'f32'.", start_line, start_col));
                }
                let is_unsigned = kind == 'u';
                self.advance();
                let bit_size = match self.take_bit_width(&[("64", 64), ("32", 32), ("16", 16), ("8", 8)]) {
                    Some(bits) => bits,
                    None => {
                        let type_name = if is_unsigned { "unsigned" } else { "signed" };
                        let prefix = if is_unsigned { "u" } else { "i" };
                        return Err(self.make_error(
                            format!(
                                "Invalid {} integer type: must be of type '{}64', '{}32', '{}16', or '{}8'.",
                                type_name, prefix, prefix, prefix, prefix
                            ),
                            start_line,
                            start_col,
                        ));
                    }
                };
                self.create_integer_token(start, literal_type, is_unsigned, bit_size, start_line, start_col)
            }
            _ => Err(self.make_error("Invalid numeric type: type must start with 'f', 'i', or 'u'.", start_line, start_col)),
        }
    }

    /// Converts the literal that starts at byte `start` into an integer token
    /// of the requested signedness and width.
    ///
    /// # Errors
    /// Fails when the value does not fit the target type (including any
    /// negative literal with an unsigned type) or the bit size is unsupported.
    fn create_integer_token(&self, start: usize, literal_type: NumericLiteralType, is_unsigned: bool, bit_size: u32, start_line: usize, start_col: usize) -> LexResult<Token> {
        use std::convert::TryFrom;

        if matches!(literal_type, NumericLiteralType::Float) {
            return Err(self.make_error("Internal error: float literal in integer path", start_line, start_col));
        }

        let prefix = if is_unsigned { 'u' } else { 'i' };
        let overflow = || {
            self.make_error(
                format!("Integer overflow: value exceeds range for {}{}.", prefix, bit_size),
                start_line,
                start_col,
            )
        };

        let negative = self.source.get(start..).map_or(false, |text| text.starts_with('-'));
        let magnitude = match self.integer_magnitude(start, literal_type) {
            Some(magnitude) => magnitude,
            None => return Err(overflow()),
        };
        // i128 holds every (sign, u64 magnitude) pair, so the range checks
        // below are exact instead of relying on wrapping casts.
        let value = if negative { -i128::from(magnitude) } else { i128::from(magnitude) };

        let token = match (is_unsigned, bit_size) {
            (false, 64) => i64::try_from(value).ok().map(Token::I64),
            (false, 32) => i32::try_from(value).ok().map(Token::I32),
            (false, 16) => i16::try_from(value).ok().map(Token::I16),
            (false, 8) => i8::try_from(value).ok().map(Token::I8),
            (true, 64) => u64::try_from(value).ok().map(Token::U64),
            (true, 32) => u32::try_from(value).ok().map(Token::U32),
            (true, 16) => u16::try_from(value).ok().map(Token::U16),
            (true, 8) => u8::try_from(value).ok().map(Token::U8),
            _ => return Err(self.make_error("Invalid bit size for integer type.", start_line, start_col)),
        };

        match token {
            // A leading '-' is rejected for unsigned types even when the value is zero.
            Some(token) if !(is_unsigned && negative) => Ok(token),
            _ => Err(overflow()),
        }
    }

    /// Absolute value of the integer literal spanning `start..pos`, ignoring a
    /// leading `-`, the radix prefix, `_` separators and any `:<type>` suffix.
    /// Returns `None` when the value does not fit in 64 bits.
    fn integer_magnitude(&self, start: usize, literal_type: NumericLiteralType) -> Option<u64> {
        let radix = literal_type.radix();
        let token = self.source.get(start..self.pos)?;
        let unsigned = token.strip_prefix('-').unwrap_or(token);
        let body = match literal_type {
            NumericLiteralType::Binary | NumericLiteralType::Octal | NumericLiteralType::Hexadecimal => unsigned.get(2..)?,
            NumericLiteralType::Decimal | NumericLiteralType::Float => unsigned,
        };
        let digits = body.split(':').next().unwrap_or("");
        digits.chars().filter(|&c| c != '_').try_fold(0u64, |acc, c| {
            let digit = c.to_digit(radix)?;
            acc.checked_mul(u64::from(radix))?.checked_add(u64::from(digit))
        })
    }

    /// Value of the float literal spanning `start..pos`, ignoring `_`
    /// separators and any `:<type>` suffix. Uses the standard library parser
    /// so the result is the correctly rounded `f64`.
    fn create_float(&self, start: usize, start_line: usize, start_col: usize) -> LexResult<f64> {
        let token = self.source.get(start..self.pos).unwrap_or("");
        let literal: String = token
            .split(':')
            .next()
            .unwrap_or("")
            .chars()
            .filter(|&c| c != '_')
            .collect();
        literal.parse::<f64>().map_err(|_| {
            self.make_error("Invalid numeric literal: malformed floating-point value.", start_line, start_col)
        })
    }

    /// Lexes the next token, returning [`Token::Eof`] at end of input.
    ///
    /// # Errors
    /// Returns the [`LexError`] describing the first malformed token.
    fn next_token(&mut self) -> LexResult<Token> {
        self.skip_comments_and_whitespace();
        let c = self.peek();
        let start = self.pos;
        let start_line = self.line;
        let start_col = self.column;

        if c == '\0' {
            return Ok(Token::Eof);
        }

        if c.is_ascii_digit() || ((c == '.' || c == '-') && self.far_peek(1).is_ascii_digit()) {
            self.parse_numeric_literal(start, start_line, start_col)
        } else if c == '\'' {
            self.advance();
            self.parse_character_literal(start_line, start_col)
        } else if c == '{' {
            self.parse_token_string(start, start_line, start_col)
        } else if self.is_identifier_start() {
            self.parse_identifiers_and_booleans(start, start_line, start_col)
        } else {
            Err(self.make_error(format!("Unexpected character: '{}'", c), start_line, start_col))
        }
    }
}

/// Lexes the next token from `lexer`.
///
/// Returns `Some(Token::Eof)` at end of input and `None` when the next token
/// is malformed; the error detail is discarded here, so use
/// [`lexical_analysis`] when the [`LexError`] is needed.
pub fn get_token(lexer: &mut LexerInfo) -> Option<Token> {
    lexer.next_token().ok()
}

/// Lexes the remaining input into a token list terminated by [`Token::Eof`].
///
/// # Errors
/// Returns the first [`LexError`] encountered instead of a truncated list.
pub fn lexical_analysis(lexer: &mut LexerInfo) -> LexResult<Vec<Token>> {
    let mut tokens = Vec::new();
    loop {
        let token = lexer.next_token()?;
        let finished = matches!(token, Token::Eof);
        tokens.push(token);
        if finished {
            return Ok(tokens);
        }
    }
}