ChatGPT attempt at lexer.rs
This commit is contained in:
parent
ae077ef433
commit
349cb29762
|
|
@ -17,111 +17,607 @@ impl LexerInfo {
|
|||
line: 1,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn peek(&self) -> char {
|
||||
self.source[self.pos..].chars().next().unwrap_or('\0')
|
||||
}
|
||||
|
||||
pub fn far_peek(&self, index: usize) -> char {
|
||||
self.source[self.pos..].chars().nth(index).unwrap_or('\0')
|
||||
}
|
||||
|
||||
pub fn seek(&self, index: usize) -> char {
|
||||
self.source[index..].chars().next().unwrap_or('\0')
|
||||
}
|
||||
|
||||
/// Advance by one UTF-8 char and update line/column. Returns the char after advancing.
|
||||
/// If already at end, returns '\0'.
|
||||
pub fn advance(&mut self) -> char {
|
||||
if self.pos >= self.source.len() {
|
||||
return '\0';
|
||||
}
|
||||
let ch = self.peek();
|
||||
if ch == '\n' {
|
||||
self.line += 1;
|
||||
self.column = 1;
|
||||
} else {
|
||||
self.column += 1;
|
||||
}
|
||||
// move pos forward by char len
|
||||
self.pos += ch.len_utf8();
|
||||
self.peek()
|
||||
}
|
||||
|
||||
/// Return substring from start to current pos (slice by byte indices).
|
||||
/// start is a byte index into the original source (we store start as byte index).
|
||||
pub fn get_token_text(&self, start: usize) -> &str {
|
||||
// clamp
|
||||
let end = self.pos.min(self.source.len());
|
||||
&self.source[start..end]
|
||||
}
|
||||
}
|
||||
|
||||
/// A named identifier together with its "literal" marker.
#[derive(Debug, Clone)]
pub struct Identifier {
    /// The identifier text.
    pub name: String,
    /// Whether the identifier was written as an identifier literal.
    // NOTE(review): presumably set for the `::name` form — confirm against the lexer.
    pub is_literal: bool,
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum ArrayLiteral {
|
||||
Identifiers(Vec<Identifier>),
|
||||
I64(Vec<i64>),
|
||||
I32(Vec<i32>),
|
||||
I16(Vec<i16>),
|
||||
I8(Vec<i8>),
|
||||
U64(Vec<u64>),
|
||||
U32(Vec<u32>),
|
||||
U16(Vec<u16>),
|
||||
U8(Vec<u8>),
|
||||
Float(Vec<f32>),
|
||||
Double(Vec<f64>),
|
||||
Character(Vec<u8>),
|
||||
Strings(Vec<String>),
|
||||
Boolean(Vec<bool>),
|
||||
TokenStrings(Vec<TokenString>),
|
||||
TypeTuples(Vec<TypeTuple>),
|
||||
StructInline(StructInline),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ShapedArray {
|
||||
pub array: ArrayLiteral,
|
||||
pub shape: Vec<usize>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct TokenString {
|
||||
pub tokens: Vec<Token>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct TypeTuple {
|
||||
pub inputs: Vec<Identifier>,
|
||||
pub outputs: Vec<Identifier>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct StructInline {
|
||||
pub name: String,
|
||||
pub values: Vec<StructValue>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum StructValue {
|
||||
Integer(i64),
|
||||
Float(f32),
|
||||
Double(f64),
|
||||
Boolean(bool),
|
||||
Character(u8),
|
||||
String(String),
|
||||
Token(Token),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Token {
|
||||
Eof,
|
||||
|
||||
Identifier(Identifier),
|
||||
|
||||
// All integer sizes
|
||||
I64(i64),
|
||||
I32(i32),
|
||||
I16(i16),
|
||||
I8(i8),
|
||||
U64(u64),
|
||||
U32(u32),
|
||||
U16(u16),
|
||||
U8(u8),
|
||||
|
||||
Float(f32),
|
||||
Double(f64),
|
||||
|
||||
Character(u8),
|
||||
StringLiteral(String),
|
||||
Boolean(bool),
|
||||
|
||||
Array(ShapedArray),
|
||||
TokenString(TokenString),
|
||||
TypeTuple(TypeTuple),
|
||||
}
|
||||
|
||||
/// Source location attached to a lexer diagnostic.
///
/// Built by `make_file_info` from the lexer state at the moment an error is
/// reported.
#[derive(Debug, Clone)]
pub struct FileInfo {
    /// Name of the file being lexed.
    pub file: String,
    /// Lexer line number when the info was captured.
    pub line: usize,
    /// Lexer column when the info was captured.
    pub column: usize,
    /// Number of bytes between the token start and the lexer position.
    pub length: usize,
    /// Number of line breaks crossed since the token start.
    pub lines: usize,
}
|
||||
|
||||
/// Category of a lexer error; each variant carries the human-readable message.
#[derive(Debug)]
pub enum LexErrorKind {
    /// The input is invalid.
    Message(String),
    /// The input uses a construct the lexer does not support yet.
    NotImplemented(String),
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct LexError {
|
||||
pub kind: LexErrorKind,
|
||||
pub file_info: Option<FileInfo>,
|
||||
}
|
||||
|
||||
impl LexError {
|
||||
pub fn new(msg: impl Into<String>, file_info: Option<FileInfo>) -> Self {
|
||||
Self {
|
||||
kind: LexErrorKind::Message(msg.into()),
|
||||
file_info,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn not_implemented(msg: impl Into<String>, file_info: Option<FileInfo>) -> Self {
|
||||
Self {
|
||||
kind: LexErrorKind::NotImplemented(msg.into()),
|
||||
file_info,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Result alias used by the lexer: `Ok(T)` on success, `Err(LexError)` on failure.
pub type LexResult<T> = Result<T, LexError>;
|
||||
|
||||
/// The fixed-width integer types an integer literal can be tagged with.
#[derive(Debug, Clone)]
pub enum IntegerType {
    I64,
    I32,
    I16,
    I8,
    U64,
    U32,
    U16,
    U8,
}
|
||||
|
||||
pub fn lexical_analysis(_lexer: &mut LexerInfo) -> LexResult<Vec<Token>> {
|
||||
Ok(Vec::new())
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct IntegerLiteral {
|
||||
pub typ: IntegerType,
|
||||
pub value: u64, // we'll keep as u64; sign info handled by type and parsing
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Token {
|
||||
EOF,
|
||||
Identifier { is_literal: bool, name: String },
|
||||
Integer(IntegerLiteral),
|
||||
Float(f32),
|
||||
Double(f64),
|
||||
Character(u8),
|
||||
String(String),
|
||||
Boolean(bool),
|
||||
Array, // placeholder
|
||||
TokenString, // placeholder
|
||||
TypeTuple, // placeholder
|
||||
}
|
||||
|
||||
fn make_file_info(lexer: &LexerInfo, start: usize, start_line: usize) -> FileInfo {
|
||||
FileInfo {
|
||||
file: lexer.filename.clone(),
|
||||
line: lexer.line,
|
||||
column: lexer.column,
|
||||
length: lexer.pos.saturating_sub(start),
|
||||
lines: lexer.line.saturating_sub(start_line),
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds the value of a binary literal such as `0b1010`, `-0b11` or `0b1_0:u8`.
///
/// `token` is the full literal text. The leading `0` of the prefix is read as
/// a digit (harmless), while `_`, `.`, `-` and the `b`/`B` prefix letter are
/// skipped. Parsing stops at the first other character — in particular at the
/// `:` that introduces a type suffix, so nothing after the colon is ever read
/// as a digit. Bits shifted past 64 are discarded.
///
/// When `negative` is true the two's-complement negation of the magnitude is
/// returned.
fn create_binary_integer(token: &str, negative: bool) -> u64 {
    let mut value: u64 = 0;
    for ch in token.chars() {
        match ch {
            '0' | '1' => value = (value << 1) | u64::from(ch == '1'),
            '_' | '.' | '-' | 'b' | 'B' => {}
            _ => break,
        }
    }

    if negative {
        value.wrapping_neg()
    } else {
        value
    }
}
|
||||
|
||||
/// Builds the value of an octal literal such as `0o17`, `-0o10` or `0o7:u8`.
///
/// `_`, `.`, `-` and the `o`/`O` prefix letter are skipped. Parsing stops at
/// the first other non-octal character, including the `:` of a type suffix.
/// Arithmetic wraps modulo 2^64 so that an over-long literal behaves the same
/// in debug and release builds instead of panicking in debug.
///
/// When `negative` is true the two's-complement negation of the magnitude is
/// returned.
fn create_octal_integer(token: &str, negative: bool) -> u64 {
    let mut value: u64 = 0;
    for ch in token.chars() {
        if let Some(digit) = ch.to_digit(8) {
            value = value.wrapping_mul(8).wrapping_add(u64::from(digit));
        } else if matches!(ch, '_' | '.' | '-' | 'o' | 'O') {
            continue;
        } else {
            break;
        }
    }

    if negative {
        value.wrapping_neg()
    } else {
        value
    }
}
|
||||
|
||||
/// Builds the value of a decimal literal such as `42`, `-1_000` or `12:i32`.
///
/// `_` and `-` are skipped. Parsing stops at the first other non-digit
/// character, including the `:` of a type suffix. Arithmetic wraps modulo
/// 2^64 so that an over-long literal behaves the same in debug and release
/// builds instead of panicking in debug.
///
/// When `negative` is true the two's-complement negation of the magnitude is
/// returned.
fn create_decimal_integer(token: &str, negative: bool) -> u64 {
    let mut value: u64 = 0;
    for ch in token.chars() {
        if let Some(digit) = ch.to_digit(10) {
            value = value.wrapping_mul(10).wrapping_add(u64::from(digit));
        } else if matches!(ch, '_' | '-') {
            continue;
        } else {
            break;
        }
    }

    if negative {
        value.wrapping_neg()
    } else {
        value
    }
}
|
||||
|
||||
/// Builds the value of a hexadecimal literal such as `0xFF`, `-0x1` or `0xff:u8`.
///
/// `_`, `.`, `-` and the `x`/`X` prefix letter are skipped. Parsing stops at
/// the first other non-hex character. The `:` of a type suffix stops parsing
/// too; skipping it would let a suffix such as `:f32` be read as the hex
/// digits `f`, `3`, `2`. Arithmetic wraps modulo 2^64 so that an over-long
/// literal behaves the same in debug and release builds.
///
/// When `negative` is true the two's-complement negation of the magnitude is
/// returned.
fn create_hex_integer(token: &str, negative: bool) -> u64 {
    let mut value: u64 = 0;
    for ch in token.chars() {
        if let Some(digit) = ch.to_digit(16) {
            value = value.wrapping_mul(16).wrapping_add(u64::from(digit));
        } else if matches!(ch, '_' | '.' | '-' | 'x' | 'X') {
            continue;
        } else {
            break;
        }
    }

    if negative {
        value.wrapping_neg()
    } else {
        value
    }
}
|
||||
|
||||
/// Builds the value of a decimal floating-point literal.
///
/// `token` may carry a leading `-`, `_` digit separators and a trailing
/// `:suffix`; all three are removed before the magnitude is parsed with
/// Rust's `f64` parser. The result is negative when `negative` is true or the
/// token itself starts with `-`. The sign is applied exactly once: parsing
/// the `-` and then negating again would turn `-1.5` into `+1.5`.
///
/// Text that still fails to parse yields `0.0`, since the return type has no
/// way to report an error.
fn create_float_value(token: &str, negative: bool) -> f64 {
    // Everything from the first ':' onwards is a type suffix, not part of the number.
    let literal = token.split(':').next().unwrap_or(token);
    let has_minus = literal.starts_with('-');
    let unsigned = literal.trim_start_matches('-');

    let digits: String = unsigned.chars().filter(|&c| c != '_').collect();
    let magnitude = digits.parse::<f64>().unwrap_or(0.0);

    if negative || has_minus {
        -magnitude
    } else {
        magnitude
    }
}
|
||||
|
||||
/// Determine integer builtin type from suffix bits similar to original
|
||||
fn select_integer_type(unsigned: bool, bits: usize) -> Option<IntegerType> {
|
||||
match (unsigned, bits) {
|
||||
(false, 64) => Some(IntegerType::I64),
|
||||
(false, 32) => Some(IntegerType::I32),
|
||||
(false, 16) => Some(IntegerType::I16),
|
||||
(false, 8) => Some(IntegerType::I8),
|
||||
(true, 64) => Some(IntegerType::U64),
|
||||
(true, 32) => Some(IntegerType::U32),
|
||||
(true, 16) => Some(IntegerType::U16),
|
||||
(true, 8) => Some(IntegerType::U8),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Top-level numeric-type-suffix parser: handles things like ":i32", ":u8", ":f64", ":f32"
|
||||
fn parse_numeric_type_suffix(token: &str) -> Result<(bool, Option<IntegerType>, bool /*is_float*/, usize /*bits*/), ()> {
|
||||
// returns (unsigned, integer_type_option, is_float, bits)
|
||||
let mut unsigned = false;
|
||||
let mut is_float = false;
|
||||
let mut bits = 0usize;
|
||||
// find suffix start colon
|
||||
if let Some(colon_pos) = token.find(':') {
|
||||
let suffix = &token[colon_pos + 1..];
|
||||
// float?
|
||||
if suffix.starts_with('f') {
|
||||
is_float = true;
|
||||
if suffix == "f64" { bits = 64; }
|
||||
else if suffix == "f32" { bits = 32; }
|
||||
else { return Err(()); }
|
||||
} else if suffix.starts_with('i') || suffix.starts_with('u') {
|
||||
unsigned = suffix.starts_with('u');
|
||||
if suffix == "i64" || suffix == "u64" { bits = 64; }
|
||||
else if suffix == "i32" || suffix == "u32" { bits = 32; }
|
||||
else if suffix == "i16" || suffix == "u16" { bits = 16; }
|
||||
else if suffix == "i8" || suffix == "u8" { bits = 8; }
|
||||
else { return Err(()); }
|
||||
} else {
|
||||
return Err(());
|
||||
}
|
||||
let int_type = if is_float { None } else { select_integer_type(unsigned, bits) };
|
||||
Ok((unsigned, int_type, is_float, bits))
|
||||
} else {
|
||||
// no suffix
|
||||
Ok((false, Some(IntegerType::I64), false, 64))
|
||||
}
|
||||
}
|
||||
|
||||
/// Skip comments and whitespace similar to C version
|
||||
fn skip_comments_and_whitespace(lexer: &mut LexerInfo) {
|
||||
loop {
|
||||
let p = lexer.peek();
|
||||
if p == '\0' {
|
||||
break;
|
||||
}
|
||||
if p.is_whitespace() {
|
||||
lexer.advance();
|
||||
continue;
|
||||
}
|
||||
// comment styles: // or #
|
||||
if p == '/' && lexer.far_peek(1) == '/' {
|
||||
// consume until newline or '\0'
|
||||
while lexer.peek() != '\n' && lexer.peek() != '\0' {
|
||||
lexer.advance();
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if p == '#' {
|
||||
while lexer.peek() != '\n' && lexer.peek() != '\0' {
|
||||
lexer.advance();
|
||||
}
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/// Is identifier char allowed to continue?
|
||||
fn is_identifier_continue(lexer: &LexerInfo, c: char) -> bool {
|
||||
if !c.is_ascii_graphic() && c != ' ' { // keep printable heuristic
|
||||
return false;
|
||||
}
|
||||
if c == '/' && lexer.far_peek(1) == '/' { return false; }
|
||||
match c {
|
||||
'{' | '}' | '[' | ']' | '(' | ')' | '\'' | '"' | '#' => false,
|
||||
_ if c.is_whitespace() || c == '\0' => false,
|
||||
_ => true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Is identifier start
|
||||
fn is_identifier_start(lexer: &LexerInfo) -> bool {
|
||||
let mut c = lexer.peek();
|
||||
if c == ':' && lexer.far_peek(1) == ':' {
|
||||
c = lexer.far_peek(2);
|
||||
}
|
||||
(!c.is_ascii_digit()) && is_identifier_continue(lexer, c)
|
||||
}
|
||||
|
||||
/// Parse identifiers and booleans
|
||||
fn parse_identifiers_and_booleans(lexer: &mut LexerInfo, start: usize, start_line: usize) -> LexResult<Token> {
|
||||
let mut literal = false;
|
||||
if lexer.peek() == ':' && lexer.far_peek(1) == ':' {
|
||||
// skip the leading '::'
|
||||
lexer.advance();
|
||||
lexer.advance();
|
||||
literal = true;
|
||||
}
|
||||
|
||||
// collect name
|
||||
let mut name = String::new();
|
||||
while is_identifier_continue(lexer, lexer.peek()) {
|
||||
let c = lexer.peek();
|
||||
if c == ':' || c == '.' {
|
||||
return Err(LexError::new(format!("Invalid identifier: '{}' not allowed in identifiers.", c), Some(make_file_info(lexer, start, start_line))));
|
||||
}
|
||||
name.push(c);
|
||||
lexer.advance();
|
||||
}
|
||||
|
||||
match name.as_str() {
|
||||
"false" => Ok(Token::Boolean(false)),
|
||||
"true" => Ok(Token::Boolean(true)),
|
||||
_ => Ok(Token::Identifier { is_literal: literal, name }),
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse character literal
|
||||
fn parse_character_literal(lexer: &mut LexerInfo, start: usize, start_line: usize) -> LexResult<Token> {
|
||||
// At entry the opening quote was consumed by caller; peek() is first char of literal (or '\0')
|
||||
let c = lexer.peek();
|
||||
if c == '\'' {
|
||||
return Err(LexError::new("Invalid character literal: empty character literal.", Some(make_file_info(lexer, start, start_line))));
|
||||
}
|
||||
let mut value: u8 = 0;
|
||||
if c == '\\' {
|
||||
lexer.advance(); // consume '\'
|
||||
let esc = lexer.peek();
|
||||
value = match esc {
|
||||
'n' => b'\n',
|
||||
'r' => b'\r',
|
||||
't' => b'\t',
|
||||
'\\' => b'\\',
|
||||
'\'' => b'\'',
|
||||
'0' => b'\0',
|
||||
_ => {
|
||||
return Err(LexError::new(format!("Invalid character literal: unknown escape sequence '\\{}'", esc),
|
||||
Some(make_file_info(lexer, start, start_line))));
|
||||
}
|
||||
};
|
||||
lexer.advance(); // move past the escaped char
|
||||
} else if c == '\n' || c == '\r' || c == '\0' {
|
||||
return Err(LexError::new("Invalid character literal: unclosed character literal.", Some(make_file_info(lexer, start, start_line))));
|
||||
} else {
|
||||
value = lexer.peek() as u8;
|
||||
lexer.advance();
|
||||
}
|
||||
// expect closing quote
|
||||
let closing = lexer.peek();
|
||||
if closing == '\0' || closing.is_whitespace() || closing == '/' {
|
||||
return Err(LexError::new("Invalid character literal: unclosed character literal.", Some(make_file_info(lexer, start, start_line))));
|
||||
} else if closing != '\'' {
|
||||
return Err(LexError::new(format!("Invalid character literal: unexpected '{}' in character.", closing), Some(make_file_info(lexer, start, start_line))));
|
||||
}
|
||||
lexer.advance(); // consume closing '
|
||||
Ok(Token::Character(value))
|
||||
}
|
||||
|
||||
/// Parse numeric literal (entry when first char is digit or '.' or '-')
|
||||
fn parse_numeric_literal(lexer: &mut LexerInfo, start: usize, start_line: usize) -> LexResult<Token> {
|
||||
// We'll gather the token by moving the lexer forward until end of numeric token,
|
||||
// then analyze the string slice.
|
||||
// Handle leading '-'
|
||||
if lexer.peek() == '-' {
|
||||
lexer.advance();
|
||||
}
|
||||
|
||||
// If 0b, 0o, 0x style:
|
||||
if lexer.peek() == '0' {
|
||||
let next = lexer.far_peek(1);
|
||||
if next == 'b' || next == 'B' {
|
||||
// binary
|
||||
lexer.advance(); // consume '0'
|
||||
lexer.advance(); // consume 'b'
|
||||
while matches!(lexer.peek(), '0' | '1' | '_' ) {
|
||||
lexer.advance();
|
||||
}
|
||||
// optional suffix :...
|
||||
if lexer.peek() == ':' {
|
||||
// we'll let suffix parser handle it by consuming suffix chars up to whitespace or '/'
|
||||
while !lexer.peek().is_whitespace() && lexer.peek() != '/' && lexer.peek() != '\0' {
|
||||
lexer.advance();
|
||||
}
|
||||
}
|
||||
let s = lexer.get_token_text(start);
|
||||
let negative = s.starts_with('-');
|
||||
// default type i64
|
||||
let v = create_binary_integer(s, negative);
|
||||
Ok(Token::Integer(IntegerLiteral { typ: IntegerType::I64, value: v }))
|
||||
} else if next == 'o' || next == 'O' {
|
||||
// octal
|
||||
lexer.advance(); lexer.advance();
|
||||
while (lexer.peek().is_ascii_digit() && !matches!(lexer.peek(), '8' | '9')) || lexer.peek() == '_' {
|
||||
lexer.advance();
|
||||
}
|
||||
if lexer.peek() == ':' {
|
||||
while !lexer.peek().is_whitespace() && lexer.peek() != '/' && lexer.peek() != '\0' {
|
||||
lexer.advance();
|
||||
}
|
||||
}
|
||||
let s = lexer.get_token_text(start);
|
||||
let negative = s.starts_with('-');
|
||||
let v = create_octal_integer(s, negative);
|
||||
Ok(Token::Integer(IntegerLiteral { typ: IntegerType::I64, value: v }))
|
||||
} else if next == 'x' || next == 'X' {
|
||||
// hex
|
||||
lexer.advance(); lexer.advance();
|
||||
while lexer.peek().is_ascii_hexdigit() || lexer.peek() == '_' {
|
||||
lexer.advance();
|
||||
}
|
||||
if lexer.peek() == ':' {
|
||||
while !lexer.peek().is_whitespace() && lexer.peek() != '/' && lexer.peek() != '\0' {
|
||||
lexer.advance();
|
||||
}
|
||||
}
|
||||
let s = lexer.get_token_text(start);
|
||||
let negative = s.starts_with('-');
|
||||
let v = create_hex_integer(s, negative);
|
||||
Ok(Token::Integer(IntegerLiteral { typ: IntegerType::I64, value: v }))
|
||||
}
|
||||
}
|
||||
|
||||
// decimal or float
|
||||
// consume digits and underscores
|
||||
while lexer.peek().is_ascii_digit() || lexer.peek() == '_' {
|
||||
lexer.advance();
|
||||
}
|
||||
if lexer.peek() == '.' {
|
||||
// float case: .digit or digits.digits
|
||||
lexer.advance();
|
||||
while lexer.peek().is_ascii_digit() || lexer.peek() == '_' {
|
||||
lexer.advance();
|
||||
}
|
||||
// exponential 'e/E' not implemented: mirror C behavior -> not implemented
|
||||
if lexer.peek() == 'e' || lexer.peek() == 'E' {
|
||||
// not implemented exponential in C either
|
||||
return Err(LexError::not_implemented("Float exponential parsing not implemented.", Some(make_file_info(lexer, start, start_line))));
|
||||
}
|
||||
// optional suffix :f64 or :f32
|
||||
if lexer.peek() == ':' {
|
||||
// capture suffix
|
||||
while !lexer.peek().is_whitespace() && lexer.peek() != '/' && lexer.peek() != '\0' {
|
||||
lexer.advance();
|
||||
}
|
||||
}
|
||||
let s = lexer.get_token_text(start);
|
||||
let negative = s.starts_with('-');
|
||||
let value = create_float_value(s, negative);
|
||||
Ok(Token::Double(value))
|
||||
} else if lexer.peek() == 'e' || lexer.peek() == 'E' {
|
||||
return Err(LexError::not_implemented("Float exponential parsing not implemented.", Some(make_file_info(lexer, start, start_line))));
|
||||
} else if lexer.peek() == ':' {
|
||||
// suffix that might indicate integer type or float base change
|
||||
// consume suffix characters
|
||||
while !lexer.peek().is_whitespace() && lexer.peek() != '/' && lexer.peek() != '\0' {
|
||||
lexer.advance();
|
||||
}
|
||||
let s = lexer.get_token_text(start);
|
||||
let negative = s.starts_with('-');
|
||||
// check suffix
|
||||
match parse_numeric_type_suffix(s) {
|
||||
Ok((_unsigned, int_type_opt, is_float, _bits)) => {
|
||||
if is_float {
|
||||
let v = create_float_value(s, negative);
|
||||
return Ok(Token::Double(v));
|
||||
} else {
|
||||
let v = create_decimal_integer(s, negative);
|
||||
let typ = int_type_opt.unwrap_or(IntegerType::I64);
|
||||
return Ok(Token::Integer(IntegerLiteral { typ, value: v }));
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
return Err(LexError::new("Invalid numeric type suffix.", Some(make_file_info(lexer, start, start_line))));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// plain decimal integer
|
||||
let s = lexer.get_token_text(start);
|
||||
let negative = s.starts_with('-');
|
||||
let v = create_decimal_integer(s, negative);
|
||||
Ok(Token::Integer(IntegerLiteral { typ: IntegerType::I64, value: v }))
|
||||
}
|
||||
}
|
||||
|
||||
/// parse string literal - not implemented (mirrors C behavior)
|
||||
fn parse_string_literal(lexer: &mut LexerInfo, start: usize, start_line: usize) -> LexResult<Token> {
|
||||
Err(LexError::not_implemented("String literals not implemented.", Some(make_file_info(lexer, start, start_line))))
|
||||
}
|
||||
|
||||
/// parse token string - similar placeholder to C
|
||||
fn parse_token_string(lexer: &mut LexerInfo, start: usize, start_line: usize) -> LexResult<Token> {
|
||||
Err(LexError::not_implemented("Token string parsing not implemented.", Some(make_file_info(lexer, start, start_line))))
|
||||
}
|
||||
|
||||
fn parse_array_literal(_lexer: &mut LexerInfo, start: usize, start_line: usize) -> LexResult<Token> {
|
||||
Err(LexError::not_implemented("Array literal parsing not implemented.", Some(make_file_info(_lexer, start, start_line))))
|
||||
}
|
||||
|
||||
fn parse_type_tuple(_lexer: &mut LexerInfo, start: usize, start_line: usize) -> LexResult<Token> {
|
||||
Err(LexError::not_implemented("Type tuple parsing not implemented.", Some(make_file_info(_lexer, start, start_line))))
|
||||
}
|
||||
|
||||
/// The main get_token function returns a Token or a LexError (wrapped by Result in lexical_analysis).
|
||||
pub fn get_token(lexer: &mut LexerInfo) -> Result<Token, LexError> {
|
||||
skip_comments_and_whitespace(lexer);
|
||||
|
||||
let start = lexer.pos;
|
||||
let start_line = lexer.line;
|
||||
|
||||
let c = lexer.peek();
|
||||
if c == '\0' {
|
||||
return Ok(Token::EOF);
|
||||
}
|
||||
|
||||
if c.is_ascii_digit() || (c == '.' && lexer.far_peek(1).is_ascii_digit()) || (c == '-' && lexer.far_peek(1).is_ascii_digit()) {
|
||||
return parse_numeric_literal(lexer, start, start_line);
|
||||
}
|
||||
|
||||
if c == '\'' {
|
||||
lexer.advance(); // consume opening '
|
||||
return parse_character_literal(lexer, start, start_line);
|
||||
}
|
||||
|
||||
if c == '"' {
|
||||
lexer.advance(); // consume opening "
|
||||
return parse_string_literal(lexer, start, start_line);
|
||||
}
|
||||
|
||||
if c == '{' {
|
||||
lexer.advance();
|
||||
return parse_token_string(lexer, start, start_line);
|
||||
}
|
||||
if c == '}' {
|
||||
lexer.advance();
|
||||
return Err(LexError::new("Unexpected closing brace '}' without matching opening brace.", Some(make_file_info(lexer, start, start_line))));
|
||||
}
|
||||
|
||||
if c == '[' {
|
||||
lexer.advance();
|
||||
return parse_array_literal(lexer, start, start_line);
|
||||
}
|
||||
if c == ']' {
|
||||
lexer.advance();
|
||||
return Err(LexError::new("Unexpected closing bracket ']' without matching opening bracket.", Some(make_file_info(lexer, start, start_line))));
|
||||
}
|
||||
|
||||
if c == '(' {
|
||||
lexer.advance();
|
||||
return parse_type_tuple(lexer, start, start_line);
|
||||
}
|
||||
if c == ')' {
|
||||
lexer.advance();
|
||||
return Err(LexError::new("Unexpected closing parentheses ')' without matching opening parentheses.", Some(make_file_info(lexer, start, start_line))));
|
||||
}
|
||||
|
||||
if is_identifier_start(lexer) {
|
||||
return parse_identifiers_and_booleans(lexer, start, start_line);
|
||||
}
|
||||
|
||||
if c == ':' {
|
||||
lexer.advance();
|
||||
if lexer.peek() == ':' {
|
||||
return Err(LexError::new("Invalid identifier literal: empty identifier after '::'.", Some(make_file_info(lexer, start, start_line))));
|
||||
} else {
|
||||
return Err(LexError::new("Unexpected single colon ':'.", Some(make_file_info(lexer, start, start_line))));
|
||||
}
|
||||
}
|
||||
|
||||
Err(LexError::new(format!("Unexpected character: unexpected '{}' during parsing.", c), Some(make_file_info(lexer, start, start_line))))
|
||||
}
|
||||
|
||||
/// Perform full lexical analysis, returning a Vec<Token> or LexError
|
||||
pub fn lexical_analysis(lexer: &mut LexerInfo) -> LexResult<Vec<Token>> {
|
||||
let mut tokens = Vec::new();
|
||||
loop {
|
||||
match get_token(lexer) {
|
||||
Ok(Token::EOF) => {
|
||||
tokens.push(Token::EOF);
|
||||
break;
|
||||
}
|
||||
Ok(tok) => tokens.push(tok),
|
||||
Err(e) => {
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(tokens)
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue