|
|
|
|
@ -17,6 +17,47 @@ impl LexerInfo {
|
|
|
|
|
line: 1,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn peek(&self) -> char {
|
|
|
|
|
self.source.chars().nth(self.pos).unwrap_or('\0')
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn far_peek(&self, offset: usize) -> char {
|
|
|
|
|
self.source.chars().nth(self.pos + offset).unwrap_or('\0')
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn advance(&mut self) -> char {
|
|
|
|
|
if self.peek() == '\n' {
|
|
|
|
|
self.line += 1;
|
|
|
|
|
self.column = 1;
|
|
|
|
|
} else {
|
|
|
|
|
self.column += 1;
|
|
|
|
|
}
|
|
|
|
|
self.pos += 1;
|
|
|
|
|
self.peek()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn skip_comments_and_whitespace(&mut self) {
|
|
|
|
|
loop {
|
|
|
|
|
let c = self.peek();
|
|
|
|
|
|
|
|
|
|
// Skip comments
|
|
|
|
|
if (c == '/' && self.far_peek(1) == '/') || c == '#' {
|
|
|
|
|
while self.peek() != '\n' && self.peek() != '\0' {
|
|
|
|
|
self.advance();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Skip whitespace
|
|
|
|
|
if self.peek().is_whitespace() {
|
|
|
|
|
while self.peek().is_whitespace() {
|
|
|
|
|
self.advance();
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[derive(Debug, Clone)]
|
|
|
|
|
@ -83,10 +124,7 @@ pub enum StructValue {
|
|
|
|
|
#[derive(Debug, Clone)]
|
|
|
|
|
pub enum Token {
|
|
|
|
|
Eof,
|
|
|
|
|
|
|
|
|
|
Identifier(Identifier),
|
|
|
|
|
|
|
|
|
|
// All integer sizes
|
|
|
|
|
I64(i64),
|
|
|
|
|
I32(i32),
|
|
|
|
|
I16(i16),
|
|
|
|
|
@ -95,14 +133,11 @@ pub enum Token {
|
|
|
|
|
U32(u32),
|
|
|
|
|
U16(u16),
|
|
|
|
|
U8(u8),
|
|
|
|
|
|
|
|
|
|
Float(f32),
|
|
|
|
|
Double(f64),
|
|
|
|
|
|
|
|
|
|
Character(u8),
|
|
|
|
|
StringLiteral(String),
|
|
|
|
|
Boolean(bool),
|
|
|
|
|
|
|
|
|
|
Array(ShapedArray),
|
|
|
|
|
TokenString(TokenString),
|
|
|
|
|
TypeTuple(TypeTuple),
|
|
|
|
|
@ -118,10 +153,585 @@ pub struct LexError {
|
|
|
|
|
|
|
|
|
|
pub type LexResult<T> = Result<T, LexError>;
|
|
|
|
|
|
|
|
|
|
pub fn get_token(_lexer: &mut LexerInfo) -> Option<Token> {
|
|
|
|
|
None
|
|
|
|
|
/// Notation in which a numeric literal was written, as detected by the
/// `parse_*` routines before an optional `:type` suffix is processed.
#[derive(Clone, Copy, Debug)]
enum NumericLiteralType {
    /// `0b` / `0B` prefixed literal.
    Binary,
    /// `0o` / `0O` prefixed literal.
    Octal,
    /// Plain base-10 integer literal.
    Decimal,
    /// `0x` / `0X` prefixed literal.
    Hexadecimal,
    /// Literal containing a decimal point.
    Float,
}
|
|
|
|
|
|
|
|
|
|
pub fn lexical_analysis(_lexer: &mut LexerInfo) -> LexResult<Vec<Token>> {
|
|
|
|
|
Ok(Vec::new())
|
|
|
|
|
impl LexerInfo {
|
|
|
|
|
fn make_error(&self, message: impl Into<String>, start_line: usize, start_col: usize) -> LexError {
|
|
|
|
|
LexError {
|
|
|
|
|
message: message.into(),
|
|
|
|
|
file: self.filename.clone(),
|
|
|
|
|
line: start_line,
|
|
|
|
|
column: start_col,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn is_identifier_continue(&self, c: char) -> bool {
|
|
|
|
|
if !c.is_ascii() || !c.is_ascii_graphic() {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
if c == '/' && self.far_peek(1) == '/' {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
!matches!(c, '{' | '}' | '[' | ']' | '(' | ')' | '\'' | '"' | '#') && !c.is_whitespace()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn is_identifier_start(&self) -> bool {
|
|
|
|
|
let mut c = self.peek();
|
|
|
|
|
if c == ':' && self.far_peek(1) == ':' {
|
|
|
|
|
c = self.far_peek(2);
|
|
|
|
|
}
|
|
|
|
|
!c.is_ascii_digit() && self.is_identifier_continue(c)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn parse_identifiers_and_booleans(&mut self, start: usize, start_line: usize, start_col: usize) -> LexResult<Token> {
|
|
|
|
|
let mut c = self.peek();
|
|
|
|
|
let mut literal = false;
|
|
|
|
|
|
|
|
|
|
// Skip leading `::` for identifier literals
|
|
|
|
|
if c == ':' && self.far_peek(1) == ':' {
|
|
|
|
|
literal = true;
|
|
|
|
|
self.advance();
|
|
|
|
|
c = self.advance();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Read the name
|
|
|
|
|
let name_start = self.pos;
|
|
|
|
|
while self.is_identifier_continue(c) {
|
|
|
|
|
if c == ':' {
|
|
|
|
|
return Err(self.make_error("Invalid identifier: ':' is not allowed in identifiers.", start_line, start_col));
|
|
|
|
|
}
|
|
|
|
|
if c == '.' {
|
|
|
|
|
return Err(self.make_error("Invalid identifier: '.' is not allowed in identifiers.", start_line, start_col));
|
|
|
|
|
}
|
|
|
|
|
c = self.advance();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let name = self.source[name_start..self.pos].to_string();
|
|
|
|
|
|
|
|
|
|
// Check for booleans
|
|
|
|
|
match name.as_str() {
|
|
|
|
|
"false" => Ok(Token::Boolean(false)),
|
|
|
|
|
"true" => Ok(Token::Boolean(true)),
|
|
|
|
|
_ => Ok(Token::Identifier(Identifier { name, is_literal: literal })),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn parse_character_literal(&mut self, start_line: usize, start_col: usize) -> LexResult<Token> {
|
|
|
|
|
let mut c = self.peek();
|
|
|
|
|
|
|
|
|
|
if c == '\'' {
|
|
|
|
|
return Err(self.make_error("Invalid character literal: empty character literal.", start_line, start_col));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let value = if c == '\\' {
|
|
|
|
|
c = self.advance();
|
|
|
|
|
match c {
|
|
|
|
|
'n' => b'\n',
|
|
|
|
|
'r' => b'\r',
|
|
|
|
|
't' => b'\t',
|
|
|
|
|
'\\' => b'\\',
|
|
|
|
|
'\'' => b'\'',
|
|
|
|
|
'0' => b'\0',
|
|
|
|
|
_ => return Err(self.make_error(format!("Invalid character literal: unknown escape sequence '\\{}'.", c), start_line, start_col)),
|
|
|
|
|
}
|
|
|
|
|
} else if c == '\n' || c == '\r' {
|
|
|
|
|
return Err(self.make_error("Invalid character literal: unclosed character literal.", start_line, start_col));
|
|
|
|
|
} else {
|
|
|
|
|
c as u8
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
c = self.advance();
|
|
|
|
|
|
|
|
|
|
if c.is_whitespace() || c == '/' || c == '\0' {
|
|
|
|
|
return Err(self.make_error("Invalid character literal: unclosed character literal.", start_line, start_col));
|
|
|
|
|
} else if c != '\'' {
|
|
|
|
|
return Err(self.make_error(format!("Invalid character literal: unexpected '{}' in character.", c), start_line, start_col));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
self.advance();
|
|
|
|
|
Ok(Token::Character(value))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn parse_token_string(&mut self, start: usize, start_line: usize, start_col: usize) -> LexResult<Token> {
|
|
|
|
|
let mut tokens = Vec::new();
|
|
|
|
|
self.advance(); // skip '{'
|
|
|
|
|
|
|
|
|
|
loop {
|
|
|
|
|
self.skip_comments_and_whitespace();
|
|
|
|
|
let c = self.peek();
|
|
|
|
|
|
|
|
|
|
if c == '}' {
|
|
|
|
|
self.advance();
|
|
|
|
|
return Ok(Token::TokenString(TokenString { tokens }));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if c == '\0' {
|
|
|
|
|
return Err(self.make_error("Unclosed token string: missing closing brace '}'.", start_line, start_col));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
match get_token(self) {
|
|
|
|
|
Some(token) => {
|
|
|
|
|
if matches!(token, Token::Eof) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
tokens.push(token);
|
|
|
|
|
}
|
|
|
|
|
None => return Err(self.make_error("Failed to parse token in token string.", start_line, start_col)),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Err(self.make_error("Unclosed token string: missing closing brace '}'.", start_line, start_col))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Entry point for numeric literals: consumes an optional `-` sign and
/// dispatches on the radix prefix.
///
/// `0b`/`0B`, `0o`/`0O` and `0x`/`0X` select the binary, octal and
/// hexadecimal parsers (the prefix is consumed here); everything else is
/// handed to the decimal parser, which also handles floats.
fn parse_numeric_literal(&mut self, start: usize, start_line: usize, start_col: usize) -> LexResult<Token> {
    if self.peek() == '-' {
        self.advance();
    }

    if self.peek() != '0' {
        return self.parse_decimal_integer(start, start_line, start_col);
    }

    // Consume the leading zero and inspect what follows it.
    let radix_marker = self.advance();
    match radix_marker {
        'b' | 'B' => {
            self.advance();
            self.parse_binary_integer(start, start_line, start_col)
        }
        'o' | 'O' => {
            self.advance();
            self.parse_octal_integer(start, start_line, start_col)
        }
        'x' | 'X' => {
            self.advance();
            self.parse_hexadecimal_integer(start, start_line, start_col)
        }
        _ => self.parse_decimal_integer(start, start_line, start_col),
    }
}
|
|
|
|
|
|
|
|
|
|
fn parse_binary_integer(&mut self, start: usize, start_line: usize, start_col: usize) -> LexResult<Token> {
|
|
|
|
|
let mut c = self.peek();
|
|
|
|
|
while c == '0' || c == '1' || c == '_' {
|
|
|
|
|
c = self.advance();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if c == ':' {
|
|
|
|
|
return self.parse_numeric_type(start, start_line, start_col, NumericLiteralType::Binary);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let value = self.create_binary_integer(start);
|
|
|
|
|
Ok(Token::I64(value as i64))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn parse_octal_integer(&mut self, start: usize, start_line: usize, start_col: usize) -> LexResult<Token> {
|
|
|
|
|
let mut c = self.peek();
|
|
|
|
|
while c.is_ascii_digit() && c != '8' && c != '9' || c == '_' {
|
|
|
|
|
c = self.advance();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if c == ':' {
|
|
|
|
|
return self.parse_numeric_type(start, start_line, start_col, NumericLiteralType::Octal);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let value = self.create_octal_integer(start);
|
|
|
|
|
Ok(Token::I64(value as i64))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn parse_decimal_integer(&mut self, start: usize, start_line: usize, start_col: usize) -> LexResult<Token> {
|
|
|
|
|
let mut c = self.peek();
|
|
|
|
|
while c.is_ascii_digit() || c == '_' {
|
|
|
|
|
c = self.advance();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if c == '.' {
|
|
|
|
|
self.advance();
|
|
|
|
|
return self.parse_float(start, start_line, start_col);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if c == ':' {
|
|
|
|
|
return self.parse_numeric_type(start, start_line, start_col, NumericLiteralType::Decimal);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let value = self.create_decimal_integer(start);
|
|
|
|
|
Ok(Token::I64(value as i64))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn parse_hexadecimal_integer(&mut self, start: usize, start_line: usize, start_col: usize) -> LexResult<Token> {
|
|
|
|
|
let mut c = self.peek();
|
|
|
|
|
while c.is_ascii_hexdigit() || c == '_' {
|
|
|
|
|
c = self.advance();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if c == ':' {
|
|
|
|
|
return self.parse_numeric_type(start, start_line, start_col, NumericLiteralType::Hexadecimal);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let value = self.create_hexadecimal_integer(start);
|
|
|
|
|
Ok(Token::I64(value as i64))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn parse_float(&mut self, start: usize, start_line: usize, start_col: usize) -> LexResult<Token> {
|
|
|
|
|
let mut c = self.peek();
|
|
|
|
|
while c.is_ascii_digit() || c == '_' {
|
|
|
|
|
c = self.advance();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if c == ':' {
|
|
|
|
|
return self.parse_numeric_type(start, start_line, start_col, NumericLiteralType::Float);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let value = self.create_float(start);
|
|
|
|
|
Ok(Token::Double(value))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn parse_numeric_type(&mut self, start: usize, start_line: usize, start_col: usize, literal_type: NumericLiteralType) -> LexResult<Token> {
|
|
|
|
|
let mut c = self.advance(); // skip ':'
|
|
|
|
|
|
|
|
|
|
let mut is_float = false;
|
|
|
|
|
let mut is_unsigned = false;
|
|
|
|
|
let mut bit_size = 64; // default
|
|
|
|
|
|
|
|
|
|
if c == 'f' {
|
|
|
|
|
is_float = true;
|
|
|
|
|
if !matches!(literal_type, NumericLiteralType::Decimal | NumericLiteralType::Float) {
|
|
|
|
|
return Err(self.make_error("Invalid numeric literal: float type not allowed.", start_line, start_col));
|
|
|
|
|
}
|
|
|
|
|
c = self.advance();
|
|
|
|
|
if c == '6' && self.far_peek(1) == '4' {
|
|
|
|
|
bit_size = 64;
|
|
|
|
|
self.advance();
|
|
|
|
|
self.advance();
|
|
|
|
|
} else if c == '3' && self.far_peek(1) == '2' {
|
|
|
|
|
bit_size = 32;
|
|
|
|
|
self.advance();
|
|
|
|
|
self.advance();
|
|
|
|
|
} else {
|
|
|
|
|
return Err(self.make_error("Invalid float type: must be of type 'f64' or 'f32'.", start_line, start_col));
|
|
|
|
|
}
|
|
|
|
|
} else if c == 'i' || c == 'u' {
|
|
|
|
|
if matches!(literal_type, NumericLiteralType::Float) {
|
|
|
|
|
return Err(self.make_error("Invalid float type: must be of type 'f64' or 'f32'.", start_line, start_col));
|
|
|
|
|
}
|
|
|
|
|
is_unsigned = c == 'u';
|
|
|
|
|
c = self.advance();
|
|
|
|
|
if c == '6' && self.far_peek(1) == '4' {
|
|
|
|
|
bit_size = 64;
|
|
|
|
|
self.advance();
|
|
|
|
|
self.advance();
|
|
|
|
|
} else if c == '3' && self.far_peek(1) == '2' {
|
|
|
|
|
bit_size = 32;
|
|
|
|
|
self.advance();
|
|
|
|
|
self.advance();
|
|
|
|
|
} else if c == '1' && self.far_peek(1) == '6' {
|
|
|
|
|
bit_size = 16;
|
|
|
|
|
self.advance();
|
|
|
|
|
self.advance();
|
|
|
|
|
} else if c == '8' {
|
|
|
|
|
bit_size = 8;
|
|
|
|
|
self.advance();
|
|
|
|
|
} else {
|
|
|
|
|
let type_name = if is_unsigned { "unsigned" } else { "signed" };
|
|
|
|
|
return Err(self.make_error(
|
|
|
|
|
format!("Invalid {} integer type: must be of type '{}64', '{}32', '{}16', or '{}8'.",
|
|
|
|
|
type_name, if is_unsigned { "u" } else { "i" },
|
|
|
|
|
if is_unsigned { "u" } else { "i" },
|
|
|
|
|
if is_unsigned { "u" } else { "i" },
|
|
|
|
|
if is_unsigned { "u" } else { "i" }),
|
|
|
|
|
start_line, start_col));
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
return Err(self.make_error("Invalid numeric type: type must start with 'f', 'i', or 'u'.", start_line, start_col));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Create the token based on the parsed type
|
|
|
|
|
if is_float {
|
|
|
|
|
let value = self.create_float(start);
|
|
|
|
|
match bit_size {
|
|
|
|
|
32 => Ok(Token::Float(value as f32)),
|
|
|
|
|
64 => Ok(Token::Double(value)),
|
|
|
|
|
_ => unreachable!()
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
let value = match literal_type {
|
|
|
|
|
NumericLiteralType::Binary => self.create_binary_integer(start),
|
|
|
|
|
NumericLiteralType::Octal => self.create_octal_integer(start),
|
|
|
|
|
NumericLiteralType::Decimal => self.create_decimal_integer(start),
|
|
|
|
|
NumericLiteralType::Hexadecimal => self.create_hexadecimal_integer(start),
|
|
|
|
|
NumericLiteralType::Float => return Err(self.make_error("Internal error: float literal in integer path", start_line, start_col)),
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
self.create_integer_token(value, is_unsigned, bit_size, start, start_line, start_col)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn create_integer_token(&self, value: u64, is_unsigned: bool, bit_size: u32, start: usize, start_line: usize, start_col: usize) -> LexResult<Token> {
|
|
|
|
|
let is_negative = self.source[start..].starts_with('-');
|
|
|
|
|
|
|
|
|
|
match (is_unsigned, bit_size) {
|
|
|
|
|
(false, 64) => Ok(Token::I64(value as i64)),
|
|
|
|
|
(false, 32) => {
|
|
|
|
|
let signed = value as i64;
|
|
|
|
|
if signed < i32::MIN as i64 || signed > i32::MAX as i64 {
|
|
|
|
|
return Err(self.make_error("Integer overflow: value exceeds range for i32.", start_line, start_col));
|
|
|
|
|
}
|
|
|
|
|
Ok(Token::I32(value as i32))
|
|
|
|
|
}
|
|
|
|
|
(false, 16) => {
|
|
|
|
|
let signed = value as i64;
|
|
|
|
|
if signed < i16::MIN as i64 || signed > i16::MAX as i64 {
|
|
|
|
|
return Err(self.make_error("Integer overflow: value exceeds range for i16.", start_line, start_col));
|
|
|
|
|
}
|
|
|
|
|
Ok(Token::I16(value as i16))
|
|
|
|
|
}
|
|
|
|
|
(false, 8) => {
|
|
|
|
|
let signed = value as i64;
|
|
|
|
|
if signed < i8::MIN as i64 || signed > i8::MAX as i64 {
|
|
|
|
|
return Err(self.make_error("Integer overflow: value exceeds range for i8.", start_line, start_col));
|
|
|
|
|
}
|
|
|
|
|
Ok(Token::I8(value as i8))
|
|
|
|
|
}
|
|
|
|
|
(true, 64) => {
|
|
|
|
|
if is_negative {
|
|
|
|
|
return Err(self.make_error("Integer overflow: value exceeds range for u64.", start_line, start_col));
|
|
|
|
|
}
|
|
|
|
|
Ok(Token::U64(value))
|
|
|
|
|
}
|
|
|
|
|
(true, 32) => {
|
|
|
|
|
if is_negative {
|
|
|
|
|
return Err(self.make_error("Integer overflow: value exceeds range for u32.", start_line, start_col));
|
|
|
|
|
}
|
|
|
|
|
if value > u32::MAX as u64 {
|
|
|
|
|
return Err(self.make_error("Integer overflow: value exceeds range for u32.", start_line, start_col));
|
|
|
|
|
}
|
|
|
|
|
Ok(Token::U32(value as u32))
|
|
|
|
|
}
|
|
|
|
|
(true, 16) => {
|
|
|
|
|
if is_negative {
|
|
|
|
|
return Err(self.make_error("Integer overflow: value exceeds range for u16.", start_line, start_col));
|
|
|
|
|
}
|
|
|
|
|
if value > u16::MAX as u64 {
|
|
|
|
|
return Err(self.make_error("Integer overflow: value exceeds range for u16.", start_line, start_col));
|
|
|
|
|
}
|
|
|
|
|
Ok(Token::U16(value as u16))
|
|
|
|
|
}
|
|
|
|
|
(true, 8) => {
|
|
|
|
|
if is_negative {
|
|
|
|
|
return Err(self.make_error("Integer overflow: value exceeds range for u8.", start_line, start_col));
|
|
|
|
|
}
|
|
|
|
|
if value > u8::MAX as u64 {
|
|
|
|
|
return Err(self.make_error("Integer overflow: value exceeds range for u8.", start_line, start_col));
|
|
|
|
|
}
|
|
|
|
|
Ok(Token::U8(value as u8))
|
|
|
|
|
}
|
|
|
|
|
_ => Err(self.make_error("Invalid bit size for integer type.", start_line, start_col))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn create_binary_integer(&self, start: usize) -> u64 {
|
|
|
|
|
let token = &self.source[start..self.pos];
|
|
|
|
|
let mut value = 0u64;
|
|
|
|
|
let mut i = 2;
|
|
|
|
|
|
|
|
|
|
if token.starts_with('-') {
|
|
|
|
|
i += 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for c in token[i..].chars() {
|
|
|
|
|
if c == '_' || c == '.' {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
if c.is_whitespace() || c == '/' || c == ':' {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
value *= 2;
|
|
|
|
|
if c == '1' {
|
|
|
|
|
value += 1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if token.starts_with('-') {
|
|
|
|
|
(!value).wrapping_add(1)
|
|
|
|
|
} else {
|
|
|
|
|
value
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn create_octal_integer(&self, start: usize) -> u64 {
|
|
|
|
|
let token = &self.source[start..self.pos];
|
|
|
|
|
let mut value = 0u64;
|
|
|
|
|
let mut i = 2;
|
|
|
|
|
|
|
|
|
|
if token.starts_with('-') {
|
|
|
|
|
i += 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for c in token[i..].chars() {
|
|
|
|
|
if c == '_' || c == '.' {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
if c.is_whitespace() || c == '/' || c == ':' {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
value *= 8;
|
|
|
|
|
value += c.to_digit(8).unwrap_or(0) as u64;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if token.starts_with('-') {
|
|
|
|
|
(!value).wrapping_add(1)
|
|
|
|
|
} else {
|
|
|
|
|
value
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn create_decimal_integer(&self, start: usize) -> u64 {
|
|
|
|
|
let token = &self.source[start..self.pos];
|
|
|
|
|
let mut value = 0u64;
|
|
|
|
|
let mut i = 0;
|
|
|
|
|
|
|
|
|
|
if token.starts_with('-') {
|
|
|
|
|
i += 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for c in token[i..].chars() {
|
|
|
|
|
if c == '_' {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
if c.is_whitespace() || c == '/' || c == ':' {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
value *= 10;
|
|
|
|
|
value += c.to_digit(10).unwrap_or(0) as u64;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if token.starts_with('-') {
|
|
|
|
|
(!value).wrapping_add(1)
|
|
|
|
|
} else {
|
|
|
|
|
value
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn create_hexadecimal_integer(&self, start: usize) -> u64 {
|
|
|
|
|
let token = &self.source[start..self.pos];
|
|
|
|
|
let mut value = 0u64;
|
|
|
|
|
let mut i = 2;
|
|
|
|
|
|
|
|
|
|
if token.starts_with('-') {
|
|
|
|
|
i += 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for c in token[i..].chars() {
|
|
|
|
|
if c == '_' || c == '.' {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
if c.is_whitespace() || c == '/' || c == ':' {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
value *= 16;
|
|
|
|
|
value += c.to_digit(16).unwrap_or(0) as u64;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if token.starts_with('-') {
|
|
|
|
|
(!value).wrapping_add(1)
|
|
|
|
|
} else {
|
|
|
|
|
value
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn create_float(&self, start: usize) -> f64 {
|
|
|
|
|
let token = &self.source[start..self.pos];
|
|
|
|
|
let mut value = 0.0;
|
|
|
|
|
let mut fractional = 0u64;
|
|
|
|
|
let mut i = 0;
|
|
|
|
|
|
|
|
|
|
if token.starts_with('-') {
|
|
|
|
|
i += 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for c in token[i..].chars() {
|
|
|
|
|
if c == '_' {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
if c.is_whitespace() || c == '/' || c == ':' {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
if c == '.' {
|
|
|
|
|
fractional = 1;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if fractional == 0 {
|
|
|
|
|
value *= 10.0;
|
|
|
|
|
} else {
|
|
|
|
|
fractional *= 10;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let digit = c.to_digit(10).unwrap_or(0) as f64;
|
|
|
|
|
if fractional == 0 {
|
|
|
|
|
value += digit;
|
|
|
|
|
} else {
|
|
|
|
|
value += digit / fractional as f64;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if token.starts_with('-') {
|
|
|
|
|
-value
|
|
|
|
|
} else {
|
|
|
|
|
value
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn get_token(lexer: &mut LexerInfo) -> Option<Token> {
|
|
|
|
|
lexer.skip_comments_and_whitespace();
|
|
|
|
|
|
|
|
|
|
let c = lexer.peek();
|
|
|
|
|
let start = lexer.pos;
|
|
|
|
|
let start_line = lexer.line;
|
|
|
|
|
let start_col = lexer.column;
|
|
|
|
|
|
|
|
|
|
if c == '\0' {
|
|
|
|
|
return Some(Token::Eof);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let result = if c.is_ascii_digit() || (c == '.' && lexer.far_peek(1).is_ascii_digit()) || (c == '-' && lexer.far_peek(1).is_ascii_digit()) {
|
|
|
|
|
lexer.parse_numeric_literal(start, start_line, start_col)
|
|
|
|
|
} else if c == '\'' {
|
|
|
|
|
lexer.advance();
|
|
|
|
|
lexer.parse_character_literal(start_line, start_col)
|
|
|
|
|
} else if c == '{' {
|
|
|
|
|
lexer.parse_token_string(start, start_line, start_col)
|
|
|
|
|
} else if lexer.is_identifier_start() {
|
|
|
|
|
lexer.parse_identifiers_and_booleans(start, start_line, start_col)
|
|
|
|
|
} else {
|
|
|
|
|
Err(lexer.make_error(format!("Unexpected character: '{}'", c), start_line, start_col))
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
result.ok()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn lexical_analysis(lexer: &mut LexerInfo) -> LexResult<Vec<Token>> {
|
|
|
|
|
let mut tokens = Vec::new();
|
|
|
|
|
|
|
|
|
|
loop {
|
|
|
|
|
match get_token(lexer) {
|
|
|
|
|
Some(Token::Eof) => {
|
|
|
|
|
tokens.push(Token::Eof);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
Some(token) => tokens.push(token),
|
|
|
|
|
None => break,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Ok(tokens)
|
|
|
|
|
}
|
|
|
|
|
|