YREA-SLS/SLS_Python/sls_py/lexer.py

from __future__ import annotations
from dataclasses import dataclass, field
from enum import Enum, auto
from typing import List, Optional, Any, Union


# =====================================================================
#  Basic Types
# =====================================================================

class LexerInfo:
    filename: str
    source_code: str
    pos: int
    column: int
    line: int

    def __init__(self, filename: str = "", source_code: str = ""):
        self.filename = filename
        self.source_code = source_code
        self.pos = 0
        self.column = 1
        self.line = 1


# =====================================================================
#  Token Types
# =====================================================================

class TokenType(Enum):
    EOF = auto()
    IDENTIFIER = auto()
    INTEGER = auto()
    FLOAT = auto()
    DOUBLE = auto()
    CHARACTER = auto()
    STRING = auto()
    BOOLEAN = auto()
    ARRAY = auto()
    TOKEN_STRING = auto()
    TYPE_TUPLE = auto()


# =====================================================================
#  Array Literal Types
# =====================================================================

class ArrayType(Enum):
    IDENTIFIER = auto()
    I64 = auto()
    I32 = auto()
    I16 = auto()
    I8 = auto()
    U64 = auto()
    U32 = auto()
    U16 = auto()
    U8 = auto()
    FLOAT = auto()
    DOUBLE = auto()
    CHARACTER = auto()
    STRING = auto()
    BOOLEAN = auto()
    STRUCT_INLINE = auto()


# =====================================================================
#  Identifier
# =====================================================================

@dataclass
class Identifier:
    name: str
    is_literal: bool


# =====================================================================
#  Integer Literal Type
# =====================================================================

class IntegerBuiltInType(Enum):
    I64 = auto()
    I32 = auto()
    I16 = auto()
    I8 = auto()
    U64 = auto()
    U32 = auto()
    U16 = auto()
    U8 = auto()


@dataclass
class IntegerLiteral:
    value: int
    type: IntegerBuiltInType


# =====================================================================
#  TokenString, TypeTuple, StructInline
# =====================================================================

@dataclass
class TokenString:
    tokens: List["Token"] = field(default_factory=list)

    def deep_copy(self) -> TokenString:
        copied_tokens = [Token(
            type=token.type,
            identifier=token.identifier,
            integer_literal=token.integer_literal,
            float_literal=token.float_literal,
            double_literal=token.double_literal,
            character_literal=token.character_literal,
            string_literal=token.string_literal,
            boolean_literal=token.boolean_literal,
            array_literal=token.array_literal.deep_copy() if token.array_literal else None,
            token_string=token.token_string.deep_copy() if token.token_string else None,
            type_tuple=token.type_tuple.deep_copy() if token.type_tuple else None
        ) for token in self.tokens]
        return TokenString(tokens=copied_tokens)


@dataclass
class TypeTuple:
    input_identifiers: List[Identifier] = field(default_factory=list)
    output_identifiers: List[Identifier] = field(default_factory=list)

    def deep_copy(self) -> TypeTuple:
        copied_input_ids = [Identifier(name=id.name, is_literal=id.is_literal) for id in self.input_identifiers]
        copied_output_ids = [Identifier(name=id.name, is_literal=id.is_literal) for id in self.output_identifiers]
        return TypeTuple(input_identifiers=copied_input_ids, output_identifiers=copied_output_ids)


@dataclass
class StructInline:
    values: List[Any]
    name: str


# =====================================================================
#  ArrayLiteral
# =====================================================================

@dataclass
class ArrayLiteral:
    type: ArrayType

    identifiers: Optional[List[Identifier]] = None
    integer_literals: Optional[List[int]] = None
    float_literals: Optional[List[float]] = None
    double_literals: Optional[List[float]] = None
    character_literals: Optional[List[int]] = None
    string_literals: Optional[List[str]] = None
    boolean_literals: Optional[List[bool]] = None
    token_strings: Optional[List[TokenString]] = None
    type_tuples: Optional[List[TypeTuple]] = None
    struct_inline: Optional[StructInline] = None

    shape: Optional[List[int]] = None
    dimensions: int = 0

    def deep_copy(self) -> ArrayLiteral:
        copied_array = ArrayLiteral(type=self.type, dimensions=self.dimensions, shape=list(self.shape) if self.shape else None)

        if self.identifiers is not None:
            copied_array.identifiers = [Identifier(name=id.name, is_literal=id.is_literal) for id in self.identifiers]
        if self.integer_literals is not None:
            copied_array.integer_literals = list(self.integer_literals)
        if self.float_literals is not None:
            copied_array.float_literals = list(self.float_literals)
        if self.double_literals is not None:
            copied_array.double_literals = list(self.double_literals)
        if self.character_literals is not None:
            copied_array.character_literals = list(self.character_literals)
        if self.string_literals is not None:
            copied_array.string_literals = list(self.string_literals)
        if self.boolean_literals is not None:
            copied_array.boolean_literals = list(self.boolean_literals)
        if self.token_strings is not None:
            copied_array.token_strings = [ts.deep_copy() for ts in self.token_strings]
        if self.type_tuples is not None:
            copied_array.type_tuples = [tt.deep_copy() for tt in self.type_tuples]
        if self.struct_inline is not None:
            copied_array.struct_inline = StructInline(
                values=list(self.struct_inline.values),
                name=self.struct_inline.name
            )

        return copied_array


# =====================================================================
#  Token
# =====================================================================

@dataclass
class Token:
    type: TokenType

    identifier: Optional[Identifier] = None
    integer_literal: Optional[IntegerLiteral] = None
    float_literal: Optional[float] = None
    double_literal: Optional[float] = None
    character_literal: Optional[int] = None
    string_literal: Optional[str] = None
    boolean_literal: Optional[bool] = None
    array_literal: Optional[ArrayLiteral] = None
    token_string: Optional[TokenString] = None
    type_tuple: Optional[TypeTuple] = None


# =====================================================================
#  File Info and Results
# =====================================================================

@dataclass
class FileInfo:
    filename: str
    line: int
    column: int
    length: int = 0
    lines: int = 0


@dataclass
class LexerError(Exception):
    message: str
    file_info: FileInfo


# =====================================================================
#  Numeric Type Flags
# =====================================================================

class NumericType(Enum):
    F64 = auto()
    F32 = auto()
    I64 = auto()
    I32 = auto()
    I16 = auto()
    I8 = auto()
    U64 = auto()
    U32 = auto()
    U16 = auto()
    U8 = auto()


class NumericLiteralType(Enum):
    BINARY = auto()
    OCTAL = auto()
    DECIMAL = auto()
    HEXADECIMAL = auto()
    FLOAT = auto()
    EXPONENTIAL = auto()


# =====================================================================
#  Lexer Implementation
# =====================================================================

class Lexer:
    def __init__(self, info: LexerInfo):
        self.info = info

    def peek(self) -> str:
        if self.info.pos >= len(self.info.source_code):
            return '\0'
        return self.info.source_code[self.info.pos]

    def far_peek(self, offset: int) -> str:
        pos = self.info.pos + offset
        if pos >= len(self.info.source_code):
            return '\0'
        return self.info.source_code[pos]

    def seek(self, index: int) -> str:
        if index >= len(self.info.source_code):
            return '\0'
        return self.info.source_code[index]

    def advance(self) -> str:
        if self.info.pos < len(self.info.source_code):
            if self.info.source_code[self.info.pos] == '\n':
                self.info.line += 1
                self.info.column = 1
            else:
                self.info.column += 1
            self.info.pos += 1
        return self.peek()

    def get_file_info(self, start: int, start_line: int) -> FileInfo:
        return FileInfo(
            filename=self.info.filename,
            line=self.info.line,
            column=self.info.column,
            length=self.info.pos - start,
            lines=self.info.line - start_line
        )

    def get_token_text(self, start: int) -> str:
        return self.info.source_code[start:self.info.pos]

    def skip_comments_and_whitespace(self):
        while True:
            c = self.peek()

            # Skip comments
            if (c == '/' and self.far_peek(1) == '/') or c == '#':
                while self.peek() not in ('\n', '\0'):
                    self.advance()

            # Skip whitespace
            if c.isspace():
                self.advance()
                continue

            break

    def is_identifier_continue(self, c: str) -> bool:
        if not c.isprintable():
            return False
        if c == '/' and self.far_peek(1) == '/':
            return False
        if c in '{}[]()\'\"#':
            return False
        if c.isspace() or c == '\0':
            return False
        return True

    def is_identifier_start(self) -> bool:
        c = self.peek()
        if c == ':' and self.far_peek(1) == ':':
            c = self.far_peek(2)
        return not c.isdigit() and self.is_identifier_continue(c)

    # =====================================================================
    #  Integer Parsing Helpers
    # =====================================================================

    def create_binary_integer(self, start: int) -> int:
        token = self.get_token_text(start)
        negative = token[0] == '-'
        i = 3 if negative else 2

        value = 0
        while i < len(token):
            c = token[i]
            if c.isspace() or c in '/:' or c == '\0':
                break
            if c in '._':
                i += 1
                continue
            value *= 2
            if c == '1':
                value += 1
            i += 1

        if negative:
            # Python handles negative integers naturally
            value = -value
        return value

    def create_octal_integer(self, start: int) -> int:
        token = self.get_token_text(start)
        negative = token[0] == '-'
        i = 3 if negative else 2

        value = 0
        while i < len(token):
            c = token[i]
            if c.isspace() or c in '/:' or c == '\0':
                break
            if c in '._':
                i += 1
                continue
            value *= 8
            if c.isdigit() and c < '8':
                value += int(c)
            i += 1

        if negative:
            value = -value
        return value

    def create_decimal_integer(self, start: int) -> int:
        token = self.get_token_text(start)
        negative = token[0] == '-'
        i = 1 if negative else 0

        value = 0
        while i < len(token):
            c = token[i]
            if c.isspace() or c in '/:' or c == '\0':
                break
            if c == '_':
                i += 1
                continue
            if c.isdigit():
                value *= 10
                value += int(c)
            i += 1

        if negative:
            value = -value
        return value

    def create_hexadecimal_integer(self, start: int) -> int:
        token = self.get_token_text(start)
        negative = token[0] == '-'
        i = 3 if negative else 2

        value = 0
        while i < len(token):
            c = token[i]
            if c.isspace() or c in '/:' or c == '\0':
                break
            if c in '._':
                i += 1
                continue
            value *= 16
            if c.isdigit():
                value += int(c)
            elif c.upper() in 'ABCDEF':
                value += ord(c.upper()) - ord('A') + 10
            i += 1

        if negative:
            value = -value
        return value

    def create_float(self, start: int) -> float:
        token = self.get_token_text(start)
        negative = token[0] == '-'
        i = 1 if negative else 0

        value = 0.0
        fractional = 0

        while i < len(token):
            c = token[i]
            if c.isspace() or c in '/:' or c == '\0':
                break
            if c == '_':
                i += 1
                continue
            if c == '.':
                fractional = 1
                i += 1
                continue

            if fractional == 0:
                value *= 10
            else:
                fractional *= 10

            if c.isdigit():
                digit = int(c)
                if fractional == 0:
                    value += digit
                else:
                    value += digit / fractional
            i += 1

        if negative:
            value = -value
        return value

    # =====================================================================
    #  Integer Type Validation
    # =====================================================================

    def get_integer_type(self, numeric_type: NumericType) -> IntegerBuiltInType:
        type_map = {
            NumericType.I64: IntegerBuiltInType.I64,
            NumericType.I32: IntegerBuiltInType.I32,
            NumericType.I16: IntegerBuiltInType.I16,
            NumericType.I8: IntegerBuiltInType.I8,
            NumericType.U64: IntegerBuiltInType.U64,
            NumericType.U32: IntegerBuiltInType.U32,
            NumericType.U16: IntegerBuiltInType.U16,
            NumericType.U8: IntegerBuiltInType.U8,
        }

        if numeric_type not in type_map:
            raise ValueError("Encountered a Float where there should not be one.")

        return type_map[numeric_type]

    def validate_integer_range(self, value: int, int_type: IntegerBuiltInType, start: int, start_line: int):
        ranges = {
            IntegerBuiltInType.I64: (-2**63, 2**63 - 1),
            IntegerBuiltInType.I32: (-2**31, 2**31 - 1),
            IntegerBuiltInType.I16: (-2**15, 2**15 - 1),
            IntegerBuiltInType.I8: (-2**7, 2**7 - 1),
            IntegerBuiltInType.U64: (0, 2**64 - 1),
            IntegerBuiltInType.U32: (0, 2**32 - 1),
            IntegerBuiltInType.U16: (0, 2**16 - 1),
            IntegerBuiltInType.U8: (0, 2**8 - 1),
        }

        min_val, max_val = ranges[int_type]
        if value < min_val or value > max_val:
            type_name = int_type.name.lower()
            raise LexerError(
                f"Integer overflow: value exceeds range for {type_name}.",
                self.get_file_info(start, start_line)
            )

    def create_integer_token(self, int_type: IntegerBuiltInType, value: int, start: int, start_line: int) -> Token:
        self.validate_integer_range(value, int_type, start, start_line)
        return Token(
            type=TokenType.INTEGER,
            integer_literal=IntegerLiteral(value=value, type=int_type)
        )

    def create_float_token(self, numeric_type: NumericType, start: int, start_line: int) -> Token:
        value = self.create_float(start)
        if numeric_type == NumericType.F64:
            return Token(type=TokenType.DOUBLE, double_literal=value)
        else:
            return Token(type=TokenType.FLOAT, float_literal=value)

    # =====================================================================
    #  Numeric Type Parsing
    # =====================================================================

    def parse_numeric_type(self, start: int, start_line: int, literal_type: NumericLiteralType) -> NumericType:
        c = self.advance()

        if c == 'f':
            if literal_type not in (NumericLiteralType.DECIMAL, NumericLiteralType.FLOAT, NumericLiteralType.EXPONENTIAL):
                raise LexerError("Invalid numeric literal: float type not allowed.", self.get_file_info(start, start_line))

            c = self.advance()
            if c == '6' and self.far_peek(1) == '4':
                self.advance()
                self.advance()
                return NumericType.F64
            elif c == '3' and self.far_peek(1) == '2':
                self.advance()
                self.advance()
                return NumericType.F32
            else:
                raise LexerError("Invalid float type: must be of type 'f64' or 'f32'.", self.get_file_info(start, start_line))

        elif c in 'iu':
            if literal_type in (NumericLiteralType.FLOAT, NumericLiteralType.EXPONENTIAL):
                raise LexerError("Invalid float type: must be of type 'f64' or 'f32'.", self.get_file_info(start, start_line))

            unsigned = c == 'u'
            c = self.advance()

            if c == '6' and self.far_peek(1) == '4':
                self.advance()
                self.advance()
                return NumericType.U64 if unsigned else NumericType.I64
            elif c == '3' and self.far_peek(1) == '2':
                self.advance()
                self.advance()
                return NumericType.U32 if unsigned else NumericType.I32
            elif c == '1' and self.far_peek(1) == '6':
                self.advance()
                self.advance()
                return NumericType.U16 if unsigned else NumericType.I16
            elif c == '8':
                self.advance()
                return NumericType.U8 if unsigned else NumericType.I8
            else:
                prefix = 'unsigned' if unsigned else 'signed'
                raise LexerError(f"Invalid {prefix} integer type.", self.get_file_info(start, start_line))

        else:
            raise LexerError("Invalid numeric type: type must start with 'f', 'i', or 'u'.", self.get_file_info(start, start_line))

    # =====================================================================
    #  Numeric Literal Parsing
    # =====================================================================

    def parse_binary_integer(self, start: int, start_line: int) -> Token:
        c = self.peek()
        while c in '01_':
            c = self.advance()

        if c == ':':
            numeric_type = self.parse_numeric_type(start, start_line, NumericLiteralType.BINARY)
            int_type = self.get_integer_type(numeric_type)
            value = self.create_binary_integer(start)
            return self.create_integer_token(int_type, value, start, start_line)

        if c.isspace() or c in '/\0':
            value = self.create_binary_integer(start)
            return self.create_integer_token(IntegerBuiltInType.I64, value, start, start_line)

        raise LexerError(f"Invalid binary literal: unexpected '{c}' in binary integer.", self.get_file_info(start, start_line))

    def parse_octal_integer(self, start: int, start_line: int) -> Token:
        c = self.peek()
        while c.isdigit() and c not in '89' or c == '_':
            c = self.advance()

        if c == ':':
            numeric_type = self.parse_numeric_type(start, start_line, NumericLiteralType.OCTAL)
            int_type = self.get_integer_type(numeric_type)
            value = self.create_octal_integer(start)
            return self.create_integer_token(int_type, value, start, start_line)

        if c.isspace() or c in '/\0':
            value = self.create_octal_integer(start)
            return self.create_integer_token(IntegerBuiltInType.I64, value, start, start_line)

        raise LexerError(f"Invalid octal literal: unexpected '{c}' in octal integer.", self.get_file_info(start, start_line))

    def parse_hexadecimal_integer(self, start: int, start_line: int) -> Token:
        c = self.peek()
        while c in '0123456789ABCDEFabcdef_':
            c = self.advance()

        if c == ':':
            numeric_type = self.parse_numeric_type(start, start_line, NumericLiteralType.HEXADECIMAL)
            int_type = self.get_integer_type(numeric_type)
            value = self.create_hexadecimal_integer(start)
            return self.create_integer_token(int_type, value, start, start_line)

        if c.isspace() or c in '/\0':
            value = self.create_hexadecimal_integer(start)
            return self.create_integer_token(IntegerBuiltInType.I64, value, start, start_line)

        raise LexerError(f"Invalid hexadecimal literal: unexpected '{c}' in hexadecimal integer.", self.get_file_info(start, start_line))

    def parse_exponential(self, start: int, start_line: int) -> Token:
        raise NotImplementedError("Float exponential not implemented yet.")

    def parse_float(self, start: int, start_line: int) -> Token:
        c = self.peek()
        while c.isdigit() or c == '_':
            c = self.advance()

        if c in 'eE':
            return self.parse_exponential(start, start_line)

        if c == ':':
            numeric_type = self.parse_numeric_type(start, start_line, NumericLiteralType.FLOAT)
            return self.create_float_token(numeric_type, start, start_line)

        if c.isspace() or c in '/\0':
            return self.create_float_token(NumericType.F64, start, start_line)

        raise LexerError(f"Invalid float literal: unexpected '{c}' in float.", self.get_file_info(start, start_line))

    def parse_decimal_integer(self, start: int, start_line: int) -> Token:
        c = self.peek()
        while c.isdigit() or c == '_':
            c = self.advance()

        if c == '.':
            self.advance()
            return self.parse_float(start, start_line)

        if c in 'eE':
            return self.parse_exponential(start, start_line)

        if c == ':':
            numeric_type = self.parse_numeric_type(start, start_line, NumericLiteralType.DECIMAL)
            int_type = self.get_integer_type(numeric_type)
            value = self.create_decimal_integer(start)
            return self.create_integer_token(int_type, value, start, start_line)

        if c.isspace() or c in '/\0':
            value = self.create_decimal_integer(start)
            return self.create_integer_token(IntegerBuiltInType.I64, value, start, start_line)

        raise LexerError(f"Invalid decimal literal: unexpected '{c}' in decimal integer.", self.get_file_info(start, start_line))

    def parse_numeric_literal(self, start: int, start_line: int) -> Token:
        c = self.peek()
        if c == '-':
            c = self.advance()

        if c == '0':
            c = self.advance()
            if c in 'bB':
                self.advance()
                return self.parse_binary_integer(start, start_line)
            elif c in 'oO':
                self.advance()
                return self.parse_octal_integer(start, start_line)
            elif c in 'xX':
                self.advance()
                return self.parse_hexadecimal_integer(start, start_line)

        return self.parse_decimal_integer(start, start_line)

    # =====================================================================
    #  Character Literal Parsing
    # =====================================================================

    def parse_character_literal(self, start: int, start_line: int) -> Token:
        c = self.peek()

        if c == '\'':
            raise LexerError("Invalid character literal: empty character literal.", self.get_file_info(start, start_line))

        if c == '\\':
            c = self.advance()
            escape_map = {
                'n': '\n',
                'r': '\r',
                't': '\t',
                '\\': '\\',
                '\'': '\'',
                '0': '\0'
            }
            if c in escape_map:
                value = ord(escape_map[c])
            else:
                raise LexerError(f"Invalid character literal: unknown escape sequence '\\{c}'.", self.get_file_info(start, start_line))
        elif c in '\n\r':
            raise LexerError("Invalid character literal: unclosed character literal.", self.get_file_info(start, start_line))
        else:
            value = ord(c)

        c = self.advance()

        if c.isspace() or c in '/\0':
            raise LexerError("Invalid character literal: unclosed character literal.", self.get_file_info(start, start_line))
        elif c != '\'':
            raise LexerError(f"Invalid character literal: unexpected '{c}' in character.", self.get_file_info(start, start_line))

        self.advance()
        return Token(type=TokenType.CHARACTER, character_literal=value)

    # =====================================================================
    #  String Literal Parsing (stub)
    # =====================================================================

    def parse_string_literal(self, start: int, start_line: int) -> Token:
        raise NotImplementedError("String literals not implemented yet.")

    # =====================================================================
    #  Token String Parsing
    # =====================================================================

    def parse_token_string(self, start: int, start_line: int) -> Token:
        tokens = []
        self.advance()  # Skip opening '{'

        watchdog = 0
        while self.peek() != '\0':
            self.skip_comments_and_whitespace()
            c = self.peek()

            if c == '}':
                self.advance()
                return Token(type=TokenType.TOKEN_STRING, token_string=TokenString(tokens=tokens))

            token = self.lexer_next()
            tokens.append(token)

            if token.type == TokenType.EOF:
                break

            watchdog += 1
            if watchdog > 1000000:
                raise LexerError("Watchdog triggered in token string.", self.get_file_info(start, start_line))

        raise LexerError("Unclosed token string: missing closing brace '}'.", self.get_file_info(start, start_line))

    # =====================================================================
    #  Array and Type Tuple Parsing (stubs)
    # =====================================================================

    def parse_array_literal(self, start: int, start_line: int) -> Token:
        raise NotImplementedError("Array literals not implemented yet.")

    def parse_type_tuple(self, start: int, start_line: int) -> Token:
        raise NotImplementedError("Type tuples not implemented yet.")

    # =====================================================================
    #  Identifier and Boolean Parsing
    # =====================================================================

    def parse_identifiers_and_booleans(self, start: int, start_line: int) -> Token:
        c = self.peek()
        is_literal = False

        # Check for identifier literal (::)
        if c == ':' and self.far_peek(1) == ':':
            is_literal = True
            self.advance()
            self.advance()
            c = self.peek()

        # Read identifier name
        name_chars = []
        while self.is_identifier_continue(c):
            if c == ':':
                raise LexerError("Invalid identifier: ':' is not allowed in identifiers.", self.get_file_info(start, start_line))
            if c == '.':
                raise LexerError("Invalid identifier: '.' is not allowed in identifiers.", self.get_file_info(start, start_line))
            name_chars.append(c)
            c = self.advance()

        name = ''.join(name_chars)

        # Check for boolean literals
        if name == 'false':
            return Token(type=TokenType.BOOLEAN, boolean_literal=False)
        elif name == 'true':
            return Token(type=TokenType.BOOLEAN, boolean_literal=True)
        else:
            return Token(type=TokenType.IDENTIFIER, identifier=Identifier(name=name, is_literal=is_literal))

    # =====================================================================
    #  Main Lexer Logic
    # =====================================================================

    def lexer_next(self) -> Token:
        self.skip_comments_and_whitespace()

        c = self.peek()
        start = self.info.pos
        start_line = self.info.line

        # End of file
        if c == '\0':
            return Token(type=TokenType.EOF)

        # Numeric literals (integers and floats)
        if c.isdigit() or (c == '.' and self.far_peek(1).isdigit()) or (c == '-' and self.far_peek(1).isdigit()):
            return self.parse_numeric_literal(start, start_line)

        # Character literals
        if c == '\'':
            self.advance()
            return self.parse_character_literal(start, start_line)

        # String literals
        if c == '"':
            return self.parse_string_literal(start, start_line)

        # Token strings
        if c == '{':
            return self.parse_token_string(start, start_line)

        if c == '}':
            self.advance()
            raise LexerError("Unexpected closing brace '}' without matching opening brace.", self.get_file_info(start, start_line))

        # Array literals
        if c == '[':
            return self.parse_array_literal(start, start_line)

        if c == ']':
            self.advance()
            raise LexerError("Unexpected closing bracket ']' without matching opening bracket.", self.get_file_info(start, start_line))

        # Type tuples
        if c == '(':
            return self.parse_type_tuple(start, start_line)

        if c == ')':
            self.advance()
            raise LexerError("Unexpected closing parentheses ')' without matching opening parentheses.", self.get_file_info(start, start_line))

        # Identifiers and booleans
        if self.is_identifier_start():
            return self.parse_identifiers_and_booleans(start, start_line)

        # Check for malformed identifier literal
        if c == ':':
            self.advance()
            if self.far_peek(1) == ':':
                raise LexerError("Invalid identifier literal: empty identifier after '::'.", self.get_file_info(start, start_line))
            else:
                raise LexerError("Unexpected single colon ':'.", self.get_file_info(start, start_line))

        # Unknown character
        raise LexerError(f"Unexpected character: unexpected '{c}' during parsing.", self.get_file_info(start, start_line))

    def lexical_analysis(self) -> List[Token]:
        """Main entry point for lexical analysis."""
        tokens = []

        while True:
            try:
                token = self.lexer_next()
                tokens.append(token)

                if token.type == TokenType.EOF:
                    break
            except LexerError as e:
                # Re-raise lexer errors
                raise

        return tokens


# =====================================================================
#  Public API
# =====================================================================

def lexical_analysis(lexer_info: LexerInfo) -> List[Token]:
    """Convenience function matching the original C API."""
    lexer = Lexer(lexer_info)
    return lexer.lexical_analysis()