# YREA-SLS/SLS_Python/sls_py/lexer.py

from __future__ import annotations
from dataclasses import dataclass, field, replace
from enum import Enum, auto
from typing import List, Optional, Any


# =====================================================================
#  Basic Types
# =====================================================================

class LexerInfo:
    """Mutable bookkeeping for lexing a single source text.

    Holds the file name, the full source text, and three counters.
    A new instance starts with ``pos == 0``, ``column == 0`` and
    ``line == 1``.

    NOTE(review): ``pos`` is presumably an index into ``source_code`` and
    ``line``/``column`` the matching human-readable location — confirm
    against the lexer implementation that advances them.
    """

    filename: str
    source_code: str
    pos: int
    column: int
    line: int

    def __init__(self, filename: str = "", source_code: str = ""):
        """Store the inputs and reset all counters to their start values."""
        self.filename, self.source_code = filename, source_code
        # pos and column start at zero; line starts at one.
        self.pos = self.column = 0
        self.line = 1


# =====================================================================
#  Token Types
# =====================================================================

class TokenType(Enum):
    """Kind tag stored in ``Token.type``.

    Members are numbered consecutively from 1, which is exactly the
    numbering ``auto()`` produces for this declaration order. Append new
    members at the end so existing values do not shift.
    """

    EOF = 1
    IDENTIFIER = 2
    INTEGER = 3
    FLOAT = 4
    DOUBLE = 5
    CHARACTER = 6
    STRING = 7
    BOOLEAN = 8
    ARRAY = 9
    TOKEN_STRING = 10
    TYPE_TUPLE = 11


# =====================================================================
#  Array Literal Types
# =====================================================================

class ArrayType(Enum):
    """Element-kind tag stored in ``ArrayLiteral.type``.

    Members are numbered consecutively from 1, which is exactly the
    numbering ``auto()`` produces for this declaration order. Append new
    members at the end so existing values do not shift.
    """

    IDENTIFIER = 1
    # Integer element kinds.
    I64 = 2
    I32 = 3
    I16 = 4
    I8 = 5
    U64 = 6
    U32 = 7
    U16 = 8
    U8 = 9
    # Remaining element kinds.
    FLOAT = 10
    DOUBLE = 11
    CHARACTER = 12
    STRING = 13
    BOOLEAN = 14
    STRUCT_INLINE = 15


# =====================================================================
#  Identifier
# =====================================================================

@dataclass
class Identifier:
    """A name together with an ``is_literal`` flag.

    Instances are mutable; the ``deep_copy`` methods elsewhere in this
    module clone them field-by-field rather than sharing them.

    NOTE(review): what ``is_literal`` distinguishes is not established in
    this module — confirm against the code that produces identifiers.
    """
    name: str  # the identifier text
    is_literal: bool  # flag carried alongside the name (semantics set by the producer)


# =====================================================================
#  Integer Literal Type
# =====================================================================

class IntegerBuiltInType(Enum):
    """Type tag stored in ``IntegerLiteral.type``.

    Members are numbered consecutively from 1, which is exactly the
    numbering ``auto()`` produces for this declaration order. Append new
    members at the end so existing values do not shift.
    """

    # I-prefixed members.
    I64 = 1
    I32 = 2
    I16 = 3
    I8 = 4
    # U-prefixed members.
    U64 = 5
    U32 = 6
    U16 = 7
    U8 = 8


@dataclass
class IntegerLiteral:
    """An integer value paired with an ``IntegerBuiltInType`` tag.

    This class performs no range check of ``value`` against ``type``.
    """
    value: int          # Python int is arbitrary precision, so nothing is truncated when stored here
    type: IntegerBuiltInType  # which built-in integer type the literal is tagged with


# =====================================================================
#  TokenString, TypeTuple, StructInline
# =====================================================================

@dataclass
class TokenString:
    """An ordered list of tokens.

    ``tokens`` defaults to a fresh empty list for every instance.
    """

    tokens: List["Token"] = field(default_factory=list)

    def deep_copy(self) -> TokenString:
        """Return a copy that shares no mutable state with ``self``.

        Each token is rebuilt with ``dataclasses.replace``, so scalar
        payloads (floats, strings, booleans, ints) are carried over as-is
        and the new token has the same class as the source token. Mutable
        payloads are cloned:

        * ``identifier`` and ``integer_literal`` get fresh dataclass
          instances, so editing the copy never changes the original;
        * ``array_literal``, ``token_string`` and ``type_tuple`` are
          cloned through their own ``deep_copy`` methods.

        Returns:
            A new ``TokenString`` holding a new list of new tokens.
        """

        def clone_token(token):
            # Only override the fields that hold mutable objects; replace()
            # copies every other field from the source token unchanged.
            return replace(
                token,
                identifier=(
                    None if token.identifier is None
                    else replace(token.identifier)
                ),
                integer_literal=(
                    None if token.integer_literal is None
                    else replace(token.integer_literal)
                ),
                array_literal=(
                    None if token.array_literal is None
                    else token.array_literal.deep_copy()
                ),
                token_string=(
                    None if token.token_string is None
                    else token.token_string.deep_copy()
                ),
                type_tuple=(
                    None if token.type_tuple is None
                    else token.type_tuple.deep_copy()
                ),
            )

        return TokenString(tokens=[clone_token(token) for token in self.tokens])


@dataclass
class TypeTuple:
    """Two lists of identifiers: one for inputs and one for outputs.

    Both lists default to a fresh empty list for every instance.
    """

    input_identifiers: List[Identifier] = field(default_factory=list)
    output_identifiers: List[Identifier] = field(default_factory=list)

    def deep_copy(self) -> TypeTuple:
        """Return a new ``TypeTuple`` whose lists hold freshly built identifiers.

        Every ``Identifier`` is reconstructed from its ``name`` and
        ``is_literal`` fields, so the copy and the original share neither
        lists nor identifier objects.
        """

        def clone_all(identifiers):
            # Rebuild each identifier instead of reusing the existing object.
            return [
                Identifier(name=ident.name, is_literal=ident.is_literal)
                for ident in identifiers
            ]

        return TypeTuple(
            input_identifiers=clone_all(self.input_identifiers),
            output_identifiers=clone_all(self.output_identifiers),
        )


@dataclass
class StructInline:
    """A named list of values of unconstrained type.

    Neither field has a default, so both must be supplied on construction.
    """
    values: List[Any]      # element types are unconstrained; any Python object may be stored
    name: str  # name stored with the values


# =====================================================================
#  ArrayLiteral (replaces C unions with Optional lists)
# =====================================================================

@dataclass
class ArrayLiteral:
    """Array literal payload; emulates a C union with ``Optional`` fields.

    ``type`` tags the element kind. Each payload field defaults to
    ``None``; this class does not enforce that the populated field matches
    ``type`` or that only one is populated.
    """

    type: ArrayType

    identifiers: Optional[List[Identifier]] = None
    integer_literals: Optional[List[int]] = None
    float_literals: Optional[List[float]] = None
    double_literals: Optional[List[float]] = None
    character_literals: Optional[List[int]] = None
    string_literals: Optional[List[str]] = None
    boolean_literals: Optional[List[bool]] = None
    token_strings: Optional[List[TokenString]] = None
    type_tuples: Optional[List[TypeTuple]] = None
    struct_inline: Optional[StructInline] = None

    shape: Optional[List[int]] = None
    dimensions: int = 0

    def deep_copy(self) -> ArrayLiteral:
        """Return a copy whose containers are independent of ``self``.

        * ``shape`` is copied whenever it is not ``None``; an empty list
          stays an empty list instead of collapsing to ``None``.
        * Lists of immutable scalars are copied with ``list(...)``.
        * ``identifiers`` are rebuilt one by one; ``token_strings`` and
          ``type_tuples`` are cloned through their own ``deep_copy``.
        * ``struct_inline`` gets a new ``StructInline`` with a new list,
          but the list elements themselves are shared (shallow), because
          their types are unconstrained.

        Returns:
            A new ``ArrayLiteral`` with the same ``type`` and ``dimensions``.
        """
        copied_array = ArrayLiteral(
            type=self.type,
            dimensions=self.dimensions,
            # Compare against None rather than testing truthiness, so that
            # an empty shape [] is preserved as [].
            shape=list(self.shape) if self.shape is not None else None,
        )

        # These payloads hold immutable elements, so a new list is enough.
        for attr in (
            "integer_literals",
            "float_literals",
            "double_literals",
            "character_literals",
            "string_literals",
            "boolean_literals",
        ):
            values = getattr(self, attr)
            if values is not None:
                setattr(copied_array, attr, list(values))

        if self.identifiers is not None:
            copied_array.identifiers = [
                Identifier(name=ident.name, is_literal=ident.is_literal)
                for ident in self.identifiers
            ]
        if self.token_strings is not None:
            copied_array.token_strings = [ts.deep_copy() for ts in self.token_strings]
        if self.type_tuples is not None:
            copied_array.type_tuples = [tt.deep_copy() for tt in self.type_tuples]
        if self.struct_inline is not None:
            copied_array.struct_inline = StructInline(
                values=list(self.struct_inline.values),
                name=self.struct_inline.name,
            )

        return copied_array


# =====================================================================
#  Token (Python “union” via Optional fields)
# =====================================================================

@dataclass
class Token:
    """A lexer token: a ``TokenType`` tag plus optional payload fields.

    Emulates a tagged union. Every payload field defaults to ``None``;
    this class does not enforce that the populated field matches ``type``
    or that only one is populated.

    NOTE(review): presumably exactly one payload is set per token,
    selected by ``type`` — confirm against the lexer that builds tokens.
    """
    type: TokenType  # tag identifying the kind of token

    # Payload slots, one per kind of value a token can carry.
    identifier: Optional[Identifier] = None
    integer_literal: Optional[IntegerLiteral] = None
    float_literal: Optional[float] = None
    double_literal: Optional[float] = None
    character_literal: Optional[int] = None  # stored as an int, not a str
    string_literal: Optional[str] = None
    boolean_literal: Optional[bool] = None
    array_literal: Optional[ArrayLiteral] = None
    token_string: Optional[TokenString] = None
    type_tuple: Optional[TypeTuple] = None


# =====================================================================
#  Lexer Token Result / Lexer Result
# =====================================================================

class SlsResultType(Enum):
    """Two-member tag separating a result from an error.

    Members are numbered consecutively from 1, which is exactly the
    numbering ``auto()`` produces for this declaration order.
    """

    RESULT = 1
    ERROR = 2


@dataclass
class FileInfo:
    """A file name together with a line and a column number.

    NOTE(review): presumably a source location attached to results or
    errors; whether ``line``/``column`` are 0- or 1-based is not
    established here — confirm against the producer.
    """
    filename: str  # name of the file the location refers to
    line: int  # line number within that file
    column: int  # column number within that line


# =====================================================================
#  Function Stubs (to be implemented in Python version)
# =====================================================================

def lexical_analysis(lexer_info: LexerInfo) -> list[Token]:
    """Stub for the lexer entry point; tokenization is not implemented yet.

    ``lexer_info`` is accepted but not read. Every call returns a new,
    empty list.
    """
    tokens: list[Token] = []
    return tokens