803 lines
20 KiB
Python
803 lines
20 KiB
Python
# Kyler Olsen
|
|
# Feb 2024
|
|
|
|
from enum import Enum
|
|
from typing import Sequence
|
|
|
|
from .compiler_types import CompilerError, FileInfo
|
|
from . import lexer
|
|
|
|
|
|
class _ExpectedTokenBase(CompilerError):
|
|
|
|
_token_type = lexer.Token
|
|
|
|
def __init__(
|
|
self,
|
|
token: lexer.Token,
|
|
expected: str | None = None,
|
|
found: str | None = None,
|
|
):
|
|
if expected is None:
|
|
expected = self._token_type.__name__
|
|
found = found or type(token).__name__
|
|
else:
|
|
found = found or token.value
|
|
message = f"Expected '{expected}' but found '{found}'."
|
|
super().__init__(message, token.file_info)
|
|
|
|
|
|
class ExpectedLiteral(_ExpectedTokenBase):
|
|
|
|
_token_type = (
|
|
lexer.NumberLiteral,
|
|
lexer.CharLiteral,
|
|
lexer.StringLiteral,
|
|
)
|
|
|
|
def __init__(
|
|
self,
|
|
token: lexer.Token,
|
|
expected: str | None = None,
|
|
found: str | None = None,
|
|
):
|
|
if expected is None:
|
|
expected = "NumberLiteral', 'CharLiteral', or 'StringLiteral"
|
|
found = found or type(token).__name__
|
|
super().__init__(token, expected, found)
|
|
|
|
|
|
class ExpectedDirective(_ExpectedTokenBase):
|
|
_type_name = lexer.Directive
|
|
class ExpectedIdentifier(_ExpectedTokenBase):
|
|
_type_name = lexer.Identifier
|
|
class ExpectedKeyword(_ExpectedTokenBase):
|
|
_type_name = lexer.Keyword
|
|
class ExpectedNumberLiteral(_ExpectedTokenBase):
|
|
_type_name = lexer.NumberLiteral
|
|
class ExpectedCharLiteral(_ExpectedTokenBase):
|
|
_type_name = lexer.CharLiteral
|
|
class ExpectedStringLiteral(_ExpectedTokenBase):
|
|
_type_name = lexer.StringLiteral
|
|
class ExpectedPunctuation(_ExpectedTokenBase):
|
|
_type_name = lexer.Punctuation
|
|
|
|
|
|
class _UnexpectedTokenBase(_ExpectedTokenBase):
|
|
|
|
def __init__(
|
|
self,
|
|
token: lexer.Token,
|
|
expected: str | list[str] | None = None,
|
|
found: str | None = None,
|
|
):
|
|
if isinstance(expected, list):
|
|
if len(expected) > 1:
|
|
s = ""
|
|
for i in expected[:-1]:
|
|
s += i + "', '"
|
|
s = s[:-1] + "or '" + i
|
|
expected = s
|
|
else:
|
|
expected = expected[0]
|
|
super().__init__(token, expected, found)
|
|
|
|
|
|
class UnexpectedToken(_UnexpectedTokenBase):
|
|
|
|
def __init__(
|
|
self,
|
|
token: lexer.Token,
|
|
expected: str | list[str],
|
|
found: str | None = None,
|
|
):
|
|
if isinstance(expected, list):
|
|
if len(expected) > 1:
|
|
s = ""
|
|
for i in expected[:-1]:
|
|
s += i + "', '"
|
|
s = s[:-1] + "or '" + i
|
|
expected = s
|
|
found = found or type(token).__name__
|
|
super().__init__(token, expected, found)
|
|
|
|
|
|
class UnexpectedDirective(_UnexpectedTokenBase):
|
|
_type_name = lexer.Directive
|
|
class UnexpectedIdentifier(_UnexpectedTokenBase):
|
|
_type_name = lexer.Identifier
|
|
class UnexpectedKeyword(_UnexpectedTokenBase):
|
|
_type_name = lexer.Keyword
|
|
class UnexpectedNumberLiteral(_UnexpectedTokenBase):
|
|
_type_name = lexer.NumberLiteral
|
|
class UnexpectedCharLiteral(_UnexpectedTokenBase):
|
|
_type_name = lexer.CharLiteral
|
|
class UnexpectedStringLiteral(_UnexpectedTokenBase):
|
|
_type_name = lexer.StringLiteral
|
|
class UnexpectedPunctuation(_UnexpectedTokenBase):
|
|
_type_name = lexer.Punctuation
|
|
|
|
|
|
type NestableCodeBlock = ForBlock | WhileBlock | DoBlock | IfBlock
|
|
|
|
type Literal = (
|
|
BuildInConst |
|
|
NumberLiteral |
|
|
CharLiteral |
|
|
StringLiteral
|
|
)
|
|
|
|
type Expression = (
|
|
Literal |
|
|
Identifier |
|
|
UnaryExpression |
|
|
BinaryExpression |
|
|
TernaryExpression |
|
|
FunctionCall
|
|
)
|
|
|
|
type Statement = Expression | LetStatement | LoopStatements | NestableCodeBlock
|
|
|
|
type DataType = DefaultDataType | Identifier
|
|
|
|
|
|
class BuildInConst(Enum):
|
|
ConstTrue = "True"
|
|
ConstFalse = "False"
|
|
ConstNone = "None"
|
|
|
|
|
|
class LoopStatements(Enum):
|
|
ContinueStatement = "continue"
|
|
BreakStatement = "break"
|
|
|
|
|
|
class UnaryOperator(Enum):
|
|
PostfixIncrement = "++"
|
|
PostfixDecrement = "--"
|
|
PrefixIncrement = "++"
|
|
PrefixDecrement = "--"
|
|
Negate = "-"
|
|
BitwiseNOT = "~"
|
|
BooleanNOT = "!"
|
|
Addressof = "@"
|
|
Dereference = "$"
|
|
|
|
|
|
class BinaryOperator(Enum):
|
|
Addition = "+"
|
|
Subtraction = "-"
|
|
Multiplication = "*"
|
|
Division = "/"
|
|
Modulus = "%"
|
|
BitwiseAND = "&"
|
|
BitwiseOR = "|"
|
|
BitwiseXOR = "^"
|
|
LeftShift = "<<"
|
|
RightShift = ">>"
|
|
Assignment = "="
|
|
AdditionAssignment = "+="
|
|
SubtractionAssignment = "-="
|
|
MultiplicationAssignment = "*="
|
|
DivisionAssignment = "/="
|
|
ModulusAssignment = "%="
|
|
BitwiseANDAssignment = "&="
|
|
BitwiseORAssignment = "|="
|
|
BitwiseXORAssignment = "^="
|
|
LeftShiftAssignment = "<<="
|
|
RightShiftAssignment = ">>="
|
|
BooleanAND = "&&"
|
|
BooleanOR = "||"
|
|
BooleanXOR = "^^"
|
|
EqualityComparison = "=="
|
|
InequalityComparison = "!="
|
|
LessThan = "<"
|
|
LessOrEqualToThan = "<="
|
|
GreaterThan = ">"
|
|
GreaterOrEqualToThan = ">="
|
|
|
|
|
|
class DefaultDataType(Enum):
|
|
unsigned = "unsigned"
|
|
int = "int"
|
|
fixed = "fixed"
|
|
float = "float"
|
|
|
|
|
|
_Operator_Precedence = [
|
|
BinaryOperator.Assignment,
|
|
BinaryOperator.AdditionAssignment,
|
|
BinaryOperator.SubtractionAssignment,
|
|
BinaryOperator.MultiplicationAssignment,
|
|
BinaryOperator.DivisionAssignment,
|
|
BinaryOperator.ModulusAssignment,
|
|
BinaryOperator.BitwiseANDAssignment,
|
|
BinaryOperator.BitwiseORAssignment,
|
|
BinaryOperator.BitwiseXORAssignment,
|
|
BinaryOperator.LeftShiftAssignment,
|
|
BinaryOperator.RightShiftAssignment,
|
|
BinaryOperator.BooleanAND,
|
|
BinaryOperator.BooleanOR,
|
|
BinaryOperator.BooleanXOR,
|
|
BinaryOperator.EqualityComparison,
|
|
BinaryOperator.InequalityComparison,
|
|
BinaryOperator.LessThan,
|
|
BinaryOperator.LessOrEqualToThan,
|
|
BinaryOperator.GreaterThan,
|
|
BinaryOperator.GreaterOrEqualToThan,
|
|
BinaryOperator.Addition,
|
|
BinaryOperator.Subtraction,
|
|
BinaryOperator.Multiplication,
|
|
BinaryOperator.Division,
|
|
BinaryOperator.Modulus,
|
|
BinaryOperator.BitwiseAND,
|
|
BinaryOperator.BitwiseOR,
|
|
BinaryOperator.BitwiseXOR,
|
|
BinaryOperator.LeftShift,
|
|
BinaryOperator.RightShift,
|
|
UnaryOperator.BooleanNOT,
|
|
UnaryOperator.Negate,
|
|
UnaryOperator.PrefixIncrement,
|
|
UnaryOperator.PrefixDecrement,
|
|
UnaryOperator.PostfixIncrement,
|
|
UnaryOperator.PostfixDecrement,
|
|
UnaryOperator.BitwiseNOT,
|
|
UnaryOperator.Dereference,
|
|
UnaryOperator.Addressof,
|
|
]
|
|
|
|
|
|
class Identifier:
|
|
|
|
_content: str
|
|
|
|
def __init__(
|
|
self,
|
|
content: str,
|
|
):
|
|
self._content = content
|
|
|
|
|
|
class StringLiteral:
|
|
|
|
_content: str
|
|
|
|
def __init__(
|
|
self,
|
|
content: str,
|
|
):
|
|
self._content = content
|
|
|
|
|
|
class CharLiteral:
|
|
|
|
_content: str
|
|
|
|
def __init__(
|
|
self,
|
|
content: str,
|
|
):
|
|
self._content = content
|
|
|
|
|
|
class NumberLiteral:
|
|
|
|
_content: str
|
|
|
|
def __init__(
|
|
self,
|
|
content: str,
|
|
):
|
|
self._content = content
|
|
|
|
|
|
class ArraySubscription:
|
|
|
|
_identifier: Identifier
|
|
_index: Expression
|
|
|
|
def __init__(
|
|
self,
|
|
identifier: Identifier,
|
|
index: Expression,
|
|
):
|
|
self._identifier = identifier
|
|
self._index = index
|
|
|
|
|
|
class FunctionArgument:
|
|
|
|
_identifier: Identifier | None
|
|
_value: Expression
|
|
|
|
def __init__(
|
|
self,
|
|
identifier: Identifier,
|
|
value: Expression,
|
|
):
|
|
self._identifier = identifier
|
|
self._value = value
|
|
|
|
|
|
class FunctionCall:
|
|
|
|
_identifier: Identifier
|
|
_params: list[FunctionArgument]
|
|
|
|
def __init__(
|
|
self,
|
|
identifier: Identifier,
|
|
params: list[FunctionArgument],
|
|
):
|
|
self._identifier = identifier
|
|
self._params = params
|
|
|
|
|
|
class TernaryExpression:
|
|
|
|
_operand1: Expression
|
|
_operand2: Expression
|
|
_operand3: Expression
|
|
|
|
def __init__(
|
|
self,
|
|
operand1: Expression,
|
|
operand2: Expression,
|
|
operand3: Expression,
|
|
):
|
|
self._operand1 = operand1
|
|
self._operand2 = operand2
|
|
self._operand3 = operand3
|
|
|
|
|
|
class BinaryExpression:
|
|
|
|
_operator: BinaryOperator
|
|
_operand1: Expression
|
|
_operand2: Expression
|
|
|
|
def __init__(
|
|
self,
|
|
operator: BinaryOperator,
|
|
operand1: Expression,
|
|
operand2: Expression,
|
|
):
|
|
self._operator = operator
|
|
self._operand1 = operand1
|
|
self._operand2 = operand2
|
|
|
|
|
|
class UnaryExpression:
|
|
|
|
_operator: UnaryOperator
|
|
_operand: Expression
|
|
|
|
def __init__(
|
|
self,
|
|
operator: UnaryOperator,
|
|
operand: Expression,
|
|
):
|
|
self._operator = operator
|
|
self._operand = operand
|
|
|
|
|
|
class LetStatement:
|
|
|
|
_identifier: Identifier
|
|
_type: DataType
|
|
_pointer: bool
|
|
_static: bool
|
|
_assignment: Expression | None
|
|
|
|
def __init__(
|
|
self,
|
|
identifier: Identifier,
|
|
type: DataType,
|
|
pointer: bool,
|
|
static: bool,
|
|
assignment: Literal | None,
|
|
):
|
|
self._identifier = identifier
|
|
self._type = type
|
|
self._pointer = pointer
|
|
self._static = static
|
|
self._assignment = assignment
|
|
|
|
|
|
class ElseBlock:
|
|
|
|
_code: list[Statement]
|
|
|
|
def __init__(
|
|
self,
|
|
code: list[Statement],
|
|
):
|
|
self._code = code[:]
|
|
|
|
|
|
class ForPreDef:
|
|
|
|
_identifier: Identifier
|
|
_type: DataType
|
|
_pointer: bool
|
|
_assignment: Expression
|
|
|
|
def __init__(
|
|
self,
|
|
identifier: Identifier,
|
|
type: DataType,
|
|
pointer: bool,
|
|
default: Literal | None,
|
|
):
|
|
self._identifier = identifier
|
|
self._type = type
|
|
self._pointer = pointer
|
|
self._default = default
|
|
|
|
|
|
class ForBlock:
|
|
|
|
_pre_statement: Expression | ForPreDef
|
|
_condition: Expression
|
|
_code: list[Statement]
|
|
_post_statement: Expression
|
|
_else: ElseBlock | None
|
|
|
|
def __init__(
|
|
self,
|
|
pre_statement: Expression | ForPreDef,
|
|
condition: Expression,
|
|
code: list[Statement],
|
|
post_statement: Expression,
|
|
else_block: ElseBlock | None,
|
|
):
|
|
self._pre_statement = pre_statement
|
|
self._condition = condition
|
|
self._code = code[:]
|
|
self._post_statement = post_statement
|
|
self._else = else_block
|
|
|
|
|
|
class WhileBlock:
|
|
|
|
_condition: Expression
|
|
_code: list[Statement]
|
|
_else: ElseBlock | None
|
|
|
|
def __init__(
|
|
self,
|
|
condition: Expression,
|
|
code: list[Statement],
|
|
else_block: ElseBlock | None,
|
|
):
|
|
self._condition = condition
|
|
self._code = code[:]
|
|
self._else = else_block
|
|
|
|
|
|
class DoBlock:
|
|
|
|
_first_code: list[Statement]
|
|
_condition: Expression
|
|
_second_code: list[Statement]
|
|
_else: ElseBlock | None
|
|
|
|
def __init__(
|
|
self,
|
|
first_code: list[Statement],
|
|
condition: Expression,
|
|
second_code: list[Statement],
|
|
else_block: ElseBlock | None,
|
|
):
|
|
self._first_code = first_code[:]
|
|
self._condition = condition
|
|
self._second_code = second_code[:]
|
|
self._else = else_block
|
|
|
|
|
|
class IfBlock:
|
|
|
|
_condition: Expression
|
|
_code: list[Statement]
|
|
_else: ElseBlock | None
|
|
|
|
def __init__(
|
|
self,
|
|
condition: Expression,
|
|
code: list[Statement],
|
|
else_block: ElseBlock | None,
|
|
):
|
|
self._condition = condition
|
|
self._code = code[:]
|
|
self._else = else_block
|
|
|
|
|
|
class FunctionParameter:
|
|
|
|
_identifier: Identifier
|
|
_type: DataType
|
|
_pointer: bool
|
|
_default: Literal | None
|
|
|
|
def __init__(
|
|
self,
|
|
identifier: Identifier,
|
|
type: DataType,
|
|
pointer: bool,
|
|
default: Literal | None,
|
|
):
|
|
self._identifier = identifier
|
|
self._type = type
|
|
self._pointer = pointer
|
|
self._default = default
|
|
|
|
|
|
class FunctionBlock:
|
|
|
|
_identifier: Identifier
|
|
_public: bool
|
|
_args: list[FunctionParameter]
|
|
_return_type: DataType
|
|
_code: list[Statement]
|
|
|
|
def __init__(
|
|
self,
|
|
identifier: Identifier,
|
|
public: bool,
|
|
args: list[FunctionParameter],
|
|
return_type: DataType,
|
|
code: list[Statement],
|
|
):
|
|
self._identifier = identifier
|
|
self._public = public
|
|
self._args = args[:]
|
|
self._return_type = return_type
|
|
self._code = code[:]
|
|
|
|
|
|
class EnumMember:
|
|
|
|
_identifier: Identifier
|
|
_value: NumberLiteral | None
|
|
|
|
def __init__(
|
|
self,
|
|
identifier: Identifier,
|
|
value: NumberLiteral | None,
|
|
):
|
|
self._identifier = identifier
|
|
self._value = value
|
|
|
|
|
|
class EnumBlock:
|
|
|
|
_identifier: Identifier
|
|
_public: bool
|
|
_members: list[EnumMember]
|
|
|
|
def __init__(
|
|
self,
|
|
identifier: Identifier,
|
|
public: bool,
|
|
members: list[EnumMember],
|
|
):
|
|
self._identifier = identifier
|
|
self._public = public
|
|
self._members = members[:]
|
|
|
|
|
|
class StructureMember:
|
|
|
|
_identifier: Identifier
|
|
_type: DataType
|
|
_pointer: bool
|
|
_static: bool
|
|
_default: Literal | None
|
|
|
|
def __init__(
|
|
self,
|
|
identifier: Identifier,
|
|
type: DataType,
|
|
pointer: bool,
|
|
static: bool,
|
|
default: Literal | None,
|
|
):
|
|
self._identifier = identifier
|
|
self._type = type
|
|
self._pointer = pointer
|
|
self._static = static
|
|
self._default = default
|
|
|
|
|
|
class StructBlock:
|
|
|
|
_identifier: Identifier
|
|
_public: bool
|
|
_members: list[StructureMember]
|
|
|
|
def __init__(
|
|
self,
|
|
identifier: Identifier,
|
|
public: bool,
|
|
members: list[StructureMember],
|
|
):
|
|
self._identifier = identifier
|
|
self._public = public
|
|
self._members = members[:]
|
|
|
|
|
|
class Directive:
|
|
|
|
_content: str
|
|
|
|
def __init__(
|
|
self,
|
|
content: str,
|
|
):
|
|
self._content = content
|
|
|
|
|
|
class File:
|
|
|
|
_children: list[Directive | StructBlock | FunctionBlock | EnumBlock]
|
|
|
|
def __init__(
|
|
self,
|
|
children: list[Directive | StructBlock | FunctionBlock | EnumBlock],
|
|
):
|
|
self._children = children[:]
|
|
|
|
|
|
def _assert_token(
|
|
exception: type[_ExpectedTokenBase],
|
|
token: lexer.Token,
|
|
value: str | None = None,
|
|
token_type: type[lexer.Token] | None = None,
|
|
):
|
|
if not isinstance(token, token_type or exception._token_type):
|
|
raise exception(token)
|
|
if value is not None and token.value != value:
|
|
raise exception(token, value)
|
|
|
|
def _assert_token_mult(
|
|
token: lexer.Token,
|
|
token_type: tuple[type[lexer.Token], ...],
|
|
):
|
|
if not isinstance(token, token_type):
|
|
raise UnexpectedToken(
|
|
token,
|
|
[i.__name__ for i in token_type], # type: ignore
|
|
type(token).__name__,
|
|
)
|
|
|
|
def _assert_token_literal(
|
|
token: lexer.Token,
|
|
):
|
|
token_types = (
|
|
lexer.Keyword,
|
|
lexer.NumberLiteral,
|
|
lexer.CharLiteral,
|
|
lexer.StringLiteral,
|
|
)
|
|
if not isinstance(token, token_types):
|
|
raise ExpectedLiteral(
|
|
token,
|
|
[i.__name__ for i in token_types], # type: ignore
|
|
type(token).__name__,
|
|
)
|
|
if isinstance(token, lexer.Keyword):
|
|
if token.value not in BuildInConst:
|
|
raise UnexpectedKeyword(token, [i.value for i in DefaultDataType])
|
|
|
|
def _literal_map(literal: (
|
|
lexer.Keyword |
|
|
lexer.NumberLiteral |
|
|
lexer.CharLiteral |
|
|
lexer.StringLiteral
|
|
)) -> Literal:
|
|
if isinstance(literal, lexer.Keyword):
|
|
return BuildInConst(literal.value)
|
|
elif isinstance(literal, lexer.NumberLiteral):
|
|
return NumberLiteral(literal.value)
|
|
elif isinstance(literal, lexer.CharLiteral):
|
|
return CharLiteral(literal.value)
|
|
elif isinstance(literal, lexer.StringLiteral):
|
|
return StringLiteral(literal.value)
|
|
|
|
def enumeration_syntactical_analyzer(tokens: list[lexer.Token]) -> EnumBlock:
|
|
pass
|
|
|
|
def function_syntactical_analyzer(tokens: list[lexer.Token]) -> FunctionBlock:
|
|
pass
|
|
|
|
def struct_syntactical_analyzer(tokens: list[lexer.Token]) -> StructBlock:
|
|
pass
|
|
|
|
def file_syntactical_analyzer(tokens: list[lexer.Token]) -> File:
|
|
children: list[Directive | StructBlock | FunctionBlock | EnumBlock] = []
|
|
|
|
while tokens:
|
|
token = tokens.pop(0)
|
|
if isinstance(token, lexer.Directive):
|
|
children.append(Directive(token.value))
|
|
elif isinstance(token, lexer.Keyword):
|
|
while True:
|
|
match token.value:
|
|
case 'pub': pass
|
|
case 'struct':
|
|
identifier = tokens.pop()
|
|
_assert_token(ExpectedIdentifier, identifier)
|
|
temp = tokens.pop()
|
|
_assert_token(ExpectedPunctuation, temp, '(')
|
|
members: list[StructureMember] = []
|
|
temp = tokens.pop()
|
|
while temp.value != ')':
|
|
if isinstance(temp, lexer.Keyword):
|
|
_assert_token(ExpectedKeyword, temp, 'static')
|
|
temp = tokens.pop()
|
|
static = True
|
|
else:
|
|
static = False
|
|
if isinstance(temp, lexer.Identifier):
|
|
member_id = Identifier(temp.value)
|
|
temp = tokens.pop()
|
|
_assert_token(ExpectedPunctuation, temp, ':')
|
|
temp = tokens.pop()
|
|
_assert_token_mult(temp, (
|
|
lexer.Keyword,
|
|
lexer.Identifier,
|
|
lexer.Punctuation,
|
|
))
|
|
if isinstance(temp, lexer.Punctuation):
|
|
_assert_token(ExpectedPunctuation, temp, '*')
|
|
pointer = True
|
|
temp = tokens.pop()
|
|
_assert_token_mult(temp, (
|
|
lexer.Keyword,
|
|
lexer.Identifier,
|
|
))
|
|
else:
|
|
pointer = False
|
|
if isinstance(temp, lexer.Keyword):
|
|
if temp.value not in DefaultDataType:
|
|
raise UnexpectedKeyword(
|
|
temp,
|
|
[i.value for i in DefaultDataType],
|
|
)
|
|
data_type = DefaultDataType(temp.value)
|
|
else:
|
|
data_type = Identifier(temp.value)
|
|
temp = tokens.pop()
|
|
_assert_token(ExpectedPunctuation, temp)
|
|
if temp.value not in [',', '=']:
|
|
raise UnexpectedPunctuation(
|
|
temp,
|
|
[',', '='],
|
|
)
|
|
elif temp.value == '=':
|
|
temp = tokens.pop()
|
|
_assert_token_literal(temp)
|
|
literal = _literal_map(temp) # type: ignore
|
|
temp = tokens.pop()
|
|
else: literal = None
|
|
members.append(StructureMember(
|
|
member_id,
|
|
data_type,
|
|
pointer,
|
|
static,
|
|
literal,
|
|
))
|
|
else:
|
|
raise UnexpectedToken(
|
|
temp, ["Keyword", "Identifier"])
|
|
children.append(StructBlock(
|
|
Identifier(identifier.value), False, members))
|
|
break
|
|
|
|
return File(children)
|
|
|
|
|
|
def syntactical_analyzer(tokens: Sequence[lexer.Token]) -> File:
|
|
return file_syntactical_analyzer(list(tokens))
|
|
|