From cccd80bec78c1ff077342a955a7f154e8c09aa4d Mon Sep 17 00:00:00 2001
From: Kyler <59854022+KylerOlsen@users.noreply.github.com>
Date: Wed, 28 Feb 2024 00:47:06 -0700
Subject: [PATCH] Implemented syntax for structs

---
Review notes (text between the "---" line above and the diffstat is not
part of the commit message and is ignored by git am):

- The Expected*/Unexpected* subclasses now override _token_type. They used
  to set _type_name, which nothing reads, so _assert_token() fell back to
  the base class value (lexer.Token) for every one of them.
- The "a', 'b', or 'c" formatting in _UnexpectedTokenBase and
  UnexpectedToken appended the loop variable (the second-to-last entry)
  instead of the last entry of the list.
- _assert_token_literal() now hands ExpectedLiteral a string rather than a
  list, and reports the BuildInConst values (not DefaultDataType) when a
  keyword is not a built-in constant.
- FunctionArgument.__init__ accepts identifier=None, matching the
  attribute annotation.
- ForPreDef.__init__ stores its value in the declared _assignment
  attribute instead of an undeclared _default.
- The struct parser takes tokens from the front (pop(0)) throughout,
  advances past 'pub' instead of looping forever on it, and steps over
  the ',' that separates members.

 pytd12dk/compiler/syntactical_analyzer.py | 518 +++++++++++++++++++++-
 1 file changed, 511 insertions(+), 7 deletions(-)

diff --git a/pytd12dk/compiler/syntactical_analyzer.py b/pytd12dk/compiler/syntactical_analyzer.py
index 79e1aa2..42fe1d4 100644
--- a/pytd12dk/compiler/syntactical_analyzer.py
+++ b/pytd12dk/compiler/syntactical_analyzer.py
@@ -2,19 +2,131 @@
 # Feb 2024
 
 from enum import Enum
-from typing import ClassVar, Sequence
+from typing import Sequence
 
 from .compiler_types import CompilerError, FileInfo
 from . import lexer
 
 
+class _ExpectedTokenBase(CompilerError):
+
+    _token_type = lexer.Token
+
+    def __init__(
+        self,
+        token: lexer.Token,
+        expected: str | None = None,
+        found: str | None = None,
+    ):
+        if expected is None:
+            expected = self._token_type.__name__
+            found = found or type(token).__name__
+        else:
+            found = found or token.value
+        message = f"Expected '{expected}' but found '{found}'."
+        super().__init__(message, token.file_info)
+
+
+class ExpectedLiteral(_ExpectedTokenBase):
+
+    _token_type = (
+        lexer.NumberLiteral,
+        lexer.CharLiteral,
+        lexer.StringLiteral,
+    )
+
+    def __init__(
+        self,
+        token: lexer.Token,
+        expected: str | None = None,
+        found: str | None = None,
+    ):
+        if expected is None:
+            expected = "NumberLiteral', 'CharLiteral', or 'StringLiteral"
+            found = found or type(token).__name__
+        super().__init__(token, expected, found)
+
+
+class ExpectedDirective(_ExpectedTokenBase):
+    _token_type = lexer.Directive
+class ExpectedIdentifier(_ExpectedTokenBase):
+    _token_type = lexer.Identifier
+class ExpectedKeyword(_ExpectedTokenBase):
+    _token_type = lexer.Keyword
+class ExpectedNumberLiteral(_ExpectedTokenBase):
+    _token_type = lexer.NumberLiteral
+class ExpectedCharLiteral(_ExpectedTokenBase):
+    _token_type = lexer.CharLiteral
+class ExpectedStringLiteral(_ExpectedTokenBase):
+    _token_type = lexer.StringLiteral
+class ExpectedPunctuation(_ExpectedTokenBase):
+    _token_type = lexer.Punctuation
+
+
+class _UnexpectedTokenBase(_ExpectedTokenBase):
+
+    def __init__(
+        self,
+        token: lexer.Token,
+        expected: str | list[str] | None = None,
+        found: str | None = None,
+    ):
+        if isinstance(expected, list):
+            if len(expected) > 1:
+                s = ""
+                for i in expected[:-1]:
+                    s += i + "', '"
+                s = s[:-1] + "or '" + expected[-1]
+                expected = s
+            else:
+                expected = expected[0]
+        super().__init__(token, expected, found)
+
+
+class UnexpectedToken(_UnexpectedTokenBase):
+
+    def __init__(
+        self,
+        token: lexer.Token,
+        expected: str | list[str],
+        found: str | None = None,
+    ):
+        if isinstance(expected, list):
+            if len(expected) > 1:
+                s = ""
+                for i in expected[:-1]:
+                    s += i + "', '"
+                s = s[:-1] + "or '" + expected[-1]
+                expected = s
+        found = found or type(token).__name__
+        super().__init__(token, expected, found)
+
+
+class UnexpectedDirective(_UnexpectedTokenBase):
+    _token_type = lexer.Directive
+class UnexpectedIdentifier(_UnexpectedTokenBase):
+    _token_type = lexer.Identifier
+class UnexpectedKeyword(_UnexpectedTokenBase):
+    _token_type = lexer.Keyword
+class UnexpectedNumberLiteral(_UnexpectedTokenBase):
+    _token_type = lexer.NumberLiteral
+class UnexpectedCharLiteral(_UnexpectedTokenBase):
+    _token_type = lexer.CharLiteral
+class UnexpectedStringLiteral(_UnexpectedTokenBase):
+    _token_type = lexer.StringLiteral
+class UnexpectedPunctuation(_UnexpectedTokenBase):
+    _token_type = lexer.Punctuation
+
+
 type NestableCodeBlock = ForBlock | WhileBlock | DoBlock | IfBlock
+
 type Literal = (
     BuildInConst |
     NumberLiteral |
     CharLiteral |
     StringLiteral
 )
+
 type Expression = (
     Literal |
     Identifier |
@@ -23,7 +135,9 @@ type Expression = (
     TernaryExpression |
     FunctionCall
 )
+
 type Statement = Expression | LetStatement | LoopStatements | NestableCodeBlock
+
 type DataType = DefaultDataType | Identifier
 
 
@@ -137,38 +251,86 @@ class Identifier:
 
     _content: str
 
+    def __init__(
+        self,
+        content: str,
+    ):
+        self._content = content
+
 
 class StringLiteral:
 
     _content: str
 
+    def __init__(
+        self,
+        content: str,
+    ):
+        self._content = content
+
 
 class CharLiteral:
 
     _content: str
 
+    def __init__(
+        self,
+        content: str,
+    ):
+        self._content = content
+
 
 class NumberLiteral:
 
     _content: str
 
+    def __init__(
+        self,
+        content: str,
+    ):
+        self._content = content
+
 
 class ArraySubscription:
 
     _identifier: Identifier
     _index: Expression
 
+    def __init__(
+        self,
+        identifier: Identifier,
+        index: Expression,
+    ):
+        self._identifier = identifier
+        self._index = index
 
-class FunctionParameter:
+
+class FunctionArgument:
 
     _identifier: Identifier | None
     _value: Expression
 
+    def __init__(
+        self,
+        identifier: Identifier | None,
+        value: Expression,
+    ):
+        self._identifier = identifier
+        self._value = value
+
 
 class FunctionCall:
 
     _identifier: Identifier
-    _params: list[FunctionParameter]
+    _params: list[FunctionArgument]
+
+    def __init__(
+        self,
+        identifier: Identifier,
+        params: list[FunctionArgument],
+    ):
+        self._identifier = identifier
+        self._params = params
 
 
 class TernaryExpression:
@@ -177,6 +339,16 @@ class TernaryExpression:
     _operand2: Expression
     _operand3: Expression
 
+    def __init__(
+        self,
+        operand1: Expression,
+        operand2: Expression,
+        operand3: Expression,
+    ):
+        self._operand1 = operand1
+        self._operand2 = operand2
+        self._operand3 = operand3
+
 
 class BinaryExpression:
 
@@ -184,12 +356,30 @@ class BinaryExpression:
     _operand1: Expression
     _operand2: Expression
 
+    def __init__(
+        self,
+        operator: BinaryOperator,
+        operand1: Expression,
+        operand2: Expression,
+    ):
+        self._operator = operator
+        self._operand1 = operand1
+        self._operand2 = operand2
+
 
 class UnaryExpression:
 
     _operator: UnaryOperator
     _operand: Expression
 
+    def __init__(
+        self,
+        operator: UnaryOperator,
+        operand: Expression,
+    ):
+        self._operator = operator
+        self._operand = operand
+
 
 class LetStatement:
 
@@ -199,11 +389,31 @@ class LetStatement:
     _static: bool
     _assignment: Expression | None
 
+    def __init__(
+        self,
+        identifier: Identifier,
+        type: DataType,
+        pointer: bool,
+        static: bool,
+        assignment: Literal | None,
+    ):
+        self._identifier = identifier
+        self._type = type
+        self._pointer = pointer
+        self._static = static
+        self._assignment = assignment
+
 
 class ElseBlock:
 
     _code: list[Statement]
 
+    def __init__(
+        self,
+        code: list[Statement],
+    ):
+        self._code = code[:]
+
 
 class ForPreDef:
 
@@ -212,6 +422,18 @@ class ForPreDef:
     _pointer: bool
     _assignment: Expression
 
+    def __init__(
+        self,
+        identifier: Identifier,
+        type: DataType,
+        pointer: bool,
+        default: Literal | None,
+    ):
+        self._identifier = identifier
+        self._type = type
+        self._pointer = pointer
+        self._assignment = default
+
 
 class ForBlock:
 
@@ -221,6 +443,20 @@ class ForBlock:
     _post_statement: Expression
     _else: ElseBlock | None
 
+    def __init__(
+        self,
+        pre_statement: Expression | ForPreDef,
+        condition: Expression,
+        code: list[Statement],
+        post_statement: Expression,
+        else_block: ElseBlock | None,
+    ):
+        self._pre_statement = pre_statement
+        self._condition = condition
+        self._code = code[:]
+        self._post_statement = post_statement
+        self._else = else_block
+
 
 class WhileBlock:
 
@@ -228,6 +464,16 @@ class WhileBlock:
     _code: list[Statement]
     _else: ElseBlock | None
 
+    def __init__(
+        self,
+        condition: Expression,
+        code: list[Statement],
+        else_block: ElseBlock | None,
+    ):
+        self._condition = condition
+        self._code = code[:]
+        self._else = else_block
+
 
 class DoBlock:
 
@@ -236,6 +482,18 @@ class DoBlock:
     _second_code: list[Statement]
     _else: ElseBlock | None
 
+    def __init__(
+        self,
+        first_code: list[Statement],
+        condition: Expression,
+        second_code: list[Statement],
+        else_block: ElseBlock | None,
+    ):
+        self._first_code = first_code[:]
+        self._condition = condition
+        self._second_code = second_code[:]
+        self._else = else_block
+
 
 class IfBlock:
 
@@ -243,36 +501,90 @@ class IfBlock:
     _code: list[Statement]
     _else: ElseBlock | None
 
+    def __init__(
+        self,
+        condition: Expression,
+        code: list[Statement],
+        else_block: ElseBlock | None,
+    ):
+        self._condition = condition
+        self._code = code[:]
+        self._else = else_block
 
-class FunctionArgument:
+
+class FunctionParameter:
 
     _identifier: Identifier
     _type: DataType
     _pointer: bool
     _default: Literal | None
 
+    def __init__(
+        self,
+        identifier: Identifier,
+        type: DataType,
+        pointer: bool,
+        default: Literal | None,
+    ):
+        self._identifier = identifier
+        self._type = type
+        self._pointer = pointer
+        self._default = default
+
 
 class FunctionBlock:
 
-    _identifier: str
+    _identifier: Identifier
     _public: bool
-    _args: list[FunctionArgument]
+    _args: list[FunctionParameter]
     _return_type: DataType
     _code: list[Statement]
 
+    def __init__(
+        self,
+        identifier: Identifier,
+        public: bool,
+        args: list[FunctionParameter],
+        return_type: DataType,
+        code: list[Statement],
+    ):
+        self._identifier = identifier
+        self._public = public
+        self._args = args[:]
+        self._return_type = return_type
+        self._code = code[:]
+
 
 class EnumMember:
 
     _identifier: Identifier
     _value: NumberLiteral | None
 
+    def __init__(
+        self,
+        identifier: Identifier,
+        value: NumberLiteral | None,
+    ):
+        self._identifier = identifier
+        self._value = value
+
 
 class EnumBlock:
 
-    _identifier: str
+    _identifier: Identifier
     _public: bool
     _members: list[EnumMember]
 
+    def __init__(
+        self,
+        identifier: Identifier,
+        public: bool,
+        members: list[EnumMember],
+    ):
+        self._identifier = identifier
+        self._public = public
+        self._members = members[:]
+
 
 class StructureMember:
 
@@ -282,6 +594,20 @@ class StructureMember:
     _static: bool
     _default: Literal | None
 
+    def __init__(
+        self,
+        identifier: Identifier,
+        type: DataType,
+        pointer: bool,
+        static: bool,
+        default: Literal | None,
+    ):
+        self._identifier = identifier
+        self._type = type
+        self._pointer = pointer
+        self._static = static
+        self._default = default
+
 
 class StructBlock:
 
@@ -289,12 +615,190 @@ class StructBlock:
     _public: bool
     _members: list[StructureMember]
 
+    def __init__(
+        self,
+        identifier: Identifier,
+        public: bool,
+        members: list[StructureMember],
+    ):
+        self._identifier = identifier
+        self._public = public
+        self._members = members[:]
+
 
 class Directive:
 
     _content: str
 
+    def __init__(
+        self,
+        content: str,
+    ):
+        self._content = content
+
 
 class File:
 
     _children: list[Directive | StructBlock | FunctionBlock | EnumBlock]
+
+    def __init__(
+        self,
+        children: list[Directive | StructBlock | FunctionBlock | EnumBlock],
+    ):
+        self._children = children[:]
+
+
+def _assert_token(
+    exception: type[_ExpectedTokenBase],
+    token: lexer.Token,
+    value: str | None = None,
+    token_type: type[lexer.Token] | None = None,
+):
+    if not isinstance(token, token_type or exception._token_type):
+        raise exception(token)
+    if value is not None and token.value != value:
+        raise exception(token, value)
+
+def _assert_token_mult(
+    token: lexer.Token,
+    token_type: tuple[type[lexer.Token], ...],
+):
+    if not isinstance(token, token_type):
+        raise UnexpectedToken(
+            token,
+            [i.__name__ for i in token_type], # type: ignore
+            type(token).__name__,
+        )
+
+def _assert_token_literal(
+    token: lexer.Token,
+):
+    token_types = (
+        lexer.Keyword,
+        lexer.NumberLiteral,
+        lexer.CharLiteral,
+        lexer.StringLiteral,
+    )
+    if not isinstance(token, token_types):
+        raise ExpectedLiteral(
+            token,
+            "', '".join(i.__name__ for i in token_types),
+            type(token).__name__,
+        )
+    if isinstance(token, lexer.Keyword):
+        if token.value not in BuildInConst:
+            raise UnexpectedKeyword(token, [i.value for i in BuildInConst])
+
+def _literal_map(literal: (
+    lexer.Keyword |
+    lexer.NumberLiteral |
+    lexer.CharLiteral |
+    lexer.StringLiteral
+)) -> Literal:
+    if isinstance(literal, lexer.Keyword):
+        return BuildInConst(literal.value)
+    elif isinstance(literal, lexer.NumberLiteral):
+        return NumberLiteral(literal.value)
+    elif isinstance(literal, lexer.CharLiteral):
+        return CharLiteral(literal.value)
+    elif isinstance(literal, lexer.StringLiteral):
+        return StringLiteral(literal.value)
+
+def enumeration_syntactical_analyzer(tokens: list[lexer.Token]) -> EnumBlock:
+    pass
+
+def function_syntactical_analyzer(tokens: list[lexer.Token]) -> FunctionBlock:
+    pass
+
+def struct_syntactical_analyzer(tokens: list[lexer.Token]) -> StructBlock:
+    pass
+
+def file_syntactical_analyzer(tokens: list[lexer.Token]) -> File:
+    children: list[Directive | StructBlock | FunctionBlock | EnumBlock] = []
+
+    while tokens:
+        token = tokens.pop(0)
+        if isinstance(token, lexer.Directive):
+            children.append(Directive(token.value))
+        elif isinstance(token, lexer.Keyword):
+            while True:
+                match token.value:
+                    case 'pub': token = tokens.pop(0)
+                    case 'struct':
+                        identifier = tokens.pop(0)
+                        _assert_token(ExpectedIdentifier, identifier)
+                        temp = tokens.pop(0)
+                        _assert_token(ExpectedPunctuation, temp, '(')
+                        members: list[StructureMember] = []
+                        temp = tokens.pop(0)
+                        while temp.value != ')':
+                            if isinstance(temp, lexer.Keyword):
+                                _assert_token(ExpectedKeyword, temp, 'static')
+                                temp = tokens.pop(0)
+                                static = True
+                            else:
+                                static = False
+                            if isinstance(temp, lexer.Identifier):
+                                member_id = Identifier(temp.value)
+                                temp = tokens.pop(0)
+                                _assert_token(ExpectedPunctuation, temp, ':')
+                                temp = tokens.pop(0)
+                                _assert_token_mult(temp, (
+                                    lexer.Keyword,
+                                    lexer.Identifier,
+                                    lexer.Punctuation,
+                                ))
+                                if isinstance(temp, lexer.Punctuation):
+                                    _assert_token(ExpectedPunctuation, temp, '*')
+                                    pointer = True
+                                    temp = tokens.pop(0)
+                                    _assert_token_mult(temp, (
+                                        lexer.Keyword,
+                                        lexer.Identifier,
+                                    ))
+                                else:
+                                    pointer = False
+                                if isinstance(temp, lexer.Keyword):
+                                    if temp.value not in DefaultDataType:
+                                        raise UnexpectedKeyword(
+                                            temp,
+                                            [i.value for i in DefaultDataType],
+                                        )
+                                    data_type = DefaultDataType(temp.value)
+                                else:
+                                    data_type = Identifier(temp.value)
+                                temp = tokens.pop(0)
+                                _assert_token(ExpectedPunctuation, temp)
+                                if temp.value not in [',', '=']:
+                                    raise UnexpectedPunctuation(
+                                        temp,
+                                        [',', '='],
+                                    )
+                                elif temp.value == '=':
+                                    temp = tokens.pop(0)
+                                    _assert_token_literal(temp)
+                                    literal = _literal_map(temp) # type: ignore
+                                    temp = tokens.pop(0)
+                                else: literal = None
+                                members.append(StructureMember(
+                                    member_id,
+                                    data_type,
+                                    pointer,
+                                    static,
+                                    literal,
+                                ))
+                                if temp.value == ',':
+                                    temp = tokens.pop(0)
+                            else:
+                                raise UnexpectedToken(
+                                    temp, ["Keyword", "Identifier"])
+                        children.append(StructBlock(
+                            Identifier(identifier.value), False, members))
+                        break
+
+    return File(children)
+
+
+def syntactical_analyzer(tokens: Sequence[lexer.Token]) -> File:
+    return file_syntactical_analyzer(list(tokens))
+