Added function syntactical analyzer

Kyler 2024-02-28 18:42:51 -07:00
parent c3eb62f5f2
commit 51c51502b5
2 changed files with 106 additions and 45 deletions


@@ -205,9 +205,9 @@ of *structure members*.
 ##### Structure Member

-A *structure member* begins with its `identifier`. After a colon (`:`) is its
-`data type`. It may then optionally be followed by an equal sign (`=`) and a
-`literal`.
+A *structure member* begins with its `identifier`, which may optionally be
+preceded by the `static` keyword. After a colon (`:`) is its `data type`. It
+may then optionally be followed by an equal sign (`=`) and a `literal`.

 #### enum
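For illustration, a member list under the revised grammar might read as follows. This is a hypothetical snippet of the documented language: the type name `int` and the `*` pointer prefix are assumptions for the example, not normative syntax. Note that, per the parser change further below, the closing `}` may follow the last member directly, so the trailing comma is optional.

```
struct Point {
    static count: int = 0,
    x: int = 0,
    y: *int
}
```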


@@ -8,6 +8,9 @@ from .compiler_types import CompilerError, FileInfo
 from . import lexer

+class UnexpectedEndOfTokenStream(CompilerError): pass
+

 class _ExpectedTokenBase(CompilerError):
     _token_type = lexer.Token
@@ -322,15 +325,15 @@ class FunctionArgument:
 class FunctionCall:
     _identifier: Identifier
-    _params: list[FunctionArgument]
+    _args: list[FunctionArgument]

     def __init__(
         self,
         identifier: Identifier,
-        params: list[FunctionArgument],
+        args: list[FunctionArgument],
     ):
         self._identifier = identifier
-        self._params = params
+        self._args = args

 class TernaryExpression:
@@ -535,19 +538,22 @@ class FunctionParameter:
 class FunctionBlock:
     _identifier: Identifier
-    _args: list[FunctionParameter]
+    _params: list[FunctionParameter]
+    _return_type_pointer: bool
     _return_type: DataType
     _code: list[Statement]

     def __init__(
         self,
         identifier: Identifier,
-        args: list[FunctionParameter],
+        params: list[FunctionParameter],
+        return_type_pointer: bool,
         return_type: DataType,
         code: list[Statement],
     ):
         self._identifier = identifier
-        self._args = args[:]
+        self._params = params[:]
+        self._return_type_pointer = return_type_pointer
         self._return_type = return_type
         self._code = code[:]
@@ -695,7 +701,7 @@ def _literal_map(literal: (
     elif isinstance(literal, lexer.StringLiteral):
         return StringLiteral(literal.value)

-def struct_syntactical_analyzer(tokens: list[lexer.Token]) -> StructBlock:
+def _struct_sa(tokens: list[lexer.Token]) -> StructBlock:
     identifier = tokens.pop(0)
     _assert_token(ExpectedIdentifier, identifier)
     temp = tokens.pop(0)
@@ -713,38 +719,19 @@ def struct_syntactical_analyzer(tokens: list[lexer.Token]) -> StructBlock:
             member_id = Identifier(temp.value)
             temp = tokens.pop(0)
             _assert_token(ExpectedPunctuation, temp, ':')
-            temp = tokens.pop(0)
-            _assert_token_mult(temp, (
-                lexer.Keyword,
-                lexer.Identifier,
-                lexer.Punctuation,
-            ))
-            if isinstance(temp, lexer.Punctuation):
-                _assert_token(ExpectedPunctuation, temp, '*')
-                pointer = True
-                temp = tokens.pop(0)
-                _assert_token_mult(temp, (lexer.Keyword, lexer.Identifier))
-            else:
-                pointer = False
-            if isinstance(temp, lexer.Keyword):
-                if temp.value not in DefaultDataType:
-                    raise UnexpectedKeyword(
-                        temp,
-                        [i.value for i in DefaultDataType],
-                    )
-                data_type = DefaultDataType(temp.value)
-            else:
-                data_type = Identifier(temp.value)
+            pointer, data_type = _data_type_sa(tokens)
             temp = tokens.pop(0)
             _assert_token(ExpectedPunctuation, temp)
-            if temp.value not in [',', '=']:
-                raise UnexpectedPunctuation(temp, [',', '='])
+            if temp.value not in [',', '=', '}']:
+                raise UnexpectedPunctuation(temp, [',', '=', '}'])
             elif temp.value == '=':
                 temp = tokens.pop(0)
                 _assert_token_literal(temp)
                 literal = _literal_map(temp) # type: ignore
                 temp = tokens.pop(0)
-                _assert_token(ExpectedPunctuation, temp, ',')
+                _assert_token(ExpectedPunctuation, temp)
+                if temp.value not in [',', '}']:
+                    raise UnexpectedPunctuation(temp, [',', '}'])
             else: literal = None
             members.append(
                 StructureMember(member_id, data_type, pointer, static, literal))
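The rewritten member loop above delegates type parsing to the new `_data_type_sa` helper and accepts `}` wherever a comma used to be required, so the last member no longer needs a trailing separator. Below is a minimal standalone sketch of that separator-or-terminator pattern; it uses plain string tokens rather than the project's `lexer.Token` objects, so it illustrates the control flow only.

```python
# Minimal sketch of the separator-or-terminator rule, with plain
# string tokens standing in for the project's lexer.Token objects.
def parse_members(tokens: list[str]) -> list[str]:
    members: list[str] = []
    while True:
        tok = tokens.pop(0)
        if tok == '}':      # '}' after a trailing comma, or an empty body
            return members
        members.append(tok) # a member identifier
        sep = tokens.pop(0)
        if sep == '}':      # '}' right after a member: no trailing comma needed
            return members
        if sep != ',':
            raise SyntaxError(f"expected ',' or '}}', got {sep!r}")

print(parse_members(['x', ',', 'y', '}']))       # ['x', 'y'] - no trailing comma
print(parse_members(['x', ',', 'y', ',', '}']))  # ['x', 'y'] - trailing comma still accepted
```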
@@ -752,7 +739,7 @@ def struct_syntactical_analyzer(tokens: list[lexer.Token]) -> StructBlock:
             raise UnexpectedToken(temp, ["Keyword", "Identifier"])
     return StructBlock(Identifier(identifier.value), members)

-def enumeration_syntactical_analyzer(tokens: list[lexer.Token]) -> EnumBlock:
+def _enumeration_sa(tokens: list[lexer.Token]) -> EnumBlock:
     identifier = tokens.pop(0)
     _assert_token(ExpectedIdentifier, identifier)
     temp = tokens.pop(0)
@@ -764,21 +751,93 @@ def enumeration_syntactical_analyzer(tokens: list[lexer.Token]) -> EnumBlock:
             member_id = Identifier(temp.value)
             temp = tokens.pop(0)
             _assert_token(ExpectedPunctuation, temp)
-            if temp.value not in [',', '=']:
-                raise UnexpectedPunctuation(temp, [',', '='])
+            if temp.value not in [',', '=', '}']:
+                raise UnexpectedPunctuation(temp, [',', '=', '}'])
             elif temp.value == '=':
                 temp = tokens.pop(0)
                 _assert_token(ExpectedNumberLiteral, temp)
+                literal = _literal_map(temp) # type: ignore
                 temp = tokens.pop(0)
-                _assert_token(ExpectedPunctuation, temp, ',')
+                _assert_token(ExpectedPunctuation, temp)
+                if temp.value not in [',', '}']:
+                    raise UnexpectedPunctuation(temp, [',', '}'])
             else: literal = None
             members.append(EnumMember(member_id, literal))
     return EnumBlock(Identifier(identifier.value), members)

-def function_syntactical_analyzer(tokens: list[lexer.Token]) -> FunctionBlock:
+def _function_sa(tokens: list[lexer.Token]) -> FunctionBlock:
+    identifier = tokens.pop(0)
+    _assert_token(ExpectedIdentifier, identifier)
+    temp = tokens.pop(0)
+    _assert_token(ExpectedPunctuation, temp, '(')
+    params: list[FunctionParameter] = []
+    while temp.value != ')':
+        temp = tokens.pop(0)
+        if isinstance(temp, lexer.Identifier):
+            member_id = Identifier(temp.value)
+            temp = tokens.pop(0)
+            _assert_token(ExpectedPunctuation, temp, ':')
+            pointer, data_type = _data_type_sa(tokens)
+            temp = tokens.pop(0)
+            _assert_token(ExpectedPunctuation, temp)
+            if temp.value not in [',', '=', ')']:
+                raise UnexpectedPunctuation(temp, [',', '=', ')'])
+            elif temp.value == '=':
+                temp = tokens.pop(0)
+                _assert_token_literal(temp)
+                literal = _literal_map(temp) # type: ignore
+                temp = tokens.pop(0)
+                _assert_token(ExpectedPunctuation, temp)
+                if temp.value not in [',', ')']:
+                    raise UnexpectedPunctuation(temp, [',', ')'])
+            else: literal = None
+            params.append(
+                FunctionParameter(member_id, data_type, pointer, literal))
+        else:
+            raise UnexpectedToken(temp, ["Keyword", "Identifier"])
+    temp = tokens.pop(0)
+    _assert_token(ExpectedPunctuation, temp, '->')
+    pointer, return_type = _data_type_sa(tokens)
+    temp = tokens.pop(0)
+    _assert_token(ExpectedPunctuation, temp, '{')
+    code: list[Statement] = []
+    while tokens[0].value != '}':
+        code.append(_statement_sa(tokens))
+    return FunctionBlock(
+        Identifier(identifier.value),
+        params,
+        pointer,
+        return_type,
+        code,
+    )

+def _data_type_sa(tokens: list[lexer.Token]) -> tuple[bool, DataType]:
+    temp = tokens.pop(0)
+    _assert_token_mult(temp, (
+        lexer.Keyword,
+        lexer.Identifier,
+        lexer.Punctuation,
+    ))
+    if isinstance(temp, lexer.Punctuation):
+        _assert_token(ExpectedPunctuation, temp, '*')
+        pointer = True
+        temp = tokens.pop(0)
+        _assert_token_mult(temp, (lexer.Keyword, lexer.Identifier))
+    else:
+        pointer = False
+    if isinstance(temp, lexer.Keyword):
+        if temp.value not in DefaultDataType:
+            raise UnexpectedKeyword(
+                temp,
+                [i.value for i in DefaultDataType],
+            )
+        return pointer, DefaultDataType(temp.value)
+    else:
+        return pointer, Identifier(temp.value)

+def _statement_sa(tokens: list[lexer.Token]) -> Statement:
+    pass  # placeholder: statement parsing is not implemented yet

-def file_syntactical_analyzer(tokens: list[lexer.Token]) -> File:
+def _file_sa(tokens: list[lexer.Token]) -> File:
     children: list[Directive | StructBlock | FunctionBlock | EnumBlock] = []
     while tokens:
@@ -790,19 +849,21 @@ def file_syntactical_analyzer(tokens: list[lexer.Token]) -> File:
             match token.value:
                 case 'struct':
                     children.append(
-                        struct_syntactical_analyzer(tokens))
+                        _struct_sa(tokens))
                 case 'enum':
                     children.append(
-                        enumeration_syntactical_analyzer(tokens))
+                        _enumeration_sa(tokens))
                 case 'fn':
                     children.append(
-                        function_syntactical_analyzer(tokens))
+                        _function_sa(tokens))
                 case _:
                     raise ExpectedKeyword(token, "struct', 'enum', or 'fn")
         else:
             raise UnexpectedToken(token, "directive' or 'keyword")
     return File(children)

 def syntactical_analyzer(tokens: Sequence[lexer.Token]) -> File:
-    return file_syntactical_analyzer(list(tokens))
+    return _file_sa(list(tokens))
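The commit's central refactor is `_data_type_sa`, which factors the shared data-type grammar (an optional `*` pointer prefix, then either a built-in keyword or a user-defined identifier) out of the struct-member, function-parameter, and return-type paths, returning an `(is_pointer, data_type)` pair. A self-contained sketch of that contract follows; the plain string tokens and the `BUILT_INS` placeholder for `DefaultDataType` are assumptions for illustration, not the project's API.

```python
# Standalone sketch of the _data_type_sa contract; string tokens and
# the BUILT_INS set are stand-ins, not the project's lexer or DefaultDataType.
BUILT_INS = {'int', 'float', 'bool'}

def data_type_sa(tokens: list[str]) -> tuple[bool, str]:
    tok = tokens.pop(0)
    pointer = tok == '*'        # optional pointer prefix
    if pointer:
        tok = tokens.pop(0)     # the type name follows the '*'
    if tok not in BUILT_INS and not tok.isidentifier():
        raise SyntaxError(f"expected a type name, got {tok!r}")
    return pointer, tok         # (is_pointer, data_type) pair

print(data_type_sa(['*', 'int']))  # (True, 'int')  - pointer to a built-in type
print(data_type_sa(['Point']))     # (False, 'Point') - user-defined type
```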