Added function syntactical analyzer

This commit is contained in:
Kyler 2024-02-28 18:42:51 -07:00
parent c3eb62f5f2
commit 51c51502b5
2 changed files with 106 additions and 45 deletions

View File

@ -205,9 +205,9 @@ of *structure members*.
##### Structure Member
A *structure member* begins with its `identifier`, which may optionally be
preceded with the `static` keyword. After a colon (`:`) is its `data type`. It
may then optionally be followed by an equal sign (`=`) and a `literal`.
#### enum #### enum

View File

@ -8,6 +8,9 @@ from .compiler_types import CompilerError, FileInfo
from . import lexer from . import lexer
class UnexpectedEndOfTokenStream(CompilerError): pass
class _ExpectedTokenBase(CompilerError): class _ExpectedTokenBase(CompilerError):
_token_type = lexer.Token _token_type = lexer.Token
@ -322,15 +325,15 @@ class FunctionArgument:
class FunctionCall: class FunctionCall:
_identifier: Identifier _identifier: Identifier
_params: list[FunctionArgument] _args: list[FunctionArgument]
def __init__( def __init__(
self, self,
identifier: Identifier, identifier: Identifier,
params: list[FunctionArgument], args: list[FunctionArgument],
): ):
self._identifier = identifier self._identifier = identifier
self._params = params self._args = args
class TernaryExpression: class TernaryExpression:
@ -535,19 +538,22 @@ class FunctionParameter:
class FunctionBlock: class FunctionBlock:
_identifier: Identifier _identifier: Identifier
_args: list[FunctionParameter] _params: list[FunctionParameter]
_return_type_pointer: bool
_return_type: DataType _return_type: DataType
_code: list[Statement] _code: list[Statement]
def __init__( def __init__(
self, self,
identifier: Identifier, identifier: Identifier,
args: list[FunctionParameter], params: list[FunctionParameter],
return_type_pointer: bool,
return_type: DataType, return_type: DataType,
code: list[Statement], code: list[Statement],
): ):
self._identifier = identifier self._identifier = identifier
self._args = args[:] self._params = params[:]
self._return_type_pointer = return_type_pointer
self._return_type = return_type self._return_type = return_type
self._code = code[:] self._code = code[:]
@ -695,7 +701,7 @@ def _literal_map(literal: (
elif isinstance(literal, lexer.StringLiteral): elif isinstance(literal, lexer.StringLiteral):
return StringLiteral(literal.value) return StringLiteral(literal.value)
def struct_syntactical_analyzer(tokens: list[lexer.Token]) -> StructBlock: def _struct_sa(tokens: list[lexer.Token]) -> StructBlock:
identifier = tokens.pop(0) identifier = tokens.pop(0)
_assert_token(ExpectedIdentifier, identifier) _assert_token(ExpectedIdentifier, identifier)
temp = tokens.pop(0) temp = tokens.pop(0)
@ -713,38 +719,19 @@ def struct_syntactical_analyzer(tokens: list[lexer.Token]) -> StructBlock:
member_id = Identifier(temp.value) member_id = Identifier(temp.value)
temp = tokens.pop(0) temp = tokens.pop(0)
_assert_token(ExpectedPunctuation, temp, ':') _assert_token(ExpectedPunctuation, temp, ':')
temp = tokens.pop(0) pointer, data_type = _data_type_sa(tokens)
_assert_token_mult(temp, (
lexer.Keyword,
lexer.Identifier,
lexer.Punctuation,
))
if isinstance(temp, lexer.Punctuation):
_assert_token(ExpectedPunctuation, temp, '*')
pointer = True
temp = tokens.pop(0)
_assert_token_mult(temp, (lexer.Keyword, lexer.Identifier))
else:
pointer = False
if isinstance(temp, lexer.Keyword):
if temp.value not in DefaultDataType:
raise UnexpectedKeyword(
temp,
[i.value for i in DefaultDataType],
)
data_type = DefaultDataType(temp.value)
else:
data_type = Identifier(temp.value)
temp = tokens.pop(0) temp = tokens.pop(0)
_assert_token(ExpectedPunctuation, temp) _assert_token(ExpectedPunctuation, temp)
if temp.value not in [',', '=']: if temp.value not in [',', '=', '}']:
raise UnexpectedPunctuation(temp, [',', '=']) raise UnexpectedPunctuation(temp, [',', '=', '}'])
elif temp.value == '=': elif temp.value == '=':
temp = tokens.pop(0) temp = tokens.pop(0)
_assert_token_literal(temp) _assert_token_literal(temp)
literal = _literal_map(temp) # type: ignore literal = _literal_map(temp) # type: ignore
temp = tokens.pop(0) temp = tokens.pop(0)
_assert_token(ExpectedPunctuation, temp, ',') _assert_token(ExpectedPunctuation, temp)
if temp.value not in [',', '=', '}']:
raise UnexpectedPunctuation(temp, [',', '=', '}'])
else: literal = None else: literal = None
members.append( members.append(
StructureMember(member_id, data_type, pointer, static, literal)) StructureMember(member_id, data_type, pointer, static, literal))
@ -752,7 +739,7 @@ def struct_syntactical_analyzer(tokens: list[lexer.Token]) -> StructBlock:
raise UnexpectedToken(temp, ["Keyword", "Identifier"]) raise UnexpectedToken(temp, ["Keyword", "Identifier"])
return StructBlock(Identifier(identifier.value), members) return StructBlock(Identifier(identifier.value), members)
def enumeration_syntactical_analyzer(tokens: list[lexer.Token]) -> EnumBlock: def _enumeration_sa(tokens: list[lexer.Token]) -> EnumBlock:
identifier = tokens.pop(0) identifier = tokens.pop(0)
_assert_token(ExpectedIdentifier, identifier) _assert_token(ExpectedIdentifier, identifier)
temp = tokens.pop(0) temp = tokens.pop(0)
@ -764,21 +751,93 @@ def enumeration_syntactical_analyzer(tokens: list[lexer.Token]) -> EnumBlock:
member_id = Identifier(temp.value) member_id = Identifier(temp.value)
temp = tokens.pop(0) temp = tokens.pop(0)
_assert_token(ExpectedPunctuation, temp) _assert_token(ExpectedPunctuation, temp)
if temp.value not in [',', '=']: if temp.value not in [',', '=', '}']:
raise UnexpectedPunctuation(temp, [',', '=']) raise UnexpectedPunctuation(temp, [',', '=', '}'])
elif temp.value == '=': elif temp.value == '=':
temp = tokens.pop(0) temp = tokens.pop(0)
_assert_token(ExpectedNumberLiteral, temp) _assert_token(ExpectedNumberLiteral, temp)
temp = tokens.pop(0) temp = tokens.pop(0)
_assert_token(ExpectedPunctuation, temp, ',') _assert_token(ExpectedPunctuation, temp)
if temp.value not in [',', '}']:
raise UnexpectedPunctuation(temp, [',', '}'])
else: literal = None else: literal = None
members.append(EnumMember(member_id, literal)) members.append(EnumMember(member_id, literal))
return EnumBlock(Identifier(identifier.value), members) return EnumBlock(Identifier(identifier.value), members)
def _function_sa(tokens: list[lexer.Token]) -> FunctionBlock:
    """Parse a function block from the front of *tokens*.

    Grammar handled here (the ``fn`` keyword has already been consumed
    by the caller): ``identifier ( params ) -> return_type { statements }``
    where each parameter is ``identifier : data_type`` optionally followed
    by ``= literal``.

    Consumes tokens destructively via ``tokens.pop(0)`` and returns the
    parsed ``FunctionBlock``.  Raises the ``Expected*`` / ``Unexpected*``
    ``CompilerError`` subclasses on malformed input.
    """
    identifier = tokens.pop(0)
    _assert_token(ExpectedIdentifier, identifier)
    temp = tokens.pop(0)
    _assert_token(ExpectedPunctuation, temp, '(')
    params: list[FunctionParameter] = []
    while temp.value != ')':
        temp = tokens.pop(0)
        # Empty parameter list: the first token after '(' is already ')'.
        # Without this check the ')' would fall through to the
        # UnexpectedToken branch below and wrongly reject `fn name()`.
        if isinstance(temp, lexer.Punctuation) and temp.value == ')':
            break
        if isinstance(temp, lexer.Identifier):
            member_id = Identifier(temp.value)
            temp = tokens.pop(0)
            _assert_token(ExpectedPunctuation, temp, ':')
            pointer, data_type = _data_type_sa(tokens)
            temp = tokens.pop(0)
            _assert_token(ExpectedPunctuation, temp)
            if temp.value not in [',', '=', ')']:
                raise UnexpectedPunctuation(temp, [',', '=', ')'])
            elif temp.value == '=':
                # Default value for the parameter.
                temp = tokens.pop(0)
                _assert_token_literal(temp)
                literal = _literal_map(temp) # type: ignore
                temp = tokens.pop(0)
                _assert_token(ExpectedPunctuation, temp)
                if temp.value not in [',', ')']:
                    raise UnexpectedPunctuation(temp, [',', ')'])
            else: literal = None
            params.append(
                FunctionParameter(member_id, data_type, pointer, literal))
        else:
            raise UnexpectedToken(temp, ["Keyword", "Identifier"])
    temp = tokens.pop(0)
    _assert_token(ExpectedPunctuation, temp, '->')
    pointer, return_type = _data_type_sa(tokens)
    temp = tokens.pop(0)
    _assert_token(ExpectedPunctuation, temp, '{')
    code: list[Statement] = []
    # NOTE(review): the closing '}' is left on the stream here — confirm the
    # caller (or _statement_sa) is responsible for consuming it.
    while tokens[0].value != '}':
        code.append(_statement_sa(tokens))
    return FunctionBlock(
        Identifier(identifier.value),
        params,
        pointer,
        return_type,
        code,
    )
def _data_type_sa(tokens: list[lexer.Token]) -> tuple[bool, DataType]:
    """Parse a data type, optionally preceded by a ``*`` pointer marker.

    Returns ``(pointer, data_type)`` where *pointer* is True when a ``*``
    prefix was consumed, and *data_type* is a ``DefaultDataType`` member
    for a built-in type keyword or an ``Identifier`` for a user-defined
    type name.  Raises ``UnexpectedKeyword`` for a keyword that is not a
    built-in data type.
    """
    token = tokens.pop(0)
    _assert_token_mult(token, (
        lexer.Keyword,
        lexer.Identifier,
        lexer.Punctuation,
    ))
    pointer = isinstance(token, lexer.Punctuation)
    if pointer:
        # The only punctuation allowed in type position is '*'.
        _assert_token(ExpectedPunctuation, token, '*')
        token = tokens.pop(0)
        _assert_token_mult(token, (lexer.Keyword, lexer.Identifier))
    if not isinstance(token, lexer.Keyword):
        # A user-defined type referenced by name.
        return pointer, Identifier(token.value)
    if token.value not in DefaultDataType:
        raise UnexpectedKeyword(
            token,
            [i.value for i in DefaultDataType],
        )
    return pointer, DefaultDataType(token.value)
def _statement_sa(tokens: list[lexer.Token]) -> Statement:
    """Parse a single statement from the front of *tokens*.

    Placeholder stub — statement parsing is not implemented yet, so this
    currently returns ``None`` implicitly.
    """
    # TODO: implement statement syntactical analysis.
    pass
def file_syntactical_analyzer(tokens: list[lexer.Token]) -> File: def _file_sa(tokens: list[lexer.Token]) -> File:
children: list[Directive | StructBlock | FunctionBlock | EnumBlock] = [] children: list[Directive | StructBlock | FunctionBlock | EnumBlock] = []
while tokens: while tokens:
@ -790,19 +849,21 @@ def file_syntactical_analyzer(tokens: list[lexer.Token]) -> File:
match token.value: match token.value:
case 'struct': case 'struct':
children.append( children.append(
struct_syntactical_analyzer(tokens)) _struct_sa(tokens))
case 'enum': case 'enum':
children.append( children.append(
enumeration_syntactical_analyzer(tokens)) _enumeration_sa(tokens))
case 'fn': case 'fn':
children.append( children.append(
function_syntactical_analyzer(tokens)) _function_sa(tokens))
case _: case _:
raise ExpectedKeyword(token, "struct', 'enum', or 'fn") raise ExpectedKeyword(token, "struct', 'enum', or 'fn")
else:
raise UnexpectedToken(token, "directive' or 'keyword")
return File(children) return File(children)
def syntactical_analyzer(tokens: Sequence[lexer.Token]) -> File: def syntactical_analyzer(tokens: Sequence[lexer.Token]) -> File:
return file_syntactical_analyzer(list(tokens)) return _file_sa(list(tokens))