Worked on syntactical analyzer

Kyler 2024-02-29 00:05:56 -07:00
parent 51c51502b5
commit 8a962bc525
2 changed files with 354 additions and 133 deletions


@@ -244,11 +244,11 @@ by an `else block`.
 #### Do Loop
 A `do loop` begins with the `do` keyword, followed by a list enclosed in curly
-braces (`{` and `}`) of `statements`. It is then finished
-with the `while` keyword, followed by its condition, an `expression` enclosed
-in parentheses (`(` and `)`). It may then optionally be followed by another list
-enclosed in curly braces (`{` and `}`) of `statements` then
-again optionally by an `else block`.
+braces (`{` and `}`) of `statements`. It is then followed by the `while`
+keyword, then by its condition, an `expression` enclosed in parentheses
+(`(` and `)`). It may then optionally be followed by another list enclosed in
+curly braces (`{` and `}`) of `statements`. Finally, the `do loop` may
+optionally be followed by an `else block`.
 #### While Loop
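For illustration, a `do loop` matching the revised wording would have this overall shape (hypothetical source text; the statements and condition are placeholders, not part of the spec):

```
do {
    statement;
} while (condition) {
    statement;
} else {
    statement;
}
```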
@@ -360,6 +360,7 @@ Here are all operators and their types and names in order of operator precedence
 | `^^` | Binary | Boolean XOR *Operator* |
 | `\|\|` | Binary | Boolean OR *Operator* |
 | `&&` | Binary | Boolean AND *Operator* |
+| `?` `:` | Ternary | Ternary Conditional *Operator* |
 | `>>=` | Binary | Right Shift Assignment *Operator* |
 | `<<=` | Binary | Left Shift Assignment *Operator* |
 | `^=` | Binary | Bitwise XOR Assignment *Operator* |
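Since the table runs from tightest- to loosest-binding, the new row places the ternary conditional below the boolean operators and above the assignments, so a hypothetical expression such as `x = a && b ? y : z;` should parse as `x = ((a && b) ? y : z);`.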


@@ -4,7 +4,7 @@
 from enum import Enum
 from typing import Sequence
-from .compiler_types import CompilerError, FileInfo
+from .compiler_types import CompilerError
 from . import lexer
@@ -163,7 +163,7 @@ class UnaryOperator(Enum):
     Negate = "-"
     BitwiseNOT = "~"
     BooleanNOT = "!"
-    Addressof = "@"
+    AddressOf = "@"
     Dereference = "$"
@@ -200,6 +200,10 @@ class BinaryOperator(Enum):
     GreaterOrEqualToThan = ">="
 
 
+class TernaryOperator(Enum):
+    TernaryConditional = "?"
+
+
 class DefaultDataType(Enum):
     unsigned = "unsigned"
     int = "int"
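Worth noting for the value-membership tests used later (e.g. `token.value in TernaryOperator`): testing a plain string against an Enum class only behaves this way on Python 3.12+, where `in` compares against member *values*; on 3.11 and earlier it raises `TypeError`. A minimal, version-portable sketch:

```python
from enum import Enum

class TernaryOperator(Enum):
    TernaryConditional = "?"

# Python 3.12+: `in` compares against member values, so this is True.
# On 3.11 and earlier the same test raises TypeError.
# A portable spelling that works on any version:
print("?" in {member.value for member in TernaryOperator})  # True
```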
@@ -207,47 +211,50 @@ class DefaultDataType(Enum):
     float = "float"
 
 
-_Operator_Precedence = [
-    BinaryOperator.Assignment,
-    BinaryOperator.AdditionAssignment,
-    BinaryOperator.SubtractionAssignment,
-    BinaryOperator.MultiplicationAssignment,
-    BinaryOperator.DivisionAssignment,
-    BinaryOperator.ModulusAssignment,
-    BinaryOperator.BitwiseANDAssignment,
-    BinaryOperator.BitwiseORAssignment,
-    BinaryOperator.BitwiseXORAssignment,
-    BinaryOperator.LeftShiftAssignment,
-    BinaryOperator.RightShiftAssignment,
-    BinaryOperator.BooleanAND,
-    BinaryOperator.BooleanOR,
-    BinaryOperator.BooleanXOR,
-    BinaryOperator.EqualityComparison,
-    BinaryOperator.InequalityComparison,
-    BinaryOperator.LessThan,
-    BinaryOperator.LessOrEqualToThan,
-    BinaryOperator.GreaterThan,
-    BinaryOperator.GreaterOrEqualToThan,
-    BinaryOperator.Addition,
-    BinaryOperator.Subtraction,
-    BinaryOperator.Multiplication,
-    BinaryOperator.Division,
-    BinaryOperator.Modulus,
-    BinaryOperator.BitwiseAND,
-    BinaryOperator.BitwiseOR,
-    BinaryOperator.BitwiseXOR,
-    BinaryOperator.LeftShift,
-    BinaryOperator.RightShift,
-    UnaryOperator.BooleanNOT,
-    UnaryOperator.Negate,
-    UnaryOperator.PrefixIncrement,
-    UnaryOperator.PrefixDecrement,
-    UnaryOperator.PostfixIncrement,
-    UnaryOperator.PostfixDecrement,
-    UnaryOperator.BitwiseNOT,
-    UnaryOperator.Dereference,
-    UnaryOperator.Addressof,
-]
+_Operator_Precedence: tuple[
+    UnaryOperator | BinaryOperator | TernaryOperator, ...
+] = (
+    UnaryOperator.AddressOf,
+    UnaryOperator.Dereference,
+    UnaryOperator.BitwiseNOT,
+    UnaryOperator.PostfixDecrement,
+    UnaryOperator.PostfixIncrement,
+    UnaryOperator.PrefixDecrement,
+    UnaryOperator.PrefixIncrement,
+    UnaryOperator.Negate,
+    UnaryOperator.BooleanNOT,
+    BinaryOperator.RightShift,
+    BinaryOperator.LeftShift,
+    BinaryOperator.BitwiseXOR,
+    BinaryOperator.BitwiseOR,
+    BinaryOperator.BitwiseAND,
+    BinaryOperator.Modulus,
+    BinaryOperator.Division,
+    BinaryOperator.Multiplication,
+    BinaryOperator.Subtraction,
+    BinaryOperator.Addition,
+    BinaryOperator.GreaterOrEqualToThan,
+    BinaryOperator.GreaterThan,
+    BinaryOperator.LessOrEqualToThan,
+    BinaryOperator.LessThan,
+    BinaryOperator.InequalityComparison,
+    BinaryOperator.EqualityComparison,
+    BinaryOperator.BooleanXOR,
+    BinaryOperator.BooleanOR,
+    BinaryOperator.BooleanAND,
+    TernaryOperator.TernaryConditional,
+    BinaryOperator.RightShiftAssignment,
+    BinaryOperator.LeftShiftAssignment,
+    BinaryOperator.BitwiseXORAssignment,
+    BinaryOperator.BitwiseORAssignment,
+    BinaryOperator.BitwiseANDAssignment,
+    BinaryOperator.ModulusAssignment,
+    BinaryOperator.DivisionAssignment,
+    BinaryOperator.MultiplicationAssignment,
+    BinaryOperator.SubtractionAssignment,
+    BinaryOperator.AdditionAssignment,
+    BinaryOperator.Assignment,
+)
 
 
 class Identifier:
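As a standalone sketch of how such a flat tuple can drive expression parsing (plain strings instead of lexer tokens, three operators instead of the full forty): the index in the tuple acts as a precedence rank, tightest binders first, and an expression splits at the top-level operator with the highest rank, i.e. the loosest binder.

```python
from enum import Enum

class BinaryOperator(Enum):
    Multiplication = "*"
    Addition = "+"
    Assignment = "="

# Tightest binders first, loosest last, mirroring _Operator_Precedence.
PRECEDENCE = (
    BinaryOperator.Multiplication,
    BinaryOperator.Addition,
    BinaryOperator.Assignment,
)

def split_at_loosest(values: list[str]) -> tuple[list[str], str, list[str]]:
    """Find the loosest-binding operator and split the token list there."""
    best_index, best_rank = -1, -1
    for i, value in enumerate(values):
        for rank, op in enumerate(PRECEDENCE):
            if op.value == value and rank > best_rank:
                best_index, best_rank = i, rank
    return values[:best_index], values[best_index], values[best_index + 1:]

print(split_at_loosest(["x", "=", "a", "+", "b", "*", "c"]))
# (['x'], '=', ['a', '+', 'b', '*', 'c'])
```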
@@ -430,12 +437,12 @@ class ForPreDef:
         identifier: Identifier,
         type: DataType,
         pointer: bool,
-        default: Literal | None,
+        assignment: Expression,
     ):
         self._identifier = identifier
         self._type = type
         self._pointer = pointer
-        self._default = default
+        self._assignment = assignment
 
 
 class ForBlock:
@@ -482,19 +489,22 @@ class DoBlock:
     _first_code: list[Statement]
     _condition: Expression
-    _second_code: list[Statement]
+    _second_code: list[Statement] | None
     _else: ElseBlock | None
 
     def __init__(
         self,
         first_code: list[Statement],
         condition: Expression,
-        second_code: list[Statement],
+        second_code: list[Statement] | None,
         else_block: ElseBlock | None,
     ):
         self._first_code = first_code[:]
         self._condition = condition
-        self._second_code = second_code[:]
+        if second_code:
+            self._second_code = second_code[:]
+        else:
+            self._second_code = None
         self._else = else_block
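One subtlety in the new constructor, illustrated standalone below: the truthiness test means an *empty* second statement list is also stored as `None`, not just an absent one.

```python
# Standalone illustration of the normalization in DoBlock.__init__:
def normalize(second_code: list | None) -> list | None:
    if second_code:
        return second_code[:]  # defensive copy of a non-empty list
    return None                # None *and* [] both collapse to None

print(normalize(None))            # None
print(normalize([]))              # None -- empty list collapses too
print(normalize(["statement"]))   # ['statement']
```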
@@ -701,65 +711,102 @@ def _literal_map(literal: (
     elif isinstance(literal, lexer.StringLiteral):
         return StringLiteral(literal.value)
+
+
+def _get_nested_group(
+    tokens: list[lexer.Token],
+    encloses: tuple[str, str] = ('(', ')'),
+) -> list[lexer.Token]:
+    token = tokens.pop(0)
+    _assert_token(ExpectedPunctuation, token, encloses[0])
+    nested = 1
+    expr_len = -1
+    for i in range(len(tokens)):
+        if tokens[i].value == encloses[0]: nested += 1
+        elif tokens[i].value == encloses[1]: nested -= 1
+        if nested == 0:
+            expr_len = i
+            break
+    else:
+        raise UnexpectedEndOfTokenStream(
+            "Unexpected End of Token Stream.", tokens[-1].file_info)
+    expr_tokens = tokens[:expr_len]
+    del tokens[:expr_len+1]
+    return expr_tokens
+
+
+def _get_to_symbol(
+    tokens: list[lexer.Token],
+    symbol: str = ';',
+) -> list[lexer.Token]:
+    expr_len = -1
+    for i in range(len(tokens)):
+        if tokens[i].value == symbol:
+            expr_len = i
+            break
+    else:
+        raise UnexpectedEndOfTokenStream(
+            "Unexpected End of Token Stream.", tokens[-1].file_info)
+    expr_tokens = tokens[:expr_len]
+    del tokens[:expr_len+1]
+    return expr_tokens
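Both helpers consume tokens destructively from the front of the shared list, including the closing symbol. A self-contained sketch of the same depth-counting scan, with plain strings standing in for lexer tokens:

```python
def get_nested_group(tokens: list[str],
                     encloses: tuple[str, str] = ("(", ")")) -> list[str]:
    """Pop an `encloses`-delimited group off the front of `tokens`."""
    assert tokens.pop(0) == encloses[0]
    depth = 1
    for i, tok in enumerate(tokens):
        if tok == encloses[0]:
            depth += 1
        elif tok == encloses[1]:
            depth -= 1
        if depth == 0:
            inner = tokens[:i]
            del tokens[:i + 1]  # the closing symbol is consumed too
            return inner
    raise SyntaxError("unbalanced group")

toks = ["(", "a", "+", "(", "b", ")", ")", ";"]
print(get_nested_group(toks))  # ['a', '+', '(', 'b', ')']
print(toks)                    # [';']
```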
 def _struct_sa(tokens: list[lexer.Token]) -> StructBlock:
     identifier = tokens.pop(0)
     _assert_token(ExpectedIdentifier, identifier)
-    temp = tokens.pop(0)
-    _assert_token(ExpectedPunctuation, temp, '{')
+    token = tokens.pop(0)
+    _assert_token(ExpectedPunctuation, token, '{')
     members: list[StructureMember] = []
-    while temp.value != '}':
-        temp = tokens.pop(0)
-        if isinstance(temp, lexer.Keyword):
-            _assert_token(ExpectedKeyword, temp, 'static')
-            temp = tokens.pop(0)
+    while token.value != '}':
+        token = tokens.pop(0)
+        if isinstance(token, lexer.Keyword):
+            _assert_token(ExpectedKeyword, token, 'static')
+            token = tokens.pop(0)
             static = True
         else:
             static = False
-        if isinstance(temp, lexer.Identifier):
-            member_id = Identifier(temp.value)
-            temp = tokens.pop(0)
-            _assert_token(ExpectedPunctuation, temp, ':')
+        if isinstance(token, lexer.Identifier):
+            member_id = Identifier(token.value)
+            token = tokens.pop(0)
+            _assert_token(ExpectedPunctuation, token, ':')
             pointer, data_type = _data_type_sa(tokens)
-            temp = tokens.pop(0)
-            _assert_token(ExpectedPunctuation, temp)
-            if temp.value not in [',', '=', '}']:
-                raise UnexpectedPunctuation(temp, [',', '=', '}'])
-            elif temp.value == '=':
-                temp = tokens.pop(0)
-                _assert_token_literal(temp)
-                literal = _literal_map(temp) # type: ignore
-                temp = tokens.pop(0)
-                _assert_token(ExpectedPunctuation, temp)
-                if temp.value not in [',', '=', '}']:
-                    raise UnexpectedPunctuation(temp, [',', '=', '}'])
+            token = tokens.pop(0)
+            _assert_token(ExpectedPunctuation, token)
+            if token.value not in [',', '=', '}']:
+                raise UnexpectedPunctuation(token, [',', '=', '}'])
+            elif token.value == '=':
+                token = tokens.pop(0)
+                _assert_token_literal(token)
+                literal = _literal_map(token) # type: ignore
+                token = tokens.pop(0)
+                _assert_token(ExpectedPunctuation, token)
+                if token.value not in [',', '}']:
+                    raise UnexpectedPunctuation(token, [',', '}'])
             else: literal = None
             members.append(
                 StructureMember(member_id, data_type, pointer, static, literal))
         else:
-            raise UnexpectedToken(temp, ["Keyword", "Identifier"])
+            raise UnexpectedToken(token, ["Keyword", "Identifier"])
     return StructBlock(Identifier(identifier.value), members)
 def _enumeration_sa(tokens: list[lexer.Token]) -> EnumBlock:
     identifier = tokens.pop(0)
     _assert_token(ExpectedIdentifier, identifier)
-    temp = tokens.pop(0)
-    _assert_token(ExpectedPunctuation, temp, '{')
+    token = tokens.pop(0)
+    _assert_token(ExpectedPunctuation, token, '{')
     members: list[EnumMember] = []
-    while temp.value != '}':
-        temp = tokens.pop(0)
-        _assert_token(ExpectedIdentifier, temp)
-        member_id = Identifier(temp.value)
-        temp = tokens.pop(0)
-        _assert_token(ExpectedPunctuation, temp)
-        if temp.value not in [',', '=', '}']:
-            raise UnexpectedPunctuation(temp, [',', '=', '}'])
-        elif temp.value == '=':
-            temp = tokens.pop(0)
-            _assert_token(ExpectedNumberLiteral, temp)
-            temp = tokens.pop(0)
-            _assert_token(ExpectedPunctuation, temp)
-            if temp.value not in [',', '}']:
-                raise UnexpectedPunctuation(temp, [',', '}'])
+    while token.value != '}':
+        token = tokens.pop(0)
+        _assert_token(ExpectedIdentifier, token)
+        member_id = Identifier(token.value)
+        token = tokens.pop(0)
+        _assert_token(ExpectedPunctuation, token)
+        if token.value not in [',', '=', '}']:
+            raise UnexpectedPunctuation(token, [',', '=', '}'])
+        elif token.value == '=':
+            token = tokens.pop(0)
+            _assert_token(ExpectedNumberLiteral, token)
+            literal = _literal_map(token) # type: ignore
+            token = tokens.pop(0)
+            _assert_token(ExpectedPunctuation, token)
+            if token.value not in [',', '}']:
+                raise UnexpectedPunctuation(token, [',', '}'])
         else: literal = None
         members.append(EnumMember(member_id, literal))
     return EnumBlock(Identifier(identifier.value), members)
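Per this logic, a structure body is a comma-separated list of `identifier: type` members, each optionally prefixed with `static` and optionally defaulted with `= literal`. A hypothetical source fragment the struct parser would accept (the introducing keyword is consumed by dispatch code outside this diff):

```
struct Node {
    static count: int = 0,
    next: *Node,
    value: float
}
```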
@@ -767,41 +814,37 @@ def _enumeration_sa(tokens: list[lexer.Token]) -> EnumBlock:
 def _function_sa(tokens: list[lexer.Token]) -> FunctionBlock:
     identifier = tokens.pop(0)
     _assert_token(ExpectedIdentifier, identifier)
-    temp = tokens.pop(0)
-    _assert_token(ExpectedPunctuation, temp, '(')
+    token = tokens.pop(0)
+    _assert_token(ExpectedPunctuation, token, '(')
     params: list[FunctionParameter] = []
-    while temp.value != ')':
-        temp = tokens.pop(0)
-        if isinstance(temp, lexer.Identifier):
-            member_id = Identifier(temp.value)
-            temp = tokens.pop(0)
-            _assert_token(ExpectedPunctuation, temp, ':')
+    while token.value != ')':
+        token = tokens.pop(0)
+        if isinstance(token, lexer.Identifier):
+            member_id = Identifier(token.value)
+            token = tokens.pop(0)
+            _assert_token(ExpectedPunctuation, token, ':')
             pointer, data_type = _data_type_sa(tokens)
-            temp = tokens.pop(0)
-            _assert_token(ExpectedPunctuation, temp)
-            if temp.value not in [',', '=', ')']:
-                raise UnexpectedPunctuation(temp, [',', '=', ')'])
-            elif temp.value == '=':
-                temp = tokens.pop(0)
-                _assert_token_literal(temp)
-                literal = _literal_map(temp) # type: ignore
-                temp = tokens.pop(0)
-                _assert_token(ExpectedPunctuation, temp)
-                if temp.value not in [',', ')']:
-                    raise UnexpectedPunctuation(temp, [',', ')'])
+            token = tokens.pop(0)
+            _assert_token(ExpectedPunctuation, token)
+            if token.value not in [',', '=', ')']:
+                raise UnexpectedPunctuation(token, [',', '=', ')'])
+            elif token.value == '=':
+                token = tokens.pop(0)
+                _assert_token_literal(token)
+                literal = _literal_map(token) # type: ignore
+                token = tokens.pop(0)
+                _assert_token(ExpectedPunctuation, token)
+                if token.value not in [',', ')']:
+                    raise UnexpectedPunctuation(token, [',', ')'])
             else: literal = None
             params.append(
                 FunctionParameter(member_id, data_type, pointer, literal))
         else:
-            raise UnexpectedToken(temp, ["Keyword", "Identifier"])
-    temp = tokens.pop(0)
-    _assert_token(ExpectedPunctuation, temp, '->')
+            raise UnexpectedToken(token, ["Keyword", "Identifier"])
+    token = tokens.pop(0)
+    _assert_token(ExpectedPunctuation, token, '->')
     pointer, return_type = _data_type_sa(tokens)
-    temp = tokens.pop(0)
-    _assert_token(ExpectedPunctuation, temp, '{')
-    code: list[Statement] = []
-    while tokens[0].value != '}':
-        code.append(_statement_sa(tokens))
+    code = _code_block_sa(tokens)
     return FunctionBlock(
         Identifier(identifier.value),
         params,
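The parameter list follows the same `identifier: type [= literal]` pattern as struct members, and the body is now delegated to the new `_code_block_sa`. A hypothetical declaration this parse accepts (any introducing keyword is again handled by the caller):

```
add(a: int, b: int = 1) -> int {
    statement;
}
```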
@@ -811,31 +854,208 @@ def _function_sa(tokens: list[lexer.Token]) -> FunctionBlock:
     )
 def _data_type_sa(tokens: list[lexer.Token]) -> tuple[bool, DataType]:
-    temp = tokens.pop(0)
-    _assert_token_mult(temp, (
+    token = tokens.pop(0)
+    _assert_token_mult(token, (
         lexer.Keyword,
         lexer.Identifier,
         lexer.Punctuation,
     ))
-    if isinstance(temp, lexer.Punctuation):
-        _assert_token(ExpectedPunctuation, temp, '*')
+    if isinstance(token, lexer.Punctuation):
+        _assert_token(ExpectedPunctuation, token, '*')
         pointer = True
-        temp = tokens.pop(0)
-        _assert_token_mult(temp, (lexer.Keyword, lexer.Identifier))
+        token = tokens.pop(0)
+        _assert_token_mult(token, (lexer.Keyword, lexer.Identifier))
     else:
         pointer = False
-    if isinstance(temp, lexer.Keyword):
-        if temp.value not in DefaultDataType:
+    if isinstance(token, lexer.Keyword):
+        if token.value not in DefaultDataType:
             raise UnexpectedKeyword(
-                temp,
+                token,
                 [i.value for i in DefaultDataType],
             )
-        return pointer, DefaultDataType(temp.value)
+        return pointer, DefaultDataType(token.value)
     else:
-        return pointer, Identifier(temp.value)
+        return pointer, Identifier(token.value)
+
+
+def _code_block_sa(
+    tokens: list[lexer.Token],
+    encloses: tuple[str, str] = ('{', '}'),
+) -> list[Statement]:
+    token = tokens.pop(0)
+    _assert_token(ExpectedPunctuation, token, encloses[0])
+    code: list[Statement] = []
+    while tokens[0].value != encloses[1]:
+        code.append(_statement_sa(tokens))
+    tokens.pop(0)  # consume the closing symbol so callers see past it
+    return code
+
+
+def _expression_sa(tokens: list[lexer.Token]) -> Expression:
+    if tokens[0].value == '(' and tokens[-1].value == ')':
+        return _expression_sa(tokens[1:-1])
+    elif len(tokens) == 1:
+        token = tokens.pop(0)
+        _assert_token_literal(token)
+        return _literal_map(token) # type: ignore
+    max_operator: int = -1
+    max_operator_precedence: int = -1
+    nested = 0
+    for i, token in enumerate(tokens):
+        if token.value == '(': nested += 1
+        elif token.value == ')':
+            if nested == 0:
+                raise UnexpectedPunctuation(token, "'(' before ')'")
+            nested -= 1
+        if nested == 0 and isinstance(token, lexer.Punctuation):
+            for j, operator in reversed(list(enumerate(_Operator_Precedence))):
+                if j <= max_operator_precedence:
+                    break
+                elif operator.value == token.value:
+                    max_operator = i
+                    max_operator_precedence = j
+                    break
+    if tokens[max_operator].value in UnaryOperator:
+        pass
+        # if tokens[max_operator].value in (
+        #     UnaryOperator.PostfixDecrement,
+        #     UnaryOperator.PostfixIncrement,
+        # ) and max_operator == len(tokens) - 1:
+        #     return UnaryExpression(
+        #         UnaryOperator(tokens[max_operator].value),
+        #         _expression_sa(tokens[:max_operator])
+        #     )
+    elif tokens[max_operator].value in BinaryOperator:
+        pass
+    elif tokens[max_operator].value in TernaryOperator:
+        condition = _expression_sa(tokens[:max_operator])
+        del tokens[:max_operator]
+        true_expr = _expression_sa(_get_nested_group(tokens, ('?', ':')))
+        false_expr = _expression_sa(tokens)
+        return TernaryExpression(condition, true_expr, false_expr)
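Only the ternary branch is wired up so far; the unary and binary branches still `pass`. A standalone walk-through of the ternary split on plain strings (simplified: it ignores nesting, which the `('?', ':')` pairing of `_get_nested_group` handles in the real code):

```python
tokens = ["a", ">", "b", "?", "x", ":", "y"]

q = tokens.index("?")           # loosest-binding top-level operator
condition = tokens[:q]          # ['a', '>', 'b']
del tokens[:q]

assert tokens.pop(0) == "?"     # what _get_nested_group(tokens, ('?', ':'))
c = tokens.index(":")           # does: strip '?', take up to ':'
true_expr = tokens[:c]          # ['x']
del tokens[:c + 1]

false_expr = tokens             # ['y']
print(condition, true_expr, false_expr)
```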
 def _statement_sa(tokens: list[lexer.Token]) -> Statement:
-    pass
+    token = tokens.pop(0)
+    if isinstance(token, lexer.Keyword):
+        match token.value:
+            case 'let' | 'static' as key:
+                static = key == 'static'
+                if static:
+                    token = tokens.pop(0)
+                    _assert_token(ExpectedKeyword, token, 'let')
+                identifier = tokens.pop(0)
+                _assert_token(ExpectedIdentifier, identifier)
+                token = tokens.pop(0)
+                _assert_token(ExpectedPunctuation, token, ':')
+                pointer, data_type = _data_type_sa(tokens)
+                token = tokens.pop(0)
+                _assert_token(ExpectedPunctuation, token)
+                if token.value not in ['=', ';']:
+                    raise UnexpectedPunctuation(token, ['=', ';'])
+                elif token.value == '=':
+                    token = tokens.pop(0)
+                    _assert_token_literal(token)
+                    literal = _literal_map(token) # type: ignore
+                    token = tokens.pop(0)
+                    _assert_token(ExpectedPunctuation, token)
+                    if token.value != ';':
+                        raise UnexpectedPunctuation(token, ';')
+                else: literal = None
+                return LetStatement(
+                    Identifier(identifier.value),
+                    data_type,
+                    pointer,
+                    static,
+                    literal,
+                )
+            case 'break' | 'continue' as key:
+                token = tokens.pop(0)
+                _assert_token(ExpectedPunctuation, token, ';')
+                return LoopStatements(key)
+            case 'if':
+                condition = _expression_sa(_get_nested_group(tokens))
+                code = _code_block_sa(tokens)
+                if tokens[0].value == 'else':
+                    tokens.pop(0)  # consume 'else' before its code block
+                    else_block = ElseBlock(_code_block_sa(tokens))
+                else:
+                    else_block = None
+                return IfBlock(condition, code, else_block)
+            case 'do':
+                code1 = _code_block_sa(tokens)
+                token = tokens.pop(0)
+                _assert_token(ExpectedKeyword, token, 'while')
+                condition = _expression_sa(_get_nested_group(tokens))
+                if tokens[0].value == '{':
+                    code2 = _code_block_sa(tokens)
+                else:
+                    code2 = None
+                if tokens[0].value == 'else':
+                    tokens.pop(0)  # consume 'else'
+                    else_block = ElseBlock(_code_block_sa(tokens))
+                else:
+                    else_block = None
+                return DoBlock(code1, condition, code2, else_block)
+            case 'while':
+                condition = _expression_sa(_get_nested_group(tokens))
+                code = _code_block_sa(tokens)
+                if tokens[0].value == 'else':
+                    tokens.pop(0)  # consume 'else'
+                    else_block = ElseBlock(_code_block_sa(tokens))
+                else:
+                    else_block = None
+                return WhileBlock(condition, code, else_block)
+            case 'for':
+                three_expressions = _get_nested_group(tokens)
+                token = three_expressions.pop(0)
+                pre_loop_tokens: list[lexer.Token] = []
+                while token.value != ';':
+                    pre_loop_tokens.append(token)
+                    token = three_expressions.pop(0)
+                token = three_expressions.pop(0)
+                if (
+                    type(pre_loop_tokens[0]) is lexer.Identifier and
+                    pre_loop_tokens[1].value == ':'
+                ):
+                    identifier = Identifier(pre_loop_tokens.pop(0).value)
+                    # local names here so `token`, which already holds the
+                    # first condition token, is not clobbered
+                    colon = pre_loop_tokens.pop(0)
+                    _assert_token(ExpectedPunctuation, colon, ':')
+                    pointer, data_type = _data_type_sa(pre_loop_tokens)
+                    if pre_loop_tokens:
+                        equals = pre_loop_tokens.pop(0)
+                        _assert_token(ExpectedPunctuation, equals, '=')
+                        pre_loop_expr = _expression_sa(pre_loop_tokens)
+                    pre_loop = ForPreDef(
+                        identifier,
+                        data_type,
+                        pointer,
+                        pre_loop_expr,
+                    )
+                else:
+                    pre_loop = _expression_sa(pre_loop_tokens)
+                loop_condition_tokens: list[lexer.Token] = []
+                while token.value != ';':
+                    loop_condition_tokens.append(token)
+                    token = three_expressions.pop(0)
+                # three_expressions now holds exactly the post-loop expression
+                condition = _expression_sa(loop_condition_tokens)
+                post_loop = _expression_sa(three_expressions)
+                code = _code_block_sa(tokens)
+                if tokens[0].value == 'else':
+                    tokens.pop(0)  # consume 'else'
+                    else_block = ElseBlock(_code_block_sa(tokens))
+                else:
+                    else_block = None
+                return ForBlock(
+                    pre_loop, condition, code, post_loop, else_block)
+            case key if key not in BuildInConst:
+                raise UnexpectedKeyword(token, [
+                    'static',
+                    'let',
+                    'break',
+                    'continue',
+                    'if',
+                    'do',
+                    'while',
+                    'for',
+                ] + [i.value for i in BuildInConst])
+    expr_tokens: list[lexer.Token] = [token] + _get_to_symbol(tokens)
+    return _expression_sa(expr_tokens)
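The `for` header is one parenthesized group split on `;` into three parts: a pre-loop declaration (`identifier: type [= expression]`) or plain expression, the loop condition, and a post-loop expression, followed by a code block and an optional `else`. A hypothetical statement this branch would accept:

```
for (i: int = 0; i < 10; i++) {
    statement;
} else {
    statement;
}
```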
 def _file_sa(tokens: list[lexer.Token]) -> File:
     children: list[Directive | StructBlock | FunctionBlock | EnumBlock] = []