Fixes to lexer and syntactical analyzer

This commit is contained in:
Kyler 2024-03-05 00:12:37 -07:00
parent 6ede3a4c5e
commit c2051ffce4
6 changed files with 621 additions and 94 deletions

18
examples/test1.ytd12c Normal file
View File

@ -0,0 +1,18 @@
// Kyler Olsen
// Source Code Test 1
// Syntax Test
# Dummy Directive
// Syntax fixture: a function with one plain and one defaulted parameter.
// The body covers a `let` declaration without initialiser, parenthesised
// arithmetic, a positional call to itself, and an assignment to the
// function's own name (presumably the return mechanism - confirm).
fn test_func(arg1: int, arg2: unsigned = 10) -> fixed {
let var1: float;
var1 = (arg1 + arg2) / arg1;
var1 = var1 + test_func(arg2, arg1) + 15;
test_func = var1;
}
// Syntax fixture: parameterless function with initialised `let`
// declarations and a call that passes every argument by name.
fn main() -> int {
let var1: int = 1;
let var2: unsigned = 1;
main = 0;
test_func(arg1=var1, arg2=var2);
}

93
examples/test2.ytd12c Normal file
View File

@ -0,0 +1,93 @@
// Kyler Olsen
// Source Code Test 2
// Syntax Test
# Dummy Directive
// Syntax fixture: struct mixing static and non-static members, members
// with and without default values, and `@`-prefixed (pointer) types.
struct Point {
static points: @Point = 0,
static count: int = 0,
x: int = 0,
y: int = 0,
z: int,
random: @int
}
// Syntax fixture: enum with two members without a value and one with an
// explicit value.
enum Colors { RED, GREEN, BLUE = 7 }
// Syntax fixture: a function with one plain and one defaulted parameter.
// The body covers a `let` declaration without initialiser, parenthesised
// arithmetic, a positional call to itself, and an assignment to the
// function's own name (presumably the return mechanism - confirm).
fn test_func(arg1: int, arg2: unsigned = 10) -> fixed {
let var1: float;
var1 = (arg1 + arg2) / arg1;
var1 = var1 + test_func(arg2, arg1) + 15;
test_func = var1;
}
// Syntax fixture: one statement per operator form (postfix and prefix
// unary, binary, compound assignment, ternary), followed by one
// assignment per literal form (built-in constants; decimal, hex,
// fractional, exponent, octal and binary numbers; character literals
// including escapes; string literals including escaped quotes).
fn test_func2() -> Point {
test++;
test--;
--test;
++test;
-test;
!test;
$test;
~test;
@test;
test >> test;
test << test;
test ^ test;
test | test;
test & test;
test % test;
test / test;
test * test;
test - test;
test + test;
test >= test;
test > test;
test <= test;
test < test;
test != test;
test == test;
test ^^ test;
test || test;
test && test;
test >>= test;
test <<= test;
test ^= test;
test |= test;
test &= test;
test %= test;
test /= test;
test *= test;
test -= test;
test += test;
test = test;
test ? test : test;
test = None;
test = False;
test = True;
test = 1234;
test = 0xff1;
test = .156;
test = 15e5;
test = 0o1526;
test = 0b1010;
test = '5';
test = 'g';
test = '\n';
test = '\'';
test = '\\';
test = '"';
test = "Hello World!";
test = "This is \"cool\"!";
}
// Syntax fixture: initialised `let` declarations, a call with named
// arguments, and a call with an empty argument list.
fn main() -> int {
let var1: int = 1;
let var2: unsigned = 1;
main = 0;
test_func(arg1=var1, arg2=var2);
test_func2();
}

View File

@ -21,6 +21,12 @@ class FileInfo:
self._col = col
self._length = length
def __repr__(self) -> str:
    """Return ``ClassName('filename',line,col,length)`` for this location."""
    cls_name = type(self).__name__
    fields = "'{}',{},{},{}".format(
        self._filename, self._line, self._col, self._length)
    return cls_name + "(" + fields + ")"
class CompilerError(Exception):

View File

@ -113,7 +113,7 @@ _Num_Continue_Next = {
}
}
_Punctuation_Any = "@$+-*/%~&|^<>=!?{}[]().->,;:"
_Punctuation_Any = "@$+-*/%~&|^<>=!?{[(}]).->,;:"
_Punctuation = (
"++", "--", "@", "$", "+", "-",
@ -143,6 +143,9 @@ class Token:
self._value = value
self._file_info = file_info
def __str__(self) -> str:
    """Return a one-line ``Type: ..., Value: ...`` summary of the token."""
    return "Type: {}, Value: {}".format(self._type, self.value)

@property
def value(self) -> str:
    """The token's text as stored in ``_value``."""
    return self._value
@ -183,6 +186,7 @@ def lexer(file: str, filename: str) -> Sequence[Token]:
tokens.append(Identifier(current, fi))
elif token_type is _InterTokenType.NumberLiteral:
tokens.append(NumberLiteral(current, fi))
number_type = _NumberLiteralType.Number
elif token_type is _InterTokenType.Punctuation:
if current not in _Punctuation:
raise LexerError("Invalid Punctuation", fi)
@ -209,20 +213,34 @@ def lexer(file: str, filename: str) -> Sequence[Token]:
tokens.append(Keyword(current, fi))
else:
tokens.append(Identifier(current, fi))
token_type = _InterTokenType.Generic
token_type = _InterTokenType.Generic
elif token_type is _InterTokenType.NumberLiteral:
if len(current) == 2 and char in _Num_Second[number_type]:
if (
len(current) == 1 and
number_type in _Num_Second and
char in _Num_Second[number_type]
):
current += char
if char in _Num_Second_Next[number_type]:
if (
number_type in _Num_Second_Next and
char in _Num_Second_Next[number_type]
):
number_type = _Num_Second_Next[number_type][char]
elif char in _Num_Continue:
elif (
number_type in _Num_Continue and
char in _Num_Continue[number_type]
):
current += char
if char in _Num_Continue_Next[number_type]:
if (
number_type in _Num_Continue_Next and
char in _Num_Continue_Next[number_type]
):
number_type = _Num_Continue_Next[number_type][char]
else:
fi = FileInfo(
filename, current_line, current_col, len(current))
tokens.append(NumberLiteral(current, fi))
number_type = _NumberLiteralType.Number
token_type = _InterTokenType.Generic
elif token_type is _InterTokenType.CharLiteral:
if escaped:
@ -234,12 +252,11 @@ def lexer(file: str, filename: str) -> Sequence[Token]:
fi = FileInfo(
filename, current_line, current_col, len(current))
if (
current[1] != '\\' and
len(current) == 3 or
len(current) > 3
(current[1] != '\\' and len(current) > 3) or
len(current) > 4
):
raise LexerError("Character Literal Too Long", fi)
tokens.append(StringLiteral(current, fi))
tokens.append(CharLiteral(current, fi))
token_type = _InterTokenType.Generic
continue
current += char
@ -257,7 +274,7 @@ def lexer(file: str, filename: str) -> Sequence[Token]:
continue
current += char
elif token_type is _InterTokenType.Punctuation:
if char in _Punctuation_Any:
if char in _Punctuation_Any and current + char in _Punctuation:
current += char
else:
fi = FileInfo(
@ -265,7 +282,7 @@ def lexer(file: str, filename: str) -> Sequence[Token]:
if current not in _Punctuation:
raise LexerError("Invalid Punctuation", fi)
tokens.append(Punctuation(current, fi))
token_type = _InterTokenType.Generic
token_type = _InterTokenType.Generic
if token_type is _InterTokenType.Generic:
current = char
@ -297,4 +314,26 @@ def lexer(file: str, filename: str) -> Sequence[Token]:
elif char in _Punctuation_Any:
token_type = _InterTokenType.Punctuation
fi = FileInfo(filename, current_line, current_col, len(current))
if token_type in _NewLineErrorTokens:
raise LexerError("Unexpected Newline", fi)
if token_type in _NewLineTerminatedTokens:
if token_type is _InterTokenType.Directive:
tokens.append(Directive(current, fi))
elif token_type is _InterTokenType.Word:
if len(current) > 15:
raise LexerError("Identifier Too Long", fi)
if current in _Keywords:
tokens.append(Keyword(current, fi))
else:
tokens.append(Identifier(current, fi))
elif token_type is _InterTokenType.NumberLiteral:
tokens.append(NumberLiteral(current, fi))
number_type = _NumberLiteralType.Number
elif token_type is _InterTokenType.Punctuation:
if current not in _Punctuation:
raise LexerError("Invalid Punctuation", fi)
tokens.append(Punctuation(current, fi))
token_type = _InterTokenType.Generic
return tokens

View File

@ -4,10 +4,29 @@
from typing import Sequence
import argparse
from .lexer import lexer
from .syntactical_analyzer import syntactical_analyzer
def compile(args: argparse.Namespace):
    """Run the lexer and syntactical analyzer over ``args.input_file``.

    When ``args.token_file`` is set, one line per token is written to it.
    When ``args.syntax_file`` is set, the syntax tree text is written to it.
    """
    source_text = args.input_file.read()
    tokens = lexer(source_text, args.input_file.name)

    if args.token_file:
        args.token_file.writelines(str(token) + "\n" for token in tokens)

    syntax = syntactical_analyzer(tokens)

    if args.syntax_file:
        args.syntax_file.write(syntax.tree_str())
def parser(parser: argparse.ArgumentParser):
    """Register the compiler's command line arguments on ``parser``.

    Adds the positional ``input_file`` and the optional ``-t/--token_file``
    and ``-x/--syntax_file`` outputs (all UTF-8 text files), and sets
    ``func`` to ``compile`` as the parser default.
    """
    parser.add_argument(
        'input_file', type=argparse.FileType('r', encoding='utf-8'))
    # parser.add_argument('-o', '--output_file', type=argparse.FileType('wb'))
    optional_outputs = (
        ('-t', '--token_file'),
        ('-x', '--syntax_file'),
    )
    for short_flag, long_flag in optional_outputs:
        parser.add_argument(
            short_flag, long_flag,
            type=argparse.FileType('w', encoding='utf-8'))
    parser.set_defaults(func=compile)
def main(argv: Sequence[str] | None = None):

View File

@ -4,10 +4,16 @@
from enum import Enum
from typing import Sequence
from .compiler_types import CompilerError
from .compiler_types import CompilerError# , FileInfo
from . import lexer
# _file_info: FileInfo
# file_info: FileInfo,
# self._file_info = file_info
class UnexpectedEndOfTokenStream(CompilerError): pass
@ -99,7 +105,7 @@ class UnexpectedToken(_UnexpectedTokenBase):
s = ""
for i in expected[:-1]:
s += i + "', '"
s = s[:-1] + "or '" + i
s = s[:-1] + "or '" + expected[-1]
expected = s
found = found or type(token).__name__
super().__init__(token, expected, found)
@ -124,7 +130,7 @@ class UnexpectedPunctuation(_UnexpectedTokenBase):
type NestableCodeBlock = ForBlock | WhileBlock | DoBlock | IfBlock
type Literal = (
BuildInConst |
BuiltInConst |
NumberLiteral |
CharLiteral |
StringLiteral
@ -141,25 +147,38 @@ type Expression = (
type Statement = Expression | LetStatement | LoopStatements | NestableCodeBlock
type DataType = DefaultDataType | Identifier
type DataType = BuiltInDataType | Identifier
type UnaryOperator = PostfixUnaryOperator | PrefixUnaryOperator
class BuildInConst(Enum):
class BuiltInConst(Enum):
    """Keyword constants that may appear as literal values."""

    ConstTrue = "True"
    ConstFalse = "False"
    ConstNone = "None"

    def tree_str(self, pre: str = "", pre_cont: str = "") -> str:
        """Return this constant as a single tree line prefixed by ``pre``.

        ``pre_cont`` is accepted for signature parity with other nodes and
        is not used.
        """
        return f"{pre} Built-In Constant: {self.value}\n"
class LoopStatements(Enum):
    """Loop control statements, keyed by their keyword text."""

    ContinueStatement = "continue"
    BreakStatement = "break"

    def tree_str(self, pre: str = "", pre_cont: str = "") -> str:
        """Return the upper-cased keyword as a single tree line.

        ``pre_cont`` is accepted for signature parity with other nodes and
        is not used.
        """
        keyword = self.value.upper()
        return f"{pre} {keyword}\n"
class UnaryOperator(Enum):
PostfixIncrement = "++"
PostfixDecrement = "--"
PrefixIncrement = "++"
PrefixDecrement = "--"
class PostfixUnaryOperator(Enum):
    """Unary operators written after their operand, keyed by token text."""

    Increment = "++"
    Decrement = "--"
class PrefixUnaryOperator(Enum):
Increment = "++"
Decrement = "--"
Negate = "-"
BitwiseNOT = "~"
BooleanNOT = "!"
@ -204,7 +223,7 @@ class TernaryOperator(Enum):
TernaryConditional = "?"
class DefaultDataType(Enum):
class BuiltInDataType(Enum):
unsigned = "unsigned"
int = "int"
fixed = "fixed"
@ -214,15 +233,15 @@ class DefaultDataType(Enum):
_Operator_Precedence: tuple[
UnaryOperator | BinaryOperator | TernaryOperator, ...
] = (
UnaryOperator.AddressOf,
UnaryOperator.Dereference,
UnaryOperator.BitwiseNOT,
UnaryOperator.PostfixDecrement,
UnaryOperator.PostfixIncrement,
UnaryOperator.PrefixDecrement,
UnaryOperator.PrefixIncrement,
UnaryOperator.Negate,
UnaryOperator.BooleanNOT,
PrefixUnaryOperator.AddressOf,
PrefixUnaryOperator.Dereference,
PrefixUnaryOperator.BitwiseNOT,
PostfixUnaryOperator.Decrement,
PostfixUnaryOperator.Increment,
PrefixUnaryOperator.Decrement,
PrefixUnaryOperator.Increment,
PrefixUnaryOperator.Negate,
PrefixUnaryOperator.BooleanNOT,
BinaryOperator.RightShift,
BinaryOperator.LeftShift,
BinaryOperator.BitwiseXOR,
@ -267,6 +286,12 @@ class Identifier:
):
self._content = content
def __str__(self) -> str:
    """Return the identifier text."""
    return self._content

def tree_str(self, pre: str = "", pre_cont: str = "") -> str:
    """Return this identifier as a single tree line prefixed by ``pre``.

    ``pre_cont`` is accepted for signature parity with other nodes and is
    not used.
    """
    return "{} Identifier: {}\n".format(pre, self._content)
class StringLiteral:
@ -278,6 +303,12 @@ class StringLiteral:
):
self._content = content
def __str__(self) -> str:
    """Return the stored literal text."""
    return self._content

def tree_str(self, pre: str = "", pre_cont: str = "") -> str:
    """Return this string literal as a single tree line prefixed by ``pre``.

    ``pre_cont`` is accepted for signature parity with other nodes and is
    not used.
    """
    return "{} String Literal: {}\n".format(pre, self._content)
class CharLiteral:
@ -289,6 +320,12 @@ class CharLiteral:
):
self._content = content
def __str__(self) -> str:
    """Return the stored literal text."""
    return self._content

def tree_str(self, pre: str = "", pre_cont: str = "") -> str:
    """Return this character literal as a single tree line prefixed by ``pre``.

    ``pre_cont`` is accepted for signature parity with other nodes and is
    not used.
    """
    return "{} Character Literal: {}\n".format(pre, self._content)
class NumberLiteral:
@ -300,6 +337,12 @@ class NumberLiteral:
):
self._content = content
def __str__(self) -> str:
    """Return the stored literal text."""
    return self._content

def tree_str(self, pre: str = "", pre_cont: str = "") -> str:
    """Return this number literal as a single tree line prefixed by ``pre``.

    ``pre_cont`` is accepted for signature parity with other nodes and is
    not used.
    """
    return "{} Number Literal: {}\n".format(pre, self._content)
class ArraySubscription:
@ -314,6 +357,11 @@ class ArraySubscription:
self._identifier = identifier
self._index = index
def tree_str(self, pre: str = "", pre_cont: str = "") -> str:
    """Return a two-line tree: the subscripted identifier and its index.

    The index is rendered with plain string formatting.
    NOTE(review): an index node without ``__str__`` would print its default
    object repr here - confirm every index type defines one.
    """
    header = f"{pre} Array Subscription: {self._identifier}\n"
    index_line = f"{pre_cont}└─ Index: {self._index}\n"
    return header + index_line
class FunctionArgument:
@ -322,12 +370,18 @@ class FunctionArgument:
def __init__(
self,
identifier: Identifier,
identifier: Identifier | None,
value: Expression,
):
self._identifier = identifier
self._value = value
def tree_str(self, pre: str = "", pre_cont: str = "") -> str:
    """Return the argument as a tree: optional name line, then value line.

    The value is rendered with plain string formatting.
    NOTE(review): a value node without ``__str__`` would print its default
    object repr here - confirm every expression type defines one.
    """
    lines = [f"{pre} Function Argument\n"]
    if self._identifier:
        lines.append(f"{pre_cont}├─ Name: {self._identifier}\n")
    lines.append(f"{pre_cont}└─ Value: {self._value}\n")
    return "".join(lines)
class FunctionCall:
@ -342,23 +396,41 @@ class FunctionCall:
self._identifier = identifier
self._args = args
def tree_str(self, pre: str = "", pre_cont: str = "") -> str:
    """Return the call as a tree: a header line, then one subtree per argument.

    ``pre`` prefixes the header line; ``pre_cont`` prefixes every line that
    belongs to this node's children.

    Every child receives a continuation prefix exactly as wide as its
    two-column branch marker ("│ " under a non-last child, two spaces
    under the last one), so nested levels stay aligned.
    """
    s: str = f"{pre} Function Call: {self._identifier}\n"
    if self._args:
        for arg in self._args[:-1]:
            s += arg.tree_str(pre_cont + "├─", pre_cont + "│ ")
        s += self._args[-1].tree_str(pre_cont + "└─", pre_cont + "  ")
    return s
class TernaryExpression:
    """Syntax-tree node for an operator applied to three operands."""

    _operator: TernaryOperator
    _operand1: Expression
    _operand2: Expression
    _operand3: Expression

    def __init__(
        self,
        operator: TernaryOperator,
        operand1: Expression,
        operand2: Expression,
        operand3: Expression,
    ):
        self._operator = operator
        self._operand1 = operand1
        self._operand2 = operand2
        self._operand3 = operand3

    def tree_str(self, pre: str = "", pre_cont: str = "") -> str:
        """Return a header line followed by the three operand subtrees.

        ``pre`` prefixes the header line; ``pre_cont`` prefixes every line
        that belongs to this node's children.

        Continuation prefixes are as wide as the two-column branch markers
        ("│ " under the first two operands, two spaces under the last) so
        nested levels stay aligned.
        """
        s: str = f"{pre} Ternary Expression: {self._operator}\n"
        for operand in (self._operand1, self._operand2):
            s += operand.tree_str(pre_cont + "├─", pre_cont + "│ ")
        s += self._operand3.tree_str(pre_cont + "└─", pre_cont + "  ")
        return s
class BinaryExpression:
@ -376,6 +448,12 @@ class BinaryExpression:
self._operand1 = operand1
self._operand2 = operand2
def tree_str(self, pre: str = "", pre_cont: str = "") -> str:
    """Return a header line followed by the two operand subtrees.

    ``pre`` prefixes the header line; ``pre_cont`` prefixes every line that
    belongs to this node's children.

    Continuation prefixes are as wide as the two-column branch markers
    ("│ " under the first operand, two spaces under the second) so nested
    levels stay aligned.
    """
    s: str = f"{pre} Binary Expression: {self._operator}\n"
    s += self._operand1.tree_str(pre_cont + "├─", pre_cont + "│ ")
    s += self._operand2.tree_str(pre_cont + "└─", pre_cont + "  ")
    return s
class UnaryExpression:
@ -390,6 +468,11 @@ class UnaryExpression:
self._operator = operator
self._operand = operand
def tree_str(self, pre: str = "", pre_cont: str = "") -> str:
    """Return a header line followed by the operand subtree.

    ``pre`` prefixes the header line; ``pre_cont`` prefixes every line that
    belongs to this node's child.

    The operand's continuation prefix is two columns wide, matching the
    "└─" branch marker, so deeper levels line up under it.
    """
    s: str = f"{pre} Unary Expression: {self._operator}\n"
    s += self._operand.tree_str(pre_cont + "└─", pre_cont + "  ")
    return s
class LetStatement:
@ -413,6 +496,17 @@ class LetStatement:
self._static = static
self._assignment = assignment
def tree_str(self, pre: str = "", pre_cont: str = "") -> str:
    """Return the declaration as a tree: header, type line, optional default.

    ``pre`` prefixes the header line; ``pre_cont`` prefixes the child lines.

    The type line is marked as the last child only when no default value
    line follows. Both decisions use the same ``is not None`` test, so a
    falsy-but-present default can no longer produce two "└─" lines.
    """
    has_default = self._assignment is not None
    s: str = f"{pre} Let Statement: {self._identifier}\n"
    s += pre_cont
    s += '├─ Type: ' if has_default else '└─ Type: '
    if self._static:
        s += "static "
    if self._pointer:
        s += "@"
    s += f"{self._type}\n"
    if has_default:
        s += f"{pre_cont}└─ Default Value: {self._assignment}\n"
    return s
class ElseBlock:
@ -424,6 +518,15 @@ class ElseBlock:
):
self._code = code[:]
def tree_str(self, pre: str = "", pre_cont: str = "") -> str:
    """Return the else block as a tree: header, then a "Code" section.

    ``pre`` prefixes the header line; ``pre_cont`` prefixes every line
    below it. The "Code" section is omitted when the block is empty.

    "Code" is always the last child, so its statements are indented by two
    spaces and then get the usual two-column branch/continuation prefixes.
    """
    s: str = f"{pre} Else Block\n"
    if self._code:
        s += f"{pre_cont}└─ Code\n"
        code_pre = pre_cont + "  "
        for code in self._code[:-1]:
            s += code.tree_str(code_pre + "├─", code_pre + "│ ")
        s += self._code[-1].tree_str(code_pre + "└─", code_pre + "  ")
    return s
class ForPreDef:
@ -444,6 +547,14 @@ class ForPreDef:
self._pointer = pointer
self._assignment = assignment
def tree_str(self, pre: str = "", pre_cont: str = "") -> str:
    """Return the pre-definition as a tree: header, type line, value line.

    ``pre`` prefixes the header line; ``pre_cont`` prefixes the child lines.
    A pointer type is shown with a leading "@".
    """
    pointer_mark = "@" if self._pointer else ""
    return "".join((
        f"{pre} For Loop Pre-Definition: {self._identifier}\n",
        f"{pre_cont}├─ Type: " + pointer_mark + f"{self._type}\n",
        f"{pre_cont}└─ Value: {self._assignment}\n",
    ))
class ForBlock:
@ -467,6 +578,37 @@ class ForBlock:
self._post_statement = post_statement
self._else = else_block
def tree_str(self, pre: str = "", pre_cont: str = "") -> str:
    """Return the for loop as a tree.

    Sections appear in this order: Pre-Statement, Condition,
    Post-Statement, Code (only when the body is non-empty) and the else
    block (only when present). ``pre`` prefixes the header line;
    ``pre_cont`` prefixes every line below it.

    Fixes relative to the previous version:
    * the header reads "For Loop" rather than "If Statement";
    * only the section that is really last is drawn with "└─"
      (Pre-Statement and Condition are always followed by another one);
    * continuation prefixes are as wide as the two-column branch markers.
    """
    has_code = bool(self._code)
    has_else = self._else is not None

    s: str = f"{pre} For Loop\n"

    # These two sections are never last: Post-Statement always follows.
    for title, node in (
        ("Pre-Statement", self._pre_statement),
        ("Condition", self._condition),
    ):
        s += f"{pre_cont}├─ {title}\n"
        s += node.tree_str(pre_cont + "│ └─", pre_cont + "│   ")

    # Post-Statement is last only when neither a body nor an else follows.
    if has_code or has_else:
        post_mark, post_pre = "├─", pre_cont + "│ "
    else:
        post_mark, post_pre = "└─", pre_cont + "  "
    s += f"{pre_cont}{post_mark} Post-Statement\n"
    s += self._post_statement.tree_str(post_pre + "└─", post_pre + "  ")

    if has_code:
        if has_else:
            s += f"{pre_cont}├─ Code\n"
            code_pre = pre_cont + "│ "
        else:
            s += f"{pre_cont}└─ Code\n"
            code_pre = pre_cont + "  "
        for code in self._code[:-1]:
            s += code.tree_str(code_pre + "├─", code_pre + "│ ")
        s += self._code[-1].tree_str(code_pre + "└─", code_pre + "  ")

    if has_else:
        s += self._else.tree_str(pre_cont + "└─", pre_cont + "  ")
    return s
class WhileBlock:
@ -484,6 +626,29 @@ class WhileBlock:
self._code = code[:]
self._else = else_block
def tree_str(self, pre: str = "", pre_cont: str = "") -> str:
    """Return the while loop as a tree.

    Sections appear in this order: Condition, Code (only when the body is
    non-empty) and the else block (only when present). ``pre`` prefixes
    the header line; ``pre_cont`` prefixes every line below it.

    Continuation prefixes are as wide as the two-column branch markers
    ("│ " under a non-last section, two spaces under the last one) so
    nested levels stay aligned.
    """
    has_code = bool(self._code)
    has_else = self._else is not None

    s: str = f"{pre} While Loop\n"

    # Condition is last only when neither a body nor an else follows.
    if has_code or has_else:
        s += f"{pre_cont}├─ Condition\n"
        cond_pre = pre_cont + "│ "
    else:
        s += f"{pre_cont}└─ Condition\n"
        cond_pre = pre_cont + "  "
    s += self._condition.tree_str(cond_pre + "└─", cond_pre + "  ")

    if has_code:
        if has_else:
            s += f"{pre_cont}├─ Code\n"
            code_pre = pre_cont + "│ "
        else:
            s += f"{pre_cont}└─ Code\n"
            code_pre = pre_cont + "  "
        for code in self._code[:-1]:
            s += code.tree_str(code_pre + "├─", code_pre + "│ ")
        s += self._code[-1].tree_str(code_pre + "└─", code_pre + "  ")

    if has_else:
        s += self._else.tree_str(pre_cont + "└─", pre_cont + "  ")
    return s
class DoBlock:
@ -507,6 +672,41 @@ class DoBlock:
self._second_code = None
self._else = else_block
def tree_str(self, pre: str = "", pre_cont: str = "") -> str:
    """Return the do loop as a tree.

    Sections appear in this order: First Code (only when non-empty),
    Condition, Second Code (only when non-empty) and the else block (only
    when present). ``pre`` prefixes the header line; ``pre_cont`` prefixes
    every line below it.

    Fixes relative to the previous version:
    * "First Code" is never drawn as the last child, because "Condition"
      is always printed after it;
    * continuation prefixes are as wide as the two-column branch markers.
    """
    has_second = bool(self._second_code)
    has_else = self._else is not None

    s: str = f"{pre} Do Loop\n"

    if self._first_code:
        s += f"{pre_cont}├─ First Code\n"
        code_pre = pre_cont + "│ "
        for code in self._first_code[:-1]:
            s += code.tree_str(code_pre + "├─", code_pre + "│ ")
        s += self._first_code[-1].tree_str(
            code_pre + "└─", code_pre + "  ")

    # Condition is last only when neither second code nor an else follows.
    if has_second or has_else:
        s += f"{pre_cont}├─ Condition\n"
        cond_pre = pre_cont + "│ "
    else:
        s += f"{pre_cont}└─ Condition\n"
        cond_pre = pre_cont + "  "
    s += self._condition.tree_str(cond_pre + "└─", cond_pre + "  ")

    if has_second:
        if has_else:
            s += f"{pre_cont}├─ Second Code\n"
            code_pre = pre_cont + "│ "
        else:
            s += f"{pre_cont}└─ Second Code\n"
            code_pre = pre_cont + "  "
        for code in self._second_code[:-1]:
            s += code.tree_str(code_pre + "├─", code_pre + "│ ")
        s += self._second_code[-1].tree_str(
            code_pre + "└─", code_pre + "  ")

    if has_else:
        s += self._else.tree_str(pre_cont + "└─", pre_cont + "  ")
    return s
class IfBlock:
@ -524,6 +724,29 @@ class IfBlock:
self._code = code[:]
self._else = else_block
def tree_str(self, pre: str = "", pre_cont: str = "") -> str:
    """Return the if statement as a tree.

    Sections appear in this order: Condition, Code (only when the body is
    non-empty) and the else block (only when present). ``pre`` prefixes
    the header line; ``pre_cont`` prefixes every line below it.

    Continuation prefixes are as wide as the two-column branch markers
    ("│ " under a non-last section, two spaces under the last one) so
    nested levels stay aligned.
    """
    has_code = bool(self._code)
    has_else = self._else is not None

    s: str = f"{pre} If Statement\n"

    # Condition is last only when neither a body nor an else follows.
    if has_code or has_else:
        s += f"{pre_cont}├─ Condition\n"
        cond_pre = pre_cont + "│ "
    else:
        s += f"{pre_cont}└─ Condition\n"
        cond_pre = pre_cont + "  "
    s += self._condition.tree_str(cond_pre + "└─", cond_pre + "  ")

    if has_code:
        if has_else:
            s += f"{pre_cont}├─ Code\n"
            code_pre = pre_cont + "│ "
        else:
            s += f"{pre_cont}└─ Code\n"
            code_pre = pre_cont + "  "
        for code in self._code[:-1]:
            s += code.tree_str(code_pre + "├─", code_pre + "│ ")
        s += self._code[-1].tree_str(code_pre + "└─", code_pre + "  ")

    if has_else:
        s += self._else.tree_str(pre_cont + "└─", pre_cont + "  ")
    return s
class FunctionParameter:
@ -544,13 +767,23 @@ class FunctionParameter:
self._pointer = pointer
self._default = default
def tree_str(self, pre: str = "", pre_cont: str = "") -> str:
    """Return the parameter as a tree: header, type line, optional default.

    ``pre`` prefixes the header line; ``pre_cont`` prefixes the child lines.
    A pointer type is shown with a leading "@". The default value line is
    emitted only when ``_default`` is truthy.
    """
    has_default = bool(self._default)
    type_mark = '├─ Type: ' if has_default else '└─ Type: '
    pointer_mark = "@" if self._pointer else ""

    lines = [
        f"{pre} Function Parameter: {self._identifier}\n",
        pre_cont + type_mark + pointer_mark + f"{self._type}\n",
    ]
    if has_default:
        lines.append(f"{pre_cont}└─ Default Value: {self._default}\n")
    return "".join(lines)
class FunctionBlock:
_identifier: Identifier
_params: list[FunctionParameter]
_return_type_pointer: bool
_return_type: DataType
_return_type: DataType | None
_code: list[Statement]
def __init__(
@ -558,7 +791,7 @@ class FunctionBlock:
identifier: Identifier,
params: list[FunctionParameter],
return_type_pointer: bool,
return_type: DataType,
return_type: DataType | None,
code: list[Statement],
):
self._identifier = identifier
@ -567,6 +800,32 @@ class FunctionBlock:
self._return_type = return_type
self._code = code[:]
def tree_str(self, pre: str = "", pre_cont: str = "") -> str:
    """Return the function as a tree.

    Sections appear in this order: Parameters (only when there are any),
    Return Type (only when one is set) and Code (only when the body is
    non-empty). ``pre`` prefixes the header line; ``pre_cont`` prefixes
    every line below it.

    Continuation prefixes are as wide as the two-column branch markers
    ("│ " under a non-last section, two spaces under the last one) so
    nested levels stay aligned.
    """
    has_code = bool(self._code)
    has_return = self._return_type is not None

    s: str = f"{pre} Function: {self._identifier}\n"

    if self._params:
        # Parameters is last only when no return type or code follows.
        if has_code or has_return:
            s += f"{pre_cont}├─ Parameters\n"
            params_pre = pre_cont + "│ "
        else:
            s += f"{pre_cont}└─ Parameters\n"
            params_pre = pre_cont + "  "
        for param in self._params[:-1]:
            s += param.tree_str(params_pre + "├─", params_pre + "│ ")
        s += self._params[-1].tree_str(params_pre + "└─", params_pre + "  ")

    if has_return:
        if has_code:
            s += f"{pre_cont}├─ Return Type: "
        else:
            s += f"{pre_cont}└─ Return Type: "
        if self._return_type_pointer:
            s += "@"
        s += f"{self._return_type}\n"

    if has_code:
        # Code is always the last section.
        s += f"{pre_cont}└─ Code\n"
        code_pre = pre_cont + "  "
        for code in self._code[:-1]:
            s += code.tree_str(code_pre + "├─", code_pre + "│ ")
        s += self._code[-1].tree_str(code_pre + "└─", code_pre + "  ")
    return s
class EnumMember:
@ -581,6 +840,12 @@ class EnumMember:
self._identifier = identifier
self._value = value
def tree_str(self, pre: str = "", pre_cont: str = "") -> str:
    """Return the enum member as a tree: header, then an optional value line.

    ``pre`` prefixes the header line; ``pre_cont`` prefixes the value line,
    which is emitted only when ``_value`` is not ``None``.
    """
    header = f"{pre} Enum Member: {self._identifier}\n"
    if self._value is None:
        return header
    return header + f"{pre_cont}└─ Value: {self._value}\n"
class EnumBlock:
@ -595,6 +860,14 @@ class EnumBlock:
self._identifier = identifier
self._members = members[:]
def tree_str(self, pre: str = "", pre_cont: str = "") -> str:
    """Return the enum as a tree: a header line, then one subtree per member.

    ``pre`` prefixes the header line; ``pre_cont`` prefixes every line that
    belongs to this node's children.

    Non-last members continue with "│ " and the last member with two
    spaces (it previously reused the non-last continuation), so nested
    lines stay aligned under their branch markers.
    """
    s: str = f"{pre} Enum: {self._identifier}\n"
    if self._members:
        for member in self._members[:-1]:
            s += member.tree_str(pre_cont + "├─", pre_cont + "│ ")
        s += self._members[-1].tree_str(pre_cont + "└─", pre_cont + "  ")
    return s
class StructureMember:
@ -618,6 +891,17 @@ class StructureMember:
self._static = static
self._default = default
def tree_str(self, pre: str = "", pre_cont: str = "") -> str:
    """Return the struct member as a tree: header, type line, optional default.

    ``pre`` prefixes the header line; ``pre_cont`` prefixes the child lines.

    The type line is marked as the last child only when no default value
    line follows. Both decisions use the same ``is not None`` test, so a
    falsy-but-present default can no longer produce two "└─" lines.
    """
    has_default = self._default is not None
    s: str = f"{pre} Struct Member: {self._identifier}\n"
    s += pre_cont
    s += '├─ Type: ' if has_default else '└─ Type: '
    if self._static:
        s += "static "
    if self._pointer:
        s += "@"
    s += f"{self._type}\n"
    if has_default:
        s += f"{pre_cont}└─ Default Value: {self._default}\n"
    return s
class StructBlock:
@ -632,6 +916,14 @@ class StructBlock:
self._identifier = identifier
self._members = members[:]
def tree_str(self, pre: str = "", pre_cont: str = "") -> str:
    """Return the struct as a tree: a header line, then one subtree per member.

    ``pre`` prefixes the header line; ``pre_cont`` prefixes every line that
    belongs to this node's children.

    Non-last members continue with "│ " and the last member with two
    spaces (it previously reused the non-last continuation), so nested
    lines stay aligned under their branch markers.
    """
    s: str = f"{pre} Struct: {self._identifier}\n"
    if self._members:
        for member in self._members[:-1]:
            s += member.tree_str(pre_cont + "├─", pre_cont + "│ ")
        s += self._members[-1].tree_str(pre_cont + "└─", pre_cont + "  ")
    return s
class Directive:
@ -643,6 +935,9 @@ class Directive:
):
self._content = content
def tree_str(self, pre: str = "", pre_cont: str = "") -> str:
    """Return the directive as a single tree line prefixed by ``pre``.

    ``pre_cont`` is accepted for signature parity with other nodes and is
    not used.
    """
    return "{} Directive: {}\n".format(pre, self._content)
class File:
@ -654,6 +949,14 @@ class File:
):
self._children = children[:]
def tree_str(self) -> str:
    """Return the whole syntax tree as text, rooted at a "File" line.

    Each child is rendered through its own ``tree_str``. Non-last children
    get the "├─" marker with "│ " as continuation prefix; the last child
    gets "└─" with two spaces. Both continuations are as wide as the
    markers, so nested levels stay aligned.
    """
    s: str = "File\n"
    if self._children:
        for child in self._children[:-1]:
            s += child.tree_str("├─", "│ ")
        s += self._children[-1].tree_str("└─", "  ")
    return s
def _assert_token(
exception: type[_ExpectedTokenBase],
@ -693,8 +996,8 @@ def _assert_token_literal(
type(token).__name__,
)
if isinstance(token, lexer.Keyword):
if token.value not in BuildInConst:
raise UnexpectedKeyword(token, [i.value for i in DefaultDataType])
if token.value not in BuiltInConst:
raise UnexpectedKeyword(token, [i.value for i in BuiltInDataType])
def _literal_map(literal: (
lexer.Keyword |
@ -703,7 +1006,45 @@ def _literal_map(literal: (
lexer.StringLiteral
)) -> Literal:
if isinstance(literal, lexer.Keyword):
return BuildInConst(literal.value)
return BuiltInConst(literal.value)
elif isinstance(literal, lexer.NumberLiteral):
return NumberLiteral(literal.value)
elif isinstance(literal, lexer.CharLiteral):
return CharLiteral(literal.value)
elif isinstance(literal, lexer.StringLiteral):
return StringLiteral(literal.value)
def _assert_token_value(
    token: lexer.Token,
):
    """Validate that ``token`` can stand alone as an expression value.

    Accepted tokens are identifiers, number, character and string
    literals, and keywords whose text is a ``BuiltInConst`` value.

    Raises:
        ExpectedLiteral: ``token`` is not one of the accepted token classes.
        UnexpectedKeyword: ``token`` is a keyword that is not a built-in
            constant. The expected list now names the built-in constants
            that are actually accepted, instead of the data type keywords.
    """
    token_types = (
        lexer.Identifier,
        lexer.Keyword,
        lexer.NumberLiteral,
        lexer.CharLiteral,
        lexer.StringLiteral,
    )
    if not isinstance(token, token_types):
        raise ExpectedLiteral(
            token,
            [i.__name__ for i in token_types], # type: ignore
            type(token).__name__,
        )
    if isinstance(token, lexer.Keyword):
        if token.value not in BuiltInConst:
            # The check is against BuiltInConst, so report those values.
            raise UnexpectedKeyword(token, [i.value for i in BuiltInConst])
def _value_map(literal: (
lexer.Identifier |
lexer.Keyword |
lexer.NumberLiteral |
lexer.CharLiteral |
lexer.StringLiteral
)) -> Literal | Identifier:
if isinstance(literal, lexer.Identifier):
return Identifier(literal.value)
elif isinstance(literal, lexer.Keyword):
return BuiltInConst(literal.value)
elif isinstance(literal, lexer.NumberLiteral):
return NumberLiteral(literal.value)
elif isinstance(literal, lexer.CharLiteral):
@ -734,11 +1075,11 @@ def _get_nested_group(
def _get_to_symbol(
tokens: list[lexer.Token],
symbol: str = ';',
symbols: str | Sequence[str] = ';',
) -> list[lexer.Token]:
expr_len = -1
for i in range(len(tokens)):
if tokens[i].value == symbol:
if tokens[i].value in symbols:
expr_len = i
break
else:
@ -819,7 +1160,9 @@ def _function_sa(tokens: list[lexer.Token]) -> FunctionBlock:
params: list[FunctionParameter] = []
while token.value != ')':
token = tokens.pop(0)
if isinstance(token, lexer.Identifier):
if isinstance(token, lexer.Punctuation):
_assert_token(ExpectedPunctuation, token, ')')
elif isinstance(token, lexer.Identifier):
member_id = Identifier(token.value)
token = tokens.pop(0)
_assert_token(ExpectedPunctuation, token, ':')
@ -840,11 +1183,12 @@ def _function_sa(tokens: list[lexer.Token]) -> FunctionBlock:
params.append(
FunctionParameter(member_id, data_type, pointer, literal))
else:
raise UnexpectedToken(token, ["Keyword", "Identifier"])
raise UnexpectedToken(
token, ["Keyword", "Identifier", "Punctuation"])
token = tokens.pop(0)
_assert_token(ExpectedPunctuation, token, '->')
pointer, return_type = _data_type_sa(tokens)
code = _code_block_sa(tokens)
code = _code_block_sa(_get_nested_group(tokens, ('{','}')))
return FunctionBlock(
Identifier(identifier.value),
params,
@ -861,40 +1205,39 @@ def _data_type_sa(tokens: list[lexer.Token]) -> tuple[bool, DataType]:
lexer.Punctuation,
))
if isinstance(token, lexer.Punctuation):
_assert_token(ExpectedPunctuation, token, '*')
_assert_token(ExpectedPunctuation, token, '@')
pointer = True
token = tokens.pop(0)
_assert_token_mult(token, (lexer.Keyword, lexer.Identifier))
else:
pointer = False
if isinstance(token, lexer.Keyword):
if token.value not in DefaultDataType:
if token.value not in BuiltInDataType:
raise UnexpectedKeyword(
token,
[i.value for i in DefaultDataType],
[i.value for i in BuiltInDataType],
)
return pointer, DefaultDataType(token.value)
return pointer, BuiltInDataType(token.value)
else:
return pointer, Identifier(token.value)
def _code_block_sa(
tokens: list[lexer.Token],
encloses: tuple[str, str] = ('{','}'),
) -> list[Statement]:
token = tokens.pop(0)
_assert_token(ExpectedPunctuation, token, encloses[0])
def _code_block_sa(tokens: list[lexer.Token]) -> list[Statement]:
code: list[Statement] = []
while tokens[0].value != encloses[1]:
while tokens:
code.append(_statement_sa(tokens))
return code
def _expression_sa(tokens: list[lexer.Token]) -> Expression:
if tokens[0] == '(' and tokens[-1] == ')':
print([(type(i).__name__, i.value) for i in tokens])
if not tokens:
raise UnexpectedEndOfTokenStream(
"Unexpected Expression.", None) # type: ignore
if tokens[0].value == '(' and tokens[-1].value == ')':
return _expression_sa(tokens[1:-1])
elif len(tokens) == 1:
token = tokens.pop(0)
_assert_token_literal(token)
return _literal_map(token) # type: ignore
_assert_token_value(token)
return _value_map(token) # type: ignore
max_operator: int = -1
max_operator_precedence: int = -1
@ -905,7 +1248,7 @@ def _expression_sa(tokens: list[lexer.Token]) -> Expression:
if nested == 0:
raise UnexpectedPunctuation(token, "(' before ')", token.value)
nested -= 1
if nested == 0 and isinstance(token, lexer.Punctuation):
elif nested == 0 and isinstance(token, lexer.Punctuation):
for j, operator in reversed(list(enumerate(_Operator_Precedence))):
if j <= max_operator_precedence:
break
@ -914,38 +1257,42 @@ def _expression_sa(tokens: list[lexer.Token]) -> Expression:
max_operator_precedence = j
break
if tokens[max_operator].value in UnaryOperator:
if tokens[max_operator].value in (
UnaryOperator.PostfixDecrement,
UnaryOperator.PostfixIncrement,
) and max_operator == len(tokens) - 1:
operators = {
'--': UnaryOperator.PostfixDecrement,
'++': UnaryOperator.PostfixIncrement,
}
return UnaryExpression(
operators[tokens[max_operator].value],
_expression_sa(tokens[:max_operator]),
)
elif tokens[max_operator].value in (
UnaryOperator.PrefixDecrement,
UnaryOperator.PrefixIncrement,
) and max_operator == 0:
operators = {
'--': UnaryOperator.PrefixDecrement,
'++': UnaryOperator.PrefixIncrement,
}
return UnaryExpression(
operators[tokens[max_operator].value],
_expression_sa(tokens[max_operator+1:]),
)
elif max_operator == 0:
return UnaryExpression(
UnaryOperator(tokens[max_operator].value),
_expression_sa(tokens[max_operator+1:]),
)
else: raise CompilerError(
"Operator Precedence Error", tokens[max_operator].file_info)
if max_operator == -1:
function_identifier = tokens.pop(0)
_assert_token(ExpectedIdentifier, function_identifier)
token = tokens.pop(0)
_assert_token(ExpectedPunctuation, token, '(')
function_args: list[FunctionArgument] = []
while tokens:
arg_tokens = _get_to_symbol(tokens, (',', ')'))
if arg_tokens:
if len(arg_tokens) > 1 and arg_tokens[1].value == '=':
_assert_token(ExpectedIdentifier, arg_tokens[0])
arg_identifier = Identifier(arg_tokens[0].value)
del arg_tokens[:2]
else:
arg_identifier = None
function_args.append(FunctionArgument(
arg_identifier, _expression_sa(arg_tokens)))
return FunctionCall(
Identifier(function_identifier.value), function_args)
if (
tokens[max_operator].value in PostfixUnaryOperator and
max_operator == len(tokens) - 1
):
return UnaryExpression(
PostfixUnaryOperator(tokens[max_operator].value),
_expression_sa(tokens[:max_operator]),
)
elif (
tokens[max_operator].value in PrefixUnaryOperator and
max_operator == 0
):
return UnaryExpression(
PrefixUnaryOperator(tokens[max_operator].value),
_expression_sa(tokens[max_operator+1:]),
)
elif tokens[max_operator].value in BinaryOperator:
return BinaryExpression(
BinaryOperator(tokens[max_operator].value),
@ -957,7 +1304,12 @@ def _expression_sa(tokens: list[lexer.Token]) -> Expression:
del tokens[:max_operator]
true_expr = _expression_sa(_get_nested_group(tokens, ('?', ':')))
false_expr = _expression_sa(tokens)
return TernaryExpression(condition, true_expr, false_expr)
return TernaryExpression(
TernaryOperator.TernaryConditional,
condition,
true_expr,
false_expr,
)
else: raise CompilerError(
"Expression Error", tokens[max_operator].file_info)
@ -1071,7 +1423,7 @@ def _statement_sa(tokens: list[lexer.Token]) -> Statement:
else_block = None
return ForBlock(
pre_loop, condition, code, post_loop, else_block)
case key if key not in BuildInConst:
case key if key not in BuiltInConst:
raise UnexpectedKeyword(token, [
'static',
'let',
@ -1081,7 +1433,7 @@ def _statement_sa(tokens: list[lexer.Token]) -> Statement:
'do',
'while',
'for',
] + [i.value for i in BuildInConst])
] + [i.value for i in BuiltInConst])
expr_tokens: list[lexer.Token] = [token] + _get_to_symbol(tokens)
return _expression_sa(expr_tokens)