ytd_12-bit_computer/pytd12dk/compiler/semantical_analyzer.py

579 lines
18 KiB
Python

# Kyler Olsen
# Mar 2024
from enum import Enum
from .compiler_types import CompilerError, FileInfo
from . import syntactical_analyzer
type SymbolDefinitionTypes = (
InternalDefinition |
syntactical_analyzer.FunctionParameter |
syntactical_analyzer.LetStatement |
syntactical_analyzer.ForPreDef |
syntactical_analyzer.StructBlock |
FunctionBlock |
syntactical_analyzer.EnumBlock
)
type SymbolReferenceTypes = (
syntactical_analyzer.Identifier |
syntactical_analyzer.StructBlock |
FunctionBlock |
syntactical_analyzer.EnumBlock
)
type Identifier = syntactical_analyzer.Identifier | CompoundIdentifier
type Statement = (
syntactical_analyzer.Expression |
syntactical_analyzer.LetStatement |
syntactical_analyzer.LoopStatements |
syntactical_analyzer.NestableCodeBlock |
Identifier
)
# Leaf value nodes: `_get_all_operands` and `_flatten_statement` return
# these unchanged instead of descending into them.
BaseValues: tuple[type, ...] = (
    syntactical_analyzer.BuiltInConst,
    syntactical_analyzer.NumberLiteral,
    syntactical_analyzer.CharLiteral,
    syntactical_analyzer.StringLiteral,
    syntactical_analyzer.Identifier,
    syntactical_analyzer.FunctionCall,
)

# Block statements that `_flatten_statement` passes through untouched.
NestableCodeBlocks: tuple[type, ...] = (
    syntactical_analyzer.ForBlock,
    syntactical_analyzer.WhileBlock,
    syntactical_analyzer.DoBlock,
    syntactical_analyzer.IfBlock,
)
class SyntaxError(CompilerError):
    """Base class for the errors raised by this semantic-analysis module.

    NOTE(review): inside this module the name shadows Python's built-in
    ``SyntaxError``.
    """

    # Error category label handed to ``CompilerError``.
    _compiler_error_type = "Semantic"
class VariableAlreadyDeclared(SyntaxError):
    """Error for a declaration whose name is already in the symbol table."""

    def __init__(
        self,
        new: SymbolDefinitionTypes,
        existing: SymbolDefinitionTypes,
    ):
        """Build the message from the two conflicting definitions.

        Args:
            new: The offending (later) definition; the error is reported
                at its ``file_info``.
            existing: The earlier definition that already owns the name.
        """
        name = new.identifier.content
        first_location = str(existing.file_info)  # type: ignore
        super().__init__(
            f"The variable '{name}' was already declared at {first_location}",
            new.file_info,  # type: ignore
        )
class UndeclaredVariable(SyntaxError):
    """Error for a variable that has no declaration."""

    def __init__(
        self,
        variable: SymbolDefinitionTypes,
    ):
        """Report ``variable`` as undeclared at its own ``file_info``.

        Args:
            variable: Node whose ``identifier.content`` names the variable.
        """
        name = variable.identifier.content
        super().__init__(
            f"The variable '{name}' is undeclared.",
            variable.file_info,  # type: ignore
        )
class InvalidOperand(SyntaxError):
    """Error for an operand that an operator expression cannot accept."""

    def __init__(
        self,
        operator: (
            syntactical_analyzer.TernaryExpression |
            syntactical_analyzer.BinaryExpression |
            syntactical_analyzer.UnaryExpression
        ),
        operand: Statement,
    ):
        """Describe the bad operand and the operator that rejected it.

        Args:
            operator: The expression node the operand belongs to.
            operand: The rejected operand.
        """
        # Two pieces, formatted in the same order as they appear in the text.
        operand_part = f"The operand at '{operand}' is invalid for the "
        operator_part = f"operator '{operator.operator.content.value}'."
        super().__init__(
            operand_part + operator_part,
            operand.file_info,  # type: ignore
            operator.file_info,  # type: ignore
        )
class CompoundIdentifier:
    """Identifier made of an owner identifier and a member identifier."""

    _owner: Identifier
    _member: Identifier
    _file_info: FileInfo

    def __init__(
        self,
        owner: Identifier,
        member: Identifier,
        file_info: FileInfo,
    ):
        """Store the two halves and the source location.

        Args:
            owner: Identifier on the owning side.
            member: Identifier on the member side.
            file_info: Source location of the compound identifier.
        """
        self._owner = owner
        self._member = member
        self._file_info = file_info

    @property
    def owner(self) -> Identifier:
        """The owning identifier."""
        return self._owner

    @property
    def member(self) -> Identifier:
        """The member identifier."""
        return self._member

    @property
    def file_info(self) -> FileInfo:
        """Source location of this identifier."""
        return self._file_info

    def tree_str(self, pre: str = "", pre_cont: str = "") -> str:
        """Render this node and both children as tree text.

        Args:
            pre: Prefix for the node's own line.
            pre_cont: Prefix for every following line.
        """
        pieces = [
            f"{pre} CompoundIdentifier\n",
            f"{pre_cont}├─ Owner\n",
            self._owner.tree_str(pre_cont + " ├─", pre_cont + ""),
            f"{pre_cont}└─ Member\n",
            self._member.tree_str(pre_cont + " └─", pre_cont + " "),
        ]
        return "".join(pieces)
class InternalDefinition:
    """A definition described by an identifier, a data type and a pointer flag."""

    _identifier: syntactical_analyzer.Identifier
    _type: syntactical_analyzer.DataType
    _pointer: bool

    def __init__(
        self,
        identifier: syntactical_analyzer.Identifier,
        type: syntactical_analyzer.DataType,
        pointer: bool,
    ):
        """Store the definition's parts.

        Args:
            identifier: Name being defined.
            type: Data type of the definition.
            pointer: Whether the type is rendered with a leading ``@``.
        """
        self._identifier = identifier
        self._type = type
        self._pointer = pointer

    @property
    def identifier(self) -> syntactical_analyzer.Identifier:
        """The identifier being defined."""
        return self._identifier

    def tree_str(self, pre: str = "", pre_cont: str = "") -> str:
        """Render the definition as a two-line tree fragment.

        Args:
            pre: Prefix for the heading line.
            pre_cont: Prefix for the type line.
        """
        heading = f"{pre} Let Statement: {self._identifier}\n"
        pointer_mark = "@" if self._pointer else ""
        return heading + pre_cont + '└─ Type: ' + pointer_mark + f"{self._type}\n"
class SymbolType(Enum):
    """Category of a symbol-table entry.

    The string values are what ``SymbolTable.table_str`` prints in its
    type column.
    """

    struct = "struct"
    enum = "enum"
    function = "function"
    variable = "variable"
class Symbol:
    """One named symbol-table entry: its kind, definition and references."""

    _name: str
    _static: bool
    _symbol_type: SymbolType
    _definition: SymbolDefinitionTypes
    _references: list[SymbolReferenceTypes]

    def __init__(
        self,
        name: str,
        symbol_type: SymbolType,
        definition: SymbolDefinitionTypes,
    ):
        """Create a symbol that has no references yet.

        Args:
            name: Name the symbol is looked up by.
            symbol_type: Category of the symbol.
            definition: Node that defines the symbol.
        """
        self._references = []
        self._definition = definition
        self._symbol_type = symbol_type
        self._name = name

    @property
    def name(self) -> str:
        """Lookup name of the symbol."""
        return self._name

    @property
    def symbol_type(self) -> SymbolType:
        """Category of the symbol."""
        return self._symbol_type

    @property
    def references(self) -> list[SymbolReferenceTypes]:
        """A copy of the recorded references, so callers cannot mutate ours."""
        return list(self._references)

    @property
    def definition(self) -> SymbolDefinitionTypes:
        """Node that defines the symbol."""
        return self._definition

    def add_reference(self, ref: SymbolReferenceTypes):
        """Record one more reference to this symbol."""
        self._references.append(ref)
class SymbolTable:
    """Symbols of one scope, chained to an optional enclosing scope.

    Lookups (``get``) and replacements (``set``) fall back to the parent
    table when the name is not found locally.  ``add`` only looks at this
    table, so a name may shadow one from an enclosing scope.
    """

    _parent_table: "SymbolTable | None"
    _symbols: "list[Symbol]"

    def __init__(self, parent_table: "SymbolTable | None" = None):
        """Create an empty table.

        Args:
            parent_table: Enclosing scope, or ``None`` for the outermost one.
        """
        self._parent_table = parent_table
        self._symbols = []

    def __getitem__(self, key: str) -> "Symbol":
        """Same as ``get(key)``."""
        return self.get(key)

    def __setitem__(self, key: str, value: "Symbol"):
        """Same as ``set(value)``; ``key`` must equal ``value.name``.

        Raises:
            KeyError: If ``key`` differs from ``value.name``, or if the
                name is not defined in this table or any parent.
        """
        if key != value.name:
            raise KeyError(key)
        self.set(value)

    def get(self, key: str) -> "Symbol":
        """Return the symbol called ``key`` from this scope or a parent.

        Raises:
            KeyError: If no table in the chain defines ``key``.
        """
        for symbol in self._symbols:
            if symbol.name == key:
                return symbol
        if self._parent_table is None:
            raise KeyError(key)
        return self._parent_table.get(key)

    def set(self, value: "Symbol"):
        """Replace the existing symbol that has the same name as ``value``.

        The nearest table in the chain that defines the name is updated.

        Raises:
            KeyError: If no table in the chain defines ``value.name``.
        """
        for index, symbol in enumerate(self._symbols):
            if symbol.name == value.name:
                self._symbols[index] = value
                return
        if self._parent_table is None:
            raise KeyError(value.name)
        self._parent_table.set(value)

    def add(self, value: "Symbol"):
        """Append a new symbol to this table.

        Raises:
            KeyError: If this table (parents are not consulted) already
                holds a symbol with the same name.
        """
        if any(symbol.name == value.name for symbol in self._symbols):
            raise KeyError(value.name)
        self._symbols.append(value)

    def table_str(self, title: str, pre: str = "", pre_cont: str = "") -> str:
        """Render the table as a small text box.

        Each row shows ``name: type - reference count``.  An empty table
        is rendered as a single ``o-title-o`` line.

        Args:
            title: Text centred in the top border.
            pre: Prefix for the first line.
            pre_cont: Prefix for every following line.
        """
        if not self._symbols:
            return f"{pre} o-{title}-o\n"

        names = [symbol.name for symbol in self._symbols]
        kinds = [symbol.symbol_type.value for symbol in self._symbols]
        counts = [str(len(symbol.references)) for symbol in self._symbols]
        name_width = max(map(len, names))
        type_width = max(map(len, kinds))
        count_width = max(map(len, counts))

        # Width of "name: type - count"; widen to fit the title so a long
        # title cannot make the top border longer than the rows below it.
        content_width = name_width + 2 + type_width + 3 + count_width
        title_width = max(content_width, len(title))

        lines = [f"{pre} o{title.center(title_width, '-')}o\n"]
        for name, kind, count in zip(names, kinds, counts):
            row = (
                f"{(name + ':').ljust(name_width + 1)} "
                f"{kind.ljust(type_width)} - "
                f"{count.rjust(count_width)}"
            )
            lines.append(f"{pre_cont} |{row.ljust(title_width)}|\n")
        lines.append(f"{pre_cont} o{'-' * title_width}o\n")
        return "".join(lines)
class FunctionBlock:
    """Function definition after semantic analysis.

    Holds the same parts as the syntactic function block it is built
    from, plus the function's own symbol table.  ``_sa`` separates
    ``static`` let statements (``members``) from the remaining body
    (``code``).
    """

    _identifier: syntactical_analyzer.Identifier
    _params: list[syntactical_analyzer.FunctionParameter]
    _return_type_pointer: bool
    _return_type: syntactical_analyzer.DataType | None
    _members: list[syntactical_analyzer.LetStatement]
    _code: list[syntactical_analyzer.Statement]
    _file_info: FileInfo
    _symbol_table: SymbolTable

    def __init__(
        self,
        identifier: syntactical_analyzer.Identifier,
        params: list[syntactical_analyzer.FunctionParameter],
        return_type_pointer: bool,
        return_type: syntactical_analyzer.DataType | None,
        members: list[syntactical_analyzer.LetStatement],
        code: list[syntactical_analyzer.Statement],
        file_info: FileInfo,
        symbol_table: SymbolTable,
    ):
        """Store the function's parts; the list arguments are copied.

        Args:
            identifier: Name of the function.
            params: Declared parameters.
            return_type_pointer: Whether the return type is rendered
                with a leading ``@``.
            return_type: Declared return type, or ``None``.
            members: Static let statements of the function.
            code: Remaining body statements.
            file_info: Source location of the function.
            symbol_table: Symbol table of the function scope.
        """
        self._identifier = identifier
        self._params = params[:]
        self._return_type_pointer = return_type_pointer
        self._return_type = return_type
        self._members = members[:]
        self._code = code[:]
        self._file_info = file_info
        self._symbol_table = symbol_table

    @property
    def identifier(self) -> syntactical_analyzer.Identifier:
        """Name of the function."""
        return self._identifier

    @property
    def params(self) -> list[syntactical_analyzer.FunctionParameter]:
        """A copy of the parameter list."""
        return self._params[:]

    @property
    def return_type_pointer(self) -> bool:
        """Whether the return type carries the ``@`` marker."""
        return self._return_type_pointer

    @property
    def return_type(self) -> syntactical_analyzer.DataType | None:
        """Declared return type, or ``None``."""
        return self._return_type

    @property
    def members(self) -> list[syntactical_analyzer.LetStatement]:
        """A copy of the static let statements."""
        return self._members[:]

    @property
    def code(self) -> list[syntactical_analyzer.Statement]:
        """A copy of the body statements."""
        return self._code[:]

    @property
    def file_info(self) -> FileInfo:
        """Source location of the function."""
        return self._file_info

    def tree_str(self, pre: str = "", pre_cont: str = "") -> str:
        """Render the function, its symbol table and its children as tree text.

        Args:
            pre: Prefix for the heading line.
            pre_cont: Prefix for every following line.
        """
        s: str = f"{pre} Function: {self._identifier}\n"
        has_children = bool(
            self._params or
            self._code or
            self._return_type is not None or
            self._members
        )
        # The symbol table is a child line like any other, so it must carry
        # pre_cont; otherwise it is misplaced whenever the function is nested.
        if has_children:
            s += self._symbol_table.table_str(
                self.identifier.content, pre_cont + "├─", pre_cont + "")
        else:
            s += self._symbol_table.table_str(
                self.identifier.content, pre_cont + "└─", pre_cont + " ")
        if self._params:
            if self._code or self._return_type is not None or self._members:
                s += f"{pre_cont}├─ Parameters\n"
                params_pre = f"{pre_cont}"
            else:
                s += f"{pre_cont}└─ Parameters\n"
                params_pre = f"{pre_cont} "
            for param in self._params[:-1]:
                s += param.tree_str(params_pre + "├─", params_pre + "")
            s += self._params[-1].tree_str(params_pre + "└─", params_pre + " ")
        if self._return_type is not None:
            if self._code or self._members:
                s += f"{pre_cont}├─ Return Type: "
            else:
                s += f"{pre_cont}└─ Return Type: "
            if self._return_type_pointer:
                s += "@"
            s += f"{self._return_type}\n"
        if self._members:
            # A section header ends its own line, as "Parameters" and "Code"
            # do; without the newline the first member is glued onto it.
            if self._code:
                s += f"{pre_cont}├─ Members\n"
            else:
                s += f"{pre_cont}└─ Members\n"
            for member in self._members[:-1]:
                s += member.tree_str(pre_cont + " ├─", pre_cont + "")
            s += self._members[-1].tree_str(
                pre_cont + " └─", pre_cont + " ")
        if self._code:
            s += f"{pre_cont}└─ Code\n"
            for code in self._code[:-1]:
                s += code.tree_str(pre_cont + " ├─", pre_cont + "")
            s += self._code[-1].tree_str(pre_cont + " └─", pre_cont + " ")
        return s

    @staticmethod
    def _declare(
        symbol_table: SymbolTable,
        definition: (
            syntactical_analyzer.FunctionParameter |
            syntactical_analyzer.LetStatement
        ),
    ) -> None:
        """Add ``definition`` to ``symbol_table`` as a variable symbol.

        Raises:
            VariableAlreadyDeclared: If the table already holds the name.
        """
        name = definition.identifier.content
        try:
            symbol_table.add(Symbol(name, SymbolType.variable, definition))
        except KeyError:
            # The KeyError is only the table's duplicate signal; hide it so
            # the user sees the compiler error alone.
            raise VariableAlreadyDeclared(
                definition,
                symbol_table.get(name).definition,
            ) from None

    @staticmethod
    def _sa(
        func: syntactical_analyzer.FunctionBlock,
        parent_table: SymbolTable,
    ) -> "FunctionBlock":
        """Build the semantic function block for a syntactic one.

        Parameters and let statements found directly in the function body
        are registered in a new symbol table whose parent is
        ``parent_table``.  Statements nested inside other blocks are not
        visited here.

        Args:
            func: Function block from the syntax tree.
            parent_table: Symbol table of the enclosing scope.

        Raises:
            VariableAlreadyDeclared: If a parameter or let statement
                reuses a name already declared in the function scope.
        """
        symbol_table = SymbolTable(parent_table)
        for param in func.params:
            FunctionBlock._declare(symbol_table, param)

        members: list[syntactical_analyzer.LetStatement] = []
        code: list[syntactical_analyzer.Statement] = []
        for statement in func.code:
            if isinstance(statement, syntactical_analyzer.LetStatement):
                FunctionBlock._declare(symbol_table, statement)
                if statement.static:
                    # Static lets are kept apart from the executable body.
                    members.append(statement)
                    continue
            code.append(statement)

        return FunctionBlock(
            func.identifier,
            func.params,
            func.return_type_pointer,
            func.return_type,
            members,
            code,
            func.file_info,
            symbol_table,
        )
class File:
    """Root of the semantically analysed tree for one source file.

    Holds the top-level children (function blocks are replaced by this
    module's ``FunctionBlock``; other children are kept as they came from
    the syntax tree) together with the global symbol table.
    """

    _children: list[
        syntactical_analyzer.Directive |
        syntactical_analyzer.StructBlock |
        FunctionBlock |
        syntactical_analyzer.EnumBlock
    ]
    _file_info: FileInfo
    _symbol_table: SymbolTable

    def __init__(
        self,
        children: list[
            syntactical_analyzer.Directive |
            syntactical_analyzer.StructBlock |
            FunctionBlock |
            syntactical_analyzer.EnumBlock
        ],
        file_info: FileInfo,
        symbol_table: SymbolTable,
    ):
        """Store the file's parts; ``children`` is copied.

        Args:
            children: Top-level nodes of the file.
            file_info: Source location of the file.
            symbol_table: Global symbol table.
        """
        self._children = children[:]
        self._file_info = file_info
        self._symbol_table = symbol_table

    @property
    def file_info(self) -> FileInfo:
        """Source location of the file."""
        return self._file_info

    def tree_str(self) -> str:
        """Render the global symbol table and all children as tree text."""
        s: str = " File\n"
        if self._children:
            s += self._symbol_table.table_str("GLOBAL", "├─", "")
            for child in self._children[:-1]:
                s += child.tree_str("├─", "")
            s += self._children[-1].tree_str("└─", " ")
        else:
            s += self._symbol_table.table_str("GLOBAL", "└─", " ")
        return s

    @staticmethod
    def _sa(syntax_tree: syntactical_analyzer.File) -> "File":
        """Run semantic analysis over a whole syntax tree.

        Works in two passes: first every struct, function and enum is
        registered in the global symbol table, then each function is
        analysed against that table, so a function can see names that
        are defined later in the file.

        Args:
            syntax_tree: Root of the syntax tree.

        Raises:
            VariableAlreadyDeclared: If two top-level definitions share a
                name, or if a function reports a duplicate declaration.
        """
        symbol_table = SymbolTable()
        children: list[
            syntactical_analyzer.Directive |
            syntactical_analyzer.StructBlock |
            FunctionBlock |
            syntactical_analyzer.EnumBlock
        ] = []

        # Pass 1: register every named top-level definition.
        for child in syntax_tree.children:
            if isinstance(child, syntactical_analyzer.StructBlock):
                symbol_type = SymbolType.struct
            elif isinstance(child, syntactical_analyzer.FunctionBlock):
                symbol_type = SymbolType.function
            elif isinstance(child, syntactical_analyzer.EnumBlock):
                symbol_type = SymbolType.enum
            else:
                continue
            name = child.identifier.content
            try:
                symbol_table.add(
                    Symbol(name, symbol_type, child))  # type: ignore
            except KeyError:
                # Report duplicates as a compiler error, the same way
                # FunctionBlock._sa does, instead of leaking a KeyError.
                raise VariableAlreadyDeclared(
                    child,  # type: ignore
                    symbol_table.get(name).definition,
                ) from None

        # Pass 2: analyse functions and point their symbols at the result.
        for child in syntax_tree.children:
            new_child: (
                syntactical_analyzer.Directive |
                syntactical_analyzer.StructBlock |
                FunctionBlock |
                syntactical_analyzer.EnumBlock
            )
            if isinstance(child, syntactical_analyzer.FunctionBlock):
                new_child = FunctionBlock._sa(child, symbol_table)
                symbol_table.get(
                    child.identifier.content
                )._definition = new_child # type: ignore
            else:
                new_child = child
            children.append(new_child)

        file = File(children, syntax_tree._file_info, symbol_table)
        return file
def _get_all_operands(
    expression: syntactical_analyzer.Expression,
) -> list[syntactical_analyzer.Expression]:
    """Collect the leaf operands of an expression, left to right.

    Unary, binary and ternary expressions are descended into; base
    values, loop statements and no-operations are returned as leaves.

    Args:
        expression: Expression node to walk.

    Returns:
        The leaf nodes in operand order.

    Raises:
        TypeError: If a node is none of the handled kinds.  Without this
            the function would fall off the end and return ``None``,
            which breaks the list concatenation one level up.
    """
    leaf_types = BaseValues + (
        syntactical_analyzer.LoopStatements,
        syntactical_analyzer.NoOperation,
    )
    if isinstance(expression, leaf_types):
        return [expression]
    if isinstance(expression, syntactical_analyzer.UnaryExpression):
        return _get_all_operands(expression.operand)
    if isinstance(expression, syntactical_analyzer.BinaryExpression):
        return (
            _get_all_operands(expression.operand1) +
            _get_all_operands(expression.operand2)
        )
    if isinstance(expression, syntactical_analyzer.TernaryExpression):
        return (
            _get_all_operands(expression.operand1) +
            _get_all_operands(expression.operand2) +
            _get_all_operands(expression.operand3)
        )
    raise TypeError(
        f"Unsupported expression node: {type(expression).__name__}"
    )
def _flatten_statement(
    statement: syntactical_analyzer.Statement,
) -> list[syntactical_analyzer.Statement]:
    """Return ``[statement]`` for statements that need no flattening.

    Nestable code blocks, base values, loop statements, no-operations and
    unary expressions whose operand is a base value are returned wrapped
    in a one-element list.

    NOTE(review): the remaining cases look unfinished.  Other unary
    expressions, every binary and ternary expression, and any other node
    currently produce ``None`` despite the annotated return type.
    """
    if isinstance(statement, NestableCodeBlocks):
        return [statement]

    simple_types = BaseValues + (
        syntactical_analyzer.LoopStatements,
        syntactical_analyzer.NoOperation,
    )
    if isinstance(statement, simple_types):
        return [statement]

    if isinstance(statement, syntactical_analyzer.UnaryExpression):
        if isinstance(statement.operand, BaseValues):
            return [statement]
        return None

    if isinstance(statement, syntactical_analyzer.BinaryExpression):
        member_of = syntactical_analyzer.BinaryOperatorEnum.MemberOf
        if statement.operator.content == member_of:
            pass  # member-access handling is not written yet
        return None

    # Ternary expressions and everything else are not handled yet.
    return None
def semantical_analyzer(syntax_tree: syntactical_analyzer.File) -> File:
    """Entry point of this module: analyse a syntax tree.

    Args:
        syntax_tree: Root of the tree from the syntactical analyzer.

    Returns:
        The analysed ``File`` built by ``File._sa``.
    """
    analyzed_file = File._sa(syntax_tree)
    return analyzed_file