More error reporting improvements

parent c83aa3981e
commit e23888844b
@@ -67,7 +67,6 @@ class CompilerError(Exception):

     def __init__(self, message: str, file_info: FileInfo):
         new_message = message
-        if file_info is not None:
         new_message += (
             f"\nIn file {file_info.filename} at line {file_info.line} "
         )
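
Note: with the `if file_info is not None:` guard gone, CompilerError now assumes every error is constructed with a real FileInfo. A minimal sketch of the resulting behavior (this FileInfo is a stand-in carrying only the two fields the f-string above uses, and the filename is hypothetical):

    from dataclasses import dataclass

    @dataclass
    class FileInfo:
        filename: str
        line: int

    class CompilerError(Exception):
        def __init__(self, message: str, file_info: FileInfo):
            # Append the source location to every compiler diagnostic.
            new_message = message + (
                f"\nIn file {file_info.filename} at line {file_info.line} "
            )
            super().__init__(new_message)

    print(CompilerError("Unexpected token.", FileInfo("main.ytd", 3)))
    # Unexpected token.
    # In file main.ytd at line 3
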
@@ -7,6 +7,9 @@ from typing import ClassVar, Sequence
 from .compiler_types import CompilerError, FileInfo
 
 
+class LexerError(CompilerError): pass
+
+
 class _InterTokenType(Enum):
     Generic = 'Generic'
     Directive = 'Directive'
@@ -127,12 +130,6 @@ _Punctuation = (
 )
 
 
-class LexerError(CompilerError):
-
-    def __init__(self, message: str, file_info: FileInfo):
-        super().__init__(message, file_info)
-
-
 class Token:
 
     _type: ClassVar[str] = 'Generic'
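
Note: the two lexer hunks relocate LexerError above the token machinery and drop its __init__, which only forwarded its arguments to CompilerError unchanged. Combined with the syntactical_analyzer changes below, the exception hierarchy implied by this commit looks like:

    class CompilerError(Exception): ...           # compiler_types.py, carries a FileInfo

    class LexerError(CompilerError): pass         # lexer.py, now defined at the top

    class SyntaxError(CompilerError): pass        # syntactical_analyzer.py
    class UnexpectedEndOfTokenStream(SyntaxError): pass
    class _ExpectedTokenBase(SyntaxError): ...    # base of the Expected*/Unexpected* family
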
@@ -1,31 +1,54 @@
 # Kyler Olsen
 # Feb 2024
 
+from textwrap import indent
 from typing import Sequence
 import argparse
 
 from .compiler_types import CompilerError
-from .lexer import lexer
-from .syntactical_analyzer import syntactical_analyzer
+from .lexer import lexer, LexerError
+from .syntactical_analyzer import syntactical_analyzer, SyntaxError
 
 
-def compile(args: argparse.Namespace):
-    try: tokens = lexer(args.input_file.read(), args.input_file.name)
-    except CompilerError as e:
-        print(type(e).__name__+':', e)
-        return
+def _compile(args: argparse.Namespace):
+    tokens = lexer(args.input_file.read(), args.input_file.name)
 
     if args.token_file:
         for token in tokens:
             args.token_file.write(str(token) + "\n")
 
-    try: syntax = syntactical_analyzer(tokens)
-    except CompilerError as e:
-        print(type(e).__name__+':', e)
-        return
+    syntax = syntactical_analyzer(tokens)
 
     if args.syntax_file:
         args.syntax_file.write(syntax.tree_str())
 
 
+def compile(args: argparse.Namespace):
+    try: _compile(args)
+    except LexerError as e:
+        print(
+            f"[Lexical Error] {type(e).__name__}:\n"
+            f"{indent(str(e), ' |', lambda _: True)}"
+        )
+        # raise
+    except SyntaxError as e:
+        print(
+            f"[Syntax Error] {type(e).__name__}:\n"
+            f"{indent(str(e), ' |', lambda _: True)}"
+        )
+        # raise
+    except CompilerError as e:
+        print(
+            f"[Compiler Error] {type(e).__name__}:\n"
+            f"{indent(str(e), ' |', lambda _: True)}"
+        )
+        # raise
+    except Exception as e:
+        raise Exception(
+            "You found an error in the compiler!\n"
+            "\tPlease report this issue on Github:\n"
+            "\thttps://github.com/KylerOlsen/ytd_12-bit_computer/issues"
+        ) from e
+
+
 def parser(parser: argparse.ArgumentParser):
     parser.add_argument(
         'input_file', type=argparse.FileType('r', encoding='utf-8'))
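
Note: textwrap.indent normally skips lines that are pure whitespace; the `lambda _: True` predicate forces the ` |` prefix onto every line, blank ones included, so a multi-line error message stays visually grouped under its [Lexical Error]/[Syntax Error] banner. A small standalone demonstration (the message text is made up):

    from textwrap import indent

    message = "Expected Expression.\n\nIn file main.ytd at line 3"
    print(indent(message, ' |', lambda _: True))
    #  |Expected Expression.
    #  |
    #  |In file main.ytd at line 3
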
@@ -8,10 +8,13 @@ from .compiler_types import CompilerError, FileInfo
 from . import lexer
 
 
-class UnexpectedEndOfTokenStream(CompilerError): pass
+class SyntaxError(CompilerError): pass
 
 
-class _ExpectedTokenBase(CompilerError):
+class UnexpectedEndOfTokenStream(SyntaxError): pass
+
+
+class _ExpectedTokenBase(SyntaxError):
 
     _token_type = lexer.Token
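
Note: naming this class SyntaxError shadows Python's builtin in every module that imports it, so the `except SyntaxError` clause in compile() above matches the compiler's own syntax errors; a genuine builtin SyntaxError would fall through to the final `except Exception` handler instead. A standalone illustration of the shadowing (not the module's actual code):

    class SyntaxError(Exception):   # shadows the builtin, as in this commit
        pass

    try:
        raise SyntaxError("Expected Expression.")
    except SyntaxError as e:        # matches the custom class
        print("caught:", e)
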
@@ -121,6 +124,9 @@ class UnexpectedPunctuation(_UnexpectedTokenBase):
     _type_name = lexer.Punctuation
 
 
+class ExpressionError(Exception): pass
+
+
 type NestableCodeBlock = ForBlock | WhileBlock | DoBlock | IfBlock
 
 type Literal = (
@@ -865,8 +871,8 @@ class ForBlock:
 
     @staticmethod
     def _sa(tokens: list[lexer.Token], stoken: lexer.Token) -> "ForBlock":
-        three_expressions = _get_nested_group(tokens)[1]
-        pre_loop_tokens = _get_to_symbol(three_expressions)[0]
+        _, three_expressions, closing_parentheses = _get_nested_group(tokens)
+        pre_loop_tokens, semicolon = _get_to_symbol(three_expressions)
         if (
             isinstance(pre_loop_tokens[0], lexer.Identifier) and
             pre_loop_tokens[1].value == ':'
@@ -879,6 +885,9 @@ class ForBlock:
             if pre_loop_tokens:
                 token = pre_loop_tokens.pop(0)
                 _assert_token(ExpectedPunctuation, token, '=')
+                if not pre_loop_tokens:
+                    fi = semicolon.file_info
+                    raise UnexpectedEndOfTokenStream("Expected Expression.", fi)
                 pre_loop_expr = _expression_sa(pre_loop_tokens)
             else:
                 pre_loop_expr = None
@@ -894,9 +903,18 @@ class ForBlock:
                 fi,
             )
         else:
+            if not pre_loop_tokens:
+                fi = semicolon.file_info
+                raise UnexpectedEndOfTokenStream("Expected Expression.", fi)
             pre_loop = _expression_sa(pre_loop_tokens)
-        loop_condition_tokens = _get_to_symbol(three_expressions)[0]
+        loop_condition_tokens, semicolon = _get_to_symbol(three_expressions)
+        if not loop_condition_tokens:
+            fi = semicolon.file_info
+            raise UnexpectedEndOfTokenStream("Expected Expression.", fi)
         condition = _expression_sa(loop_condition_tokens)
+        if not three_expressions:
+            fi = closing_parentheses.file_info
+            raise UnexpectedEndOfTokenStream("Expected Expression.", fi)
         post_loop = _expression_sa(three_expressions)
         if tokens[0].value == '{':
             code = _code_block_sa(_get_nested_group(tokens, ('{','}'))[1])
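
Note: the new unpacking relies on the parsing helpers returning the delimiter tokens alongside the enclosed tokens. Their signatures, inferred from the call sites in this hunk (the real definitions live elsewhere in syntactical_analyzer.py, and the parameter names here are guesses):

    def _get_nested_group(
        tokens: list["lexer.Token"],
        encloses: tuple[str, str] = ('(', ')'),
    ) -> tuple["lexer.Token", list["lexer.Token"], "lexer.Token"]:
        """Pop tokens through the matching closer; return
        (opening token, enclosed tokens, closing token)."""
        ...

    def _get_to_symbol(
        tokens: list["lexer.Token"],
    ) -> tuple[list["lexer.Token"], "lexer.Token"]:
        """Pop tokens up to a terminator such as ';'; return
        (consumed tokens, terminator token)."""
        ...

Keeping the closing parenthesis and semicolon tokens around gives the new emptiness checks a FileInfo to attach to the diagnostic.
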
@@ -957,7 +975,11 @@ class WhileBlock:
 
     @staticmethod
     def _sa(tokens: list[lexer.Token], token: lexer.Token) -> "WhileBlock":
-        condition = _expression_sa(_get_nested_group(tokens)[1])
+        _, condition_tokens, closing_parentheses = _get_nested_group(tokens)
+        if not condition_tokens:
+            fi = closing_parentheses.file_info
+            raise UnexpectedEndOfTokenStream("Expected Expression.", fi)
+        condition = _expression_sa(condition_tokens)
         if tokens[0].value == '{':
             code_tokens = _get_nested_group(tokens, ('{','}'))[1]
             code = _code_block_sa(code_tokens)
@@ -1039,7 +1061,10 @@ class DoBlock:
         code1 = [_statement_sa(tokens)]
         token = tokens.pop(0)
         _assert_token(ExpectedKeyword, token, 'while')
-        condition_tokens = _get_nested_group(tokens)[1]
+        _, condition_tokens, closing_parentheses = _get_nested_group(tokens)
+        if not condition_tokens:
+            fi = closing_parentheses.file_info
+            raise UnexpectedEndOfTokenStream("Expected Expression.", fi)
         last_token = condition_tokens[-1]
         condition = _expression_sa(condition_tokens)
         if tokens[0].value == '{':
@@ -1108,7 +1133,11 @@ class IfBlock:
 
     @staticmethod
     def _sa(tokens: list[lexer.Token], token: lexer.Token) -> "IfBlock":
-        condition = _expression_sa(_get_nested_group(tokens)[1])
+        _, condition_tokens, closing_parentheses = _get_nested_group(tokens)
+        if not condition_tokens:
+            fi = closing_parentheses.file_info
+            raise UnexpectedEndOfTokenStream("Expected Expression.", fi)
+        condition = _expression_sa(condition_tokens)
         if tokens[0].value == '{':
             code = _code_block_sa(_get_nested_group(tokens, ('{','}'))[1])
         else:
@@ -1579,7 +1608,7 @@ def _assert_token_literal(
         lexer.StringLiteral,
     )
     if not isinstance(token, token_types):
-        raise ExpectedLiteral(
+        raise UnexpectedToken(
             token,
             [i.__name__ for i in token_types], # type: ignore
             type(token).__name__,
@@ -1614,7 +1643,7 @@ def _assert_token_value(
         lexer.StringLiteral,
     )
     if not isinstance(token, token_types):
-        raise ExpectedLiteral(
+        raise UnexpectedToken(
             token,
             [i.__name__ for i in token_types], # type: ignore
             type(token).__name__,
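
Note: both assertion helpers now raise UnexpectedToken with the same three-argument shape ExpectedLiteral took (token, expected names, found name), so the change is purely which class reports the mismatch. A self-contained stand-in showing that shape (the real classes and message format live earlier in syntactical_analyzer.py):

    class _UnexpectedTokenBase(Exception): pass

    class UnexpectedToken(_UnexpectedTokenBase):
        def __init__(self, token, expected, found):
            # Message format is illustrative only.
            super().__init__(f"Expected {expected}; found {found}.")
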
@@ -1717,16 +1746,18 @@ def _code_block_sa(tokens: list[lexer.Token]) -> list[Statement]:
     return code
 
 def _expression_sa(tokens: list[lexer.Token]) -> Expression:
-    # print([str(i) for i in tokens])
     if not tokens:
-        raise UnexpectedEndOfTokenStream(
-            "Unexpected Expression.", None) # type: ignore
-    if tokens[0].value == '(' and tokens[-1].value == ')':
-        return _expression_sa(tokens[1:-1])
+        raise ExpressionError("Expected Expression.")
     elif len(tokens) == 1:
         token = tokens.pop(0)
         _assert_token_value(token)
         return _value_map(token) # type: ignore
+    elif tokens[0].value == '(' and tokens[-1].value == ')':
+        if not tokens[1:-1]:
+            fi = tokens[0].file_info + tokens[-1].file_info
+            raise UnexpectedEndOfTokenStream(
+                "Expected expression between '(' and ')'.", fi)
+        return _expression_sa(tokens[1:-1])
 
     max_operator: int = -1
     max_operator_precedence: int = -1
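
Note: `tokens[0].file_info + tokens[-1].file_info` relies on FileInfo defining __add__ to merge two locations into one span. A minimal sketch under assumed field names (the real class lives in compiler_types.py):

    from dataclasses import dataclass

    @dataclass
    class FileInfo:
        # Field names are assumptions, not the actual definition.
        filename: str
        line: int
        col: int
        line_end: int
        col_end: int

        def __add__(self, other: "FileInfo") -> "FileInfo":
            # Span from the start of self to the end of other.
            return FileInfo(
                self.filename, self.line, self.col,
                other.line_end, other.col_end,
            )

    lparen = FileInfo("main.ytd", 3, 1, 3, 1)
    rparen = FileInfo("main.ytd", 3, 9, 3, 9)
    print(lparen + rparen)   # one FileInfo covering '(' through ')'
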
@@ -1764,6 +1795,9 @@ def _expression_sa(tokens: list[lexer.Token]) -> Expression:
                 del arg_tokens[:2]
             else:
                 arg_identifier = None
+            if not arg_tokens:
+                fi = last_token.file_info
+                raise UnexpectedEndOfTokenStream("Expected Expression.", fi)
             expression = _expression_sa(arg_tokens)
             if arg_identifier is not None:
                 fi = arg_identifier.file_info + expression.file_info
@@ -1789,6 +1823,12 @@ def _expression_sa(tokens: list[lexer.Token]) -> Expression:
             PostfixUnaryOperatorEnum(tokens[max_operator].value),
             tokens[max_operator].file_info,
         )
+        if not tokens[:max_operator]:
+            fi = tokens[max_operator].file_info
+            raise UnexpectedEndOfTokenStream(
+                f"Expected expression before '{tokens[max_operator].value}'.",
+                fi,
+            )
         expression = _expression_sa(tokens[:max_operator])
         fi = expression.file_info + operator.file_info
         return UnaryExpression(operator, expression, fi)
@@ -1800,6 +1840,12 @@ def _expression_sa(tokens: list[lexer.Token]) -> Expression:
             PrefixUnaryOperatorEnum(tokens[max_operator].value),
             tokens[max_operator].file_info,
         )
+        if not tokens[max_operator + 1:]:
+            fi = tokens[max_operator].file_info
+            raise UnexpectedEndOfTokenStream(
+                f"Expected expression after '{tokens[max_operator].value}'.",
+                fi,
+            )
         expression = _expression_sa(tokens[max_operator + 1:])
         fi = operator.file_info + expression.file_info
         return UnaryExpression(operator, expression, fi)
@@ -1808,20 +1854,52 @@ def _expression_sa(tokens: list[lexer.Token]) -> Expression:
             BinaryOperatorEnum(tokens[max_operator].value),
             tokens[max_operator].file_info,
         )
+        if not tokens[:max_operator]:
+            fi = tokens[max_operator].file_info
+            raise UnexpectedEndOfTokenStream(
+                f"Expected expression before '{tokens[max_operator].value}'.",
+                fi,
+            )
         expression1 = _expression_sa(tokens[:max_operator])
+        if not tokens[max_operator + 1:]:
+            fi = tokens[max_operator].file_info
+            raise UnexpectedEndOfTokenStream(
+                f"Expected expression after '{tokens[max_operator].value}'.",
+                fi,
+            )
         expression2 = _expression_sa(tokens[max_operator + 1:])
         fi = expression1.file_info + expression2.file_info
         return BinaryExpression(operator, expression1, expression2, fi)
     elif tokens[max_operator].value in TernaryOperatorEnum:
+        if not tokens[:max_operator]:
+            fi = tokens[max_operator].file_info
+            raise UnexpectedEndOfTokenStream(
+                f"Expected expression before '{tokens[max_operator].value}'.",
+                fi,
+            )
         condition = _expression_sa(tokens[:max_operator])
         del tokens[:max_operator]
         operator = TernaryOperator(
             TernaryOperatorEnum.TernaryConditional, tokens[0].file_info)
-        true_expr = _expression_sa(_get_nested_group(tokens, ('?', ':'))[1])
+        first_op, true_tokens, second_op = _get_nested_group(tokens, ('?', ':'))
+        if not true_tokens:
+            fi = first_op.file_info + second_op.file_info
+            raise UnexpectedEndOfTokenStream(
+                "Expected expression between "
+                f"'{first_op.value}' and '{second_op.value}'.",
+                fi,
+            )
+        true_expr = _expression_sa(true_tokens)
+        if not tokens:
+            fi = second_op.file_info
+            raise UnexpectedEndOfTokenStream(
+                f"Expected expression after '{second_op.value}'.",
+                fi,
+            )
         false_expr = _expression_sa(tokens)
         fi = condition.file_info + false_expr.file_info
         return TernaryExpression(operator, condition, true_expr, false_expr, fi)
-    else: raise CompilerError(
+    else: raise SyntaxError(
         "Expression Error", tokens[max_operator].file_info)
 
 def _statement_sa(tokens: list[lexer.Token]) -> Statement:
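
Note: taken together, these guards replace what used to surface as a bare "Unexpected Expression." with no file information (or an index error) with located diagnostics. Tracing the branches above by hand, not captured compiler output, malformed expressions now report roughly:

    1 +        ->  UnexpectedEndOfTokenStream: Expected expression after '+'.
    ( )        ->  UnexpectedEndOfTokenStream: Expected expression between '(' and ')'.
    a ? : b    ->  UnexpectedEndOfTokenStream: Expected expression between '?' and ':'.
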