More error reporting improvements

Kyler 2024-03-06 23:36:19 -07:00
parent c83aa3981e
commit e23888844b
4 changed files with 147 additions and 50 deletions

View File

@@ -67,21 +67,20 @@ class CompilerError(Exception):
def __init__(self, message: str, file_info: FileInfo):
new_message = message
if file_info is not None:
new_message += (
f"\nIn file {file_info.filename} at line {file_info.line} "
)
if file_info.lines:
new_message += f"to line {file_info.line + file_info.lines}"
with open(file_info.filename, 'r') as file:
new_message += ''.join(
file.readlines()[
file_info.line-1:file_info.line + file_info.lines])
else:
new_message += f"col {file_info.col}\n\n"
with open(file_info.filename, 'r') as file:
new_message += file.readlines()[file_info.line-1]
new_message += ' ' * (
file_info.col - 1) + '^' * file_info.length
new_message += (
f"\nIn file {file_info.filename} at line {file_info.line} "
)
if file_info.lines:
new_message += f"to line {file_info.line + file_info.lines}"
with open(file_info.filename, 'r') as file:
new_message += ''.join(
file.readlines()[
file_info.line-1:file_info.line + file_info.lines])
else:
new_message += f"col {file_info.col}\n\n"
with open(file_info.filename, 'r') as file:
new_message += file.readlines()[file_info.line-1]
new_message += ' ' * (
file_info.col - 1) + '^' * file_info.length
super().__init__(new_message)
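
The rewritten constructor appends a source excerpt to the message: for a multi-line error (file_info.lines set) it quotes the whole line range, otherwise it quotes the single offending line and underlines it with carets at the offending column. A minimal runnable sketch of the single-line branch, using a stand-in FileInfo and a hypothetical example.src (both illustrative, not the project's real definitions):

from dataclasses import dataclass

@dataclass
class FileInfo:  # stand-in mirroring only the fields CompilerError reads
    filename: str
    line: int    # 1-indexed line of the error
    col: int     # 1-indexed column of the error
    length: int  # number of characters to underline
    lines: int   # extra lines for a multi-line span, 0 for single-line

def render(message: str, fi: FileInfo, source: str) -> str:
    # Mirrors the else-branch above: header, blank line, source line, carets.
    out = message + f"\nIn file {fi.filename} at line {fi.line} col {fi.col}\n\n"
    out += source.splitlines(keepends=True)[fi.line - 1]
    out += ' ' * (fi.col - 1) + '^' * fi.length
    return out

print(render(
    "Expected Expression.",
    FileInfo("example.src", line=1, col=12, length=1, lines=0),
    "let x = y +;\n",
))
# Expected Expression.
# In file example.src at line 1 col 12
#
# let x = y +;
#            ^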

View File

@@ -7,6 +7,9 @@ from typing import ClassVar, Sequence
from .compiler_types import CompilerError, FileInfo
class LexerError(CompilerError): pass
class _InterTokenType(Enum):
Generic = 'Generic'
Directive = 'Directive'
@@ -127,12 +130,6 @@ _Punctuation = (
)
class LexerError(CompilerError):
def __init__(self, message: str, file_info: FileInfo):
super().__init__(message, file_info)
class Token:
_type: ClassVar[str] = 'Generic'
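
With the base class doing all the work, the subclass no longer needs its own constructor: an __init__ that only forwards its arguments is equivalent to inheriting the base one, so the relocated definition shrinks to a one-line pass class. A generic sketch of why the removed forwarding constructor was redundant (names are illustrative, not the project's):

class Base(Exception):
    def __init__(self, message: str, file_info=None):
        super().__init__(message)

class Forwarding(Base):  # the shape of the removed LexerError.__init__
    def __init__(self, message: str, file_info=None):
        super().__init__(message, file_info)

class Plain(Base): pass  # the shape of the new one-line LexerError

assert Forwarding("oops").args == Plain("oops").args == ("oops",)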

View File

@@ -1,31 +1,54 @@
# Kyler Olsen
# Feb 2024
from textwrap import indent
from typing import Sequence
import argparse
from .compiler_types import CompilerError
from .lexer import lexer
from .syntactical_analyzer import syntactical_analyzer
from .lexer import lexer, LexerError
from .syntactical_analyzer import syntactical_analyzer, SyntaxError
def compile(args: argparse.Namespace):
try: tokens = lexer(args.input_file.read(), args.input_file.name)
except CompilerError as e:
print(type(e).__name__+':', e)
return
def _compile(args: argparse.Namespace):
tokens = lexer(args.input_file.read(), args.input_file.name)
if args.token_file:
for token in tokens:
args.token_file.write(str(token) + "\n")
try: syntax = syntactical_analyzer(tokens)
except CompilerError as e:
print(type(e).__name__+':', e)
return
syntax = syntactical_analyzer(tokens)
if args.syntax_file:
args.syntax_file.write(syntax.tree_str())
def compile(args: argparse.Namespace):
try: _compile(args)
except LexerError as e:
print(
f"[Lexical Error] {type(e).__name__}:\n"
f"{indent(str(e), ' |', lambda _: True)}"
)
# raise
except SyntaxError as e:
print(
f"[Syntax Error] {type(e).__name__}:\n"
f"{indent(str(e), ' |', lambda _: True)}"
)
# raise
except CompilerError as e:
print(
f"[Compiler Error] {type(e).__name__}:\n"
f"{indent(str(e), ' |', lambda _: True)}"
)
# raise
except Exception as e:
raise Exception(
"You found an error in the compiler!\n"
"\tPlease report this issue on Github:\n"
"\thttps://github.com/KylerOlsen/ytd_12-bit_computer/issues"
) from e
def parser(parser: argparse.ArgumentParser):
parser.add_argument(
'input_file', type=argparse.FileType('r', encoding='utf-8'))
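
The ' |' gutter in the handlers above comes from textwrap.indent's third argument: by default indent leaves whitespace-only lines unprefixed, so the lambda _: True predicate forces the prefix onto every line of the error report, blank ones included, keeping the gutter unbroken:

from textwrap import indent

msg = (
    "Expected Expression.\n"
    "In file example.src at line 1 col 12\n"
    "\n"
    "let x = y +;\n"
    "           ^"
)
print(indent(msg, ' |', lambda _: True))
#  |Expected Expression.
#  |In file example.src at line 1 col 12
#  |
#  |let x = y +;
#  |           ^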

View File

@@ -8,10 +8,13 @@ from .compiler_types import CompilerError, FileInfo
from . import lexer
class UnexpectedEndOfTokenStream(CompilerError): pass
class SyntaxError(CompilerError): pass
class _ExpectedTokenBase(CompilerError):
class UnexpectedEndOfTokenStream(SyntaxError): pass
class _ExpectedTokenBase(SyntaxError):
_token_type = lexer.Token
@@ -121,6 +124,9 @@ class UnexpectedPunctuation(_UnexpectedTokenBase):
_type_name = lexer.Punctuation
class ExpressionError(Exception): pass
type NestableCodeBlock = ForBlock | WhileBlock | DoBlock | IfBlock
type Literal = (
@@ -865,8 +871,8 @@ class ForBlock:
@staticmethod
def _sa(tokens: list[lexer.Token], stoken: lexer.Token) -> "ForBlock":
three_expressions = _get_nested_group(tokens)[1]
pre_loop_tokens = _get_to_symbol(three_expressions)[0]
_, three_expressions, closing_parentheses = _get_nested_group(tokens)
pre_loop_tokens, semicolon = _get_to_symbol(three_expressions)
if (
isinstance(pre_loop_tokens[0], lexer.Identifier) and
pre_loop_tokens[1].value == ':'
@@ -879,6 +885,9 @@ class ForBlock:
if pre_loop_tokens:
token = pre_loop_tokens.pop(0)
_assert_token(ExpectedPunctuation, token, '=')
if not pre_loop_tokens:
fi = semicolon.file_info
raise UnexpectedEndOfTokenStream("Expected Expression.", fi)
pre_loop_expr = _expression_sa(pre_loop_tokens)
else:
pre_loop_expr = None
@@ -894,9 +903,18 @@ class ForBlock:
fi,
)
else:
if not pre_loop_tokens:
fi = semicolon.file_info
raise UnexpectedEndOfTokenStream("Expected Expression.", fi)
pre_loop = _expression_sa(pre_loop_tokens)
loop_condition_tokens = _get_to_symbol(three_expressions)[0]
loop_condition_tokens, semicolon = _get_to_symbol(three_expressions)
if not loop_condition_tokens:
fi = semicolon.file_info
raise UnexpectedEndOfTokenStream("Expected Expression.", fi)
condition = _expression_sa(loop_condition_tokens)
if not three_expressions:
fi = closing_parentheses.file_info
raise UnexpectedEndOfTokenStream("Expected Expression.", fi)
post_loop = _expression_sa(three_expressions)
if tokens[0].value == '{':
code = _code_block_sa(_get_nested_group(tokens, ('{','}'))[1])
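
Each expression slot in the for header now gets the same guard: unpack the delimiter token returned alongside the token run, and if the run that should hold an expression is empty, anchor the error at that delimiter's file_info. The WhileBlock, DoBlock, and IfBlock hunks below repeat the pattern for their condition groups; as a hypothetical refactoring (not part of this commit) it could be factored into a helper:

def _require_expression_tokens(tokens, delimiter):
    # Hypothetical helper, not in the codebase: raise at the delimiter's
    # position when a required expression slot turned out to be empty.
    # Assumes the module's UnexpectedEndOfTokenStream is in scope.
    if not tokens:
        raise UnexpectedEndOfTokenStream(
            "Expected Expression.", delimiter.file_info)
    return tokens

# Usage, e.g. for the first slot of `for (init; cond; step)`:
#   pre_loop_tokens, semicolon = _get_to_symbol(three_expressions)
#   pre_loop_expr = _expression_sa(
#       _require_expression_tokens(pre_loop_tokens, semicolon))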
@@ -957,7 +975,11 @@ class WhileBlock:
@staticmethod
def _sa(tokens: list[lexer.Token], token: lexer.Token) -> "WhileBlock":
condition = _expression_sa(_get_nested_group(tokens)[1])
_, condition_tokens, closing_parentheses = _get_nested_group(tokens)
if not condition_tokens:
fi = closing_parentheses.file_info
raise UnexpectedEndOfTokenStream("Expected Expression.", fi)
condition = _expression_sa(condition_tokens)
if tokens[0].value == '{':
code_tokens = _get_nested_group(tokens, ('{','}'))[1]
code = _code_block_sa(code_tokens)
@@ -1039,7 +1061,10 @@ class DoBlock:
code1 = [_statement_sa(tokens)]
token = tokens.pop(0)
_assert_token(ExpectedKeyword, token, 'while')
condition_tokens = _get_nested_group(tokens)[1]
_, condition_tokens, closing_parentheses = _get_nested_group(tokens)
if not condition_tokens:
fi = closing_parentheses.file_info
raise UnexpectedEndOfTokenStream("Expected Expression.", fi)
last_token = condition_tokens[-1]
condition = _expression_sa(condition_tokens)
if tokens[0].value == '{':
@@ -1108,7 +1133,11 @@ class IfBlock:
@staticmethod
def _sa(tokens: list[lexer.Token], token: lexer.Token) -> "IfBlock":
condition = _expression_sa(_get_nested_group(tokens)[1])
_, condition_tokens, closing_parentheses = _get_nested_group(tokens)
if not condition_tokens:
fi = closing_parentheses.file_info
raise UnexpectedEndOfTokenStream("Expected Expression.", fi)
condition = _expression_sa(condition_tokens)
if tokens[0].value == '{':
code = _code_block_sa(_get_nested_group(tokens, ('{','}'))[1])
else:
@@ -1579,7 +1608,7 @@ def _assert_token_literal(
lexer.StringLiteral,
)
if not isinstance(token, token_types):
raise ExpectedLiteral(
raise UnexpectedToken(
token,
[i.__name__ for i in token_types], # type: ignore
type(token).__name__,
@@ -1614,7 +1643,7 @@ def _assert_token_value(
lexer.StringLiteral,
)
if not isinstance(token, token_types):
raise ExpectedLiteral(
raise UnexpectedToken(
token,
[i.__name__ for i in token_types], # type: ignore
type(token).__name__,
@@ -1717,16 +1746,18 @@ def _code_block_sa(tokens: list[lexer.Token]) -> list[Statement]:
return code
def _expression_sa(tokens: list[lexer.Token]) -> Expression:
# print([str(i) for i in tokens])
if not tokens:
raise UnexpectedEndOfTokenStream(
"Unexpected Expression.", None) # type: ignore
if tokens[0].value == '(' and tokens[-1].value == ')':
return _expression_sa(tokens[1:-1])
raise ExpressionError("Expected Expression.")
elif len(tokens) == 1:
token = tokens.pop(0)
_assert_token_value(token)
return _value_map(token) # type: ignore
elif tokens[0].value == '(' and tokens[-1].value == ')':
if not tokens[1:-1]:
fi = tokens[0].file_info + tokens[-1].file_info
raise UnexpectedEndOfTokenStream(
"Expected expression between '(' and ')'.", fi)
return _expression_sa(tokens[1:-1])
max_operator: int = -1
max_operator_precedence: int = -1
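
Two degenerate inputs are now rejected before any operator scanning: an empty token list (no position exists, hence the None and its # type: ignore) and an empty parenthesized group, whose error span merges the two parentheses' positions with FileInfo's + operator. The merge semantics are assumed (not shown in this diff) to run from the start of the left operand to the end of the right one, roughly:

from dataclasses import dataclass

@dataclass
class Span:  # hypothetical stand-in for a merged FileInfo
    filename: str
    line: int
    col: int
    length: int

def merge(a: Span, b: Span) -> Span:
    # Assumed FileInfo.__add__ behavior: '(' at col 5 plus ')' at col 6
    # yields a 2-character span starting at col 5.
    return Span(a.filename, a.line, a.col, (b.col + b.length) - a.col)

print(merge(Span("example.src", 1, 5, 1), Span("example.src", 1, 6, 1)))
# Span(filename='example.src', line=1, col=5, length=2)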
@@ -1764,6 +1795,9 @@ def _expression_sa(tokens: list[lexer.Token]) -> Expression:
del arg_tokens[:2]
else:
arg_identifier = None
if not arg_tokens:
fi = last_token.file_info
raise UnexpectedEndOfTokenStream("Expected Expression.", fi)
expression = _expression_sa(arg_tokens)
if arg_identifier is not None:
fi = arg_identifier.file_info + expression.file_info
@@ -1789,6 +1823,12 @@ def _expression_sa(tokens: list[lexer.Token]) -> Expression:
PostfixUnaryOperatorEnum(tokens[max_operator].value),
tokens[max_operator].file_info,
)
if not tokens[:max_operator]:
fi = tokens[max_operator].file_info
raise UnexpectedEndOfTokenStream(
f"Expected expression before '{tokens[max_operator].value}'.",
fi,
)
expression = _expression_sa(tokens[:max_operator])
fi = expression.file_info + operator.file_info
return UnaryExpression(operator, expression, fi)
@@ -1800,6 +1840,12 @@ def _expression_sa(tokens: list[lexer.Token]) -> Expression:
PrefixUnaryOperatorEnum(tokens[max_operator].value),
tokens[max_operator].file_info,
)
if not tokens[max_operator + 1:]:
fi = tokens[max_operator].file_info
raise UnexpectedEndOfTokenStream(
f"Expected expression after '{tokens[max_operator].value}'.",
fi,
)
expression = _expression_sa(tokens[max_operator + 1:])
fi = operator.file_info + expression.file_info
return UnaryExpression(operator, expression, fi)
@@ -1808,20 +1854,52 @@ def _expression_sa(tokens: list[lexer.Token]) -> Expression:
BinaryOperatorEnum(tokens[max_operator].value),
tokens[max_operator].file_info,
)
if not tokens[:max_operator]:
fi = tokens[max_operator].file_info
raise UnexpectedEndOfTokenStream(
f"Expected expression before '{tokens[max_operator].value}'.",
fi,
)
expression1 = _expression_sa(tokens[:max_operator])
if not tokens[max_operator + 1:]:
fi = tokens[max_operator].file_info
raise UnexpectedEndOfTokenStream(
f"Expected expression after '{tokens[max_operator].value}'.",
fi,
)
expression2 = _expression_sa(tokens[max_operator + 1:])
fi = expression1.file_info + expression2.file_info
return BinaryExpression(operator, expression1, expression2, fi)
elif tokens[max_operator].value in TernaryOperatorEnum:
if not tokens[:max_operator]:
fi = tokens[max_operator].file_info
raise UnexpectedEndOfTokenStream(
f"Expected expression before '{tokens[max_operator].value}'.",
fi,
)
condition = _expression_sa(tokens[:max_operator])
del tokens[:max_operator]
operator = TernaryOperator(
TernaryOperatorEnum.TernaryConditional, tokens[0].file_info)
true_expr = _expression_sa(_get_nested_group(tokens, ('?', ':'))[1])
first_op, true_tokens, second_op = _get_nested_group(tokens, ('?', ':'))
if not true_tokens:
fi = first_op.file_info + second_op.file_info
raise UnexpectedEndOfTokenStream(
"Expected expression between "
f"'{first_op.value}' and '{second_op.value}'.",
fi,
)
true_expr = _expression_sa(true_tokens)
if not tokens:
fi = second_op.file_info
raise UnexpectedEndOfTokenStream(
f"Expected expression after '{second_op.value}'.",
fi,
)
false_expr = _expression_sa(tokens)
fi = condition.file_info + false_expr.file_info
return TernaryExpression(operator, condition, true_expr, false_expr, fi)
else: raise CompilerError(
else: raise SyntaxError(
"Expression Error", tokens[max_operator].file_info)
def _statement_sa(tokens: list[lexer.Token]) -> Statement:
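
Taken together, the commit gives the compiler a real exception hierarchy: LexerError and SyntaxError both derive from CompilerError, and the analyzer's specific errors (UnexpectedEndOfTokenStream, the _ExpectedTokenBase family) now derive from SyntaxError. That ordering is what lets compile() in compiler.py dispatch from most to least specific handler, as in this condensed sketch:

class CompilerError(Exception): pass
class LexerError(CompilerError): pass
class SyntaxError(CompilerError): pass  # deliberately shadows the builtin
class UnexpectedEndOfTokenStream(SyntaxError): pass

try:
    raise UnexpectedEndOfTokenStream("Expected Expression.")
except LexerError:
    print("[Lexical Error]")
except SyntaxError:
    print("[Syntax Error]")    # taken: first matching base class wins
except CompilerError:
    print("[Compiler Error]")  # fallback for any other compiler error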