From 6aad7332630f922074fccab7af6c3b56c873d33e Mon Sep 17 00:00:00 2001 From: Kyler Date: Wed, 5 Nov 2025 22:39:31 -0700 Subject: [PATCH] Updated to test lexical errors --- SLS_C/include/tests/lexer_test_helpers.h | 6 ++ SLS_C/include/tests/tests.h | 11 +-- SLS_C/tests/lexer_test_helpers.c | 37 +++++++++- SLS_C/tests/lexer_tests.c | 89 ++++++++++++++++++++++-- SLS_C/tests/tests.c | 11 ++- SLS_Tests/cases.yaml | 34 ++++----- SLS_Tests/yaml_to_c_tests.py | 37 +++++++--- 7 files changed, 190 insertions(+), 35 deletions(-) diff --git a/SLS_C/include/tests/lexer_test_helpers.h b/SLS_C/include/tests/lexer_test_helpers.h index 0dcccfb..8c3d200 100644 --- a/SLS_C/include/tests/lexer_test_helpers.h +++ b/SLS_C/include/tests/lexer_test_helpers.h @@ -103,6 +103,11 @@ typedef struct { TestIdentifierValue *output_values; } TestTypeTupleValue; +typedef struct { + size_t length; + const char *message; +} TestErrorMessage; + LexerTest start_up_test(const char *test_name, const char *test_code); void clean_up_test(LexerResult result); TestResult error_test(LexerTest *test, LexerResult result, SlsError error); @@ -128,5 +133,6 @@ Boolean test_array_boolean_value(LexerTest *test, LexerResult result, size_t i, Boolean test_array_struct_inline_value(LexerTest *test, LexerResult result, size_t i, TestArrayStructInlineValue *values); Boolean test_token_string_value(LexerTest *test, LexerResult result, size_t i, TestTokenStringValue *values); Boolean test_type_tuple_value(LexerTest *test, LexerResult result, size_t i, TestTypeTupleValue *values); +Boolean test_for_error(LexerTest *test, LexerResult result, size_t i, TestErrorMessage *error); #endif // SLS_LEXER_TEST_HELPERS_H diff --git a/SLS_C/include/tests/tests.h b/SLS_C/include/tests/tests.h index 4cc80d5..5fcafc0 100644 --- a/SLS_C/include/tests/tests.h +++ b/SLS_C/include/tests/tests.h @@ -13,11 +13,12 @@ extern const char *TEST_FILE_NAME; typedef enum { - TEST_ERROR, - TEST_LOGIC_FAIL, - TEST_ERROR_FAIL, - TEST_PASS, - 
TEST_NOT_IMPLEMENTED, + TEST_ERROR, // The test encountered an error + TEST_LOGIC_FAIL, // The test failed: the lexer output did not match the expectation + TEST_LOGIC_ERROR_FAIL, // The test failed because of a lexical error reported from the parsed code + TEST_ERROR_FAIL, // Lexing the test code as a whole returned an error + TEST_PASS, // The test passed + TEST_NOT_IMPLEMENTED, // The test is not implemented } TestResultType; typedef struct { diff --git a/SLS_C/tests/lexer_test_helpers.c b/SLS_C/tests/lexer_test_helpers.c index 1939412..f63c257 100644 --- a/SLS_C/tests/lexer_test_helpers.c +++ b/SLS_C/tests/lexer_test_helpers.c @@ -48,6 +48,13 @@ TestResult logic_fail_test(LexerTest *test, LexerResult result, char *message) { return test->result; } +TestResult logic_error_fail_test(LexerTest *test, LexerResult result, SlsError error) { + clean_up_test(result); + test->result.status = TEST_LOGIC_ERROR_FAIL; + test->result.error = error; + return test->result; +} + TestResult error_fail_test(LexerTest *test, LexerResult result, SlsError error) { clean_up_test(result); test->result.status = TEST_ERROR_FAIL; @@ -245,6 +252,22 @@ static char *type_tuple_element_boolean_should_be(size_t i, size_t j, Boolean va return string; } +static char *token_should_be_error(size_t i, TestErrorMessage should, TokenType found) { + size_t length = ceil(log10(i + 1)) + should.length + strnlen(TOKEN_TYPES_NAMES[found], TYPE_NAMES_SAFE_LENGTH) + 72; + char *string = (char *)malloc(sizeof(char) * length); + if (string == 0) return string; + snprintf(string, length, "Token #%zu should be an error with a message of %s, but found token of type %s", i, should.message, TOKEN_TYPES_NAMES[found]); + return string; +} + +static char *error_should_be(size_t i, TestErrorMessage should, SlsError found) { + size_t length = ceil(log10(i + 1)) + should.length + strlen(found.message) + 77; + char *string = (char *)malloc(sizeof(char) * length); + if (string == 0) return string; + snprintf(string, length, "Token #%zu should be an error with a message of %s, but found
error with message %s", i, should.message, found.message); + return string; +} + // Test parts static Boolean test_token_type(LexerTest *test, LexerResult result, size_t i, TokenType token_type) { @@ -253,7 +276,7 @@ static Boolean test_token_type(LexerTest *test, LexerResult result, size_t i, To logic_fail_test(test, result, unexpected_end_of_token_stream(i + 1)); return TRUE; } if (head->type == SLS_ERROR) { - error_fail_test(test, result, head->error); + logic_error_fail_test(test, result, head->error); return TRUE; } if (head->result.type != token_type) { logic_fail_test(test, result, token_should_be(i + 1, token_type, head->result.type)); @@ -576,3 +599,15 @@ Boolean test_type_tuple_value(LexerTest *test, LexerResult result, size_t i, Tes } return FALSE; } + +Boolean test_for_error(LexerTest *test, LexerResult result, size_t i, TestErrorMessage *error) { + LexerTokenResult *head = get_token(result.result, i); + if (head->type != SLS_ERROR) { + logic_fail_test(test, result, token_should_be_error(i + 1, *error, head->result.type)); + return TRUE; + } if (strncmp(head->error.message, error->message, error->length+1) != 0) { + logic_fail_test(test, result, error_should_be(i + 1, *error, head->error)); + return TRUE; + } + return FALSE; +} diff --git a/SLS_C/tests/lexer_tests.c b/SLS_C/tests/lexer_tests.c index c80c95d..39b7224 100644 --- a/SLS_C/tests/lexer_tests.c +++ b/SLS_C/tests/lexer_tests.c @@ -16,7 +16,8 @@ #include "tests/tests.h" -static const size_t NUM_OF_TESTS = 14; +static const size_t NUM_OF_TESTS = 22; + static TestResult test_Empty_Statement() { LexerTest test = start_up_test("test_Empty_Statement", ""); LexerResult result = lexical_analysis(&test.lexer_info); @@ -111,7 +112,7 @@ static TestResult test_Integer_i8_Decimal_127() { LexerResult result = lexical_analysis(&test.lexer_info); if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error); size_t i = 0; - // Unhandled token type: i8 + if (test_integer_value(&test, result, 
i++, &(TestIntegerValue){INTEGER_I8, 127})) return test.result; if (test_eof_value(&test, result, i++, 0)) return test.result; return pass_test(&test, result); } @@ -121,7 +122,7 @@ static TestResult test_Integer_i8_Decimal_128() { LexerResult result = lexical_analysis(&test.lexer_info); if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error); size_t i = 0; - // Unhandled token type: i8 + if (test_integer_value(&test, result, i++, &(TestIntegerValue){INTEGER_I8, -128})) return test.result; if (test_eof_value(&test, result, i++, 0)) return test.result; return pass_test(&test, result); } @@ -131,7 +132,7 @@ static TestResult test_Integer_u8_Decimal_255() { LexerResult result = lexical_analysis(&test.lexer_info); if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error); size_t i = 0; - // Unhandled token type: u8 + if (test_integer_value(&test, result, i++, &(TestIntegerValue){INTEGER_U8, 255})) return test.result; if (test_eof_value(&test, result, i++, 0)) return test.result; return pass_test(&test, result); } @@ -146,6 +147,42 @@ static TestResult test_Integer_Default_Decimal_with_Underscore() { return pass_test(&test, result); } +static TestResult test_Integer_Default_Decimal_with_Commas_Invalid() { + LexerTest test = start_up_test("test_Integer_Default_Decimal_with_Commas_Invalid", "1,000,000"); + LexerResult result = lexical_analysis(&test.lexer_info); + if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error); + size_t i = 0; + if (test_for_error(&test, result, i++, &(TestErrorMessage){52, "Invalid numeric literal: unexpected ',' in integer."})) return test.result; + return pass_test(&test, result); +} + +static TestResult test_Integer_i8_Overflow() { + LexerTest test = start_up_test("test_Integer_i8_Overflow", "128i8"); + LexerResult result = lexical_analysis(&test.lexer_info); + if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error); + size_t i = 0; + if 
(test_eof_value(&test, result, i++, 0)) return test.result; + return pass_test(&test, result); +} + +static TestResult test_Integer_i8_Underflow() { + LexerTest test = start_up_test("test_Integer_i8_Underflow", "-129i8"); + LexerResult result = lexical_analysis(&test.lexer_info); + if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error); + size_t i = 0; + if (test_eof_value(&test, result, i++, 0)) return test.result; + return pass_test(&test, result); +} + +static TestResult test_Integer_Default_Invalid_Characters() { + LexerTest test = start_up_test("test_Integer_Default_Invalid_Characters", "12a3"); + LexerResult result = lexical_analysis(&test.lexer_info); + if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error); + size_t i = 0; + if (test_for_error(&test, result, i++, &(TestErrorMessage){51, "Invalid numeric literal: unexpected character 'a'."})) return test.result; + return pass_test(&test, result); +} + static TestResult test_Integer_Default_Whitespace() { LexerTest test = start_up_test("test_Integer_Default_Whitespace", " 42 "); LexerResult result = lexical_analysis(&test.lexer_info); @@ -156,6 +193,42 @@ static TestResult test_Integer_Default_Whitespace() { return pass_test(&test, result); } +static TestResult test_Integer_Default_Invalid_Prefix() { + LexerTest test = start_up_test("test_Integer_Default_Invalid_Prefix", "0b2"); + LexerResult result = lexical_analysis(&test.lexer_info); + if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error); + size_t i = 0; + if (test_for_error(&test, result, i++, &(TestErrorMessage){47, "Invalid binary literal: digit '2' not allowed."})) return test.result; + return pass_test(&test, result); +} + +static TestResult test_Integer_Default_Invalid_Underscore_Start() { + LexerTest test = start_up_test("test_Integer_Default_Invalid_Underscore_Start", "_42"); + LexerResult result = lexical_analysis(&test.lexer_info); + if (result.type == SLS_ERROR) 
return error_fail_test(&test, result, result.error); + size_t i = 0; + if (test_for_error(&test, result, i++, &(TestErrorMessage){55, "Invalid numeric literal: cannot start with underscore."})) return test.result; + return pass_test(&test, result); +} + +static TestResult test_Integer_Default_Invalid_Underscore_End() { + LexerTest test = start_up_test("test_Integer_Default_Invalid_Underscore_End", "42_"); + LexerResult result = lexical_analysis(&test.lexer_info); + if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error); + size_t i = 0; + if (test_for_error(&test, result, i++, &(TestErrorMessage){53, "Invalid numeric literal: cannot end with underscore."})) return test.result; + return pass_test(&test, result); +} + +static TestResult test_Integer_Default_Invalid_Underscore_Double() { + LexerTest test = start_up_test("test_Integer_Default_Invalid_Underscore_Double", "4__2"); + LexerResult result = lexical_analysis(&test.lexer_info); + if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error); + size_t i = 0; + if (test_for_error(&test, result, i++, &(TestErrorMessage){62, "Invalid numeric literal: consecutive underscores not allowed."})) return test.result; + return pass_test(&test, result); +} + TestsReport run_lexer_tests() { TestsReport test_report = (TestsReport) { .section = "lexer_tests", @@ -178,7 +251,15 @@ TestsReport run_lexer_tests() { test_report.tests[i++] = test_Integer_i8_Decimal_128(); test_report.tests[i++] = test_Integer_u8_Decimal_255(); test_report.tests[i++] = test_Integer_Default_Decimal_with_Underscore(); + test_report.tests[i++] = test_Integer_Default_Decimal_with_Commas_Invalid(); + test_report.tests[i++] = test_Integer_i8_Overflow(); + test_report.tests[i++] = test_Integer_i8_Underflow(); + test_report.tests[i++] = test_Integer_Default_Invalid_Characters(); test_report.tests[i++] = test_Integer_Default_Whitespace(); + test_report.tests[i++] = test_Integer_Default_Invalid_Prefix(); + 
test_report.tests[i++] = test_Integer_Default_Invalid_Underscore_Start(); + test_report.tests[i++] = test_Integer_Default_Invalid_Underscore_End(); + test_report.tests[i++] = test_Integer_Default_Invalid_Underscore_Double(); return test_report; } diff --git a/SLS_C/tests/tests.c b/SLS_C/tests/tests.c index ff35d37..fe962da 100644 --- a/SLS_C/tests/tests.c +++ b/SLS_C/tests/tests.c @@ -22,7 +22,12 @@ int main(void) { break; case TEST_ERROR_FAIL: // Magenta - printf("\x1b[35mTest failed (errored): %s\n\t%s\n\x1b[0m", lexer_reports.tests[i].name, lexer_reports.tests[i].error.message); + printf("\x1b[35mLexing errored: %s\n\t%s\n\x1b[0m", lexer_reports.tests[i].name, lexer_reports.tests[i].error.message); + break; + case TEST_LOGIC_ERROR_FAIL: + // Red + printf("\x1b[31mTest failed with error: %s\n\t%s\n\x1b[0m", lexer_reports.tests[i].name, lexer_reports.tests[i].error.message); + free(lexer_reports.tests[i].message); break; case TEST_LOGIC_FAIL: // Red @@ -37,6 +42,10 @@ int main(void) { // Blue printf("\x1b[34mTest not implemented: %s\n\x1b[0m", lexer_reports.tests[i].name); break; + default: + // Bright Red + printf("\x1b[91mTest errored: %s\n\tUnknown test result status.\n\x1b[0m", lexer_reports.tests[i].name); + break; } } diff --git a/SLS_Tests/cases.yaml b/SLS_Tests/cases.yaml index 40c766c..efa7023 100644 --- a/SLS_Tests/cases.yaml +++ b/SLS_Tests/cases.yaml @@ -10,13 +10,9 @@ # tokens: # - type: Token Type # value: Token Value -# lexer_error: -# message: Error message. # # Parsed operations or parsing error (if no lexer error) # operations: # - function: Operation Name -# parsing_error: -# message: Error message. # # Final stack state and/or runtime error (if no lexer or parsing error) # stack_final: # - type: Stack Item Type @@ -190,8 +186,9 @@ - name: Integer Default Decimal with Commas Invalid code: "1,000,000" - lexer_error: - message: "Invalid numeric literal: unexpected ',' in integer." 
+ tokens: + - type: error + value: "Invalid numeric literal: unexpected ',' in integer." - name: Integer i8 Overflow code: "128i8" @@ -205,8 +202,9 @@ - name: Integer Default Invalid Characters code: "12a3" - lexer_error: - message: "Invalid numeric literal: unexpected character 'a'." + tokens: + - type: error + value: "Invalid numeric literal: unexpected character 'a'." - name: Integer Default Whitespace code: " 42 " @@ -223,23 +221,27 @@ - name: Integer Default Invalid Prefix code: "0b2" - lexer_error: - message: "Invalid binary literal: digit '2' not allowed." + tokens: + - type: error + value: "Invalid binary literal: digit '2' not allowed." - name: Integer Default Invalid Underscore Start code: "_42" - lexer_error: - message: "Invalid numeric literal: cannot start with underscore." + tokens: + - type: error + value: "Invalid numeric literal: cannot start with underscore." - name: Integer Default Invalid Underscore End code: "42_" - lexer_error: - message: "Invalid numeric literal: cannot end with underscore." + tokens: + - type: error + value: "Invalid numeric literal: cannot end with underscore." - name: Integer Default Invalid Underscore Double code: "4__2" - lexer_error: - message: "Invalid numeric literal: consecutive underscores not allowed." + tokens: + - type: error + value: "Invalid numeric literal: consecutive underscores not allowed." 
# Basic Floats # Basic Strings diff --git a/SLS_Tests/yaml_to_c_tests.py b/SLS_Tests/yaml_to_c_tests.py index dc89797..9df7726 100644 --- a/SLS_Tests/yaml_to_c_tests.py +++ b/SLS_Tests/yaml_to_c_tests.py @@ -47,19 +47,39 @@ def c_string_literal(s: str) -> str: """Escape quotes for embedding in C string literals.""" return s.replace('"', '\\"') -def token_to_c_call(token: dict, idx_var="i") -> str: +def _token_to_c_call(token: dict, idx_var="i") -> str: """Generate a C 'test_*_value' call based on token type.""" ttype = token.get("type") value = token.get("value") if ttype == "i64": - return f'if (test_integer_value(&test, result, {idx_var}++, &(TestIntegerValue){{INTEGER_I64, {value}}})) return test.result;' + return f'test_integer_value(&test, result, {idx_var}++, &(TestIntegerValue){{INTEGER_I64, {value}}})' + elif ttype == "i32": + return f'test_integer_value(&test, result, {idx_var}++, &(TestIntegerValue){{INTEGER_I32, {value}}})' + elif ttype == "i16": + return f'test_integer_value(&test, result, {idx_var}++, &(TestIntegerValue){{INTEGER_I16, {value}}})' + elif ttype == "i8": + return f'test_integer_value(&test, result, {idx_var}++, &(TestIntegerValue){{INTEGER_I8, {value}}})' + elif ttype == "u64": + return f'test_integer_value(&test, result, {idx_var}++, &(TestIntegerValue){{INTEGER_U64, {value}}})' + elif ttype == "u32": + return f'test_integer_value(&test, result, {idx_var}++, &(TestIntegerValue){{INTEGER_U32, {value}}})' + elif ttype == "u16": + return f'test_integer_value(&test, result, {idx_var}++, &(TestIntegerValue){{INTEGER_U16, {value}}})' + elif ttype == "u8": + return f'test_integer_value(&test, result, {idx_var}++, &(TestIntegerValue){{INTEGER_U8, {value}}})' elif ttype == "identifier": - return f'if (test_identifier_value(&test, result, {idx_var}++, &(TestIdentifierValue){{FALSE, {len(value)}, "{value}"}})) return test.result;' # type: ignore + return f'test_identifier_value(&test, result, {idx_var}++, &(TestIdentifierValue){{FALSE,
{len(value)}, "{value}"}})' # type: ignore elif ttype == "identifier_literal": - return f'if (test_identifier_value(&test, result, {idx_var}++, &(TestIdentifierValue){{TRUE, {len(value)}, "{value}"}})) return test.result;' # type: ignore + return f'test_identifier_value(&test, result, {idx_var}++, &(TestIdentifierValue){{TRUE, {len(value)}, "{value}"}})' # type: ignore + elif ttype == "error": + return f'test_for_error(&test, result, {idx_var}++, &(TestErrorMessage){{{len(value)+1}, "{c_string_literal(value)}"}})' # type: ignore else: - return f'// Unhandled token type: {ttype}' + raise ValueError(f' Unhandled token type: {ttype}') + +def token_to_c_call(token: dict, idx_var="i") -> str: + """Wrap the C check generated for the token in an 'if (...) return test.result;' guard.""" + return f"if ({_token_to_c_call(token, idx_var)}) return test.result;" def generate_c_test(test: dict) -> str: """Convert a single YAML test entry to a C test function.""" @@ -68,7 +88,7 @@ def generate_c_test(test: dict) -> str: tokens = test.get("tokens", []) # Function header - c_code = [f"static TestResult {name}() " "{", + c_code = [f"static TestResult {name}() {{", f' LexerTest test = start_up_test("{name}", "{code}");', " LexerResult result = lexical_analysis(&test.lexer_info);", " if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error);", @@ -79,7 +99,8 @@ def generate_c_test(test: dict) -> str: c_code.append(" " + token_to_c_call(token)) # EOF check and return - c_code.append(" if (test_eof_value(&test, result, i++, 0)) return test.result;") + if 'error' not in [token.get("type") for token in tokens]: + c_code.append(" if (test_eof_value(&test, result, i++, 0)) return test.result;") c_code.append(" return pass_test(&test, result);") c_code.append("}\n") @@ -100,7 +121,7 @@ def yaml_to_c_tests(yaml_path: str, output_path: str): program = [ file_headers, - f"static const size_t NUM_OF_TESTS = {len(tests)};", + f"static const size_t NUM_OF_TESTS = {len(tests)};\n", "\n".join(c_tests),
main_header, ] + [f" test_report.tests[i++] = {sanitize_name(test['name'])}();" for test in tests]