diff --git a/SLS_C/src/lexer.c b/SLS_C/src/lexer.c index a9664ce..9cb1414 100644 --- a/SLS_C/src/lexer.c +++ b/SLS_C/src/lexer.c @@ -657,18 +657,97 @@ static void skip_comments_and_whitespace(LexerInfo *lexer_info) { } } -static LexerResult parse_token_string(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { - advance(lexer_info); - skip_comments_and_whitespace(lexer_info); - c = peek(lexer_info); - if (c == '}') { - advance(lexer_info); - return lexer_result(lexer_info, (Token){TOKEN_TOKEN_STRING, .token_string = (TokenString){NULL, 0}}, start, start_line); +static LexerResult lexer_next(LexerInfo *lexer_info); + +static LexerResult convert_to_token_string(LexerInfo *lexer_info, LexerTokenResult *head, size_t start, size_t start_line) { + TokenString token_string = { + .length = 0, + .tokens = NULL + }; + LexerTokenResult *current = head; + + while (current != NULL) { + token_string.length += 1; + current = current->next; } - if (c == '\0') - return lexer_error(lexer_info, SLS_STR("Unclosed token string: missing closing brace '}'."), start, start_line); - (void)lexer_info; (void)c; (void)start; (void)start_line; - return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Lexer: Token Strings Not Implemented Error."), 1}}; + + current = head; + token_string.tokens = (Token *)malloc(sizeof(Token) * token_string.length); + + for (size_t i = 0; i < token_string.length; i++) { + if (current->result.type == TOKEN_STRING) { + token_string.tokens[i].type = TOKEN_STRING; + token_string.tokens[i].string_literal = sls_str_cpy(current->result.string_literal); + } else if (current->result.type == TOKEN_TOKEN_STRING) { + token_string.tokens[i].type = TOKEN_TOKEN_STRING; + token_string.tokens[i].token_string = (TokenString){ + .length = current->result.token_string.length, + .tokens = (Token *)malloc(sizeof(Token) * current->result.token_string.length) + }; + memcpy(token_string.tokens[i].token_string.tokens, current->result.token_string.tokens, 
current->result.token_string.length); + } else token_string.tokens[i] = current->result; + current = current->next; + } + + clean_token_result(head); + + return lexer_result(lexer_info, (Token){TOKEN_TOKEN_STRING, .token_string = token_string}, start, start_line); +} + +static LexerResult parse_token_string(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { + // Lexes a token string + LexerResult result; // For lexer_next returns + LexerTokenResult *head = 0; + LexerTokenResult *current = 0; + + size_t watchdog = 0; + + c = advance(lexer_info); + while (c != '\0') { + skip_comments_and_whitespace(lexer_info); + c = peek(lexer_info); + + // Stop at the end of the token string + if (c == '}') { + advance(lexer_info); + return convert_to_token_string(lexer_info, head, start, start_line); + } + + // Get next token + result = lexer_next(lexer_info); + + // Handle Errors + if (result.type == SLS_ERROR) { + clean_token_result(head); + return result; + } + + // Save result + if (head == 0) { + head = result.result; + current = head; + } else { + current->next = result.result; + current = current->next; + } + + // Current should not be null_ptr + if (current == 0) { + clean_token_result(head); + return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Unknown Error."), 1}}; + } + + if (current->type == SLS_ERROR || current->result.type == TOKEN_EOF) break; + + c = peek(lexer_info); + + if (watchdog++ > 1000000) { + clean_token_result(head); + return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Watchdog Triggered in Token String."), 1}}; + } + } + clean_token_result(head); + return lexer_error(lexer_info, SLS_STR("Unclosed token string: missing closing brace '}'."), start, start_line); } static LexerResult parse_array_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { @@ -761,17 +840,27 @@ static LexerResult lexer_next(LexerInfo *lexer_info) { if (c == '\"') return parse_string_literal(lexer_info, c, start, start_line); // 
Token Strings if (c == '{') return parse_token_string(lexer_info, c, start, start_line); - if (c == '}') return lexer_error(lexer_info, SLS_STR("Unexpected closing brace '}' without matching opening brace."), start, start_line); + if (c == '}') { + advance(lexer_info); + return lexer_error(lexer_info, SLS_STR("Unexpected closing brace '}' without matching opening brace."), start, start_line); + } // Array Literals if (c == '[') return parse_array_literal(lexer_info, c, start, start_line); - if (c == ']') return lexer_error(lexer_info, SLS_STR("Unexpected closing bracket ']' without matching opening bracket."), start, start_line); + if (c == ']') { + advance(lexer_info); + return lexer_error(lexer_info, SLS_STR("Unexpected closing bracket ']' without matching opening bracket."), start, start_line); + } // Type Tuples if (c == '(') return parse_type_tuples(lexer_info, c, start, start_line); - if (c == ')') return lexer_error(lexer_info, SLS_STR("Unexpected closing parentheses ')' without matching opening parentheses."), start, start_line); + if (c == ')') { + advance(lexer_info); + return lexer_error(lexer_info, SLS_STR("Unexpected closing parentheses ')' without matching opening parentheses."), start, start_line); + } // Identifiers and Booleans if (is_identifier_start(lexer_info)) return parse_identifiers_and_booleans(lexer_info, c, start, start_line); if (c == ':') { + advance(lexer_info); /* NOTE(review): far_peek(lexer_info, 1) below now runs after this advance; if far_peek offsets are relative to the current position, the '::' check looks one character further than it did before - confirm intended. */ if (far_peek(lexer_info, 1) == ':') return lexer_error(lexer_info, SLS_STR("Invalid identifier literal: empty identifier after '::'."), start, start_line); else @@ -784,6 +873,14 @@ static LexerResult lexer_next(LexerInfo *lexer_info) { return lexer_error(lexer_info, error_msg, start, start_line); } +static void clean_token_string(TokenString token_string) { /* Releases the token array of a TokenString: each element of type TOKEN_TOKEN_STRING is cleaned recursively first, then token_string.tokens itself is freed. Elements of any other type are not individually released here. */ + for (size_t i = 0; i < token_string.length; i++) { + if (token_string.tokens[i].type == TOKEN_TOKEN_STRING) + clean_token_string(token_string.tokens[i].token_string); + } + free(token_string.tokens); +} + void 
clean_token_result(LexerTokenResult *head) { // Deallocates a LexerTokenResult linked list LexerTokenResult *next; @@ -791,7 +888,8 @@ void clean_token_result(LexerTokenResult *head) { next = head->next; if (head->type == SLS_ERROR) sls_str_free(&head->error.message); else { - if (head->result.type == TOKEN_STRING) sls_str_free(&head->error.message); + if (head->result.type == TOKEN_STRING) sls_str_free(&head->result.string_literal); + if (head->result.type == TOKEN_TOKEN_STRING) clean_token_string(head->result.token_string); } if (head) free(head); head = next; diff --git a/SLS_C/tests/lexer_test_helpers.c b/SLS_C/tests/lexer_test_helpers.c index 46e75b1..b22fc14 100644 --- a/SLS_C/tests/lexer_test_helpers.c +++ b/SLS_C/tests/lexer_test_helpers.c @@ -486,7 +486,7 @@ Boolean test_array_struct_inline_value(LexerTest *test, LexerResult result, size static LexerResult token_string_to_lexer_result(TokenString token_string, FileInfo file_info) { LexerTokenResult *new, *head; head = 0; - for (size_t i = 0; i> token_string.length; i++) { + for (size_t i = 0; i < token_string.length; i++) { new = (LexerTokenResult *)malloc(sizeof(LexerTokenResult)); *new = (LexerTokenResult) { .type = SLS_RESULT, .result = token_string.tokens[i], .file_info = file_info, .next = head }; head = new; diff --git a/SLS_C/tests/lexer_tests.c b/SLS_C/tests/lexer_tests.c index 3490a1e..e817e21 100644 --- a/SLS_C/tests/lexer_tests.c +++ b/SLS_C/tests/lexer_tests.c @@ -16,7 +16,7 @@ #include "tests/tests.h" -static const size_t NUM_OF_TESTS = 368; +static const size_t NUM_OF_TESTS = 367; static TestResult test_Empty_Statement() { LexerTest test = start_up_test(SLS_STR("Empty_Statement"), SLS_STR("")); @@ -2073,6 +2073,16 @@ static TestResult test_Char_Right_Brace() { return pass_test(&test, result); } +static TestResult test_Char_Escape_Tab() { /* Runs lexical_analysis on the source text '\t' (backslash-t between single quotes); fails on a lexer error, otherwise checks index 0 with test_character_value against 9 and index 1 with test_eof_value. */ + LexerTest test = start_up_test(SLS_STR("Char Escape Tab"), SLS_STR("'\\t'")); + LexerResult result = lexical_analysis(&test.lexer_info); + if (result.type 
== SLS_ERROR) return error_fail_test(&test, result, result.error); + size_t i = 0; + if (test_character_value(&test, result, i++, &(uint8_t){9})) return test.result; + if (test_eof_value(&test, result, i++, 0)) return test.result; + return pass_test(&test, result); +} + static TestResult test_Char_Escape_Backslash() { LexerTest test = start_up_test(SLS_STR("Char Escape Backslash"), SLS_STR("'\\\\'")); LexerResult result = lexical_analysis(&test.lexer_info); @@ -2083,6 +2093,26 @@ static TestResult test_Char_Escape_Backslash() { return pass_test(&test, result); } +static TestResult test_Char_Escape_Null_character() { /* Runs lexical_analysis on the source text '\0' (backslash-zero between single quotes); fails on a lexer error, otherwise checks index 0 with test_character_value against 0 and index 1 with test_eof_value. */ + LexerTest test = start_up_test(SLS_STR("Char Escape Null character"), SLS_STR("'\\0'")); + LexerResult result = lexical_analysis(&test.lexer_info); + if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error); + size_t i = 0; + if (test_character_value(&test, result, i++, &(uint8_t){0})) return test.result; + if (test_eof_value(&test, result, i++, 0)) return test.result; + return pass_test(&test, result); +} + +static TestResult test_Char_Escape_Single_quote() { /* Runs lexical_analysis on a backslash-escaped single quote between single quotes; fails on a lexer error, otherwise checks index 0 with test_character_value against 39 and index 1 with test_eof_value. */ + LexerTest test = start_up_test(SLS_STR("Char Escape Single quote"), SLS_STR("'\\''")); + LexerResult result = lexical_analysis(&test.lexer_info); + if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error); + size_t i = 0; + if (test_character_value(&test, result, i++, &(uint8_t){39})) return test.result; + if (test_eof_value(&test, result, i++, 0)) return test.result; + return pass_test(&test, result); +} + static TestResult test_Char_Escape_Carriage_return() { LexerTest test = start_up_test(SLS_STR("Char Escape Carriage return"), SLS_STR("'\\r'")); LexerResult result = lexical_analysis(&test.lexer_info); @@ -2103,36 +2133,6 @@ static TestResult test_Char_Escape_Newline() { return pass_test(&test, result); } -static TestResult test_Char_Escape_Tab() { - LexerTest test = start_up_test(SLS_STR("Char Escape Tab"), SLS_STR("'\\t'")); - LexerResult 
result = lexical_analysis(&test.lexer_info); - if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error); - size_t i = 0; - if (test_character_value(&test, result, i++, &(uint8_t){9})) return test.result; - if (test_eof_value(&test, result, i++, 0)) return test.result; - return pass_test(&test, result); -} - -static TestResult test_Char_Escape_Single_quote() { - LexerTest test = start_up_test(SLS_STR("Char Escape Single quote"), SLS_STR("'\\''")); - LexerResult result = lexical_analysis(&test.lexer_info); - if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error); - size_t i = 0; - if (test_character_value(&test, result, i++, &(uint8_t){39})) return test.result; - if (test_eof_value(&test, result, i++, 0)) return test.result; - return pass_test(&test, result); -} - -static TestResult test_Char_Escape_Null_character() { - LexerTest test = start_up_test(SLS_STR("Char Escape Null character"), SLS_STR("'\\0'")); - LexerResult result = lexical_analysis(&test.lexer_info); - if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error); - size_t i = 0; - if (test_character_value(&test, result, i++, &(uint8_t){0})) return test.result; - if (test_eof_value(&test, result, i++, 0)) return test.result; - return pass_test(&test, result); -} - static TestResult test_Char_With_Leading_Whitespace() { LexerTest test = start_up_test(SLS_STR("Char With Leading Whitespace"), SLS_STR(" 'A'")); LexerResult result = lexical_analysis(&test.lexer_info); @@ -3595,16 +3595,6 @@ static TestResult test_TokenString_Error_Inside() { return pass_test(&test, result); } -static TestResult test_TokenString_Unclosed_String_Inside() { - LexerTest test = start_up_test(SLS_STR("TokenString Unclosed String Inside"), SLS_STR("{ \"hello }")); - LexerResult result = lexical_analysis(&test.lexer_info); - if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error); - size_t i = 0; - if (test_token_string_value(&test, 
result, i++, &(TestTokenStringValue){1, (TestTokenStringToken[]){{(Boolean (*)(LexerTest *, LexerResult, size_t, void *))test_for_error, &SLS_STR("Invalid string literal: unclosed string literal.")}}})) return test.result; - if (test_eof_value(&test, result, i++, 0)) return test.result; - return pass_test(&test, result); -} - static TestResult test_TokenString_Function_Body() { LexerTest test = start_up_test(SLS_STR("TokenString Function Body"), SLS_STR("{ dup * }")); LexerResult result = lexical_analysis(&test.lexer_info); @@ -3882,12 +3872,12 @@ TestsReport run_lexer_tests() { test_report.tests[i++] = test_Char_Right_Bracket(); test_report.tests[i++] = test_Char_Left_Brace(); test_report.tests[i++] = test_Char_Right_Brace(); + test_report.tests[i++] = test_Char_Escape_Tab(); test_report.tests[i++] = test_Char_Escape_Backslash(); + test_report.tests[i++] = test_Char_Escape_Null_character(); + test_report.tests[i++] = test_Char_Escape_Single_quote(); test_report.tests[i++] = test_Char_Escape_Carriage_return(); test_report.tests[i++] = test_Char_Escape_Newline(); - test_report.tests[i++] = test_Char_Escape_Tab(); - test_report.tests[i++] = test_Char_Escape_Single_quote(); - test_report.tests[i++] = test_Char_Escape_Null_character(); test_report.tests[i++] = test_Char_With_Leading_Whitespace(); test_report.tests[i++] = test_Char_With_Trailing_Whitespace(); test_report.tests[i++] = test_Char_With_Both_Whitespace(); @@ -4035,7 +4025,6 @@ TestsReport run_lexer_tests() { test_report.tests[i++] = test_TokenString_Extra_Closing_Brace(); test_report.tests[i++] = test_TokenString_Only_Closing_Brace(); test_report.tests[i++] = test_TokenString_Error_Inside(); - test_report.tests[i++] = test_TokenString_Unclosed_String_Inside(); test_report.tests[i++] = test_TokenString_Function_Body(); test_report.tests[i++] = test_TokenString_Loop_Body(); test_report.tests[i++] = test_TokenString_Struct_Fields(); diff --git a/SLS_Tests/cases.yaml b/SLS_Tests/cases.yaml index fd82916..083da64 
100644 --- a/SLS_Tests/cases.yaml +++ b/SLS_Tests/cases.yaml @@ -2317,6 +2317,18 @@ stack_final: - type: char value: '}' +- name: Char Escape Tab + code: '''\\t''' + tokens: + - type: char + value: "\t" + operations: + - function: push + type: char + value: "\t" + stack_final: + - type: char + value: "\t" - name: Char Escape Backslash code: '''\\\\''' tokens: @@ -2329,6 +2341,30 @@ stack_final: - type: char value: \ +- name: Char Escape Null character + code: '''\\0''' + tokens: + - type: char + value: "\0" + operations: + - function: push + type: char + value: "\0" + stack_final: + - type: char + value: "\0" +- name: Char Escape Single quote + code: '''\\''''' + tokens: + - type: char + value: '''' + operations: + - function: push + type: char + value: '''' + stack_final: + - type: char + value: '''' - name: Char Escape Carriage return code: '''\\r''' tokens: @@ -2359,42 +2395,6 @@ value: ' ' -- name: Char Escape Tab - code: '''\\t''' - tokens: - - type: char - value: "\t" - operations: - - function: push - type: char - value: "\t" - stack_final: - - type: char - value: "\t" -- name: Char Escape Single quote - code: '''\\''''' - tokens: - - type: char - value: '''' - operations: - - function: push - type: char - value: '''' - stack_final: - - type: char - value: '''' -- name: Char Escape Null character - code: '''\\0''' - tokens: - - type: char - value: "\0" - operations: - - function: push - type: char - value: "\0" - stack_final: - - type: char - value: "\0" - name: Char With Leading Whitespace code: ' ''A''' tokens: @@ -4850,26 +4850,6 @@ value: 'Invalid decimal literal: unexpected ''a'' in decimal integer.' - type: identifier value: + -- name: TokenString Unclosed String Inside - code: '{ "hello }' - tokens: - - type: token_string - value: - - type: error - value: 'Invalid string literal: unclosed string literal.' - operations: - - function: push - type: token_string - value: - tokens: - - type: error - value: 'Invalid string literal: unclosed string literal.' 
- stack_final: - - type: token_string - value: - tokens: - - type: error - value: 'Invalid string literal: unclosed string literal.' - name: TokenString Function Body code: '{ dup * }' tokens: diff --git a/SLS_Tests/generate_tests/token_strings.py b/SLS_Tests/generate_tests/token_strings.py index 3376741..8f5fda9 100644 --- a/SLS_Tests/generate_tests/token_strings.py +++ b/SLS_Tests/generate_tests/token_strings.py @@ -494,13 +494,14 @@ class TokenStringTestGenerator(BaseTestGenerator): op_str = self.make_push_op("token_string", op_value_str) stack_str = self.make_stack_item("token_string", op_value_str) - self.add_test( - "TokenString Unclosed String Inside", - '{ "hello }', - [error_string], - [op_str], - [stack_str] - ) + if self.ENABLE_STRINGS: + self.add_test( + "TokenString Unclosed String Inside", + '{ "hello }', + [error_string], + [op_str], + [stack_str] + ) def generate_complex_tests(self): """Generate complex realistic test cases."""