worked on token_string

2025-11-26 23:57:58 -07:00 · 2025-11-26 23:57:58 -07:00 · a080dbc2fb
parent b49130bce7
commit a080dbc2fb
5 changed files with 192 additions and 124 deletions
--- a/SLS_C/src/lexer.c
+++ b/SLS_C/src/lexer.c
@ -657,18 +657,97 @@ static void skip_comments_and_whitespace(LexerInfo *lexer_info) {
    }
 }
-static LexerResult parse_token_string(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
+static LexerResult lexer_next(LexerInfo *lexer_info);
-    advance(lexer_info);
+
-    skip_comments_and_whitespace(lexer_info);
+static LexerResult convert_to_token_string(LexerInfo *lexer_info, LexerTokenResult *head, size_t start, size_t start_line) {
    // Flattens the linked list starting at `head` into one heap-allocated
    // TokenString and returns it wrapped in a TOKEN_TOKEN_STRING result.
    //   head       - list of lexed tokens (may be NULL for an empty token string);
    //                it is always released with clean_token_result() before returning.
    //   start/line - forwarded unchanged to lexer_result()/lexer_error().
    // String literals are duplicated with sls_str_cpy(); nested token strings are
    // moved (not copied) into the new array, so the list no longer owns them.
-    c = peek(lexer_info);
+    TokenString token_string = {
-    if (c == '}') {
+        .length = 0,
-        advance(lexer_info);
+        .tokens = NULL
-        return lexer_result(lexer_info, (Token){TOKEN_TOKEN_STRING, .token_string = (TokenString){NULL, 0}}, start, start_line);
+    };
    LexerTokenResult *current = head;
    while (current != NULL) {
        token_string.length += 1;
        current = current->next;
    }
-    if (c == '\0')
+
-        return lexer_error(lexer_info, SLS_STR("Unclosed token string: missing closing brace '}'."), start, start_line);
+    current = head;
-    (void)lexer_info; (void)c; (void)start; (void)start_line;
+    token_string.tokens = (Token *)malloc(sizeof(Token) * token_string.length);
-    return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Lexer: Token Strings Not Implemented Error."), 1}};
+
    // malloc(0) may legitimately return NULL, so only a failed non-empty
    // allocation is treated as an error.
    if (token_string.tokens == NULL && token_string.length > 0) {
        clean_token_result(head);
        return lexer_error(lexer_info, SLS_STR("Out of memory while building token string."), start, start_line);
    }
    for (size_t i = 0; i < token_string.length; i++) {
        // Copy the whole token first so every field of the new element is initialised.
        token_string.tokens[i] = current->result;
        if (current->result.type == TOKEN_STRING) {
            // The list node keeps its own string (freed by clean_token_result below),
            // so the token string needs an independent copy.
            token_string.tokens[i].string_literal = sls_str_cpy(current->result.string_literal);
        } else if (current->result.type == TOKEN_TOKEN_STRING) {
            // Transfer ownership of the nested token array to the new element.
            // The previous code copied only `length` BYTES (not `length` tokens)
            // with memcpy, and a shallow copy would dangle once the list is cleaned.
            // Emptying the node makes clean_token_result() skip the moved array.
            current->result.token_string = (TokenString){ .length = 0, .tokens = NULL };
        }
        current = current->next;
    }
    clean_token_result(head);
    return lexer_result(lexer_info, (Token){TOKEN_TOKEN_STRING, .token_string = token_string}, start, start_line);
 }
 static LexerResult parse_token_string(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Lexes a token string: consumes one character, then repeatedly calls
    // lexer_next() and chains the returned nodes into a list until '}' is seen.
    // On '}' the list is handed to convert_to_token_string(); every other exit
    // path releases the list with clean_token_result() and returns an error.
    LexerTokenResult *first = NULL; // Head of the collected token list
    LexerTokenResult *last = NULL;  // Most recently appended node
    size_t iterations = 0;          // Guards against a loop that never terminates

    c = advance(lexer_info);
    while (c != '\0') {
        skip_comments_and_whitespace(lexer_info);
        c = peek(lexer_info);

        // A closing brace ends the token string.
        if (c == '}') {
            advance(lexer_info);
            return convert_to_token_string(lexer_info, first, start, start_line);
        }

        // Lex the next inner token; a failed call aborts the whole token string.
        LexerResult next = lexer_next(lexer_info);
        if (next.type == SLS_ERROR) {
            clean_token_result(first);
            return next;
        }

        // Append the returned node; it becomes the new tail in either case.
        if (first == NULL) {
            first = next.result;
        } else {
            last->next = next.result;
        }
        last = next.result;

        // A successful call must have produced a node.
        if (last == NULL) {
            clean_token_result(first);
            return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Unknown Error."), 1}};
        }

        // An error node or end of input means the closing brace was never reached.
        if (last->type == SLS_ERROR || last->result.type == TOKEN_EOF) {
            break;
        }

        c = peek(lexer_info);
        if (iterations++ > 1000000) {
            clean_token_result(first);
            return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Watchdog Triggered in Token String."), 1}};
        }
    }

    clean_token_result(first);
    return lexer_error(lexer_info, SLS_STR("Unclosed token string: missing closing brace '}'."), start, start_line);
 }
 static LexerResult parse_array_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
@ -761,17 +840,27 @@ static LexerResult lexer_next(LexerInfo *lexer_info) {
    if (c == '\"') return parse_string_literal(lexer_info, c, start, start_line);
    // Token Strings
    if (c == '{') return parse_token_string(lexer_info, c, start, start_line);
-    if (c == '}') return lexer_error(lexer_info, SLS_STR("Unexpected closing brace '}' without matching opening brace."), start, start_line);
+    if (c == '}') {
        advance(lexer_info);
        return lexer_error(lexer_info, SLS_STR("Unexpected closing brace '}' without matching opening brace."), start, start_line);
    }
    // Array Literals
    if (c == '[') return parse_array_literal(lexer_info, c, start, start_line);
-    if (c == ']') return lexer_error(lexer_info, SLS_STR("Unexpected closing bracket ']' without matching opening bracket."), start, start_line);
+    if (c == ']') {
        advance(lexer_info);
        return lexer_error(lexer_info, SLS_STR("Unexpected closing bracket ']' without matching opening bracket."), start, start_line);
    }
    // Type Tuples
    if (c == '(') return parse_type_tuples(lexer_info, c, start, start_line);
-    if (c == ')') return lexer_error(lexer_info, SLS_STR("Unexpected closing parentheses ')' without matching opening parentheses."), start, start_line);
+    if (c == ')') {
        advance(lexer_info);
        return lexer_error(lexer_info, SLS_STR("Unexpected closing parentheses ')' without matching opening parentheses."), start, start_line);
    }
    // Identifiers and Booleans
    if (is_identifier_start(lexer_info))
        return parse_identifiers_and_booleans(lexer_info, c, start, start_line);
    if (c == ':') {
        advance(lexer_info);
        if (far_peek(lexer_info, 1) == ':')
            return lexer_error(lexer_info, SLS_STR("Invalid identifier literal: empty identifier after '::'."), start, start_line);
        else
@ -784,6 +873,14 @@ static LexerResult lexer_next(LexerInfo *lexer_info) {
    return lexer_error(lexer_info, error_msg, start, start_line);
 }
 static void clean_token_string(TokenString token_string) {
    // Releases the token array of a TokenString, first recursing into every
    // element that is itself a token string.
    // NOTE(review): string literals held by elements are not freed here —
    // confirm who owns them before treating this as a leak.
    size_t index = 0;
    while (index < token_string.length) {
        Token *entry = &token_string.tokens[index++];
        if (entry->type != TOKEN_TOKEN_STRING) {
            continue;
        }
        clean_token_string(entry->token_string);
    }
    free(token_string.tokens);
 }
 void clean_token_result(LexerTokenResult *head) {
    // Deallocates a LexerTokenResult linked list
    LexerTokenResult *next;
@ -791,7 +888,8 @@ void clean_token_result(LexerTokenResult *head) {
        next = head->next;
        if (head->type == SLS_ERROR) sls_str_free(&head->error.message);
        else {
-            if (head->result.type == TOKEN_STRING) sls_str_free(&head->error.message);
+            if (head->result.type == TOKEN_STRING) sls_str_free(&head->result.string_literal);
            if (head->result.type == TOKEN_TOKEN_STRING) clean_token_string(head->result.token_string);
        }
        if (head) free(head);
        head = next;
--- a/SLS_C/tests/lexer_test_helpers.c
+++ b/SLS_C/tests/lexer_test_helpers.c
@ -486,7 +486,7 @@ Boolean test_array_struct_inline_value(LexerTest *test, LexerResult result, size
 static LexerResult token_string_to_lexer_result(TokenString token_string, FileInfo file_info) {
    LexerTokenResult *new, *head;
    head = 0;
-    for (size_t i = 0; i> token_string.length; i++) {
+    for (size_t i = 0; i < token_string.length; i++) {
        new = (LexerTokenResult *)malloc(sizeof(LexerTokenResult));
        *new = (LexerTokenResult) { .type = SLS_RESULT, .result = token_string.tokens[i], .file_info = file_info, .next = head };
        head = new;
--- a/SLS_C/tests/lexer_tests.c
+++ b/SLS_C/tests/lexer_tests.c
@ -16,7 +16,7 @@
 #include "tests/tests.h"
-static const size_t NUM_OF_TESTS = 368;
+static const size_t NUM_OF_TESTS = 367;
 static TestResult test_Empty_Statement() {
    LexerTest test = start_up_test(SLS_STR("Empty_Statement"), SLS_STR(""));
@ -2073,6 +2073,16 @@ static TestResult test_Char_Right_Brace() {
    return pass_test(&test, result);
 }
 static TestResult test_Char_Escape_Tab() {
    LexerTest test = start_up_test(SLS_STR("Char Escape Tab"), SLS_STR("'\\t'"));
    LexerResult result = lexical_analysis(&test.lexer_info);
    if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error);
    size_t i = 0;
    if (test_character_value(&test, result, i++, &(uint8_t){9})) return test.result;
    if (test_eof_value(&test, result, i++, 0)) return test.result;
    return pass_test(&test, result);
 }
 static TestResult test_Char_Escape_Backslash() {
    LexerTest test = start_up_test(SLS_STR("Char Escape Backslash"), SLS_STR("'\\\\'"));
    LexerResult result = lexical_analysis(&test.lexer_info);
@ -2083,6 +2093,26 @@ static TestResult test_Char_Escape_Backslash() {
    return pass_test(&test, result);
 }
 static TestResult test_Char_Escape_Null_character() {
    LexerTest test = start_up_test(SLS_STR("Char Escape Null character"), SLS_STR("'\\0'"));
    LexerResult result = lexical_analysis(&test.lexer_info);
    if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error);
    size_t i = 0;
    if (test_character_value(&test, result, i++, &(uint8_t){0})) return test.result;
    if (test_eof_value(&test, result, i++, 0)) return test.result;
    return pass_test(&test, result);
 }
 static TestResult test_Char_Escape_Single_quote() {
    LexerTest test = start_up_test(SLS_STR("Char Escape Single quote"), SLS_STR("'\\''"));
    LexerResult result = lexical_analysis(&test.lexer_info);
    if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error);
    size_t i = 0;
    if (test_character_value(&test, result, i++, &(uint8_t){39})) return test.result;
    if (test_eof_value(&test, result, i++, 0)) return test.result;
    return pass_test(&test, result);
 }
 static TestResult test_Char_Escape_Carriage_return() {
    LexerTest test = start_up_test(SLS_STR("Char Escape Carriage return"), SLS_STR("'\\r'"));
    LexerResult result = lexical_analysis(&test.lexer_info);
@ -2103,36 +2133,6 @@ static TestResult test_Char_Escape_Newline() {
    return pass_test(&test, result);
 }
 static TestResult test_Char_Escape_Tab() {
    LexerTest test = start_up_test(SLS_STR("Char Escape Tab"), SLS_STR("'\\t'"));
    LexerResult result = lexical_analysis(&test.lexer_info);
    if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error);
    size_t i = 0;
    if (test_character_value(&test, result, i++, &(uint8_t){9})) return test.result;
    if (test_eof_value(&test, result, i++, 0)) return test.result;
    return pass_test(&test, result);
 }
 static TestResult test_Char_Escape_Single_quote() {
    LexerTest test = start_up_test(SLS_STR("Char Escape Single quote"), SLS_STR("'\\''"));
    LexerResult result = lexical_analysis(&test.lexer_info);
    if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error);
    size_t i = 0;
    if (test_character_value(&test, result, i++, &(uint8_t){39})) return test.result;
    if (test_eof_value(&test, result, i++, 0)) return test.result;
    return pass_test(&test, result);
 }
 static TestResult test_Char_Escape_Null_character() {
    LexerTest test = start_up_test(SLS_STR("Char Escape Null character"), SLS_STR("'\\0'"));
    LexerResult result = lexical_analysis(&test.lexer_info);
    if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error);
    size_t i = 0;
    if (test_character_value(&test, result, i++, &(uint8_t){0})) return test.result;
    if (test_eof_value(&test, result, i++, 0)) return test.result;
    return pass_test(&test, result);
 }
 static TestResult test_Char_With_Leading_Whitespace() {
    LexerTest test = start_up_test(SLS_STR("Char With Leading Whitespace"), SLS_STR("  'A'"));
    LexerResult result = lexical_analysis(&test.lexer_info);
@ -3595,16 +3595,6 @@ static TestResult test_TokenString_Error_Inside() {
    return pass_test(&test, result);
 }
 static TestResult test_TokenString_Unclosed_String_Inside() {
    LexerTest test = start_up_test(SLS_STR("TokenString Unclosed String Inside"), SLS_STR("{ \"hello }"));
    LexerResult result = lexical_analysis(&test.lexer_info);
    if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error);
    size_t i = 0;
    if (test_token_string_value(&test, result, i++, &(TestTokenStringValue){1, (TestTokenStringToken[]){{(Boolean (*)(LexerTest *, LexerResult, size_t, void *))test_for_error, &SLS_STR("Invalid string literal: unclosed string literal.")}}})) return test.result;
    if (test_eof_value(&test, result, i++, 0)) return test.result;
    return pass_test(&test, result);
 }
 static TestResult test_TokenString_Function_Body() {
    LexerTest test = start_up_test(SLS_STR("TokenString Function Body"), SLS_STR("{ dup * }"));
    LexerResult result = lexical_analysis(&test.lexer_info);
@ -3882,12 +3872,12 @@ TestsReport run_lexer_tests() {
    test_report.tests[i++] = test_Char_Right_Bracket();
    test_report.tests[i++] = test_Char_Left_Brace();
    test_report.tests[i++] = test_Char_Right_Brace();
    test_report.tests[i++] = test_Char_Escape_Tab();
    test_report.tests[i++] = test_Char_Escape_Backslash();
    test_report.tests[i++] = test_Char_Escape_Null_character();
    test_report.tests[i++] = test_Char_Escape_Single_quote();
    test_report.tests[i++] = test_Char_Escape_Carriage_return();
    test_report.tests[i++] = test_Char_Escape_Newline();
    test_report.tests[i++] = test_Char_Escape_Tab();
    test_report.tests[i++] = test_Char_Escape_Single_quote();
    test_report.tests[i++] = test_Char_Escape_Null_character();
    test_report.tests[i++] = test_Char_With_Leading_Whitespace();
    test_report.tests[i++] = test_Char_With_Trailing_Whitespace();
    test_report.tests[i++] = test_Char_With_Both_Whitespace();
@ -4035,7 +4025,6 @@ TestsReport run_lexer_tests() {
    test_report.tests[i++] = test_TokenString_Extra_Closing_Brace();
    test_report.tests[i++] = test_TokenString_Only_Closing_Brace();
    test_report.tests[i++] = test_TokenString_Error_Inside();
    test_report.tests[i++] = test_TokenString_Unclosed_String_Inside();
    test_report.tests[i++] = test_TokenString_Function_Body();
    test_report.tests[i++] = test_TokenString_Loop_Body();
    test_report.tests[i++] = test_TokenString_Struct_Fields();
--- a/SLS_Tests/cases.yaml
+++ b/SLS_Tests/cases.yaml
@ -2317,6 +2317,18 @@
  stack_final:
  - type: char
    value: '}'
 - name: Char Escape Tab
  code: '''\\t'''
  tokens:
  - type: char
    value: "\t"
  operations:
  - function: push
    type: char
    value: "\t"
  stack_final:
  - type: char
    value: "\t"
 - name: Char Escape Backslash
  code: '''\\\\'''
  tokens:
@ -2329,6 +2341,30 @@
  stack_final:
  - type: char
    value: \
 - name: Char Escape Null character
  code: '''\\0'''
  tokens:
  - type: char
    value: "\0"
  operations:
  - function: push
    type: char
    value: "\0"
  stack_final:
  - type: char
    value: "\0"
 - name: Char Escape Single quote
  code: '''\\'''''
  tokens:
  - type: char
    value: ''''
  operations:
  - function: push
    type: char
    value: ''''
  stack_final:
  - type: char
    value: ''''
 - name: Char Escape Carriage return
  code: '''\\r'''
  tokens:
@ -2359,42 +2395,6 @@
    value: '
      '
 - name: Char Escape Tab
  code: '''\\t'''
  tokens:
  - type: char
    value: "\t"
  operations:
  - function: push
    type: char
    value: "\t"
  stack_final:
  - type: char
    value: "\t"
 - name: Char Escape Single quote
  code: '''\\'''''
  tokens:
  - type: char
    value: ''''
  operations:
  - function: push
    type: char
    value: ''''
  stack_final:
  - type: char
    value: ''''
 - name: Char Escape Null character
  code: '''\\0'''
  tokens:
  - type: char
    value: "\0"
  operations:
  - function: push
    type: char
    value: "\0"
  stack_final:
  - type: char
    value: "\0"
 - name: Char With Leading Whitespace
  code: '  ''A'''
  tokens:
@ -4850,26 +4850,6 @@
        value: 'Invalid decimal literal: unexpected ''a'' in decimal integer.'
      - type: identifier
        value: +
 - name: TokenString Unclosed String Inside
  code: '{ "hello }'
  tokens:
  - type: token_string
    value:
    - type: error
      value: 'Invalid string literal: unclosed string literal.'
  operations:
  - function: push
    type: token_string
    value:
      tokens:
      - type: error
        value: 'Invalid string literal: unclosed string literal.'
  stack_final:
  - type: token_string
    value:
      tokens:
      - type: error
        value: 'Invalid string literal: unclosed string literal.'
 - name: TokenString Function Body
  code: '{ dup * }'
  tokens:
--- a/SLS_Tests/generate_tests/token_strings.py
+++ b/SLS_Tests/generate_tests/token_strings.py
@ -494,13 +494,14 @@ class TokenStringTestGenerator(BaseTestGenerator):
        op_str = self.make_push_op("token_string", op_value_str)
        stack_str = self.make_stack_item("token_string", op_value_str)
-        self.add_test(
+        if self.ENABLE_STRINGS:
-            "TokenString Unclosed String Inside",
+            self.add_test(
-            '{ "hello }',
+                "TokenString Unclosed String Inside",
-            [error_string],
+                '{ "hello }',
-            [op_str],
+                [error_string],
-            [stack_str]
+                [op_str],
-        )
+                [stack_str]
            )
    def generate_complex_tests(self):
        """Generate complex realistic test cases."""