worked on token_string

This commit is contained in:
Kyler Olsen 2025-11-26 23:57:58 -07:00
parent b49130bce7
commit a080dbc2fb
5 changed files with 192 additions and 124 deletions

View File

@ -657,18 +657,97 @@ static void skip_comments_and_whitespace(LexerInfo *lexer_info) {
} }
} }
static LexerResult parse_token_string(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { static LexerResult lexer_next(LexerInfo *lexer_info);
advance(lexer_info);
skip_comments_and_whitespace(lexer_info); static LexerResult convert_to_token_string(LexerInfo *lexer_info, LexerTokenResult *head, size_t start, size_t start_line) {
c = peek(lexer_info); TokenString token_string = {
if (c == '}') { .length = 0,
advance(lexer_info); .tokens = NULL
return lexer_result(lexer_info, (Token){TOKEN_TOKEN_STRING, .token_string = (TokenString){NULL, 0}}, start, start_line); };
LexerTokenResult *current = head;
while (current != NULL) {
token_string.length += 1;
current = current->next;
} }
if (c == '\0')
return lexer_error(lexer_info, SLS_STR("Unclosed token string: missing closing brace '}'."), start, start_line); current = head;
(void)lexer_info; (void)c; (void)start; (void)start_line; token_string.tokens = (Token *)malloc(sizeof(Token) * token_string.length);
return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Lexer: Token Strings Not Implemented Error."), 1}};
for (size_t i = 0; i < token_string.length; i++) {
if (current->result.type == TOKEN_STRING) {
token_string.tokens[i].type = TOKEN_STRING;
token_string.tokens[i].string_literal = sls_str_cpy(current->result.string_literal);
} else if (current->result.type == TOKEN_TOKEN_STRING) {
token_string.tokens[i].type = TOKEN_TOKEN_STRING;
token_string.tokens[i].token_string = (TokenString){
.length = current->result.token_string.length,
.tokens = (Token *)malloc(sizeof(Token) * current->result.token_string.length)
};
memcpy(token_string.tokens[i].token_string.tokens, current->result.token_string.tokens, current->result.token_string.length);
} else token_string.tokens[i] = current->result;
current = current->next;
}
clean_token_result(head);
return lexer_result(lexer_info, (Token){TOKEN_TOKEN_STRING, .token_string = token_string}, start, start_line);
}
static LexerResult parse_token_string(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
// Lexes a token string
LexerResult result; // For lexer_next returns
LexerTokenResult *head = 0;
LexerTokenResult *current = 0;
size_t watchdog = 0;
c = advance(lexer_info);
while (c != '\0') {
skip_comments_and_whitespace(lexer_info);
c = peek(lexer_info);
// Stop at the end of the token string
if (c == '}') {
advance(lexer_info);
return convert_to_token_string(lexer_info, head, start, start_line);
}
// Get next token
result = lexer_next(lexer_info);
// Handle Errors
if (result.type == SLS_ERROR) {
clean_token_result(head);
return result;
}
// Save result
if (head == 0) {
head = result.result;
current = head;
} else {
current->next = result.result;
current = current->next;
}
// Current should not be null_ptr
if (current == 0) {
clean_token_result(head);
return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Unknown Error."), 1}};
}
if (current->type == SLS_ERROR || current->result.type == TOKEN_EOF) break;
c = peek(lexer_info);
if (watchdog++ > 1000000) {
clean_token_result(head);
return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Watchdog Triggered in Token String."), 1}};
}
}
clean_token_result(head);
return lexer_error(lexer_info, SLS_STR("Unclosed token string: missing closing brace '}'."), start, start_line);
} }
static LexerResult parse_array_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { static LexerResult parse_array_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
@ -761,17 +840,27 @@ static LexerResult lexer_next(LexerInfo *lexer_info) {
if (c == '\"') return parse_string_literal(lexer_info, c, start, start_line); if (c == '\"') return parse_string_literal(lexer_info, c, start, start_line);
// Token Strings // Token Strings
if (c == '{') return parse_token_string(lexer_info, c, start, start_line); if (c == '{') return parse_token_string(lexer_info, c, start, start_line);
if (c == '}') return lexer_error(lexer_info, SLS_STR("Unexpected closing brace '}' without matching opening brace."), start, start_line); if (c == '}') {
advance(lexer_info);
return lexer_error(lexer_info, SLS_STR("Unexpected closing brace '}' without matching opening brace."), start, start_line);
}
// Array Literals // Array Literals
if (c == '[') return parse_array_literal(lexer_info, c, start, start_line); if (c == '[') return parse_array_literal(lexer_info, c, start, start_line);
if (c == ']') return lexer_error(lexer_info, SLS_STR("Unexpected closing bracket ']' without matching opening bracket."), start, start_line); if (c == ']') {
advance(lexer_info);
return lexer_error(lexer_info, SLS_STR("Unexpected closing bracket ']' without matching opening bracket."), start, start_line);
}
// Type Tuples // Type Tuples
if (c == '(') return parse_type_tuples(lexer_info, c, start, start_line); if (c == '(') return parse_type_tuples(lexer_info, c, start, start_line);
if (c == ')') return lexer_error(lexer_info, SLS_STR("Unexpected closing parentheses ')' without matching opening parentheses."), start, start_line); if (c == ')') {
advance(lexer_info);
return lexer_error(lexer_info, SLS_STR("Unexpected closing parentheses ')' without matching opening parentheses."), start, start_line);
}
// Identifiers and Booleans // Identifiers and Booleans
if (is_identifier_start(lexer_info)) if (is_identifier_start(lexer_info))
return parse_identifiers_and_booleans(lexer_info, c, start, start_line); return parse_identifiers_and_booleans(lexer_info, c, start, start_line);
if (c == ':') { if (c == ':') {
advance(lexer_info);
if (far_peek(lexer_info, 1) == ':') if (far_peek(lexer_info, 1) == ':')
return lexer_error(lexer_info, SLS_STR("Invalid identifier literal: empty identifier after '::'."), start, start_line); return lexer_error(lexer_info, SLS_STR("Invalid identifier literal: empty identifier after '::'."), start, start_line);
else else
@ -784,6 +873,14 @@ static LexerResult lexer_next(LexerInfo *lexer_info) {
return lexer_error(lexer_info, error_msg, start, start_line); return lexer_error(lexer_info, error_msg, start, start_line);
} }
static void clean_token_string(TokenString token_string) {
for (size_t i = 0; i < token_string.length; i++) {
if (token_string.tokens[i].type == TOKEN_TOKEN_STRING)
clean_token_string(token_string.tokens[i].token_string);
}
free(token_string.tokens);
}
void clean_token_result(LexerTokenResult *head) { void clean_token_result(LexerTokenResult *head) {
// Deallocates a LexerTokenResult linked list // Deallocates a LexerTokenResult linked list
LexerTokenResult *next; LexerTokenResult *next;
@ -791,7 +888,8 @@ void clean_token_result(LexerTokenResult *head) {
next = head->next; next = head->next;
if (head->type == SLS_ERROR) sls_str_free(&head->error.message); if (head->type == SLS_ERROR) sls_str_free(&head->error.message);
else { else {
if (head->result.type == TOKEN_STRING) sls_str_free(&head->error.message); if (head->result.type == TOKEN_STRING) sls_str_free(&head->result.string_literal);
if (head->result.type == TOKEN_TOKEN_STRING) clean_token_string(head->result.token_string);
} }
if (head) free(head); if (head) free(head);
head = next; head = next;

View File

@ -486,7 +486,7 @@ Boolean test_array_struct_inline_value(LexerTest *test, LexerResult result, size
static LexerResult token_string_to_lexer_result(TokenString token_string, FileInfo file_info) { static LexerResult token_string_to_lexer_result(TokenString token_string, FileInfo file_info) {
LexerTokenResult *new, *head; LexerTokenResult *new, *head;
head = 0; head = 0;
for (size_t i = 0; i> token_string.length; i++) { for (size_t i = 0; i < token_string.length; i++) {
new = (LexerTokenResult *)malloc(sizeof(LexerTokenResult)); new = (LexerTokenResult *)malloc(sizeof(LexerTokenResult));
*new = (LexerTokenResult) { .type = SLS_RESULT, .result = token_string.tokens[i], .file_info = file_info, .next = head }; *new = (LexerTokenResult) { .type = SLS_RESULT, .result = token_string.tokens[i], .file_info = file_info, .next = head };
head = new; head = new;

View File

@ -16,7 +16,7 @@
#include "tests/tests.h" #include "tests/tests.h"
static const size_t NUM_OF_TESTS = 368; static const size_t NUM_OF_TESTS = 367;
static TestResult test_Empty_Statement() { static TestResult test_Empty_Statement() {
LexerTest test = start_up_test(SLS_STR("Empty_Statement"), SLS_STR("")); LexerTest test = start_up_test(SLS_STR("Empty_Statement"), SLS_STR(""));
@ -2073,6 +2073,16 @@ static TestResult test_Char_Right_Brace() {
return pass_test(&test, result); return pass_test(&test, result);
} }
static TestResult test_Char_Escape_Tab() {
LexerTest test = start_up_test(SLS_STR("Char Escape Tab"), SLS_STR("'\\t'"));
LexerResult result = lexical_analysis(&test.lexer_info);
if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error);
size_t i = 0;
if (test_character_value(&test, result, i++, &(uint8_t){9})) return test.result;
if (test_eof_value(&test, result, i++, 0)) return test.result;
return pass_test(&test, result);
}
static TestResult test_Char_Escape_Backslash() { static TestResult test_Char_Escape_Backslash() {
LexerTest test = start_up_test(SLS_STR("Char Escape Backslash"), SLS_STR("'\\\\'")); LexerTest test = start_up_test(SLS_STR("Char Escape Backslash"), SLS_STR("'\\\\'"));
LexerResult result = lexical_analysis(&test.lexer_info); LexerResult result = lexical_analysis(&test.lexer_info);
@ -2083,6 +2093,26 @@ static TestResult test_Char_Escape_Backslash() {
return pass_test(&test, result); return pass_test(&test, result);
} }
static TestResult test_Char_Escape_Null_character() {
LexerTest test = start_up_test(SLS_STR("Char Escape Null character"), SLS_STR("'\\0'"));
LexerResult result = lexical_analysis(&test.lexer_info);
if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error);
size_t i = 0;
if (test_character_value(&test, result, i++, &(uint8_t){0})) return test.result;
if (test_eof_value(&test, result, i++, 0)) return test.result;
return pass_test(&test, result);
}
static TestResult test_Char_Escape_Single_quote() {
LexerTest test = start_up_test(SLS_STR("Char Escape Single quote"), SLS_STR("'\\''"));
LexerResult result = lexical_analysis(&test.lexer_info);
if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error);
size_t i = 0;
if (test_character_value(&test, result, i++, &(uint8_t){39})) return test.result;
if (test_eof_value(&test, result, i++, 0)) return test.result;
return pass_test(&test, result);
}
static TestResult test_Char_Escape_Carriage_return() { static TestResult test_Char_Escape_Carriage_return() {
LexerTest test = start_up_test(SLS_STR("Char Escape Carriage return"), SLS_STR("'\\r'")); LexerTest test = start_up_test(SLS_STR("Char Escape Carriage return"), SLS_STR("'\\r'"));
LexerResult result = lexical_analysis(&test.lexer_info); LexerResult result = lexical_analysis(&test.lexer_info);
@ -2103,36 +2133,6 @@ static TestResult test_Char_Escape_Newline() {
return pass_test(&test, result); return pass_test(&test, result);
} }
static TestResult test_Char_Escape_Tab() {
LexerTest test = start_up_test(SLS_STR("Char Escape Tab"), SLS_STR("'\\t'"));
LexerResult result = lexical_analysis(&test.lexer_info);
if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error);
size_t i = 0;
if (test_character_value(&test, result, i++, &(uint8_t){9})) return test.result;
if (test_eof_value(&test, result, i++, 0)) return test.result;
return pass_test(&test, result);
}
static TestResult test_Char_Escape_Single_quote() {
LexerTest test = start_up_test(SLS_STR("Char Escape Single quote"), SLS_STR("'\\''"));
LexerResult result = lexical_analysis(&test.lexer_info);
if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error);
size_t i = 0;
if (test_character_value(&test, result, i++, &(uint8_t){39})) return test.result;
if (test_eof_value(&test, result, i++, 0)) return test.result;
return pass_test(&test, result);
}
static TestResult test_Char_Escape_Null_character() {
LexerTest test = start_up_test(SLS_STR("Char Escape Null character"), SLS_STR("'\\0'"));
LexerResult result = lexical_analysis(&test.lexer_info);
if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error);
size_t i = 0;
if (test_character_value(&test, result, i++, &(uint8_t){0})) return test.result;
if (test_eof_value(&test, result, i++, 0)) return test.result;
return pass_test(&test, result);
}
static TestResult test_Char_With_Leading_Whitespace() { static TestResult test_Char_With_Leading_Whitespace() {
LexerTest test = start_up_test(SLS_STR("Char With Leading Whitespace"), SLS_STR(" 'A'")); LexerTest test = start_up_test(SLS_STR("Char With Leading Whitespace"), SLS_STR(" 'A'"));
LexerResult result = lexical_analysis(&test.lexer_info); LexerResult result = lexical_analysis(&test.lexer_info);
@ -3595,16 +3595,6 @@ static TestResult test_TokenString_Error_Inside() {
return pass_test(&test, result); return pass_test(&test, result);
} }
static TestResult test_TokenString_Unclosed_String_Inside() {
LexerTest test = start_up_test(SLS_STR("TokenString Unclosed String Inside"), SLS_STR("{ \"hello }"));
LexerResult result = lexical_analysis(&test.lexer_info);
if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error);
size_t i = 0;
if (test_token_string_value(&test, result, i++, &(TestTokenStringValue){1, (TestTokenStringToken[]){{(Boolean (*)(LexerTest *, LexerResult, size_t, void *))test_for_error, &SLS_STR("Invalid string literal: unclosed string literal.")}}})) return test.result;
if (test_eof_value(&test, result, i++, 0)) return test.result;
return pass_test(&test, result);
}
static TestResult test_TokenString_Function_Body() { static TestResult test_TokenString_Function_Body() {
LexerTest test = start_up_test(SLS_STR("TokenString Function Body"), SLS_STR("{ dup * }")); LexerTest test = start_up_test(SLS_STR("TokenString Function Body"), SLS_STR("{ dup * }"));
LexerResult result = lexical_analysis(&test.lexer_info); LexerResult result = lexical_analysis(&test.lexer_info);
@ -3882,12 +3872,12 @@ TestsReport run_lexer_tests() {
test_report.tests[i++] = test_Char_Right_Bracket(); test_report.tests[i++] = test_Char_Right_Bracket();
test_report.tests[i++] = test_Char_Left_Brace(); test_report.tests[i++] = test_Char_Left_Brace();
test_report.tests[i++] = test_Char_Right_Brace(); test_report.tests[i++] = test_Char_Right_Brace();
test_report.tests[i++] = test_Char_Escape_Tab();
test_report.tests[i++] = test_Char_Escape_Backslash(); test_report.tests[i++] = test_Char_Escape_Backslash();
test_report.tests[i++] = test_Char_Escape_Null_character();
test_report.tests[i++] = test_Char_Escape_Single_quote();
test_report.tests[i++] = test_Char_Escape_Carriage_return(); test_report.tests[i++] = test_Char_Escape_Carriage_return();
test_report.tests[i++] = test_Char_Escape_Newline(); test_report.tests[i++] = test_Char_Escape_Newline();
test_report.tests[i++] = test_Char_Escape_Tab();
test_report.tests[i++] = test_Char_Escape_Single_quote();
test_report.tests[i++] = test_Char_Escape_Null_character();
test_report.tests[i++] = test_Char_With_Leading_Whitespace(); test_report.tests[i++] = test_Char_With_Leading_Whitespace();
test_report.tests[i++] = test_Char_With_Trailing_Whitespace(); test_report.tests[i++] = test_Char_With_Trailing_Whitespace();
test_report.tests[i++] = test_Char_With_Both_Whitespace(); test_report.tests[i++] = test_Char_With_Both_Whitespace();
@ -4035,7 +4025,6 @@ TestsReport run_lexer_tests() {
test_report.tests[i++] = test_TokenString_Extra_Closing_Brace(); test_report.tests[i++] = test_TokenString_Extra_Closing_Brace();
test_report.tests[i++] = test_TokenString_Only_Closing_Brace(); test_report.tests[i++] = test_TokenString_Only_Closing_Brace();
test_report.tests[i++] = test_TokenString_Error_Inside(); test_report.tests[i++] = test_TokenString_Error_Inside();
test_report.tests[i++] = test_TokenString_Unclosed_String_Inside();
test_report.tests[i++] = test_TokenString_Function_Body(); test_report.tests[i++] = test_TokenString_Function_Body();
test_report.tests[i++] = test_TokenString_Loop_Body(); test_report.tests[i++] = test_TokenString_Loop_Body();
test_report.tests[i++] = test_TokenString_Struct_Fields(); test_report.tests[i++] = test_TokenString_Struct_Fields();

View File

@ -2317,6 +2317,18 @@
stack_final: stack_final:
- type: char - type: char
value: '}' value: '}'
- name: Char Escape Tab
code: '''\\t'''
tokens:
- type: char
value: "\t"
operations:
- function: push
type: char
value: "\t"
stack_final:
- type: char
value: "\t"
- name: Char Escape Backslash - name: Char Escape Backslash
code: '''\\\\''' code: '''\\\\'''
tokens: tokens:
@ -2329,6 +2341,30 @@
stack_final: stack_final:
- type: char - type: char
value: \ value: \
- name: Char Escape Null character
code: '''\\0'''
tokens:
- type: char
value: "\0"
operations:
- function: push
type: char
value: "\0"
stack_final:
- type: char
value: "\0"
- name: Char Escape Single quote
code: '''\\'''''
tokens:
- type: char
value: ''''
operations:
- function: push
type: char
value: ''''
stack_final:
- type: char
value: ''''
- name: Char Escape Carriage return - name: Char Escape Carriage return
code: '''\\r''' code: '''\\r'''
tokens: tokens:
@ -2359,42 +2395,6 @@
value: ' value: '
' '
- name: Char Escape Tab
code: '''\\t'''
tokens:
- type: char
value: "\t"
operations:
- function: push
type: char
value: "\t"
stack_final:
- type: char
value: "\t"
- name: Char Escape Single quote
code: '''\\'''''
tokens:
- type: char
value: ''''
operations:
- function: push
type: char
value: ''''
stack_final:
- type: char
value: ''''
- name: Char Escape Null character
code: '''\\0'''
tokens:
- type: char
value: "\0"
operations:
- function: push
type: char
value: "\0"
stack_final:
- type: char
value: "\0"
- name: Char With Leading Whitespace - name: Char With Leading Whitespace
code: ' ''A''' code: ' ''A'''
tokens: tokens:
@ -4850,26 +4850,6 @@
value: 'Invalid decimal literal: unexpected ''a'' in decimal integer.' value: 'Invalid decimal literal: unexpected ''a'' in decimal integer.'
- type: identifier - type: identifier
value: + value: +
- name: TokenString Unclosed String Inside
code: '{ "hello }'
tokens:
- type: token_string
value:
- type: error
value: 'Invalid string literal: unclosed string literal.'
operations:
- function: push
type: token_string
value:
tokens:
- type: error
value: 'Invalid string literal: unclosed string literal.'
stack_final:
- type: token_string
value:
tokens:
- type: error
value: 'Invalid string literal: unclosed string literal.'
- name: TokenString Function Body - name: TokenString Function Body
code: '{ dup * }' code: '{ dup * }'
tokens: tokens:

View File

@ -494,13 +494,14 @@ class TokenStringTestGenerator(BaseTestGenerator):
op_str = self.make_push_op("token_string", op_value_str) op_str = self.make_push_op("token_string", op_value_str)
stack_str = self.make_stack_item("token_string", op_value_str) stack_str = self.make_stack_item("token_string", op_value_str)
self.add_test( if self.ENABLE_STRINGS:
"TokenString Unclosed String Inside", self.add_test(
'{ "hello }', "TokenString Unclosed String Inside",
[error_string], '{ "hello }',
[op_str], [error_string],
[stack_str] [op_str],
) [stack_str]
)
def generate_complex_tests(self): def generate_complex_tests(self):
"""Generate complex realistic test cases.""" """Generate complex realistic test cases."""