From c1322874f1cce9d17203131f111c0c9048c26ca6 Mon Sep 17 00:00:00 2001
From: Kyler
Date: Tue, 4 Nov 2025 23:16:12 -0700
Subject: [PATCH] Worked on lexer

---
Review notes (everything up to the diffstat is ignored when applying):
* advance(): stays put at the terminating '\0' instead of stepping past it.
* lexer_next(): a lone '/' no longer hangs the skip loop, a comment on the
  last line stops at '\0', and the "not implemented" error moved to the end
  of the function so the stubbed branches are reachable.
* <ctype.h> arguments are cast to unsigned char.
* clean_token_result(): dropped the redundant NULL test before free().
* lexical_analysis(): NULL instead of 0; a node is validated before linking.
* TODO confirm: indentation was restored as 4 spaces; lexer.c is assumed to
  include <ctype.h> and <stdlib.h> already (its include block is not part of
  this patch).
* The post-image blob id on the "index" line predates these edits.

 SLS_C/src/lexer.c | 139 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 135 insertions(+), 4 deletions(-)

diff --git a/SLS_C/src/lexer.c b/SLS_C/src/lexer.c
index 63fc4b5..f3fca85 100644
--- a/SLS_C/src/lexer.c
+++ b/SLS_C/src/lexer.c
@@ -57,6 +57,7 @@ const char *INTEGER_TYPES_NAMES[] = {
 };
 
 void init_lexer(LexerInfo *lexer_info, const char *filename, const char *source_code) {
+    // Initializes a LexerInfo struct with file info and source code
     lexer_info->filename = filename;
     lexer_info->source_code = source_code;
     lexer_info->pos = 0;
@@ -65,6 +66,7 @@ void init_lexer(LexerInfo *lexer_info, const char *filename, const char *source_
 }
 
 static FileInfo get_file_info(LexerInfo *lexer_info, size_t start, size_t start_line) {
+    // Creates a FileInfo struct based on starting and current lexer states
     return (FileInfo){
         .filename = lexer_info->filename,
         .line = lexer_info->line,
@@ -75,24 +77,36 @@ static FileInfo get_file_info(LexerInfo *lexer_info, size_t start, size_t start_
 }
 
 static char peek(LexerInfo *lexer_info) {
+    // Returns the current character from the source code
     return lexer_info->source_code[lexer_info->pos];
 }
 
 static char far_peek(LexerInfo *lexer_info, size_t index) {
+    // Returns the character `index` positions past the current one (no bounds check)
     return lexer_info->source_code[lexer_info->pos + index];
 }
 
-static void advance(LexerInfo *lexer_info) {
+static char advance(LexerInfo *lexer_info) {
+    // Advances lexer_info to the next character and returns that character.
+    // At the terminating '\0' the position is left untouched, so the lexer
+    // can never step (or read) past the end of the source buffer.
+    if (lexer_info->source_code[lexer_info->pos] == '\0') {
+        return '\0';
+    }
     if (lexer_info->source_code[lexer_info->pos] == '\n') {
+        // If a new line is encountered, advance line and reset column
         lexer_info->line++;
         lexer_info->column = 1;
     } else {
+        // Elsewhere in a line, advance column
         lexer_info->column++;
     }
-    lexer_info->source_code[lexer_info->pos++];
+    // Advance to and return the next character
+    return lexer_info->source_code[++lexer_info->pos];
 }
 
 static LexerResult lexer_result(LexerInfo *lexer_info, Token token, size_t start, size_t start_line) {
+    // Create a LexerTokenResult to store the results of lexing the current token
     LexerTokenResult *result = (LexerTokenResult *)malloc(sizeof(LexerTokenResult));
     if (result == NULL)
         return (LexerResult){SLS_ERROR, .error = (SlsError){"Failed to allocate memory.", 1}};
@@ -104,6 +118,7 @@ static LexerResult lexer_result(LexerInfo *lexer_info, Token token, size_t start
 }
 
 static LexerResult lexer_error(LexerInfo *lexer, const char* message, size_t start, size_t start_line) {
+    // Create a LexerTokenResult to store an error from lexing the current token
     LexerTokenResult *result = (LexerTokenResult *)malloc(sizeof(LexerTokenResult));
     if (result == NULL)
         return (LexerResult){SLS_ERROR, .error = (SlsError){"Failed to allocate memory.", 1}};
@@ -112,8 +127,124 @@ static LexerResult lexer_error(LexerInfo *lexer, const char* message, size_t sta
     result->error.code = 1;
     result->file_info = get_file_info(lexer, start, start_line);
     result->next = NULL;
+    // SLS_RESULT is used here because we successfully created a LexerTokenResult reporting an error
     return (LexerResult){SLS_RESULT, .result = result};
 }
 
-LexerResult lexical_analysis(LexerInfo *lexer_info);
-void clean_token_result(LexerTokenResult *head);
+static void skip_whitespace_and_comments(LexerInfo *lexer_info) {
+    // Skips whitespace plus `//` and `#` line comments ahead of the next token.
+    // A lone '/' is not a comment, so it is left in place for the tokenizer.
+    for (;;) {
+        char c = peek(lexer_info);
+        if (c == '#' || (c == '/' && far_peek(lexer_info, 1) == '/')) {
+            // Stop at '\n' or at the terminating '\0', so a comment on the
+            // last line cannot run past the end of the source
+            while (peek(lexer_info) != '\n' && peek(lexer_info) != '\0') {
+                advance(lexer_info);
+            }
+        } else if (isspace((unsigned char)c)) {
+            // The cast keeps a negative char from being undefined behavior in <ctype.h>
+            advance(lexer_info);
+        } else {
+            return;
+        }
+    }
+}
+
+static LexerResult lexer_next(LexerInfo *lexer_info) {
+    // Gets the next token from the source
+    skip_whitespace_and_comments(lexer_info);
+
+    // Record where the token begins
+    char c = peek(lexer_info);
+    size_t start = lexer_info->pos;
+    size_t start_line = lexer_info->line;
+
+    // End of file tokens
+    if (c == '\0') {
+        return lexer_result(lexer_info, (Token){.type = TOKEN_EOF}, start, start_line);
+    }
+
+    // Integers and Floats
+    if (isdigit((unsigned char)c)) {
+    }
+
+    // Character Literals
+    if (c == '\'') {
+    }
+
+    // String Literals
+    if (c == '\"') {
+    }
+
+    // Token Strings
+    if (c == '{') {
+    }
+
+    // Array Literals
+    if (c == '[') {
+    }
+
+    // Type Tuples
+    if (c == '(') {
+    }
+
+    // Identifiers and Booleans
+    if (isascii(c) && isprint((unsigned char)c) && !isspace((unsigned char)c) && c != '.') {
+    }
+
+    // None of the branches above produces a token yet, so every other
+    // character is reported as a token-level error
+    return lexer_error(lexer_info, "Lexer: Not Implemented Error", start, start_line);
+}
+
+void clean_token_result(LexerTokenResult *head) {
+    // Deallocates a LexerTokenResult linked list; a NULL head is a no-op
+    while (head) {
+        LexerTokenResult *next = head->next;
+        free(head);
+        head = next;
+    }
+}
+
+LexerTokenResult *get_token(LexerTokenResult *head, size_t i) {
+    // Returns the token at i in a LexerTokenResult linked list, or NULL if i is out of bounds
+    for (size_t j = 0; j < i && head; j++) {
+        head = head->next;
+    }
+    return head;
+}
+
+LexerResult lexical_analysis(LexerInfo *lexer_info) {
+    // Lexes the code loaded into lexer_info into a LexerTokenResult linked list.
+    // On failure everything collected so far is freed before returning.
+    LexerTokenResult *head = NULL;
+    LexerTokenResult *current = NULL;
+
+    do {
+        // Get next token
+        LexerResult result = lexer_next(lexer_info);
+
+        // Handle Errors
+        if (result.type == SLS_ERROR) {
+            clean_token_result(head);
+            return result;
+        }
+
+        // A successful result must carry a node
+        if (result.result == NULL) {
+            clean_token_result(head);
+            return (LexerResult){SLS_ERROR, .error = (SlsError){"Unknown Error.", 1}};
+        }
+
+        // Append the node to the list
+        if (head == NULL) {
+            head = result.result;
+        } else {
+            current->next = result.result;
+        }
+        current = result.result;
+    } while (current->type != SLS_ERROR && current->result.type != TOKEN_EOF);
+
+    return (LexerResult){.type = SLS_RESULT, .result = head};
+}