Worked on lexer

2025-11-04 23:16:12 -07:00 · 2025-11-04 23:16:12 -07:00 · c1322874f1
parent 906a632fef
commit c1322874f1
1 changed files with 117 additions and 4 deletions
--- a/SLS_C/src/lexer.c
+++ b/SLS_C/src/lexer.c
@ -57,6 +57,7 @@ const char *INTEGER_TYPES_NAMES[] = {
 };

 void init_lexer(LexerInfo *lexer_info, const char *filename, const char *source_code) {
+    // Initializes a LexerInfo struct with file info and source code
    lexer_info->filename = filename;
    lexer_info->source_code = source_code;
    lexer_info->pos = 0;
@ -65,6 +66,7 @@ void init_lexer(LexerInfo *lexer_info, const char *filename, const char *source_
 }

 static FileInfo get_file_info(LexerInfo *lexer_info, size_t start, size_t start_line) {
+    // Creates a FileInfo struct based on starting and current lexer states
    return (FileInfo){
        .filename = lexer_info->filename,
        .line = lexer_info->line,
@ -75,24 +77,31 @@ static FileInfo get_file_info(LexerInfo *lexer_info, size_t start, size_t start_
 }

 static char peek(LexerInfo *lexer_info) {
+    // Returns the character at the lexer's current position (pos) without advancing the lexer
    return lexer_info->source_code[lexer_info->pos];
 }

 static char far_peek(LexerInfo *lexer_info, size_t index) {
+    // Returns the character `index` positions past the current one without advancing; no bounds check is made
    return lexer_info->source_code[lexer_info->pos + index];
 }

-static void advance(LexerInfo *lexer_info) {
+static char advance(LexerInfo *lexer_info) {
+    // Consumes the current character, updates the line/column bookkeeping, and returns the character after it
    if (lexer_info->source_code[lexer_info->pos] == '\n') {
+        // Leaving a newline: the following character starts a new line at column 1
        lexer_info->line++;
        lexer_info->column = 1;
    } else {
+        // Any other character just moves one column to the right
        lexer_info->column++;
    }
-    lexer_info->source_code[lexer_info->pos++];
+    // Pre-increment pos so the returned value is the new current character; no end-of-input check is made here
+    return lexer_info->source_code[++lexer_info->pos];
 }

 static LexerResult lexer_result(LexerInfo *lexer_info, Token token, size_t start, size_t start_line) {
+    // Create a LexerTokenResult to store the results of lexing the current token
    LexerTokenResult *result = (LexerTokenResult *)malloc(sizeof(LexerTokenResult));
    if (result == NULL)
        return (LexerResult){SLS_ERROR, .error = (SlsError){"Failed to allocate memory.", 1}};
@ -104,6 +113,7 @@ static LexerResult lexer_result(LexerInfo *lexer_info, Token token, size_t start
 }

 static LexerResult lexer_error(LexerInfo *lexer, const char* message, size_t start, size_t start_line) {
+    // Create a LexerTokenResult to store an error from lexing the current token
    LexerTokenResult *result = (LexerTokenResult *)malloc(sizeof(LexerTokenResult));
    if (result == NULL)
        return (LexerResult){SLS_ERROR, .error = (SlsError){"Failed to allocate memory.", 1}};
@ -112,8 +122,111 @@ static LexerResult lexer_error(LexerInfo *lexer, const char* message, size_t sta
    result->error.code = 1;
    result->file_info = get_file_info(lexer, start, start_line);
    result->next = NULL;
+    // SLS_RESULT is used here because we successfully created a LexerTokenResult reporting an error
    return (LexerResult){SLS_RESULT, .result = result};
 }

-LexerResult lexical_analysis(LexerInfo *lexer_info);
-void clean_token_result(LexerTokenResult *head);
+static LexerResult lexer_next(LexerInfo *lexer_info) {
+    // Gets the next token from the source, skipping leading whitespace and comments.
+    // Comments start with "//" or "#" and run to the end of the line.
+
+    for (;;) {
+        // <ctype.h> classifiers need a value representable as unsigned char
+        unsigned char next = (unsigned char)peek(lexer_info);
+        if (isspace(next)) {
+            advance(lexer_info);
+        } else if (next == '#' || (next == '/' && far_peek(lexer_info, 1) == '/')) {
+            // Skip the comment, stopping at the newline or at the end of input
+            // so a comment on the last line never reads past the terminator
+            while (peek(lexer_info) != '\n' && peek(lexer_info) != '\0') {
+                advance(lexer_info);
+            }
+        } else {
+            // A lone '/' does not start a comment; leave it for the token rules
+            break;
+        }
+    }
+
+    // Initialize beginning variables
+    char c = peek(lexer_info);
+    size_t start = lexer_info->pos;
+    size_t start_line = lexer_info->line;
+
+    // End of file tokens
+    if (c == '\0') {
+        return lexer_result(lexer_info, (Token){.type = TOKEN_EOF}, start, start_line);
+    }
+
+    // Integers and Floats
+    if (isdigit((unsigned char)c)) {
+    }
+
+    // Character Literals
+    if (c == '\'') {
+    }
+
+    // String Literals
+    if (c == '\"') {
+    }
+
+    // Token Strings
+    if (c == '{') {
+    }
+
+    // Array Literals
+    if (c == '[') {
+    }
+
+    // Type Tuples
+    if (c == '(') {
+    }
+
+    // Identifiers and Booleans
+    if (isascii(c) && isprint((unsigned char)c) && !isspace((unsigned char)c) && c != '.') {
+    }
+
+    // No token rule above is implemented yet, so every non-EOF character ends up here
+    return lexer_error(lexer_info, "Lexer: Not Implemented Error", start, start_line);
+}
+
+void clean_token_result(LexerTokenResult *head) {
+    // Frees every node of a LexerTokenResult linked list; a NULL head is a no-op
+    while (head != NULL) {
+        // Save the successor first: head must not be read after it is freed
+        LexerTokenResult *next = head->next;
+        free(head);
+        head = next;
+    }
+}
+
+LexerTokenResult *get_token(LexerTokenResult *head, size_t i) {
+    // Follows i links from head and returns the node reached, or NULL if the list has i or fewer nodes
+    for (; i > 0 && head != NULL; i--) {
+        head = head->next;
+    }
+    return head;
+}
+
+LexerResult lexical_analysis(LexerInfo *lexer_info) {
+    // Lexes the code loaded into lexer_info and returns the tokens as a linked list.
+    // The list ends with the first EOF token or per-token error node produced by lexer_next.
+    // If lexer_next itself reports SLS_ERROR, the partial list is freed and that result is returned.
+    LexerTokenResult *head = NULL;
+    LexerTokenResult **tail = &head; // Slot where the next node gets linked in
+
+    for (;;) {
+        // Get next token
+        LexerResult step = lexer_next(lexer_info);
+
+        // lexer_next could not produce a node: discard what was collected so far
+        if (step.type == SLS_ERROR) {
+            clean_token_result(head);
+            return step;
+        }
+
+        // Link the node in; for the first node this assigns head itself
+        LexerTokenResult *node = step.result;
+        *tail = node;
+
+        // A successful result should always carry a node
+        if (node == NULL) {
+            clean_token_result(head);
+            return (LexerResult){SLS_ERROR, .error = (SlsError){"Unknown Error.", 1}};
+        }
+        tail = &node->next;
+
+        // Stop after an error node or the EOF token
+        if (node->type == SLS_ERROR || node->result.type == TOKEN_EOF) {
+            break;
+        }
+    }
+
+    return (LexerResult) {.type = SLS_RESULT, .result = head};
+}