// Kyler Olsen // YREA SLS // Lexer // November 2025 #include #include #include #include #include #include "sls/sls_errors.h" #include "sls/lexer.h" #include "sls/string.h" const size_t TYPE_NAMES_SAFE_LENGTH = 20; const char *TOKEN_TYPES_NAMES[] = { "End of File", "Identifier", "Integer", "Float", "Double", "String", "Boolean", "Array", "Token String", "Type Tuple", }; const char *ARRAY_TYPES_NAMES[] = { "Identifier", "i64", "i32", "i16", "i8", "u64", "u32", "u16", "u8", "Float", "Double", "String", "Boolean", "Inline Struct", }; const char *INTEGER_TYPES_NAMES[] = { "i64", "i32", "i16", "i8", "u64", "u32", "u16", "u8", }; void init_lexer(LexerInfo *lexer_info, const char *filename, const char *source_code) { // Initializes a LexerInfo struct with file info and source code lexer_info->filename = filename; lexer_info->source_code = source_code; lexer_info->pos = 0; lexer_info->column = 1; lexer_info->line = 1; } static FileInfo get_file_info(LexerInfo *lexer_info, size_t start, size_t start_line) { // Creates a FileInfo struct based on starting and current lexer states return (FileInfo){ .filename = lexer_info->filename, .line = lexer_info->line, .column = lexer_info->column, .length = lexer_info->pos - start, .lines = lexer_info->line - start_line }; } static char peek(LexerInfo *lexer_info) { // Returns the current character from the source code return lexer_info->source_code[lexer_info->pos]; } static char far_peek(LexerInfo *lexer_info, size_t index) { // Returns the character index away from the current char in the source code return lexer_info->source_code[lexer_info->pos + index]; } static char advance(LexerInfo *lexer_info) { // Advances lexer_info to the next character if (lexer_info->source_code[lexer_info->pos] == '\n') { // If a new line is encountered, advance line and reset column lexer_info->line++; lexer_info->column = 1; } else { // Elsewhere in a line, advance column lexer_info->column++; } // Advance to and return the next character return lexer_info->source_code[++lexer_info->pos]; } static LexerResult lexer_result(LexerInfo *lexer_info, Token token, size_t start, size_t start_line) { // Create a LexerTokenResult to store the results of lexing the current token LexerTokenResult *result = (LexerTokenResult *)malloc(sizeof(LexerTokenResult)); if (result == NULL) return (LexerResult){SLS_ERROR, .error = (SlsError){"Failed to allocate memory.", 1}}; result->type = SLS_RESULT; result->result = token; result->file_info = get_file_info(lexer_info, start, start_line); result->next = NULL; return (LexerResult){SLS_RESULT, .result = result}; } static LexerResult lexer_error(LexerInfo *lexer, const char* message, size_t start, size_t start_line) { // Create a LexerTokenResult to store an error from lexing the current token LexerTokenResult *result = (LexerTokenResult *)malloc(sizeof(LexerTokenResult)); if (result == NULL) return (LexerResult){SLS_ERROR, .error = (SlsError){"Failed to allocate memory.", 1}}; result->type = SLS_ERROR; result->error.message = message; result->error.code = 1; result->file_info = get_file_info(lexer, start, start_line); result->next = NULL; // SLS_RESULT is used here because we successfully created a LexerTokenResult reporting an error return (LexerResult){SLS_RESULT, .result = result}; } static LexerResult lexer_next(LexerInfo *lexer_info) { // Gets the next token from the source while (isspace(peek(lexer_info)) || peek(lexer_info) == '/' || peek(lexer_info) == '#') { // Skip Comments if ((peek(lexer_info) == '/' && far_peek(lexer_info, 1) == '/') || peek(lexer_info) == '#') while (peek(lexer_info) != '\n') advance(lexer_info); // Skip whitespace while (isspace(peek(lexer_info))) advance(lexer_info); } // Initialize begining variables char c = peek(lexer_info); size_t start = lexer_info->pos; size_t start_line = lexer_info->line; // End of file tokens if (c == '\0') { return lexer_result(lexer_info, (Token){.type = TOKEN_EOF}, start, start_line); } return lexer_error(lexer_info, "Lexer: Not Implemented Error", start, start_line); // Integers and Floats if (isdigit(c)) { } // Character Literals if (c == '\'') { } // String Literals if (c == '\"') { } // Token Strings if (c == '{') { } // Array Literals if (c == '[') { } // Type Tuples if (c == '(') { } // Identifiers and Booleans if (isascii(c) && isprint(c) && !isspace(c) && c != '.') { } } void clean_token_result(LexerTokenResult *head) { // Deallocates a LexerTokenResult linked list LexerTokenResult *next; while (head) { next = head->next; if (head) free(head); head = next; } } LexerTokenResult *get_token(LexerTokenResult *head, size_t i) { // Returns the token at i in a LexerTokenResult linked list, or null_ptr if i is out of bounds for (size_t j = 0; j < i && head; j++) { head = head->next; } return head; } LexerResult lexical_analysis(LexerInfo *lexer_info) { // Lexes code loaded into lexer_info LexerResult result; // For lexer_next returns LexerTokenResult *head = 0; LexerTokenResult *current = 0; do { // Get next token result = lexer_next(lexer_info); // Handle Errors if (result.type == SLS_ERROR) { clean_token_result(head); return result; } // Save result if (head == 0) { head = result.result; current = head; } else { current->next = result.result; current = current->next; } // Current should not be null_ptr if (current == 0) { clean_token_result(head); return (LexerResult){SLS_ERROR, .error = (SlsError){"Unknown Error.", 1}}; } } while (current->type != SLS_ERROR && current->result.type != TOKEN_EOF); return (LexerResult) {.type = SLS_RESULT, .result = head}; }