// Kyler Olsen // YREA SLS // Lexer // November 2025 #include #include #include #include #include #include #include "sls/sls_errors.h" #include "sls/lexer.h" #include "sls/string.h" const size_t TYPE_NAMES_SAFE_LENGTH = 20; const char *TOKEN_TYPES_NAMES[] = { "End of File", "Identifier", "Integer", "Float", "Double", "String", "Boolean", "Array", "Token String", "Type Tuple", }; const char *ARRAY_TYPES_NAMES[] = { "Identifier", "i64", "i32", "i16", "i8", "u64", "u32", "u16", "u8", "Float", "Double", "String", "Boolean", "Inline Struct", }; const char *INTEGER_TYPES_NAMES[] = { "i64", "i32", "i16", "i8", "u64", "u32", "u16", "u8", }; void init_lexer(LexerInfo *lexer_info, const char *filename, const char *source_code) { // Initializes a LexerInfo struct with file info and source code lexer_info->filename = filename; lexer_info->source_code = source_code; lexer_info->pos = 0; lexer_info->column = 1; lexer_info->line = 1; } static FileInfo get_file_info(LexerInfo *lexer_info, size_t start, size_t start_line) { // Creates a FileInfo struct based on starting and current lexer states return (FileInfo){ .filename = lexer_info->filename, .line = lexer_info->line, .column = lexer_info->column, .length = lexer_info->pos - start, .lines = lexer_info->line - start_line }; } static const char *get_token_text(LexerInfo *lexer_info, size_t start) { // Returns the current character from the source code return lexer_info->source_code + start; } static char peek(LexerInfo *lexer_info) { // Returns the current character from the source code return lexer_info->source_code[lexer_info->pos]; } static char far_peek(LexerInfo *lexer_info, size_t index) { // Returns the character index away from the current char in the source code return lexer_info->source_code[lexer_info->pos + index]; } static char advance(LexerInfo *lexer_info) { // Advances lexer_info to the next character if (lexer_info->source_code[lexer_info->pos] == '\n') { // If a new line is encountered, advance line and reset column lexer_info->line++; lexer_info->column = 1; } else { // Elsewhere in a line, advance column lexer_info->column++; } // Advance to and return the next character return lexer_info->source_code[++lexer_info->pos]; } static LexerResult lexer_result(LexerInfo *lexer_info, Token token, size_t start, size_t start_line) { // Create a LexerTokenResult to store the results of lexing the current token LexerTokenResult *result = (LexerTokenResult *)malloc(sizeof(LexerTokenResult)); if (result == NULL) return (LexerResult){SLS_ERROR, .error = (SlsError){"Failed to allocate memory.", 1}}; result->type = SLS_RESULT; result->result = token; result->file_info = get_file_info(lexer_info, start, start_line); result->next = NULL; return (LexerResult){SLS_RESULT, .result = result}; } static LexerResult lexer_error(LexerInfo *lexer_info, const char* message, size_t start, size_t start_line) { // Create a LexerTokenResult to store an error from lexing the current token LexerTokenResult *result = (LexerTokenResult *)malloc(sizeof(LexerTokenResult)); if (result == NULL) return (LexerResult){SLS_ERROR, .error = (SlsError){"Failed to allocate memory.", 1}}; result->type = SLS_ERROR; result->error.message = message; result->error.code = 1; result->file_info = get_file_info(lexer_info, start, start_line); result->next = NULL; // SLS_RESULT is used here because we successfully created a LexerTokenResult reporting an error return (LexerResult){SLS_RESULT, .result = result}; } static LexerResult parse_binary_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { do {c = advance(lexer_info);} while (c == '0' || c == '1' || c == '_'); if (c == ':') return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Binary Integer Type Not Implemented Error.", 1}}; if (isspace(c) || c == '/') return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Binary Integer Not Implemented Error.", 1}}; char *error_message = (char *)malloc(sizeof(char) * 58); snprintf(error_message, 58, "Invalid binary literal: unexpected '%c' in binary integer.", c); return lexer_error(lexer_info, error_message, start, start_line); } static LexerResult parse_octal_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { do {c = advance(lexer_info);} while ((isdigit(c) || c == '_') && !(c == '8' || c == '9')); if (c == ':') return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Octal Integer Type Not Implemented Error.", 1}}; if (isspace(c) || c == '/') return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Octal Integer Not Implemented Error.", 1}}; char *error_message = (char *)malloc(sizeof(char) * 56); snprintf(error_message, 56, "Invalid octal literal: unexpected '%c' in octal integer.", c); return lexer_error(lexer_info, error_message, start, start_line); } static LexerResult parse_exponential(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { (void)lexer_info; (void)c; (void)start; (void)start_line; return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Float Exponential Not Implemented Error.", 1}}; } static LexerResult parse_float(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { (void)lexer_info; (void)c; (void)start; (void)start_line; return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Float Not Implemented Error.", 1}}; } static LexerResult parse_decimal_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { do {c = advance(lexer_info);} while (isdigit(c) || c == '_'); if (c == '.') return parse_float(lexer_info, c, start, start_line); if (c == 'e' || c == 'E') return parse_exponential(lexer_info, c, start, start_line); if (c == ':') return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Decimal Integer Type Not Implemented Error.", 1}}; if (isspace(c) || c == '/') return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Decimal Integer Not Implemented Error.", 1}}; char *error_message = (char *)malloc(sizeof(char) * 60); snprintf(error_message, 60, "Invalid decimal literal: unexpected '%c' in decimal integer.", c); return lexer_error(lexer_info, error_message, start, start_line); } static LexerResult parse_hexadecimal_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { do {c = advance(lexer_info);} while (isxdigit(c) || c == '_'); if (c == ':') return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Hexadecimal Integer Type Not Implemented Error.", 1}}; if (isspace(c) || c == '/') return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Hexadecimal Integer Not Implemented Error.", 1}}; char *error_message = (char *)malloc(sizeof(char) * 68); snprintf(error_message, 68, "Invalid hexadecimal literal: unexpected '%c' in hexadecimal integer.", c); return lexer_error(lexer_info, error_message, start, start_line); } static LexerResult parse_numeric_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { if (c == '-') c = advance(lexer_info); if (c == '0') { c = advance(lexer_info); if (c == 'b' || c == 'B') return parse_binary_integer(lexer_info, c, start, start_line); else if (c == 'o' || c == 'O') return parse_octal_integer(lexer_info, c, start, start_line); else if (c == 'x' || c == 'X') return parse_hexadecimal_integer(lexer_info, c, start, start_line); } return parse_decimal_integer(lexer_info, c, start, start_line); } static LexerResult parse_character_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { (void)lexer_info; (void)c; (void)start; (void)start_line; return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Character Literals Not Implemented Error.", 1}}; } static LexerResult parse_string_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { (void)lexer_info; (void)c; (void)start; (void)start_line; return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: String Literals Not Implemented Error.", 1}}; } static LexerResult parse_token_string(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { (void)lexer_info; (void)c; (void)start; (void)start_line; return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Token Strings Not Implemented Error.", 1}}; } static LexerResult parse_array_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { (void)lexer_info; (void)c; (void)start; (void)start_line; return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Array Literals Not Implemented Error.", 1}}; } static LexerResult parse_type_tuples(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { (void)lexer_info; (void)c; (void)start; (void)start_line; return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Type Tuples Not Implemented Error.", 1}}; } static LexerResult parse_identifiers_and_booleans(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { (void)lexer_info; (void)c; (void)start; (void)start_line; return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Identifiers and Booleans Not Implemented Error.", 1}}; } static LexerResult lexer_next(LexerInfo *lexer_info) { // Gets the next token from the source while (isspace(peek(lexer_info)) || peek(lexer_info) == '/' || peek(lexer_info) == '#') { // Skip Comments if ((peek(lexer_info) == '/' && far_peek(lexer_info, 1) == '/') || peek(lexer_info) == '#') while (peek(lexer_info) != '\n') advance(lexer_info); // Skip whitespace while (isspace(peek(lexer_info))) advance(lexer_info); } // Initialize begining variables char c = peek(lexer_info); size_t start = lexer_info->pos; size_t start_line = lexer_info->line; // End of file tokens if (c == '\0') return lexer_result(lexer_info, (Token){.type = TOKEN_EOF}, start, start_line); // Integers and Floats if (isdigit(c) || c == '.' || (c == '-' && isdigit(far_peek(lexer_info, 1)))) return parse_numeric_literal(lexer_info, c, start, start_line); // Character Literals if (c == '\'') return parse_character_literal(lexer_info, c, start, start_line); // String Literals if (c == '\"') return parse_string_literal(lexer_info, c, start, start_line); // Token Strings if (c == '{') return parse_token_string(lexer_info, c, start, start_line); // Array Literals if (c == '[') return parse_array_literal(lexer_info, c, start, start_line); // Type Tuples if (c == '(') return parse_type_tuples(lexer_info, c, start, start_line); // Identifiers and Booleans if (isascii(c)) return parse_identifiers_and_booleans(lexer_info, c, start, start_line); // Lexing Error return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Unknown Character Error.", 1}}; } void clean_token_result(LexerTokenResult *head) { // Deallocates a LexerTokenResult linked list LexerTokenResult *next; while (head) { next = head->next; if (head->type == SLS_ERROR) free(head->error.message); if (head) free(head); head = next; } } LexerTokenResult *get_token(LexerTokenResult *head, size_t i) { // Returns the token at i in a LexerTokenResult linked list, or null_ptr if i is out of bounds for (size_t j = 0; j < i && head; j++) { head = head->next; } return head; } LexerResult lexical_analysis(LexerInfo *lexer_info) { // Lexes code loaded into lexer_info LexerResult result; // For lexer_next returns LexerTokenResult *head = 0; LexerTokenResult *current = 0; do { // Get next token result = lexer_next(lexer_info); // Handle Errors if (result.type == SLS_ERROR) { clean_token_result(head); return result; } // Save result if (head == 0) { head = result.result; current = head; } else { current->next = result.result; current = current->next; } // Current should not be null_ptr if (current == 0) { clean_token_result(head); return (LexerResult){SLS_ERROR, .error = (SlsError){"Unknown Error.", 1}}; } } while (current->type != SLS_ERROR && current->result.type != TOKEN_EOF); return (LexerResult) {.type = SLS_RESULT, .result = head}; }