From c00ff5c7980b2f104c46924e4575b45112d6f45c Mon Sep 17 00:00:00 2001 From: Kyler Date: Thu, 6 Nov 2025 00:55:02 -0700 Subject: [PATCH] Worked on lexing --- SLS_C/src/lexer.c | 143 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 116 insertions(+), 27 deletions(-) diff --git a/SLS_C/src/lexer.c b/SLS_C/src/lexer.c index f3fca85..778f4f4 100644 --- a/SLS_C/src/lexer.c +++ b/SLS_C/src/lexer.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "sls/sls_errors.h" #include "sls/lexer.h" @@ -76,6 +77,11 @@ static FileInfo get_file_info(LexerInfo *lexer_info, size_t start, size_t start_ }; } +static const char *get_token_text(LexerInfo *lexer_info, size_t start) { + // Returns the current character from the source code + return lexer_info->source_code + start; +} + static char peek(LexerInfo *lexer_info) { // Returns the current character from the source code return lexer_info->source_code[lexer_info->pos]; @@ -112,7 +118,7 @@ static LexerResult lexer_result(LexerInfo *lexer_info, Token token, size_t start return (LexerResult){SLS_RESULT, .result = result}; } -static LexerResult lexer_error(LexerInfo *lexer, const char* message, size_t start, size_t start_line) { +static LexerResult lexer_error(LexerInfo *lexer_info, const char* message, size_t start, size_t start_line) { // Create a LexerTokenResult to store an error from lexing the current token LexerTokenResult *result = (LexerTokenResult *)malloc(sizeof(LexerTokenResult)); if (result == NULL) @@ -120,12 +126,109 @@ static LexerResult lexer_error(LexerInfo *lexer, const char* message, size_t sta result->type = SLS_ERROR; result->error.message = message; result->error.code = 1; - result->file_info = get_file_info(lexer, start, start_line); + result->file_info = get_file_info(lexer_info, start, start_line); result->next = NULL; // SLS_RESULT is used here because we successfully created a LexerTokenResult reporting an error return (LexerResult){SLS_RESULT, .result = result}; } +static LexerResult parse_binary_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { + do {c = advance(lexer_info);} while (c == '0' || c == '1' || c == '_'); + if (c == ':') + return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Binary Integer Type Not Implemented Error.", 1}}; + if (isspace(c) || c == '/') + return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Binary Integer Not Implemented Error.", 1}}; + char *error_message = (char *)malloc(sizeof(char) * 58); + snprintf(error_message, 58, "Invalid binary literal: unexpected '%c' in binary integer.", c); + return lexer_error(lexer_info, error_message, start, start_line); +} + +static LexerResult parse_octal_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { + do {c = advance(lexer_info);} while ((isdigit(c) || c == '_') && !(c == '8' || c == '9')); + if (c == ':') + return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Octal Integer Type Not Implemented Error.", 1}}; + if (isspace(c) || c == '/') + return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Octal Integer Not Implemented Error.", 1}}; + char *error_message = (char *)malloc(sizeof(char) * 56); + snprintf(error_message, 56, "Invalid octal literal: unexpected '%c' in octal integer.", c); + return lexer_error(lexer_info, error_message, start, start_line); +} + +static LexerResult parse_exponential(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { + (void)lexer_info; (void)c; (void)start; (void)start_line; + return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Float Exponential Not Implemented Error.", 1}}; +} + +static LexerResult parse_float(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { + (void)lexer_info; (void)c; (void)start; (void)start_line; + return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Float Not Implemented Error.", 1}}; +} + +static LexerResult parse_decimal_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { + do {c = advance(lexer_info);} while (isdigit(c) || c == '_'); + if (c == '.') return parse_float(lexer_info, c, start, start_line); + if (c == 'e' || c == 'E') return parse_exponential(lexer_info, c, start, start_line); + if (c == ':') + return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Decimal Integer Type Not Implemented Error.", 1}}; + if (isspace(c) || c == '/') + return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Decimal Integer Not Implemented Error.", 1}}; + char *error_message = (char *)malloc(sizeof(char) * 60); + snprintf(error_message, 60, "Invalid decimal literal: unexpected '%c' in decimal integer.", c); + return lexer_error(lexer_info, error_message, start, start_line); +} + +static LexerResult parse_hexadecimal_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { + do {c = advance(lexer_info);} while (isxdigit(c) || c == '_'); + if (c == ':') + return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Hexadecimal Integer Type Not Implemented Error.", 1}}; + if (isspace(c) || c == '/') + return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Hexadecimal Integer Not Implemented Error.", 1}}; + char *error_message = (char *)malloc(sizeof(char) * 68); + snprintf(error_message, 68, "Invalid hexadecimal literal: unexpected '%c' in hexadecimal integer.", c); + return lexer_error(lexer_info, error_message, start, start_line); +} + +static LexerResult parse_numeric_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { + if (c == '-') c = advance(lexer_info); + if (c == '0') { + c = advance(lexer_info); + if (c == 'b' || c == 'B') return parse_binary_integer(lexer_info, c, start, start_line); + else if (c == 'o' || c == 'O') return parse_octal_integer(lexer_info, c, start, start_line); + else if (c == 'x' || c == 'X') return parse_hexadecimal_integer(lexer_info, c, start, start_line); + } + return parse_decimal_integer(lexer_info, c, start, start_line); +} + +static LexerResult parse_character_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { + (void)lexer_info; (void)c; (void)start; (void)start_line; + return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Character Literals Not Implemented Error.", 1}}; +} + +static LexerResult parse_string_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { + (void)lexer_info; (void)c; (void)start; (void)start_line; + return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: String Literals Not Implemented Error.", 1}}; +} + +static LexerResult parse_token_string(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { + (void)lexer_info; (void)c; (void)start; (void)start_line; + return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Token Strings Not Implemented Error.", 1}}; +} + +static LexerResult parse_array_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { + (void)lexer_info; (void)c; (void)start; (void)start_line; + return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Array Literals Not Implemented Error.", 1}}; +} + +static LexerResult parse_type_tuples(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { + (void)lexer_info; (void)c; (void)start; (void)start_line; + return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Type Tuples Not Implemented Error.", 1}}; +} + +static LexerResult parse_identifiers_and_booleans(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { + (void)lexer_info; (void)c; (void)start; (void)start_line; + return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Identifiers and Booleans Not Implemented Error.", 1}}; +} + static LexerResult lexer_next(LexerInfo *lexer_info) { // Gets the next token from the source @@ -143,38 +246,23 @@ static LexerResult lexer_next(LexerInfo *lexer_info) { size_t start_line = lexer_info->line; // End of file tokens - if (c == '\0') { - return lexer_result(lexer_info, (Token){.type = TOKEN_EOF}, start, start_line); - } - return lexer_error(lexer_info, "Lexer: Not Implemented Error", start, start_line); - + if (c == '\0') return lexer_result(lexer_info, (Token){.type = TOKEN_EOF}, start, start_line); // Integers and Floats - if (isdigit(c)) { - } - + if (isdigit(c) || c == '.' || (c == '-' && isdigit(far_peek(lexer_info, 1)))) return parse_numeric_literal(lexer_info, c, start, start_line); // Character Literals - if (c == '\'') { - } - + if (c == '\'') return parse_character_literal(lexer_info, c, start, start_line); // String Literals - if (c == '\"') { - } - + if (c == '\"') return parse_string_literal(lexer_info, c, start, start_line); // Token Strings - if (c == '{') { - } - + if (c == '{') return parse_token_string(lexer_info, c, start, start_line); // Array Literals - if (c == '[') { - } - + if (c == '[') return parse_array_literal(lexer_info, c, start, start_line); // Type Tuples - if (c == '(') { - } - + if (c == '(') return parse_type_tuples(lexer_info, c, start, start_line); // Identifiers and Booleans - if (isascii(c) && isprint(c) && !isspace(c) && c != '.') { - } + if (isascii(c)) return parse_identifiers_and_booleans(lexer_info, c, start, start_line); + // Lexing Error + return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Unknown Character Error.", 1}}; } void clean_token_result(LexerTokenResult *head) { @@ -182,6 +270,7 @@ void clean_token_result(LexerTokenResult *head) { LexerTokenResult *next; while (head) { next = head->next; + if (head->type == SLS_ERROR) free(head->error.message); if (head) free(head); head = next; }