From 9bc67d4268d0e94327523670cd133803d0a145a3 Mon Sep 17 00:00:00 2001 From: Kyler Date: Fri, 13 Jun 2025 00:07:07 -0600 Subject: [PATCH] Added lexical_analysis that returns a linked list of tokens --- include/sync/lexer.h | 14 +++++++-- include/sync/types.h | 21 ++++++------- src/lexer.c | 74 +++++++++++++++++++++++++++++++++++++------- src/main.c | 32 ++++++++++++------- 4 files changed, 105 insertions(+), 36 deletions(-) diff --git a/include/sync/lexer.h b/include/sync/lexer.h index c02afcc..f5b19bd 100644 --- a/include/sync/lexer.h +++ b/include/sync/lexer.h @@ -35,15 +35,25 @@ typedef struct { size_t line; } Lexer; -typedef struct { +typedef struct TokenResult { SyncResultType type; union { Token result; SyncError error; }; + struct TokenResult* next; } TokenResult; +typedef struct { + SyncResultType type; + union { + struct TokenResult* result; + GeneralError error; + }; +} LexerResult; + void lexer_init(Lexer* lexer, const char* filename, const char* source); -TokenResult lexer_next(Lexer* lexer); +LexerResult lexical_analysis(Lexer* lexer); +void clean_token_result(TokenResult* head); #endif // SYNC_LEXER_H diff --git a/include/sync/types.h b/include/sync/types.h index 74547e2..dc8a41e 100644 --- a/include/sync/types.h +++ b/include/sync/types.h @@ -3,6 +3,11 @@ #include +typedef struct { + const char *message; + int code; +} GeneralError; + typedef struct { const char *filename; size_t line; @@ -12,9 +17,9 @@ typedef struct { } FileInfo; typedef enum { - SYNC_LEXER_ERROR, - SYNC_PARSER_ERROR, - SYNC_RUNTIME_ERROR + SYNC_LEXICAL_ERROR, + SYNC_SYNTACTICAL_ERROR, + SYNC_SEMANTICAL_ERROR } SyncErrorType; typedef struct { @@ -24,16 +29,8 @@ typedef struct { } SyncError; typedef enum { - SYNC_RESULT, SYNC_ERROR, + SYNC_RESULT, } SyncResultType; -// typedef struct { -// SyncResultType type; -// union { -// void *result; -// SyncError error; -// }; -// } SyncResult; - #endif // SYNC_TYPES_H diff --git a/src/lexer.c b/src/lexer.c index 7418847..de85230 100644 --- 
a/src/lexer.c +++ b/src/lexer.c @@ -1,5 +1,6 @@ #include #include +#include #include "sync/types.h" #include "sync/lexer.h" @@ -78,20 +79,32 @@ static char is_identifier_char(char c) { return isalnum(c) || c == '_'; } -static TokenResult lexer_result(Lexer* lexer, TokenType type, size_t start, size_t start_line) { - return (TokenResult){SYNC_RESULT, .result = (Token){ - type, - &lexer->source[start], - lexer->pos - start, - get_file_info(lexer, start, start_line) - }}; +static LexerResult lexer_result(Lexer* lexer, TokenType type, size_t start, size_t start_line) { + TokenResult* result = (TokenResult*)malloc(sizeof(TokenResult)); + if (result == NULL) + return (LexerResult){SYNC_ERROR, .error = (GeneralError){"Failed to allocate memory.", 1}}; + result->type = SYNC_RESULT; + result->result.type = type; + result->result.start = &lexer->source[start]; + result->result.length = lexer->pos - start; + result->result.file_info = get_file_info(lexer, start, start_line); + result->next = NULL; + return (LexerResult){SYNC_RESULT, .result = result}; } -static TokenResult lexer_error(Lexer* lexer, const char* message, size_t start, size_t start_line) { - return (TokenResult){SYNC_ERROR, .error = (SyncError){SYNC_LEXER_ERROR, message, get_file_info(lexer, start, start_line)}}; +static LexerResult lexer_error(Lexer* lexer, const char* message, size_t start, size_t start_line) { + TokenResult* result = (TokenResult*)malloc(sizeof(TokenResult)); + if (result == NULL) + return (LexerResult){SYNC_ERROR, .error = (GeneralError){"Failed to allocate memory.", 1}}; + result->type = SYNC_ERROR; + result->error.type = SYNC_LEXICAL_ERROR; + result->error.message = message; + result->error.file_info = get_file_info(lexer, start, start_line); + result->next = NULL; + return (LexerResult){SYNC_RESULT, .result = result}; } -TokenResult lexer_next(Lexer* lexer) { +static LexerResult lexer_next(Lexer* lexer) { // Gets the next token from the source while (isspace(peek(lexer)) || peek(lexer) == 
'/') { @@ -190,7 +203,7 @@ TokenResult lexer_next(Lexer* lexer) { if (peek(lexer) == '=') advance(lexer); return lexer_result(lexer, TOKEN_OPERATOR, start, start_line); case '-': - if (peek(lexer) == '=') advance(lexer); + if (peek(lexer) == '=' || peek(lexer) == '>') advance(lexer); return lexer_result(lexer, TOKEN_OPERATOR, start, start_line); case '*': if (peek(lexer) == '=') advance(lexer); @@ -210,3 +223,42 @@ TokenResult lexer_next(Lexer* lexer) { default: return lexer_error(lexer, "Unknown token", start, start_line); } } + +LexerResult lexical_analysis(Lexer *lexer) { + LexerResult result; + TokenResult* head = NULL; + TokenResult* current = NULL; + do { + if (head == NULL) { + result = lexer_next(lexer); + if (result.type == SYNC_ERROR) { + clean_token_result(head); + return result; + } + head = result.result; + current = head; + } else { + result = lexer_next(lexer); + if (result.type == SYNC_ERROR) { + clean_token_result(head); + return result; + } + current->next = result.result; + current = current->next; + } + if (current == NULL) { + clean_token_result(head); + return (LexerResult){SYNC_ERROR, .error = (GeneralError){"Unknown Error.", 1}}; + } + } while (current->type != SYNC_ERROR && current->result.type != TOKEN_EOF); + + return (LexerResult){SYNC_RESULT, .result = head}; +} + +void clean_token_result(TokenResult* head) { + while (head != NULL) { + TokenResult* temp = head; + head = head->next; + free(temp); + } +} diff --git a/src/main.c b/src/main.c index 86ffe59..3515e6c 100644 --- a/src/main.c +++ b/src/main.c @@ -40,21 +40,31 @@ int main(void) { Lexer lexer; lexer_init(&lexer, filename, source); - TokenResult result; - do { - result = lexer_next(&lexer); - if (result.type == SYNC_RESULT) { - print_token(result.result); - } else { - fprintf(stderr, "Error: %s\n", result.error.message); - fprintf(stderr, "\tFilename: %s\n", result.error.file_info.filename); - fprintf(stderr, "\tLine: %zi\n", result.error.file_info.line); - fprintf(stderr, 
"\tColumn: %zi\n", result.error.file_info.column); + LexerResult lexer_result = lexical_analysis(&lexer); + if (lexer_result.type == SYNC_ERROR) { + fprintf(stderr, "Error: %s\n", lexer_result.error.message); + free(source); + return lexer_result.error.code; + } + + TokenResult* token_result = lexer_result.result; + int error_count = 0; + while (token_result != NULL) { + if (token_result->type == SYNC_ERROR) { + fprintf(stderr, "Error: %s\n", token_result->error.message); + fprintf(stderr, "\tFilename: %s\n", token_result->error.file_info.filename); + fprintf(stderr, "\tLn: %zu, Col: %zu\n", token_result->error.file_info.line, token_result->error.file_info.column); + clean_token_result(lexer_result.result); free(source); return 1; + } else { + print_token(token_result->result); } - } while (result.type != SYNC_ERROR && result.result.type != TOKEN_EOF); + token_result = token_result->next; + } + clean_token_result(lexer_result.result); free(source); + return 0; }