Worked on lexer

This commit is contained in:
Kyler Olsen 2025-11-04 23:16:12 -07:00
parent 906a632fef
commit c1322874f1
1 changed files with 117 additions and 4 deletions

View File

@ -57,6 +57,7 @@ const char *INTEGER_TYPES_NAMES[] = {
}; };
void init_lexer(LexerInfo *lexer_info, const char *filename, const char *source_code) { void init_lexer(LexerInfo *lexer_info, const char *filename, const char *source_code) {
// Initializes a LexerInfo struct with file info and source code
lexer_info->filename = filename; lexer_info->filename = filename;
lexer_info->source_code = source_code; lexer_info->source_code = source_code;
lexer_info->pos = 0; lexer_info->pos = 0;
@ -65,6 +66,7 @@ void init_lexer(LexerInfo *lexer_info, const char *filename, const char *source_
} }
static FileInfo get_file_info(LexerInfo *lexer_info, size_t start, size_t start_line) { static FileInfo get_file_info(LexerInfo *lexer_info, size_t start, size_t start_line) {
// Creates a FileInfo struct based on starting and current lexer states
return (FileInfo){ return (FileInfo){
.filename = lexer_info->filename, .filename = lexer_info->filename,
.line = lexer_info->line, .line = lexer_info->line,
@ -75,24 +77,31 @@ static FileInfo get_file_info(LexerInfo *lexer_info, size_t start, size_t start_
} }
static char peek(LexerInfo *lexer_info) {
    // Looks at the character under the lexer's cursor without consuming it
    const char *cursor = lexer_info->source_code + lexer_info->pos;
    return *cursor;
}
static char far_peek(LexerInfo *lexer_info, size_t index) {
    // Looks `index` characters ahead of the cursor without consuming anything.
    // No bounds check is performed here; the caller must keep the offset
    // inside the source buffer.
    const size_t target = lexer_info->pos + index;
    return lexer_info->source_code[target];
}
static char advance(LexerInfo *lexer_info) {
    // Consumes the character under the cursor, updates the line/column
    // bookkeeping for it, and returns the character the cursor now rests on
    const char consumed = lexer_info->source_code[lexer_info->pos];
    lexer_info->pos += 1;
    if (consumed != '\n') {
        // Still on the same line: only the column moves
        lexer_info->column += 1;
    } else {
        // A newline was consumed: start the next line at column 1
        lexer_info->line += 1;
        lexer_info->column = 1;
    }
    return lexer_info->source_code[lexer_info->pos];
}
static LexerResult lexer_result(LexerInfo *lexer_info, Token token, size_t start, size_t start_line) { static LexerResult lexer_result(LexerInfo *lexer_info, Token token, size_t start, size_t start_line) {
// Create a LexerTokenResult to store the results of lexing the current token
LexerTokenResult *result = (LexerTokenResult *)malloc(sizeof(LexerTokenResult)); LexerTokenResult *result = (LexerTokenResult *)malloc(sizeof(LexerTokenResult));
if (result == NULL) if (result == NULL)
return (LexerResult){SLS_ERROR, .error = (SlsError){"Failed to allocate memory.", 1}}; return (LexerResult){SLS_ERROR, .error = (SlsError){"Failed to allocate memory.", 1}};
@ -104,6 +113,7 @@ static LexerResult lexer_result(LexerInfo *lexer_info, Token token, size_t start
} }
static LexerResult lexer_error(LexerInfo *lexer, const char* message, size_t start, size_t start_line) { static LexerResult lexer_error(LexerInfo *lexer, const char* message, size_t start, size_t start_line) {
// Create a LexerTokenResult to store an error from lexing the current token
LexerTokenResult *result = (LexerTokenResult *)malloc(sizeof(LexerTokenResult)); LexerTokenResult *result = (LexerTokenResult *)malloc(sizeof(LexerTokenResult));
if (result == NULL) if (result == NULL)
return (LexerResult){SLS_ERROR, .error = (SlsError){"Failed to allocate memory.", 1}}; return (LexerResult){SLS_ERROR, .error = (SlsError){"Failed to allocate memory.", 1}};
@ -112,8 +122,111 @@ static LexerResult lexer_error(LexerInfo *lexer, const char* message, size_t sta
result->error.code = 1; result->error.code = 1;
result->file_info = get_file_info(lexer, start, start_line); result->file_info = get_file_info(lexer, start, start_line);
result->next = NULL; result->next = NULL;
// SLS_RESULT is used here because we successfully created a LexerTokenResult reporting an error
return (LexerResult){SLS_RESULT, .result = result}; return (LexerResult){SLS_RESULT, .result = result};
} }
static LexerResult lexer_next(LexerInfo *lexer_info) {
    // Gets the next token from the source.
    // Skips whitespace and line comments ('#' or '//'), then returns a
    // TOKEN_EOF result at the terminating '\0'. Every other token kind is
    // still a stub, so any other input currently yields a
    // "Lexer: Not Implemented Error" lexer error.
    for (;;) {
        char ahead = peek(lexer_info);
        if (isspace((unsigned char)ahead)) {
            // <ctype.h> functions require an unsigned char value (or EOF);
            // passing a negative plain char is undefined behavior
            advance(lexer_info);
        } else if (ahead == '#' || (ahead == '/' && far_peek(lexer_info, 1) == '/')) {
            // Line comment: consume up to the newline, but also stop at '\0'
            // so a comment on the last line cannot run past the source
            while (peek(lexer_info) != '\n' && peek(lexer_info) != '\0') {
                advance(lexer_info);
            }
        } else {
            // Anything else starts a token. This includes a lone '/', which
            // must leave the loop instead of spinning on it forever
            break;
        }
    }

    // Initialize beginning variables
    char c = peek(lexer_info);
    size_t start = lexer_info->pos;
    size_t start_line = lexer_info->line;

    // End of file tokens
    if (c == '\0') {
        return lexer_result(lexer_info, (Token){.type = TOKEN_EOF}, start, start_line);
    }

    // Integers and Floats
    if (isdigit((unsigned char)c)) {
        // TODO: not implemented yet
    }

    // Character Literals
    if (c == '\'') {
        // TODO: not implemented yet
    }

    // String Literals
    if (c == '\"') {
        // TODO: not implemented yet
    }

    // Token Strings
    if (c == '{') {
        // TODO: not implemented yet
    }

    // Array Literals
    if (c == '[') {
        // TODO: not implemented yet
    }

    // Type Tuples
    if (c == '(') {
        // TODO: not implemented yet
    }

    // Identifiers and Booleans
    if (isascii(c) && isprint((unsigned char)c) && !isspace((unsigned char)c) && c != '.') {
        // TODO: not implemented yet
    }

    // Fallback while the branches above are stubs. Keeping it last means the
    // stubs are reachable and the function always returns a value
    return lexer_error(lexer_info, "Lexer: Not Implemented Error", start, start_line);
}
void clean_token_result(LexerTokenResult *head) {
    // Deallocates every node of a LexerTokenResult linked list.
    // Passing a null head is a no-op.
    while (head) {
        // Read the link before freeing the node that stores it
        LexerTokenResult *next = head->next;
        free(head);
        head = next;
    }
}
LexerTokenResult *get_token(LexerTokenResult *head, size_t i) {
    // Walks i links forward from head and returns the node reached, or a
    // null pointer when the list ends before i steps have been taken
    LexerTokenResult *node = head;
    size_t remaining = i;
    while (remaining > 0 && node) {
        node = node->next;
        remaining--;
    }
    return node;
}
LexerResult lexical_analysis(LexerInfo *lexer_info) {
    // Lexes the code loaded into lexer_info into a linked list of
    // LexerTokenResult nodes.
    // Returns an SLS_RESULT whose result is the head of the list; the list
    // ends at the first node that is either a TOKEN_EOF token or an error
    // node. If lexer_next itself reports SLS_ERROR, or hands back a null
    // node, the partial list is freed and an SLS_ERROR is returned instead.
    LexerTokenResult *head = NULL;
    LexerTokenResult *tail = NULL;

    for (;;) {
        // Get next token
        LexerResult next = lexer_next(lexer_info);

        // Handle Errors
        if (next.type == SLS_ERROR) {
            clean_token_result(head);
            return next;
        }

        // A successful result must carry a node; check before linking it
        LexerTokenResult *node = next.result;
        if (node == NULL) {
            clean_token_result(head);
            return (LexerResult){SLS_ERROR, .error = (SlsError){"Unknown Error.", 1}};
        }

        // Save result at the end of the list
        if (head == NULL) {
            head = node;
        } else {
            tail->next = node;
        }
        tail = node;

        // Stop after an error node or the end-of-file token
        if (node->type == SLS_ERROR || node->result.type == TOKEN_EOF) {
            break;
        }
    }

    return (LexerResult){.type = SLS_RESULT, .result = head};
}