// Kyler Olsen
// YREA SLS
// Lexer
// November 2025
|
|
#include <ctype.h>
|
|
#include <string.h>
|
|
#include <stdint.h>
|
|
#include <stddef.h>
|
|
#include <stdlib.h>
|
|
|
|
#include "sls/sls_errors.h"
|
|
#include "sls/lexer.h"
|
|
#include "sls/string.h"
|
|
|
|
// Length constant for the type-name strings in this file. Presumably a buffer
// size large enough for any entry of the name tables (the longest entry,
// "Inline Struct", is 13 characters) -- TODO confirm against the users of
// this constant.
const size_t TYPE_NAMES_SAFE_LENGTH = 20;
|
|
|
|
// Display names for the token kinds, ten entries in total.
// NOTE(review): presumably indexed by the token type enum (TOKEN_EOF first);
// keep the order in sync with that enum -- confirm against sls/lexer.h.
const char *TOKEN_TYPES_NAMES[] = {
    /* 0 */ "End of File",
    /* 1 */ "Identifier",
    /* 2-4: numeric literals */
    "Integer", "Float", "Double",
    /* 5-6 */
    "String", "Boolean",
    /* 7-9: compound tokens */
    "Array", "Token String", "Type Tuple",
};
|
|
|
|
// Display names for array element kinds, fourteen entries in total.
// NOTE(review): presumably indexed by the array element type enum; keep the
// order in sync with that enum -- confirm against sls/lexer.h.
const char *ARRAY_TYPES_NAMES[] = {
    "Identifier",
    /* signed integers, widest first */
    "i64", "i32", "i16", "i8",
    /* unsigned integers, widest first */
    "u64", "u32", "u16", "u8",
    /* floating point */
    "Float", "Double",
    /* everything else */
    "String", "Boolean", "Inline Struct",
};
|
|
|
|
// Display names for the integer kinds, eight entries in total.
// NOTE(review): presumably indexed by the integer type enum; keep the order in
// sync with that enum -- confirm against sls/lexer.h.
const char *INTEGER_TYPES_NAMES[] = {
    /* signed, widest first */
    "i64", "i32", "i16", "i8",
    /* unsigned, widest first */
    "u64", "u32", "u16", "u8",
};
|
|
|
|
// Prepares a LexerInfo for a fresh pass over source_code.
// The filename and source_code pointers are stored as given (nothing is
// copied), and the cursor is rewound to offset 0, line 1, column 1.
void init_lexer(LexerInfo *lexer_info, const char *filename, const char *source_code) {
    // Cursor starts on the first character of the input
    lexer_info->pos = 0;
    lexer_info->line = 1;
    lexer_info->column = 1;

    // Remember the text being lexed and the name it is reported under
    lexer_info->source_code = source_code;
    lexer_info->filename = filename;
}
|
|
|
|
// Builds the FileInfo describing the text lexed since (start, start_line).
// The filename, line and column are copied from the lexer's current state;
// length is the number of characters consumed since start and lines is the
// number of line breaks crossed since start_line.
static FileInfo get_file_info(LexerInfo *lexer_info, size_t start, size_t start_line) {
    FileInfo info = {
        .filename = lexer_info->filename,
        .line = lexer_info->line,
        .column = lexer_info->column,
        .length = lexer_info->pos - start,
        .lines = lexer_info->line - start_line,
    };
    return info;
}
|
|
|
|
// Returns the character under the cursor without consuming it.
static char peek(LexerInfo *lexer_info) {
    const char *cursor = lexer_info->source_code + lexer_info->pos;
    return *cursor;
}
|
|
|
|
// Returns the character `index` positions ahead of the cursor without
// consuming anything.
//
// The lookahead never reads beyond the terminating NUL of the source: if the
// input ends before `index` characters are available, '\0' is returned.
static char far_peek(LexerInfo *lexer_info, size_t index) {
    const char *cursor = lexer_info->source_code + lexer_info->pos;

    // Walk forward one character at a time so the terminator is seen before
    // any byte after it is touched
    for (size_t i = 0; i < index; i++) {
        if (cursor[i] == '\0') {
            return '\0';
        }
    }
    return cursor[index];
}
|
|
|
|
// Consumes the character under the cursor and returns the one that follows.
//
// Line and column are kept in step: consuming a '\n' moves to column 1 of the
// next line, consuming anything else moves one column to the right.
// At the end of the input (cursor on the terminating NUL) nothing is consumed
// and '\0' is returned, so the cursor can never leave the source string.
static char advance(LexerInfo *lexer_info) {
    const char current = lexer_info->source_code[lexer_info->pos];

    // Never step past the terminator
    if (current == '\0') {
        return '\0';
    }

    if (current == '\n') {
        // If a new line is encountered, advance line and reset column
        lexer_info->line++;
        lexer_info->column = 1;
    } else {
        // Elsewhere in a line, advance column
        lexer_info->column++;
    }

    // Advance to and return the next character
    return lexer_info->source_code[++lexer_info->pos];
}
|
|
|
|
// Wraps a successfully lexed token in a freshly allocated list node.
//
// The node records the token, the FileInfo computed from (start, start_line)
// and the lexer's current state, and a NULL next link.
// Returns an SLS_RESULT LexerResult pointing at the node, or an SLS_ERROR
// LexerResult if the allocation fails. The caller owns the node.
static LexerResult lexer_result(LexerInfo *lexer_info, Token token, size_t start, size_t start_line) {
    LexerTokenResult *node = malloc(sizeof *node);
    if (!node) {
        return (LexerResult){SLS_ERROR, .error = (SlsError){"Failed to allocate memory.", 1}};
    }

    node->next = NULL;
    node->file_info = get_file_info(lexer_info, start, start_line);
    node->type = SLS_RESULT;
    node->result = token;

    return (LexerResult){SLS_RESULT, .result = node};
}
|
|
|
|
// Wraps a lexing error in a freshly allocated list node.
//
// The node is tagged SLS_ERROR and carries `message` (the pointer is stored,
// not copied) with error code 1, plus the FileInfo computed from
// (start, start_line) and the lexer's current state.
// Returns an SLS_ERROR LexerResult only if the allocation itself fails.
// The caller owns the node.
static LexerResult lexer_error(LexerInfo *lexer, const char* message, size_t start, size_t start_line) {
    LexerTokenResult *node = malloc(sizeof *node);
    if (!node) {
        return (LexerResult){SLS_ERROR, .error = (SlsError){"Failed to allocate memory.", 1}};
    }

    node->next = NULL;
    node->file_info = get_file_info(lexer, start, start_line);
    node->type = SLS_ERROR;
    node->error.code = 1;
    node->error.message = message;

    // The outer result is SLS_RESULT on purpose: building the node succeeded,
    // and the node itself is what reports the lexing error
    return (LexerResult){SLS_RESULT, .result = node};
}
|
|
|
|
// Gets the next token from the source.
//
// Skips leading whitespace and line comments ("//..." or "#..." up to the end
// of the line), then builds a token node for the text under the cursor.
// Only the end-of-file token is produced so far; every other input yields a
// node carrying "Lexer: Not Implemented Error".
//
// Returns whatever lexer_result()/lexer_error() return for that node.
static LexerResult lexer_next(LexerInfo *lexer_info) {
    // Skip any mix of whitespace and comments in front of the token
    for (;;) {
        const char ahead = peek(lexer_info);

        // <ctype.h> functions need a value representable as unsigned char
        if (isspace((unsigned char)ahead)) {
            advance(lexer_info);
        } else if (ahead == '#' || (ahead == '/' && far_peek(lexer_info, 1) == '/')) {
            // A comment runs to the newline, or to the end of the input when
            // the last line has no trailing newline
            while (peek(lexer_info) != '\n' && peek(lexer_info) != '\0') {
                advance(lexer_info);
            }
        } else {
            // Anything else -- including a lone '/' -- starts a token
            break;
        }
    }

    // Initialize beginning variables
    char c = peek(lexer_info);
    size_t start = lexer_info->pos;
    size_t start_line = lexer_info->line;

    // End of file tokens
    if (c == '\0') {
        return lexer_result(lexer_info, (Token){.type = TOKEN_EOF}, start, start_line);
    }

    // Character classification below goes through unsigned char so that bytes
    // outside the ASCII range are not passed to <ctype.h> as negative values
    const unsigned char uc = (unsigned char)c;

    // Integers and Floats
    if (isdigit(uc)) {
        // TODO: not implemented
    }

    // Character Literals
    if (c == '\'') {
        // TODO: not implemented
    }

    // String Literals
    if (c == '\"') {
        // TODO: not implemented
    }

    // Token Strings
    if (c == '{') {
        // TODO: not implemented
    }

    // Array Literals
    if (c == '[') {
        // TODO: not implemented
    }

    // Type Tuples
    if (c == '(') {
        // TODO: not implemented
    }

    // Identifiers and Booleans (printable, non-space ASCII other than '.')
    if (uc < 0x80 && isprint(uc) && !isspace(uc) && c != '.') {
        // TODO: not implemented
    }

    // Nothing above produces a token yet, so report the gap to the caller
    return lexer_error(lexer_info, "Lexer: Not Implemented Error", start, start_line);
}
|
|
|
|
// Frees every node of a LexerTokenResult linked list, starting at head.
// Passing NULL is allowed and does nothing.
void clean_token_result(LexerTokenResult *head) {
    for (LexerTokenResult *node = head; node != NULL; ) {
        // Grab the link before the node that holds it is released
        LexerTokenResult *following = node->next;
        free(node);
        node = following;
    }
}
|
|
|
|
// Returns the node i links after head in a LexerTokenResult linked list
// (i == 0 gives head itself), or NULL when the list has i or fewer nodes.
LexerTokenResult *get_token(LexerTokenResult *head, size_t i) {
    LexerTokenResult *node = head;
    size_t remaining = i;

    // Follow one link per remaining step, stopping early at the end of the list
    while (remaining > 0 && node != NULL) {
        node = node->next;
        remaining--;
    }
    return node;
}
|
|
|
|
// Lexes the code loaded into lexer_info into a linked list of token nodes.
//
// Tokens are appended in source order until a node is an end-of-file token or
// is tagged SLS_ERROR; that node is the last one in the list.
// Returns an SLS_RESULT LexerResult holding the list head. If lexer_next
// itself fails, or hands back no node, the partial list is freed and an
// SLS_ERROR LexerResult is returned instead.
LexerResult lexical_analysis(LexerInfo *lexer_info) {
    LexerTokenResult *head = NULL;
    LexerTokenResult *tail = NULL;

    for (;;) {
        // Get next token
        LexerResult step = lexer_next(lexer_info);

        // A failure to even build a node aborts the whole run
        if (step.type == SLS_ERROR) {
            clean_token_result(head);
            return step;
        }

        // Append the new node: it becomes the head of an empty list,
        // otherwise it is linked after the current tail
        if (head == NULL) {
            head = step.result;
        } else {
            tail->next = step.result;
        }
        tail = step.result;

        // A successful step must have produced a node
        if (tail == NULL) {
            clean_token_result(head);
            return (LexerResult){SLS_ERROR, .error = (SlsError){"Unknown Error.", 1}};
        }

        // Stop after an error node or the end-of-file token
        if (tail->type == SLS_ERROR || tail->result.type == TOKEN_EOF) {
            break;
        }
    }

    return (LexerResult) {.type = SLS_RESULT, .result = head};
}
|