YREA-SLS/SLS_C/src/lexer.c

322 lines
13 KiB
C

// Kyler Olsen
// YREA SLS
// Lexer
// November 2025
#include <ctype.h>
#include <string.h>
#include <stdint.h>
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include "sls/sls_errors.h"
#include "sls/lexer.h"
#include "sls/string.h"
// Length bound associated with the type-name tables in this file; the longest
// name visible here ("Inline Struct") is 13 characters.
// NOTE(review): presumably a buffer size that safely holds any type name plus
// its terminator — confirm against the code that uses it.
const size_t TYPE_NAMES_SAFE_LENGTH = 20;
// Human-readable names for token kinds, one entry per kind.
// NOTE(review): presumably indexed by the token type enum from sls/lexer.h
// (TOKEN_EOF first) — keep the order in sync with that enum; confirm.
const char *TOKEN_TYPES_NAMES[] = {
    "End of File",
    "Identifier",
    // Scalar literals
    "Integer",
    "Float",
    "Double",
    "String",
    "Boolean",
    // Compound literals
    "Array",
    "Token String",
    "Type Tuple",
};
// Human-readable names for array element kinds, one entry per kind.
// NOTE(review): presumably indexed by an array-type enum declared in
// sls/lexer.h — keep the order in sync with it; confirm.
const char *ARRAY_TYPES_NAMES[] = {
    "Identifier",
    // Signed integers, widest first
    "i64", "i32", "i16", "i8",
    // Unsigned integers, widest first
    "u64", "u32", "u16", "u8",
    // Remaining element kinds
    "Float",
    "Double",
    "String",
    "Boolean",
    "Inline Struct",
};
// Human-readable names for the integer widths, one entry per width.
// NOTE(review): presumably indexed by an integer-type enum declared in
// sls/lexer.h — keep the order in sync with it; confirm.
const char *INTEGER_TYPES_NAMES[] = {
    // Signed, widest first
    "i64", "i32", "i16", "i8",
    // Unsigned, widest first
    "u64", "u32", "u16", "u8",
};
void init_lexer(LexerInfo *lexer_info, const char *filename, const char *source_code) {
    // Prepares lexer_info to lex source_code from its first character.
    // Both pointers are stored as-is (not copied).

    // Cursor starts at offset 0, which is line 1, column 1
    lexer_info->pos = 0;
    lexer_info->line = 1;
    lexer_info->column = 1;

    // Remember where the text came from and the text itself
    lexer_info->source_code = source_code;
    lexer_info->filename = filename;
}
static FileInfo get_file_info(LexerInfo *lexer_info, size_t start, size_t start_line) {
    // Builds the location record for a token that began at offset start on
    // line start_line. line and column are the lexer's position at the time of
    // the call; length and lines are measured from the token start to that position.
    FileInfo info = {0};
    info.filename = lexer_info->filename;
    info.line = lexer_info->line;
    info.column = lexer_info->column;
    info.length = lexer_info->pos - start;
    info.lines = lexer_info->line - start_line;
    return info;
}
static const char *get_token_text(LexerInfo *lexer_info, size_t start) {
    // Returns a pointer into the source code at offset start (the first
    // character of a token). The pointer aliases the source buffer; nothing is copied.
    const char *source = lexer_info->source_code;
    return &source[start];
}
static char peek(LexerInfo *lexer_info) {
    // Looks at the character under the cursor without consuming it
    const char *cursor = lexer_info->source_code + lexer_info->pos;
    return *cursor;
}
static char far_peek(LexerInfo *lexer_info, size_t index) {
    // Looks at the character index positions past the cursor without consuming
    // anything. No bounds check is made: the caller must know the position is
    // still inside the source text.
    const char *cursor = lexer_info->source_code + lexer_info->pos;
    return cursor[index];
}
static char advance(LexerInfo *lexer_info) {
    // Consumes the character under the cursor, updating the line/column
    // bookkeeping, and returns the character that is now under the cursor.
    const char consumed = lexer_info->source_code[lexer_info->pos];
    if (consumed != '\n') {
        // Still on the same line: just move one column right
        lexer_info->column++;
    } else {
        // Stepping over a newline starts the next line at column 1
        lexer_info->line++;
        lexer_info->column = 1;
    }
    lexer_info->pos += 1;
    return lexer_info->source_code[lexer_info->pos];
}
static LexerResult lexer_result(LexerInfo *lexer_info, Token token, size_t start, size_t start_line) {
    // Wraps a successfully lexed token in a freshly allocated, unlinked list node.
    // start / start_line describe where the token began and feed the node's file info.
    // Returns SLS_ERROR only when the node itself cannot be allocated.
    LexerTokenResult *node = malloc(sizeof *node);
    if (!node)
        return (LexerResult){SLS_ERROR, .error = (SlsError){"Failed to allocate memory.", 1}};

    node->next = NULL;
    node->file_info = get_file_info(lexer_info, start, start_line);
    node->result = token;
    node->type = SLS_RESULT;
    return (LexerResult){SLS_RESULT, .result = node};
}
static LexerResult lexer_error(LexerInfo *lexer_info, const char* message, size_t start, size_t start_line) {
    // Wraps a token-level error in a freshly allocated, unlinked list node.
    //   message    - heap-allocated description; ownership passes to this function.
    //                It is stored in the node and later released by
    //                clean_token_result(), which calls free() on error.message.
    //   start      - source offset where the offending token began
    //   start_line - line on which the offending token began
    // Returns SLS_RESULT carrying a node whose own type is SLS_ERROR, or
    // SLS_ERROR when the node cannot be allocated.
    LexerTokenResult *result = malloc(sizeof *result);
    if (result == NULL) {
        // No node exists to take ownership of message, so release it here;
        // otherwise every caller would leak it on this path.
        free((void *)message);
        return (LexerResult){SLS_ERROR, .error = (SlsError){"Failed to allocate memory.", 1}};
    }
    result->type = SLS_ERROR;
    result->error.message = message;
    result->error.code = 1;
    result->file_info = get_file_info(lexer_info, start, start_line);
    result->next = NULL;
    // SLS_RESULT is used here because we successfully created a LexerTokenResult reporting an error
    return (LexerResult){SLS_RESULT, .result = result};
}
static LexerResult parse_binary_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Scans a binary integer literal. The character under the cursor on entry
    // (the 'b'/'B' prefix when called from parse_numeric_literal) is consumed
    // unconditionally, followed by every '0', '1' or '_' after it.
    // Returns SLS_ERROR "Not Implemented" for a properly terminated literal
    // (token construction is not written yet), a token-level error node for a
    // stray character, or SLS_ERROR if the error message cannot be allocated.

    // Exact size of the formatted message below ('%c' expands to one character), terminator included
    enum { MESSAGE_SIZE = 58 };

    do {
        c = advance(lexer_info);
    } while (c == '0' || c == '1' || c == '_');

    // NOTE(review): ':' presumably introduces an explicit integer type suffix — confirm
    if (c == ':')
        return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Binary Integer Type Not Implemented Error.", 1}};

    // End of input, whitespace or a '/' (presumably the start of a comment) ends the literal.
    // The cast keeps isspace() defined for bytes outside the ASCII range.
    if (c == '\0' || isspace((unsigned char)c) || c == '/')
        return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Binary Integer Not Implemented Error.", 1}};

    // The message is handed to lexer_error(), whose node is later freed by clean_token_result()
    char *error_message = malloc(MESSAGE_SIZE);
    if (error_message == NULL)
        return (LexerResult){SLS_ERROR, .error = (SlsError){"Failed to allocate memory.", 1}};
    snprintf(error_message, MESSAGE_SIZE, "Invalid binary literal: unexpected '%c' in binary integer.", c);
    return lexer_error(lexer_info, error_message, start, start_line);
}
static LexerResult parse_octal_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Scans an octal integer literal. The character under the cursor on entry
    // (the 'o'/'O' prefix when called from parse_numeric_literal) is consumed
    // unconditionally, followed by every digit '0'-'7' or '_' after it.
    // Returns SLS_ERROR "Not Implemented" for a properly terminated literal
    // (token construction is not written yet), a token-level error node for a
    // stray character (including '8' and '9'), or SLS_ERROR if the error
    // message cannot be allocated.

    // Exact size of the formatted message below ('%c' expands to one character), terminator included
    enum { MESSAGE_SIZE = 56 };

    do {
        c = advance(lexer_info);
    } while ((c >= '0' && c <= '7') || c == '_');

    // NOTE(review): ':' presumably introduces an explicit integer type suffix — confirm
    if (c == ':')
        return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Octal Integer Type Not Implemented Error.", 1}};

    // End of input, whitespace or a '/' (presumably the start of a comment) ends the literal.
    // The cast keeps isspace() defined for bytes outside the ASCII range.
    if (c == '\0' || isspace((unsigned char)c) || c == '/')
        return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Octal Integer Not Implemented Error.", 1}};

    // The message is handed to lexer_error(), whose node is later freed by clean_token_result()
    char *error_message = malloc(MESSAGE_SIZE);
    if (error_message == NULL)
        return (LexerResult){SLS_ERROR, .error = (SlsError){"Failed to allocate memory.", 1}};
    snprintf(error_message, MESSAGE_SIZE, "Invalid octal literal: unexpected '%c' in octal integer.", c);
    return lexer_error(lexer_info, error_message, start, start_line);
}
static LexerResult parse_exponential(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Placeholder for exponent-form floats: ignores every argument and
    // reports that the feature is not implemented.
    (void)lexer_info;
    (void)c;
    (void)start;
    (void)start_line;
    LexerResult not_implemented = {SLS_ERROR, .error = (SlsError){"Lexer: Float Exponential Not Implemented Error.", 1}};
    return not_implemented;
}
static LexerResult parse_float(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Placeholder for float literals: ignores every argument and reports that
    // the feature is not implemented.
    (void)lexer_info;
    (void)c;
    (void)start;
    (void)start_line;
    LexerResult not_implemented = {SLS_ERROR, .error = (SlsError){"Lexer: Float Not Implemented Error.", 1}};
    return not_implemented;
}
static LexerResult parse_decimal_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Scans a decimal integer literal. The character under the cursor on entry
    // is consumed unconditionally, followed by every decimal digit or '_' after it.
    // A '.' or 'e'/'E' after the digits hands over to parse_float() /
    // parse_exponential(). Otherwise returns SLS_ERROR "Not Implemented" for a
    // properly terminated literal (token construction is not written yet), a
    // token-level error node for a stray character, or SLS_ERROR if the error
    // message cannot be allocated.

    // Exact size of the formatted message below ('%c' expands to one character), terminator included
    enum { MESSAGE_SIZE = 60 };

    // The cast keeps isdigit() defined for bytes outside the ASCII range
    do {
        c = advance(lexer_info);
    } while (isdigit((unsigned char)c) || c == '_');

    if (c == '.') return parse_float(lexer_info, c, start, start_line);
    if (c == 'e' || c == 'E') return parse_exponential(lexer_info, c, start, start_line);

    // NOTE(review): ':' presumably introduces an explicit integer type suffix — confirm
    if (c == ':')
        return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Decimal Integer Type Not Implemented Error.", 1}};

    // End of input, whitespace or a '/' (presumably the start of a comment) ends the literal
    if (c == '\0' || isspace((unsigned char)c) || c == '/')
        return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Decimal Integer Not Implemented Error.", 1}};

    // The message is handed to lexer_error(), whose node is later freed by clean_token_result()
    char *error_message = malloc(MESSAGE_SIZE);
    if (error_message == NULL)
        return (LexerResult){SLS_ERROR, .error = (SlsError){"Failed to allocate memory.", 1}};
    snprintf(error_message, MESSAGE_SIZE, "Invalid decimal literal: unexpected '%c' in decimal integer.", c);
    return lexer_error(lexer_info, error_message, start, start_line);
}
static LexerResult parse_hexadecimal_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Scans a hexadecimal integer literal. The character under the cursor on
    // entry (the 'x'/'X' prefix when called from parse_numeric_literal) is
    // consumed unconditionally, followed by every hex digit or '_' after it.
    // Returns SLS_ERROR "Not Implemented" for a properly terminated literal
    // (token construction is not written yet), a token-level error node for a
    // stray character, or SLS_ERROR if the error message cannot be allocated.

    // Exact size of the formatted message below ('%c' expands to one character), terminator included
    enum { MESSAGE_SIZE = 68 };

    // The cast keeps isxdigit() defined for bytes outside the ASCII range
    do {
        c = advance(lexer_info);
    } while (isxdigit((unsigned char)c) || c == '_');

    // NOTE(review): ':' presumably introduces an explicit integer type suffix — confirm
    if (c == ':')
        return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Hexadecimal Integer Type Not Implemented Error.", 1}};

    // End of input, whitespace or a '/' (presumably the start of a comment) ends the literal
    if (c == '\0' || isspace((unsigned char)c) || c == '/')
        return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Hexadecimal Integer Not Implemented Error.", 1}};

    // The message is handed to lexer_error(), whose node is later freed by clean_token_result()
    char *error_message = malloc(MESSAGE_SIZE);
    if (error_message == NULL)
        return (LexerResult){SLS_ERROR, .error = (SlsError){"Failed to allocate memory.", 1}};
    snprintf(error_message, MESSAGE_SIZE, "Invalid hexadecimal literal: unexpected '%c' in hexadecimal integer.", c);
    return lexer_error(lexer_info, error_message, start, start_line);
}
static LexerResult parse_numeric_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Entry point for numeric literals: consumes an optional leading '-', then
    // picks the radix-specific scanner from a "0b" / "0o" / "0x" prefix
    // (either letter case), defaulting to decimal.
    // Every radix scanner begins by consuming the character under the cursor,
    // so this function must leave the cursor ON the character the scanner is
    // meant to swallow: the prefix letter for binary/octal/hex, the first
    // character of the literal for decimal.
    if (c == '-') c = advance(lexer_info);

    if (c == '0') {
        // Look at the prefix letter without consuming the '0'. Consuming it
        // first would make the decimal scanner swallow whatever follows a
        // plain "0" unchecked (running past the terminator for "0" at end of
        // input, or gluing "0 1" into one literal).
        const char prefix = far_peek(lexer_info, 1);
        if (prefix == 'b' || prefix == 'B') {
            advance(lexer_info); // step over '0'; the cursor is now on the prefix letter
            return parse_binary_integer(lexer_info, prefix, start, start_line);
        }
        if (prefix == 'o' || prefix == 'O') {
            advance(lexer_info);
            return parse_octal_integer(lexer_info, prefix, start, start_line);
        }
        if (prefix == 'x' || prefix == 'X') {
            advance(lexer_info);
            return parse_hexadecimal_integer(lexer_info, prefix, start, start_line);
        }
    }
    return parse_decimal_integer(lexer_info, c, start, start_line);
}
static LexerResult parse_character_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Placeholder for character literals: ignores every argument and reports
    // that the feature is not implemented.
    (void)lexer_info;
    (void)c;
    (void)start;
    (void)start_line;
    LexerResult not_implemented = {SLS_ERROR, .error = (SlsError){"Lexer: Character Literals Not Implemented Error.", 1}};
    return not_implemented;
}
static LexerResult parse_string_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Placeholder for string literals: ignores every argument and reports
    // that the feature is not implemented.
    (void)lexer_info;
    (void)c;
    (void)start;
    (void)start_line;
    LexerResult not_implemented = {SLS_ERROR, .error = (SlsError){"Lexer: String Literals Not Implemented Error.", 1}};
    return not_implemented;
}
static LexerResult parse_token_string(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Placeholder for token strings: ignores every argument and reports that
    // the feature is not implemented.
    (void)lexer_info;
    (void)c;
    (void)start;
    (void)start_line;
    LexerResult not_implemented = {SLS_ERROR, .error = (SlsError){"Lexer: Token Strings Not Implemented Error.", 1}};
    return not_implemented;
}
static LexerResult parse_array_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Placeholder for array literals: ignores every argument and reports that
    // the feature is not implemented.
    (void)lexer_info;
    (void)c;
    (void)start;
    (void)start_line;
    LexerResult not_implemented = {SLS_ERROR, .error = (SlsError){"Lexer: Array Literals Not Implemented Error.", 1}};
    return not_implemented;
}
static LexerResult parse_type_tuples(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Placeholder for type tuples: ignores every argument and reports that
    // the feature is not implemented.
    (void)lexer_info;
    (void)c;
    (void)start;
    (void)start_line;
    LexerResult not_implemented = {SLS_ERROR, .error = (SlsError){"Lexer: Type Tuples Not Implemented Error.", 1}};
    return not_implemented;
}
static LexerResult parse_identifiers_and_booleans(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Placeholder for identifiers and boolean literals: ignores every argument
    // and reports that the feature is not implemented.
    (void)lexer_info;
    (void)c;
    (void)start;
    (void)start_line;
    LexerResult not_implemented = {SLS_ERROR, .error = (SlsError){"Lexer: Identifiers and Booleans Not Implemented Error.", 1}};
    return not_implemented;
}
static LexerResult lexer_next(LexerInfo *lexer_info) {
    // Gets the next token from the source.
    // Skips whitespace and comments ('#' or "//" through the end of the line),
    // then dispatches on the first character of the token to the matching
    // parse_* routine and returns its result. A character no routine claims
    // yields an SLS_ERROR result.
    // All <ctype.h> calls receive (unsigned char) values: passing a negative
    // plain char (any byte >= 0x80 where char is signed) is undefined behavior.

    // Skip whitespace and comments
    for (;;) {
        const char upcoming = peek(lexer_info);
        if (isspace((unsigned char)upcoming)) {
            advance(lexer_info);
        } else if (upcoming == '#' || (upcoming == '/' && far_peek(lexer_info, 1) == '/')) {
            // A comment ends at the newline, or at end of input when the
            // last line has no trailing newline
            while (peek(lexer_info) != '\n' && peek(lexer_info) != '\0') {
                advance(lexer_info);
            }
        } else {
            // Start of a token. This includes a lone '/' that does not open a
            // comment: it must not be looped on, since nothing here consumes it.
            break;
        }
    }

    // Initialize beginning variables
    const char c = peek(lexer_info);
    const size_t start = lexer_info->pos;
    const size_t start_line = lexer_info->line;

    // End of file tokens
    if (c == '\0') return lexer_result(lexer_info, (Token){.type = TOKEN_EOF}, start, start_line);

    // Integers and Floats (a '-' counts only when a digit follows it)
    if (isdigit((unsigned char)c) || c == '.'
            || (c == '-' && isdigit((unsigned char)far_peek(lexer_info, 1))))
        return parse_numeric_literal(lexer_info, c, start, start_line);

    // Character Literals
    if (c == '\'') return parse_character_literal(lexer_info, c, start, start_line);

    // String Literals
    if (c == '\"') return parse_string_literal(lexer_info, c, start, start_line);

    // Token Strings
    if (c == '{') return parse_token_string(lexer_info, c, start, start_line);

    // Array Literals
    if (c == '[') return parse_array_literal(lexer_info, c, start, start_line);

    // Type Tuples
    if (c == '(') return parse_type_tuples(lexer_info, c, start, start_line);

    // Identifiers and Booleans: any remaining 7-bit ASCII character.
    // Written as a range test because isascii() is not part of ISO C.
    if ((unsigned char)c < 0x80) return parse_identifiers_and_booleans(lexer_info, c, start, start_line);

    // Lexing Error
    return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Unknown Character Error.", 1}};
}
void clean_token_result(LexerTokenResult *head) {
    // Frees every node of a LexerTokenResult linked list, starting at head.
    // Error nodes also have their message string freed. A null head is a no-op.
    LexerTokenResult *node = head;
    while (node != NULL) {
        // Grab the link before the node that holds it is released
        LexerTokenResult *following = node->next;
        if (node->type == SLS_ERROR) {
            free(node->error.message);
        }
        free(node);
        node = following;
    }
}
LexerTokenResult *get_token(LexerTokenResult *head, size_t i) {
    // Walks i links forward from head and returns the node reached, or a null
    // pointer when the list has i or fewer nodes.
    LexerTokenResult *node = head;
    size_t remaining = i;
    while (remaining > 0 && node != NULL) {
        node = node->next;
        remaining--;
    }
    return node;
}
LexerResult lexical_analysis(LexerInfo *lexer_info) {
    // Lexes the code loaded into lexer_info into a linked list of token nodes.
    // Lexing stops after the first end-of-file token or the first token-level
    // error node; either one is the last node of the returned list.
    // If lexer_next() itself fails, the partial list is freed and that failure
    // is returned unchanged.
    LexerTokenResult *head = NULL;
    LexerTokenResult *tail = NULL;

    for (;;) {
        // Get next token
        LexerResult step = lexer_next(lexer_info);
        if (step.type == SLS_ERROR) {
            clean_token_result(head);
            return step;
        }

        // Append the new node to the list
        LexerTokenResult *node = step.result;
        if (head == NULL) {
            head = node;
        } else {
            tail->next = node;
        }
        tail = node;

        // A successful step should always carry a node
        if (tail == NULL) {
            clean_token_result(head);
            return (LexerResult){SLS_ERROR, .error = (SlsError){"Unknown Error.", 1}};
        }

        // The type is checked first so the token payload of an error node is never read
        if (tail->type == SLS_ERROR || tail->result.type == TOKEN_EOF) {
            break;
        }
    }

    return (LexerResult) {.type = SLS_RESULT, .result = head};
}