YREA-SLS/SLS_C/src/lexer.c

802 lines
32 KiB
C

// Kyler Olsen
// YREA SLS
// Lexer
// November 2025
#include <ctype.h>
#include <string.h>
#include <stdint.h>
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include "sls/errors.h"
#include "sls/bool.h"
#include "sls/lexer.h"
#include "sls/string.h"
// Length bound associated with the display names below.
// NOTE(review): presumably a buffer size large enough for any name in these
// tables (the longest entry here is 13 characters) — confirm against callers.
const size_t TYPE_NAMES_SAFE_LENGTH = 20;
// Display names for token types.
// NOTE(review): presumably indexed by the token type enum declared in
// sls/lexer.h, so the order must match that enum — confirm.
const char *TOKEN_TYPES_NAMES[] = {
"End of File",
"Identifier",
"Integer",
"Float",
"Double",
"Character",
"String",
"Boolean",
"Array",
"Token String",
"Type Tuple",
};
// Display names for array element types.
// NOTE(review): presumably indexed by an array element type enum declared
// elsewhere — confirm the ordering.
const char *ARRAY_TYPES_NAMES[] = {
"Identifier",
"i64",
"i32",
"i16",
"i8",
"u64",
"u32",
"u16",
"u8",
"Float",
"Double",
"Character",
"String",
"Boolean",
"Inline Struct",
};
// Display names for the built-in integer types.
// NOTE(review): presumably indexed by IntegerBuiltInType (INTEGER_I64 ...
// INTEGER_U8) — confirm the ordering.
const char *INTEGER_TYPES_NAMES[] = {
"i64",
"i32",
"i16",
"i8",
"u64",
"u32",
"u16",
"u8",
};
void init_lexer(LexerInfo *lexer_info, SlsStr filename, SlsStr source_code) {
    // Attach the input to the lexer state.
    lexer_info->source_code = source_code;
    lexer_info->filename = filename;
    // Rewind the cursor: offset 0 in the source, reported as line 1, column 1.
    lexer_info->line = 1;
    lexer_info->column = 1;
    lexer_info->pos = 0;
}
static FileInfo get_file_info(LexerInfo *lexer_info, size_t start, size_t start_line) {
    // Builds the location record for a token that began at offset `start` on
    // line `start_line` and ends at the lexer's current position.
    // Note: `line` and `column` are taken from the CURRENT lexer position;
    // `length` and `lines` are the distances travelled since the token began.
    const size_t consumed_chars = lexer_info->pos - start;
    const size_t consumed_lines = lexer_info->line - start_line;
    FileInfo info = {
        .filename = lexer_info->filename,
        .line = lexer_info->line,
        .column = lexer_info->column,
        .length = consumed_chars,
        .lines = consumed_lines
    };
    return info;
}
static const char *get_token_text(LexerInfo *lexer_info, size_t start) {
    // Returns a pointer into the source buffer at offset `start`.
    // The pointer aliases the source text; nothing is copied.
    const char *source = lexer_info->source_code.str;
    return &source[start];
}
static char peek(LexerInfo *lexer_info) {
    // Reads the character under the cursor without consuming it.
    const size_t cursor = lexer_info->pos;
    return lexer_info->source_code.str[cursor];
}
static char far_peek(LexerInfo *lexer_info, size_t index) {
    // Reads the character `index` positions ahead of the cursor without
    // consuming anything. No bounds check is performed here, so the caller
    // must know that the requested position is still inside the buffer.
    const size_t target = lexer_info->pos + index;
    return lexer_info->source_code.str[target];
}
static char seek(LexerInfo *lexer_info, size_t index) {
    // Reads the character at absolute offset `index` in the source buffer.
    // The cursor is not moved and no bounds check is performed.
    const char *source = lexer_info->source_code.str;
    return source[index];
}
static char advance(LexerInfo *lexer_info) {
    // Consumes the character under the cursor and returns the one after it.
    // Line/column bookkeeping is driven by the character being LEFT: leaving
    // a newline starts a new line at column 1, anything else moves one column
    // to the right.
    // No end-of-input check is made here; callers must not advance past the
    // terminating '\0'.
    const char leaving = lexer_info->source_code.str[lexer_info->pos];
    if (leaving != '\n') {
        lexer_info->column++;
    } else {
        lexer_info->line++;
        lexer_info->column = 1;
    }
    lexer_info->pos += 1;
    return lexer_info->source_code.str[lexer_info->pos];
}
static LexerResult lexer_result(LexerInfo *lexer_info, Token token, size_t start, size_t start_line) {
    // Wraps a successfully lexed token in a freshly allocated list node.
    // Returns SLS_RESULT carrying the node, or SLS_ERROR if allocation fails.
    LexerTokenResult *node = malloc(sizeof *node);
    if (!node) {
        return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Failed to allocate memory."), 1}};
    }
    node->next = NULL;
    node->file_info = get_file_info(lexer_info, start, start_line);
    node->type = SLS_RESULT;
    node->result = token;
    return (LexerResult){SLS_RESULT, .result = node};
}
static LexerResult lexer_error(LexerInfo *lexer_info, SlsStr message, size_t start, size_t start_line) {
    // Wraps a lexing diagnostic in a freshly allocated list node.
    // The NODE is tagged SLS_ERROR (with error code 1), but the returned
    // LexerResult is SLS_RESULT: building the diagnostic node succeeded.
    // Only an allocation failure yields an SLS_ERROR LexerResult.
    LexerTokenResult *node = malloc(sizeof *node);
    if (!node) {
        return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Failed to allocate memory."), 1}};
    }
    node->next = NULL;
    node->file_info = get_file_info(lexer_info, start, start_line);
    node->type = SLS_ERROR;
    node->error.code = 1;
    node->error.message = message;
    return (LexerResult){SLS_RESULT, .result = node};
}
// Bit flags that are OR-ed together to describe a numeric type suffix:
// one "kind" flag (float base / unsigned; neither means signed integer)
// plus one width flag.
typedef enum {
NUMERIC_FLOAT_BASE = 1 << 0, // floating-point type ('f' suffix)
NUMERIC_UNSIGNED = 1 << 1, // unsigned integer type ('u' suffix)
NUMERIC_64 = 1 << 2, // 64-bit width
NUMERIC_32 = 1 << 3, // 32-bit width
NUMERIC_16 = 1 << 4, // 16-bit width
NUMERIC_8 = 1 << 5, // 8-bit width
} NumericTypesBase;
// The valid combinations of NumericTypesBase flags: floats (f64/f32),
// signed integers (width flag only) and unsigned integers (width flag
// combined with NUMERIC_UNSIGNED).
typedef enum {
NUMERIC_F64 = NUMERIC_64 | NUMERIC_FLOAT_BASE,
NUMERIC_F32 = NUMERIC_32 | NUMERIC_FLOAT_BASE,
NUMERIC_I64 = NUMERIC_64,
NUMERIC_I32 = NUMERIC_32,
NUMERIC_I16 = NUMERIC_16,
NUMERIC_I8 = NUMERIC_8,
NUMERIC_U64 = NUMERIC_64 | NUMERIC_UNSIGNED,
NUMERIC_U32 = NUMERIC_32 | NUMERIC_UNSIGNED,
NUMERIC_U16 = NUMERIC_16 | NUMERIC_UNSIGNED,
NUMERIC_U8 = NUMERIC_8 | NUMERIC_UNSIGNED,
} NumericTypes;
// Result of mapping a NumericTypes value to an IntegerBuiltInType.
// `type` selects which member of the anonymous union is valid.
typedef struct {
SlsResultType type;
union {
IntegerBuiltInType integer_type; // type == SLS_RESULT
SlsError error; // type == SLS_ERROR
};
} IntegerTypeResult;
static IntegerTypeResult get_integer_type(NumericTypes numeric_type) {
    // Maps an integer NumericTypes value onto the matching IntegerBuiltInType.
    // Any other value (the float types, or an unrecognised flag combination)
    // produces an SLS_ERROR result.
    switch (numeric_type) {
    case NUMERIC_I64:
        return (IntegerTypeResult){SLS_RESULT, .integer_type = INTEGER_I64};
    case NUMERIC_I32:
        return (IntegerTypeResult){SLS_RESULT, .integer_type = INTEGER_I32};
    case NUMERIC_I16:
        return (IntegerTypeResult){SLS_RESULT, .integer_type = INTEGER_I16};
    case NUMERIC_I8:
        return (IntegerTypeResult){SLS_RESULT, .integer_type = INTEGER_I8};
    case NUMERIC_U64:
        return (IntegerTypeResult){SLS_RESULT, .integer_type = INTEGER_U64};
    case NUMERIC_U32:
        return (IntegerTypeResult){SLS_RESULT, .integer_type = INTEGER_U32};
    case NUMERIC_U16:
        return (IntegerTypeResult){SLS_RESULT, .integer_type = INTEGER_U16};
    case NUMERIC_U8:
        return (IntegerTypeResult){SLS_RESULT, .integer_type = INTEGER_U8};
    default:
        break;
    }
    return (IntegerTypeResult){SLS_ERROR, .error = (SlsError){SLS_STR("Lexer Error: Encountered a Float where there should not be one."), 1}};
}
static uint64_t create_binary_integer(LexerInfo *lexer_info, size_t start) {
    // Converts the binary literal that starts at offset `start` and ends at
    // the lexer's current position into a 64-bit value.
    // A leading '-' yields the two's-complement negation. Overflow past
    // 64 bits is not detected; the value simply wraps.
    const char *text = get_token_text(lexer_info, start);
    const size_t span = lexer_info->pos - start;
    const int is_negative = (text[0] == '-');
    uint64_t accum = 0;
    // Skip the two-character base prefix, plus the sign when present.
    for (size_t idx = is_negative ? 3 : 2; idx < span; idx++) {
        const char ch = text[idx];
        // Stop at the end of the digits (a type suffix begins with ':').
        if (isspace(ch) || ch == '/' || ch == '\0' || ch == ':') break;
        // Separators carry no value.
        if (ch == '.' || ch == '_') continue;
        accum <<= 1;
        if (ch == '1') accum |= 1;
    }
    return is_negative ? (~accum) + 1 : accum;
}
static uint64_t create_octal_integer(LexerInfo *lexer_info, size_t start) {
    // Converts the octal literal that starts at offset `start` and ends at
    // the lexer's current position into a 64-bit value.
    // A leading '-' yields the two's-complement negation. Overflow past
    // 64 bits is not detected; the value simply wraps.
    const char *text = get_token_text(lexer_info, start);
    const size_t span = lexer_info->pos - start;
    const int is_negative = (text[0] == '-');
    uint64_t accum = 0;
    // Skip the two-character base prefix, plus the sign when present.
    for (size_t idx = is_negative ? 3 : 2; idx < span; idx++) {
        const char ch = text[idx];
        // Stop at the end of the digits (a type suffix begins with ':').
        if (isspace(ch) || ch == '/' || ch == '\0' || ch == ':') break;
        // Separators carry no value.
        if (ch == '.' || ch == '_') continue;
        accum *= 8;
        // '0' and any unexpected character only shift the accumulator.
        if (ch >= '1' && ch <= '7') accum += (uint64_t)(ch - '0');
    }
    return is_negative ? (~accum) + 1 : accum;
}
static uint64_t create_decimal_integer(LexerInfo *lexer_info, size_t start) {
    // Converts the decimal literal that starts at offset `start` and ends at
    // the lexer's current position into a 64-bit value.
    // A leading '-' yields the two's-complement negation. Overflow past
    // 64 bits is not detected; the value simply wraps.
    const char *text = get_token_text(lexer_info, start);
    const size_t span = lexer_info->pos - start;
    const int is_negative = (text[0] == '-');
    uint64_t accum = 0;
    // Decimal literals have no base prefix; only the sign is skipped.
    for (size_t idx = is_negative ? 1 : 0; idx < span; idx++) {
        const char ch = text[idx];
        // Stop at the end of the digits (a type suffix begins with ':').
        if (isspace(ch) || ch == '/' || ch == '\0' || ch == ':') break;
        // Digit separators carry no value.
        if (ch == '_') continue;
        accum *= 10;
        // '0' and any unexpected character only shift the accumulator.
        if (ch >= '1' && ch <= '9') accum += (uint64_t)(ch - '0');
    }
    return is_negative ? (~accum) + 1 : accum;
}
static uint64_t create_hexadecimal_integer(LexerInfo *lexer_info, size_t start) {
    // Converts the hexadecimal literal that starts at offset `start` and ends
    // at the lexer's current position into a 64-bit value. Both upper- and
    // lower-case digits A-F are accepted.
    // A leading '-' yields the two's-complement negation. Overflow past
    // 64 bits is not detected; the value simply wraps.
    const char *text = get_token_text(lexer_info, start);
    const size_t span = lexer_info->pos - start;
    const int is_negative = (text[0] == '-');
    uint64_t accum = 0;
    // Skip the two-character base prefix, plus the sign when present.
    for (size_t idx = is_negative ? 3 : 2; idx < span; idx++) {
        const char ch = text[idx];
        // Stop at the end of the digits (a type suffix begins with ':').
        if (isspace(ch) || ch == '/' || ch == '\0' || ch == ':') break;
        // Separators carry no value.
        if (ch == '.' || ch == '_') continue;
        accum *= 16;
        // '0' and any unexpected character only shift the accumulator.
        if (ch >= '1' && ch <= '9') {
            accum += (uint64_t)(ch - '0');
        } else if (ch >= 'a' && ch <= 'f') {
            accum += (uint64_t)(ch - 'a') + 10;
        } else if (ch >= 'A' && ch <= 'F') {
            accum += (uint64_t)(ch - 'A') + 10;
        }
    }
    return is_negative ? (~accum) + 1 : accum;
}
static LexerResult create_integer_token(LexerInfo *lexer_info, IntegerBuiltInType type, uint64_t value, size_t start, size_t start_line) {
    // Range-checks `value` against the requested integer type and, when it
    // fits, wraps it in a TOKEN_INTEGER token. Out-of-range values produce an
    // error token via lexer_error.
    //
    // `value` arrives as a 64-bit two's-complement pattern: a literal written
    // with a leading '-' has already been negated by the create_*_integer
    // helpers. Comparing that pattern directly against UINTn_MAX therefore
    // rejected EVERY negative i32/i16/i8 literal (e.g. -1:i32), so negative
    // literals are checked by magnitude instead.
    const int negative = (seek(lexer_info, start) == '-');
    // Undo the two's-complement negation to recover |value| for negatives.
    const uint64_t magnitude = negative ? (~value) + 1 : value;

    switch (type) {
    case INTEGER_I64:
        break;
    case INTEGER_U64:
        // Consistent with u32/u16/u8: an unsigned literal may not be negative.
        if (negative) {
            return lexer_error(lexer_info, SLS_STR("Integer overflow: value exceeds range for u64."), start, start_line);
        }
        break;
    // NOTE(review): for non-negative signed literals the original upper bound
    // (UINTn_MAX, i.e. "fits in n bits") is kept so that previously accepted
    // input is still accepted; tighten to INTn_MAX if bit-pattern literals
    // such as 0xFF:i8 are not meant to be valid.
    case INTEGER_I32:
        if (negative ? (magnitude > (uint64_t)INT32_MAX + 1) : (value > (uint64_t)UINT32_MAX)) {
            return lexer_error(lexer_info, SLS_STR("Integer overflow: value exceeds range for i32."), start, start_line);
        }
        break;
    case INTEGER_I16:
        if (negative ? (magnitude > (uint64_t)INT16_MAX + 1) : (value > (uint64_t)UINT16_MAX)) {
            return lexer_error(lexer_info, SLS_STR("Integer overflow: value exceeds range for i16."), start, start_line);
        }
        break;
    case INTEGER_I8:
        if (negative ? (magnitude > (uint64_t)INT8_MAX + 1) : (value > (uint64_t)UINT8_MAX)) {
            return lexer_error(lexer_info, SLS_STR("Integer overflow: value exceeds range for i8."), start, start_line);
        }
        break;
    case INTEGER_U32:
        if (negative || value > (uint64_t)UINT32_MAX) {
            return lexer_error(lexer_info, SLS_STR("Integer overflow: value exceeds range for u32."), start, start_line);
        }
        break;
    case INTEGER_U16:
        if (negative || value > (uint64_t)UINT16_MAX) {
            return lexer_error(lexer_info, SLS_STR("Integer overflow: value exceeds range for u16."), start, start_line);
        }
        break;
    case INTEGER_U8:
        if (negative || value > (uint64_t)UINT8_MAX) {
            return lexer_error(lexer_info, SLS_STR("Integer overflow: value exceeds range for u8."), start, start_line);
        }
        break;
    }
    return lexer_result(lexer_info, (Token){TOKEN_INTEGER, .integer_literal = (IntegerLiteral){.type = type, .value = value}}, start, start_line);
}
static double create_float(LexerInfo *lexer_info, size_t start) {
    // Converts the decimal floating-point literal that starts at offset
    // `start` and ends at the lexer's current position into a double.
    // Underscores are ignored, a leading '-' negates the result, and
    // conversion stops at whitespace, '/', '\0' or the ':' of a type suffix.
    //
    // The divisor for fractional digits is tracked as a double. The previous
    // uint64_t divisor overflowed after 19 fractional digits and could wrap
    // to 0, which made later digits count as integer digits again. Results
    // for literals with up to 19 fractional digits are unchanged.
    const char *token = get_token_text(lexer_info, start);
    const size_t length = lexer_info->pos - start;
    int negative = 0;
    int in_fraction = 0;
    double value = 0;
    double scale = 1.0; // 10^(number of fractional digits seen so far)
    size_t i = 0;
    if (token[0] == '-') {
        negative = 1;
        i = 1;
    }
    for (; i < length; i++) {
        const char ch = token[i];
        if (isspace((unsigned char)ch) || ch == '/' || ch == '\0' || ch == ':') break;
        if (ch == '_') continue;
        if (ch == '.') {
            in_fraction = 1;
            scale = 1.0;
            continue;
        }
        // Integer digits shift the value left; fractional digits shrink the
        // weight of the digit being added.
        if (in_fraction) scale *= 10.0;
        else value *= 10;
        // '0' and any unexpected character contribute nothing beyond the shift.
        if (ch >= '1' && ch <= '9') value += (double)(ch - '0') / scale;
    }
    if (negative) value = -value;
    return value;
}
static LexerResult create_float_token(LexerInfo *lexer_info, NumericTypes type, size_t start, size_t start_line) {
    // Converts the literal text to a double and emits TOKEN_DOUBLE for
    // NUMERIC_F64, or TOKEN_FLOAT (value narrowed to float) for any other type.
    const double parsed = create_float(lexer_info, start);
    Token token;
    if (type != NUMERIC_F64) {
        token = (Token){TOKEN_FLOAT, .float_literal = (float)parsed};
    } else {
        token = (Token){TOKEN_DOUBLE, .double_literal = parsed};
    }
    return lexer_result(lexer_info, token, start, start_line);
}
// The syntactic form of a numeric literal, as determined by the parse_*
// function that consumed its digits. Passed to parse_numeric_type so it can
// validate the type suffix and pick the matching value conversion.
typedef enum {
NUMERIC_BINARY, // consumed by parse_binary_integer
NUMERIC_OCTAL, // consumed by parse_octal_integer
NUMERIC_DECIMAL, // consumed by parse_decimal_integer
NUMERIC_HEXADECIMAL, // consumed by parse_hexadecimal_integer
NUMERIC_FLOAT, // consumed by parse_float (digits after a '.')
NUMERIC_EXPONENTIAL, // exponent form; not implemented yet
} NumericLiteralTypes;
// Parses the type suffix of a numeric literal (":f64", ":i32", ":u8", ...)
// and produces the final token.
//
// On entry the cursor is on the ':' that introduces the suffix; the incoming
// value of `c` is overwritten immediately. `start`/`start_line` mark where the
// whole literal began and `numeric_literal_type` is the form of the digits
// already consumed.
//
// Returns an integer or float token on success, an error token (via
// lexer_error) for an invalid or disallowed suffix, and an SLS_ERROR
// LexerResult for out-of-memory, an impossible float/integer mismatch, or the
// unimplemented exponential form.
static LexerResult parse_numeric_type(LexerInfo *lexer_info, char c, size_t start, size_t start_line, NumericLiteralTypes numeric_literal_type) {
NumericTypes numeric_type = 0;
// Step over ':' onto the type letter.
c = advance(lexer_info);
if (c == 'f') {
numeric_type |= NUMERIC_FLOAT_BASE;
// A float suffix is only allowed on decimal-form literals.
if (numeric_literal_type == NUMERIC_DECIMAL || numeric_literal_type == NUMERIC_FLOAT || numeric_literal_type == NUMERIC_EXPONENTIAL) {
c = advance(lexer_info);
// `c` is the first width digit; far_peek(1) looks at the second.
if (c == '6' && far_peek(lexer_info, 1) == '4') {
numeric_type |= NUMERIC_64;
c = advance(lexer_info);
c = advance(lexer_info);
} else if (c == '3' && far_peek(lexer_info, 1) == '2') {
numeric_type |= NUMERIC_32;
c = advance(lexer_info);
c = advance(lexer_info);
} else {
return lexer_error(lexer_info, SLS_STR("Invalid float type: must be of type 'f64' or 'f32'."), start, start_line);
}
} else {
return lexer_error(lexer_info, SLS_STR("Invalid numeric literal: float type not allowed."), start, start_line);
}
} else if (c == 'i' || c == 'u') {
// Literals containing a '.' or an exponent cannot take an integer suffix.
if (numeric_literal_type == NUMERIC_FLOAT || numeric_literal_type == NUMERIC_EXPONENTIAL)
return lexer_error(lexer_info, SLS_STR("Invalid float type: must be of type 'f64' or 'f32'."), start, start_line);
if (c == 'u') numeric_type |= NUMERIC_UNSIGNED;
c = advance(lexer_info);
// Two-digit widths consume two characters; '8' consumes one.
if (c == '6' && far_peek(lexer_info, 1) == '4') {
numeric_type |= NUMERIC_64;
c = advance(lexer_info);
c = advance(lexer_info);
} else if (c == '3' && far_peek(lexer_info, 1) == '2') {
numeric_type |= NUMERIC_32;
c = advance(lexer_info);
c = advance(lexer_info);
} else if (c == '1' && far_peek(lexer_info, 1) == '6') {
numeric_type |= NUMERIC_16;
c = advance(lexer_info);
c = advance(lexer_info);
} else if (c == '8') {
numeric_type |= NUMERIC_8;
c = advance(lexer_info);
} else {
if (numeric_type & NUMERIC_UNSIGNED) {
return lexer_error(lexer_info, SLS_STR("Invalid unsigned integer type: must be of type 'u64', 'u32', 'u16', and 'u8'."), start, start_line);
} else {
return lexer_error(lexer_info, SLS_STR("Invalid signed integer type: must be of type 'i64', 'i32', 'i16', and 'i8'."), start, start_line);
}
}
} else {
// Unknown type letter: the message depends on whether 'f' would have been legal.
if (numeric_literal_type == NUMERIC_DECIMAL || numeric_literal_type == NUMERIC_FLOAT || numeric_literal_type == NUMERIC_EXPONENTIAL) {
return lexer_error(lexer_info, SLS_STR("Invalid numeric type: type must start with 'f', 'i', or 'u'."), start, start_line);
} else {
return lexer_error(lexer_info, SLS_STR("Invalid integer type: type must start with 'i' or 'u'."), start, start_line);
}
// The suffix must be followed by whitespace, '/' or end of input.
} if (isspace(c) || c == '/' || c == '\0') {
IntegerTypeResult integer_type = get_integer_type(numeric_type);
// A decimal integer with a float suffix (e.g. 5:f32) is converted as a float.
if (numeric_literal_type == NUMERIC_DECIMAL && numeric_type & NUMERIC_FLOAT_BASE)
numeric_literal_type = NUMERIC_FLOAT;
uint64_t value;
switch (numeric_literal_type) {
case NUMERIC_BINARY:
if (integer_type.type == SLS_ERROR) return (LexerResult){SLS_ERROR, .error = integer_type.error};
value = create_binary_integer(lexer_info, start);
return create_integer_token(lexer_info, integer_type.integer_type, value, start, start_line);
case NUMERIC_OCTAL:
if (integer_type.type == SLS_ERROR) return (LexerResult){SLS_ERROR, .error = integer_type.error};
value = create_octal_integer(lexer_info, start);
return create_integer_token(lexer_info, integer_type.integer_type, value, start, start_line);
case NUMERIC_DECIMAL:
if (integer_type.type == SLS_ERROR) return (LexerResult){SLS_ERROR, .error = integer_type.error};
value = create_decimal_integer(lexer_info, start);
return create_integer_token(lexer_info, integer_type.integer_type, value, start, start_line);
case NUMERIC_HEXADECIMAL:
if (integer_type.type == SLS_ERROR) return (LexerResult){SLS_ERROR, .error = integer_type.error};
value = create_hexadecimal_integer(lexer_info, start);
return create_integer_token(lexer_info, integer_type.integer_type, value, start, start_line);
case NUMERIC_FLOAT:
return create_float_token(lexer_info, numeric_type, start, start_line);
case NUMERIC_EXPONENTIAL:
// Falls out of the switch to the "not implemented" error below.
break;
}
return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Lexer: Numeric Literal Not Implemented Error."), 1}};
}
// Anything else directly after the suffix is reported with the offending character.
SlsStr error_msg = sls_format(SLS_STR("Invalid numeric literal: unexpected '%c' in numeric type."), c);
if (error_msg.str == NULL) return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Out Of Memory Error."), 1}};
return lexer_error(lexer_info, error_msg, start, start_line);
}
static LexerResult parse_binary_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Consumes the digits of a binary literal (`c` is the first character
    // after the base prefix) and produces its token. Without a type suffix
    // the literal is typed i64.
    for (;;) {
        if (c != '0' && c != '1' && c != '_') break;
        c = advance(lexer_info);
    }
    // A ':' hands over to the type-suffix parser.
    if (c == ':') {
        return parse_numeric_type(lexer_info, c, start, start_line, NUMERIC_BINARY);
    }
    // Anything other than a delimiter here is a malformed literal.
    if (!(isspace(c) || c == '/' || c == '\0')) {
        SlsStr error_msg = sls_format(SLS_STR("Invalid binary literal: unexpected '%c' in binary integer."), c);
        if (error_msg.str == NULL) {
            return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Out Of Memory Error."), 1}};
        }
        return lexer_error(lexer_info, error_msg, start, start_line);
    }
    const uint64_t parsed = create_binary_integer(lexer_info, start);
    return create_integer_token(lexer_info, INTEGER_I64, parsed, start, start_line);
}
static LexerResult parse_octal_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Consumes the digits of an octal literal (`c` is the first character
    // after the base prefix) and produces its token. Without a type suffix
    // the literal is typed i64.
    for (;;) {
        const int is_octal_digit = (c >= '0' && c <= '7');
        if (!is_octal_digit && c != '_') break;
        c = advance(lexer_info);
    }
    // A ':' hands over to the type-suffix parser.
    if (c == ':') {
        return parse_numeric_type(lexer_info, c, start, start_line, NUMERIC_OCTAL);
    }
    // Anything other than a delimiter here (including '8' or '9') is a
    // malformed literal.
    if (!(isspace(c) || c == '/' || c == '\0')) {
        SlsStr error_msg = sls_format(SLS_STR("Invalid octal literal: unexpected '%c' in octal integer."), c);
        if (error_msg.str == NULL) {
            return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Out Of Memory Error."), 1}};
        }
        return lexer_error(lexer_info, error_msg, start, start_line);
    }
    const uint64_t parsed = create_octal_integer(lexer_info, start);
    return create_integer_token(lexer_info, INTEGER_I64, parsed, start, start_line);
}
static LexerResult parse_exponential(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Placeholder: exponent notation is not supported yet.
    // All parameters are intentionally unused; the call always fails with an
    // SLS_ERROR result.
    (void)lexer_info;
    (void)c;
    (void)start;
    (void)start_line;
    const SlsError not_implemented = (SlsError){SLS_STR("Lexer: Float Exponential Not Implemented Error."), 1};
    return (LexerResult){SLS_ERROR, .error = not_implemented};
}
static LexerResult parse_float(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Consumes the fractional digits of a floating-point literal (`c` is the
    // first character after the '.') and produces its token. Without a type
    // suffix the literal is typed f64.
    for (;;) {
        if (!isdigit(c) && c != '_') break;
        c = advance(lexer_info);
    }
    switch (c) {
    case 'e':
    case 'E':
        return parse_exponential(lexer_info, c, start, start_line);
    case ':':
        return parse_numeric_type(lexer_info, c, start, start_line, NUMERIC_FLOAT);
    default:
        break;
    }
    // Anything other than a delimiter here is a malformed literal.
    if (!(isspace(c) || c == '/' || c == '\0')) {
        SlsStr error_msg = sls_format(SLS_STR("Invalid float literal: unexpected '%c' in float."), c);
        if (error_msg.str == NULL) {
            return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Out Of Memory Error."), 1}};
        }
        return lexer_error(lexer_info, error_msg, start, start_line);
    }
    return create_float_token(lexer_info, NUMERIC_F64, start, start_line);
}
static LexerResult parse_decimal_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Consumes the digits of a decimal literal and produces its token.
    // A '.' switches to float parsing, 'e'/'E' to the exponent parser and ':'
    // to the type-suffix parser. Without a suffix the literal is typed i64.
    for (;;) {
        if (!isdigit(c) && c != '_') break;
        c = advance(lexer_info);
    }
    switch (c) {
    case '.':
        // Step over the '.' before handing the fraction to parse_float.
        c = advance(lexer_info);
        return parse_float(lexer_info, c, start, start_line);
    case 'e':
    case 'E':
        return parse_exponential(lexer_info, c, start, start_line);
    case ':':
        return parse_numeric_type(lexer_info, c, start, start_line, NUMERIC_DECIMAL);
    default:
        break;
    }
    // Anything other than a delimiter here is a malformed literal.
    if (!(isspace(c) || c == '/' || c == '\0')) {
        SlsStr error_msg = sls_format(SLS_STR("Invalid decimal literal: unexpected '%c' in decimal integer."), c);
        if (error_msg.str == NULL) {
            return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Out Of Memory Error."), 1}};
        }
        return lexer_error(lexer_info, error_msg, start, start_line);
    }
    const uint64_t parsed = create_decimal_integer(lexer_info, start);
    return create_integer_token(lexer_info, INTEGER_I64, parsed, start, start_line);
}
static LexerResult parse_hexadecimal_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Consumes the digits of a hexadecimal literal (`c` is the first character
    // after the base prefix) and produces its token. Without a type suffix
    // the literal is typed i64.
    for (;;) {
        if (!isxdigit(c) && c != '_') break;
        c = advance(lexer_info);
    }
    // A ':' hands over to the type-suffix parser.
    if (c == ':') {
        return parse_numeric_type(lexer_info, c, start, start_line, NUMERIC_HEXADECIMAL);
    }
    // Anything other than a delimiter here is a malformed literal.
    if (!(isspace(c) || c == '/' || c == '\0')) {
        SlsStr error_msg = sls_format(SLS_STR("Invalid hexadecimal literal: unexpected '%c' in hexadecimal integer."), c);
        if (error_msg.str == NULL) {
            return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Out Of Memory Error."), 1}};
        }
        return lexer_error(lexer_info, error_msg, start, start_line);
    }
    const uint64_t parsed = create_hexadecimal_integer(lexer_info, start);
    return create_integer_token(lexer_info, INTEGER_I64, parsed, start, start_line);
}
static LexerResult parse_numeric_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Entry point for numeric literals: skips an optional sign, then
    // dispatches on the base prefix (0b / 0o / 0x, either case). Everything
    // else, including a lone leading zero, is handled as decimal.
    if (c == '-') {
        c = advance(lexer_info);
    }
    if (c != '0') {
        return parse_decimal_integer(lexer_info, c, start, start_line);
    }
    // Consume the leading '0' and inspect the possible base letter.
    c = advance(lexer_info);
    switch (c) {
    case 'b':
    case 'B':
        c = advance(lexer_info);
        return parse_binary_integer(lexer_info, c, start, start_line);
    case 'o':
    case 'O':
        c = advance(lexer_info);
        return parse_octal_integer(lexer_info, c, start, start_line);
    case 'x':
    case 'X':
        c = advance(lexer_info);
        return parse_hexadecimal_integer(lexer_info, c, start, start_line);
    default:
        return parse_decimal_integer(lexer_info, c, start, start_line);
    }
}
static LexerResult parse_character_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Parses a character literal. On entry the opening quote has already been
    // consumed and `c` is the first character after it.
    // Supported escapes: \n \r \t \\ \' \0.
    // Returns a TOKEN_CHARACTER token, an error token for a malformed
    // literal, or an SLS_ERROR result when building a message runs out of
    // memory.
    if (c == '\'')
        return lexer_error(lexer_info, SLS_STR("Invalid character literal: empty character literal."), start, start_line);
    char value = '\0';
    if (c == '\\') {
        c = advance(lexer_info);
        switch (c) {
        case 'n':
            value = '\n';
            break;
        case 'r':
            value = '\r';
            break;
        case 't':
            value = '\t';
            break;
        case '\\':
            value = '\\';
            break;
        case '\'':
            value = '\'';
            break;
        case '0':
            value = '\0';
            break;
        case '\0':
            // End of input right after the backslash: report it as unclosed
            // instead of formatting a NUL into an "unknown escape" message.
            return lexer_error(lexer_info, SLS_STR("Invalid character literal: unclosed character literal."), start, start_line);
        default: {
            // Braces are required: before C23 a declaration may not directly
            // follow a label.
            SlsStr error_msg = sls_format(SLS_STR("Invalid character literal: unknown escape sequence '\\%c'."), c);
            if (error_msg.str == NULL) return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Out Of Memory Error."), 1}};
            return lexer_error(lexer_info, error_msg, start, start_line);
        }
        }
    } else if (c == '\n' || c == '\r' || c == '\0') {
        // '\0' means the input ended right after the opening quote; advancing
        // from here would step past the terminator.
        return lexer_error(lexer_info, SLS_STR("Invalid character literal: unclosed character literal."), start, start_line);
    } else {
        value = c;
    }
    // Move on to what must be the closing quote.
    c = advance(lexer_info);
    if (isspace((unsigned char)c) || c == '/' || c == '\0')
        return lexer_error(lexer_info, SLS_STR("Invalid character literal: unclosed character literal."), start, start_line);
    else if (c != '\'') {
        SlsStr error_msg = sls_format(SLS_STR("Invalid character literal: unexpected '%c' in character."), c);
        if (error_msg.str == NULL) return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Out Of Memory Error."), 1}};
        return lexer_error(lexer_info, error_msg, start, start_line);
    }
    // Consume the closing quote so the token span covers the whole literal.
    advance(lexer_info);
    return lexer_result(lexer_info, (Token){TOKEN_CHARACTER, .character_literal = (uint8_t){value}}, start, start_line);
}
static LexerResult parse_string_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Placeholder: string literals are not supported yet.
    // All parameters are intentionally unused; the call always fails with an
    // SLS_ERROR result.
    (void)lexer_info;
    (void)c;
    (void)start;
    (void)start_line;
    const SlsError not_implemented = (SlsError){SLS_STR("Lexer: String Literals Not Implemented Error."), 1};
    return (LexerResult){SLS_ERROR, .error = not_implemented};
}
static LexerResult parse_token_string(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Placeholder: token strings are not supported yet.
    // All parameters are intentionally unused; the call always fails with an
    // SLS_ERROR result.
    (void)lexer_info;
    (void)c;
    (void)start;
    (void)start_line;
    const SlsError not_implemented = (SlsError){SLS_STR("Lexer: Token Strings Not Implemented Error."), 1};
    return (LexerResult){SLS_ERROR, .error = not_implemented};
}
static LexerResult parse_array_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Placeholder: array literals are not supported yet.
    // All parameters are intentionally unused; the call always fails with an
    // SLS_ERROR result.
    (void)lexer_info;
    (void)c;
    (void)start;
    (void)start_line;
    const SlsError not_implemented = (SlsError){SLS_STR("Lexer: Array Literals Not Implemented Error."), 1};
    return (LexerResult){SLS_ERROR, .error = not_implemented};
}
static LexerResult parse_type_tuples(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Placeholder: type tuples are not supported yet.
    // All parameters are intentionally unused; the call always fails with an
    // SLS_ERROR result.
    (void)lexer_info;
    (void)c;
    (void)start;
    (void)start_line;
    const SlsError not_implemented = (SlsError){SLS_STR("Lexer: Type Tuples Not Implemented Error."), 1};
    return (LexerResult){SLS_ERROR, .error = not_implemented};
}
Boolean is_identifier_continue(LexerInfo *lexer_info, char c) {
    // Reports whether `c` may appear inside an identifier.
    // Rejected: non-printable characters, whitespace, brackets of all three
    // kinds, both quote characters, '.', ':' and '#', and a '/' when the
    // character one past the lexer's cursor is also '/'.
    if (!isprint(c)) {
        return FALSE;
    }
    switch (c) {
    case '/':
        // Note: the lookahead is relative to the lexer cursor, not to `c`.
        if (far_peek(lexer_info, 1) == '/') return FALSE;
        break;
    case '{': case '}':
    case '[': case ']':
    case '(': case ')':
    case '\'': case '"':
    case '.': case ':': case '#':
        return FALSE;
    default:
        break;
    }
    return (isspace(c) || c == '\0') ? FALSE : TRUE;
}
Boolean is_identifier_start(LexerInfo *lexer_info, char c) {
    // Reports whether an identifier can begin at the cursor. When the text
    // starts with "::", the check is applied to the character after that
    // prefix. The first character may be anything is_identifier_continue
    // accepts except a digit.
    char first = c;
    if (first == ':' && far_peek(lexer_info, 1) == ':') {
        first = far_peek(lexer_info, 2);
    }
    if (isdigit(first)) {
        return FALSE;
    }
    return is_identifier_continue(lexer_info, first) ? TRUE : FALSE;
}
static LexerResult parse_identifiers_and_booleans(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Lexes an identifier, optionally prefixed with "::" (recorded as
    // `is_literal`), or one of the boolean keywords "true" / "false".
    // Returns the token, or an SLS_ERROR result if memory runs out.
    Boolean literal = FALSE;
    if (c == ':' && far_peek(lexer_info, 1) == ':') {
        literal = TRUE;
        c = advance(lexer_info);
        c = advance(lexer_info);
    }
    // Consume the identifier body, counting its characters.
    size_t length = 0;
    while (is_identifier_continue(lexer_info, c)) {
        c = advance(lexer_info);
        length++;
    }
    // Copy the name (without the "::" prefix) into a NUL-terminated scratch
    // buffer so it can be turned into an SlsStr.
    char *name_value = calloc(length + 1, sizeof *name_value);
    if (name_value == NULL)
        return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Out Of Memory Error."), 1}};
    memcpy(name_value, lexer_info->source_code.str + start + (literal ? 2 : 0), length);
    SlsStr name = sls_str_malloc(name_value, length);
    free(name_value);
    // NOTE(review): the result of sls_str_malloc is not checked here because
    // its failure convention is not visible in this file — confirm.

    const int is_false = (sls_str_cmp(name, SLS_STR("false")) == 0);
    const int is_true = !is_false && (sls_str_cmp(name, SLS_STR("true")) == 0);
    if (is_false || is_true) {
        // Boolean tokens do not carry the name, so release it here instead of
        // leaking it.
        sls_str_free(&name);
        return lexer_result(lexer_info, (Token){TOKEN_BOOLEAN, .boolean_literal = is_true ? TRUE : FALSE}, start, start_line);
    }

    LexerResult result = lexer_result(lexer_info, (Token){TOKEN_IDENTIFIER, .identifier = (Identifier){.is_literal = literal, .name = name}}, start, start_line);
    // If no node could be allocated, nothing took ownership of the name.
    if (result.type == SLS_ERROR) sls_str_free(&name);
    return result;
}
static LexerResult lexer_next(LexerInfo *lexer_info) {
    // Gets the next token from the source.
    // Skips whitespace and comments ('#' or "//" up to the end of the line),
    // then dispatches on the first significant character.
    // Returns the token (or an error token) wrapped in a LexerResult, or an
    // SLS_ERROR result for unimplemented token kinds, unknown characters and
    // allocation failures.
    for (;;) {
        const char look = peek(lexer_info);
        if (isspace((unsigned char)look)) {
            advance(lexer_info);
        } else if (look == '#' || (look == '/' && far_peek(lexer_info, 1) == '/')) {
            // Stop at end of input as well as at a newline, so a comment on
            // the last line cannot run the cursor past the terminator.
            while (peek(lexer_info) != '\n' && peek(lexer_info) != '\0') advance(lexer_info);
        } else {
            // A lone '/' is not a comment: leave it for the token dispatch
            // below instead of spinning on it here.
            break;
        }
    }
    // Initialize beginning variables
    char c = peek(lexer_info);
    size_t start = lexer_info->pos;
    size_t start_line = lexer_info->line;
    // End of file tokens
    if (c == '\0') return lexer_result(lexer_info, (Token){.type = TOKEN_EOF}, start, start_line);
    // Integers and Floats (a leading '.' or '-' counts only when a digit follows)
    if (isdigit((unsigned char)c)
        || (c == '.' && isdigit((unsigned char)far_peek(lexer_info, 1)))
        || (c == '-' && isdigit((unsigned char)far_peek(lexer_info, 1))))
        return parse_numeric_literal(lexer_info, c, start, start_line);
    // Character Literals
    if (c == '\'') {
        c = advance(lexer_info);
        return parse_character_literal(lexer_info, c, start, start_line);
    }
    // String Literals
    if (c == '\"') return parse_string_literal(lexer_info, c, start, start_line);
    // Token Strings
    if (c == '{') return parse_token_string(lexer_info, c, start, start_line);
    // Array Literals
    if (c == '[') return parse_array_literal(lexer_info, c, start, start_line);
    // Type Tuples
    if (c == '(') return parse_type_tuples(lexer_info, c, start, start_line);
    // Identifiers and Booleans
    if (is_identifier_start(lexer_info, c))
        return parse_identifiers_and_booleans(lexer_info, c, start, start_line);
    // Lexing Error
    return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Lexer: Unknown Character Error."), 1}};
}
void clean_token_result(LexerTokenResult *head) {
    // Deallocates a LexerTokenResult linked list, including the strings its
    // nodes carry. Passing NULL is a no-op.
    while (head != NULL) {
        LexerTokenResult *next = head->next;
        if (head->type == SLS_ERROR) {
            sls_str_free(&head->error.message);
        } else if (head->result.type == TOKEN_IDENTIFIER) {
            // Identifier names are allocated by the lexer (sls_str_malloc) and
            // were previously never released.
            sls_str_free(&head->result.identifier.name);
        } else if (head->result.type == TOKEN_STRING) {
            // NOTE(review): kept as in the original. This frees the error
            // message slot of a non-error node; once string literals are
            // implemented it presumably should free the token's own string
            // payload instead — confirm.
            sls_str_free(&head->error.message);
        }
        free(head);
        head = next;
    }
}
LexerTokenResult *get_token(LexerTokenResult *head, size_t i) {
    // Walks `i` links forward from `head` and returns the node reached,
    // or NULL when the list ends first.
    size_t remaining = i;
    while (remaining > 0 && head != NULL) {
        head = head->next;
        remaining--;
    }
    return head;
}
LexerResult lexical_analysis(LexerInfo *lexer_info) {
    // Lexes the code loaded into lexer_info into a linked list of nodes.
    // Lexing stops after the first error node or the end-of-file token, both
    // of which are included in the returned list. If lexer_next itself fails,
    // the partial list is freed and that failure is returned instead.
    LexerTokenResult *head = NULL;
    LexerTokenResult *tail = NULL;
    for (;;) {
        LexerResult step = lexer_next(lexer_info);
        // Hard failure: discard everything collected so far.
        if (step.type == SLS_ERROR) {
            clean_token_result(head);
            return step;
        }
        // Append the new node to the list.
        if (head != NULL) {
            tail->next = step.result;
            tail = tail->next;
        } else {
            head = step.result;
            tail = head;
        }
        // A successful step must always carry a node.
        if (tail == NULL) {
            clean_token_result(head);
            return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Unknown Error."), 1}};
        }
        if (tail->type == SLS_ERROR || tail->result.type == TOKEN_EOF) {
            break;
        }
    }
    return (LexerResult) {.type = SLS_RESULT, .result = head};
}