// Kyler Olsen // YREA SLS // Lexer // November 2025 #include #include #include #include #include #include #include #include "sls/errors.h" #include "sls/bool.h" #include "sls/lexer.h" #include "sls/string.h" const size_t TYPE_NAMES_SAFE_LENGTH = 20; const char *TOKEN_TYPES_NAMES[] = { "End of File", "Identifier", "Integer", "Float", "Double", "Character", "String", "Boolean", "Array", "Token String", "Type Tuple", }; const size_t TOKEN_TYPE_COUNT = sizeof(TOKEN_TYPES_NAMES) / sizeof(*TOKEN_TYPES_NAMES); const char *ARRAY_TYPES_NAMES[] = { "Identifier", "i64", "i32", "i16", "i8", "u64", "u32", "u16", "u8", "Float", "Double", "Character", "String", "Boolean", "Inline Struct", }; const size_t ARRAY_TYPE_COUNT = sizeof(ARRAY_TYPES_NAMES) / sizeof(*ARRAY_TYPES_NAMES); const char *INTEGER_TYPES_NAMES[] = { "i64", "i32", "i16", "i8", "u64", "u32", "u16", "u8", }; const size_t INTEGER_TYPE_COUNT = sizeof(INTEGER_TYPES_NAMES) / sizeof(*INTEGER_TYPES_NAMES); void init_lexer(LexerInfo *lexer_info, SlsStr filename, SlsStr source_code) { // Initializes a LexerInfo struct with file info and source code lexer_info->filename = filename; lexer_info->source_code = source_code; lexer_info->pos = 0; lexer_info->column = 1; lexer_info->line = 1; } static FileInfo get_file_info(LexerInfo *lexer_info, size_t start, size_t start_line) { // Creates a FileInfo struct based on starting and current lexer states return (FileInfo){ .filename = lexer_info->filename, .line = lexer_info->line, .column = lexer_info->column, .length = lexer_info->pos - start, .lines = lexer_info->line - start_line }; } static const char *get_token_text(LexerInfo *lexer_info, size_t start) { // Returns the current character from the source code return lexer_info->source_code.str + start; } static char peek(LexerInfo *lexer_info) { // Returns the current character from the source code return lexer_info->source_code.str[lexer_info->pos]; } static char far_peek(LexerInfo *lexer_info, size_t index) { // Returns the character index away from the current char in the source code return lexer_info->source_code.str[lexer_info->pos + index]; } static char seek(LexerInfo *lexer_info, size_t index) { // Returns the character from the given index from the source code return lexer_info->source_code.str[index]; } static char advance(LexerInfo *lexer_info) { // Advances lexer_info to the next character if (lexer_info->source_code.str[lexer_info->pos] == '\n') { // If a new line is encountered, advance line and reset column lexer_info->line++; lexer_info->column = 1; } else { // Elsewhere in a line, advance column lexer_info->column++; } // Advance to and return the next character return lexer_info->source_code.str[++lexer_info->pos]; } static LexerResult lexer_result(LexerInfo *lexer_info, Token token, size_t start, size_t start_line) { // Create a LexerTokenResult to store the results of lexing the current token LexerTokenResult *result = (LexerTokenResult *)malloc(sizeof(LexerTokenResult)); if (result == NULL) return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Failed to allocate memory."), 1}}; result->type = SLS_RESULT; result->result = token; result->file_info = get_file_info(lexer_info, start, start_line); result->next = NULL; return (LexerResult){SLS_RESULT, .result = result}; } static LexerResult lexer_error(LexerInfo *lexer_info, SlsStr message, size_t start, size_t start_line) { // Create a LexerTokenResult to store an error from lexing the current token LexerTokenResult *result = (LexerTokenResult *)malloc(sizeof(LexerTokenResult)); if (result == NULL) return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Failed to allocate memory."), 1}}; result->type = SLS_ERROR; result->error.message = message; result->error.code = 1; result->file_info = get_file_info(lexer_info, start, start_line); result->next = NULL; // SLS_RESULT is used here because we successfully created a LexerTokenResult reporting an error return (LexerResult){SLS_RESULT, .result = result}; } typedef enum { NUMERIC_FLOAT_BASE = 1 << 0, NUMERIC_UNSIGNED = 1 << 1, NUMERIC_64 = 1 << 2, NUMERIC_32 = 1 << 3, NUMERIC_16 = 1 << 4, NUMERIC_8 = 1 << 5, } NumericTypesBase; typedef enum { NUMERIC_F64 = NUMERIC_64 | NUMERIC_FLOAT_BASE, NUMERIC_F32 = NUMERIC_32 | NUMERIC_FLOAT_BASE, NUMERIC_I64 = NUMERIC_64, NUMERIC_I32 = NUMERIC_32, NUMERIC_I16 = NUMERIC_16, NUMERIC_I8 = NUMERIC_8, NUMERIC_U64 = NUMERIC_64 | NUMERIC_UNSIGNED, NUMERIC_U32 = NUMERIC_32 | NUMERIC_UNSIGNED, NUMERIC_U16 = NUMERIC_16 | NUMERIC_UNSIGNED, NUMERIC_U8 = NUMERIC_8 | NUMERIC_UNSIGNED, } NumericTypes; typedef struct { SlsResultType type; union { IntegerBuiltInType integer_type; // type == SLS_RESULT SlsError error; // type == SLS_ERROR }; } IntegerTypeResult; static IntegerTypeResult get_integer_type(NumericTypes numeric_type) { IntegerBuiltInType integer_type; switch (numeric_type) { case NUMERIC_I64: integer_type = INTEGER_I64; break; case NUMERIC_I32: integer_type = INTEGER_I32; break; case NUMERIC_I16: integer_type = INTEGER_I16; break; case NUMERIC_I8: integer_type = INTEGER_I8; break; case NUMERIC_U64: integer_type = INTEGER_U64; break; case NUMERIC_U32: integer_type = INTEGER_U32; break; case NUMERIC_U16: integer_type = INTEGER_U16; break; case NUMERIC_U8: integer_type = INTEGER_U8; break; default: return (IntegerTypeResult){SLS_ERROR, .error = (SlsError){SLS_STR("Lexer Error: Encountered a Float where there should not be one."), 1}}; } return (IntegerTypeResult){SLS_RESULT, .integer_type = integer_type}; } static uint64_t create_binary_integer(LexerInfo *lexer_info, size_t start) { uint64_t value = 0; Boolean negative = FALSE; const char *token = get_token_text(lexer_info, start); size_t i = 2; if (token[0] == '-') { negative = TRUE; i += 1; } for (; i < lexer_info->pos - start; i++) { if (isspace(token[i]) || token[i] == '/' || token[i] == '\0' || token[i] == ':') break; if (token[i] == '.' || token[i] == '_') continue; value *= 2; switch (token[i]) { case '1': value += 1; break; } } if (negative) value = (~value) + 1; return value; } static uint64_t create_octal_integer(LexerInfo *lexer_info, size_t start) { uint64_t value = 0; Boolean negative = FALSE; const char *token = get_token_text(lexer_info, start); size_t i = 2; if (token[0] == '-') { negative = TRUE; i += 1; } for (; i < lexer_info->pos - start; i++) { if (isspace(token[i]) || token[i] == '/' || token[i] == '\0' || token[i] == ':') break; if (token[i] == '.' || token[i] == '_') continue; value *= 8; switch (token[i]) { case '1': value += 1; break; case '2': value += 2; break; case '3': value += 3; break; case '4': value += 4; break; case '5': value += 5; break; case '6': value += 6; break; case '7': value += 7; break; } } if (negative) value = (~value) + 1; return value; } static uint64_t create_decimal_integer(LexerInfo *lexer_info, size_t start) { uint64_t value = 0; Boolean negative = FALSE; const char *token = get_token_text(lexer_info, start); size_t i = 0; if (token[0] == '-') { negative = TRUE; i += 1; } for (; i < lexer_info->pos - start; i++) { if (isspace(token[i]) || token[i] == '/' || token[i] == '\0' || token[i] == ':') break; if (token[i] == '_') continue; value *= 10; switch (token[i]) { case '1': value += 1; break; case '2': value += 2; break; case '3': value += 3; break; case '4': value += 4; break; case '5': value += 5; break; case '6': value += 6; break; case '7': value += 7; break; case '8': value += 8; break; case '9': value += 9; break; } } if (negative) value = (~value) + 1; return value; } static uint64_t create_hexadecimal_integer(LexerInfo *lexer_info, size_t start) { uint64_t value = 0; Boolean negative = FALSE; const char *token = get_token_text(lexer_info, start); size_t i = 2; if (token[0] == '-') { negative = TRUE; i += 1; } for (; i < lexer_info->pos - start; i++) { if (isspace(token[i]) || token[i] == '/' || token[i] == '\0' || token[i] == ':') break; if (token[i] == '.' || token[i] == '_') continue; value *= 16; switch (token[i]) { case '1': value += 1; break; case '2': value += 2; break; case '3': value += 3; break; case '4': value += 4; break; case '5': value += 5; break; case '6': value += 6; break; case '7': value += 7; break; case '8': value += 8; break; case '9': value += 9; break; case 'A': case 'a': value += 10; break; case 'B': case 'b': value += 11; break; case 'C': case 'c': value += 12; break; case 'D': case 'd': value += 13; break; case 'E': case 'e': value += 14; break; case 'F': case 'f': value += 15; break; } } if (negative) value = (~value) + 1; return value; } static LexerResult create_integer_token(LexerInfo *lexer_info, IntegerBuiltInType type, uint64_t value, size_t start, size_t start_line) { switch (type) { case INTEGER_I64: break; case INTEGER_I32: if ((int64_t)value < INT32_MIN || (int64_t)value > INT32_MAX) { return lexer_error(lexer_info, SLS_STR("Integer overflow: value exceeds range for i32."), start, start_line); } break; case INTEGER_I16: if ((int64_t)value < INT16_MIN || (int64_t)value > INT16_MAX) { return lexer_error(lexer_info, SLS_STR("Integer overflow: value exceeds range for i16."), start, start_line); } break; case INTEGER_I8: if ((int64_t)value < INT8_MIN || (int64_t)value > INT8_MAX) { return lexer_error(lexer_info, SLS_STR("Integer overflow: value exceeds range for i8."), start, start_line); } break; case INTEGER_U64: if (seek(lexer_info, start) == '-') { return lexer_error(lexer_info, SLS_STR("Integer overflow: value exceeds range for u64."), start, start_line); } break; case INTEGER_U32: if (seek(lexer_info, start) == '-') { return lexer_error(lexer_info, SLS_STR("Integer overflow: value exceeds range for u32."), start, start_line); } else if (value > (uint64_t)UINT32_MAX) { return lexer_error(lexer_info, SLS_STR("Integer overflow: value exceeds range for u32."), start, start_line); } break; case INTEGER_U16: if (seek(lexer_info, start) == '-') { return lexer_error(lexer_info, SLS_STR("Integer overflow: value exceeds range for u16."), start, start_line); } else if (value > (uint64_t)UINT16_MAX) { return lexer_error(lexer_info, SLS_STR("Integer overflow: value exceeds range for u16."), start, start_line); } break; case INTEGER_U8: if (seek(lexer_info, start) == '-') { return lexer_error(lexer_info, SLS_STR("Integer overflow: value exceeds range for u8."), start, start_line); } else if (value > (uint64_t)UINT8_MAX) { return lexer_error(lexer_info, SLS_STR("Integer overflow: value exceeds range for u8."), start, start_line); } break; } return lexer_result(lexer_info, (Token){TOKEN_INTEGER, .integer_literal = (IntegerLiteral){.type = type, .value = value}}, start, start_line); } static double create_float(LexerInfo *lexer_info, size_t start) { double value = 0; Boolean negative = FALSE; uint64_t fractional = 0; const char *token = get_token_text(lexer_info, start); size_t i = 0; if (token[0] == '-') { negative = TRUE; i += 1; } for (; i < lexer_info->pos - start; i++) { if (isspace(token[i]) || token[i] == '/' || token[i] == '\0' || token[i] == ':') break; if (token[i] == '_') continue; if (token[i] == '.') { fractional = 1; continue; } if (fractional == 0) value *= 10; else fractional *= 10; switch (token[i]) { case '1': value += 1.0 / (fractional == 0 ? 1 : fractional); break; case '2': value += 2.0 / (fractional == 0 ? 1 : fractional); break; case '3': value += 3.0 / (fractional == 0 ? 1 : fractional); break; case '4': value += 4.0 / (fractional == 0 ? 1 : fractional); break; case '5': value += 5.0 / (fractional == 0 ? 1 : fractional); break; case '6': value += 6.0 / (fractional == 0 ? 1 : fractional); break; case '7': value += 7.0 / (fractional == 0 ? 1 : fractional); break; case '8': value += 8.0 / (fractional == 0 ? 1 : fractional); break; case '9': value += 9.0 / (fractional == 0 ? 1 : fractional); break; } } if (negative) value = -value; return value; } static LexerResult create_float_token(LexerInfo *lexer_info, NumericTypes type, size_t start, size_t start_line) { double value = create_float(lexer_info, start); if (type == NUMERIC_F64) return lexer_result(lexer_info, (Token){TOKEN_DOUBLE, .double_literal = value}, start, start_line); else return lexer_result(lexer_info, (Token){TOKEN_FLOAT, .float_literal = (float){value}}, start, start_line); } typedef enum { NUMERIC_BINARY, NUMERIC_OCTAL, NUMERIC_DECIMAL, NUMERIC_HEXADECIMAL, NUMERIC_FLOAT, NUMERIC_EXPONENTIAL, } NumericLiteralTypes; static LexerResult parse_numeric_type(LexerInfo *lexer_info, char c, size_t start, size_t start_line, NumericLiteralTypes numeric_literal_type) { NumericTypes numeric_type = 0; c = advance(lexer_info); if (c == 'f') { numeric_type |= NUMERIC_FLOAT_BASE; if (numeric_literal_type == NUMERIC_DECIMAL || numeric_literal_type == NUMERIC_FLOAT || numeric_literal_type == NUMERIC_EXPONENTIAL) { c = advance(lexer_info); if (c == '6' && far_peek(lexer_info, 1) == '4') { numeric_type |= NUMERIC_64; c = advance(lexer_info); c = advance(lexer_info); } else if (c == '3' && far_peek(lexer_info, 1) == '2') { numeric_type |= NUMERIC_32; c = advance(lexer_info); c = advance(lexer_info); } else { return lexer_error(lexer_info, SLS_STR("Invalid float type: must be of type 'f64' or 'f32'."), start, start_line); } } else { return lexer_error(lexer_info, SLS_STR("Invalid numeric literal: float type not allowed."), start, start_line); } } else if (c == 'i' || c == 'u') { if (numeric_literal_type == NUMERIC_FLOAT || numeric_literal_type == NUMERIC_EXPONENTIAL) return lexer_error(lexer_info, SLS_STR("Invalid float type: must be of type 'f64' or 'f32'."), start, start_line); if (c == 'u') numeric_type |= NUMERIC_UNSIGNED; c = advance(lexer_info); if (c == '6' && far_peek(lexer_info, 1) == '4') { numeric_type |= NUMERIC_64; c = advance(lexer_info); c = advance(lexer_info); } else if (c == '3' && far_peek(lexer_info, 1) == '2') { numeric_type |= NUMERIC_32; c = advance(lexer_info); c = advance(lexer_info); } else if (c == '1' && far_peek(lexer_info, 1) == '6') { numeric_type |= NUMERIC_16; c = advance(lexer_info); c = advance(lexer_info); } else if (c == '8') { numeric_type |= NUMERIC_8; c = advance(lexer_info); } else { if (numeric_type & NUMERIC_UNSIGNED) { return lexer_error(lexer_info, SLS_STR("Invalid unsigned integer type: must be of type 'u64', 'u32', 'u16', and 'u8'."), start, start_line); } else { return lexer_error(lexer_info, SLS_STR("Invalid signed integer type: must be of type 'i64', 'i32', 'i16', and 'i8'."), start, start_line); } } } else { if (numeric_literal_type == NUMERIC_DECIMAL || numeric_literal_type == NUMERIC_FLOAT || numeric_literal_type == NUMERIC_EXPONENTIAL) { return lexer_error(lexer_info, SLS_STR("Invalid numeric type: type must start with 'f', 'i', or 'u'."), start, start_line); } else { return lexer_error(lexer_info, SLS_STR("Invalid integer type: type must start with 'i' or 'u'."), start, start_line); } } if (isspace(c) || c == '/' || c == '\0') { IntegerTypeResult integer_type = get_integer_type(numeric_type); if (numeric_literal_type == NUMERIC_DECIMAL && numeric_type & NUMERIC_FLOAT_BASE) numeric_literal_type = NUMERIC_FLOAT; uint64_t value; switch (numeric_literal_type) { case NUMERIC_BINARY: if (integer_type.type == SLS_ERROR) return (LexerResult){SLS_ERROR, .error = integer_type.error}; value = create_binary_integer(lexer_info, start); return create_integer_token(lexer_info, integer_type.integer_type, value, start, start_line); case NUMERIC_OCTAL: if (integer_type.type == SLS_ERROR) return (LexerResult){SLS_ERROR, .error = integer_type.error}; value = create_octal_integer(lexer_info, start); return create_integer_token(lexer_info, integer_type.integer_type, value, start, start_line); case NUMERIC_DECIMAL: if (integer_type.type == SLS_ERROR) return (LexerResult){SLS_ERROR, .error = integer_type.error}; value = create_decimal_integer(lexer_info, start); return create_integer_token(lexer_info, integer_type.integer_type, value, start, start_line); case NUMERIC_HEXADECIMAL: if (integer_type.type == SLS_ERROR) return (LexerResult){SLS_ERROR, .error = integer_type.error}; value = create_hexadecimal_integer(lexer_info, start); return create_integer_token(lexer_info, integer_type.integer_type, value, start, start_line); case NUMERIC_FLOAT: return create_float_token(lexer_info, numeric_type, start, start_line); case NUMERIC_EXPONENTIAL: break; } return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Lexer: Numeric Literal Not Implemented Error."), 1}}; } SlsStr error_msg = sls_format(SLS_STR("Invalid numeric literal: unexpected '%c' in numeric type."), c); if (error_msg.str == NULL) return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Out Of Memory Error."), 1}}; return lexer_error(lexer_info, error_msg, start, start_line); } static LexerResult parse_binary_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { while (c == '0' || c == '1' || c == '_') c = advance(lexer_info); if (c == ':') return parse_numeric_type(lexer_info, c, start, start_line, NUMERIC_BINARY); if (isspace(c) || c == '/' || c == '\0') { uint64_t value = create_binary_integer(lexer_info, start); return create_integer_token(lexer_info, SLS_INTEGER_TYPE_DEFAULT, value, start, start_line); } SlsStr error_msg = sls_format(SLS_STR("Invalid binary literal: unexpected '%c' in binary integer."), c); if (error_msg.str == NULL) return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Out Of Memory Error."), 1}}; return lexer_error(lexer_info, error_msg, start, start_line); } static LexerResult parse_octal_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { while ((isdigit(c) || c == '_') && !(c == '8' || c == '9')) c = advance(lexer_info); if (c == ':') return parse_numeric_type(lexer_info, c, start, start_line, NUMERIC_OCTAL); if (isspace(c) || c == '/' || c == '\0') { uint64_t value = create_octal_integer(lexer_info, start); return create_integer_token(lexer_info, SLS_INTEGER_TYPE_DEFAULT, value, start, start_line); } SlsStr error_msg = sls_format(SLS_STR("Invalid octal literal: unexpected '%c' in octal integer."), c); if (error_msg.str == NULL) return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Out Of Memory Error."), 1}}; return lexer_error(lexer_info, error_msg, start, start_line); } static LexerResult parse_exponential(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { (void)lexer_info; (void)c; (void)start; (void)start_line; return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Lexer: Float Exponential Not Implemented Error."), 1}}; } static LexerResult parse_float(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { while (isdigit(c) || c == '_') c = advance(lexer_info); if (c == 'e' || c == 'E') return parse_exponential(lexer_info, c, start, start_line); if (c == ':') return parse_numeric_type(lexer_info, c, start, start_line, NUMERIC_FLOAT); if (isspace(c) || c == '/' || c == '\0') return create_float_token(lexer_info, SLS_FLOAT_TYPE_DEFAULT, start, start_line); SlsStr error_msg = sls_format(SLS_STR("Invalid float literal: unexpected '%c' in float."), c); if (error_msg.str == NULL) return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Out Of Memory Error."), 1}}; return lexer_error(lexer_info, error_msg, start, start_line); } static LexerResult parse_decimal_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { while (isdigit(c) || c == '_') c = advance(lexer_info); if (c == '.') { c = advance(lexer_info); return parse_float(lexer_info, c, start, start_line); } if (c == 'e' || c == 'E') return parse_exponential(lexer_info, c, start, start_line); if (c == ':') return parse_numeric_type(lexer_info, c, start, start_line, NUMERIC_DECIMAL); if (isspace(c) || c == '/' || c == '\0') { uint64_t value = create_decimal_integer(lexer_info, start); return create_integer_token(lexer_info, SLS_INTEGER_TYPE_DEFAULT, value, start, start_line); } SlsStr error_msg = sls_format(SLS_STR("Invalid decimal literal: unexpected '%c' in decimal integer."), c); if (error_msg.str == NULL) return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Out Of Memory Error."), 1}}; return lexer_error(lexer_info, error_msg, start, start_line); } static LexerResult parse_hexadecimal_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { while (isxdigit(c) || c == '_') c = advance(lexer_info); if (c == ':') return parse_numeric_type(lexer_info, c, start, start_line, NUMERIC_HEXADECIMAL); if (isspace(c) || c == '/' || c == '\0') { uint64_t value = create_hexadecimal_integer(lexer_info, start); return create_integer_token(lexer_info, SLS_INTEGER_TYPE_DEFAULT, value, start, start_line); } SlsStr error_msg = sls_format(SLS_STR("Invalid hexadecimal literal: unexpected '%c' in hexadecimal integer."), c); if (error_msg.str == NULL) return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Out Of Memory Error."), 1}}; return lexer_error(lexer_info, error_msg, start, start_line); } static LexerResult parse_numeric_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { if (c == '-') c = advance(lexer_info); if (c == '0') { c = advance(lexer_info); if (c == 'b' || c == 'B') { c = advance(lexer_info); return parse_binary_integer(lexer_info, c, start, start_line); } else if (c == 'o' || c == 'O') { c = advance(lexer_info); return parse_octal_integer(lexer_info, c, start, start_line); } else if (c == 'x' || c == 'X') { c = advance(lexer_info); return parse_hexadecimal_integer(lexer_info, c, start, start_line); } } return parse_decimal_integer(lexer_info, c, start, start_line); } static LexerResult parse_character_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { if (c == '\'') return lexer_error(lexer_info, SLS_STR("Invalid character literal: empty character literal."), start, start_line); char value = '\0'; if (c == '\\') { c = advance(lexer_info); switch (c) { case 'n': value = '\n'; break; case 'r': value = '\r'; break; case 't': value = '\t'; break; case '\\': value = '\\'; break; case '\'': value = '\''; break; case '0': value = '\0'; break; default: SlsStr error_msg = sls_format(SLS_STR("Invalid character literal: unknown escape sequence '\\%c'."), c); if (error_msg.str == NULL) return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Out Of Memory Error."), 1}}; return lexer_error(lexer_info, error_msg, start, start_line); } } else if (c == '\n' || c == '\r') return lexer_error(lexer_info, SLS_STR("Invalid character literal: unclosed character literal."), start, start_line); else value = c; c = advance(lexer_info); if (isspace(c) || c == '/' || c == '\0') return lexer_error(lexer_info, SLS_STR("Invalid character literal: unclosed character literal."), start, start_line); else if (c != '\'') { SlsStr error_msg = sls_format(SLS_STR("Invalid character literal: unexpected '%c' in character."), c); if (error_msg.str == NULL) return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Out Of Memory Error."), 1}}; return lexer_error(lexer_info, error_msg, start, start_line); } advance(lexer_info); return lexer_result(lexer_info, (Token){TOKEN_CHARACTER, .character_literal = (uint8_t){value}}, start, start_line); } static LexerResult parse_string_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { (void)lexer_info; (void)c; (void)start; (void)start_line; return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Lexer: String Literals Not Implemented Error."), 1}}; } static void skip_comments_and_whitespace(LexerInfo *lexer_info) { while (isspace(peek(lexer_info)) || (peek(lexer_info) == '/' && far_peek(lexer_info, 1) == '/') || peek(lexer_info) == '#') { // Skip Comments if ((peek(lexer_info) == '/' && far_peek(lexer_info, 1) == '/') || peek(lexer_info) == '#') while (!(peek(lexer_info) == '\n' || peek(lexer_info) == '\0')) advance(lexer_info); // Skip whitespace while (isspace(peek(lexer_info))) advance(lexer_info); } } static LexerResult lexer_next(LexerInfo *lexer_info); TokenString copy_token_string(TokenString token_string) { TokenString new_string = (TokenString){ .length = token_string.length, .tokens = (Token *)malloc(sizeof(Token) * token_string.length) }; for (size_t i = 0; i < token_string.length; i++) { if (token_string.tokens[i].type == TOKEN_STRING) { new_string.tokens[i].type = TOKEN_STRING; new_string.tokens[i].string_literal = sls_str_cpy(token_string.tokens[i].string_literal); } else if (token_string.tokens[i].type == TOKEN_TOKEN_STRING) { new_string.tokens[i].type = TOKEN_TOKEN_STRING; new_string.tokens[i].token_string = copy_token_string(token_string.tokens[i].token_string); } else { new_string.tokens[i] = token_string.tokens[i]; } } return new_string; } static LexerResult convert_to_token_string(LexerInfo *lexer_info, LexerTokenResult *head, size_t start, size_t start_line) { TokenString token_string = { .length = 0, .tokens = NULL }; LexerTokenResult *current = head; while (current != NULL) { token_string.length += 1; current = current->next; } current = head; token_string.tokens = (Token *)malloc(sizeof(Token) * token_string.length); for (size_t i = 0; i < token_string.length; i++) { if (current->result.type == TOKEN_STRING) { token_string.tokens[i].type = TOKEN_STRING; token_string.tokens[i].string_literal = sls_str_cpy(current->result.string_literal); } else if (current->result.type == TOKEN_TOKEN_STRING) { token_string.tokens[i].type = TOKEN_TOKEN_STRING; token_string.tokens[i].token_string = copy_token_string(current->result.token_string); memcpy(token_string.tokens[i].token_string.tokens, current->result.token_string.tokens, current->result.token_string.length); } else token_string.tokens[i] = current->result; current = current->next; } clean_token_result(head); return lexer_result(lexer_info, (Token){TOKEN_TOKEN_STRING, .token_string = token_string}, start, start_line); } static LexerResult parse_token_string(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { // Lexes a token string LexerResult result; // For lexer_next returns LexerTokenResult *head = 0; LexerTokenResult *current = 0; size_t watchdog = 0; c = advance(lexer_info); while (c != '\0') { skip_comments_and_whitespace(lexer_info); c = peek(lexer_info); // Stop at the end of the token string if (c == '}') { advance(lexer_info); return convert_to_token_string(lexer_info, head, start, start_line); } // Get next token result = lexer_next(lexer_info); // Handle Errors if (result.type == SLS_ERROR) { clean_token_result(head); return result; } // Save result if (head == 0) { head = result.result; current = head; } else { current->next = result.result; current = current->next; } // Current should not be null_ptr if (current == 0) { clean_token_result(head); return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Unknown Error."), 1}}; } if (current->type == SLS_ERROR) { LexerTokenResult *e = (LexerTokenResult *)malloc(sizeof(LexerTokenResult)); *e = (LexerTokenResult){ .type = SLS_ERROR, .error = (SlsError){sls_str_cpy(current->error.message), 1}, .file_info = current->file_info, .next = NULL }; clean_token_result(head); return (LexerResult){SLS_RESULT, .result = e}; } if (current->result.type == TOKEN_EOF) break; c = peek(lexer_info); if (watchdog++ > 1000000) { clean_token_result(head); return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Watchdog Triggered in Token String."), 1}}; } } clean_token_result(head); return lexer_error(lexer_info, SLS_STR("Unclosed token string: missing closing brace '}'."), start, start_line); } static LexerResult parse_array_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { (void)lexer_info; (void)c; (void)start; (void)start_line; return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Lexer: Array Literals Not Implemented Error."), 1}}; } static LexerResult parse_type_tuples(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { (void)lexer_info; (void)c; (void)start; (void)start_line; return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Lexer: Type Tuples Not Implemented Error."), 1}}; } Boolean is_identifier_continue(LexerInfo *lexer_info, char c) { // If the current character and its context are a valid identifier character if (!isprint(c)) return FALSE; if (c == '/' && far_peek(lexer_info, 1) == '/') return FALSE; if (c == '{' || c == '}') return FALSE; if (c == '[' || c == ']') return FALSE; if (c == '(' || c == ')') return FALSE; if (c == '\'' || c == '"' || c == '#') return FALSE; if (isspace(c) || c == '\0') return FALSE; return TRUE; } Boolean is_identifier_start(LexerInfo *lexer_info) { // If the current character and its context are a valid identifier start char c = peek(lexer_info); if (c == ':' && far_peek(lexer_info, 1) == ':') c = far_peek(lexer_info, 2); if ((!isdigit(c)) && is_identifier_continue(lexer_info, c)) return TRUE; else return FALSE; } static LexerResult parse_identifiers_and_booleans(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { // Parses identifier, identifier literals, and boolean tokens Boolean literal = FALSE; // Skip leading `::` for identifier literals if (c == ':' && far_peek(lexer_info, 1) == ':') { literal = TRUE; c = advance(lexer_info); c = advance(lexer_info); } // Read the name of the identifier size_t length = 0; while (is_identifier_continue(lexer_info, c)) { if (c == ':') // && !literal) return lexer_error(lexer_info, SLS_STR("Invalid identifier: ':' is not allowed in identifiers."), start, start_line); if (c == '.') // && !literal) return lexer_error(lexer_info, SLS_STR("Invalid identifier: '.' is not allowed in identifiers."), start, start_line); c = advance(lexer_info); length++; } char *name_value = (char *)calloc(length+1, sizeof(char)); for (size_t i = 0; i < length; i++) name_value[i] = lexer_info->source_code.str[start + i + (2 * literal)]; SlsStr name = sls_str_malloc(name_value, length); free(name_value); // Return as identifier or boolean tokens if (sls_str_cmp(name, SLS_STR("false")) == 0) return lexer_result(lexer_info, (Token){TOKEN_BOOLEAN, .boolean_literal = FALSE}, start, start_line); else if (sls_str_cmp(name, SLS_STR("true")) == 0) return lexer_result(lexer_info, (Token){TOKEN_BOOLEAN, .boolean_literal = TRUE}, start, start_line); else return lexer_result(lexer_info, (Token){TOKEN_IDENTIFIER, .identifier = (Identifier){.is_literal = literal, .name = name}}, start, start_line); } static LexerResult lexer_next(LexerInfo *lexer_info) { // Gets the next token from the source skip_comments_and_whitespace(lexer_info); // Initialize begining variables char c = peek(lexer_info); size_t start = lexer_info->pos; size_t start_line = lexer_info->line; // End of file tokens if (c == '\0') return lexer_result(lexer_info, (Token){.type = TOKEN_EOF}, start, start_line); // Integers and Floats if (isdigit(c) || (c == '.' && isdigit(far_peek(lexer_info, 1))) || (c == '-' && isdigit(far_peek(lexer_info, 1)))) return parse_numeric_literal(lexer_info, c, start, start_line); // Character Literals if (c == '\'') { c = advance(lexer_info); return parse_character_literal(lexer_info, c, start, start_line); } // String Literals if (c == '\"') return parse_string_literal(lexer_info, c, start, start_line); // Token Strings if (c == '{') return parse_token_string(lexer_info, c, start, start_line); if (c == '}') { advance(lexer_info); return lexer_error(lexer_info, SLS_STR("Unexpected closing brace '}' without matching opening brace."), start, start_line); } // Array Literals if (c == '[') return parse_array_literal(lexer_info, c, start, start_line); if (c == ']') { advance(lexer_info); return lexer_error(lexer_info, SLS_STR("Unexpected closing bracket ']' without matching opening bracket."), start, start_line); } // Type Tuples if (c == '(') return parse_type_tuples(lexer_info, c, start, start_line); if (c == ')') { advance(lexer_info); return lexer_error(lexer_info, SLS_STR("Unexpected closing parentheses ')' without matching opening parentheses."), start, start_line); } // Identifiers and Booleans if (is_identifier_start(lexer_info)) return parse_identifiers_and_booleans(lexer_info, c, start, start_line); if (c == ':') { advance(lexer_info); if (far_peek(lexer_info, 1) == ':') return lexer_error(lexer_info, SLS_STR("Invalid identifier literal: empty identifier after '::'."), start, start_line); else return lexer_error(lexer_info, SLS_STR("Unexpected single colon ':'."), start, start_line); } // Random Characters SlsStr error_msg = sls_format(SLS_STR("Unexpected character: unexpected '%c' during parsing."), c); if (error_msg.str == NULL) return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Out Of Memory Error."), 1}}; return lexer_error(lexer_info, error_msg, start, start_line); } void clean_token_string(TokenString token_string) { if (token_string.tokens == NULL) return; for (size_t i = 0; i < token_string.length; i++) { if (token_string.tokens[i].type == TOKEN_STRING) sls_str_free(&token_string.tokens[i].string_literal); if (token_string.tokens[i].type == TOKEN_TOKEN_STRING) clean_token_string(token_string.tokens[i].token_string); } free(token_string.tokens); token_string.tokens = NULL; } void clean_token_result(LexerTokenResult *head) { // Deallocates a LexerTokenResult linked list if (head == NULL) return; if (head->type == SLS_ERROR) sls_str_free(&head->error.message); else { if (head->result.type == TOKEN_STRING) sls_str_free(&head->result.string_literal); if (head->result.type == TOKEN_TOKEN_STRING) clean_token_string(head->result.token_string); } clean_token_result(head->next); head->next = NULL; if (head) free(head); } LexerTokenResult *get_token(LexerTokenResult *head, size_t i) { // Returns the token at i in a LexerTokenResult linked list, or null_ptr if i is out of bounds for (size_t j = 0; j < i && head; j++) { head = head->next; } return head; } LexerResult lexical_analysis(LexerInfo *lexer_info) { // Lexes code loaded into lexer_info LexerResult result; // For lexer_next returns LexerTokenResult *head = 0; LexerTokenResult *current = 0; do { // Get next token result = lexer_next(lexer_info); // Handle Errors if (result.type == SLS_ERROR) { clean_token_result(head); return result; } // Save result if (head == 0) { head = result.result; current = head; } else { current->next = result.result; current = current->next; } // Current should not be null_ptr if (current == 0) { clean_token_result(head); return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Unknown Error."), 1}}; } } while (current->type != SLS_ERROR && current->result.type != TOKEN_EOF); return (LexerResult) {.type = SLS_RESULT, .result = head}; }