From 29a83a4bae216fbe25b2adfb17bdd560084b4d15 Mon Sep 17 00:00:00 2001
From: Kyler
Date: Fri, 7 Nov 2025 14:32:03 -0700
Subject: [PATCH] Started reworking numeric literals

---
Review notes (text here is dropped by git am; the blob ids on the index line
below were not regenerated after these corrections):
 * normalize_numeric: the carry loop now counts down; it previously incremented
   and ran past value[7].
 * sub: negation adds 1 to the lowest limb instead of ONE and drops the
   wrap-around carry, so a - a is zero.
 * multi: the running sum starts at ZERO (it started at a, giving a * (b + 1)).
 * div_n: each step subtracts b (it added a) and a zero divisor is rejected.
 * numeric_next: the second octal test is now the hexadecimal one, a non-'0'
   first character selects decimal, and a result is returned on every path.
 * ctype calls in the new helpers cast their argument to unsigned char.

 SLS_C/src/lexer.c | 575 +++++++++++++---------------------------------
 1 file changed, 163 insertions(+), 412 deletions(-)

diff --git a/SLS_C/src/lexer.c b/SLS_C/src/lexer.c
index 7eb8cc3..cb40350 100644
--- a/SLS_C/src/lexer.c
+++ b/SLS_C/src/lexer.c
@@ -140,429 +140,180 @@ static LexerResult lexer_error(LexerInfo *lexer_info, const char* message, size_
 }
 
+/* Bit flags describing a numeric literal; carried around in a uint16_t. */
 typedef enum {
-    NUMERIC_FLOAT_BASE = 1 << 0,
+    NUMERIC_FLOAT = 1 << 0,
     NUMERIC_UNSIGNED = 1 << 1,
-    NUMERIC_64 = 1 << 2,
-    NUMERIC_32 = 1 << 3,
-    NUMERIC_16 = 1 << 4,
-    NUMERIC_8 = 1 << 5,
-} NumericTypesBase;
+    NUMERIC_EXPONENTIAL = 1 << 2,
+    NUMERIC_64 = 1 << 3,
+    NUMERIC_32 = 1 << 4,
+    NUMERIC_16 = 1 << 5,
+    NUMERIC_8 = 1 << 6,
+    NUMERIC_BINARY = 1 << 7,
+    NUMERIC_OCTAL = 1 << 8,
+    NUMERIC_DECIMAL = 1 << 9,
+    NUMERIC_HEXADECIMAL = 1 << 10,
+} NumericFlags;
 
-typedef enum {
-    NUMERIC_F64 = NUMERIC_64 | NUMERIC_FLOAT_BASE,
-    NUMERIC_F32 = NUMERIC_32 | NUMERIC_FLOAT_BASE,
-    NUMERIC_I64 = NUMERIC_64,
-    NUMERIC_I32 = NUMERIC_32,
-    NUMERIC_I16 = NUMERIC_16,
-    NUMERIC_I8 = NUMERIC_8,
-    NUMERIC_U64 = NUMERIC_64 | NUMERIC_UNSIGNED,
-    NUMERIC_U32 = NUMERIC_32 | NUMERIC_UNSIGNED,
-    NUMERIC_U16 = NUMERIC_16 | NUMERIC_UNSIGNED,
-    NUMERIC_U8 = NUMERIC_8 | NUMERIC_UNSIGNED,
-} NumericTypes;
+/*
+ * Wide value used while evaluating numeric literals: eight limbs, most
+ * significant first, each keeping 32 bits in a uint64_t so limb-wise sums
+ * cannot wrap before normalize_numeric() propagates the carries.
+ * from_int() and ONE put the units position in limb 3, leaving limbs 4..7
+ * below it (presumably a fractional part -- TODO confirm the layout).
+ * Bit 31 of limb 0 is treated as the sign bit by is_neg().
+ */
+typedef struct {
+    uint64_t value[8];
+    Boolean overflow;
+} Numeric;
+
+static const Numeric ZERO = { {0, 0, 0, 0, 0, 0, 0, 0}, FALSE };
+static const Numeric ONE = { {0, 0, 0, 1, 0, 0, 0, 0}, FALSE };
+
+/* Carries the excess above 32 bits of each limb into the next more significant
+ * limb, walking from limb 7 up to limb 1; an excess left in limb 0 sets overflow. */
+static Numeric normalize_numeric(Numeric a) {
+    for (size_t i = 7; i > 0; i--) {
+        if (a.value[i] > 0xffffffffUL) {
+            a.value[i-1] += ((a.value[i] & 0xffffffff00000000UL) >> 32);
+            a.value[i] &= 0xffffffffUL;
+        }
+    }
+    if (a.value[0] > 0xffffffffUL) a.overflow = TRUE;
+    return a;
+}
+
+/* Builds a Numeric whose limbs 2 and 3 hold the high and low halves of a. */
+static Numeric from_int(uint64_t a) {
+    return normalize_numeric((Numeric){{0, 0, (a & 0xffffffff00000000UL) >> 32, (a & 0xffffffff), 0, 0, 0, 0}, FALSE});
+}
+
+/* TRUE when every limb is zero. */
+static Boolean is_zero(Numeric a) {
+    for (size_t i = 0; i < 8; i++)
+        if (a.value[i] != 0)
+            return FALSE;
+    return TRUE;
+}
+
+/* TRUE when bit 31 of the most significant limb is set. */
+static Boolean is_neg(Numeric a) {
+    return (a.value[0] & 0x80000000UL) ? TRUE : FALSE;
+}
+
+/* Limb-wise sum of a and b, normalized; b's overflow flag is carried over. */
+static Numeric add(Numeric a, Numeric b) {
+    for (size_t i = 0; i < 8; i++)
+        a.value[i] += b.value[i];
+    if (b.overflow) a.overflow = TRUE;
+    return normalize_numeric(a);
+}
+
+/*
+ * a - b, computed as a + ~b + 1 over the whole eight-limb word. The +1 goes
+ * into limb 7 (the lowest unit), not ONE, and the carry that wraps out of
+ * limb 0 is dropped so that a - a is exactly zero. Only overflow already
+ * present in an operand is reported; the difference itself is not range-checked.
+ */
+static Numeric sub(Numeric a, Numeric b) {
+    Boolean overflow = (a.overflow || b.overflow) ? TRUE : FALSE;
+    for (size_t i = 0; i < 8; i++)
+        a.value[i] += b.value[i] ^ 0xffffffffUL;
+    a.value[7] += 1;
+    a = normalize_numeric(a);
+    a.value[0] &= 0xffffffffUL;
+    a.overflow = overflow;
+    return a;
+}
+
+/*
+ * a * b by repeated addition: a is added once for every whole step of ONE
+ * below b. Only meaningful for non-negative whole b, and the cost grows
+ * linearly with b. TODO: replace with a limb-wise multiplication.
+ */
+static Numeric multi(Numeric a, Numeric b) {
+    if (is_zero(a) || is_zero(b)) return ZERO;
+    Numeric s = ZERO;
+    for (Numeric i = ZERO; is_neg(sub(i, b)); i = add(i, ONE))
+        s = add(s, a);
+    if (b.overflow) s.overflow = TRUE;
+    return s;
+}
+
+/*
+ * Whole-number quotient a / b by repeated subtraction, for non-negative a and
+ * positive b. A zero divisor yields ZERO with the overflow flag set rather
+ * than looping forever. TODO: replace with a real long division.
+ */
+static Numeric div_n(Numeric a, Numeric b) {
+    Numeric i = ZERO;
+    if (is_zero(b)) {
+        i.overflow = TRUE;
+        return i;
+    }
+    Numeric s = a;
+    for (; !is_neg(sub(s, b)); i = add(i, ONE))
+        s = sub(s, b);
+    if (a.overflow) i.overflow = TRUE;
+    if (b.overflow) i.overflow = TRUE;
+    if (s.overflow) i.overflow = TRUE;
+    return i;
+}
+
+/* TRUE when c is a digit or '.', or is '-' and far_peek(lexer_info, 1) is a digit or '.'. */
+static Boolean is_numeric_start(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
+    (void)start; (void)start_line;
+    char c2 = far_peek(lexer_info, 1);
+    if (isdigit((unsigned char)c) || c == '.' || (c == '-' && (isdigit((unsigned char)c2) || c2 == '.'))) return TRUE;
+    return FALSE;
+}
+
+/* TRUE when c is a valid digit for the base selected in flags; FALSE when no base flag is set. */
+static Boolean is_base_digit(LexerInfo *lexer_info, char c, size_t start, size_t start_line, uint16_t flags) {
+    (void)lexer_info; (void)start; (void)start_line;
+    if (flags & NUMERIC_BINARY) return (c == '0' || c == '1') ? TRUE : FALSE;
+    if (flags & NUMERIC_OCTAL) return (c >= '0' && c <= '7') ? TRUE : FALSE;
+    if (flags & NUMERIC_DECIMAL) return (isdigit((unsigned char)c)) ? TRUE : FALSE;
+    if (flags & NUMERIC_HEXADECIMAL) return (isxdigit((unsigned char)c)) ? TRUE : FALSE;
+    return FALSE;
+}
 
 typedef struct {
     SlsResultType type;
     union {
-        IntegerBuiltInType integer_type; // type == SLS_RESULT
-        SlsError error; // type == SLS_ERROR
+        uint16_t flags; // type == SLS_RESULT
+        SlsError error; // type == SLS_ERROR
     };
-} IntegerTypeResult;
+} NumericNextResult;
 
-static IntegerTypeResult get_integer_type(NumericTypes numeric_type) {
-    IntegerBuiltInType integer_type;
-    switch (numeric_type) {
-        case NUMERIC_I64:
-            integer_type = INTEGER_I64;
-            break;
-        case NUMERIC_I32:
-            integer_type = INTEGER_I32;
-            break;
-        case NUMERIC_I16:
-            integer_type = INTEGER_I16;
-            break;
-        case NUMERIC_I8:
-            integer_type = INTEGER_I8;
-            break;
-        case NUMERIC_U64:
-            integer_type = INTEGER_U64;
-            break;
-        case NUMERIC_U32:
-            integer_type = INTEGER_U32;
-            break;
-        case NUMERIC_U16:
-            integer_type = INTEGER_U16;
-            break;
-        case NUMERIC_U8:
-            integer_type = INTEGER_U8;
-            break;
-        default:
-            return (IntegerTypeResult){SLS_ERROR, .error = (SlsError){.message = "Lexer Error: Encountered a Float where there should not be one.", .code = 1}};
-    }
-    return (IntegerTypeResult){SLS_RESULT, .integer_type = integer_type};
-}
-
-static uint64_t create_binary_integer(LexerInfo *lexer_info, size_t start) {
-    uint64_t value = 0;
-    Boolean negative = FALSE;
-    const char *token = get_token_text(lexer_info, start);
-    size_t i = 2;
-    if (token[0] == '-') {
-        negative = TRUE;
-        i += 1;
-    }
-    for (; i < lexer_info->pos - start; i++) {
-        if (isspace(token[i]) || token[i] == '/' || token[i] == '\0' || token[i] == ':') break;
-        if (token[i] == '.' || token[i] == '_') continue;
-        value *= 2;
-        switch (token[i]) {
-            case '1': value += 1; break;
-        }
-    }
-    if (negative) value = (~value) + 1;
-    return value;
-}
-
-static uint64_t create_octal_integer(LexerInfo *lexer_info, size_t start) {
-    uint64_t value = 0;
-    Boolean negative = FALSE;
-    const char *token = get_token_text(lexer_info, start);
-    size_t i = 2;
-    if (token[0] == '-') {
-        negative = TRUE;
-        i += 1;
-    }
-    for (; i < lexer_info->pos - start; i++) {
-        if (isspace(token[i]) || token[i] == '/' || token[i] == '\0' || token[i] == ':') break;
-        if (token[i] == '.' || token[i] == '_') continue;
-        value *= 8;
-        switch (token[i]) {
-            case '1': value += 1; break;
-            case '2': value += 2; break;
-            case '3': value += 3; break;
-            case '4': value += 4; break;
-            case '5': value += 5; break;
-            case '6': value += 6; break;
-            case '7': value += 7; break;
-        }
-    }
-    if (negative) value = (~value) + 1;
-    return value;
-}
-
-static uint64_t create_decimal_integer(LexerInfo *lexer_info, size_t start) {
-    uint64_t value = 0;
-    Boolean negative = FALSE;
-    const char *token = get_token_text(lexer_info, start);
-    size_t i = 0;
-    if (token[0] == '-') {
-        negative = TRUE;
-        i += 1;
-    }
-    for (; i < lexer_info->pos - start; i++) {
-        if (isspace(token[i]) || token[i] == '/' || token[i] == '\0' || token[i] == ':') break;
-        if (token[i] == '.' || token[i] == '_') continue;
-        value *= 10;
-        switch (token[i]) {
-            case '1': value += 1; break;
-            case '2': value += 2; break;
-            case '3': value += 3; break;
-            case '4': value += 4; break;
-            case '5': value += 5; break;
-            case '6': value += 6; break;
-            case '7': value += 7; break;
-            case '8': value += 8; break;
-            case '9': value += 9; break;
-        }
-    }
-    if (negative) value = (~value) + 1;
-    return value;
-}
-
-static uint64_t create_hexadecimal_integer(LexerInfo *lexer_info, size_t start) {
-    uint64_t value = 0;
-    Boolean negative = FALSE;
-    const char *token = get_token_text(lexer_info, start);
-    size_t i = 2;
-    if (token[0] == '-') {
-        negative = TRUE;
-        i += 1;
-    }
-    for (; i < lexer_info->pos - start; i++) {
-        if (isspace(token[i]) || token[i] == '/' || token[i] == '\0' || token[i] == ':') break;
-        if (token[i] == '.' || token[i] == '_') continue;
-        value *= 16;
-        switch (token[i]) {
-            case '1': value += 1; break;
-            case '2': value += 2; break;
-            case '3': value += 3; break;
-            case '4': value += 4; break;
-            case '5': value += 5; break;
-            case '6': value += 6; break;
-            case '7': value += 7; break;
-            case '8': value += 8; break;
-            case '9': value += 9; break;
-            case 'A':
-            case 'a': value += 10; break;
-            case 'B':
-            case 'b': value += 11; break;
-            case 'C':
-            case 'c': value += 12; break;
-            case 'D':
-            case 'd': value += 13; break;
-            case 'E':
-            case 'e': value += 14; break;
-            case 'F':
-            case 'f': value += 15; break;
-        }
-    }
-    if (negative) value = (~value) + 1;
-    return value;
-}
-
-static LexerResult create_integer_token(LexerInfo *lexer_info, IntegerBuiltInType type, uint64_t value, size_t start, size_t start_line) {
-    switch (type) {
-        case INTEGER_I32:
-            if (value > (uint64_t)UINT32_MAX) {
-                char *error_message = (char *)malloc(sizeof(char) * 47);
-                strncpy(error_message, "Integer overflow: value exceeds range for i32.", 47);
-                return lexer_error(lexer_info, error_message, start, start_line);
-            }
-            break;
-        case INTEGER_I16:
-            if (value > (uint64_t)UINT16_MAX) {
-                char *error_message = (char *)malloc(sizeof(char) * 47);
-                strncpy(error_message, "Integer overflow: value exceeds range for i16.", 47);
-                return lexer_error(lexer_info, error_message, start, start_line);
-            }
-            break;
-        case INTEGER_I8:
-            if (value > (uint64_t)UINT8_MAX) {
-                char *error_message = (char *)malloc(sizeof(char) * 46);
-                strncpy(error_message, "Integer overflow: value exceeds range for i8.", 46);
-                return lexer_error(lexer_info, error_message, start, start_line);
-            }
-            break;
-        case INTEGER_U32:
-            if (seek(lexer_info, start) == '-') {
-                char *error_message = (char *)malloc(sizeof(char) * 48);
-                strncpy(error_message, "Integer overflow: value exceeds range for u32.", 48);
-                return lexer_error(lexer_info, error_message, start, start_line);
-            } if (value > (uint64_t)UINT32_MAX) {
-                char *error_message = (char *)malloc(sizeof(char) * 47);
-                strncpy(error_message, "Integer overflow: value exceeds range for u32.", 47);
-                return lexer_error(lexer_info, error_message, start, start_line);
-            }
-            break;
-        case INTEGER_U16:
-            if (seek(lexer_info, start) == '-') {
-                char *error_message = (char *)malloc(sizeof(char) * 48);
-                strncpy(error_message, "Integer overflow: value exceeds range for u16.", 48);
-                return lexer_error(lexer_info, error_message, start, start_line);
-            } if (value > (uint64_t)UINT16_MAX) {
-                char *error_message = (char *)malloc(sizeof(char) * 47);
-                strncpy(error_message, "Integer overflow: value exceeds range for u16.", 47);
-                return lexer_error(lexer_info, error_message, start, start_line);
-            }
-            break;
-        case INTEGER_U8:
-            if (seek(lexer_info, start) == '-') {
-                char *error_message = (char *)malloc(sizeof(char) * 47);
-                strncpy(error_message, "Integer overflow: value exceeds range for u8.", 47);
-                return lexer_error(lexer_info, error_message, start, start_line);
-            } if (value > (uint64_t)UINT8_MAX) {
-                char *error_message = (char *)malloc(sizeof(char) * 46);
-                strncpy(error_message, "Integer overflow: value exceeds range for u8.", 46);
-                return lexer_error(lexer_info, error_message, start, start_line);
-            }
-            break;
-    }
-    return lexer_result(lexer_info, (Token){TOKEN_INTEGER, .integer_literal = (IntegerLiteral){.type = type, .value = value}}, start, start_line);
-}
-
-typedef enum {
-    NUMERIC_BINARY,
-    NUMERIC_OCTAL,
-    NUMERIC_DECIMAL,
-    NUMERIC_HEXADECIMAL,
-    NUMERIC_FLOAT,
-    NUMERIC_EXPONENTIAL,
-} NumericLiteralTypes;
-
-static LexerResult parse_numeric_type(LexerInfo *lexer_info, char c, size_t start, size_t start_line, NumericLiteralTypes numeric_literal_type) {
-    NumericTypes numeric_type = 0;
-    c = advance(lexer_info);
-    if (c == 'f') {
-        numeric_type |= NUMERIC_FLOAT_BASE;
-        if (numeric_literal_type == NUMERIC_DECIMAL || numeric_literal_type == NUMERIC_FLOAT || numeric_literal_type == NUMERIC_EXPONENTIAL) {
+/*
+ * On the first character of a literal, picks the base: a leading '0' followed
+ * by b/B, o/O or x/X selects binary, octal or hexadecimal and advances once;
+ * anything else is decimal and nothing is consumed.
+ * Returns the updated flags. TODO: the remaining scanning steps are not written yet.
+ */
+static NumericNextResult numeric_next(LexerInfo *lexer_info, char c, size_t start, size_t start_line, uint16_t flags) {
+    (void)start_line;
+    if (lexer_info->pos == start) {
+        uint16_t base = NUMERIC_DECIMAL;
+        if (c == '0') {
+            char prefix = far_peek(lexer_info, 1);
+            if (prefix == 'b' || prefix == 'B') base = NUMERIC_BINARY;
+            else if (prefix == 'o' || prefix == 'O') base = NUMERIC_OCTAL;
+            else if (prefix == 'x' || prefix == 'X') base = NUMERIC_HEXADECIMAL;
+        }
+        flags |= base;
+        if (base != NUMERIC_DECIMAL)
             c = advance(lexer_info);
-            if (c == '6' && far_peek(lexer_info, 1) == '4') {
-                numeric_type |= NUMERIC_64;
-                c = advance(lexer_info);
-                c = advance(lexer_info);
-            } else if (c == '3' && far_peek(lexer_info, 1) == '2') {
-                numeric_type |= NUMERIC_32;
-                c = advance(lexer_info);
-                c = advance(lexer_info);
-            } else {
-                char *error_message = (char *)malloc(sizeof(char) * 52);
-                strncpy(error_message, "Invalid float type: must be of type 'f64' or 'f32'.", 52);
-                return lexer_error(lexer_info, error_message, start, start_line);
-            }
-        } else {
-            char *error_message = (char *)malloc(sizeof(char) * 49);
-            strncpy(error_message, "Invalid numeric literal: float type not allowed.", 49);
-            return lexer_error(lexer_info, error_message, start, start_line);
-        }
-    } else if (c == 'i' || c == 'u') {
-        if (c == 'u') numeric_type |= NUMERIC_UNSIGNED;
-        c = advance(lexer_info);
-        if (c == '6' && far_peek(lexer_info, 1) == '4') {
-            numeric_type |= NUMERIC_64;
-            c = advance(lexer_info);
-            c = advance(lexer_info);
-        } else if (c == '3' && far_peek(lexer_info, 1) == '2') {
-            numeric_type |= NUMERIC_32;
-            c = advance(lexer_info);
-            c = advance(lexer_info);
-        } else if (c == '1' && far_peek(lexer_info, 1) == '6') {
-            numeric_type |= NUMERIC_16;
-            c = advance(lexer_info);
-            c = advance(lexer_info);
-        } else if (c == '8') {
-            numeric_type |= NUMERIC_8;
-            c = advance(lexer_info);
-        } else {
-            if (numeric_type & NUMERIC_UNSIGNED) {
-                char *error_message = (char *)malloc(sizeof(char) * 78);
-                strncpy(error_message, "Invalid unsigned integer type: must be of type 'u64', 'u32', 'u16', and 'u8'.", 78);
-                return lexer_error(lexer_info, error_message, start, start_line);
-            } else {
-                char *error_message = (char *)malloc(sizeof(char) * 76);
-                strncpy(error_message, "Invalid signed integer type: must be of type 'i64', 'i32', 'i16', and 'i8'.", 76);
-                return lexer_error(lexer_info, error_message, start, start_line);
-            }
-        }
-    } else {
-        if (numeric_literal_type == NUMERIC_DECIMAL || numeric_literal_type == NUMERIC_FLOAT || numeric_literal_type == NUMERIC_EXPONENTIAL) {
-            char *error_message = (char *)malloc(sizeof(char) * 61);
-            strncpy(error_message, "Invalid numeric type: type must start with 'f', 'i', or 'u'.", 61);
-            return lexer_error(lexer_info, error_message, start, start_line);
-        } else {
-            char *error_message = (char *)malloc(sizeof(char) * 55);
-            strncpy(error_message, "Invalid integer type: type must start with 'i' or 'u'.", 55);
-            return lexer_error(lexer_info, error_message, start, start_line);
-        }
-    } if (isspace(c) || c == '/' || c == '\0') {
-        IntegerTypeResult integer_type = get_integer_type(numeric_type);
-        if (numeric_literal_type == NUMERIC_DECIMAL && numeric_type & NUMERIC_FLOAT_BASE)
-            numeric_literal_type = NUMERIC_FLOAT;
-        uint64_t value;
-        switch (numeric_literal_type) {
-            case NUMERIC_BINARY:
-                if (integer_type.type == SLS_ERROR) return (LexerResult){SLS_ERROR, .error = integer_type.error};
-                value = create_binary_integer(lexer_info, start);
-                return create_integer_token(lexer_info, integer_type.integer_type, value, start, start_line);
-            case NUMERIC_OCTAL:
-                if (integer_type.type == SLS_ERROR) return (LexerResult){SLS_ERROR, .error = integer_type.error};
-                value = create_octal_integer(lexer_info, start);
-                return create_integer_token(lexer_info, integer_type.integer_type, value, start, start_line);
-            case NUMERIC_DECIMAL:
-                if (integer_type.type == SLS_ERROR) return (LexerResult){SLS_ERROR, .error = integer_type.error};
-                value = create_decimal_integer(lexer_info, start);
-                return create_integer_token(lexer_info, integer_type.integer_type, value, start, start_line);
-            case NUMERIC_HEXADECIMAL:
-                if (integer_type.type == SLS_ERROR) return (LexerResult){SLS_ERROR, .error = integer_type.error};
-                value = create_hexadecimal_integer(lexer_info, start);
-                return create_integer_token(lexer_info, integer_type.integer_type, value, start, start_line);
-            case NUMERIC_FLOAT:
-                break;
-            case NUMERIC_EXPONENTIAL:
-                break;
-        }
-        return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Numeric Literal Not Implemented Error.", 1}};
-    }
-    char *error_message = (char *)malloc(sizeof(char) * 57);
-    snprintf(error_message, 57, "Invalid numeric literal: unexpected '%c' in numeric type.", c);
-    return lexer_error(lexer_info, error_message, start, start_line);
+        (void)c;
+    }
+    return (NumericNextResult){SLS_RESULT, .flags = flags};
 }
 
-static LexerResult parse_binary_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
-    while (c == '0' || c == '1' || c == '_') c = advance(lexer_info);
-    if (c == ':') return parse_numeric_type(lexer_info, c, start, start_line, NUMERIC_BINARY);
-    if (isspace(c) || c == '/' || c == '\0') {
-        uint64_t value = create_binary_integer(lexer_info, start);
-        return create_integer_token(lexer_info, INTEGER_I64, value, start, start_line);
-    }
-    char *error_message = (char *)malloc(sizeof(char) * 58);
-    snprintf(error_message, 58, "Invalid binary literal: unexpected '%c' in binary integer.", c);
-    return lexer_error(lexer_info, error_message, start, start_line);
-}
-
-static LexerResult parse_octal_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
-    while ((isdigit(c) || c == '_') && !(c == '8' || c == '9')) c = advance(lexer_info);
-    if (c == ':') return parse_numeric_type(lexer_info, c, start, start_line, NUMERIC_OCTAL);
-    if (isspace(c) || c == '/' || c == '\0') {
-        uint64_t value = create_octal_integer(lexer_info, start);
-        return create_integer_token(lexer_info, INTEGER_I64, value, start, start_line);
-    }
-    char *error_message = (char *)malloc(sizeof(char) * 56);
-    snprintf(error_message, 56, "Invalid octal literal: unexpected '%c' in octal integer.", c);
-    return lexer_error(lexer_info, error_message, start, start_line);
-}
-
-static LexerResult parse_exponential(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
+/* Placeholder entry point for numeric literals: always reports "not implemented". */
+static LexerResult read_numeric(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
     (void)lexer_info; (void)c; (void)start; (void)start_line;
-    return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Float Exponential Not Implemented Error.", 1}};
-}
-
-static LexerResult parse_float(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
-    (void)lexer_info; (void)c; (void)start; (void)start_line;
-    return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Float Not Implemented Error.", 1}};
-}
-
-static LexerResult parse_decimal_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
-    while (isdigit(c) || c == '_') c = advance(lexer_info);
-    if (c == '.') return parse_float(lexer_info, c, start, start_line);
-    if (c == 'e' || c == 'E') return parse_exponential(lexer_info, c, start, start_line);
-    if (c == ':') return parse_numeric_type(lexer_info, c, start, start_line, NUMERIC_DECIMAL);
-    if (isspace(c) || c == '/' || c == '\0') {
-        uint64_t value = create_decimal_integer(lexer_info, start);
-        return create_integer_token(lexer_info, INTEGER_I64, value, start, start_line);
-    }
-    char *error_message = (char *)malloc(sizeof(char) * 60);
-    snprintf(error_message, 60, "Invalid decimal literal: unexpected '%c' in decimal integer.", c);
-    return lexer_error(lexer_info, error_message, start, start_line);
-}
-
-static LexerResult parse_hexadecimal_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
-    while (isxdigit(c) || c == '_') c = advance(lexer_info);
-    if (c == ':') return parse_numeric_type(lexer_info, c, start, start_line, NUMERIC_HEXADECIMAL);
-    if (isspace(c) || c == '/' || c == '\0') {
-        uint64_t value = create_hexadecimal_integer(lexer_info, start);
-        return create_integer_token(lexer_info, INTEGER_I64, value, start, start_line);
-    }
-    char *error_message = (char *)malloc(sizeof(char) * 68);
-    snprintf(error_message, 68, "Invalid hexadecimal literal: unexpected '%c' in hexadecimal integer.", c);
-    return lexer_error(lexer_info, error_message, start, start_line);
-}
-
-static LexerResult parse_numeric_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
-    if (c == '-') c = advance(lexer_info);
-    if (c == '0') {
-        c = advance(lexer_info);
-        if (c == 'b' || c == 'B') {
-            c = advance(lexer_info);
-            return parse_binary_integer(lexer_info, c, start, start_line);
-        } else if (c == 'o' || c == 'O') {
-            c = advance(lexer_info);
-            return parse_octal_integer(lexer_info, c, start, start_line);
-        } else if (c == 'x' || c == 'X') {
-            c = advance(lexer_info);
-            return parse_hexadecimal_integer(lexer_info, c, start, start_line);
-        }
-    }
-    return parse_decimal_integer(lexer_info, c, start, start_line);
+    return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Integers Not Implemented Error.", 1}};
 }
 
 static LexerResult parse_character_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
@@ -614,7 +365,7 @@ static LexerResult lexer_next(LexerInfo *lexer_info) {
     // End of file tokens
     if (c == '\0') return lexer_result(lexer_info, (Token){.type = TOKEN_EOF}, start, start_line);
     // Integers and Floats
-    if (isdigit(c) || c == '.' || (c == '-' && isdigit(far_peek(lexer_info, 1)))) return parse_numeric_literal(lexer_info, c, start, start_line);
+    if (is_numeric_start(lexer_info, c, start, start_line)) return read_numeric(lexer_info, c, start, start_line);
     // Character Literals
     if (c == '\'') return parse_character_literal(lexer_info, c, start, start_line);
     // String Literals