Started reworking numeric literals

This commit is contained in:
Kyler Olsen 2025-11-07 14:32:03 -07:00
parent 40007c27a6
commit 29a83a4bae
1 changed files with 108 additions and 412 deletions

View File

@ -140,429 +140,125 @@ static LexerResult lexer_error(LexerInfo *lexer_info, const char* message, size_
}
typedef enum {
NUMERIC_FLOAT_BASE = 1 << 0,
NUMERIC_FLOAT = 1 << 0,
NUMERIC_UNSIGNED = 1 << 1,
NUMERIC_64 = 1 << 2,
NUMERIC_32 = 1 << 3,
NUMERIC_16 = 1 << 4,
NUMERIC_8 = 1 << 5,
} NumericTypesBase;
NUMERIC_EXPONENTIAL = 1 << 2,
NUMERIC_64 = 1 << 3,
NUMERIC_32 = 1 << 4,
NUMERIC_16 = 1 << 5,
NUMERIC_8 = 1 << 6,
NUMERIC_BINARY = 1 << 7,
NUMERIC_OCTAL = 1 << 8,
NUMERIC_DECIMAL = 1 << 9,
NUMERIC_HEXADECIMAL = 1 << 10,
} NumericFlags;
// Concrete numeric literal types. Each value is a width flag (NUMERIC_64,
// NUMERIC_32, NUMERIC_16, NUMERIC_8) optionally OR-ed with one modifier flag:
// NUMERIC_FLOAT_BASE for the F* entries, NUMERIC_UNSIGNED for the U* entries.
// The I* entries are the bare width flag with no modifier.
typedef enum {
NUMERIC_F64 = NUMERIC_64 | NUMERIC_FLOAT_BASE,
NUMERIC_F32 = NUMERIC_32 | NUMERIC_FLOAT_BASE,
NUMERIC_I64 = NUMERIC_64,
NUMERIC_I32 = NUMERIC_32,
NUMERIC_I16 = NUMERIC_16,
NUMERIC_I8 = NUMERIC_8,
NUMERIC_U64 = NUMERIC_64 | NUMERIC_UNSIGNED,
NUMERIC_U32 = NUMERIC_32 | NUMERIC_UNSIGNED,
NUMERIC_U16 = NUMERIC_16 | NUMERIC_UNSIGNED,
NUMERIC_U8 = NUMERIC_8 | NUMERIC_UNSIGNED,
} NumericTypes;
// Multi-limb number used by the arithmetic helpers in this file.
// value[0] is the most significant limb: normalize_numeric() moves anything
// above the low 32 bits of value[i] into value[i-1]. Each slot is 64 bits wide
// so that additions have headroom before they are normalized.
// from_int() stores an integer's low 32 bits in value[3] and its high 32 bits
// in value[2], so value[4..7] sit below the units position — presumably
// fractional limbs; TODO confirm against the intended design.
typedef struct {
uint64_t value[8];
// Set by normalize_numeric() when value[0] exceeds 32 bits; the arithmetic helpers propagate it.
Boolean overflow;
} Numeric;
static const Numeric ZERO = (Numeric) { {0, 0, 0, 0, 0, 0, 0, 0}, FALSE };
static const Numeric ONE = (Numeric) { {0, 0, 0, 1, 0, 0, 0, 0}, FALSE };
// Propagates carries so that every limb except value[0] keeps only its low
// 32 bits. Limbs are visited from the least significant (index 7) towards the
// most significant (index 0), so a carry produced by one limb is folded into
// the next limb before that limb is inspected.
//
// Returns the normalized copy. If value[0] still exceeds 32 bits afterwards,
// the overflow flag is set; value[0] itself is left unmasked.
static Numeric normalize_numeric(Numeric a) {
    // The index must count DOWN: counting up from 7 walks off the end of
    // value[] and never reaches the loop's terminating condition safely.
    for (size_t i = 7; i > 0; i--) {
        if (a.value[i] > 0xffffffffUL) {
            a.value[i - 1] += a.value[i] >> 32;
            a.value[i] &= 0xffffffffUL;
        }
    }
    if (a.value[0] > 0xffffffffUL) a.overflow = TRUE;
    return a;
}
static Numeric from_int(uint64_t a) {
return normalize_numeric((Numeric){{0, 0, (a & 0xffffffff00000000UL) >> 32, (a & 0xffffffff), 0, 0, 0, 0}, FALSE});
}
// Returns TRUE when all eight limbs are zero, FALSE otherwise.
// The overflow flag is not considered.
static Boolean is_zero(Numeric a) {
    uint64_t combined = 0;
    size_t limb = 8;
    while (limb-- > 0) {
        combined |= a.value[limb];
    }
    if (combined == 0) {
        return TRUE;
    }
    return FALSE;
}
// Returns TRUE when bit 31 of the most significant limb (value[0]) is set,
// i.e. the value's sign bit when read as two's complement; FALSE otherwise.
static Boolean is_neg(Numeric a) {
    const uint64_t sign_bit = 0x80000000UL;
    if ((a.value[0] & sign_bit) != 0) {
        return TRUE;
    }
    return FALSE;
}
// Adds two Numerics limb by limb and normalizes the sum.
// The result carries the overflow flag if either operand had it set, or if
// normalize_numeric() detects a carry out of the top limb.
static Numeric add(Numeric a, Numeric b) {
    Numeric sum = a;
    size_t limb = 0;
    while (limb < 8) {
        sum.value[limb] += b.value[limb];
        limb++;
    }
    if (b.overflow) {
        sum.overflow = TRUE;
    }
    return normalize_numeric(sum);
}
// Computes a - b as a + (-b), negating b in two's complement: invert the low
// 32 bits of every limb, then add one unit in the LEAST significant limb
// (index 7). Adding the integer ONE (limb 3) instead would leave the result
// off by (1 - one unit in the last place).
//
// NOTE(review): the carry out of the top limb that two's-complement
// subtraction produces for non-negative results is still reported through the
// overflow flag by add()/normalize_numeric(); callers that only need the sign
// (is_neg) are unaffected. Confirm whether the flag should be cleared here.
static Numeric sub(Numeric a, Numeric b) {
    // All limbs and the overflow flag are zero except the last limb.
    Numeric unit_in_last_place = { .value = { [7] = 1 } };
    for (size_t i = 0; i < 8; i++)
        b.value[i] ^= 0xffffffffUL;
    b = add(b, unit_in_last_place);
    if (b.overflow) a.overflow = TRUE;
    return add(a, b);
}
// Multiplies a by b through repeated addition: a is added to the running
// product once for every integer step of a counter i from 0 up to (but not
// including) b. Returns ZERO if either operand is zero.
//
// The product starts at ZERO; seeding it with a and then adding a another
// b times would yield a * (b + 1).
//
// NOTE(review): the loop count is driven by is_neg(i - b), so a negative b
// produces no iterations, and run time is proportional to the magnitude of b.
static Numeric multi(Numeric a, Numeric b) {
    if (is_zero(a) || is_zero(b)) return ZERO;
    Numeric product = ZERO;
    for (Numeric i = ZERO; is_neg(sub(i, b)); i = add(i, ONE))
        product = add(product, a);  // add() also carries over a's overflow flag
    if (b.overflow) product.overflow = TRUE;
    return product;
}
// Divides a by b through repeated subtraction and returns the number of times
// b could be subtracted from a before the remainder turned negative.
//
// A zero divisor returns ZERO with the overflow flag set instead of looping.
// The overflow flags of both operands are propagated to the result.
//
// NOTE(review): the remainder's own overflow flag is deliberately not
// propagated. sub() is built on add(), which flags the carry out of the top
// limb, and that carry occurs on every successful (non-negative) subtraction.
// NOTE(review): a negative divisor is not handled specially; confirm callers
// only pass non-negative operands.
static Numeric div_n(Numeric a, Numeric b) {
    uint64_t divisor_bits = 0;
    for (size_t limb = 0; limb < 8; limb++)
        divisor_bits |= b.value[limb];
    if (divisor_bits == 0) {
        Numeric undefined = ZERO;
        undefined.overflow = TRUE;
        return undefined;
    }

    Numeric remainder = a;
    Numeric quotient = ZERO;
    for (;;) {
        Numeric next = sub(remainder, b);
        if (is_neg(next)) break;
        remainder = next;
        quotient = add(quotient, ONE);
    }
    if (a.overflow) quotient.overflow = TRUE;
    if (b.overflow) quotient.overflow = TRUE;
    return quotient;
}
// Reports whether c can begin a numeric literal: a decimal digit, a '.', or a
// '-' that is immediately followed by a digit or a '.'.
//
// lexer_info is used only to look one character ahead; start and start_line
// are accepted for signature parity with the other lexer helpers and unused.
static Boolean is_numeric_start(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    (void)start;
    (void)start_line;
    // <ctype.h> functions require a value representable as unsigned char.
    if (isdigit((unsigned char)c) || c == '.') return TRUE;
    if (c == '-') {
        char next = far_peek(lexer_info, 1);
        if (isdigit((unsigned char)next) || next == '.') return TRUE;
    }
    return FALSE;
}
// Reports whether c is a valid digit for the base selected in flags.
// The base flags are tested in the order binary, octal, decimal, hexadecimal
// and the first one that is set decides the answer; with no base flag set the
// result is FALSE.
//
// lexer_info, start and start_line are unused.
static Boolean is_base_digit(LexerInfo *lexer_info, char c, size_t start, size_t start_line, uint16_t flags) {
    (void)lexer_info;
    (void)start;
    (void)start_line;
    if (flags & NUMERIC_BINARY) return (c == '0' || c == '1') ? TRUE : FALSE;
    if (flags & NUMERIC_OCTAL) return (c >= '0' && c <= '7') ? TRUE : FALSE;
    // <ctype.h> functions require a value representable as unsigned char.
    if (flags & NUMERIC_DECIMAL) return isdigit((unsigned char)c) ? TRUE : FALSE;
    if (flags & NUMERIC_HEXADECIMAL) return isxdigit((unsigned char)c) ? TRUE : FALSE;
    return FALSE;
}
typedef struct {
SlsResultType type;
union {
IntegerBuiltInType integer_type; // type == SLS_RESULT
SlsError error; // type == SLS_ERROR
uint16_t flags;
SlsError error;
};
} IntegerTypeResult;
} NumericNextResult;
static IntegerTypeResult get_integer_type(NumericTypes numeric_type) {
IntegerBuiltInType integer_type;
switch (numeric_type) {
case NUMERIC_I64:
integer_type = INTEGER_I64;
break;
case NUMERIC_I32:
integer_type = INTEGER_I32;
break;
case NUMERIC_I16:
integer_type = INTEGER_I16;
break;
case NUMERIC_I8:
integer_type = INTEGER_I8;
break;
case NUMERIC_U64:
integer_type = INTEGER_U64;
break;
case NUMERIC_U32:
integer_type = INTEGER_U32;
break;
case NUMERIC_U16:
integer_type = INTEGER_U16;
break;
case NUMERIC_U8:
integer_type = INTEGER_U8;
break;
default:
return (IntegerTypeResult){SLS_ERROR, .error = (SlsError){.message = "Lexer Error: Encountered a Float where there should not be one.", .code = 1}};
}
return (IntegerTypeResult){SLS_RESULT, .integer_type = integer_type};
static NumericNextResult numeric_next(LexerInfo *lexer_info, char c, size_t start, size_t start_line, uint16_t flags) {
if (lexer_info->pos == start && c == '0')
if (isalpha(far_peek(lexer_info, 1))) {
c = advance(lexer_info);
if (c == 'b' || c == 'B') flags |= NUMERIC_BINARY;
else if (c == 'o' || c == 'O') flags |= NUMERIC_OCTAL;
else if (c == 'o' || c == 'O') flags |= NUMERIC_OCTAL;
else {
};
} else flags |= NUMERIC_DECIMAL;
}
// Maps an ASCII digit or hexadecimal letter to its numeric value. Any other
// character, and any digit that is not valid in `base`, contributes 0 — the
// same outcome as a switch over the valid digits with no default case.
static uint64_t literal_digit_value(char ch, uint64_t base) {
    uint64_t digit = 0;
    if (ch >= '0' && ch <= '9') digit = (uint64_t)(ch - '0');
    else if (ch >= 'a' && ch <= 'f') digit = (uint64_t)(ch - 'a') + 10;
    else if (ch >= 'A' && ch <= 'F') digit = (uint64_t)(ch - 'A') + 10;
    return (digit < base) ? digit : 0;
}

// Shared digit accumulator for the create_*_integer functions.
//
// Reads the token text beginning at `start`, skips an optional leading '-'
// plus `prefix_length` further characters (the "0b"/"0o"/"0x" prefix, or
// nothing for decimal), and folds the remaining characters into a value in
// the given base. Scanning stops at whitespace, '/', ':' or NUL; '.' and '_'
// are skipped. A leading '-' yields the two's-complement negation.
// Arithmetic wraps modulo 2^64; no overflow is reported.
static uint64_t accumulate_literal_digits(LexerInfo *lexer_info, size_t start, size_t prefix_length, uint64_t base) {
    const char *token = get_token_text(lexer_info, start);
    Boolean negative = (token[0] == '-') ? TRUE : FALSE;
    size_t i = prefix_length + (negative ? 1 : 0);
    uint64_t value = 0;
    for (; i < lexer_info->pos - start; i++) {
        char ch = token[i];
        // <ctype.h> functions require a value representable as unsigned char.
        if (isspace((unsigned char)ch) || ch == '/' || ch == '\0' || ch == ':') break;
        if (ch == '.' || ch == '_') continue;
        value = value * base + literal_digit_value(ch, base);
    }
    if (negative) value = (~value) + 1;
    return value;
}

// Evaluates a binary literal ("0b..." with an optional leading '-') whose text
// starts at `start`.
static uint64_t create_binary_integer(LexerInfo *lexer_info, size_t start) {
    return accumulate_literal_digits(lexer_info, start, 2, 2);
}

// Evaluates an octal literal ("0o..." with an optional leading '-') whose text
// starts at `start`.
static uint64_t create_octal_integer(LexerInfo *lexer_info, size_t start) {
    return accumulate_literal_digits(lexer_info, start, 2, 8);
}

// Evaluates a decimal literal (no prefix, optional leading '-') whose text
// starts at `start`.
static uint64_t create_decimal_integer(LexerInfo *lexer_info, size_t start) {
    return accumulate_literal_digits(lexer_info, start, 0, 10);
}

// Evaluates a hexadecimal literal ("0x..." with an optional leading '-') whose
// text starts at `start`.
static uint64_t create_hexadecimal_integer(LexerInfo *lexer_info, size_t start) {
    return accumulate_literal_digits(lexer_info, start, 2, 16);
}
// Range-checks `value` against the requested integer type and produces either
// a TOKEN_INTEGER token or an "Integer overflow" lexer error.
//
// value is the literal's magnitude, already negated in two's complement when
// the literal started with '-'.
//   - u32/u16/u8: a leading '-' is always an error; otherwise value must not
//     exceed UINTn_MAX.
//   - i32/i16/i8: non-negative literals may use the full n-bit pattern (up to
//     UINTn_MAX). Negative literals are accepted down to INTn_MIN. Comparing
//     the negated value against UINTn_MAX alone rejects every non-zero
//     negative literal, so the sign is checked explicitly.
//   - 64-bit types are not range-checked here.
//
// NOTE(review): seek(lexer_info, start) is assumed to read the character at
// `start` without moving the lexer; confirm.
// NOTE(review): a negative i32/i16/i8 literal is stored as its 64-bit
// sign-extended bit pattern; confirm consumers expect that.
static LexerResult create_integer_token(LexerInfo *lexer_info, IntegerBuiltInType type, uint64_t value, size_t start, size_t start_line) {
    const char *type_name = NULL;       // NULL means "no range check"
    uint64_t max_value = 0;             // largest accepted non-negative value
    uint64_t min_negative_bits = 0;     // two's-complement pattern of INTn_MIN
    Boolean is_signed = FALSE;

    switch (type) {
        case INTEGER_I32: type_name = "i32"; max_value = UINT32_MAX; min_negative_bits = (uint64_t)INT32_MIN; is_signed = TRUE; break;
        case INTEGER_I16: type_name = "i16"; max_value = UINT16_MAX; min_negative_bits = (uint64_t)INT16_MIN; is_signed = TRUE; break;
        case INTEGER_I8:  type_name = "i8";  max_value = UINT8_MAX;  min_negative_bits = (uint64_t)INT8_MIN;  is_signed = TRUE; break;
        case INTEGER_U32: type_name = "u32"; max_value = UINT32_MAX; break;
        case INTEGER_U16: type_name = "u16"; max_value = UINT16_MAX; break;
        case INTEGER_U8:  type_name = "u8";  max_value = UINT8_MAX;  break;
        default: break;
    }

    if (type_name != NULL) {
        Boolean negative = (seek(lexer_info, start) == '-') ? TRUE : FALSE;
        Boolean out_of_range;
        if (is_signed) {
            out_of_range = (value > max_value && !(negative && value >= min_negative_bits)) ? TRUE : FALSE;
        } else {
            out_of_range = (negative || value > max_value) ? TRUE : FALSE;
        }
        if (out_of_range) {
            // Longest message is 46 characters plus the terminator.
            enum { MESSAGE_CAPACITY = 48 };
            char *error_message = malloc(MESSAGE_CAPACITY);
            if (error_message == NULL) {
                // NOTE(review): falls back to a string literal; assumes
                // lexer_error() does not free its message argument.
                return lexer_error(lexer_info, "Integer overflow: value exceeds range for its type.", start, start_line);
            }
            snprintf(error_message, MESSAGE_CAPACITY, "Integer overflow: value exceeds range for %s.", type_name);
            return lexer_error(lexer_info, error_message, start, start_line);
        }
    }

    return lexer_result(lexer_info, (Token){TOKEN_INTEGER, .integer_literal = (IntegerLiteral){.type = type, .value = value}}, start, start_line);
}
// Syntactic form of a numeric literal, passed to parse_numeric_type() so it
// can decide which type suffixes are legal and which create_*_integer
// function evaluates the digits.
typedef enum {
NUMERIC_BINARY,
NUMERIC_OCTAL,
NUMERIC_DECIMAL,
NUMERIC_HEXADECIMAL,
NUMERIC_FLOAT,
NUMERIC_EXPONENTIAL,
} NumericLiteralTypes;
// Parses the type suffix that follows a numeric literal and builds the token.
//
// The integer parsers call this when the current character c is ':'. The
// suffix is one of f64/f32 (decimal, float and exponential literals only) or
// i64/i32/i16/i8/u64/u32/u16/u8, and must be followed by whitespace, '/' or
// NUL. Any other input yields a lexer error whose message names the problem.
//
// Integer literals are evaluated with the create_*_integer function matching
// numeric_literal_type and range-checked by create_integer_token(). Float and
// exponential literals currently fall through to a "Not Implemented" error.
//
// NOTE(review): none of the malloc() results below are checked before being
// written to.
static LexerResult parse_numeric_type(LexerInfo *lexer_info, char c, size_t start, size_t start_line, NumericLiteralTypes numeric_literal_type) {
NumericTypes numeric_type = 0;
// Step past the ':' to the first character of the suffix.
c = advance(lexer_info);
if (c == 'f') {
numeric_type |= NUMERIC_FLOAT_BASE;
// A float suffix is only legal on decimal, float and exponential literals.
if (numeric_literal_type == NUMERIC_DECIMAL || numeric_literal_type == NUMERIC_FLOAT || numeric_literal_type == NUMERIC_EXPONENTIAL) {
c = advance(lexer_info);
if (c == '6' && far_peek(lexer_info, 1) == '4') {
numeric_type |= NUMERIC_64;
c = advance(lexer_info);
c = advance(lexer_info);
} else if (c == '3' && far_peek(lexer_info, 1) == '2') {
numeric_type |= NUMERIC_32;
c = advance(lexer_info);
c = advance(lexer_info);
} else {
char *error_message = (char *)malloc(sizeof(char) * 52);
strncpy(error_message, "Invalid float type: must be of type 'f64' or 'f32'.", 52);
return lexer_error(lexer_info, error_message, start, start_line);
}
} else {
char *error_message = (char *)malloc(sizeof(char) * 49);
strncpy(error_message, "Invalid numeric literal: float type not allowed.", 49);
return lexer_error(lexer_info, error_message, start, start_line);
}
} else if (c == 'i' || c == 'u') {
if (c == 'u') numeric_type |= NUMERIC_UNSIGNED;
c = advance(lexer_info);
// Width digits: two-character widths are confirmed with a one-character lookahead.
if (c == '6' && far_peek(lexer_info, 1) == '4') {
numeric_type |= NUMERIC_64;
c = advance(lexer_info);
c = advance(lexer_info);
} else if (c == '3' && far_peek(lexer_info, 1) == '2') {
numeric_type |= NUMERIC_32;
c = advance(lexer_info);
c = advance(lexer_info);
} else if (c == '1' && far_peek(lexer_info, 1) == '6') {
numeric_type |= NUMERIC_16;
c = advance(lexer_info);
c = advance(lexer_info);
} else if (c == '8') {
numeric_type |= NUMERIC_8;
c = advance(lexer_info);
} else {
if (numeric_type & NUMERIC_UNSIGNED) {
char *error_message = (char *)malloc(sizeof(char) * 78);
strncpy(error_message, "Invalid unsigned integer type: must be of type 'u64', 'u32', 'u16', and 'u8'.", 78);
return lexer_error(lexer_info, error_message, start, start_line);
} else {
char *error_message = (char *)malloc(sizeof(char) * 76);
strncpy(error_message, "Invalid signed integer type: must be of type 'i64', 'i32', 'i16', and 'i8'.", 76);
return lexer_error(lexer_info, error_message, start, start_line);
}
}
} else {
// The suffix does not start with a known type letter; the wording depends on whether 'f' would have been legal.
if (numeric_literal_type == NUMERIC_DECIMAL || numeric_literal_type == NUMERIC_FLOAT || numeric_literal_type == NUMERIC_EXPONENTIAL) {
char *error_message = (char *)malloc(sizeof(char) * 61);
strncpy(error_message, "Invalid numeric type: type must start with 'f', 'i', or 'u'.", 61);
return lexer_error(lexer_info, error_message, start, start_line);
} else {
char *error_message = (char *)malloc(sizeof(char) * 55);
strncpy(error_message, "Invalid integer type: type must start with 'i' or 'u'.", 55);
return lexer_error(lexer_info, error_message, start, start_line);
}
// The suffix must be terminated by whitespace, '/' or NUL.
} if (isspace(c) || c == '/' || c == '\0') {
// Computed for every suffix; its error is only consulted by the integer cases below.
IntegerTypeResult integer_type = get_integer_type(numeric_type);
// A decimal literal with a float suffix is treated as a float literal.
if (numeric_literal_type == NUMERIC_DECIMAL && numeric_type & NUMERIC_FLOAT_BASE)
numeric_literal_type = NUMERIC_FLOAT;
uint64_t value;
switch (numeric_literal_type) {
case NUMERIC_BINARY:
if (integer_type.type == SLS_ERROR) return (LexerResult){SLS_ERROR, .error = integer_type.error};
value = create_binary_integer(lexer_info, start);
return create_integer_token(lexer_info, integer_type.integer_type, value, start, start_line);
case NUMERIC_OCTAL:
if (integer_type.type == SLS_ERROR) return (LexerResult){SLS_ERROR, .error = integer_type.error};
value = create_octal_integer(lexer_info, start);
return create_integer_token(lexer_info, integer_type.integer_type, value, start, start_line);
case NUMERIC_DECIMAL:
if (integer_type.type == SLS_ERROR) return (LexerResult){SLS_ERROR, .error = integer_type.error};
value = create_decimal_integer(lexer_info, start);
return create_integer_token(lexer_info, integer_type.integer_type, value, start, start_line);
case NUMERIC_HEXADECIMAL:
if (integer_type.type == SLS_ERROR) return (LexerResult){SLS_ERROR, .error = integer_type.error};
value = create_hexadecimal_integer(lexer_info, start);
return create_integer_token(lexer_info, integer_type.integer_type, value, start, start_line);
case NUMERIC_FLOAT:
break;
case NUMERIC_EXPONENTIAL:
break;
}
// Reached only for float and exponential literals.
return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Numeric Literal Not Implemented Error.", 1}};
}
// The formatted message is 56 characters plus the terminator, exactly filling the 57-byte buffer.
char *error_message = (char *)malloc(sizeof(char) * 57);
snprintf(error_message, 57, "Invalid numeric literal: unexpected '%c' in numeric type.", c);
return lexer_error(lexer_info, error_message, start, start_line);
}
// Lexes the digits of a binary literal; c is the first character after the
// "0b" prefix. Consumes '0', '1' and '_' characters, then:
//   - ':' hands over to parse_numeric_type() for an explicit type suffix;
//   - whitespace, '/' or NUL ends the literal, which becomes an i64 token;
//   - anything else is reported as an invalid binary literal.
static LexerResult parse_binary_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    while (c == '0' || c == '1' || c == '_') c = advance(lexer_info);
    if (c == ':') return parse_numeric_type(lexer_info, c, start, start_line, NUMERIC_BINARY);
    // <ctype.h> functions require a value representable as unsigned char.
    if (isspace((unsigned char)c) || c == '/' || c == '\0') {
        uint64_t value = create_binary_integer(lexer_info, start);
        return create_integer_token(lexer_info, INTEGER_I64, value, start, start_line);
    }

    // Formatted message is 57 characters plus the terminator.
    enum { MESSAGE_CAPACITY = 58 };
    char *error_message = malloc(MESSAGE_CAPACITY);
    if (error_message == NULL) {
        // NOTE(review): falls back to a string literal; assumes lexer_error()
        // does not free its message argument.
        return lexer_error(lexer_info, "Invalid binary literal: unexpected character in binary integer.", start, start_line);
    }
    snprintf(error_message, MESSAGE_CAPACITY, "Invalid binary literal: unexpected '%c' in binary integer.", c);
    return lexer_error(lexer_info, error_message, start, start_line);
}
// Lexes the digits of an octal literal; c is the first character after the
// "0o" prefix. Consumes '0'..'7' and '_' characters, then:
//   - ':' hands over to parse_numeric_type() for an explicit type suffix;
//   - whitespace, '/' or NUL ends the literal, which becomes an i64 token;
//   - anything else is reported as an invalid octal literal.
static LexerResult parse_octal_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    while ((c >= '0' && c <= '7') || c == '_') c = advance(lexer_info);
    if (c == ':') return parse_numeric_type(lexer_info, c, start, start_line, NUMERIC_OCTAL);
    // <ctype.h> functions require a value representable as unsigned char.
    if (isspace((unsigned char)c) || c == '/' || c == '\0') {
        uint64_t value = create_octal_integer(lexer_info, start);
        return create_integer_token(lexer_info, INTEGER_I64, value, start, start_line);
    }

    // Formatted message is 55 characters plus the terminator.
    enum { MESSAGE_CAPACITY = 56 };
    char *error_message = malloc(MESSAGE_CAPACITY);
    if (error_message == NULL) {
        // NOTE(review): falls back to a string literal; assumes lexer_error()
        // does not free its message argument.
        return lexer_error(lexer_info, "Invalid octal literal: unexpected character in octal integer.", start, start_line);
    }
    snprintf(error_message, MESSAGE_CAPACITY, "Invalid octal literal: unexpected '%c' in octal integer.", c);
    return lexer_error(lexer_info, error_message, start, start_line);
}
static LexerResult parse_exponential(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
static LexerResult read_numeric(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
(void)lexer_info; (void)c; (void)start; (void)start_line;
return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Float Exponential Not Implemented Error.", 1}};
}
// Placeholder for floating-point literal lexing. Every argument is ignored
// and a fixed "not implemented" error result is returned.
static LexerResult parse_float(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    (void)lexer_info;
    (void)c;
    (void)start;
    (void)start_line;

    LexerResult not_implemented = {SLS_ERROR, .error = (SlsError){"Lexer: Float Not Implemented Error.", 1}};
    return not_implemented;
}
// Lexes the digits of a decimal literal starting at character c. Consumes
// decimal digits and '_' characters, then dispatches on what follows:
//   - '.' continues as a float (parse_float);
//   - 'e' / 'E' continues as an exponential (parse_exponential);
//   - ':' hands over to parse_numeric_type() for an explicit type suffix;
//   - whitespace, '/' or NUL ends the literal, which becomes an i64 token;
//   - anything else is reported as an invalid decimal literal.
static LexerResult parse_decimal_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // <ctype.h> functions require a value representable as unsigned char.
    while (isdigit((unsigned char)c) || c == '_') c = advance(lexer_info);
    if (c == '.') return parse_float(lexer_info, c, start, start_line);
    if (c == 'e' || c == 'E') return parse_exponential(lexer_info, c, start, start_line);
    if (c == ':') return parse_numeric_type(lexer_info, c, start, start_line, NUMERIC_DECIMAL);
    if (isspace((unsigned char)c) || c == '/' || c == '\0') {
        uint64_t value = create_decimal_integer(lexer_info, start);
        return create_integer_token(lexer_info, INTEGER_I64, value, start, start_line);
    }

    // Formatted message is 59 characters plus the terminator.
    enum { MESSAGE_CAPACITY = 60 };
    char *error_message = malloc(MESSAGE_CAPACITY);
    if (error_message == NULL) {
        // NOTE(review): falls back to a string literal; assumes lexer_error()
        // does not free its message argument.
        return lexer_error(lexer_info, "Invalid decimal literal: unexpected character in decimal integer.", start, start_line);
    }
    snprintf(error_message, MESSAGE_CAPACITY, "Invalid decimal literal: unexpected '%c' in decimal integer.", c);
    return lexer_error(lexer_info, error_message, start, start_line);
}
// Lexes the digits of a hexadecimal literal; c is the first character after
// the "0x" prefix. Consumes hexadecimal digits and '_' characters, then:
//   - ':' hands over to parse_numeric_type() for an explicit type suffix;
//   - whitespace, '/' or NUL ends the literal, which becomes an i64 token;
//   - anything else is reported as an invalid hexadecimal literal.
static LexerResult parse_hexadecimal_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // <ctype.h> functions require a value representable as unsigned char.
    while (isxdigit((unsigned char)c) || c == '_') c = advance(lexer_info);
    if (c == ':') return parse_numeric_type(lexer_info, c, start, start_line, NUMERIC_HEXADECIMAL);
    if (isspace((unsigned char)c) || c == '/' || c == '\0') {
        uint64_t value = create_hexadecimal_integer(lexer_info, start);
        return create_integer_token(lexer_info, INTEGER_I64, value, start, start_line);
    }

    // Formatted message is 67 characters plus the terminator.
    enum { MESSAGE_CAPACITY = 68 };
    char *error_message = malloc(MESSAGE_CAPACITY);
    if (error_message == NULL) {
        // NOTE(review): falls back to a string literal; assumes lexer_error()
        // does not free its message argument.
        return lexer_error(lexer_info, "Invalid hexadecimal literal: unexpected character in hexadecimal integer.", start, start_line);
    }
    snprintf(error_message, MESSAGE_CAPACITY, "Invalid hexadecimal literal: unexpected '%c' in hexadecimal integer.", c);
    return lexer_error(lexer_info, error_message, start, start_line);
}
// Entry point for numeric literals. Skips an optional leading '-', then looks
// for a base prefix: "0b"/"0B" (binary), "0o"/"0O" (octal) or "0x"/"0X"
// (hexadecimal). With a prefix, the matching parser receives the first
// character after it. Everything else, including a plain leading '0', is
// handled by parse_decimal_integer().
static LexerResult parse_numeric_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    if (c == '-') c = advance(lexer_info);
    if (c == '0') {
        c = advance(lexer_info);
        switch (c) {
            case 'b':
            case 'B':
                c = advance(lexer_info);
                return parse_binary_integer(lexer_info, c, start, start_line);
            case 'o':
            case 'O':
                c = advance(lexer_info);
                return parse_octal_integer(lexer_info, c, start, start_line);
            case 'x':
            case 'X':
                c = advance(lexer_info);
                return parse_hexadecimal_integer(lexer_info, c, start, start_line);
            default:
                break;
        }
    }
    // This is the only exit for unprefixed literals; a second return placed
    // after it could never execute.
    return parse_decimal_integer(lexer_info, c, start, start_line);
}
static LexerResult parse_character_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
@ -614,7 +310,7 @@ static LexerResult lexer_next(LexerInfo *lexer_info) {
// End of file tokens
if (c == '\0') return lexer_result(lexer_info, (Token){.type = TOKEN_EOF}, start, start_line);
// Integers and Floats
if (isdigit(c) || c == '.' || (c == '-' && isdigit(far_peek(lexer_info, 1)))) return parse_numeric_literal(lexer_info, c, start, start_line);
if (is_numeric_start(lexer_info, c, start, start_line)) return read_numeric(lexer_info, c, start, start_line);
// Character Literals
if (c == '\'') return parse_character_literal(lexer_info, c, start, start_line);
// String Literals