Lexer can now do most integers

This commit is contained in:
Kyler Olsen 2025-11-06 14:44:57 -07:00
parent bf787f3cfe
commit 681322ea2e
1 changed file with 256 additions and 38 deletions

View File

@ -132,15 +132,205 @@ static LexerResult lexer_error(LexerInfo *lexer_info, const char* message, size_
return (LexerResult){SLS_RESULT, .result = result};
}
static LexerResult parse_integer_type(LexerInfo *lexer_info, char c, size_t start, size_t start_line, Boolean float_allowed) {
/*
 * Single-bit flags describing the pieces of a numeric type suffix.
 * Each enumerator occupies its own bit so they can be OR-ed together.
 */
typedef enum {
    NUMERIC_FLOAT_BASE = 0x01, /* floating-point marker */
    NUMERIC_UNSIGNED   = 0x02, /* unsigned marker */
    NUMERIC_64         = 0x04, /* 64-bit width */
    NUMERIC_32         = 0x08, /* 32-bit width */
    NUMERIC_16         = 0x10, /* 16-bit width */
    NUMERIC_8          = 0x20, /* 8-bit width */
} NumericTypesBase;
/*
 * Complete numeric type descriptors. Each value is one width flag from
 * NumericTypesBase, optionally OR-ed with NUMERIC_FLOAT_BASE (F*) or
 * NUMERIC_UNSIGNED (U*); the signed integer variants (I*) are the bare
 * width flag.
 */
typedef enum {
NUMERIC_F64 = NUMERIC_64 | NUMERIC_FLOAT_BASE,
NUMERIC_F32 = NUMERIC_32 | NUMERIC_FLOAT_BASE,
NUMERIC_I64 = NUMERIC_64,
NUMERIC_I32 = NUMERIC_32,
NUMERIC_I16 = NUMERIC_16,
NUMERIC_I8 = NUMERIC_8,
NUMERIC_U64 = NUMERIC_64 | NUMERIC_UNSIGNED,
NUMERIC_U32 = NUMERIC_32 | NUMERIC_UNSIGNED,
NUMERIC_U16 = NUMERIC_16 | NUMERIC_UNSIGNED,
NUMERIC_U8 = NUMERIC_8 | NUMERIC_UNSIGNED,
} NumericTypes;
/*
 * Result of mapping a NumericTypes value to an integer built-in type.
 * `type` is the discriminant that says which member of the anonymous
 * union is valid.
 */
typedef struct {
SlsResultType type;
union {
IntegerBuiltInType integer_type; // type == SLS_RESULT
SlsError error; // type == SLS_ERROR
};
} IntegerTypeResult;
/*
 * Maps a NumericTypes descriptor to the matching IntegerBuiltInType.
 *
 * Returns an SLS_RESULT carrying the integer type for the eight signed and
 * unsigned integer descriptors. Any other value (including the float
 * descriptors) yields an SLS_ERROR with code 1.
 */
static IntegerTypeResult get_integer_type(NumericTypes numeric_type) {
    switch (numeric_type) {
        /* Signed widths. */
        case NUMERIC_I64: return (IntegerTypeResult){SLS_RESULT, .integer_type = INTEGER_I64};
        case NUMERIC_I32: return (IntegerTypeResult){SLS_RESULT, .integer_type = INTEGER_I32};
        case NUMERIC_I16: return (IntegerTypeResult){SLS_RESULT, .integer_type = INTEGER_I16};
        case NUMERIC_I8:  return (IntegerTypeResult){SLS_RESULT, .integer_type = INTEGER_I8};
        /* Unsigned widths. */
        case NUMERIC_U64: return (IntegerTypeResult){SLS_RESULT, .integer_type = INTEGER_U64};
        case NUMERIC_U32: return (IntegerTypeResult){SLS_RESULT, .integer_type = INTEGER_U32};
        case NUMERIC_U16: return (IntegerTypeResult){SLS_RESULT, .integer_type = INTEGER_U16};
        case NUMERIC_U8:  return (IntegerTypeResult){SLS_RESULT, .integer_type = INTEGER_U8};
        default:
            break;
    }
    /* Everything that is not a plain integer descriptor ends up here. */
    return (IntegerTypeResult){SLS_ERROR, .error = (SlsError){.message = "Lexer Error: Encountered a Float where there should not be one.", .code = 1}};
}
/*
 * Computes the value of a binary integer literal from its token text.
 *
 * lexer_info: lexer state; the token spans from `start` up to
 *             `lexer_info->pos`.
 * start:      offset of the first character of the token.
 *
 * A leading '-' at token[0] marks the literal as negative. Scanning begins
 * after the sign and a two-character prefix (presumably "0b" -- confirm
 * against the caller). Scanning stops at whitespace, '/', ':' or NUL;
 * '_' and '.' are skipped without contributing a digit. Every other
 * character shifts the value left by one bit, and only '1' adds to it.
 *
 * Returns the magnitude, negated modulo 2^64 when the literal is negative.
 * Overflow is not detected; the value wraps.
 */
static uint64_t create_binary_integer(LexerInfo *lexer_info, size_t start) {
    const char *token = get_token_text(lexer_info, start);
    const Boolean negative = (token[0] == '-') ? TRUE : FALSE;
    uint64_t value = 0;

    for (size_t i = negative ? 3 : 2; i < lexer_info->pos - start; i++) {
        const char ch = token[i];
        /* <ctype.h> functions need an unsigned char value; a negative
         * plain char would be undefined behaviour. */
        if (isspace((unsigned char)ch) || ch == '/' || ch == '\0' || ch == ':') break;
        if (ch == '.' || ch == '_') continue;
        value *= 2;
        if (ch == '1') value += 1;
    }
    /* Explicit unsigned negation: same result as multiplying by -1, but
     * without mixing signed and unsigned operands. */
    return negative ? (uint64_t)0 - value : value;
}
/*
 * Computes the value of an octal integer literal from its token text.
 *
 * lexer_info: lexer state; the token spans from `start` up to
 *             `lexer_info->pos`.
 * start:      offset of the first character of the token.
 *
 * A leading '-' at token[0] marks the literal as negative. Scanning begins
 * after the sign and a two-character prefix (presumably "0o" -- confirm
 * against the caller). Scanning stops at whitespace, '/', ':' or NUL;
 * '_' and '.' are skipped without contributing a digit. Every other
 * character multiplies the value by 8, and only '0'..'7' add to it.
 *
 * Returns the magnitude, negated modulo 2^64 when the literal is negative.
 * Overflow is not detected; the value wraps.
 */
static uint64_t create_octal_integer(LexerInfo *lexer_info, size_t start) {
    const char *token = get_token_text(lexer_info, start);
    const Boolean negative = (token[0] == '-') ? TRUE : FALSE;
    uint64_t value = 0;

    for (size_t i = negative ? 3 : 2; i < lexer_info->pos - start; i++) {
        const char ch = token[i];
        /* <ctype.h> functions need an unsigned char value; a negative
         * plain char would be undefined behaviour. */
        if (isspace((unsigned char)ch) || ch == '/' || ch == '\0' || ch == ':') break;
        if (ch == '.' || ch == '_') continue;
        value *= 8;
        /* C guarantees '0'..'9' are contiguous, so the digit value is ch - '0'. */
        if (ch >= '0' && ch <= '7') value += (uint64_t)(ch - '0');
    }
    /* Explicit unsigned negation: same result as multiplying by -1, but
     * without mixing signed and unsigned operands. */
    return negative ? (uint64_t)0 - value : value;
}
/*
 * Computes the value of a decimal integer literal from its token text.
 *
 * lexer_info: lexer state; the token spans from `start` up to
 *             `lexer_info->pos`.
 * start:      offset of the first character of the token.
 *
 * A leading '-' at token[0] marks the literal as negative; decimal
 * literals have no prefix, so scanning begins right after the optional
 * sign. Scanning stops at whitespace, '/', ':' or NUL; '_' and '.' are
 * skipped without contributing a digit. Every other character multiplies
 * the value by 10, and only '0'..'9' add to it.
 *
 * Returns the magnitude, negated modulo 2^64 when the literal is negative.
 * Overflow is not detected; the value wraps.
 */
static uint64_t create_decimal_integer(LexerInfo *lexer_info, size_t start) {
    const char *token = get_token_text(lexer_info, start);
    const Boolean negative = (token[0] == '-') ? TRUE : FALSE;
    uint64_t value = 0;

    for (size_t i = negative ? 1 : 0; i < lexer_info->pos - start; i++) {
        const char ch = token[i];
        /* <ctype.h> functions need an unsigned char value; a negative
         * plain char would be undefined behaviour. */
        if (isspace((unsigned char)ch) || ch == '/' || ch == '\0' || ch == ':') break;
        if (ch == '.' || ch == '_') continue;
        value *= 10;
        /* C guarantees '0'..'9' are contiguous, so the digit value is ch - '0'. */
        if (ch >= '0' && ch <= '9') value += (uint64_t)(ch - '0');
    }
    /* Explicit unsigned negation: same result as multiplying by -1, but
     * without mixing signed and unsigned operands. */
    return negative ? (uint64_t)0 - value : value;
}
/*
 * Computes the value of a hexadecimal integer literal from its token text.
 *
 * lexer_info: lexer state; the token spans from `start` up to
 *             `lexer_info->pos`.
 * start:      offset of the first character of the token.
 *
 * A leading '-' at token[0] marks the literal as negative. Scanning begins
 * after the sign and a two-character prefix (presumably "0x" -- confirm
 * against the caller). Scanning stops at whitespace, '/', ':' or NUL;
 * '_' and '.' are skipped without contributing a digit. Every other
 * character multiplies the value by 16, and only hexadecimal digits
 * (either letter case) add to it.
 *
 * Returns the magnitude, negated modulo 2^64 when the literal is negative.
 * Overflow is not detected; the value wraps.
 */
static uint64_t create_hexadecimal_integer(LexerInfo *lexer_info, size_t start) {
    const char *token = get_token_text(lexer_info, start);
    const Boolean negative = (token[0] == '-') ? TRUE : FALSE;
    uint64_t value = 0;

    for (size_t i = negative ? 3 : 2; i < lexer_info->pos - start; i++) {
        const char ch = token[i];
        /* <ctype.h> functions need an unsigned char value; a negative
         * plain char would be undefined behaviour. */
        if (isspace((unsigned char)ch) || ch == '/' || ch == '\0' || ch == ':') break;
        if (ch == '.' || ch == '_') continue;
        value *= 16;
        switch (ch) {
            case 'A': case 'a': value += 10; break;
            case 'B': case 'b': value += 11; break;
            case 'C': case 'c': value += 12; break;
            case 'D': case 'd': value += 13; break;
            case 'E': case 'e': value += 14; break;
            case 'F': case 'f': value += 15; break;
            default:
                /* C guarantees '0'..'9' are contiguous; anything else adds nothing. */
                if (ch >= '0' && ch <= '9') value += (uint64_t)(ch - '0');
                break;
        }
    }
    /* Explicit unsigned negation: same result as multiplying by -1, but
     * without mixing signed and unsigned operands. */
    return negative ? (uint64_t)0 - value : value;
}
/*
 * Syntactic form of a numeric literal, as determined by the routine that
 * scanned its digits.
 */
typedef enum {
    NUMERIC_BINARY      = 0,
    NUMERIC_OCTAL       = 1,
    NUMERIC_DECIMAL     = 2,
    NUMERIC_HEXADECIMAL = 3,
    NUMERIC_FLOAT       = 4,
    NUMERIC_EXPONENTIAL = 5,
} NumericLiteralTypes;
static LexerResult parse_numeric_type(LexerInfo *lexer_info, char c, size_t start, size_t start_line, NumericLiteralTypes numeric_literal_type) {
NumericTypes numeric_type = 0;
c = advance(lexer_info);
if (c == 'f') {
if (float_allowed) {
numeric_type |= NUMERIC_FLOAT_BASE;
if (numeric_literal_type == NUMERIC_DECIMAL || numeric_literal_type == NUMERIC_FLOAT || numeric_literal_type == NUMERIC_EXPONENTIAL) {
c = advance(lexer_info);
if (c == '6' && far_peek(lexer_info, 1) == '4') {
numeric_type |= NUMERIC_64;
c = advance(lexer_info);
c = advance(lexer_info);
} else if (c == '3' && far_peek(lexer_info, 1) == '2') {
numeric_type |= NUMERIC_32;
c = advance(lexer_info);
c = advance(lexer_info);
} else {
@ -153,44 +343,37 @@ static LexerResult parse_integer_type(LexerInfo *lexer_info, char c, size_t star
strncpy(error_message, "Invalid numeric literal: float type not allowed.", 49);
return lexer_error(lexer_info, error_message, start, start_line);
}
} else if (c == 'i') {
} else if (c == 'i' || c == 'u') {
if (c == 'u') numeric_type |= NUMERIC_UNSIGNED;
c = advance(lexer_info);
if (c == '6' && far_peek(lexer_info, 1) == '4') {
numeric_type |= NUMERIC_64;
c = advance(lexer_info);
c = advance(lexer_info);
} else if (c == '3' && far_peek(lexer_info, 1) == '2') {
numeric_type |= NUMERIC_32;
c = advance(lexer_info);
c = advance(lexer_info);
} else if (c == '1' && far_peek(lexer_info, 1) == '6') {
numeric_type |= NUMERIC_16;
c = advance(lexer_info);
c = advance(lexer_info);
} else if (c == '8') {
numeric_type |= NUMERIC_8;
c = advance(lexer_info);
} else {
if (numeric_type & NUMERIC_UNSIGNED) {
char *error_message = (char *)malloc(sizeof(char) * 78);
strncpy(error_message, "Invalid unsigned integer type: must be of type 'u64', 'u32', 'u16', and 'u8'.", 78);
return lexer_error(lexer_info, error_message, start, start_line);
} else {
char *error_message = (char *)malloc(sizeof(char) * 76);
strncpy(error_message, "Invalid signed integer type: must be of type 'i64', 'i32', 'i16', and 'i8'.", 76);
return lexer_error(lexer_info, error_message, start, start_line);
}
} else if (c == 'u') {
c = advance(lexer_info);
if (c == '6' && far_peek(lexer_info, 1) == '4') {
c = advance(lexer_info);
c = advance(lexer_info);
} else if (c == '3' && far_peek(lexer_info, 1) == '2') {
c = advance(lexer_info);
c = advance(lexer_info);
} else if (c == '1' && far_peek(lexer_info, 1) == '6') {
c = advance(lexer_info);
c = advance(lexer_info);
} else if (c == '8') {
c = advance(lexer_info);
} else {
char *error_message = (char *)malloc(sizeof(char) * 78);
strncpy(error_message, "Invalid unsigned integer type: must be of type 'u64', 'u32', 'u16', and 'u8'.", 78);
return lexer_error(lexer_info, error_message, start, start_line);
}
} else {
if (float_allowed) {
if (numeric_literal_type == NUMERIC_DECIMAL || numeric_literal_type == NUMERIC_FLOAT || numeric_literal_type == NUMERIC_EXPONENTIAL) {
char *error_message = (char *)malloc(sizeof(char) * 61);
strncpy(error_message, "Invalid numeric type: type must start with 'f', 'i', or 'u'.", 61);
return lexer_error(lexer_info, error_message, start, start_line);
@ -199,8 +382,35 @@ static LexerResult parse_integer_type(LexerInfo *lexer_info, char c, size_t star
strncpy(error_message, "Invalid integer type: type must start with 'i' or 'u'.", 55);
return lexer_error(lexer_info, error_message, start, start_line);
}
} if (isspace(c) || c == '/' || c == '\0')
} if (isspace(c) || c == '/' || c == '\0') {
IntegerTypeResult integer_type = get_integer_type(numeric_type);
if (numeric_literal_type == NUMERIC_DECIMAL && numeric_type & NUMERIC_FLOAT_BASE)
numeric_literal_type = NUMERIC_FLOAT;
uint64_t value;
switch (numeric_literal_type) {
case NUMERIC_BINARY:
if (integer_type.type == SLS_ERROR) return (LexerResult){SLS_ERROR, .error = integer_type.error};
value = create_binary_integer(lexer_info, start);
return lexer_result(lexer_info, (Token){TOKEN_INTEGER, .integer_literal = (IntegerLiteral){.type = integer_type.integer_type, .value = value}}, start, start_line);
case NUMERIC_OCTAL:
if (integer_type.type == SLS_ERROR) return (LexerResult){SLS_ERROR, .error = integer_type.error};
value = create_octal_integer(lexer_info, start);
return lexer_result(lexer_info, (Token){TOKEN_INTEGER, .integer_literal = (IntegerLiteral){.type = integer_type.integer_type, .value = value}}, start, start_line);
case NUMERIC_DECIMAL:
if (integer_type.type == SLS_ERROR) return (LexerResult){SLS_ERROR, .error = integer_type.error};
value = create_decimal_integer(lexer_info, start);
return lexer_result(lexer_info, (Token){TOKEN_INTEGER, .integer_literal = (IntegerLiteral){.type = integer_type.integer_type, .value = value}}, start, start_line);
case NUMERIC_HEXADECIMAL:
if (integer_type.type == SLS_ERROR) return (LexerResult){SLS_ERROR, .error = integer_type.error};
value = create_hexadecimal_integer(lexer_info, start);
return lexer_result(lexer_info, (Token){TOKEN_INTEGER, .integer_literal = (IntegerLiteral){.type = integer_type.integer_type, .value = value}}, start, start_line);
case NUMERIC_FLOAT:
break;
case NUMERIC_EXPONENTIAL:
break;
}
return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Numeric Literal Not Implemented Error.", 1}};
}
char *error_message = (char *)malloc(sizeof(char) * 57);
snprintf(error_message, 57, "Invalid numeric literal: unexpected '%c' in numeric type.", c);
return lexer_error(lexer_info, error_message, start, start_line);
@ -208,9 +418,11 @@ static LexerResult parse_integer_type(LexerInfo *lexer_info, char c, size_t star
static LexerResult parse_binary_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
do {c = advance(lexer_info);} while (c == '0' || c == '1' || c == '_');
if (c == ':') return parse_integer_type(lexer_info, c, start, start_line, FALSE);
if (isspace(c) || c == '/' || c == '\0')
return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Binary Integer Not Implemented Error.", 1}};
if (c == ':') return parse_numeric_type(lexer_info, c, start, start_line, NUMERIC_BINARY);
if (isspace(c) || c == '/' || c == '\0') {
uint64_t value = create_binary_integer(lexer_info, start);
return lexer_result(lexer_info, (Token){TOKEN_INTEGER, .integer_literal = (IntegerLiteral){.type = INTEGER_I64, .value = value}}, start, start_line);
}
char *error_message = (char *)malloc(sizeof(char) * 58);
snprintf(error_message, 58, "Invalid binary literal: unexpected '%c' in binary integer.", c);
return lexer_error(lexer_info, error_message, start, start_line);
@ -218,9 +430,11 @@ static LexerResult parse_binary_integer(LexerInfo *lexer_info, char c, size_t st
static LexerResult parse_octal_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
do {c = advance(lexer_info);} while ((isdigit(c) || c == '_') && !(c == '8' || c == '9'));
if (c == ':') return parse_integer_type(lexer_info, c, start, start_line, FALSE);
if (isspace(c) || c == '/' || c == '\0')
return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Octal Integer Not Implemented Error.", 1}};
if (c == ':') return parse_numeric_type(lexer_info, c, start, start_line, NUMERIC_OCTAL);
if (isspace(c) || c == '/' || c == '\0') {
uint64_t value = create_octal_integer(lexer_info, start);
return lexer_result(lexer_info, (Token){TOKEN_INTEGER, .integer_literal = (IntegerLiteral){.type = INTEGER_I64, .value = value}}, start, start_line);
}
char *error_message = (char *)malloc(sizeof(char) * 56);
snprintf(error_message, 56, "Invalid octal literal: unexpected '%c' in octal integer.", c);
return lexer_error(lexer_info, error_message, start, start_line);
@ -240,9 +454,11 @@ static LexerResult parse_decimal_integer(LexerInfo *lexer_info, char c, size_t s
do {c = advance(lexer_info);} while (isdigit(c) || c == '_');
if (c == '.') return parse_float(lexer_info, c, start, start_line);
if (c == 'e' || c == 'E') return parse_exponential(lexer_info, c, start, start_line);
if (c == ':') return parse_integer_type(lexer_info, c, start, start_line, TRUE);
if (isspace(c) || c == '/' || c == '\0')
return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Decimal Integer Not Implemented Error.", 1}};
if (c == ':') return parse_numeric_type(lexer_info, c, start, start_line, NUMERIC_DECIMAL);
if (isspace(c) || c == '/' || c == '\0') {
uint64_t value = create_decimal_integer(lexer_info, start);
return lexer_result(lexer_info, (Token){TOKEN_INTEGER, .integer_literal = (IntegerLiteral){.type = INTEGER_I64, .value = value}}, start, start_line);
}
char *error_message = (char *)malloc(sizeof(char) * 60);
snprintf(error_message, 60, "Invalid decimal literal: unexpected '%c' in decimal integer.", c);
return lexer_error(lexer_info, error_message, start, start_line);
@ -250,9 +466,11 @@ static LexerResult parse_decimal_integer(LexerInfo *lexer_info, char c, size_t s
static LexerResult parse_hexadecimal_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
do {c = advance(lexer_info);} while (isxdigit(c) || c == '_');
if (c == ':') return parse_integer_type(lexer_info, c, start, start_line, FALSE);
if (isspace(c) || c == '/' || c == '\0')
return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Hexadecimal Integer Not Implemented Error.", 1}};
if (c == ':') return parse_numeric_type(lexer_info, c, start, start_line, NUMERIC_HEXADECIMAL);
if (isspace(c) || c == '/' || c == '\0') {
uint64_t value = create_hexadecimal_integer(lexer_info, start);
return lexer_result(lexer_info, (Token){TOKEN_INTEGER, .integer_literal = (IntegerLiteral){.type = INTEGER_I64, .value = value}}, start, start_line);
}
char *error_message = (char *)malloc(sizeof(char) * 68);
snprintf(error_message, 68, "Invalid hexadecimal literal: unexpected '%c' in hexadecimal integer.", c);
return lexer_error(lexer_info, error_message, start, start_line);