diff --git a/SLS_C/src/lexer.c b/SLS_C/src/lexer.c index 497aad4..b9e4ee2 100644 --- a/SLS_C/src/lexer.c +++ b/SLS_C/src/lexer.c @@ -132,15 +132,205 @@ static LexerResult lexer_error(LexerInfo *lexer_info, const char* message, size_ return (LexerResult){SLS_RESULT, .result = result}; } -static LexerResult parse_integer_type(LexerInfo *lexer_info, char c, size_t start, size_t start_line, Boolean float_allowed) { +typedef enum { + NUMERIC_FLOAT_BASE = 1 << 0, + NUMERIC_UNSIGNED = 1 << 1, + NUMERIC_64 = 1 << 2, + NUMERIC_32 = 1 << 3, + NUMERIC_16 = 1 << 4, + NUMERIC_8 = 1 << 5, +} NumericTypesBase; + +typedef enum { + NUMERIC_F64 = NUMERIC_64 | NUMERIC_FLOAT_BASE, + NUMERIC_F32 = NUMERIC_32 | NUMERIC_FLOAT_BASE, + NUMERIC_I64 = NUMERIC_64, + NUMERIC_I32 = NUMERIC_32, + NUMERIC_I16 = NUMERIC_16, + NUMERIC_I8 = NUMERIC_8, + NUMERIC_U64 = NUMERIC_64 | NUMERIC_UNSIGNED, + NUMERIC_U32 = NUMERIC_32 | NUMERIC_UNSIGNED, + NUMERIC_U16 = NUMERIC_16 | NUMERIC_UNSIGNED, + NUMERIC_U8 = NUMERIC_8 | NUMERIC_UNSIGNED, +} NumericTypes; + +typedef struct { + SlsResultType type; + union { + IntegerBuiltInType integer_type; // type == SLS_RESULT + SlsError error; // type == SLS_ERROR + }; +} IntegerTypeResult; + +static IntegerTypeResult get_integer_type(NumericTypes numeric_type) { + IntegerBuiltInType integer_type; + switch (numeric_type) { + case NUMERIC_I64: + integer_type = INTEGER_I64; + break; + case NUMERIC_I32: + integer_type = INTEGER_I32; + break; + case NUMERIC_I16: + integer_type = INTEGER_I16; + break; + case NUMERIC_I8: + integer_type = INTEGER_I8; + break; + case NUMERIC_U64: + integer_type = INTEGER_U64; + break; + case NUMERIC_U32: + integer_type = INTEGER_U32; + break; + case NUMERIC_U16: + integer_type = INTEGER_U16; + break; + case NUMERIC_U8: + integer_type = INTEGER_U8; + break; + default: + return (IntegerTypeResult){SLS_ERROR, .error = (SlsError){.message = "Lexer Error: Encountered a Float where there should not be one.", .code = 1}}; + } + return (IntegerTypeResult){SLS_RESULT, .integer_type = integer_type}; +} + +static uint64_t create_binary_integer(LexerInfo *lexer_info, size_t start) { + uint64_t value = 0; + Boolean negative = FALSE; + const char *token = get_token_text(lexer_info, start); + size_t i = 2; + if (token[0] == '-') { + negative = TRUE; + i += 1; + } + for (; i < lexer_info->pos - start; i++) { + if (isspace(token[i]) || token[i] == '/' || token[i] == '\0' || token[i] == ':') break; + if (token[i] == '.' || token[i] == '_') continue; + value *= 2; + switch (token[i]) { + case '1': value += 1; break; + } + } + return value * (negative ? -1 : 1); +} + +static uint64_t create_octal_integer(LexerInfo *lexer_info, size_t start) { + uint64_t value = 0; + Boolean negative = FALSE; + const char *token = get_token_text(lexer_info, start); + size_t i = 2; + if (token[0] == '-') { + negative = TRUE; + i += 1; + } + for (; i < lexer_info->pos - start; i++) { + if (isspace(token[i]) || token[i] == '/' || token[i] == '\0' || token[i] == ':') break; + if (token[i] == '.' || token[i] == '_') continue; + value *= 8; + switch (token[i]) { + case '1': value += 1; break; + case '2': value += 2; break; + case '3': value += 3; break; + case '4': value += 4; break; + case '5': value += 5; break; + case '6': value += 6; break; + case '7': value += 7; break; + } + } + return value * (negative ? -1 : 1); +} + +static uint64_t create_decimal_integer(LexerInfo *lexer_info, size_t start) { + uint64_t value = 0; + Boolean negative = FALSE; + const char *token = get_token_text(lexer_info, start); + size_t i = 0; + if (token[0] == '-') { + negative = TRUE; + i += 1; + } + for (; i < lexer_info->pos - start; i++) { + if (isspace(token[i]) || token[i] == '/' || token[i] == '\0' || token[i] == ':') break; + if (token[i] == '.' || token[i] == '_') continue; + value *= 10; + switch (token[i]) { + case '1': value += 1; break; + case '2': value += 2; break; + case '3': value += 3; break; + case '4': value += 4; break; + case '5': value += 5; break; + case '6': value += 6; break; + case '7': value += 7; break; + case '8': value += 8; break; + case '9': value += 9; break; + } + } + return value * (negative ? -1 : 1); +} + +static uint64_t create_hexadecimal_integer(LexerInfo *lexer_info, size_t start) { + uint64_t value = 0; + Boolean negative = FALSE; + const char *token = get_token_text(lexer_info, start); + size_t i = 2; + if (token[0] == '-') { + negative = TRUE; + i += 1; + } + for (; i < lexer_info->pos - start; i++) { + if (isspace(token[i]) || token[i] == '/' || token[i] == '\0' || token[i] == ':') break; + if (token[i] == '.' || token[i] == '_') continue; + value *= 16; + switch (token[i]) { + case '1': value += 1; break; + case '2': value += 2; break; + case '3': value += 3; break; + case '4': value += 4; break; + case '5': value += 5; break; + case '6': value += 6; break; + case '7': value += 7; break; + case '8': value += 8; break; + case '9': value += 9; break; + case 'A': + case 'a': value += 10; break; + case 'B': + case 'b': value += 11; break; + case 'C': + case 'c': value += 12; break; + case 'D': + case 'd': value += 13; break; + case 'E': + case 'e': value += 14; break; + case 'F': + case 'f': value += 15; break; + } + } + return value * (negative ? -1 : 1); +} + +typedef enum { + NUMERIC_BINARY, + NUMERIC_OCTAL, + NUMERIC_DECIMAL, + NUMERIC_HEXADECIMAL, + NUMERIC_FLOAT, + NUMERIC_EXPONENTIAL, +} NumericLiteralTypes; + +static LexerResult parse_numeric_type(LexerInfo *lexer_info, char c, size_t start, size_t start_line, NumericLiteralTypes numeric_literal_type) { + NumericTypes numeric_type = 0; c = advance(lexer_info); if (c == 'f') { - if (float_allowed) { + numeric_type |= NUMERIC_FLOAT_BASE; + if (numeric_literal_type == NUMERIC_DECIMAL || numeric_literal_type == NUMERIC_FLOAT || numeric_literal_type == NUMERIC_EXPONENTIAL) { c = advance(lexer_info); if (c == '6' && far_peek(lexer_info, 1) == '4') { + numeric_type |= NUMERIC_64; c = advance(lexer_info); c = advance(lexer_info); } else if (c == '3' && far_peek(lexer_info, 1) == '2') { + numeric_type |= NUMERIC_32; c = advance(lexer_info); c = advance(lexer_info); } else { @@ -153,44 +343,37 @@ static LexerResult parse_integer_type(LexerInfo *lexer_info, char c, size_t star strncpy(error_message, "Invalid numeric literal: float type not allowed.", 49); return lexer_error(lexer_info, error_message, start, start_line); } - } else if (c == 'i') { + } else if (c == 'i' || c == 'u') { + if (c == 'u') numeric_type |= NUMERIC_UNSIGNED; c = advance(lexer_info); if (c == '6' && far_peek(lexer_info, 1) == '4') { + numeric_type |= NUMERIC_64; c = advance(lexer_info); c = advance(lexer_info); } else if (c == '3' && far_peek(lexer_info, 1) == '2') { + numeric_type |= NUMERIC_32; c = advance(lexer_info); c = advance(lexer_info); } else if (c == '1' && far_peek(lexer_info, 1) == '6') { + numeric_type |= NUMERIC_16; c = advance(lexer_info); c = advance(lexer_info); } else if (c == '8') { + numeric_type |= NUMERIC_8; c = advance(lexer_info); } else { - char *error_message = (char *)malloc(sizeof(char) * 76); - strncpy(error_message, "Invalid signed integer type: must be of type 'i64', 'i32', 'i16', and 'i8'.", 76); - return lexer_error(lexer_info, error_message, start, start_line); - } - } else if (c == 'u') { - c = advance(lexer_info); - if (c == '6' && far_peek(lexer_info, 1) == '4') { - c = advance(lexer_info); - c = advance(lexer_info); - } else if (c == '3' && far_peek(lexer_info, 1) == '2') { - c = advance(lexer_info); - c = advance(lexer_info); - } else if (c == '1' && far_peek(lexer_info, 1) == '6') { - c = advance(lexer_info); - c = advance(lexer_info); - } else if (c == '8') { - c = advance(lexer_info); - } else { - char *error_message = (char *)malloc(sizeof(char) * 78); - strncpy(error_message, "Invalid unsigned integer type: must be of type 'u64', 'u32', 'u16', and 'u8'.", 78); - return lexer_error(lexer_info, error_message, start, start_line); + if (numeric_type & NUMERIC_UNSIGNED) { + char *error_message = (char *)malloc(sizeof(char) * 78); + strncpy(error_message, "Invalid unsigned integer type: must be of type 'u64', 'u32', 'u16', and 'u8'.", 78); + return lexer_error(lexer_info, error_message, start, start_line); + } else { + char *error_message = (char *)malloc(sizeof(char) * 76); + strncpy(error_message, "Invalid signed integer type: must be of type 'i64', 'i32', 'i16', and 'i8'.", 76); + return lexer_error(lexer_info, error_message, start, start_line); + } } } else { - if (float_allowed) { + if (numeric_literal_type == NUMERIC_DECIMAL || numeric_literal_type == NUMERIC_FLOAT || numeric_literal_type == NUMERIC_EXPONENTIAL) { char *error_message = (char *)malloc(sizeof(char) * 61); strncpy(error_message, "Invalid numeric type: type must start with 'f', 'i', or 'u'.", 61); return lexer_error(lexer_info, error_message, start, start_line); @@ -199,8 +382,35 @@ static LexerResult parse_integer_type(LexerInfo *lexer_info, char c, size_t star strncpy(error_message, "Invalid integer type: type must start with 'i' or 'u'.", 55); return lexer_error(lexer_info, error_message, start, start_line); } - } if (isspace(c) || c == '/' || c == '\0') + } if (isspace(c) || c == '/' || c == '\0') { + IntegerTypeResult integer_type = get_integer_type(numeric_type); + if (numeric_literal_type == NUMERIC_DECIMAL && numeric_type & NUMERIC_FLOAT_BASE) + numeric_literal_type = NUMERIC_FLOAT; + uint64_t value; + switch (numeric_literal_type) { + case NUMERIC_BINARY: + if (integer_type.type == SLS_ERROR) return (LexerResult){SLS_ERROR, .error = integer_type.error}; + value = create_binary_integer(lexer_info, start); + return lexer_result(lexer_info, (Token){TOKEN_INTEGER, .integer_literal = (IntegerLiteral){.type = integer_type.integer_type, .value = value}}, start, start_line); + case NUMERIC_OCTAL: + if (integer_type.type == SLS_ERROR) return (LexerResult){SLS_ERROR, .error = integer_type.error}; + value = create_octal_integer(lexer_info, start); + return lexer_result(lexer_info, (Token){TOKEN_INTEGER, .integer_literal = (IntegerLiteral){.type = integer_type.integer_type, .value = value}}, start, start_line); + case NUMERIC_DECIMAL: + if (integer_type.type == SLS_ERROR) return (LexerResult){SLS_ERROR, .error = integer_type.error}; + value = create_decimal_integer(lexer_info, start); + return lexer_result(lexer_info, (Token){TOKEN_INTEGER, .integer_literal = (IntegerLiteral){.type = integer_type.integer_type, .value = value}}, start, start_line); + case NUMERIC_HEXADECIMAL: + if (integer_type.type == SLS_ERROR) return (LexerResult){SLS_ERROR, .error = integer_type.error}; + value = create_hexadecimal_integer(lexer_info, start); + return lexer_result(lexer_info, (Token){TOKEN_INTEGER, .integer_literal = (IntegerLiteral){.type = integer_type.integer_type, .value = value}}, start, start_line); + case NUMERIC_FLOAT: + break; + case NUMERIC_EXPONENTIAL: + break; + } return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Numeric Literal Not Implemented Error.", 1}}; + } char *error_message = (char *)malloc(sizeof(char) * 57); snprintf(error_message, 57, "Invalid numeric literal: unexpected '%c' in numeric type.", c); return lexer_error(lexer_info, error_message, start, start_line); @@ -208,9 +418,11 @@ static LexerResult parse_integer_type(LexerInfo *lexer_info, char c, size_t star static LexerResult parse_binary_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { do {c = advance(lexer_info);} while (c == '0' || c == '1' || c == '_'); - if (c == ':') return parse_integer_type(lexer_info, c, start, start_line, FALSE); - if (isspace(c) || c == '/' || c == '\0') - return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Binary Integer Not Implemented Error.", 1}}; + if (c == ':') return parse_numeric_type(lexer_info, c, start, start_line, NUMERIC_BINARY); + if (isspace(c) || c == '/' || c == '\0') { + uint64_t value = create_binary_integer(lexer_info, start); + return lexer_result(lexer_info, (Token){TOKEN_INTEGER, .integer_literal = (IntegerLiteral){.type = INTEGER_I64, .value = value}}, start, start_line); + } char *error_message = (char *)malloc(sizeof(char) * 58); snprintf(error_message, 58, "Invalid binary literal: unexpected '%c' in binary integer.", c); return lexer_error(lexer_info, error_message, start, start_line); @@ -218,9 +430,11 @@ static LexerResult parse_binary_integer(LexerInfo *lexer_info, char c, size_t st static LexerResult parse_octal_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { do {c = advance(lexer_info);} while ((isdigit(c) || c == '_') && !(c == '8' || c == '9')); - if (c == ':') return parse_integer_type(lexer_info, c, start, start_line, FALSE); - if (isspace(c) || c == '/' || c == '\0') - return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Octal Integer Not Implemented Error.", 1}}; + if (c == ':') return parse_numeric_type(lexer_info, c, start, start_line, NUMERIC_OCTAL); + if (isspace(c) || c == '/' || c == '\0') { + uint64_t value = create_octal_integer(lexer_info, start); + return lexer_result(lexer_info, (Token){TOKEN_INTEGER, .integer_literal = (IntegerLiteral){.type = INTEGER_I64, .value = value}}, start, start_line); + } char *error_message = (char *)malloc(sizeof(char) * 56); snprintf(error_message, 56, "Invalid octal literal: unexpected '%c' in octal integer.", c); return lexer_error(lexer_info, error_message, start, start_line); @@ -240,9 +454,11 @@ static LexerResult parse_decimal_integer(LexerInfo *lexer_info, char c, size_t s do {c = advance(lexer_info);} while (isdigit(c) || c == '_'); if (c == '.') return parse_float(lexer_info, c, start, start_line); if (c == 'e' || c == 'E') return parse_exponential(lexer_info, c, start, start_line); - if (c == ':') return parse_integer_type(lexer_info, c, start, start_line, TRUE); - if (isspace(c) || c == '/' || c == '\0') - return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Decimal Integer Not Implemented Error.", 1}}; + if (c == ':') return parse_numeric_type(lexer_info, c, start, start_line, NUMERIC_DECIMAL); + if (isspace(c) || c == '/' || c == '\0') { + uint64_t value = create_decimal_integer(lexer_info, start); + return lexer_result(lexer_info, (Token){TOKEN_INTEGER, .integer_literal = (IntegerLiteral){.type = INTEGER_I64, .value = value}}, start, start_line); + } char *error_message = (char *)malloc(sizeof(char) * 60); snprintf(error_message, 60, "Invalid decimal literal: unexpected '%c' in decimal integer.", c); return lexer_error(lexer_info, error_message, start, start_line); @@ -250,9 +466,11 @@ static LexerResult parse_decimal_integer(LexerInfo *lexer_info, char c, size_t s static LexerResult parse_hexadecimal_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { do {c = advance(lexer_info);} while (isxdigit(c) || c == '_'); - if (c == ':') return parse_integer_type(lexer_info, c, start, start_line, FALSE); - if (isspace(c) || c == '/' || c == '\0') - return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Hexadecimal Integer Not Implemented Error.", 1}}; + if (c == ':') return parse_numeric_type(lexer_info, c, start, start_line, NUMERIC_HEXADECIMAL); + if (isspace(c) || c == '/' || c == '\0') { + uint64_t value = create_hexadecimal_integer(lexer_info, start); + return lexer_result(lexer_info, (Token){TOKEN_INTEGER, .integer_literal = (IntegerLiteral){.type = INTEGER_I64, .value = value}}, start, start_line); + } char *error_message = (char *)malloc(sizeof(char) * 68); snprintf(error_message, 68, "Invalid hexadecimal literal: unexpected '%c' in hexadecimal integer.", c); return lexer_error(lexer_info, error_message, start, start_line);