From 14158ed9f4d4b11b8022f0f4b36ded00df89742b Mon Sep 17 00:00:00 2001 From: Kyler Date: Thu, 19 Jun 2025 21:34:26 -0600 Subject: [PATCH] Expanded keywords to have their own TokenType enum --- include/sync/lexer.h | 50 +++++++++++++++++++++- src/lexer.c | 100 +++++++++++++++++++++++++++++++++---------- src/main.c | 21 +++++++-- 3 files changed, 143 insertions(+), 28 deletions(-) diff --git a/include/sync/lexer.h b/include/sync/lexer.h index a990db9..f75b7bb 100644 --- a/include/sync/lexer.h +++ b/include/sync/lexer.h @@ -23,7 +23,55 @@ typedef enum { TOKEN_RBRACKET, TOKEN_CHARACTER, TOKEN_STRING, - TOKEN_KEYWORD, + // Definitions and Declarations + TOKEN_KW_CONST, + TOKEN_KW_ENUM, + TOKEN_KW_FN, + TOKEN_KW_LET, + TOKEN_KW_MUT, + TOKEN_KW_PUBLIC, + TOKEN_KW_STATIC, + TOKEN_KW_STRUCT, + TOKEN_KW_UNION, + // Control Flow + TOKEN_KW_BREAK, + TOKEN_KW_CASE, + TOKEN_KW_CONTINUE, + TOKEN_KW_DEFAULT, + TOKEN_KW_DO, + TOKEN_KW_ELSE, + TOKEN_KW_FOR, + TOKEN_KW_IF, + TOKEN_KW_MATCH, + TOKEN_KW_RETURN, + TOKEN_KW_SWITCH, + TOKEN_KW_WHILE, + // Values + TOKEN_KW_FALSE, + TOKEN_KW_TRUE, + // Types + TOKEN_KW_BOOL, + TOKEN_KW_F32, + TOKEN_KW_F64, + TOKEN_KW_I8, + TOKEN_KW_I16, + TOKEN_KW_I32, + TOKEN_KW_I64, + TOKEN_KW_U8, + TOKEN_KW_U16, + TOKEN_KW_U32, + TOKEN_KW_U64, + TOKEN_KW_VOID, + // Modules + TOKEN_KW_AS, + TOKEN_KW_IMPORT, + // Operators + TOKEN_KW_AND, + TOKEN_KW_IS, + TOKEN_KW_NOT, + TOKEN_KW_OR, + TOKEN_KW_SIZEOF, + TOKEN_KW_XOR, } TokenType; typedef struct { diff --git a/src/lexer.c b/src/lexer.c index f515f7f..4642ab6 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -9,30 +9,71 @@ #include "sync/types.h" #include "sync/lexer.h" -const char* KEYWORDS[] = { +typedef struct { + const char* value; + TokenType type; +} KeywordPair; + +typedef struct { + char is_keyword; + TokenType token; +} KeywordResult; + +const KeywordPair KEYWORDS[] = { // Definitions and Declarations - "const", "enum", "fn", "let", "mut", "public", "static", "struct", "union", - + {"const", TOKEN_KW_CONST}, + {"enum", TOKEN_KW_ENUM}, + {"fn", TOKEN_KW_FN}, + {"let", TOKEN_KW_LET}, + {"mut", TOKEN_KW_MUT}, + {"public", TOKEN_KW_PUBLIC}, + {"static", TOKEN_KW_STATIC}, + {"struct", TOKEN_KW_STRUCT}, + {"union", TOKEN_KW_UNION}, // Control Flow - "break", "case", "continue", "default", "do", "else", "for", "if", "match", - "return", "switch", "while", - + {"break", TOKEN_KW_BREAK}, + {"case", TOKEN_KW_CASE}, + {"continue", TOKEN_KW_CONTINUE}, + {"default", TOKEN_KW_DEFAULT}, + {"do", TOKEN_KW_DO}, + {"else", TOKEN_KW_ELSE}, + {"for", TOKEN_KW_FOR}, + {"if", TOKEN_KW_IF}, + {"match", TOKEN_KW_MATCH}, + {"return", TOKEN_KW_RETURN}, + {"switch", TOKEN_KW_SWITCH}, + {"while", TOKEN_KW_WHILE}, // Values - "False", "True", - + {"False", TOKEN_KW_FALSE}, + {"True", TOKEN_KW_TRUE}, // Types - "bool", "f32", "f64", "i8", "i16", "i32", "i64", "i128", "u8", "u16", "u32", - "u64", "u128", "void", - + {"bool", TOKEN_KW_BOOL}, + {"f32", TOKEN_KW_F32}, + {"f64", TOKEN_KW_F64}, + {"i8", TOKEN_KW_I8}, + {"i16", TOKEN_KW_I16}, + {"i32", TOKEN_KW_I32}, + {"i64", TOKEN_KW_I64}, + {"u8", TOKEN_KW_U8}, + {"u16", TOKEN_KW_U16}, + {"u32", TOKEN_KW_U32}, + {"u64", TOKEN_KW_U64}, + {"void", TOKEN_KW_VOID}, // Modules - "as", "import", - + {"as", TOKEN_KW_AS}, + {"import", TOKEN_KW_IMPORT}, // Operators - "is", "sizeof", + {"and", TOKEN_KW_AND}, + {"is", TOKEN_KW_IS}, + {"not", TOKEN_KW_NOT}, + {"or", TOKEN_KW_OR}, + {"sizeof", TOKEN_KW_SIZEOF}, + {"xor", TOKEN_KW_XOR}, +}; - // Reserved - "extern", "impl", "in", "move", "new", "self", "super", "trait", "tuple", - "type", "use", "where", "yeet", +const char* RESERVED_KEYWORDS[] = { + "extern", "f16", "f128", "i128", "impl", "in", "move", "new", "self", + "super", "trait", "tuple", "type", "u128", "use", "where", "yeet", }; void lexer_init(Lexer* lexer, const char* filename, const char* source) { @@ -110,13 +151,23 @@ static char is_identifier_char(char c) { return isalnum(c) || c == '_'; } -static char is_keyword(Lexer* lexer, size_t start, size_t length) { +static KeywordResult is_keyword(Lexer* lexer, size_t start, size_t length) { size_t num_keywords = sizeof(KEYWORDS) / sizeof(KEYWORDS[0]); for (size_t i = 0; i < num_keywords; i++) if ( - strncmp(&lexer->source[start], KEYWORDS[i], length) == 0 && - strlen(KEYWORDS[i]) == length - ) return 1; + strncmp(&lexer->source[start], KEYWORDS[i].value, length) == 0 && + strlen(KEYWORDS[i].value) == length + ) return (KeywordResult){1, KEYWORDS[i].type}; + return (KeywordResult){0, TOKEN_EOF}; +} + +static char is_future_keyword(Lexer* lexer, size_t start, size_t length) { + size_t num_keywords = sizeof(RESERVED_KEYWORDS) / sizeof(RESERVED_KEYWORDS[0]); + for (size_t i = 0; i < num_keywords; i++) + if ( + strncmp(&lexer->source[start], RESERVED_KEYWORDS[i], length) == 0 && + strlen(RESERVED_KEYWORDS[i]) == length + ) return 1; return 0; } @@ -218,8 +269,11 @@ static LexerResult lexer_next(Lexer* lexer) { if (is_identifier_start(c)) { while (is_identifier_char(peek(lexer))) advance(lexer); size_t length = lexer->pos - start; - if (is_keyword(lexer, start, length)) - return lexer_result(lexer, TOKEN_KEYWORD, start, start_line); + if (is_future_keyword(lexer, start, length)) + return lexer_error(lexer, "Reserved Keyword for future use", start, start_line); + KeywordResult result = is_keyword(lexer, start, length); + if (result.is_keyword) + return lexer_result(lexer, result.token, start, start_line); return lexer_result(lexer, TOKEN_IDENTIFIER, start, start_line); } diff --git a/src/main.c b/src/main.c index e89af49..8dcd9a9 100644 --- a/src/main.c +++ b/src/main.c @@ -10,10 +10,23 @@ #include "sync/syntax.h" const char* TOKEN_TYPES[] = { - "EOF", "IDENTIFIER", "NUMBER", "OPERATOR", - "LPAREN", "RPAREN", "SEMICOLON", "LBRACE", - "RBRACE", "LBRACKET", "RBRACKET", "CHARACTER", - "STRING", "KEYWORD" + "EOF", "IDENTIFIER", "NUMBER", "OPERATOR", "LPAREN", "RPAREN", "SEMICOLON", + "LBRACE", "RBRACE", "LBRACKET", "RBRACKET", "CHARACTER", "STRING", + // Definitions and Declarations + "KW_CONST", "KW_ENUM", "KW_FN", "KW_LET", "KW_MUT", "KW_PUBLIC", + "KW_STATIC", "KW_STRUCT", "KW_UNION", + // Control Flow + "KW_BREAK", "KW_CASE", "KW_CONTINUE", "KW_DEFAULT", "KW_DO", "KW_ELSE", + "KW_FOR", "KW_IF", "KW_MATCH", "KW_RETURN", "KW_SWITCH", "KW_WHILE", + // Values + "KW_FALSE", "KW_TRUE", + // Types + "KW_BOOL", "KW_F32", "KW_F64", "KW_I8", "KW_I16", "KW_I32", "KW_I64", + "KW_U8", "KW_U16", "KW_U32", "KW_U64", "KW_VOID", + // Modules + "KW_AS", "KW_IMPORT", + // Operators + "KW_AND", "KW_IS", "KW_NOT", "KW_OR", "KW_SIZEOF", "KW_XOR", }; static void print_token(Token token) {