diff --git a/include/sync/lexer.h b/include/sync/lexer.h
index e6c1b4f..a990db9 100644
--- a/include/sync/lexer.h
+++ b/include/sync/lexer.h
@@ -23,6 +23,7 @@ typedef enum {
     TOKEN_RBRACKET,
     TOKEN_CHARACTER,
     TOKEN_STRING,
+    TOKEN_KEYWORD,
 } TokenType;
 
 typedef struct {
diff --git a/src/lexer.c b/src/lexer.c
index 3c4d051..f515f7f 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -9,6 +9,34 @@
 #include "sync/types.h"
 #include "sync/lexer.h"
 
+// Every word the lexer reports as TOKEN_KEYWORD rather than TOKEN_IDENTIFIER.
+// Lookup is exact and case-sensitive; see is_keyword().
+const char* KEYWORDS[] = {
+    // Definitions and Declarations
+    "const", "enum", "fn", "let", "mut", "public", "static", "struct", "union",
+
+    // Control Flow
+    "break", "case", "continue", "default", "do", "else", "for", "if", "match",
+    "return", "switch", "while",
+
+    // Values
+    "False", "True",
+
+    // Types
+    "bool", "f32", "f64", "i8", "i16", "i32", "i64", "i128", "u8", "u16", "u32",
+    "u64", "u128", "void",
+
+    // Modules
+    "as", "import",
+
+    // Operators
+    "is", "sizeof",
+
+    // Reserved
+    "extern", "impl", "in", "move", "new", "self", "super", "trait", "tuple",
+    "type", "use", "where", "yeet",
+};
+
 void lexer_init(Lexer* lexer, const char* filename, const char* source) {
     lexer->filename = filename;
     lexer->source = source;
@@ -84,6 +112,23 @@ static char is_identifier_char(char c) {
     return isalnum(c) || c == '_';
 }
 
+// Reports whether the `length` bytes of lexer->source beginning at offset
+// `start` spell exactly one entry of KEYWORDS. Yields 1 on a match, else 0.
+static char is_keyword(Lexer* lexer, size_t start, size_t length) {
+    const char* lexeme = &lexer->source[start];
+    const size_t keyword_count = sizeof(KEYWORDS) / sizeof(KEYWORDS[0]);
+
+    for (size_t k = 0; k < keyword_count; k++) {
+        const char* candidate = KEYWORDS[k];
+        // Equal leading bytes are not enough (token "i" vs keyword "if").
+        if (strlen(candidate) != length)
+            continue;
+        if (strncmp(lexeme, candidate, length) == 0)
+            return 1;
+    }
+    return 0;
+}
+
 static LexerResult lexer_result(Lexer* lexer, TokenType type, size_t start, size_t start_line) {
     TokenResult* result = (TokenResult*)malloc(sizeof(TokenResult));
     if (result == NULL)
@@ -181,6 +226,9 @@ static LexerResult lexer_next(Lexer* lexer) {
     // Identifiers
     if (is_identifier_start(c)) {
         while (is_identifier_char(peek(lexer))) advance(lexer);
+        size_t length = lexer->pos - start;
+        if (is_keyword(lexer, start, length))
+            return lexer_result(lexer, TOKEN_KEYWORD, start, start_line);
         return lexer_result(lexer, TOKEN_IDENTIFIER, start, start_line);
     }
 
diff --git a/src/main.c b/src/main.c
index 3074f6b..e89af49 100644
--- a/src/main.c
+++ b/src/main.c
@@ -13,7 +13,7 @@
 const char* TOKEN_TYPES[] = {
     "EOF", "IDENTIFIER", "NUMBER", "OPERATOR", "LPAREN", "RPAREN", "SEMICOLON",
     "LBRACE", "RBRACE", "LBRACKET", "RBRACKET", "CHARACTER",
-    "STRING"
+    "STRING", "KEYWORD"
 };
 
 static void print_token(Token token) {