Expanded keywords to have their own TokenType enum

This commit is contained in:
Kyler Olsen 2025-06-19 21:34:26 -06:00
parent 0c9f97b41d
commit 14158ed9f4
3 changed files with 143 additions and 28 deletions

View File

@ -23,7 +23,55 @@ typedef enum {
TOKEN_RBRACKET, TOKEN_RBRACKET,
TOKEN_CHARACTER, TOKEN_CHARACTER,
TOKEN_STRING, TOKEN_STRING,
TOKEN_KEYWORD, // Definitions and Declarations
TOKEN_KW_CONST,
TOKEN_KW_ENUM,
TOKEN_KW_FN,
TOKEN_KW_LET,
TOKEN_KW_MUT,
TOKEN_KW_PUBLIC,
TOKEN_KW_STATIC,
TOKEN_KW_STRUCT,
TOKEN_KW_UNION,
// Control Flow
TOKEN_KW_BREAK,
TOKEN_KW_CASE,
TOKEN_KW_CONTINUE,
TOKEN_KW_DEFAULT,
TOKEN_KW_DO,
TOKEN_KW_ELSE,
TOKEN_KW_FOR,
TOKEN_KW_IF,
TOKEN_KW_MATCH,
TOKEN_KW_RETURN,
TOKEN_KW_SWITCH,
TOKEN_KW_WHILE,
// Values
TOKEN_KW_FALSE,
TOKEN_KW_TRUE,
// Types
TOKEN_KW_BOOL,
TOKEN_KW_F32,
TOKEN_KW_F64,
TOKEN_KW_I8,
TOKEN_KW_I16,
TOKEN_KW_I32,
TOKEN_KW_I64,
TOKEN_KW_U8,
TOKEN_KW_U16,
TOKEN_KW_U32,
TOKEN_KW_U64,
TOKEN_KW_VOID,
// Modules
TOKEN_KW_AS,
TOKEN_KW_IMPORT,
// Operators
TOKEN_KW_AND,
TOKEN_KW_IS,
TOKEN_KW_NOT,
TOKEN_KW_OR,
TOKEN_KW_SIZEOF,
TOKEN_KW_XOR,
} TokenType; } TokenType;
typedef struct { typedef struct {

View File

@ -9,30 +9,71 @@
#include "sync/types.h" #include "sync/types.h"
#include "sync/lexer.h" #include "sync/lexer.h"
const char* KEYWORDS[] = { typedef struct {
const char* value;
TokenType type;
} KeywordPair;
typedef struct {
char is_keyword;
TokenType token;
} KeywordResult;
const KeywordPair KEYWORDS[] = {
// Definitions and Declarations // Definitions and Declarations
"const", "enum", "fn", "let", "mut", "public", "static", "struct", "union", {"const", TOKEN_KW_CONST},
{"enum", TOKEN_KW_ENUM},
{"fn", TOKEN_KW_FN},
{"let", TOKEN_KW_LET},
{"mut", TOKEN_KW_MUT},
{"public", TOKEN_KW_PUBLIC},
{"static", TOKEN_KW_STATIC},
{"struct", TOKEN_KW_STRUCT},
{"union", TOKEN_KW_UNION},
// Control Flow // Control Flow
"break", "case", "continue", "default", "do", "else", "for", "if", "match", {"break", TOKEN_KW_BREAK},
"return", "switch", "while", {"case", TOKEN_KW_CASE},
{"continue", TOKEN_KW_CONTINUE},
{"default", TOKEN_KW_DEFAULT},
{"do", TOKEN_KW_DO},
{"else", TOKEN_KW_ELSE},
{"for", TOKEN_KW_FOR},
{"if", TOKEN_KW_IF},
{"match", TOKEN_KW_MATCH},
{"return", TOKEN_KW_RETURN},
{"switch", TOKEN_KW_SWITCH},
{"while", TOKEN_KW_WHILE},
// Values // Values
"False", "True", {"False", TOKEN_KW_FALSE},
{"True", TOKEN_KW_TRUE},
// Types // Types
"bool", "f32", "f64", "i8", "i16", "i32", "i64", "i128", "u8", "u16", "u32", {"bool", TOKEN_KW_BOOL},
"u64", "u128", "void", {"f32", TOKEN_KW_F32},
{"f64", TOKEN_KW_F64},
{"i8", TOKEN_KW_I8},
{"i16", TOKEN_KW_I16},
{"i32", TOKEN_KW_I32},
{"i64", TOKEN_KW_I64},
{"u8", TOKEN_KW_U8},
{"u16", TOKEN_KW_U16},
{"u32", TOKEN_KW_U32},
{"u64", TOKEN_KW_U64},
{"void", TOKEN_KW_VOID},
// Modules // Modules
"as", "import", {"as", TOKEN_KW_AS},
{"import", TOKEN_KW_IMPORT},
// Operators // Operators
"is", "sizeof", {"and", TOKEN_KW_AND},
{"is", TOKEN_KW_IS},
{"not", TOKEN_KW_NOT},
{"or", TOKEN_KW_OR},
{"sizeof", TOKEN_KW_SIZEOF},
{"xor", TOKEN_KW_XOR},
};
// Reserved const char* RESERVED_KEYWORDS[] = {
"extern", "impl", "in", "move", "new", "self", "super", "trait", "tuple", "extern", "f16", "f128", "i128", "impl", "in", "move", "new", "self",
"type", "use", "where", "yeet", "super", "trait", "tuple", "type", "u128", "use", "where", "yeet",
}; };
void lexer_init(Lexer* lexer, const char* filename, const char* source) { void lexer_init(Lexer* lexer, const char* filename, const char* source) {
@ -110,13 +151,23 @@ static char is_identifier_char(char c) {
return isalnum(c) || c == '_'; return isalnum(c) || c == '_';
} }
static char is_keyword(Lexer* lexer, size_t start, size_t length) { static KeywordResult is_keyword(Lexer* lexer, size_t start, size_t length) {
size_t num_keywords = sizeof(KEYWORDS) / sizeof(KEYWORDS[0]); size_t num_keywords = sizeof(KEYWORDS) / sizeof(KEYWORDS[0]);
for (size_t i = 0; i < num_keywords; i++) for (size_t i = 0; i < num_keywords; i++)
if ( if (
strncmp(&lexer->source[start], KEYWORDS[i], length) == 0 && strncmp(&lexer->source[start], KEYWORDS[i].value, length) == 0 &&
strlen(KEYWORDS[i]) == length strlen(KEYWORDS[i].value) == length
) return 1; ) return (KeywordResult){1, KEYWORDS[i].type};
return (KeywordResult){0, TOKEN_EOF};
}
static char is_future_keyword(Lexer* lexer, size_t start, size_t length) {
size_t num_keywords = sizeof(RESERVED_KEYWORDS) / sizeof(RESERVED_KEYWORDS[0]);
for (size_t i = 0; i < num_keywords; i++)
if (
strncmp(&lexer->source[start], RESERVED_KEYWORDS[i], length) == 0 &&
strlen(RESERVED_KEYWORDS[i]) == length
) return 1;
return 0; return 0;
} }
@ -218,8 +269,11 @@ static LexerResult lexer_next(Lexer* lexer) {
if (is_identifier_start(c)) { if (is_identifier_start(c)) {
while (is_identifier_char(peek(lexer))) advance(lexer); while (is_identifier_char(peek(lexer))) advance(lexer);
size_t length = lexer->pos - start; size_t length = lexer->pos - start;
if (is_keyword(lexer, start, length)) if (is_future_keyword(lexer, start, length))
return lexer_result(lexer, TOKEN_KEYWORD, start, start_line); return lexer_error(lexer, "Reserved Keyword for future use", start, start_line);
KeywordResult result = is_keyword(lexer, start, length);
if (result.is_keyword)
return lexer_result(lexer, result.token, start, start_line);
return lexer_result(lexer, TOKEN_IDENTIFIER, start, start_line); return lexer_result(lexer, TOKEN_IDENTIFIER, start, start_line);
} }

View File

@ -10,10 +10,23 @@
#include "sync/syntax.h" #include "sync/syntax.h"
const char* TOKEN_TYPES[] = { const char* TOKEN_TYPES[] = {
"EOF", "IDENTIFIER", "NUMBER", "OPERATOR", "EOF", "IDENTIFIER", "NUMBER", "OPERATOR", "LPAREN", "RPAREN", "SEMICOLON",
"LPAREN", "RPAREN", "SEMICOLON", "LBRACE", "LBRACE", "RBRACE", "LBRACKET", "RBRACKET", "CHARACTER", "STRING",
"RBRACE", "LBRACKET", "RBRACKET", "CHARACTER", // Definitions and Declarations
"STRING", "KEYWORD" "KW_CONST", "KW_ENUM", "KW_FN", "KW_LET", "KW_MUT", "KW_PUBLIC",
"KW_STATIC", "KW_STRUCT", "KW_UNION",
// Control Flow
"KW_BREAK", "KW_CASE", "KW_CONTINUE", "KW_DEFAULT", "KW_DO", "KW_ELSE",
"KW_FOR", "KW_IF", "KW_MATCH", "KW_RETURN", "KW_SWITCH", "KW_WHILE",
// Values
"KW_FALSE", "KW_TRUE",
// Types
"KW_BOOL", "KW_F32", "KW_F64", "KW_I8", "KW_I16", "KW_I32", "KW_I64",
"KW_U8", "KW_U16", "KW_U32", "KW_U64", "KW_VOID",
// Modules
"KW_AS", "KW_IMPORT",
// Operators
"KW_AND", "KW_IS", "KW_NOT", "KW_OR", "KW_SIZEOF", "KW_XOR",
}; };
static void print_token(Token token) { static void print_token(Token token) {