Added keyword token type
parent 23f23cd9b6
commit 256f9fe611

@@ -23,6 +23,7 @@ typedef enum {
     TOKEN_RBRACKET,
     TOKEN_CHARACTER,
     TOKEN_STRING,
+    TOKEN_KEYWORD,
 } TokenType;
 
 typedef struct {

src/lexer.c | 39

@@ -9,6 +9,32 @@
 #include "sync/types.h"
 #include "sync/lexer.h"
 
+const char* KEYWORDS[] = {
+    // Definitions and Declarations
+    "const", "enum", "fn", "let", "mut", "public", "static", "struct", "union",
+
+    // Control Flow
+    "break", "case", "continue", "default", "do", "else", "for", "if", "match",
+    "return", "switch", "while",
+
+    // Values
+    "False", "True",
+
+    // Types
+    "bool", "f32", "f64", "i8", "i16", "i32", "i64", "i128", "u8", "u16", "u32",
+    "u64", "u128", "void",
+
+    // Modules
+    "as", "import",
+
+    // Operators
+    "is", "sizeof",
+
+    // Reserved
+    "extern", "impl", "in", "move", "new", "self", "super", "trait", "tuple",
+    "type", "use", "where", "yeet",
+};
+
 void lexer_init(Lexer* lexer, const char* filename, const char* source) {
     lexer->filename = filename;
     lexer->source = source;
@@ -84,6 +110,16 @@ static char is_identifier_char(char c) {
     return isalnum(c) || c == '_';
 }
 
+static char is_keyword(Lexer* lexer, size_t start, size_t length) {
+    size_t num_keywords = sizeof(KEYWORDS) / sizeof(KEYWORDS[0]);
+    for (size_t i = 0; i < num_keywords; i++)
+        if (
+            strncmp(&lexer->source[start], KEYWORDS[i], length) == 0 &&
+            strlen(KEYWORDS[i]) == length
+        ) return 1;
+    return 0;
+}
+
 static LexerResult lexer_result(Lexer* lexer, TokenType type, size_t start, size_t start_line) {
     TokenResult* result = (TokenResult*)malloc(sizeof(TokenResult));
     if (result == NULL)
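
For context (not part of the commit): is_keyword compares an unterminated slice of the source buffer, so it needs both the strncmp prefix comparison and the strlen length check; without the length check, "format" would match the keyword "for". A minimal standalone sketch of the same lookup, using a hypothetical trimmed-down keyword table:

    #include <stdio.h>
    #include <string.h>

    /* Hypothetical shortened keyword table, for illustration only. */
    static const char* DEMO_KEYWORDS[] = { "fn", "for", "if", "return" };

    /* Same technique as is_keyword above: `word` points into a larger
     * buffer and is not NUL-terminated, so compare the first `length`
     * bytes and then require the keyword to have exactly that length. */
    static int demo_is_keyword(const char* word, size_t length) {
        size_t n = sizeof(DEMO_KEYWORDS) / sizeof(DEMO_KEYWORDS[0]);
        for (size_t i = 0; i < n; i++)
            if (strncmp(word, DEMO_KEYWORDS[i], length) == 0 &&
                strlen(DEMO_KEYWORDS[i]) == length)
                return 1;
        return 0;
    }

    int main(void) {
        const char* source = "format for";
        printf("%d\n", demo_is_keyword(&source[0], 6)); /* "format" -> 0 */
        printf("%d\n", demo_is_keyword(&source[7], 3)); /* "for"    -> 1 */
        return 0;
    }

Compiled on its own, this prints 0 and then 1.
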
@@ -181,6 +217,9 @@ static LexerResult lexer_next(Lexer* lexer) {
     // Identifiers
     if (is_identifier_start(c)) {
         while (is_identifier_char(peek(lexer))) advance(lexer);
+        size_t length = lexer->pos - start;
+        if (is_keyword(lexer, start, length))
+            return lexer_result(lexer, TOKEN_KEYWORD, start, start_line);
         return lexer_result(lexer, TOKEN_IDENTIFIER, start, start_line);
     }
 
@@ -13,7 +13,7 @@ const char* TOKEN_TYPES[] = {
     "EOF", "IDENTIFIER", "NUMBER", "OPERATOR",
     "LPAREN", "RPAREN", "SEMICOLON", "LBRACE",
     "RBRACE", "LBRACKET", "RBRACKET", "CHARACTER",
-    "STRING"
+    "STRING", "KEYWORD"
 };
 
 static void print_token(Token token) {
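
TOKEN_TYPES is presumably indexed by a token's TokenType when printing, so the name table has to grow in step with the enum. A self-contained sketch of how that pairing can be guarded at compile time (the DEMO_* names and the count sentinel are assumptions, not part of this codebase):

    #include <stdio.h>

    /* Hypothetical miniature enum/name-table pair; the COUNT sentinel is an
     * assumption, used only to demonstrate the compile-time length check. */
    typedef enum { DEMO_STRING, DEMO_KEYWORD, DEMO_TOKEN_COUNT } DemoTokenType;

    static const char* DEMO_NAMES[] = { "STRING", "KEYWORD" };

    /* Fails to compile if the table and the enum fall out of sync. */
    _Static_assert(sizeof(DEMO_NAMES) / sizeof(DEMO_NAMES[0]) == DEMO_TOKEN_COUNT,
                   "DEMO_NAMES needs one entry per DemoTokenType");

    int main(void) {
        printf("%s\n", DEMO_NAMES[DEMO_KEYWORD]); /* prints KEYWORD */
        return 0;
    }
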