Compare commits

3 Commits: 23f23cd9b6 ... 14158ed9f4

Author | SHA1 | Date
---|---|---
 | 14158ed9f4 |
 | 0c9f97b41d |
 | 256f9fe611 |
```diff
@@ -23,6 +23,55 @@ typedef enum {
     TOKEN_RBRACKET,
     TOKEN_CHARACTER,
     TOKEN_STRING,
+    // Definitions and Declarations
+    TOKEN_KW_CONST,
+    TOKEN_KW_ENUM,
+    TOKEN_KW_FN,
+    TOKEN_KW_LET,
+    TOKEN_KW_MUT,
+    TOKEN_KW_PUBLIC,
+    TOKEN_KW_STATIC,
+    TOKEN_KW_STRUCT,
+    TOKEN_KW_UNION,
+    // Control Flow
+    TOKEN_KW_BREAK,
+    TOKEN_KW_CASE,
+    TOKEN_KW_CONTINUE,
+    TOKEN_KW_DEFAULT,
+    TOKEN_KW_DO,
+    TOKEN_KW_ELSE,
+    TOKEN_KW_FOR,
+    TOKEN_KW_IF,
+    TOKEN_KW_MATCH,
+    TOKEN_KW_RETURN,
+    TOKEN_KW_SWITCH,
+    TOKEN_KW_WHILE,
+    // Values
+    TOKEN_KW_FALSE,
+    TOKEN_KW_TRUE,
+    // Types
+    TOKEN_KW_BOOL,
+    TOKEN_KW_F32,
+    TOKEN_KW_F64,
+    TOKEN_KW_I8,
+    TOKEN_KW_I16,
+    TOKEN_KW_I32,
+    TOKEN_KW_I64,
+    TOKEN_KW_U8,
+    TOKEN_KW_U16,
+    TOKEN_KW_U32,
+    TOKEN_KW_U64,
+    TOKEN_KW_VOID,
+    // Modules
+    TOKEN_KW_AS,
+    TOKEN_KW_IMPORT,
+    // Operators
+    TOKEN_KW_AND,
+    TOKEN_KW_IS,
+    TOKEN_KW_NOT,
+    TOKEN_KW_OR,
+    TOKEN_KW_SIZEOF,
+    TOKEN_KW_XOR,
 } TokenType;
 
 typedef struct {
```
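These enum members are mirrored by hand in the TOKEN_TYPES string table in src/main.c, which this same commit updates to match. A hypothetical sketch (not what this commit does) of the X-macro pattern that would generate both the enum and the name table from a single list:

```c
/* Hypothetical X-macro sketch: TOKEN_LIST, AS_ENUM, and AS_NAME are made-up
 * names, not part of this repository. One X(name) entry per token. */
#define TOKEN_LIST(X)   \
    X(TOKEN_EOF)        \
    X(TOKEN_IDENTIFIER) \
    X(TOKEN_KW_CONST)

#define AS_ENUM(name) name,
#define AS_NAME(name) #name + 6, /* skip the "TOKEN_" prefix, as TOKEN_TYPES does */

typedef enum { TOKEN_LIST(AS_ENUM) } TokenType;
static const char* TOKEN_NAMES[] = { TOKEN_LIST(AS_NAME) };
```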
src/lexer.c (93 lines changed)
```diff
@@ -9,6 +9,73 @@
 #include "sync/types.h"
 #include "sync/lexer.h"
 
+typedef struct {
+    const char* value;
+    TokenType type;
+} KeywordPair;
+
+typedef struct {
+    char is_keyword;
+    TokenType token;
+} KeywordResult;
+
+const KeywordPair KEYWORDS[] = {
+    // Definitions and Declarations
+    {"const", TOKEN_KW_CONST},
+    {"enum", TOKEN_KW_ENUM},
+    {"fn", TOKEN_KW_FN},
+    {"let", TOKEN_KW_LET},
+    {"mut", TOKEN_KW_MUT},
+    {"public", TOKEN_KW_PUBLIC},
+    {"static", TOKEN_KW_STATIC},
+    {"struct", TOKEN_KW_STRUCT},
+    {"union", TOKEN_KW_UNION},
+    // Control Flow
+    {"break", TOKEN_KW_BREAK},
+    {"case", TOKEN_KW_CASE},
+    {"continue", TOKEN_KW_CONTINUE},
+    {"default", TOKEN_KW_DEFAULT},
+    {"do", TOKEN_KW_DO},
+    {"else", TOKEN_KW_ELSE},
+    {"for", TOKEN_KW_FOR},
+    {"if", TOKEN_KW_IF},
+    {"match", TOKEN_KW_MATCH},
+    {"return", TOKEN_KW_RETURN},
+    {"switch", TOKEN_KW_SWITCH},
+    {"while", TOKEN_KW_WHILE},
+    // Values
+    {"False", TOKEN_KW_FALSE},
+    {"True", TOKEN_KW_TRUE},
+    // Types
+    {"bool", TOKEN_KW_BOOL},
+    {"f32", TOKEN_KW_F32},
+    {"f64", TOKEN_KW_F64},
+    {"i8", TOKEN_KW_I8},
+    {"i16", TOKEN_KW_I16},
+    {"i32", TOKEN_KW_I32},
+    {"i64", TOKEN_KW_I64},
+    {"u8", TOKEN_KW_U8},
+    {"u16", TOKEN_KW_U16},
+    {"u32", TOKEN_KW_U32},
+    {"u64", TOKEN_KW_U64},
+    {"void", TOKEN_KW_VOID},
+    // Modules
+    {"as", TOKEN_KW_AS},
+    {"import", TOKEN_KW_IMPORT},
+    // Operators
+    {"and", TOKEN_KW_AND},
+    {"is", TOKEN_KW_IS},
+    {"not", TOKEN_KW_NOT},
+    {"or", TOKEN_KW_OR},
+    {"sizeof", TOKEN_KW_SIZEOF},
+    {"xor", TOKEN_KW_XOR},
+};
+
+const char* RESERVED_KEYWORDS[] = {
+    "extern", "f16", "f128", "i128", "impl", "in", "move", "new", "self",
+    "super", "trait", "tuple", "type", "u128", "use", "where", "yeet",
+};
+
 void lexer_init(Lexer* lexer, const char* filename, const char* source) {
     lexer->filename = filename;
     lexer->source = source;
```
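Every identifier triggers a linear scan of KEYWORDS (43 entries). A hypothetical alternative, not part of this commit: keep the table sorted by spelling and use bsearch. This assumes a NUL-terminated probe string, which the lexer's (start, length) slices are not, so it is a sketch of the design option rather than a drop-in change.

```c
#include <stdlib.h>
#include <string.h>

/* Hypothetical: requires KEYWORDS sorted by .value, unlike the grouped
 * ordering above, and a NUL-terminated probe string. */
static int keyword_cmp(const void* key, const void* elem) {
    return strcmp((const char*)key, ((const KeywordPair*)elem)->value);
}

static const KeywordPair* keyword_lookup(const char* spelling) {
    return (const KeywordPair*)bsearch(
        spelling, KEYWORDS,
        sizeof(KEYWORDS) / sizeof(KEYWORDS[0]),
        sizeof(KEYWORDS[0]),
        keyword_cmp);
}
```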
```diff
@@ -84,6 +151,26 @@ static char is_identifier_char(char c) {
     return isalnum(c) || c == '_';
 }
 
+static KeywordResult is_keyword(Lexer* lexer, size_t start, size_t length) {
+    size_t num_keywords = sizeof(KEYWORDS) / sizeof(KEYWORDS[0]);
+    for (size_t i = 0; i < num_keywords; i++)
+        if (
+            strncmp(&lexer->source[start], KEYWORDS[i].value, length) == 0 &&
+            strlen(KEYWORDS[i].value) == length
+        ) return (KeywordResult){1, KEYWORDS[i].type};
+    return (KeywordResult){0, TOKEN_EOF};
+}
+
+static char is_future_keyword(Lexer* lexer, size_t start, size_t length) {
+    size_t num_keywords = sizeof(RESERVED_KEYWORDS) / sizeof(RESERVED_KEYWORDS[0]);
+    for (size_t i = 0; i < num_keywords; i++)
+        if (
+            strncmp(&lexer->source[start], RESERVED_KEYWORDS[i], length) == 0 &&
+            strlen(RESERVED_KEYWORDS[i]) == length
+        ) return 1;
+    return 0;
+}
+
 static LexerResult lexer_result(Lexer* lexer, TokenType type, size_t start, size_t start_line) {
     TokenResult* result = (TokenResult*)malloc(sizeof(TokenResult));
     if (result == NULL)
```
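The paired strncmp/strlen test in is_keyword (and is_future_keyword) makes the match exact rather than prefix-based, since the (start, length) slice is not NUL-terminated:

```c
/* Worked example of the exactness check:
 *
 *   slice "do" (length 2) vs. keyword "do":
 *     strncmp(slice, "do", 2) == 0 and strlen("do") == 2   -> TOKEN_KW_DO
 *
 *   slice "co" (length 2) vs. keyword "const":
 *     strncmp(slice, "const", 2) == 0 (only "co" compared),
 *     but strlen("const") == 5 != 2                        -> no match,
 *                                                             stays an identifier
 */
```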
```diff
@@ -181,6 +268,12 @@ static LexerResult lexer_next(Lexer* lexer) {
     // Identifiers
     if (is_identifier_start(c)) {
         while (is_identifier_char(peek(lexer))) advance(lexer);
+        size_t length = lexer->pos - start;
+        if (is_future_keyword(lexer, start, length))
+            return lexer_error(lexer, "Reserved Keyword for future use", start, start_line);
+        KeywordResult result = is_keyword(lexer, start, length);
+        if (result.is_keyword)
+            return lexer_result(lexer, result.token, start, start_line);
         return lexer_result(lexer, TOKEN_IDENTIFIER, start, start_line);
     }
 
```
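Both lookups run before the TOKEN_IDENTIFIER fallback, so a reserved spelling such as `impl` or `u128` is reported as a lexer error rather than silently becoming an identifier; only spellings absent from both tables fall through to TOKEN_IDENTIFIER.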
src/main.c (21 lines changed)
```diff
@@ -10,10 +10,23 @@
 #include "sync/syntax.h"
 
 const char* TOKEN_TYPES[] = {
-    "EOF", "IDENTIFIER", "NUMBER", "OPERATOR",
-    "LPAREN", "RPAREN", "SEMICOLON", "LBRACE",
-    "RBRACE", "LBRACKET", "RBRACKET", "CHARACTER",
-    "STRING"
+    "EOF", "IDENTIFIER", "NUMBER", "OPERATOR", "LPAREN", "RPAREN", "SEMICOLON",
+    "LBRACE", "RBRACE", "LBRACKET", "RBRACKET", "CHARACTER", "STRING",
+    // Definitions and Declarations
+    "KW_CONST", "KW_ENUM", "KW_FN", "KW_LET", "KW_MUT", "KW_PUBLIC",
+    "KW_STATIC", "KW_STRUCT", "KW_UNION",
+    // Control Flow
+    "KW_BREAK", "KW_CASE", "KW_CONTINUE", "KW_DEFAULT", "KW_DO", "KW_ELSE",
+    "KW_FOR", "KW_IF", "KW_MATCH", "KW_RETURN", "KW_SWITCH", "KW_WHILE",
+    // Values
+    "KW_FALSE", "KW_TRUE",
+    // Types
+    "KW_BOOL", "KW_F32", "KW_F64", "KW_I8", "KW_I16", "KW_I32", "KW_I64",
+    "KW_U8", "KW_U16", "KW_U32", "KW_U64", "KW_VOID",
+    // Modules
+    "KW_AS", "KW_IMPORT",
+    // Operators
+    "KW_AND", "KW_IS", "KW_NOT", "KW_OR", "KW_SIZEOF", "KW_XOR",
 };
 
 static void print_token(Token token) {
```
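TOKEN_TYPES is presumably indexed by TokenType in print_token, so its order and length must track the enum exactly. A hedged guard sketch, assuming a hypothetical TOKEN_TYPE_COUNT sentinel were appended as the enum's last member:

```c
/* Hypothetical: TOKEN_TYPE_COUNT does not exist in this commit. */
_Static_assert(
    sizeof(TOKEN_TYPES) / sizeof(TOKEN_TYPES[0]) == TOKEN_TYPE_COUNT,
    "TOKEN_TYPES must name every TokenType");
```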
src/syntax.c (70 lines changed)
```diff
@@ -11,7 +11,6 @@
 #include "sync/syntax.h"
 
 typedef struct {
-    const char* filename;
     TokenArray tokens;
     size_t pos;
 } Parser;
```
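The filename field leaves the parser state here; it reappears as a parameter of syntactical_analysis in the last hunk of this file.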
```diff
@@ -106,7 +105,45 @@ static SyntaxResult syntax_error(const char* message, Token token) {
     }};
 }
 
+static SyntaxResult parse_expression(Parser* parser) {
+}
+
 static SyntaxResult parse_statement(Parser* parser) {
+    Child* children = NULL;
+
+    if (
+        parser->tokens.tokens[parser->pos].type == TOKEN_EOF ||
+        parser->tokens.tokens[parser->pos].type == TOKEN_RBRACE
+    ) return syntax_error("Expected statement", parser->tokens.tokens[parser->pos]);
+    SyntaxResult result = parse_expression(parser);
+    if (result.type == SYNC_ERROR) return result;
+    if (result.result.type == SYNC_ERROR) return result;
+    if (children == NULL) {
+        children = (Child*)malloc(sizeof(Child));
+        if (children == NULL)
+            return (SyntaxResult){SYNC_ERROR, .error = (GeneralError){"Failed to allocate memory.", 1}};
+        children->next = NULL;
+        children->node = result.result.result;
+    } else {
+        GeneralError error = add_child(children, result.result.result);
+        if (error.message != NULL) return (SyntaxResult){SYNC_ERROR, .error = error};
+    }
+
+    if (parser->tokens.tokens[parser->pos].type != TOKEN_SEMICOLON)
+        return syntax_error("Expected statement", parser->tokens.tokens[parser->pos]);
+
+    SyntaxNode node = {
+        .type = NODE_SEMICOLON,
+        .start = parser->tokens.tokens[parser->pos].start,
+        .length = parser->tokens.tokens[parser->pos].length,
+        .file_info = parser->tokens.tokens[parser->pos].file_info,
+        .children = (Children){0, NULL}
+    };
+    GeneralError error = add_child(children, result.result.result);
+    if (error.message != NULL) return (SyntaxResult){SYNC_ERROR, .error = error};
+    parser->pos++;
+
+    return syntax_result(NODE_STATEMENT, children);
 }
 
 static SyntaxResult parse_block(Parser* parser) {
```
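Two things worth noting. First, parse_expression is an empty stub: as written it falls off the end without returning a SyntaxResult, presumably to be filled in by a later commit. Second, the double error check (result.type, then result.result.type) already repeats in parse_statement, parse_block, and syntactical_analysis; a hypothetical helper macro, not part of this commit, could factor it out:

```c
/* Hypothetical TRY_PARSE helper, not part of this commit. */
#define TRY_PARSE(var, expr) \
    SyntaxResult var = (expr); \
    if (var.type == SYNC_ERROR) return var; \
    if (var.result.type == SYNC_ERROR) return var

/* Usage inside parse_statement:
 *     TRY_PARSE(result, parse_expression(parser));
 */
```

Also note that after the NODE_SEMICOLON node is built, add_child is passed result.result.result again rather than node, so the semicolon node itself appears never to be attached.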
```diff
@@ -120,10 +157,14 @@ static SyntaxResult parse_block(Parser* parser) {
         if (result.result.type == SYNC_ERROR) return result;
         if (children == NULL) {
             children = (Child*)malloc(sizeof(Child));
-            if (children == NULL) return (SyntaxResult){SYNC_ERROR, .error = (GeneralError){"Failed to allocate memory.", 1}};
+            if (children == NULL)
+                return (SyntaxResult){SYNC_ERROR, .error = (GeneralError){"Failed to allocate memory.", 1}};
             children->next = NULL;
             children->node = result.result.result;
-        } else add_child(children, result.result.result);
+        } else {
+            GeneralError error = add_child(children, result.result.result);
+            if (error.message != NULL) return (SyntaxResult){SYNC_ERROR, .error = error};
+        }
         if (parser->tokens.tokens[parser->pos].type == TOKEN_EOF)
             return syntax_error("Expected '}'", parser->tokens.tokens[parser->pos]);
     }
```
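As in parse_statement above, the previously discarded return value of add_child is now checked and surfaced as a SYNC_ERROR.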
```diff
@@ -141,26 +182,25 @@ static SyntaxResult parse_block(Parser* parser) {
     return syntax_result(NODE_BLOCK, children);
 }
 
-SyntaxResult syntactical_analysis(TokenArray tokens) {
-    Parser* parser = (Parser*)malloc(sizeof(Parser));
-    if (parser == NULL) return (SyntaxResult){SYNC_ERROR, .error = (GeneralError){"Failed to allocate memory.", 1}};
+SyntaxResult syntactical_analysis(const char* filename, TokenArray tokens) {
+    Parser parser = { tokens, 0 };
     Child* children = NULL;
 
-    while (parser->tokens.tokens[parser->pos].type != TOKEN_EOF) {
-        SyntaxResult result = parse_block(parser);
-        if (result.type == SYNC_ERROR) { free(parser); return result; }
-        if (result.result.type == SYNC_ERROR) { free(parser); return result; }
+    while (parser.tokens.tokens[parser.pos].type != TOKEN_EOF) {
+        SyntaxResult result = parse_block(&parser);
+        if (result.type == SYNC_ERROR) return result;
+        if (result.result.type == SYNC_ERROR) return result;
         if (children == NULL) {
             children = (Child*)malloc(sizeof(Child));
-            if (children == NULL) {
-                free(parser);
+            if (children == NULL)
                 return (SyntaxResult){SYNC_ERROR, .error = (GeneralError){"Failed to allocate memory.", 1}};
-            }
             children->next = NULL;
             children->node = result.result.result;
-        } else add_child(children, result.result.result);
+        } else {
+            GeneralError error = add_child(children, result.result.result);
+            if (error.message != NULL) return (SyntaxResult){SYNC_ERROR, .error = error};
+        }
     }
 
-    free(parser);
     return syntax_result(NODE_FILE, children);
 }
```
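Making Parser a stack value removes the allocation-failure branch and the free(parser) cleanup that every early return previously needed; the only heap allocations left in this function are the Child list nodes.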