Updated operators lexing
This commit is contained in:
parent
f7f9797a4a
commit
2088c70803
|
@ -11,18 +11,64 @@
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
TOKEN_EOF,
|
TOKEN_EOF,
|
||||||
|
// Literals and values
|
||||||
TOKEN_IDENTIFIER,
|
TOKEN_IDENTIFIER,
|
||||||
TOKEN_NUMBER,
|
TOKEN_NUMBER,
|
||||||
TOKEN_OPERATOR,
|
|
||||||
TOKEN_LPAREN,
|
|
||||||
TOKEN_RPAREN,
|
|
||||||
TOKEN_SEMICOLON,
|
|
||||||
TOKEN_LBRACE,
|
|
||||||
TOKEN_RBRACE,
|
|
||||||
TOKEN_LBRACKET,
|
|
||||||
TOKEN_RBRACKET,
|
|
||||||
TOKEN_CHARACTER,
|
TOKEN_CHARACTER,
|
||||||
TOKEN_STRING,
|
TOKEN_STRING,
|
||||||
|
// Punctuation
|
||||||
|
TOKEN_SEMICOLON,
|
||||||
|
TOKEN_COLON,
|
||||||
|
TOKEN_COMMA,
|
||||||
|
TOKEN_L_PARENTHESIS,
|
||||||
|
TOKEN_R_PARENTHESIS,
|
||||||
|
TOKEN_L_CURLY_BRACE,
|
||||||
|
TOKEN_R_CURLY_BRACE,
|
||||||
|
TOKEN_L_SQUARE_BRACKET,
|
||||||
|
TOKEN_R_SQUARE_BRACKET,
|
||||||
|
// --- Operators ---
|
||||||
|
// Pointers
|
||||||
|
TOKEN_OPER_DEREFERENCE_ZINC,
|
||||||
|
TOKEN_OPER_ADDRESS_OF_ZINC,
|
||||||
|
TOKEN_OPER_DEREFERENCE_MEMBER_OF,
|
||||||
|
TOKEN_OPER_MEMBER_OF,
|
||||||
|
// Bitwise
|
||||||
|
TOKEN_OPER_BITWISE_NOT,
|
||||||
|
TOKEN_OPER_BITSHIFT_LEFT,
|
||||||
|
TOKEN_OPER_BITSHIFT_RIGHT,
|
||||||
|
TOKEN_OPER_BITWISE_AND,
|
||||||
|
TOKEN_OPER_BITWISE_XOR,
|
||||||
|
TOKEN_OPER_BITWISE_OR,
|
||||||
|
// Boolean
|
||||||
|
TOKEN_OPER_BOOLEAN_NOT,
|
||||||
|
TOKEN_OPER_BOOLEAN_AND,
|
||||||
|
TOKEN_OPER_BOOLEAN_OR,
|
||||||
|
// Arithmetic
|
||||||
|
TOKEN_OPER_MULTIPLICATION,
|
||||||
|
TOKEN_OPER_DIVISION,
|
||||||
|
TOKEN_OPER_MODULUS,
|
||||||
|
TOKEN_OPER_ADDITION,
|
||||||
|
TOKEN_OPER_SUBTRACTION,
|
||||||
|
// Comparison
|
||||||
|
TOKEN_OPER_GREATER_THAN,
|
||||||
|
TOKEN_OPER_GREATER_THAN_OR_EQUAL_TO,
|
||||||
|
TOKEN_OPER_LESS_THAN,
|
||||||
|
TOKEN_OPER_LESS_THAN_OR_EQUAL_TO,
|
||||||
|
TOKEN_OPER_EQUALITY,
|
||||||
|
TOKEN_OPER_INEQUALITY,
|
||||||
|
// Assignment
|
||||||
|
TOKEN_OPER_ASSIGNMENT,
|
||||||
|
TOKEN_OPER_ADDITION_ASSIGNMENT,
|
||||||
|
TOKEN_OPER_SUBTRACTION_ASSIGNMENT,
|
||||||
|
TOKEN_OPER_MULTIPLICATION_ASSIGNMENT,
|
||||||
|
TOKEN_OPER_DIVISION_ASSIGNMENT,
|
||||||
|
TOKEN_OPER_MODULUS_ASSIGNMENT,
|
||||||
|
TOKEN_OPER_BITWISE_AND_ASSIGNMENT,
|
||||||
|
TOKEN_OPER_BITWISE_OR_ASSIGNMENT,
|
||||||
|
TOKEN_OPER_BITWISE_XOR_ASSIGNMENT,
|
||||||
|
TOKEN_OPER_BITSHIFT_LEFT_ASSIGNMENT,
|
||||||
|
TOKEN_OPER_BITSHIFT_RIGHT_ASSIGNMENT,
|
||||||
|
// --- Keywords ---
|
||||||
// Definitions and Declarations
|
// Definitions and Declarations
|
||||||
TOKEN_KW_CONST,
|
TOKEN_KW_CONST,
|
||||||
TOKEN_KW_ENUM,
|
TOKEN_KW_ENUM,
|
||||||
|
@ -74,7 +120,6 @@ typedef enum {
|
||||||
TOKEN_KW_NOT,
|
TOKEN_KW_NOT,
|
||||||
TOKEN_KW_OR,
|
TOKEN_KW_OR,
|
||||||
TOKEN_KW_SIZEOF,
|
TOKEN_KW_SIZEOF,
|
||||||
TOKEN_KW_XOR,
|
|
||||||
} TokenType;
|
} TokenType;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
|
104
src/lexer.c
104
src/lexer.c
|
@ -71,7 +71,6 @@ const KeywordPair KEYWORDS[] = {
|
||||||
{"not", TOKEN_KW_NOT},
|
{"not", TOKEN_KW_NOT},
|
||||||
{"or", TOKEN_KW_OR},
|
{"or", TOKEN_KW_OR},
|
||||||
{"sizeof", TOKEN_KW_SIZEOF},
|
{"sizeof", TOKEN_KW_SIZEOF},
|
||||||
{"xor", TOKEN_KW_XOR},
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const char *RESERVED_KEYWORDS[] = {
|
const char *RESERVED_KEYWORDS[] = {
|
||||||
|
@ -88,6 +87,7 @@ const char *RESERVED_KEYWORDS[] = {
|
||||||
"in",
|
"in",
|
||||||
"match",
|
"match",
|
||||||
"move",
|
"move",
|
||||||
|
"namespace", // trailing comma required: without it C concatenates this literal with the next one ("namespacenew")
|
||||||
"new",
|
"new",
|
||||||
"pointer",
|
"pointer",
|
||||||
"Pointer",
|
"Pointer",
|
||||||
|
@ -307,49 +307,83 @@ static LexerResult lexer_next(Lexer* lexer) {
|
||||||
}
|
}
|
||||||
|
|
||||||
advance(lexer);
|
advance(lexer);
|
||||||
|
TokenType punctuation;
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case '=':
|
case ';': punctuation = TOKEN_SEMICOLON; break;
|
||||||
if (peek(lexer) == '=') advance(lexer);
|
case ':': punctuation = TOKEN_COLON; break;
|
||||||
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
|
case ',': punctuation = TOKEN_COMMA; break;
|
||||||
case '>':
|
case '(': punctuation = TOKEN_L_PARENTHESIS; break;
|
||||||
if (peek(lexer) == '=' || peek(lexer) == '>') advance(lexer);
|
case ')': punctuation = TOKEN_R_PARENTHESIS; break;
|
||||||
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
|
case '{': punctuation = TOKEN_L_CURLY_BRACE; break;
|
||||||
case '<':
|
case '}': punctuation = TOKEN_R_CURLY_BRACE; break;
|
||||||
if (peek(lexer) == '=' || peek(lexer) == '<') advance(lexer);
|
case '[': punctuation = TOKEN_L_SQUARE_BRACKET; break;
|
||||||
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
|
case ']': punctuation = TOKEN_R_SQUARE_BRACKET; break;
|
||||||
case '!':
|
case '#': punctuation = TOKEN_OPER_DEREFERENCE_ZINC; break;
|
||||||
if (peek(lexer) == '=') advance(lexer);
|
case '@': punctuation = TOKEN_OPER_ADDRESS_OF_ZINC; break;
|
||||||
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
|
case '.': punctuation = TOKEN_OPER_MEMBER_OF; break;
|
||||||
|
case '~': punctuation = TOKEN_OPER_BITWISE_NOT; break;
|
||||||
case '&':
|
case '&':
|
||||||
if (peek(lexer) == '=' || peek(lexer) == '&') advance(lexer);
|
if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_BITWISE_AND_ASSIGNMENT; }
|
||||||
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
|
if (peek(lexer) == '&') { advance(lexer); punctuation = TOKEN_OPER_BOOLEAN_AND; }
|
||||||
|
else punctuation = TOKEN_OPER_BITWISE_AND;
|
||||||
|
break;
|
||||||
|
case '^':
|
||||||
|
if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_BITWISE_XOR_ASSIGNMENT; }
|
||||||
|
else punctuation = TOKEN_OPER_BITWISE_XOR;
|
||||||
|
break;
|
||||||
case '|':
|
case '|':
|
||||||
if (peek(lexer) == '=' || peek(lexer) == '|') advance(lexer);
|
if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_BITWISE_OR_ASSIGNMENT; }
|
||||||
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
|
if (peek(lexer) == '|') { advance(lexer); punctuation = TOKEN_OPER_BOOLEAN_OR; }
|
||||||
|
punctuation = TOKEN_OPER_BITWISE_OR;
|
||||||
|
break;
|
||||||
|
case '!':
|
||||||
|
if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_INEQUALITY; }
|
||||||
|
else punctuation = TOKEN_OPER_BOOLEAN_NOT;
|
||||||
|
break;
|
||||||
case '+':
|
case '+':
|
||||||
if (peek(lexer) == '=') advance(lexer);
|
if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_ADDITION_ASSIGNMENT; }
|
||||||
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
|
else punctuation = TOKEN_OPER_ADDITION;
|
||||||
|
break;
|
||||||
case '-':
|
case '-':
|
||||||
if (peek(lexer) == '=' || peek(lexer) == '>') advance(lexer);
|
if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_SUBTRACTION_ASSIGNMENT; }
|
||||||
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
|
else if (peek(lexer) == '>') { advance(lexer); punctuation = TOKEN_OPER_DEREFERENCE_MEMBER_OF; }
|
||||||
|
else punctuation = TOKEN_OPER_SUBTRACTION;
|
||||||
|
break;
|
||||||
case '*':
|
case '*':
|
||||||
if (peek(lexer) == '=') advance(lexer);
|
if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_MULTIPLICATION_ASSIGNMENT; }
|
||||||
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
|
else punctuation = TOKEN_OPER_MULTIPLICATION;
|
||||||
|
break;
|
||||||
case '/':
|
case '/':
|
||||||
if (peek(lexer) == '=') advance(lexer);
|
if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_DIVISION_ASSIGNMENT; }
|
||||||
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
|
else punctuation = TOKEN_OPER_DIVISION;
|
||||||
case '.': return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
|
break;
|
||||||
case ',': return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
|
case '%':
|
||||||
case ':': return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
|
if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_MODULUS_ASSIGNMENT; }
|
||||||
case '(': return lexer_result(lexer, TOKEN_LPAREN, start, start_line);
|
else punctuation = TOKEN_OPER_MODULUS;
|
||||||
case ')': return lexer_result(lexer, TOKEN_RPAREN, start, start_line);
|
break;
|
||||||
case ';': return lexer_result(lexer, TOKEN_SEMICOLON, start, start_line);
|
case '<':
|
||||||
case '}': return lexer_result(lexer, TOKEN_RBRACE, start, start_line);
|
if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_LESS_THAN_OR_EQUAL_TO; }
|
||||||
case '{': return lexer_result(lexer, TOKEN_LBRACE, start, start_line);
|
else if (peek(lexer) == '<') {
|
||||||
case ']': return lexer_result(lexer, TOKEN_RBRACKET, start, start_line);
|
advance(lexer);
|
||||||
case '[': return lexer_result(lexer, TOKEN_LBRACKET, start, start_line);
|
if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_BITSHIFT_LEFT_ASSIGNMENT; }
|
||||||
|
else punctuation = TOKEN_OPER_BITSHIFT_LEFT;
|
||||||
|
} else punctuation = TOKEN_OPER_LESS_THAN;
|
||||||
|
break;
|
||||||
|
case '>':
|
||||||
|
if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_GREATER_THAN_OR_EQUAL_TO; }
|
||||||
|
else if (peek(lexer) == '>') {
|
||||||
|
advance(lexer);
|
||||||
|
if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_BITSHIFT_RIGHT_ASSIGNMENT; }
|
||||||
|
else punctuation = TOKEN_OPER_BITSHIFT_RIGHT;
|
||||||
|
} else punctuation = TOKEN_OPER_GREATER_THAN;
|
||||||
|
break;
|
||||||
|
case '=':
|
||||||
|
if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_EQUALITY; }
|
||||||
|
else punctuation = TOKEN_OPER_ASSIGNMENT;
|
||||||
|
break;
|
||||||
default: return lexer_error(lexer, "Unknown token", start, start_line);
|
default: return lexer_error(lexer, "Unknown token", start, start_line);
|
||||||
}
|
}
|
||||||
|
return lexer_result(lexer, punctuation, start, start_line);
|
||||||
}
|
}
|
||||||
|
|
||||||
LexerResult lexical_analysis(Lexer *lexer) {
|
LexerResult lexical_analysis(Lexer *lexer) {
|
||||||
|
|
115
src/main.c
115
src/main.c
|
@ -10,23 +10,116 @@
|
||||||
#include "sync/syntax.h"
|
#include "sync/syntax.h"
|
||||||
|
|
||||||
const char *TOKEN_TYPES[] = {
|
const char *TOKEN_TYPES[] = {
|
||||||
"EOF", "IDENTIFIER", "NUMBER", "OPERATOR", "LPAREN", "RPAREN", "SEMICOLON",
|
"EOF",
|
||||||
"LBRACE", "RBRACE", "LBRACKET", "RBRACKET", "CHARACTER", "STRING",
|
// Literals and values
|
||||||
|
"IDENTIFIER",
|
||||||
|
"NUMBER",
|
||||||
|
"CHARACTER",
|
||||||
|
"STRING",
|
||||||
|
// Punctuation
|
||||||
|
"SEMICOLON",
|
||||||
|
"COLON",
|
||||||
|
"COMMA",
|
||||||
|
"L_PARENTHESIS",
|
||||||
|
"R_PARENTHESIS",
|
||||||
|
"L_CURLY_BRACE",
|
||||||
|
"R_CURLY_BRACE",
|
||||||
|
"L_SQUARE_BRACKET",
|
||||||
|
"R_SQUARE_BRACKET",
|
||||||
|
// --- Operators ---
|
||||||
|
// Pointers
|
||||||
|
"OPER_DEREFERENCE_ZINC",
|
||||||
|
"OPER_ADDRESS_OF_ZINC",
|
||||||
|
"OPER_DEREFERENCE_MEMBER_OF",
|
||||||
|
"OPER_MEMBER_OF",
|
||||||
|
// Bitwise
|
||||||
|
"OPER_BITWISE_NOT",
|
||||||
|
"OPER_BITSHIFT_LEFT",
|
||||||
|
"OPER_BITSHIFT_RIGHT",
|
||||||
|
"OPER_BITWISE_AND",
|
||||||
|
"OPER_BITWISE_XOR",
|
||||||
|
"OPER_BITWISE_OR",
|
||||||
|
// Boolean
|
||||||
|
"OPER_BOOLEAN_NOT",
|
||||||
|
"OPER_BOOLEAN_AND",
|
||||||
|
"OPER_BOOLEAN_OR",
|
||||||
|
// Arithmetic
|
||||||
|
"OPER_MULTIPLICATION",
|
||||||
|
"OPER_DIVISION",
|
||||||
|
"OPER_MODULUS",
|
||||||
|
"OPER_ADDITION",
|
||||||
|
"OPER_SUBTRACTION",
|
||||||
|
// Comparison
|
||||||
|
"OPER_GREATER_THAN",
|
||||||
|
"OPER_GREATER_THAN_OR_EQUAL_TO",
|
||||||
|
"OPER_LESS_THAN",
|
||||||
|
"OPER_LESS_THAN_OR_EQUAL_TO",
|
||||||
|
"OPER_EQUALITY",
|
||||||
|
"OPER_INEQUALITY",
|
||||||
|
// Assignment
|
||||||
|
"OPER_ASSIGNMENT",
|
||||||
|
"OPER_ADDITION_ASSIGNMENT",
|
||||||
|
"OPER_SUBTRACTION_ASSIGNMENT",
|
||||||
|
"OPER_MULTIPLICATION_ASSIGNMENT",
|
||||||
|
"OPER_DIVISION_ASSIGNMENT",
|
||||||
|
"OPER_MODULUS_ASSIGNMENT",
|
||||||
|
"OPER_BITWISE_AND_ASSIGNMENT",
|
||||||
|
"OPER_BITWISE_OR_ASSIGNMENT",
|
||||||
|
"OPER_BITWISE_XOR_ASSIGNMENT",
|
||||||
|
"OPER_BITSHIFT_LEFT_ASSIGNMENT",
|
||||||
|
"OPER_BITSHIFT_RIGHT_ASSIGNMENT",
|
||||||
|
// --- Keywords ---
|
||||||
// Definitions and Declarations
|
// Definitions and Declarations
|
||||||
"KW_CONST", "KW_ENUM", "KW_FN", "KW_LET", "KW_MUT", "KW_PUBLIC",
|
"KW_CONST",
|
||||||
"KW_STATIC", "KW_STRUCT", "KW_UNION",
|
"KW_ENUM",
|
||||||
|
"KW_FN",
|
||||||
|
"KW_LET",
|
||||||
|
"KW_MUT",
|
||||||
|
"KW_PUBLIC",
|
||||||
|
"KW_STATIC",
|
||||||
|
"KW_STRUCT",
|
||||||
|
"KW_UNION",
|
||||||
// Control Flow
|
// Control Flow
|
||||||
"KW_BREAK", "KW_CASE", "KW_CONTINUE", "KW_DEFAULT", "KW_DO", "KW_ELSE",
|
"KW_BREAK",
|
||||||
"KW_FOR", "KW_IF", "KW_MATCH", "KW_RETURN", "KW_SWITCH", "KW_WHILE",
|
"KW_CONTINUE",
|
||||||
|
"KW_DO",
|
||||||
|
"KW_ELSE",
|
||||||
|
"KW_FOR",
|
||||||
|
"KW_IF",
|
||||||
|
"KW_RETURN",
|
||||||
|
"KW_WHILE",
|
||||||
// Values
|
// Values
|
||||||
"KW_FALSE", "KW_TRUE",
|
"KW_ERROR",
|
||||||
|
"KW_FALSE",
|
||||||
|
"KW_NONE",
|
||||||
|
"KW_SOME",
|
||||||
|
"KW_TRUE",
|
||||||
|
"KW_VALUE",
|
||||||
// Types
|
// Types
|
||||||
"KW_BOOL", "KW_F32", "KW_F64", "KW_I8", "KW_I16", "KW_I32", "KW_I64",
|
"KW_BOOL",
|
||||||
"KW_U8", "KW_U16", "KW_U32", "KW_U64", "KW_VOID",
|
"KW_F32",
|
||||||
|
"KW_F64",
|
||||||
|
"KW_I8",
|
||||||
|
"KW_I16",
|
||||||
|
"KW_I32",
|
||||||
|
"KW_I64",
|
||||||
|
"KW_OPTION",
|
||||||
|
"KW_RESULT",
|
||||||
|
"KW_U8",
|
||||||
|
"KW_U16",
|
||||||
|
"KW_U32",
|
||||||
|
"KW_U64",
|
||||||
|
"KW_VOID",
|
||||||
// Modules
|
// Modules
|
||||||
"KW_AS", "KW_IMPORT",
|
"KW_AS",
|
||||||
|
"KW_IMPORT",
|
||||||
// Operators
|
// Operators
|
||||||
"KW_AND", "KW_IS", "KW_NOT", "KW_OR", "KW_SIZEOF", "KW_XOR",
|
"KW_AND",
|
||||||
|
"KW_CAST",
|
||||||
|
"KW_IS",
|
||||||
|
"KW_NOT",
|
||||||
|
"KW_OR",
|
||||||
|
"KW_SIZEOF",
|
||||||
};
|
};
|
||||||
|
|
||||||
static void print_token(Token token) {
|
static void print_token(Token token) {
|
||||||
|
|
|
@ -6,7 +6,7 @@ static void print_token(Token token) {
|
||||||
(const char *[]){
|
(const char *[]){
|
||||||
"EOF", "IDENTIFIER", "NUMBER", "OPERATOR",
|
"EOF", "IDENTIFIER", "NUMBER", "OPERATOR",
|
||||||
"LPAREN", "RPAREN", "SEMICOLON", "UNKNOWN"
|
"LPAREN", "RPAREN", "SEMICOLON", "UNKNOWN"
|
||||||
}[token.type],
|
}[token.type_],
|
||||||
(int)token.length, token.start
|
(int)token.length, token.start
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -37,12 +37,12 @@ int main(void) {
|
||||||
TokenResult result;
|
TokenResult result;
|
||||||
do {
|
do {
|
||||||
result = lexer_next(&lexer);
|
result = lexer_next(&lexer);
|
||||||
if (result.type == SYNC_RESULT) {
|
if (result.type_ == SYNC_RESULT) {
|
||||||
print_token(result.result);
|
print_token(result.result);
|
||||||
} else {
|
} else {
|
||||||
fprintf(stderr, "Error: %s\n", result.error.message);
|
fprintf(stderr, "Error: %s\n", result.error.message);
|
||||||
}
|
}
|
||||||
} while (result.type != SYNC_ERROR && result.result.type != TOKEN_EOF);
|
} while (result.type_ != SYNC_ERROR && result.result.type_ != TOKEN_EOF);
|
||||||
|
|
||||||
free(source);
|
free(source);
|
||||||
return 0;
|
return 0;
|
||||||
|
|
Loading…
Reference in New Issue