Updated operators lexing

This commit is contained in:
Kyler Olsen 2025-06-22 00:45:06 -06:00
parent f7f9797a4a
commit 2088c70803
4 changed files with 231 additions and 59 deletions

View File

@ -11,18 +11,64 @@
typedef enum { typedef enum {
TOKEN_EOF, TOKEN_EOF,
// Literals and values
TOKEN_IDENTIFIER, TOKEN_IDENTIFIER,
TOKEN_NUMBER, TOKEN_NUMBER,
TOKEN_OPERATOR,
TOKEN_LPAREN,
TOKEN_RPAREN,
TOKEN_SEMICOLON,
TOKEN_LBRACE,
TOKEN_RBRACE,
TOKEN_LBRACKET,
TOKEN_RBRACKET,
TOKEN_CHARACTER, TOKEN_CHARACTER,
TOKEN_STRING, TOKEN_STRING,
// Punctuation
TOKEN_SEMICOLON,
TOKEN_COLON,
TOKEN_COMMA,
TOKEN_L_PARENTHESIS,
TOKEN_R_PARENTHESIS,
TOKEN_L_CURLY_BRACE,
TOKEN_R_CURLY_BRACE,
TOKEN_L_SQUARE_BRACKET,
TOKEN_R_SQUARE_BRACKET,
// --- Operators ---
// Pointers
TOKEN_OPER_DEREFERENCE_ZINC,
TOKEN_OPER_ADDRESS_OF_ZINC,
TOKEN_OPER_DEREFERENCE_MEMBER_OF,
TOKEN_OPER_MEMBER_OF,
// Bitwise
TOKEN_OPER_BITWISE_NOT,
TOKEN_OPER_BITSHIFT_LEFT,
TOKEN_OPER_BITSHIFT_RIGHT,
TOKEN_OPER_BITWISE_AND,
TOKEN_OPER_BITWISE_XOR,
TOKEN_OPER_BITWISE_OR,
// Boolean
TOKEN_OPER_BOOLEAN_NOT,
TOKEN_OPER_BOOLEAN_AND,
TOKEN_OPER_BOOLEAN_OR,
// Arithmetic
TOKEN_OPER_MULTIPLICATION,
TOKEN_OPER_DIVISION,
TOKEN_OPER_MODULUS,
TOKEN_OPER_ADDITION,
TOKEN_OPER_SUBTRACTION,
// Comparison
TOKEN_OPER_GREATER_THAN,
TOKEN_OPER_GREATER_THAN_OR_EQUAL_TO,
TOKEN_OPER_LESS_THAN,
TOKEN_OPER_LESS_THAN_OR_EQUAL_TO,
TOKEN_OPER_EQUALITY,
TOKEN_OPER_INEQUALITY,
// Assignment
TOKEN_OPER_ASSIGNMENT,
TOKEN_OPER_ADDITION_ASSIGNMENT,
TOKEN_OPER_SUBTRACTION_ASSIGNMENT,
TOKEN_OPER_MULTIPLICATION_ASSIGNMENT,
TOKEN_OPER_DIVISION_ASSIGNMENT,
TOKEN_OPER_MODULUS_ASSIGNMENT,
TOKEN_OPER_BITWISE_AND_ASSIGNMENT,
TOKEN_OPER_BITWISE_OR_ASSIGNMENT,
TOKEN_OPER_BITWISE_XOR_ASSIGNMENT,
TOKEN_OPER_BITSHIFT_LEFT_ASSIGNMENT,
TOKEN_OPER_BITSHIFT_RIGHT_ASSIGNMENT,
// --- Keywords ---
// Definitions and Declarations // Definitions and Declarations
TOKEN_KW_CONST, TOKEN_KW_CONST,
TOKEN_KW_ENUM, TOKEN_KW_ENUM,
@ -74,7 +120,6 @@ typedef enum {
TOKEN_KW_NOT, TOKEN_KW_NOT,
TOKEN_KW_OR, TOKEN_KW_OR,
TOKEN_KW_SIZEOF, TOKEN_KW_SIZEOF,
TOKEN_KW_XOR,
} TokenType; } TokenType;
typedef struct { typedef struct {

View File

@ -71,7 +71,6 @@ const KeywordPair KEYWORDS[] = {
{"not", TOKEN_KW_NOT}, {"not", TOKEN_KW_NOT},
{"or", TOKEN_KW_OR}, {"or", TOKEN_KW_OR},
{"sizeof", TOKEN_KW_SIZEOF}, {"sizeof", TOKEN_KW_SIZEOF},
{"xor", TOKEN_KW_XOR},
}; };
const char *RESERVED_KEYWORDS[] = { const char *RESERVED_KEYWORDS[] = {
@ -88,6 +87,7 @@ const char *RESERVED_KEYWORDS[] = {
"in", "in",
"match", "match",
"move", "move",
"namespace",
"new", "new",
"pointer", "pointer",
"Pointer", "Pointer",
@ -307,49 +307,83 @@ static LexerResult lexer_next(Lexer* lexer) {
} }
advance(lexer); advance(lexer);
TokenType punctuation;
switch (c) { switch (c) {
case '=': case ';': punctuation = TOKEN_SEMICOLON; break;
if (peek(lexer) == '=') advance(lexer); case ':': punctuation = TOKEN_COLON; break;
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line); case ',': punctuation = TOKEN_COMMA; break;
case '>': case '(': punctuation = TOKEN_L_PARENTHESIS; break;
if (peek(lexer) == '=' || peek(lexer) == '>') advance(lexer); case ')': punctuation = TOKEN_R_PARENTHESIS; break;
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line); case '{': punctuation = TOKEN_L_CURLY_BRACE; break;
case '<': case '}': punctuation = TOKEN_R_CURLY_BRACE; break;
if (peek(lexer) == '=' || peek(lexer) == '<') advance(lexer); case '[': punctuation = TOKEN_L_SQUARE_BRACKET; break;
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line); case ']': punctuation = TOKEN_R_SQUARE_BRACKET; break;
case '!': case '#': punctuation = TOKEN_OPER_DEREFERENCE_ZINC; break;
if (peek(lexer) == '=') advance(lexer); case '@': punctuation = TOKEN_OPER_ADDRESS_OF_ZINC; break;
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line); case '.': punctuation = TOKEN_OPER_MEMBER_OF; break;
case '~': punctuation = TOKEN_OPER_BITWISE_NOT; break;
case '&': case '&':
if (peek(lexer) == '=' || peek(lexer) == '&') advance(lexer); if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_BITWISE_AND_ASSIGNMENT; }
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line); else if (peek(lexer) == '&') { advance(lexer); punctuation = TOKEN_OPER_BOOLEAN_AND; }
else punctuation = TOKEN_OPER_BITWISE_AND;
break;
case '^':
if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_BITWISE_XOR_ASSIGNMENT; }
else punctuation = TOKEN_OPER_BITWISE_XOR;
break;
case '|': case '|':
if (peek(lexer) == '=' || peek(lexer) == '|') advance(lexer); if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_BITWISE_OR_ASSIGNMENT; }
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line); else if (peek(lexer) == '|') { advance(lexer); punctuation = TOKEN_OPER_BOOLEAN_OR; }
else punctuation = TOKEN_OPER_BITWISE_OR;
break;
case '!':
if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_INEQUALITY; }
else punctuation = TOKEN_OPER_BOOLEAN_NOT;
break;
case '+': case '+':
if (peek(lexer) == '=') advance(lexer); if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_ADDITION_ASSIGNMENT; }
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line); else punctuation = TOKEN_OPER_ADDITION;
break;
case '-': case '-':
if (peek(lexer) == '=' || peek(lexer) == '>') advance(lexer); if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_SUBTRACTION_ASSIGNMENT; }
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line); else if (peek(lexer) == '>') { advance(lexer); punctuation = TOKEN_OPER_DEREFERENCE_MEMBER_OF; }
else punctuation = TOKEN_OPER_SUBTRACTION;
break;
case '*': case '*':
if (peek(lexer) == '=') advance(lexer); if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_MULTIPLICATION_ASSIGNMENT; }
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line); else punctuation = TOKEN_OPER_MULTIPLICATION;
break;
case '/': case '/':
if (peek(lexer) == '=') advance(lexer); if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_DIVISION_ASSIGNMENT; }
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line); else punctuation = TOKEN_OPER_DIVISION;
case '.': return lexer_result(lexer, TOKEN_OPERATOR, start, start_line); break;
case ',': return lexer_result(lexer, TOKEN_OPERATOR, start, start_line); case '%':
case ':': return lexer_result(lexer, TOKEN_OPERATOR, start, start_line); if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_MODULUS_ASSIGNMENT; }
case '(': return lexer_result(lexer, TOKEN_LPAREN, start, start_line); else punctuation = TOKEN_OPER_MODULUS;
case ')': return lexer_result(lexer, TOKEN_RPAREN, start, start_line); break;
case ';': return lexer_result(lexer, TOKEN_SEMICOLON, start, start_line); case '<':
case '}': return lexer_result(lexer, TOKEN_RBRACE, start, start_line); if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_LESS_THAN_OR_EQUAL_TO; }
case '{': return lexer_result(lexer, TOKEN_LBRACE, start, start_line); else if (peek(lexer) == '<') {
case ']': return lexer_result(lexer, TOKEN_RBRACKET, start, start_line); advance(lexer);
case '[': return lexer_result(lexer, TOKEN_LBRACKET, start, start_line); if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_BITSHIFT_LEFT_ASSIGNMENT; }
else punctuation = TOKEN_OPER_BITSHIFT_LEFT;
} else punctuation = TOKEN_OPER_LESS_THAN;
break;
case '>':
if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_GREATER_THAN_OR_EQUAL_TO; }
else if (peek(lexer) == '>') {
advance(lexer);
if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_BITSHIFT_RIGHT_ASSIGNMENT; }
else punctuation = TOKEN_OPER_BITSHIFT_RIGHT;
} else punctuation = TOKEN_OPER_GREATER_THAN;
break;
case '=':
if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_EQUALITY; }
else punctuation = TOKEN_OPER_ASSIGNMENT;
break;
default: return lexer_error(lexer, "Unknown token", start, start_line); default: return lexer_error(lexer, "Unknown token", start, start_line);
} }
return lexer_result(lexer, punctuation, start, start_line);
} }
LexerResult lexical_analysis(Lexer *lexer) { LexerResult lexical_analysis(Lexer *lexer) {

View File

@ -10,23 +10,116 @@
#include "sync/syntax.h" #include "sync/syntax.h"
const char *TOKEN_TYPES[] = { const char *TOKEN_TYPES[] = {
"EOF", "IDENTIFIER", "NUMBER", "OPERATOR", "LPAREN", "RPAREN", "SEMICOLON", "EOF",
"LBRACE", "RBRACE", "LBRACKET", "RBRACKET", "CHARACTER", "STRING", // Literals and values
"IDENTIFIER",
"NUMBER",
"CHARACTER",
"STRING",
// Punctuation
"SEMICOLON",
"COLON",
"COMMA",
"L_PARENTHESIS",
"R_PARENTHESIS",
"L_CURLY_BRACE",
"R_CURLY_BRACE",
"L_SQUARE_BRACKET",
"R_SQUARE_BRACKET",
// --- Operators ---
// Pointers
"OPER_DEREFERENCE_ZINC",
"OPER_ADDRESS_OF_ZINC",
"OPER_DEREFERENCE_MEMBER_OF",
"OPER_MEMBER_OF",
// Bitwise
"OPER_BITWISE_NOT",
"OPER_BITSHIFT_LEFT",
"OPER_BITSHIFT_RIGHT",
"OPER_BITWISE_AND",
"OPER_BITWISE_XOR",
"OPER_BITWISE_OR",
// Boolean
"OPER_BOOLEAN_NOT",
"OPER_BOOLEAN_AND",
"OPER_BOOLEAN_OR",
// Arithmetic
"OPER_MULTIPLICATION",
"OPER_DIVISION",
"OPER_MODULUS",
"OPER_ADDITION",
"OPER_SUBTRACTION",
// Comparison
"OPER_GREATER_THAN",
"OPER_GREATER_THAN_OR_EQUAL_TO",
"OPER_LESS_THAN",
"OPER_LESS_THAN_OR_EQUAL_TO",
"OPER_EQUALITY",
"OPER_INEQUALITY",
// Assignment
"OPER_ASSIGNMENT",
"OPER_ADDITION_ASSIGNMENT",
"OPER_SUBTRACTION_ASSIGNMENT",
"OPER_MULTIPLICATION_ASSIGNMENT",
"OPER_DIVISION_ASSIGNMENT",
"OPER_MODULUS_ASSIGNMENT",
"OPER_BITWISE_AND_ASSIGNMENT",
"OPER_BITWISE_OR_ASSIGNMENT",
"OPER_BITWISE_XOR_ASSIGNMENT",
"OPER_BITSHIFT_LEFT_ASSIGNMENT",
"OPER_BITSHIFT_RIGHT_ASSIGNMENT",
// --- Keywords ---
// Definitions and Declarations // Definitions and Declarations
"KW_CONST", "KW_ENUM", "KW_FN", "KW_LET", "KW_MUT", "KW_PUBLIC", "KW_CONST",
"KW_STATIC", "KW_STRUCT", "KW_UNION", "KW_ENUM",
"KW_FN",
"KW_LET",
"KW_MUT",
"KW_PUBLIC",
"KW_STATIC",
"KW_STRUCT",
"KW_UNION",
// Control Flow // Control Flow
"KW_BREAK", "KW_CASE", "KW_CONTINUE", "KW_DEFAULT", "KW_DO", "KW_ELSE", "KW_BREAK",
"KW_FOR", "KW_IF", "KW_MATCH", "KW_RETURN", "KW_SWITCH", "KW_WHILE", "KW_CONTINUE",
"KW_DO",
"KW_ELSE",
"KW_FOR",
"KW_IF",
"KW_RETURN",
"KW_WHILE",
// Values // Values
"KW_FALSE", "KW_TRUE", "KW_ERROR",
"KW_FALSE",
"KW_NONE",
"KW_SOME",
"KW_TRUE",
"KW_VALUE",
// Types // Types
"KW_BOOL", "KW_F32", "KW_F64", "KW_I8", "KW_I16", "KW_I32", "KW_I64", "KW_BOOL",
"KW_U8", "KW_U16", "KW_U32", "KW_U64", "KW_VOID", "KW_F32",
"KW_F64",
"KW_I8",
"KW_I16",
"KW_I32",
"KW_I64",
"KW_OPTION",
"KW_RESULT",
"KW_U8",
"KW_U16",
"KW_U32",
"KW_U64",
"KW_VOID",
// Modules // Modules
"KW_AS", "KW_IMPORT", "KW_AS",
"KW_IMPORT",
// Operators // Operators
"KW_AND", "KW_IS", "KW_NOT", "KW_OR", "KW_SIZEOF", "KW_XOR", "KW_AND",
"KW_CAST",
"KW_IS",
"KW_NOT",
"KW_OR",
"KW_SIZEOF",
}; };
static void print_token(Token token) { static void print_token(Token token) {

View File

@ -6,7 +6,7 @@ static void print_token(Token token) {
(const char *[]){ (const char *[]){
"EOF", "IDENTIFIER", "NUMBER", "OPERATOR", "EOF", "IDENTIFIER", "NUMBER", "OPERATOR",
"LPAREN", "RPAREN", "SEMICOLON", "UNKNOWN" "LPAREN", "RPAREN", "SEMICOLON", "UNKNOWN"
}[token.type], }[token.type_],
(int)token.length, token.start (int)token.length, token.start
); );
} }
@ -37,12 +37,12 @@ int main(void) {
TokenResult result; TokenResult result;
do { do {
result = lexer_next(&lexer); result = lexer_next(&lexer);
if (result.type == SYNC_RESULT) { if (result.type_ == SYNC_RESULT) {
print_token(result.result); print_token(result.result);
} else { } else {
fprintf(stderr, "Error: %s\n", result.error.message); fprintf(stderr, "Error: %s\n", result.error.message);
} }
} while (result.type != SYNC_ERROR && result.result.type != TOKEN_EOF); } while (result.type_ != SYNC_ERROR && result.result.type_ != TOKEN_EOF);
free(source); free(source);
return 0; return 0;