diff --git a/include/sync/lexer.h b/include/sync/lexer.h index a6956fd..756d88e 100644 --- a/include/sync/lexer.h +++ b/include/sync/lexer.h @@ -11,18 +11,64 @@ typedef enum { TOKEN_EOF, + // Literals and values TOKEN_IDENTIFIER, TOKEN_NUMBER, - TOKEN_OPERATOR, - TOKEN_LPAREN, - TOKEN_RPAREN, - TOKEN_SEMICOLON, - TOKEN_LBRACE, - TOKEN_RBRACE, - TOKEN_LBRACKET, - TOKEN_RBRACKET, TOKEN_CHARACTER, TOKEN_STRING, + // Punctuation + TOKEN_SEMICOLON, + TOKEN_COLON, + TOKEN_COMMA, + TOKEN_L_PARENTHESIS, + TOKEN_R_PARENTHESIS, + TOKEN_L_CURLY_BRACE, + TOKEN_R_CURLY_BRACE, + TOKEN_L_SQUARE_BRACKET, + TOKEN_R_SQUARE_BRACKET, + // --- Operators --- + // Pointers + TOKEN_OPER_DEREFERENCE_ZINC, + TOKEN_OPER_ADDRESS_OF_ZINC, + TOKEN_OPER_DEREFERENCE_MEMBER_OF, + TOKEN_OPER_MEMBER_OF, + // Bitwise + TOKEN_OPER_BITWISE_NOT, + TOKEN_OPER_BITSHIFT_LEFT, + TOKEN_OPER_BITSHIFT_RIGHT, + TOKEN_OPER_BITWISE_AND, + TOKEN_OPER_BITWISE_XOR, + TOKEN_OPER_BITWISE_OR, + // Boolean + TOKEN_OPER_BOOLEAN_NOT, + TOKEN_OPER_BOOLEAN_AND, + TOKEN_OPER_BOOLEAN_OR, + // Arithmetic + TOKEN_OPER_MULTIPLICATION, + TOKEN_OPER_DIVISION, + TOKEN_OPER_MODULUS, + TOKEN_OPER_ADDITION, + TOKEN_OPER_SUBTRACTION, + // Comparison + TOKEN_OPER_GREATER_THAN, + TOKEN_OPER_GREATER_THAN_OR_EQUAL_TO, + TOKEN_OPER_LESS_THAN, + TOKEN_OPER_LESS_THAN_OR_EQUAL_TO, + TOKEN_OPER_EQUALITY, + TOKEN_OPER_INEQUALITY, + // Assignment + TOKEN_OPER_ASSIGNMENT, + TOKEN_OPER_ADDITION_ASSIGNMENT, + TOKEN_OPER_SUBTRACTION_ASSIGNMENT, + TOKEN_OPER_MULTIPLICATION_ASSIGNMENT, + TOKEN_OPER_DIVISION_ASSIGNMENT, + TOKEN_OPER_MODULUS_ASSIGNMENT, + TOKEN_OPER_BITWISE_AND_ASSIGNMENT, + TOKEN_OPER_BITWISE_OR_ASSIGNMENT, + TOKEN_OPER_BITWISE_XOR_ASSIGNMENT, + TOKEN_OPER_BITSHIFT_LEFT_ASSIGNMENT, + TOKEN_OPER_BITSHIFT_RIGHT_ASSIGNMENT, + // --- Keywords --- // Definitions and Declarations TOKEN_KW_CONST, TOKEN_KW_ENUM, @@ -74,7 +120,6 @@ typedef enum { TOKEN_KW_NOT, TOKEN_KW_OR, TOKEN_KW_SIZEOF, - TOKEN_KW_XOR, } TokenType; typedef struct { diff 
--git a/src/lexer.c b/src/lexer.c index 333c69d..25194c0 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -71,7 +71,6 @@ const KeywordPair KEYWORDS[] = { {"not", TOKEN_KW_NOT}, {"or", TOKEN_KW_OR}, {"sizeof", TOKEN_KW_SIZEOF}, - {"xor", TOKEN_KW_XOR}, }; const char *RESERVED_KEYWORDS[] = { @@ -88,6 +87,7 @@ const char *RESERVED_KEYWORDS[] = { "in", "match", "move", + "namespace", "new", "pointer", "Pointer", @@ -307,49 +307,83 @@ static LexerResult lexer_next(Lexer* lexer) { } advance(lexer); + TokenType punctuation; /* token type picked by the switch below; every non-error case assigns it */ switch (c) { - case '=': - if (peek(lexer) == '=') advance(lexer); - return lexer_result(lexer, TOKEN_OPERATOR, start, start_line); - case '>': - if (peek(lexer) == '=' || peek(lexer) == '>') advance(lexer); - return lexer_result(lexer, TOKEN_OPERATOR, start, start_line); - case '<': - if (peek(lexer) == '=' || peek(lexer) == '<') advance(lexer); - return lexer_result(lexer, TOKEN_OPERATOR, start, start_line); - case '!': - if (peek(lexer) == '=') advance(lexer); - return lexer_result(lexer, TOKEN_OPERATOR, start, start_line); + case ';': punctuation = TOKEN_SEMICOLON; break; + case ':': punctuation = TOKEN_COLON; break; + case ',': punctuation = TOKEN_COMMA; break; + case '(': punctuation = TOKEN_L_PARENTHESIS; break; + case ')': punctuation = TOKEN_R_PARENTHESIS; break; + case '{': punctuation = TOKEN_L_CURLY_BRACE; break; + case '}': punctuation = TOKEN_R_CURLY_BRACE; break; + case '[': punctuation = TOKEN_L_SQUARE_BRACKET; break; + case ']': punctuation = TOKEN_R_SQUARE_BRACKET; break; + case '#': punctuation = TOKEN_OPER_DEREFERENCE_ZINC; break; + case '@': punctuation = TOKEN_OPER_ADDRESS_OF_ZINC; break; + case '.': punctuation = TOKEN_OPER_MEMBER_OF; break; + case '~': punctuation = TOKEN_OPER_BITWISE_NOT; break; case '&': - if (peek(lexer) == '=' || peek(lexer) == '&') advance(lexer); - return lexer_result(lexer, TOKEN_OPERATOR, start, start_line); + if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_BITWISE_AND_ASSIGNMENT; } + else if 
(peek(lexer) == '&') { advance(lexer); punctuation = TOKEN_OPER_BOOLEAN_AND; } /* chained with else-if so "&=" keeps its assignment token */ + else punctuation = TOKEN_OPER_BITWISE_AND; + break; + case '^': + if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_BITWISE_XOR_ASSIGNMENT; } + else punctuation = TOKEN_OPER_BITWISE_XOR; + break; case '|': - if (peek(lexer) == '=' || peek(lexer) == '|') advance(lexer); - return lexer_result(lexer, TOKEN_OPERATOR, start, start_line); + if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_BITWISE_OR_ASSIGNMENT; } + else if (peek(lexer) == '|') { advance(lexer); punctuation = TOKEN_OPER_BOOLEAN_OR; } /* chained with else-if so "|=" keeps its assignment token */ + else punctuation = TOKEN_OPER_BITWISE_OR; /* reached only for a lone '|' */ + break; + case '!': + if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_INEQUALITY; } + else punctuation = TOKEN_OPER_BOOLEAN_NOT; + break; case '+': - if (peek(lexer) == '=') advance(lexer); - return lexer_result(lexer, TOKEN_OPERATOR, start, start_line); + if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_ADDITION_ASSIGNMENT; } + else punctuation = TOKEN_OPER_ADDITION; + break; case '-': - if (peek(lexer) == '=' || peek(lexer) == '>') advance(lexer); - return lexer_result(lexer, TOKEN_OPERATOR, start, start_line); + if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_SUBTRACTION_ASSIGNMENT; } + else if (peek(lexer) == '>') { advance(lexer); punctuation = TOKEN_OPER_DEREFERENCE_MEMBER_OF; } + else punctuation = TOKEN_OPER_SUBTRACTION; + break; case '*': - if (peek(lexer) == '=') advance(lexer); - return lexer_result(lexer, TOKEN_OPERATOR, start, start_line); + if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_MULTIPLICATION_ASSIGNMENT; } + else punctuation = TOKEN_OPER_MULTIPLICATION; + break; case '/': - if (peek(lexer) == '=') advance(lexer); - return lexer_result(lexer, TOKEN_OPERATOR, start, start_line); - case '.': return lexer_result(lexer, TOKEN_OPERATOR, start, start_line); - case ',': return lexer_result(lexer, TOKEN_OPERATOR, start, start_line); - case 
':': return lexer_result(lexer, TOKEN_OPERATOR, start, start_line); - case '(': return lexer_result(lexer, TOKEN_LPAREN, start, start_line); - case ')': return lexer_result(lexer, TOKEN_RPAREN, start, start_line); - case ';': return lexer_result(lexer, TOKEN_SEMICOLON, start, start_line); - case '}': return lexer_result(lexer, TOKEN_RBRACE, start, start_line); - case '{': return lexer_result(lexer, TOKEN_LBRACE, start, start_line); - case ']': return lexer_result(lexer, TOKEN_RBRACKET, start, start_line); - case '[': return lexer_result(lexer, TOKEN_LBRACKET, start, start_line); + if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_DIVISION_ASSIGNMENT; } + else punctuation = TOKEN_OPER_DIVISION; + break; + case '%': + if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_MODULUS_ASSIGNMENT; } + else punctuation = TOKEN_OPER_MODULUS; + break; + case '<': + if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_LESS_THAN_OR_EQUAL_TO; } + else if (peek(lexer) == '<') { + advance(lexer); + if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_BITSHIFT_LEFT_ASSIGNMENT; } + else punctuation = TOKEN_OPER_BITSHIFT_LEFT; + } else punctuation = TOKEN_OPER_LESS_THAN; + break; + case '>': + if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_GREATER_THAN_OR_EQUAL_TO; } + else if (peek(lexer) == '>') { + advance(lexer); + if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_BITSHIFT_RIGHT_ASSIGNMENT; } + else punctuation = TOKEN_OPER_BITSHIFT_RIGHT; + } else punctuation = TOKEN_OPER_GREATER_THAN; + break; + case '=': + if (peek(lexer) == '=') { advance(lexer); punctuation = TOKEN_OPER_EQUALITY; } + else punctuation = TOKEN_OPER_ASSIGNMENT; + break; default: return lexer_error(lexer, "Unknown token", start, start_line); } + return lexer_result(lexer, punctuation, start, start_line); } LexerResult lexical_analysis(Lexer *lexer) { diff --git a/src/main.c b/src/main.c index 8dcd9a9..435bb1d 100644 --- 
a/src/main.c +++ b/src/main.c @@ -9,24 +9,117 @@ #include "sync/lexer.h" #include "sync/syntax.h" -const char* TOKEN_TYPES[] = { - "EOF", "IDENTIFIER", "NUMBER", "OPERATOR", "LPAREN", "RPAREN", "SEMICOLON", - "LBRACE", "RBRACE", "LBRACKET", "RBRACKET", "CHARACTER", "STRING", +const char *TOKEN_TYPES[] = { /* NOTE(review): presumably indexed by TokenType, so entry order must mirror the enum in sync/lexer.h - confirm */ + "EOF", + // Literals and values + "IDENTIFIER", + "NUMBER", + "CHARACTER", + "STRING", + // Punctuation + "SEMICOLON", + "COLON", + "COMMA", + "L_PARENTHESIS", + "R_PARENTHESIS", + "L_CURLY_BRACE", + "R_CURLY_BRACE", + "L_SQUARE_BRACKET", + "R_SQUARE_BRACKET", + // --- Operators --- + // Pointers + "OPER_DEREFERENCE_ZINC", + "OPER_ADDRESS_OF_ZINC", + "OPER_DEREFERENCE_MEMBER_OF", + "OPER_MEMBER_OF", + // Bitwise + "OPER_BITWISE_NOT", + "OPER_BITSHIFT_LEFT", + "OPER_BITSHIFT_RIGHT", + "OPER_BITWISE_AND", + "OPER_BITWISE_XOR", + "OPER_BITWISE_OR", + // Boolean + "OPER_BOOLEAN_NOT", + "OPER_BOOLEAN_AND", + "OPER_BOOLEAN_OR", + // Arithmetic + "OPER_MULTIPLICATION", + "OPER_DIVISION", + "OPER_MODULUS", + "OPER_ADDITION", + "OPER_SUBTRACTION", + // Comparison + "OPER_GREATER_THAN", + "OPER_GREATER_THAN_OR_EQUAL_TO", + "OPER_LESS_THAN", + "OPER_LESS_THAN_OR_EQUAL_TO", + "OPER_EQUALITY", + "OPER_INEQUALITY", + // Assignment + "OPER_ASSIGNMENT", + "OPER_ADDITION_ASSIGNMENT", + "OPER_SUBTRACTION_ASSIGNMENT", + "OPER_MULTIPLICATION_ASSIGNMENT", + "OPER_DIVISION_ASSIGNMENT", + "OPER_MODULUS_ASSIGNMENT", + "OPER_BITWISE_AND_ASSIGNMENT", + "OPER_BITWISE_OR_ASSIGNMENT", + "OPER_BITWISE_XOR_ASSIGNMENT", + "OPER_BITSHIFT_LEFT_ASSIGNMENT", + "OPER_BITSHIFT_RIGHT_ASSIGNMENT", + // --- Keywords --- // Definitions and Declarations - "KW_CONST", "KW_ENUM", "KW_FN", "KW_LET", "KW_MUT", "KW_PUBLIC", - "KW_STATIC", "KW_STRUCT", "KW_UNION", + "KW_CONST", + "KW_ENUM", + "KW_FN", + "KW_LET", + "KW_MUT", + "KW_PUBLIC", + "KW_STATIC", + "KW_STRUCT", + "KW_UNION", // Control Flow - "KW_BREAK", "KW_CASE", "KW_CONTINUE", "KW_DEFAULT", "KW_DO", "KW_ELSE", - "KW_FOR", "KW_IF", "KW_MATCH", 
"KW_RETURN", "KW_SWITCH", "KW_WHILE", + "KW_BREAK", + "KW_CONTINUE", + "KW_DO", + "KW_ELSE", + "KW_FOR", + "KW_IF", + "KW_RETURN", + "KW_WHILE", // Values - "KW_FALSE", "KW_TRUE", + "KW_ERROR", + "KW_FALSE", + "KW_NONE", + "KW_SOME", + "KW_TRUE", + "KW_VALUE", // Types - "KW_BOOL", "KW_F32", "KW_F64", "KW_I8", "KW_I16", "KW_I32", "KW_I64", - "KW_U8", "KW_U16", "KW_U32", "KW_U64", "KW_VOID", + "KW_BOOL", + "KW_F32", + "KW_F64", + "KW_I8", + "KW_I16", + "KW_I32", + "KW_I64", + "KW_OPTION", + "KW_RESULT", + "KW_U8", + "KW_U16", + "KW_U32", + "KW_U64", + "KW_VOID", // Modules - "KW_AS", "KW_IMPORT", + "KW_AS", + "KW_IMPORT", // Operators - "KW_AND", "KW_IS", "KW_NOT", "KW_OR", "KW_SIZEOF", "KW_XOR", + "KW_AND", + "KW_CAST", + "KW_IS", + "KW_NOT", + "KW_OR", + "KW_SIZEOF", }; static void print_token(Token token) { diff --git a/test/example1.zn b/test/example1.zn index 3acc2a0..ad99b49 100644 --- a/test/example1.zn +++ b/test/example1.zn @@ -6,7 +6,7 @@ static void print_token(Token token) { (const char *[]){ "EOF", "IDENTIFIER", "NUMBER", "OPERATOR", "LPAREN", "RPAREN", "SEMICOLON", "UNKNOWN" - }[token.type], + }[token.type_], (int)token.length, token.start ); } @@ -37,12 +37,12 @@ int main(void) { TokenResult result; do { result = lexer_next(&lexer); - if (result.type == SYNC_RESULT) { + if (result.type_ == SYNC_RESULT) { print_token(result.result); } else { fprintf(stderr, "Error: %s\n", result.error.message); } - } while (result.type != SYNC_ERROR && result.result.type != TOKEN_EOF); + } while (result.type_ != SYNC_ERROR && result.result.type_ != TOKEN_EOF); free(source); return 0;