Compare commits
No commits in common. "2e773218be00463a27ad813d07c574f306e684d6" and "f6049fc64467acd6102594fda5358a2b016b5e1a" have entirely different histories.
2e773218be
...
f6049fc644
|
@ -12,10 +12,6 @@ typedef enum {
|
||||||
TOKEN_LPAREN,
|
TOKEN_LPAREN,
|
||||||
TOKEN_RPAREN,
|
TOKEN_RPAREN,
|
||||||
TOKEN_SEMICOLON,
|
TOKEN_SEMICOLON,
|
||||||
TOKEN_LBRACE,
|
|
||||||
TOKEN_RBRACE,
|
|
||||||
TOKEN_LBRACKET,
|
|
||||||
TOKEN_RBRACKET,
|
|
||||||
} TokenType;
|
} TokenType;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
|
100
src/lexer.c
100
src/lexer.c
|
@ -39,23 +39,6 @@ static char advance(Lexer *lexer) {
|
||||||
return lexer->source[lexer->pos++];
|
return lexer->source[lexer->pos++];
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef enum {
|
|
||||||
DIGIT_IS_BINARY = 1 << 0,
|
|
||||||
DIGIT_IS_OCTAL = 1 << 1,
|
|
||||||
DIGIT_IS_HEXADECIMAL = 1 << 2,
|
|
||||||
} DigitFlags;
|
|
||||||
|
|
||||||
static char is_digit_start(char c) {
|
|
||||||
return isdigit(c);
|
|
||||||
}
|
|
||||||
|
|
||||||
static char is_digit_char(char c, DigitFlags flags) {
|
|
||||||
if (flags & DIGIT_IS_BINARY) return c == '0' || c == '1' || c == '_';
|
|
||||||
if (flags & DIGIT_IS_OCTAL) return isdigit(c) && c < '8' || c == '_';
|
|
||||||
if (flags & DIGIT_IS_HEXADECIMAL) return isxdigit(c) || c == '_';
|
|
||||||
return isdigit(c) || c == '_';
|
|
||||||
}
|
|
||||||
|
|
||||||
static char is_identifier_start(char c) {
|
static char is_identifier_start(char c) {
|
||||||
return isalpha(c) || c == '_';
|
return isalpha(c) || c == '_';
|
||||||
}
|
}
|
||||||
|
@ -64,14 +47,6 @@ static char is_identifier_char(char c) {
|
||||||
return isalnum(c) || c == '_';
|
return isalnum(c) || c == '_';
|
||||||
}
|
}
|
||||||
|
|
||||||
static TokenResult lexer_result(Lexer *lexer, TokenType type, size_t start, size_t start_line) {
|
|
||||||
return (TokenResult){SYNC_RESULT, .result = (Token){type, &lexer->source[start], lexer->pos - start, get_file_info(lexer, start, start_line)}};
|
|
||||||
}
|
|
||||||
|
|
||||||
static TokenResult lexer_error(Lexer *lexer, const char *message, size_t start, size_t start_line) {
|
|
||||||
return (TokenResult){SYNC_ERROR, .error = (SyncError){SYNC_LEXER_ERROR, message, get_file_info(lexer, start, start_line)}};
|
|
||||||
}
|
|
||||||
|
|
||||||
TokenResult lexer_next(Lexer *lexer) {
|
TokenResult lexer_next(Lexer *lexer) {
|
||||||
// Gets the next token from the source
|
// Gets the next token from the source
|
||||||
|
|
||||||
|
@ -89,79 +64,34 @@ TokenResult lexer_next(Lexer *lexer) {
|
||||||
|
|
||||||
// End of file tokens
|
// End of file tokens
|
||||||
if (c == '\0') {
|
if (c == '\0') {
|
||||||
return lexer_result(lexer, TOKEN_EOF, start, start_line);
|
return (TokenResult){SYNC_RESULT, .result = (Token){TOKEN_EOF, &lexer->source[start], 0, get_file_info(lexer, start, start_line)}};
|
||||||
}
|
|
||||||
|
|
||||||
// Digits
|
|
||||||
if (isdigit(c)) {
|
|
||||||
DigitFlags flags = 0;
|
|
||||||
if (c == '0') {
|
|
||||||
advance(lexer);
|
|
||||||
if (peek(lexer) == 'b' || peek(lexer) == 'B') {
|
|
||||||
flags |= DIGIT_IS_BINARY; advance(lexer);
|
|
||||||
} else if (peek(lexer) == 'o' || peek(lexer) == 'O') {
|
|
||||||
flags |= DIGIT_IS_OCTAL; advance(lexer);
|
|
||||||
} else if (peek(lexer) == 'x' || peek(lexer) == 'X') {
|
|
||||||
flags |= DIGIT_IS_HEXADECIMAL; advance(lexer);
|
|
||||||
} else if (is_digit_char(peek(lexer), flags)) {
|
|
||||||
return lexer_error(lexer, "Invalid number format", start, start_line);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
while (is_digit_char(peek(lexer), flags)) advance(lexer);
|
|
||||||
if (isspace(peek(lexer)) || peek(lexer) == ';' || peek(lexer) == '\0' || peek(lexer) == ')' || peek(lexer) == '(' || peek(lexer) == ',') {
|
|
||||||
return lexer_result(lexer, TOKEN_NUMBER, start, start_line);
|
|
||||||
} else {
|
|
||||||
return lexer_error(lexer, "Invalid number format", start, start_line);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Identifiers
|
// Identifiers
|
||||||
if (is_identifier_start(c)) {
|
if (is_identifier_start(c)) {
|
||||||
while (is_identifier_char(peek(lexer))) advance(lexer);
|
while (is_identifier_char(peek(lexer))) advance(lexer);
|
||||||
return lexer_result(lexer, TOKEN_IDENTIFIER, start, start_line);
|
return (TokenResult){SYNC_RESULT, .result = (Token){TOKEN_IDENTIFIER, &lexer->source[start], lexer->pos - start, get_file_info(lexer, start, start_line)}};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Digits
|
||||||
|
if (isdigit(c)) {
|
||||||
|
while (isdigit(peek(lexer))) advance(lexer);
|
||||||
|
return (TokenResult){SYNC_RESULT, .result = (Token){TOKEN_NUMBER, &lexer->source[start], lexer->pos - start, get_file_info(lexer, start, start_line)}};
|
||||||
}
|
}
|
||||||
|
|
||||||
advance(lexer);
|
advance(lexer);
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case '=':
|
case '=':
|
||||||
if (peek(lexer) == '=') advance(lexer);
|
if (peek(lexer) == '=') return (TokenResult){SYNC_RESULT, .result = (Token){TOKEN_OPERATOR, &lexer->source[start], 2, get_file_info(lexer, start, start_line)}};
|
||||||
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
|
|
||||||
case '>':
|
|
||||||
if (peek(lexer) == '=' || peek(lexer) == '>') advance(lexer);
|
|
||||||
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
|
|
||||||
case '<':
|
|
||||||
if (peek(lexer) == '=' || peek(lexer) == '<') advance(lexer);
|
|
||||||
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
|
|
||||||
case '!':
|
|
||||||
if (peek(lexer) == '=') advance(lexer);
|
|
||||||
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
|
|
||||||
case '&':
|
|
||||||
if (peek(lexer) == '=' || peek(lexer) == '&') advance(lexer);
|
|
||||||
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
|
|
||||||
case '|':
|
|
||||||
if (peek(lexer) == '=' || peek(lexer) == '|') advance(lexer);
|
|
||||||
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
|
|
||||||
case '+':
|
case '+':
|
||||||
if (peek(lexer) == '=') advance(lexer);
|
|
||||||
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
|
|
||||||
case '-':
|
case '-':
|
||||||
if (peek(lexer) == '=') advance(lexer);
|
|
||||||
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
|
|
||||||
case '*':
|
case '*':
|
||||||
if (peek(lexer) == '=') advance(lexer);
|
|
||||||
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
|
|
||||||
case '/':
|
case '/':
|
||||||
if (peek(lexer) == '=') advance(lexer);
|
return (TokenResult){SYNC_RESULT, .result = (Token){TOKEN_OPERATOR, &lexer->source[start], 1, get_file_info(lexer, start, start_line)}};
|
||||||
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
|
case '(': return (TokenResult){SYNC_RESULT, .result = (Token){TOKEN_LPAREN, &lexer->source[start], 1, get_file_info(lexer, start, start_line)}};
|
||||||
case '.': return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
|
case ')': return (TokenResult){SYNC_RESULT, .result = (Token){TOKEN_RPAREN, &lexer->source[start], 1, get_file_info(lexer, start, start_line)}};
|
||||||
case ',': return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
|
case ';': return (TokenResult){SYNC_RESULT, .result = (Token){TOKEN_SEMICOLON, &lexer->source[start], 1, get_file_info(lexer, start, start_line)}};
|
||||||
case '(': return lexer_result(lexer, TOKEN_LPAREN, start, start_line);
|
default:
|
||||||
case ')': return lexer_result(lexer, TOKEN_RPAREN, start, start_line);
|
return (TokenResult){SYNC_ERROR, .error = (SyncError){SYNC_LEXER_ERROR, "Unknown token", get_file_info(lexer, start, start_line)}};
|
||||||
case ';': return lexer_result(lexer, TOKEN_SEMICOLON, start, start_line);
|
|
||||||
case '}': return lexer_result(lexer, TOKEN_RBRACE, start, start_line);
|
|
||||||
case '{': return lexer_result(lexer, TOKEN_LBRACE, start, start_line);
|
|
||||||
case ']': return lexer_result(lexer, TOKEN_RBRACKET, start, start_line);
|
|
||||||
case '[': return lexer_result(lexer, TOKEN_LBRACKET, start, start_line);
|
|
||||||
default: return lexer_error(lexer, "Unknown token", start, start_line);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
28
src/main.c
28
src/main.c
|
@ -1,5 +1,4 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
|
||||||
#include "sync/types.h"
|
#include "sync/types.h"
|
||||||
#include "sync/lexer.h"
|
#include "sync/lexer.h"
|
||||||
|
|
||||||
|
@ -7,35 +6,17 @@ static void print_token(Token token) {
|
||||||
printf("Token: %-15s | Text: '%.*s'\n",
|
printf("Token: %-15s | Text: '%.*s'\n",
|
||||||
(const char *[]){
|
(const char *[]){
|
||||||
"EOF", "IDENTIFIER", "NUMBER", "OPERATOR",
|
"EOF", "IDENTIFIER", "NUMBER", "OPERATOR",
|
||||||
"LPAREN", "RPAREN", "SEMICOLON", "LBRACE",
|
"LPAREN", "RPAREN", "SEMICOLON", "UNKNOWN"
|
||||||
"RBRACE", "LBRACKET", "RBRACKET"
|
|
||||||
}[token.type],
|
}[token.type],
|
||||||
(int)token.length, token.start
|
(int)token.length, token.start
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(void) {
|
int main(void) {
|
||||||
const char *filename = "test/example1.zn";
|
const char *source = "sum = a + b123;\nprint(sum);";
|
||||||
FILE *file = fopen(filename, "rb");
|
|
||||||
if (!file) {
|
|
||||||
fprintf(stderr, "Failed to open file: %s\n", filename);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
fseek(file, 0, SEEK_END);
|
|
||||||
long filesize = ftell(file);
|
|
||||||
fseek(file, 0, SEEK_SET);
|
|
||||||
char *source = malloc(filesize + 1);
|
|
||||||
if (!source) {
|
|
||||||
fprintf(stderr, "Failed to allocate memory.\n");
|
|
||||||
fclose(file);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
fread(source, 1, filesize, file);
|
|
||||||
source[filesize] = '\0';
|
|
||||||
fclose(file);
|
|
||||||
|
|
||||||
Lexer lexer;
|
Lexer lexer;
|
||||||
lexer_init(&lexer, filename, source);
|
lexer_init(&lexer, "<stdin>", source);
|
||||||
|
|
||||||
TokenResult result;
|
TokenResult result;
|
||||||
do {
|
do {
|
||||||
|
@ -44,11 +25,8 @@ int main(void) {
|
||||||
print_token(result.result);
|
print_token(result.result);
|
||||||
} else {
|
} else {
|
||||||
fprintf(stderr, "Error: %s\n", result.error.message);
|
fprintf(stderr, "Error: %s\n", result.error.message);
|
||||||
fprintf(stderr, "\tFilename: %s\n", result.error.file_info.filename);
|
|
||||||
fprintf(stderr, "\tLine: %zi\n", result.error.file_info.line);
|
|
||||||
}
|
}
|
||||||
} while (result.type != SYNC_ERROR && result.result.type != TOKEN_EOF);
|
} while (result.type != SYNC_ERROR && result.result.type != TOKEN_EOF);
|
||||||
|
|
||||||
free(source);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,49 +0,0 @@
|
||||||
// Lexer Test Example1
|
|
||||||
// Currently not valid ZINC code
|
|
||||||
|
|
||||||
static void print_token(Token token) {
|
|
||||||
printf("Token: %-15s | Text: '%.*s'\n",
|
|
||||||
(const char *[]){
|
|
||||||
"EOF", "IDENTIFIER", "NUMBER", "OPERATOR",
|
|
||||||
"LPAREN", "RPAREN", "SEMICOLON", "UNKNOWN"
|
|
||||||
}[token.type],
|
|
||||||
(int)token.length, token.start
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
int main(void) {
|
|
||||||
const char *filename = "test/example1.zn";
|
|
||||||
FILE *file = fopen(filename, "rb");
|
|
||||||
if (!file) {
|
|
||||||
fprintf(stderr, "Failed to open file: %s\n", filename);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
fseek(file, 0, SEEK_END);
|
|
||||||
long filesize = ftell(file);
|
|
||||||
fseek(file, 0, SEEK_SET);
|
|
||||||
char *source = malloc(filesize + 1);
|
|
||||||
if (!source) {
|
|
||||||
fprintf(stderr, "Failed to allocate memory.\n");
|
|
||||||
fclose(file);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
fread(source, 1, filesize, file);
|
|
||||||
source[filesize] = '\0';
|
|
||||||
fclose(file);
|
|
||||||
|
|
||||||
Lexer lexer;
|
|
||||||
lexer_init(&lexer, filename, source);
|
|
||||||
|
|
||||||
TokenResult result;
|
|
||||||
do {
|
|
||||||
result = lexer_next(&lexer);
|
|
||||||
if (result.type == SYNC_RESULT) {
|
|
||||||
print_token(result.result);
|
|
||||||
} else {
|
|
||||||
fprintf(stderr, "Error: %s\n", result.error.message);
|
|
||||||
}
|
|
||||||
} while (result.type != SYNC_ERROR && result.result.type != TOKEN_EOF);
|
|
||||||
|
|
||||||
free(source);
|
|
||||||
return 0;
|
|
||||||
}
|
|
Loading…
Reference in New Issue