Expanded number token reading and added more operators and punctuation

This commit is contained in:
Kyler Olsen 2025-06-12 21:39:57 -06:00
parent 145ba2c181
commit e174f95d5c
2 changed files with 89 additions and 15 deletions

View File

@ -12,6 +12,10 @@ typedef enum {
TOKEN_LPAREN,
TOKEN_RPAREN,
TOKEN_SEMICOLON,
TOKEN_LBRACE,
TOKEN_RBRACE,
TOKEN_LBRACKET,
TOKEN_RBRACKET,
} TokenType;
typedef struct {

View File

@ -39,6 +39,23 @@ static char advance(Lexer *lexer) {
return lexer->source[lexer->pos++];
}
typedef enum {
DIGIT_IS_BINARY = 1 << 0,
DIGIT_IS_OCTAL = 1 << 1,
DIGIT_IS_HEXADECIMAL = 1 << 2,
} DigitFlags;
static char is_digit_start(char c) {
return isdigit(c);
}
static char is_digit_char(char c, DigitFlags flags) {
if (flags & DIGIT_IS_BINARY) return c == '0' || c == '1' || c == '_';
if (flags & DIGIT_IS_OCTAL) return isdigit(c) && c < '8' || c == '_';
if (flags & DIGIT_IS_HEXADECIMAL) return isxdigit(c) || c == '_';
return isdigit(c) || c == '_';
}
/* Returns non-zero if `c` can begin an identifier (letter or '_'). */
static char is_identifier_start(char c) {
    /* Cast to unsigned char: passing a possibly-negative plain char to a
     * <ctype.h> function is undefined behavior (C11 7.4p1). */
    return isalpha((unsigned char)c) || c == '_';
}
@ -47,6 +64,14 @@ static char is_identifier_char(char c) {
return isalnum(c) || c == '_';
}
/* Wraps the lexeme spanning [start, lexer->pos) in a successful TokenResult.
 * start_line is the line on which the lexeme began, used for file info. */
static TokenResult lexer_result(Lexer *lexer, TokenType type, size_t start, size_t start_line) {
    Token token = {type, &lexer->source[start], lexer->pos - start, get_file_info(lexer, start, start_line)};
    TokenResult out = {SYNC_RESULT, .result = token};
    return out;
}
static TokenResult lexer_error(Lexer *lexer, const char *message, size_t start, size_t start_line) {
return (TokenResult){SYNC_ERROR, .error = (SyncError){SYNC_LEXER_ERROR, message, get_file_info(lexer, start, start_line)}};
}
TokenResult lexer_next(Lexer *lexer) {
// Gets the next token from the source
@ -64,34 +89,79 @@ TokenResult lexer_next(Lexer *lexer) {
// End of file tokens
if (c == '\0') {
return (TokenResult){SYNC_RESULT, .result = (Token){TOKEN_EOF, &lexer->source[start], 0, get_file_info(lexer, start, start_line)}};
return lexer_result(lexer, TOKEN_EOF, start, start_line);
}
// Digits
if (isdigit(c)) {
DigitFlags flags = 0;
if (c == '0') {
advance(lexer);
if (peek(lexer) == 'b' || peek(lexer) == 'B') {
flags |= DIGIT_IS_BINARY; advance(lexer);
} else if (peek(lexer) == 'o' || peek(lexer) == 'O') {
flags |= DIGIT_IS_OCTAL; advance(lexer);
} else if (peek(lexer) == 'x' || peek(lexer) == 'X') {
flags |= DIGIT_IS_HEXADECIMAL; advance(lexer);
} else if (is_digit_char(peek(lexer), flags)) {
return lexer_error(lexer, "Invalid number format", start, start_line);
}
}
while (is_digit_char(peek(lexer), flags)) advance(lexer);
if (isspace(peek(lexer)) || peek(lexer) == ';' || peek(lexer) == '\0' || peek(lexer) == ')' || peek(lexer) == '(' || peek(lexer) == ',') {
return lexer_result(lexer, TOKEN_NUMBER, start, start_line);
} else {
return lexer_error(lexer, "Invalid number format", start, start_line);
}
}
// Identifiers
if (is_identifier_start(c)) {
while (is_identifier_char(peek(lexer))) advance(lexer);
return (TokenResult){SYNC_RESULT, .result = (Token){TOKEN_IDENTIFIER, &lexer->source[start], lexer->pos - start, get_file_info(lexer, start, start_line)}};
}
// Digits
if (isdigit(c)) {
while (isdigit(peek(lexer))) advance(lexer);
return (TokenResult){SYNC_RESULT, .result = (Token){TOKEN_NUMBER, &lexer->source[start], lexer->pos - start, get_file_info(lexer, start, start_line)}};
return lexer_result(lexer, TOKEN_IDENTIFIER, start, start_line);
}
advance(lexer);
switch (c) {
case '=':
if (peek(lexer) == '=') return (TokenResult){SYNC_RESULT, .result = (Token){TOKEN_OPERATOR, &lexer->source[start], 2, get_file_info(lexer, start, start_line)}};
if (peek(lexer) == '=') advance(lexer);
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
case '>':
if (peek(lexer) == '=' || peek(lexer) == '>') advance(lexer);
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
case '<':
if (peek(lexer) == '=' || peek(lexer) == '<') advance(lexer);
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
case '!':
if (peek(lexer) == '=') advance(lexer);
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
case '&':
if (peek(lexer) == '=' || peek(lexer) == '&') advance(lexer);
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
case '|':
if (peek(lexer) == '=' || peek(lexer) == '|') advance(lexer);
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
case '+':
if (peek(lexer) == '=') advance(lexer);
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
case '-':
if (peek(lexer) == '=') advance(lexer);
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
case '*':
if (peek(lexer) == '=') advance(lexer);
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
case '/':
return (TokenResult){SYNC_RESULT, .result = (Token){TOKEN_OPERATOR, &lexer->source[start], 1, get_file_info(lexer, start, start_line)}};
case '(': return (TokenResult){SYNC_RESULT, .result = (Token){TOKEN_LPAREN, &lexer->source[start], 1, get_file_info(lexer, start, start_line)}};
case ')': return (TokenResult){SYNC_RESULT, .result = (Token){TOKEN_RPAREN, &lexer->source[start], 1, get_file_info(lexer, start, start_line)}};
case ';': return (TokenResult){SYNC_RESULT, .result = (Token){TOKEN_SEMICOLON, &lexer->source[start], 1, get_file_info(lexer, start, start_line)}};
default:
return (TokenResult){SYNC_ERROR, .error = (SyncError){SYNC_LEXER_ERROR, "Unknown token", get_file_info(lexer, start, start_line)}};
if (peek(lexer) == '=') advance(lexer);
return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
case '.': return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
case ',': return lexer_result(lexer, TOKEN_OPERATOR, start, start_line);
case '(': return lexer_result(lexer, TOKEN_LPAREN, start, start_line);
case ')': return lexer_result(lexer, TOKEN_RPAREN, start, start_line);
case ';': return lexer_result(lexer, TOKEN_SEMICOLON, start, start_line);
case '}': return lexer_result(lexer, TOKEN_RBRACE, start, start_line);
case '{': return lexer_result(lexer, TOKEN_LBRACE, start, start_line);
case ']': return lexer_result(lexer, TOKEN_RBRACKET, start, start_line);
case '[': return lexer_result(lexer, TOKEN_LBRACKET, start, start_line);
default: return lexer_error(lexer, "Unknown token", start, start_line);
}
}