sync/src/lexer.c

76 lines
2.0 KiB
C

#include <ctype.h>
#include <string.h>
#include "sync/lexer.h"
/*
 * Points `lexer` at `source` and rewinds its read position to the first
 * character. Only the pointer is stored; the text itself is not copied, so
 * later reads go through the caller's buffer.
 */
void lexer_init(Lexer *lexer, const char *source) {
    lexer->pos = 0;
    lexer->source = source;
}
/* Returns the character at the current position without consuming it. */
static char peek(Lexer *lexer) {
    const char *cursor = lexer->source + lexer->pos;
    return *cursor;
}
/*
 * Returns the character one position past the current one without consuming
 * anything.
 *
 * When the current character is already the '\0' terminator there is nothing
 * beyond it to look at, so '\0' is returned instead of reading past the end
 * of the string.
 */
static char double_peek(Lexer *lexer) {
    if (lexer->source[lexer->pos] == '\0') {
        return '\0';
    }
    return lexer->source[lexer->pos + 1];
}
/*
 * Consumes one character: returns the character at the current position and
 * moves the position forward by one. No end-of-input check is made here;
 * callers decide when to stop.
 */
static char advance(Lexer *lexer) {
    char consumed = lexer->source[lexer->pos];
    lexer->pos += 1;
    return consumed;
}
/*
 * Returns 1 if `c` may begin an identifier (a letter or '_'), otherwise 0.
 *
 * The value is converted to unsigned char before it reaches isalpha():
 * <ctype.h> functions have undefined behaviour for negative arguments other
 * than EOF, and plain char may be signed.
 */
static int is_identifier_start(char c) {
    return c == '_' || isalpha((unsigned char)c) != 0;
}
/*
 * Returns 1 if `c` may appear inside an identifier (a letter, a digit or
 * '_'), otherwise 0.
 *
 * The value is converted to unsigned char before it reaches isalnum():
 * <ctype.h> functions have undefined behaviour for negative arguments other
 * than EOF, and plain char may be signed.
 */
static int is_identifier_char(char c) {
    return c == '_' || isalnum((unsigned char)c) != 0;
}
/*
 * Scans and returns the next token, starting at lexer->pos and leaving
 * lexer->pos just past the characters that make up the token.
 *
 * Leading whitespace and `//` line comments are skipped first. Each returned
 * Token is built from (type, pointer to the token's first character inside
 * lexer->source, length in characters). The source is treated as
 * '\0'-terminated: reaching the terminator yields TOKEN_EOF with length 0,
 * and the position is not moved, so further calls keep returning TOKEN_EOF.
 *
 * Token classes:
 *   - identifiers: a letter or '_' followed by letters, digits or '_'
 *   - numbers:     a run of decimal digits
 *   - operators:   "==", "=", "+", "-", "*", "/"
 *   - punctuation: "(", ")", ";"
 *   - anything else is a one-character TOKEN_UNKNOWN
 */
Token lexer_next(Lexer *lexer) {
    // Skip any mix of whitespace and line comments. A '/' that does not
    // start a "//" comment is the division operator, so it must end the
    // skipping; otherwise nothing would consume it and this loop would
    // never terminate.
    for (;;) {
        char ahead = peek(lexer);
        if (isspace((unsigned char)ahead)) {
            advance(lexer);
        } else if (ahead == '/' && double_peek(lexer) == '/') {
            // Stop at the newline, or at the terminator so that a comment on
            // the last line (no trailing newline) cannot run off the buffer.
            while (peek(lexer) != '\n' && peek(lexer) != '\0') {
                advance(lexer);
            }
        } else {
            break;
        }
    }

    char c = peek(lexer);
    size_t start = lexer->pos;

    // End of input: do not advance, so EOF is sticky.
    if (c == '\0') {
        return (Token){TOKEN_EOF, &lexer->source[start], 0};
    }

    // Identifiers
    if (is_identifier_start(c)) {
        while (is_identifier_char(peek(lexer))) {
            advance(lexer);
        }
        return (Token){TOKEN_IDENTIFIER, &lexer->source[start], lexer->pos - start};
    }

    // Numbers (cast: <ctype.h> functions must not receive a negative char)
    if (isdigit((unsigned char)c)) {
        while (isdigit((unsigned char)peek(lexer))) {
            advance(lexer);
        }
        return (Token){TOKEN_NUMBER, &lexer->source[start], lexer->pos - start};
    }

    // Everything below is driven by the single character `c`; consume it.
    advance(lexer);
    switch (c) {
    case '=':
        if (peek(lexer) == '=') {
            // Consume the second '=' as well, so the next call does not
            // lex it again as a separate operator.
            advance(lexer);
            return (Token){TOKEN_OPERATOR, &lexer->source[start], 2};
        }
        /* fallthrough: a lone '=' is a one-character operator */
    case '+':
    case '-':
    case '*':
    case '/':
        return (Token){TOKEN_OPERATOR, &lexer->source[start], 1};
    case '(':
        return (Token){TOKEN_LPAREN, &lexer->source[start], 1};
    case ')':
        return (Token){TOKEN_RPAREN, &lexer->source[start], 1};
    case ';':
        return (Token){TOKEN_SEMICOLON, &lexer->source[start], 1};
    default:
        return (Token){TOKEN_UNKNOWN, &lexer->source[start], 1};
    }
}