diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6f31401
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+build/
+.vscode/
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..9c57067
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,25 @@
+cmake_minimum_required(VERSION 3.13)
+project(sync C)
+
+set(CMAKE_C_STANDARD 99)
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
+
+include_directories(include)
+
+# ---- Source Library (exclude main.c) ----
+file(GLOB SYNC_LIB_SRC src/*.c)
+list(REMOVE_ITEM SYNC_LIB_SRC "${CMAKE_SOURCE_DIR}/src/main.c")
+add_library(sync_lib STATIC ${SYNC_LIB_SRC})
+
+# ---- sync executable ----
+add_executable(sync src/main.c)
+target_link_libraries(sync PRIVATE sync_lib)
+
+# ---- Tests ----
+file(GLOB TEST_FILES test/test_*.c)
+foreach(TEST_FILE ${TEST_FILES})
+    get_filename_component(TEST_NAME ${TEST_FILE} NAME_WE)
+    add_executable(${TEST_NAME} ${TEST_FILE})
+    target_link_libraries(${TEST_NAME} PRIVATE sync_lib)
+    target_include_directories(${TEST_NAME} PRIVATE include)
+endforeach()
diff --git a/include/sync/lexer.h b/include/sync/lexer.h
new file mode 100644
index 0000000..c16e115
--- /dev/null
+++ b/include/sync/lexer.h
@@ -0,0 +1,39 @@
+#ifndef SYNC_LEXER_H
+#define SYNC_LEXER_H
+
+#include <stddef.h>
+
+// Category of a lexed token.
+typedef enum {
+    TOKEN_EOF,
+    TOKEN_IDENTIFIER,
+    TOKEN_NUMBER,
+    TOKEN_OPERATOR,
+    TOKEN_LPAREN,
+    TOKEN_RPAREN,
+    TOKEN_SEMICOLON,
+    TOKEN_UNKNOWN,
+} TokenType;
+
+// A token is a view into the lexer's source buffer: `start` points at its
+// first character and `length` is its size in bytes. It is not NUL-terminated.
+typedef struct {
+    TokenType type;
+    const char *start;
+    size_t length;
+} Token;
+
+// Lexer state: the borrowed, NUL-terminated source and the read offset into it.
+typedef struct {
+    const char *source;
+    size_t pos;
+} Lexer;
+
+// Points `lexer` at `source` and rewinds it to the first character.
+void lexer_init(Lexer *lexer, const char *source);
+
+// Skips whitespace and `//` comments, then returns the next token.
+// Returns a zero-length TOKEN_EOF once the terminating NUL is reached.
+Token lexer_next(Lexer *lexer);
+
+#endif // SYNC_LEXER_H
diff --git a/src/lexer.c b/src/lexer.c
new file mode 100644
index 0000000..d9c95d0
--- /dev/null
+++ b/src/lexer.c
@@ -0,0 +1,107 @@
+#include <ctype.h>
+#include <stddef.h>
+#include "sync/lexer.h"
+
+// Points `lexer` at the NUL-terminated `source` and rewinds it to offset 0.
+void lexer_init(Lexer *lexer, const char *source) {
+    lexer->source = source;
+    lexer->pos = 0;
+}
+
+// Returns the current character without consuming it.
+static char peek(const Lexer *lexer) {
+    return lexer->source[lexer->pos];
+}
+
+// Returns the character after the current one, or '\0' when the current
+// character is already the terminator (so we never read past the buffer).
+static char double_peek(const Lexer *lexer) {
+    if (peek(lexer) == '\0') return '\0';
+    return lexer->source[lexer->pos + 1];
+}
+
+// Consumes and returns the current character.
+static char advance(Lexer *lexer) {
+    return lexer->source[lexer->pos++];
+}
+
+// The <ctype.h> classifiers require a value representable as unsigned char
+// (or EOF); a plain `char` may be negative, so every call goes through a cast.
+static int is_space(char c) {
+    return isspace((unsigned char)c);
+}
+
+static int is_digit(char c) {
+    return isdigit((unsigned char)c);
+}
+
+static int is_identifier_start(char c) {
+    return isalpha((unsigned char)c) || c == '_';
+}
+
+static int is_identifier_char(char c) {
+    return isalnum((unsigned char)c) || c == '_';
+}
+
+// Skips any run of whitespace and `//` line comments before the next token.
+static void skip_trivia(Lexer *lexer) {
+    for (;;) {
+        if (is_space(peek(lexer))) {
+            advance(lexer);
+        } else if (peek(lexer) == '/' && double_peek(lexer) == '/') {
+            // Stop at the newline or at the terminator: a comment on the
+            // last line of the input has no trailing '\n'.
+            while (peek(lexer) != '\n' && peek(lexer) != '\0') advance(lexer);
+        } else {
+            // Includes a lone '/', which is the division operator.
+            return;
+        }
+    }
+}
+
+// Builds a token of `length` bytes starting at offset `start` of the source.
+static Token make_token(const Lexer *lexer, TokenType type, size_t start, size_t length) {
+    return (Token){type, &lexer->source[start], length};
+}
+
+// Returns the next token, or a zero-length TOKEN_EOF at the end of the input.
+// Calling again after TOKEN_EOF keeps returning TOKEN_EOF.
+Token lexer_next(Lexer *lexer) {
+    skip_trivia(lexer);
+
+    const size_t start = lexer->pos;
+    const char c = peek(lexer);
+
+    // The terminator is never consumed, so the lexer stays parked on it.
+    if (c == '\0') {
+        return make_token(lexer, TOKEN_EOF, start, 0);
+    }
+
+    if (is_identifier_start(c)) {
+        while (is_identifier_char(peek(lexer))) advance(lexer);
+        return make_token(lexer, TOKEN_IDENTIFIER, start, lexer->pos - start);
+    }
+
+    if (is_digit(c)) {
+        while (is_digit(peek(lexer))) advance(lexer);
+        return make_token(lexer, TOKEN_NUMBER, start, lexer->pos - start);
+    }
+
+    advance(lexer);
+    switch (c) {
+        case '=':
+            // `==` is one two-character operator; consume its second half so
+            // it is not lexed again as a separate `=`.
+            if (peek(lexer) == '=') advance(lexer);
+            return make_token(lexer, TOKEN_OPERATOR, start, lexer->pos - start);
+        case '+':
+        case '-':
+        case '*':
+        case '/':
+            return make_token(lexer, TOKEN_OPERATOR, start, 1);
+        case '(': return make_token(lexer, TOKEN_LPAREN, start, 1);
+        case ')': return make_token(lexer, TOKEN_RPAREN, start, 1);
+        case ';': return make_token(lexer, TOKEN_SEMICOLON, start, 1);
+        default:  return make_token(lexer, TOKEN_UNKNOWN, start, 1);
+    }
+}
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..cad33ca
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,31 @@
+#include <stdio.h>
+#include "sync/lexer.h"
+
+// Prints one token as "Token: <KIND> | Text: '<lexeme>'".
+static void print_token(Token token) {
+    // Indexed by TokenType, so the order must match the enum declaration.
+    static const char *const kind_names[] = {
+        "EOF", "IDENTIFIER", "NUMBER", "OPERATOR",
+        "LPAREN", "RPAREN", "SEMICOLON", "UNKNOWN"
+    };
+
+    // Tokens are not NUL-terminated, so bound the %s by the token length.
+    printf("Token: %-15s | Text: '%.*s'\n",
+           kind_names[token.type], (int)token.length, token.start);
+}
+
+// Lexes a small sample program and prints every token, including EOF.
+int main(void) {
+    const char *source = "sum = a + b123;\nprint(sum);";
+
+    Lexer lexer;
+    lexer_init(&lexer, source);
+
+    Token token;
+    do {
+        token = lexer_next(&lexer);
+        print_token(token);
+    } while (token.type != TOKEN_EOF);
+
+    return 0;
+}
diff --git a/test/test_lexer.c b/test/test_lexer.c
new file mode 100644
index 0000000..fbc64e3
--- /dev/null
+++ b/test/test_lexer.c
@@ -0,0 +1,82 @@
+// Keep assert() active even when the build defines NDEBUG (e.g. Release).
+#undef NDEBUG
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include "../include/sync/lexer.h"
+
+// Asserts that the next token has the given type and exactly the given text.
+static void expect_token(Lexer *lexer, TokenType type, const char *text) {
+    Token t = lexer_next(lexer);
+    assert(t.type == type);
+    assert(t.length == strlen(text));
+    assert(memcmp(t.start, text, t.length) == 0);
+}
+
+void test_tokenize_simple_assignment(void) {
+    Lexer lexer;
+    lexer_init(&lexer, "x = 42;");
+
+    expect_token(&lexer, TOKEN_IDENTIFIER, "x");
+    expect_token(&lexer, TOKEN_OPERATOR, "=");
+    expect_token(&lexer, TOKEN_NUMBER, "42");
+    expect_token(&lexer, TOKEN_SEMICOLON, ";");
+    expect_token(&lexer, TOKEN_EOF, "");
+}
+
+void test_tokenize_function_call(void) {
+    Lexer lexer;
+    lexer_init(&lexer, "print(x);");
+
+    expect_token(&lexer, TOKEN_IDENTIFIER, "print");
+    expect_token(&lexer, TOKEN_LPAREN, "(");
+    expect_token(&lexer, TOKEN_IDENTIFIER, "x");
+    expect_token(&lexer, TOKEN_RPAREN, ")");
+    expect_token(&lexer, TOKEN_SEMICOLON, ";");
+    expect_token(&lexer, TOKEN_EOF, "");
+}
+
+// A lone '/' is the division operator, not the start of a comment.
+void test_tokenize_division(void) {
+    Lexer lexer;
+    lexer_init(&lexer, "a / b");
+
+    expect_token(&lexer, TOKEN_IDENTIFIER, "a");
+    expect_token(&lexer, TOKEN_OPERATOR, "/");
+    expect_token(&lexer, TOKEN_IDENTIFIER, "b");
+    expect_token(&lexer, TOKEN_EOF, "");
+}
+
+// `==` is a single operator token and must not be followed by a stray `=`.
+void test_tokenize_equality(void) {
+    Lexer lexer;
+    lexer_init(&lexer, "a == b");
+
+    expect_token(&lexer, TOKEN_IDENTIFIER, "a");
+    expect_token(&lexer, TOKEN_OPERATOR, "==");
+    expect_token(&lexer, TOKEN_IDENTIFIER, "b");
+    expect_token(&lexer, TOKEN_EOF, "");
+}
+
+// Comments are skipped, including one that ends at the end of the input.
+void test_skip_comments(void) {
+    Lexer lexer;
+    lexer_init(&lexer, "x // first\n;// no trailing newline");
+
+    expect_token(&lexer, TOKEN_IDENTIFIER, "x");
+    expect_token(&lexer, TOKEN_SEMICOLON, ";");
+    expect_token(&lexer, TOKEN_EOF, "");
+    // EOF is sticky: asking again must not read past the terminator.
+    expect_token(&lexer, TOKEN_EOF, "");
+}
+
+int main(void) {
+    test_tokenize_simple_assignment();
+    test_tokenize_function_call();
+    test_tokenize_division();
+    test_tokenize_equality();
+    test_skip_comments();
+
+    printf("All lexer tests passed.\n");
+    return 0;
+}