Worked on lexing
This commit is contained in:
parent
389890a7e7
commit
c00ff5c798
|
|
@ -8,6 +8,7 @@
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
#include "sls/sls_errors.h"
|
#include "sls/sls_errors.h"
|
||||||
#include "sls/lexer.h"
|
#include "sls/lexer.h"
|
||||||
|
|
@ -76,6 +77,11 @@ static FileInfo get_file_info(LexerInfo *lexer_info, size_t start, size_t start_
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const char *get_token_text(LexerInfo *lexer_info, size_t start) {
    // Returns a pointer into the lexer's source buffer at offset `start`.
    // Nothing is copied and `start` is not bounds checked.
    const char *source = lexer_info->source_code;
    return &source[start];
}
|
||||||
|
|
||||||
static char peek(LexerInfo *lexer_info) {
|
static char peek(LexerInfo *lexer_info) {
|
||||||
// Returns the current character from the source code
|
// Returns the current character from the source code
|
||||||
return lexer_info->source_code[lexer_info->pos];
|
return lexer_info->source_code[lexer_info->pos];
|
||||||
|
|
@ -112,7 +118,7 @@ static LexerResult lexer_result(LexerInfo *lexer_info, Token token, size_t start
|
||||||
return (LexerResult){SLS_RESULT, .result = result};
|
return (LexerResult){SLS_RESULT, .result = result};
|
||||||
}
|
}
|
||||||
|
|
||||||
static LexerResult lexer_error(LexerInfo *lexer, const char* message, size_t start, size_t start_line) {
|
static LexerResult lexer_error(LexerInfo *lexer_info, const char* message, size_t start, size_t start_line) {
|
||||||
// Create a LexerTokenResult to store an error from lexing the current token
|
// Create a LexerTokenResult to store an error from lexing the current token
|
||||||
LexerTokenResult *result = (LexerTokenResult *)malloc(sizeof(LexerTokenResult));
|
LexerTokenResult *result = (LexerTokenResult *)malloc(sizeof(LexerTokenResult));
|
||||||
if (result == NULL)
|
if (result == NULL)
|
||||||
|
|
@ -120,12 +126,109 @@ static LexerResult lexer_error(LexerInfo *lexer, const char* message, size_t sta
|
||||||
result->type = SLS_ERROR;
|
result->type = SLS_ERROR;
|
||||||
result->error.message = message;
|
result->error.message = message;
|
||||||
result->error.code = 1;
|
result->error.code = 1;
|
||||||
result->file_info = get_file_info(lexer, start, start_line);
|
result->file_info = get_file_info(lexer_info, start, start_line);
|
||||||
result->next = NULL;
|
result->next = NULL;
|
||||||
// SLS_RESULT is used here because we successfully created a LexerTokenResult reporting an error
|
// SLS_RESULT is used here because we successfully created a LexerTokenResult reporting an error
|
||||||
return (LexerResult){SLS_RESULT, .result = result};
|
return (LexerResult){SLS_RESULT, .result = result};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Lexes the remainder of a binary integer literal.
//
// Calls advance() repeatedly while it returns '0', '1' or '_', then inspects the
// first character that is none of those:
//   - ':'                                 -> SLS_ERROR (typed binary integers are not implemented yet)
//   - whitespace, '/' or '\0' (end input) -> SLS_ERROR (binary integers are not implemented yet)
//   - anything else                       -> lexer_error() result naming the unexpected character
//
// The message handed to lexer_error() is heap allocated here and is not freed by
// this function. If that allocation fails an SLS_ERROR result is returned instead.
static LexerResult parse_binary_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Size of the formatted message below: 57 characters plus the terminating NUL
    enum { MESSAGE_SIZE = 58 };

    do {
        c = advance(lexer_info);
    } while (c == '0' || c == '1' || c == '_');

    if (c == ':')
        return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Binary Integer Type Not Implemented Error.", 1}};

    // <ctype.h> functions need a value representable as unsigned char, hence the cast.
    // '\0' is treated as a terminator so it is never formatted into the message below.
    if (c == '\0' || isspace((unsigned char)c) || c == '/')
        return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Binary Integer Not Implemented Error.", 1}};

    char *error_message = malloc(MESSAGE_SIZE);
    if (error_message == NULL)
        return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Out Of Memory Error.", 1}};

    snprintf(error_message, MESSAGE_SIZE, "Invalid binary literal: unexpected '%c' in binary integer.", c);
    return lexer_error(lexer_info, error_message, start, start_line);
}
|
||||||
|
|
||||||
|
// Lexes the remainder of an octal integer literal.
//
// Calls advance() repeatedly while it returns an octal digit ('0'..'7') or '_',
// then inspects the first character that is none of those:
//   - ':'                                 -> SLS_ERROR (typed octal integers are not implemented yet)
//   - whitespace, '/' or '\0' (end input) -> SLS_ERROR (octal integers are not implemented yet)
//   - anything else                       -> lexer_error() result naming the unexpected character
//
// The message handed to lexer_error() is heap allocated here and is not freed by
// this function. If that allocation fails an SLS_ERROR result is returned instead.
static LexerResult parse_octal_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Size of the formatted message below: 55 characters plus the terminating NUL
    enum { MESSAGE_SIZE = 56 };

    // C guarantees '0'..'9' are contiguous, so a range check selects exactly the octal digits
    do {
        c = advance(lexer_info);
    } while ((c >= '0' && c <= '7') || c == '_');

    if (c == ':')
        return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Octal Integer Type Not Implemented Error.", 1}};

    // <ctype.h> functions need a value representable as unsigned char, hence the cast.
    // '\0' is treated as a terminator so it is never formatted into the message below.
    if (c == '\0' || isspace((unsigned char)c) || c == '/')
        return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Octal Integer Not Implemented Error.", 1}};

    char *error_message = malloc(MESSAGE_SIZE);
    if (error_message == NULL)
        return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Out Of Memory Error.", 1}};

    snprintf(error_message, MESSAGE_SIZE, "Invalid octal literal: unexpected '%c' in octal integer.", c);
    return lexer_error(lexer_info, error_message, start, start_line);
}
|
||||||
|
|
||||||
|
static LexerResult parse_exponential(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Placeholder for exponential float literals: every argument is ignored
    // and a fixed SLS_ERROR result is returned.
    (void)lexer_info;
    (void)c;
    (void)start;
    (void)start_line;

    LexerResult not_implemented = {SLS_ERROR, .error = (SlsError){"Lexer: Float Exponential Not Implemented Error.", 1}};
    return not_implemented;
}
|
||||||
|
|
||||||
|
static LexerResult parse_float(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Placeholder for float literals: every argument is ignored and a fixed
    // SLS_ERROR result is returned.
    (void)lexer_info;
    (void)c;
    (void)start;
    (void)start_line;

    LexerResult not_implemented = {SLS_ERROR, .error = (SlsError){"Lexer: Float Not Implemented Error.", 1}};
    return not_implemented;
}
|
||||||
|
|
||||||
|
// Lexes the remainder of a decimal numeric literal.
//
// Calls advance() repeatedly while it returns a decimal digit or '_', then
// inspects the first character that is none of those:
//   - '.'                                 -> handed to parse_float()
//   - 'e' or 'E'                          -> handed to parse_exponential()
//   - ':'                                 -> SLS_ERROR (typed decimal integers are not implemented yet)
//   - whitespace, '/' or '\0' (end input) -> SLS_ERROR (decimal integers are not implemented yet)
//   - anything else                       -> lexer_error() result naming the unexpected character
//
// The message handed to lexer_error() is heap allocated here and is not freed by
// this function. If that allocation fails an SLS_ERROR result is returned instead.
static LexerResult parse_decimal_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Size of the formatted message below: 59 characters plus the terminating NUL
    enum { MESSAGE_SIZE = 60 };

    // <ctype.h> functions need a value representable as unsigned char, hence the casts
    do {
        c = advance(lexer_info);
    } while (isdigit((unsigned char)c) || c == '_');

    if (c == '.')
        return parse_float(lexer_info, c, start, start_line);
    if (c == 'e' || c == 'E')
        return parse_exponential(lexer_info, c, start, start_line);
    if (c == ':')
        return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Decimal Integer Type Not Implemented Error.", 1}};

    // '\0' is treated as a terminator so it is never formatted into the message below
    if (c == '\0' || isspace((unsigned char)c) || c == '/')
        return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Decimal Integer Not Implemented Error.", 1}};

    char *error_message = malloc(MESSAGE_SIZE);
    if (error_message == NULL)
        return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Out Of Memory Error.", 1}};

    snprintf(error_message, MESSAGE_SIZE, "Invalid decimal literal: unexpected '%c' in decimal integer.", c);
    return lexer_error(lexer_info, error_message, start, start_line);
}
|
||||||
|
|
||||||
|
// Lexes the remainder of a hexadecimal integer literal.
//
// Calls advance() repeatedly while it returns a hexadecimal digit or '_', then
// inspects the first character that is none of those:
//   - ':'                                 -> SLS_ERROR (typed hexadecimal integers are not implemented yet)
//   - whitespace, '/' or '\0' (end input) -> SLS_ERROR (hexadecimal integers are not implemented yet)
//   - anything else                       -> lexer_error() result naming the unexpected character
//
// The message handed to lexer_error() is heap allocated here and is not freed by
// this function. If that allocation fails an SLS_ERROR result is returned instead.
static LexerResult parse_hexadecimal_integer(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Size of the formatted message below: 67 characters plus the terminating NUL
    enum { MESSAGE_SIZE = 68 };

    // <ctype.h> functions need a value representable as unsigned char, hence the casts
    do {
        c = advance(lexer_info);
    } while (isxdigit((unsigned char)c) || c == '_');

    if (c == ':')
        return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Hexadecimal Integer Type Not Implemented Error.", 1}};

    // '\0' is treated as a terminator so it is never formatted into the message below
    if (c == '\0' || isspace((unsigned char)c) || c == '/')
        return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Hexadecimal Integer Not Implemented Error.", 1}};

    char *error_message = malloc(MESSAGE_SIZE);
    if (error_message == NULL)
        return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Out Of Memory Error.", 1}};

    snprintf(error_message, MESSAGE_SIZE, "Invalid hexadecimal literal: unexpected '%c' in hexadecimal integer.", c);
    return lexer_error(lexer_info, error_message, start, start_line);
}
|
||||||
|
|
||||||
|
static LexerResult parse_numeric_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Picks the parser for a numeric literal based on its prefix.
    // A '-' in `c` is replaced by the result of advance().
    if (c == '-') {
        c = advance(lexer_info);
    }

    // After a '0', the character returned by the next advance() selects the base.
    if (c == '0') {
        c = advance(lexer_info);
        switch (c) {
            case 'b':
            case 'B':
                return parse_binary_integer(lexer_info, c, start, start_line);
            case 'o':
            case 'O':
                return parse_octal_integer(lexer_info, c, start, start_line);
            case 'x':
            case 'X':
                return parse_hexadecimal_integer(lexer_info, c, start, start_line);
            default:
                break;
        }
    }

    // Everything without a recognised base prefix is lexed as a decimal literal.
    return parse_decimal_integer(lexer_info, c, start, start_line);
}
|
||||||
|
|
||||||
|
static LexerResult parse_character_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Placeholder for character literals: every argument is ignored and a
    // fixed SLS_ERROR result is returned.
    (void)lexer_info;
    (void)c;
    (void)start;
    (void)start_line;

    LexerResult not_implemented = {SLS_ERROR, .error = (SlsError){"Lexer: Character Literals Not Implemented Error.", 1}};
    return not_implemented;
}
|
||||||
|
|
||||||
|
static LexerResult parse_string_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Placeholder for string literals: every argument is ignored and a fixed
    // SLS_ERROR result is returned.
    (void)lexer_info;
    (void)c;
    (void)start;
    (void)start_line;

    LexerResult not_implemented = {SLS_ERROR, .error = (SlsError){"Lexer: String Literals Not Implemented Error.", 1}};
    return not_implemented;
}
|
||||||
|
|
||||||
|
static LexerResult parse_token_string(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Placeholder for token strings: every argument is ignored and a fixed
    // SLS_ERROR result is returned.
    (void)lexer_info;
    (void)c;
    (void)start;
    (void)start_line;

    LexerResult not_implemented = {SLS_ERROR, .error = (SlsError){"Lexer: Token Strings Not Implemented Error.", 1}};
    return not_implemented;
}
|
||||||
|
|
||||||
|
static LexerResult parse_array_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Placeholder for array literals: every argument is ignored and a fixed
    // SLS_ERROR result is returned.
    (void)lexer_info;
    (void)c;
    (void)start;
    (void)start_line;

    LexerResult not_implemented = {SLS_ERROR, .error = (SlsError){"Lexer: Array Literals Not Implemented Error.", 1}};
    return not_implemented;
}
|
||||||
|
|
||||||
|
static LexerResult parse_type_tuples(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Placeholder for type tuples: every argument is ignored and a fixed
    // SLS_ERROR result is returned.
    (void)lexer_info;
    (void)c;
    (void)start;
    (void)start_line;

    LexerResult not_implemented = {SLS_ERROR, .error = (SlsError){"Lexer: Type Tuples Not Implemented Error.", 1}};
    return not_implemented;
}
|
||||||
|
|
||||||
|
static LexerResult parse_identifiers_and_booleans(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    // Placeholder for identifiers and booleans: every argument is ignored and
    // a fixed SLS_ERROR result is returned.
    (void)lexer_info;
    (void)c;
    (void)start;
    (void)start_line;

    LexerResult not_implemented = {SLS_ERROR, .error = (SlsError){"Lexer: Identifiers and Booleans Not Implemented Error.", 1}};
    return not_implemented;
}
|
||||||
|
|
||||||
static LexerResult lexer_next(LexerInfo *lexer_info) {
|
static LexerResult lexer_next(LexerInfo *lexer_info) {
|
||||||
// Gets the next token from the source
|
// Gets the next token from the source
|
||||||
|
|
||||||
|
|
@ -143,38 +246,23 @@ static LexerResult lexer_next(LexerInfo *lexer_info) {
|
||||||
size_t start_line = lexer_info->line;
|
size_t start_line = lexer_info->line;
|
||||||
|
|
||||||
// End of file tokens
|
// End of file tokens
|
||||||
if (c == '\0') {
|
if (c == '\0') return lexer_result(lexer_info, (Token){.type = TOKEN_EOF}, start, start_line);
|
||||||
return lexer_result(lexer_info, (Token){.type = TOKEN_EOF}, start, start_line);
|
|
||||||
}
|
|
||||||
return lexer_error(lexer_info, "Lexer: Not Implemented Error", start, start_line);
|
|
||||||
|
|
||||||
// Integers and Floats
|
// Integers and Floats
|
||||||
if (isdigit(c)) {
|
if (isdigit(c) || c == '.' || (c == '-' && isdigit(far_peek(lexer_info, 1)))) return parse_numeric_literal(lexer_info, c, start, start_line);
|
||||||
}
|
|
||||||
|
|
||||||
// Character Literals
|
// Character Literals
|
||||||
if (c == '\'') {
|
if (c == '\'') return parse_character_literal(lexer_info, c, start, start_line);
|
||||||
}
|
|
||||||
|
|
||||||
// String Literals
|
// String Literals
|
||||||
if (c == '\"') {
|
if (c == '\"') return parse_string_literal(lexer_info, c, start, start_line);
|
||||||
}
|
|
||||||
|
|
||||||
// Token Strings
|
// Token Strings
|
||||||
if (c == '{') {
|
if (c == '{') return parse_token_string(lexer_info, c, start, start_line);
|
||||||
}
|
|
||||||
|
|
||||||
// Array Literals
|
// Array Literals
|
||||||
if (c == '[') {
|
if (c == '[') return parse_array_literal(lexer_info, c, start, start_line);
|
||||||
}
|
|
||||||
|
|
||||||
// Type Tuples
|
// Type Tuples
|
||||||
if (c == '(') {
|
if (c == '(') return parse_type_tuples(lexer_info, c, start, start_line);
|
||||||
}
|
|
||||||
|
|
||||||
// Identifiers and Booleans
|
// Identifiers and Booleans
|
||||||
if (isascii(c) && isprint(c) && !isspace(c) && c != '.') {
|
if (isascii(c)) return parse_identifiers_and_booleans(lexer_info, c, start, start_line);
|
||||||
}
|
// Lexing Error
|
||||||
|
return (LexerResult){SLS_ERROR, .error = (SlsError){"Lexer: Unknown Character Error.", 1}};
|
||||||
}
|
}
|
||||||
|
|
||||||
void clean_token_result(LexerTokenResult *head) {
|
void clean_token_result(LexerTokenResult *head) {
|
||||||
|
|
@ -182,6 +270,7 @@ void clean_token_result(LexerTokenResult *head) {
|
||||||
LexerTokenResult *next;
|
LexerTokenResult *next;
|
||||||
while (head) {
|
while (head) {
|
||||||
next = head->next;
|
next = head->next;
|
||||||
|
if (head->type == SLS_ERROR) free(head->error.message);
|
||||||
if (head) free(head);
|
if (head) free(head);
|
||||||
head = next;
|
head = next;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue