Characters

This commit is contained in:
Kyler Olsen 2025-11-17 22:53:15 -07:00
parent ffe8008bb9
commit f8894ea4c0
5 changed files with 130 additions and 4562 deletions

View File

@ -593,8 +593,48 @@ static LexerResult parse_numeric_literal(LexerInfo *lexer_info, char c, size_t s
}
/**
 * Lex a single character literal.
 *
 * The first comparison treats `c == '\''` as an empty literal, so `c` is
 * expected to be the character that follows the opening quote.
 * NOTE(review): this relies on the caller having already consumed the
 * opening quote before calling - confirm against lexer_next.
 *
 * Recognised escapes: \n, \r, \t, \\, \' and \0. Any other character after a
 * backslash is reported as an unknown escape sequence.
 *
 * @param lexer_info Lexer state; read through advance().
 * @param c          First character of the literal body (see note above).
 * @param start      Forwarded unchanged to lexer_result / lexer_error.
 * @param start_line Forwarded unchanged to lexer_result / lexer_error.
 * @return The value of lexer_result() for a TOKEN_CHARACTER token carrying the
 *         decoded byte, or an error result describing why the literal is invalid.
 */
static LexerResult parse_character_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    if (c == '\'')
        return lexer_error(lexer_info, SLS_STR("Invalid character literal: empty character literal."), start, start_line);

    char value = '\0';
    if (c == '\\') {
        c = advance(lexer_info);
        switch (c) {
        case 'n':
            value = '\n';
            break;
        case 'r':
            value = '\r';
            break;
        case 't':
            value = '\t';
            break;
        case '\\':
            value = '\\';
            break;
        case '\'':
            value = '\'';
            break;
        case '0':
            value = '\0';
            break;
        default: {
            /* Braces are required: before C23 a label must be followed by a
             * statement, and a declaration is not a statement. */
            SlsStr error_msg = sls_format(SLS_STR("Invalid character literal: unknown escape sequence '\\%c'."), c);
            if (error_msg.str == NULL)
                return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Out Of Memory Error."), 1}};
            return lexer_error(lexer_info, error_msg, start, start_line);
        }
        }
    } else if (c == '\n' || c == '\r') {
        /* A raw line break inside the quotes means the literal was never closed. */
        return lexer_error(lexer_info, SLS_STR("Invalid character literal: unclosed character literal."), start, start_line);
    } else {
        value = c;
    }

    /* The character after the literal body must be the closing quote. */
    c = advance(lexer_info);
    /* <ctype.h> functions require a value representable as unsigned char;
     * plain char may be negative for bytes >= 0x80. */
    if (isspace((unsigned char)c) || c == '/' || c == '\0')
        return lexer_error(lexer_info, SLS_STR("Invalid character literal: unclosed character literal."), start, start_line);
    if (c != '\'') {
        SlsStr error_msg = sls_format(SLS_STR("Invalid character literal: unexpected '%c' in character."), c);
        if (error_msg.str == NULL)
            return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Out Of Memory Error."), 1}};
        return lexer_error(lexer_info, error_msg, start, start_line);
    }

    advance(lexer_info);
    return lexer_result(lexer_info, (Token){TOKEN_CHARACTER, .character_literal = (uint8_t)value}, start, start_line);
}
static LexerResult parse_string_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
@ -644,7 +684,10 @@ static LexerResult lexer_next(LexerInfo *lexer_info) {
if (isdigit(c) || (c == '.' && isdigit(far_peek(lexer_info, 1))) || (c == '-' && isdigit(far_peek(lexer_info, 1))))
return parse_numeric_literal(lexer_info, c, start, start_line);
// Character Literals
if (c == '\'') return parse_character_literal(lexer_info, c, start, start_line);
if (c == '\'') {
c = advance(lexer_info);
return parse_character_literal(lexer_info, c, start, start_line);
}
// String Literals
if (c == '\"') return parse_string_literal(lexer_info, c, start, start_line);
// Token Strings

View File

@ -291,7 +291,7 @@ Boolean test_integer_value(LexerTest *test, LexerResult result, size_t i, TestIn
}
Boolean test_character_value(LexerTest *test, LexerResult result, size_t i, uint8_t *value) {
static const TokenType token_type = TOKEN_INTEGER;
static const TokenType token_type = TOKEN_CHARACTER;
LexerTokenResult *head = get_token(result.result, i);
if (test_token_type(test, result, i, token_type)) {
return TRUE;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -7,22 +7,12 @@ class CharTestGenerator(BaseTestGenerator):
# Common escape sequences
ESCAPE_SEQUENCES = {
("Newline", '\\n', '\n',), # Newline
("Carriage return", '\\r', '\r',), # Carriage return
("Tab", '\\t', '\t',), # Tab
("Backslash", '\\\\', '\\',), # Backslash
("Double quote", '\\\\"', '"',), # Double quote
("Single quote", "\\'", "'",), # Single quote
("Null character", '\\0', '\0',), # Null character
}
# Hexadecimal escape examples
HEX_ESCAPES = {
'\\x41': 'A',
'\\x61': 'a',
'\\x20': ' ',
'\\x00': '\0',
'\\xFF': 'ÿ',
("Newline", '\\\\n', '\n',), # Newline
("Carriage return", '\\\\r', '\r',), # Carriage return
("Tab", '\\\\t', '\t',), # Tab
("Backslash", '\\\\\\\\', '\\',), # Backslash
("Single quote", "\\\\'", "'",), # Single quote
("Null character", '\\\\0', '\0',), # Null character
}
# Unicode escape examples
@ -81,35 +71,16 @@ class CharTestGenerator(BaseTestGenerator):
name = f"Char Escape {escape_name}"
code = f"'{escape_str}'"
self.make_success_test(name, code, "char", char_val)
# Additional escape sequences with descriptions
test_cases = [
("Newline", "'\\n'", '\n'),
("Carriage Return", "'\\r'", '\r'),
("Tab", "'\\t'", '\t'),
("Backslash", "'\\\\'", '\\'),
("Double Quote", "'\\\\\"'", '"'),
("Single Quote", "'\\''", "'"),
("Null", "'\\0'", '\0'),
]
for desc, code, char_val in test_cases:
self.make_success_test(f"Char {desc}", code, "char", char_val)
def generate_hexadecimal_escape_tests(self):
    """Generate success tests for hexadecimal character escapes.

    Registers one test per case through ``self.make_success_test``; the
    arguments are the test name, the source snippet, the literal kind
    (``"char"``) and the expected character.

    Each snippet holds two literal backslashes before ``x``.
    NOTE(review): presumably the snippet is later embedded in a generated
    string literal that collapses them to one - confirm against
    ``make_success_test``.
    """
    # Additional specific hex escapes
    hex_cases = (
        ("Char Hex Lowercase A", "'\\\\x61'", 'a'),
        ("Char Hex Uppercase A", "'\\\\x41'", 'A'),
        ("Char Hex Space", "'\\\\x20'", ' '),
        ("Char Hex Tab", "'\\\\x09'", '\t'),
        ("Char Hex Newline", "'\\\\x0A'", '\n'),
        ("Char Hex Max ASCII", "'\\\\x7F'", '\x7F'),
    )
    for name, code, expected in hex_cases:
        self.make_success_test(name, code, "char", expected)
def generate_unicode_escape_tests(self):
"""Generate tests for Unicode escape sequences."""
@ -176,7 +147,7 @@ class CharTestGenerator(BaseTestGenerator):
# Multiple characters (no escape)
self.make_error_test("Char Multiple Characters",
"'AB'",
"Invalid character literal: multiple characters without escape sequence.")
"Invalid character literal: unexpected 'B' in character.")
# Unclosed quote
self.make_error_test("Char Unclosed Quote",
@ -186,7 +157,7 @@ class CharTestGenerator(BaseTestGenerator):
# Unescaped newline
self.make_error_test("Char Unescaped Newline",
"'\\n'",
"Invalid character literal: unescaped newline in character literal.")
"Invalid character literal: unclosed character literal.")
# Invalid escape sequence
self.make_error_test("Char Invalid Escape",
@ -241,28 +212,18 @@ class CharTestGenerator(BaseTestGenerator):
self.make_error_test("Char Unicode Unclosed",
"'\\u{1F600'",
"Invalid character literal: unclosed Unicode escape sequence.")
# Double quotes instead of single
self.make_error_test("Char Double Quotes",
'"A"',
"Invalid character literal: character literals must use single quotes.")
# No quotes
self.make_error_test("Char No Quotes",
"A",
"Not a character literal: missing quotes.")
def generate_edge_case_tests(self):
"""Generate edge case tests."""
# ASCII control characters
self.make_success_test("Char ASCII Control SOH", "'\\x01'", "char", '\x01')
self.make_success_test("Char ASCII Control BEL", "'\\x07'", "char", '\x07')
self.make_success_test("Char ASCII Control ESC", "'\\x1B'", "char", '\x1B')
self.make_success_test("Char ASCII Control DEL", "'\\x7F'", "char", '\x7F')
self.make_success_test("Char ASCII Control SOH", "'\\\\x01'", "char", '\x01')
self.make_success_test("Char ASCII Control BEL", "'\\\\x07'", "char", '\x07')
self.make_success_test("Char ASCII Control ESC", "'\\\\x1B'", "char", '\x1B')
self.make_success_test("Char ASCII Control DEL", "'\\\\x7F'", "char", '\x7F')
# Extended ASCII
self.make_success_test("Char Extended ASCII Lower", "'\\x80'", "char", '\x80')
self.make_success_test("Char Extended ASCII Upper", "'\\xFF'", "char", '\xFF')
self.make_success_test("Char Extended ASCII Lower", "'\\\\x80'", "char", '\x80')
self.make_success_test("Char Extended ASCII Upper", "'\\\\xFF'", "char", '\xFF')
if self.ENABLE_UNICODE:
# Zero-width characters
@ -281,23 +242,16 @@ class CharTestGenerator(BaseTestGenerator):
# High Unicode values (but valid)
self.make_success_test("Char Unicode High Valid", "'\\u{10FFFF}'", "char", '\U0010FFFF')
# Backslash before valid character
self.make_success_test("Char Backslash Literal", "'\\\\'", "char", '\\')
# Quote escaping
self.make_success_test("Char Single Quote Escaped", "'\\''", "char", "'")
# self.make_success_test("Char Double Quote Escaped", "'\\\"'", "char", '"')
def generate_case_sensitivity_tests(self):
"""Generate tests for case sensitivity in escape sequences."""
# Hex escapes - lowercase x
self.make_success_test("Char Hex Lowercase x", "'\\x41'", "char", 'A')
self.make_success_test("Char Hex Lowercase x", "'\\\\x41'", "char", 'A')
# Hex digits - both cases
self.make_success_test("Char Hex Digits Uppercase", "'\\xFF'", "char", '\xFF')
self.make_success_test("Char Hex Digits Lowercase", "'\\xff'", "char", '\xff')
self.make_success_test("Char Hex Digits Mixed", "'\\xAb'", "char", '\xAB')
self.make_success_test("Char Hex Digits Uppercase", "'\\\\xFF'", "char", '\xFF')
self.make_success_test("Char Hex Digits Lowercase", "'\\\\xff'", "char", '\xff')
self.make_success_test("Char Hex Digits Mixed", "'\\\\xAb'", "char", '\xAB')
if self.ENABLE_UNICODE:
# Unicode escapes - lowercase u