Characters

This commit is contained in:
Kyler Olsen 2025-11-17 22:53:15 -07:00
parent ffe8008bb9
commit f8894ea4c0
5 changed files with 130 additions and 4562 deletions

View File

@ -593,8 +593,48 @@ static LexerResult parse_numeric_literal(LexerInfo *lexer_info, char c, size_t s
} }
/**
 * Lexes the remainder of a character literal into a TOKEN_CHARACTER token.
 *
 * The opening quote has already been consumed by the caller (lexer_next
 * advances past it before calling), so `c` is the first character of the
 * literal's content.
 *
 * Supported escapes: \n, \r, \t, \\, \' and \0. Any other escape, an empty
 * literal (''), a raw line break inside the literal, or a missing closing
 * quote produces a lexer error.
 *
 * @param lexer_info  Lexer state; advanced past the closing quote on success.
 * @param c           First character after the opening quote.
 * @param start       Start offset of the token, forwarded to the result/error.
 * @param start_line  Start line of the token, forwarded to the result/error.
 * @return A character token on success, otherwise an error result.
 */
static LexerResult parse_character_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
    if (c == '\'') {
        return lexer_error(lexer_info, SLS_STR("Invalid character literal: empty character literal."), start, start_line);
    }

    char value = '\0';
    if (c == '\\') {
        c = advance(lexer_info);
        switch (c) {
            case 'n':
                value = '\n';
                break;
            case 'r':
                value = '\r';
                break;
            case 't':
                value = '\t';
                break;
            case '\\':
                value = '\\';
                break;
            case '\'':
                value = '\'';
                break;
            case '0':
                value = '\0';
                break;
            case '\0':
                // A NUL is treated as "unclosed" by the closing-quote check
                // below; do the same here instead of formatting a NUL byte
                // into the error message via %c.
                return lexer_error(lexer_info, SLS_STR("Invalid character literal: unclosed character literal."), start, start_line);
            default: {
                // Braces are required: before C23 a label cannot be followed
                // directly by a declaration.
                SlsStr error_msg = sls_format(SLS_STR("Invalid character literal: unknown escape sequence '\\%c'."), c);
                if (error_msg.str == NULL) {
                    return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Out Of Memory Error."), 1}};
                }
                return lexer_error(lexer_info, error_msg, start, start_line);
            }
        }
    } else if (c == '\n' || c == '\r') {
        // A raw line break ends the literal before a closing quote was seen.
        return lexer_error(lexer_info, SLS_STR("Invalid character literal: unclosed character literal."), start, start_line);
    } else {
        value = c;
    }

    // Exactly one (possibly escaped) character has been read; the next
    // character must be the closing quote.
    c = advance(lexer_info);

    // Cast before calling isspace(): passing a negative plain-char value to a
    // <ctype.h> function is undefined behaviour.
    // NOTE(review): '/' is presumably treated as unclosed so that a following
    // comment is not reported as a stray character - confirm intent.
    if (isspace((unsigned char)c) || c == '/' || c == '\0') {
        return lexer_error(lexer_info, SLS_STR("Invalid character literal: unclosed character literal."), start, start_line);
    }
    if (c != '\'') {
        SlsStr error_msg = sls_format(SLS_STR("Invalid character literal: unexpected '%c' in character."), c);
        if (error_msg.str == NULL) {
            return (LexerResult){SLS_ERROR, .error = (SlsError){SLS_STR("Out Of Memory Error."), 1}};
        }
        return lexer_error(lexer_info, error_msg, start, start_line);
    }

    // Consume the closing quote.
    advance(lexer_info);
    return lexer_result(lexer_info, (Token){TOKEN_CHARACTER, .character_literal = (uint8_t)value}, start, start_line);
}
static LexerResult parse_string_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) { static LexerResult parse_string_literal(LexerInfo *lexer_info, char c, size_t start, size_t start_line) {
@ -644,7 +684,10 @@ static LexerResult lexer_next(LexerInfo *lexer_info) {
if (isdigit(c) || (c == '.' && isdigit(far_peek(lexer_info, 1))) || (c == '-' && isdigit(far_peek(lexer_info, 1)))) if (isdigit(c) || (c == '.' && isdigit(far_peek(lexer_info, 1))) || (c == '-' && isdigit(far_peek(lexer_info, 1))))
return parse_numeric_literal(lexer_info, c, start, start_line); return parse_numeric_literal(lexer_info, c, start, start_line);
// Character Literals // Character Literals
if (c == '\'') return parse_character_literal(lexer_info, c, start, start_line); if (c == '\'') {
c = advance(lexer_info);
return parse_character_literal(lexer_info, c, start, start_line);
}
// String Literals // String Literals
if (c == '\"') return parse_string_literal(lexer_info, c, start, start_line); if (c == '\"') return parse_string_literal(lexer_info, c, start, start_line);
// Token Strings // Token Strings

View File

@ -291,7 +291,7 @@ Boolean test_integer_value(LexerTest *test, LexerResult result, size_t i, TestIn
} }
Boolean test_character_value(LexerTest *test, LexerResult result, size_t i, uint8_t *value) { Boolean test_character_value(LexerTest *test, LexerResult result, size_t i, uint8_t *value) {
static const TokenType token_type = TOKEN_INTEGER; static const TokenType token_type = TOKEN_CHARACTER;
LexerTokenResult *head = get_token(result.result, i); LexerTokenResult *head = get_token(result.result, i);
if (test_token_type(test, result, i, token_type)) { if (test_token_type(test, result, i, token_type)) {
return TRUE; return TRUE;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -7,22 +7,12 @@ class CharTestGenerator(BaseTestGenerator):
# Common escape sequences # Common escape sequences
ESCAPE_SEQUENCES = { ESCAPE_SEQUENCES = {
("Newline", '\\n', '\n',), # Newline ("Newline", '\\\\n', '\n',), # Newline
("Carriage return", '\\r', '\r',), # Carriage return ("Carriage return", '\\\\r', '\r',), # Carriage return
("Tab", '\\t', '\t',), # Tab ("Tab", '\\\\t', '\t',), # Tab
("Backslash", '\\\\', '\\',), # Backslash ("Backslash", '\\\\\\\\', '\\',), # Backslash
("Double quote", '\\\\"', '"',), # Double quote ("Single quote", "\\\\'", "'",), # Single quote
("Single quote", "\\'", "'",), # Single quote ("Null character", '\\\\0', '\0',), # Null character
("Null character", '\\0', '\0',), # Null character
}
# Hexadecimal escape examples
HEX_ESCAPES = {
'\\x41': 'A',
'\\x61': 'a',
'\\x20': ' ',
'\\x00': '\0',
'\\xFF': 'ÿ',
} }
# Unicode escape examples # Unicode escape examples
@ -82,34 +72,15 @@ class CharTestGenerator(BaseTestGenerator):
code = f"'{escape_str}'" code = f"'{escape_str}'"
self.make_success_test(name, code, "char", char_val) self.make_success_test(name, code, "char", char_val)
# Additional escape sequences with descriptions
test_cases = [
("Newline", "'\\n'", '\n'),
("Carriage Return", "'\\r'", '\r'),
("Tab", "'\\t'", '\t'),
("Backslash", "'\\\\'", '\\'),
("Double Quote", "'\\\\\"'", '"'),
("Single Quote", "'\\''", "'"),
("Null", "'\\0'", '\0'),
]
for desc, code, char_val in test_cases:
self.make_success_test(f"Char {desc}", code, "char", char_val)
def generate_hexadecimal_escape_tests(self): def generate_hexadecimal_escape_tests(self):
"""Generate tests for hexadecimal escape sequences.""" """Generate tests for hexadecimal escape sequences."""
for escape_str, char_val in self.HEX_ESCAPES.items():
name = f"Char Hex Escape {escape_str}"
code = f"'{escape_str}'"
self.make_success_test(name, code, "char", char_val)
# Additional specific hex escapes # Additional specific hex escapes
self.make_success_test("Char Hex Lowercase A", "'\\x61'", "char", 'a') self.make_success_test("Char Hex Lowercase A", "'\\\\x61'", "char", 'a')
self.make_success_test("Char Hex Uppercase A", "'\\x41'", "char", 'A') self.make_success_test("Char Hex Uppercase A", "'\\\\x41'", "char", 'A')
self.make_success_test("Char Hex Space", "'\\x20'", "char", ' ') self.make_success_test("Char Hex Space", "'\\\\x20'", "char", ' ')
self.make_success_test("Char Hex Tab", "'\\x09'", "char", '\t') self.make_success_test("Char Hex Tab", "'\\\\x09'", "char", '\t')
self.make_success_test("Char Hex Newline", "'\\x0A'", "char", '\n') self.make_success_test("Char Hex Newline", "'\\\\x0A'", "char", '\n')
self.make_success_test("Char Hex Max ASCII", "'\\x7F'", "char", '\x7F') self.make_success_test("Char Hex Max ASCII", "'\\\\x7F'", "char", '\x7F')
def generate_unicode_escape_tests(self): def generate_unicode_escape_tests(self):
"""Generate tests for Unicode escape sequences.""" """Generate tests for Unicode escape sequences."""
@ -176,7 +147,7 @@ class CharTestGenerator(BaseTestGenerator):
# Multiple characters (no escape) # Multiple characters (no escape)
self.make_error_test("Char Multiple Characters", self.make_error_test("Char Multiple Characters",
"'AB'", "'AB'",
"Invalid character literal: multiple characters without escape sequence.") "Invalid character literal: unexpected 'B' in character.")
# Unclosed quote # Unclosed quote
self.make_error_test("Char Unclosed Quote", self.make_error_test("Char Unclosed Quote",
@ -186,7 +157,7 @@ class CharTestGenerator(BaseTestGenerator):
# Unescaped newline # Unescaped newline
self.make_error_test("Char Unescaped Newline", self.make_error_test("Char Unescaped Newline",
"'\\n'", "'\\n'",
"Invalid character literal: unescaped newline in character literal.") "Invalid character literal: unclosed character literal.")
# Invalid escape sequence # Invalid escape sequence
self.make_error_test("Char Invalid Escape", self.make_error_test("Char Invalid Escape",
@ -242,27 +213,17 @@ class CharTestGenerator(BaseTestGenerator):
"'\\u{1F600'", "'\\u{1F600'",
"Invalid character literal: unclosed Unicode escape sequence.") "Invalid character literal: unclosed Unicode escape sequence.")
# Double quotes instead of single
self.make_error_test("Char Double Quotes",
'"A"',
"Invalid character literal: character literals must use single quotes.")
# No quotes
self.make_error_test("Char No Quotes",
"A",
"Not a character literal: missing quotes.")
def generate_edge_case_tests(self): def generate_edge_case_tests(self):
"""Generate edge case tests.""" """Generate edge case tests."""
# ASCII control characters # ASCII control characters
self.make_success_test("Char ASCII Control SOH", "'\\x01'", "char", '\x01') self.make_success_test("Char ASCII Control SOH", "'\\\\x01'", "char", '\x01')
self.make_success_test("Char ASCII Control BEL", "'\\x07'", "char", '\x07') self.make_success_test("Char ASCII Control BEL", "'\\\\x07'", "char", '\x07')
self.make_success_test("Char ASCII Control ESC", "'\\x1B'", "char", '\x1B') self.make_success_test("Char ASCII Control ESC", "'\\\\x1B'", "char", '\x1B')
self.make_success_test("Char ASCII Control DEL", "'\\x7F'", "char", '\x7F') self.make_success_test("Char ASCII Control DEL", "'\\\\x7F'", "char", '\x7F')
# Extended ASCII # Extended ASCII
self.make_success_test("Char Extended ASCII Lower", "'\\x80'", "char", '\x80') self.make_success_test("Char Extended ASCII Lower", "'\\\\x80'", "char", '\x80')
self.make_success_test("Char Extended ASCII Upper", "'\\xFF'", "char", '\xFF') self.make_success_test("Char Extended ASCII Upper", "'\\\\xFF'", "char", '\xFF')
if self.ENABLE_UNICODE: if self.ENABLE_UNICODE:
# Zero-width characters # Zero-width characters
@ -282,22 +243,15 @@ class CharTestGenerator(BaseTestGenerator):
# High Unicode values (but valid) # High Unicode values (but valid)
self.make_success_test("Char Unicode High Valid", "'\\u{10FFFF}'", "char", '\U0010FFFF') self.make_success_test("Char Unicode High Valid", "'\\u{10FFFF}'", "char", '\U0010FFFF')
# Backslash before valid character
self.make_success_test("Char Backslash Literal", "'\\\\'", "char", '\\')
# Quote escaping
self.make_success_test("Char Single Quote Escaped", "'\\''", "char", "'")
# self.make_success_test("Char Double Quote Escaped", "'\\\"'", "char", '"')
def generate_case_sensitivity_tests(self): def generate_case_sensitivity_tests(self):
"""Generate tests for case sensitivity in escape sequences.""" """Generate tests for case sensitivity in escape sequences."""
# Hex escapes - lowercase x # Hex escapes - lowercase x
self.make_success_test("Char Hex Lowercase x", "'\\x41'", "char", 'A') self.make_success_test("Char Hex Lowercase x", "'\\\\x41'", "char", 'A')
# Hex digits - both cases # Hex digits - both cases
self.make_success_test("Char Hex Digits Uppercase", "'\\xFF'", "char", '\xFF') self.make_success_test("Char Hex Digits Uppercase", "'\\\\xFF'", "char", '\xFF')
self.make_success_test("Char Hex Digits Lowercase", "'\\xff'", "char", '\xff') self.make_success_test("Char Hex Digits Lowercase", "'\\\\xff'", "char", '\xff')
self.make_success_test("Char Hex Digits Mixed", "'\\xAb'", "char", '\xAB') self.make_success_test("Char Hex Digits Mixed", "'\\\\xAb'", "char", '\xAB')
if self.ENABLE_UNICODE: if self.ENABLE_UNICODE:
# Unicode escapes - lowercase u # Unicode escapes - lowercase u