474 lines
21 KiB
Python
474 lines
21 KiB
Python
from typing import List, Dict, Any
|
||
from .base_tests import BaseTestGenerator
|
||
|
||
|
||
class StringTestGenerator(BaseTestGenerator):
    """Produce the test-case suite that exercises string literals."""

    # Each key is an escape as it is spelled in source text (a backslash
    # followed by one character); each value is the single character it
    # stands for.
    ESCAPE_SEQUENCES = {
        "\\n": "\n",    # line feed
        "\\r": "\r",    # carriage return
        "\\t": "\t",    # horizontal tab
        "\\\\": "\\",   # the backslash itself
        "\\\"": "\"",   # double quote
        "\\'": "'",     # single quote
        "\\0": "\0",    # NUL
    }
|
||
|
||
def generate_basic_tests(self):
    """Register success cases for plain string literals without escapes.

    Every case is forwarded to ``self.make_success_test`` as
    ``(name, '"<text>"', "string", text)``: the text wrapped in double
    quotes is the source, and the bare text is the expected value.
    """
    cases = [
        # Empty and simple strings
        ("String Empty", ""),
        ("String Simple", "hello"),
        ("String With Space", "hello world"),
        ("String Single Char", "A"),
        ("String Multiple Words", "The quick brown fox"),
        # Digits and letter case
        ("String With Numbers", "abc123"),
        ("String Only Numbers", "12345"),
        ("String Mixed Case", "HeLLo WoRLd"),
        # Punctuation and symbols
        ("String With Punctuation", "Hello, World!"),
        ("String With Symbols", "@#$%^&*()"),
        # Three consecutive spaces: with a single space this case was an
        # exact duplicate of "String With Space" and never exercised a
        # run of blanks.
        ("String Multiple Spaces", "hello   world"),
        ("String Leading Space", " hello"),
        ("String Trailing Space", "hello "),
        ("String Only Spaces", " "),
    ]
    for name, text in cases:
        self.make_success_test(name, f'"{text}"', "string", text)
|
||
|
||
def generate_escape_sequence_tests(self):
    """Register success cases for the single-character backslash escapes.

    Each table entry is ``(name, source, expected)`` and is forwarded as
    ``self.make_success_test(name, source, "string", expected)``.  Both the
    source and the expected text are spelled with a literal backslash
    followed by the escape letter, never with a raw control character.
    """
    cases = [
        # Individual escape sequences
        ("String Newline", '"hello\\nworld"', "hello\\nworld"),
        ("String Tab", '"hello\\tworld"', "hello\\tworld"),
        ("String Carriage Return", '"hello\\rworld"', "hello\\rworld"),
        ("String Backslash", '"hello\\\\world"', "hello\\\\world"),
        # The source is "say \"hello\"".  It previously carried a doubled
        # backslash before each inner quote, i.e. an escaped backslash
        # followed by a quote that closed the literal early.
        ("String Double Quote", '"say \\"hello\\""', 'say \\"hello\\"'),
        ("String Single Quote", '"it\\\'s"', "it's"),
        ("String Null Char", '"hello\\0world"', "hello\\0world"),
        # Multiple escape sequences
        ("String Multiple Escapes",
         '"line1\\nline2\\nline3"', "line1\\nline2\\nline3"),
        ("String Mixed Escapes",
         '"tab\\there\\nnewline\\\\backslash"',
         "tab\\there\\nnewline\\\\backslash"),
        # Escape at start/end
        ("String Escape At Start", '"\\nhello"', "\\nhello"),
        ("String Escape At End", '"hello\\n"', "hello\\n"),
        # Consecutive escapes
        ("String Consecutive Escapes", '"\\n\\n\\n"', "\\n\\n\\n"),
        # The source now escapes the inner double quote (\") so the literal
        # is not terminated halfway through; it matches the expected text.
        # NOTE(review): this case expects the single quote rendered as
        # backslash + quote, while "String Single Quote" expects a bare
        # quote -- confirm which rendering the harness produces.
        ("String All Escapes",
         '"\\n\\r\\t\\\\\\"\\\'\\0"', "\\n\\r\\t\\\\\\\"\\'\\0"),
    ]
    for name, source, expected in cases:
        self.make_success_test(name, source, "string", expected)
|
||
|
||
def generate_hexadecimal_escape_tests(self):
    """Register success cases for two-digit hexadecimal escapes.

    Entries are ``(name, source, expected)`` and are forwarded, in table
    order, as ``self.make_success_test(name, source, "string", expected)``.
    """
    # NOTE(review): the expected values mix raw characters (the extended
    # ASCII case) with escaped spellings that echo the digit case of the
    # source (the 0xFF cases) -- confirm against the harness.
    cases = (
        # Basic hex escapes
        ("String Hex Letter A", '"\\x41"', "A"),
        ("String Hex Letter a", '"\\x61"', "a"),
        ("String Hex Space", '"\\x20"', " "),
        ("String Hex Tab", '"\\x09"', "\\t"),
        ("String Hex Newline", '"\\x0A"', "\\n"),
        # Several escapes in a row, and escapes embedded in ordinary text
        ("String Multiple Hex", '"\\x48\\x65\\x6C\\x6C\\x6F"', "Hello"),
        ("String Hex Mixed", '"Hello\\x20World"', "Hello World"),
        # Values above 0x7F
        ("String Hex Extended ASCII", '"\\xA9\\xAE"', "\xA9\xAE"),
        # Digit case variations
        ("String Hex Uppercase", '"\\xFF"', "\\xFF"),
        ("String Hex Lowercase", '"\\xff"', "\\xff"),
        ("String Hex Mixed Case", '"\\xAb"', "\\xAb"),
        # Smallest and largest values
        ("String Hex Zero", '"\\x00"', "\\0"),
        ("String Hex Max", '"\\xFF"', "\\xFF"),
    )
    for name, source, expected in cases:
        self.make_success_test(name, source, "string", expected)
|
||
|
||
def generate_unicode_escape_tests(self):
    """Register success cases for braced Unicode escapes.

    Entries are ``(name, source, expected)``; each is forwarded, in table
    order, as ``self.make_success_test(name, source, "string", expected)``.
    The sources spell code points as a backslash, ``u`` and hex digits in
    braces; the expected values are the corresponding characters.
    """
    cases = (
        # Basic Unicode escapes
        ("String Unicode Letter A", '"\\u{41}"', "A"),
        ("String Unicode Space", '"\\u{20}"', " "),
        # Emoji, alone and in sequence
        ("String Unicode Smiley", '"\\u{1F600}"', "😀"),
        ("String Unicode Heart", '"\\u{2764}"', "❤"),
        ("String Unicode Star", '"\\u{2B50}"', "⭐"),
        ("String Multiple Emoji", '"\\u{1F600}\\u{2764}\\u{2B50}"', "😀❤⭐"),
        # Greek letters
        ("String Unicode Greek Alpha", '"\\u{03B1}"', "α"),
        ("String Unicode Greek Beta", '"\\u{03B2}"', "β"),
        # Other scripts and symbol blocks
        ("String Unicode Chinese", '"\\u{4E2D}\\u{6587}"', "中文"),
        ("String Unicode Arabic", '"\\u{0639}\\u{0631}\\u{0628}"', "عرب"),
        ("String Unicode Cyrillic", '"\\u{0420}\\u{0443}\\u{0441}"', "Рус"),
        ("String Unicode Math", '"\\u{221E}\\u{2211}\\u{222B}"', "∞∑∫"),
        # Escape surrounded by regular text
        ("String Unicode Mixed", '"Hello \\u{1F600} World"', "Hello 😀 World"),
        # Case variations in the hex digits
        ("String Unicode Hex Uppercase", '"\\u{1F600}"', "😀"),
        ("String Unicode Hex Lowercase", '"\\u{1f600}"', "😀"),
        ("String Unicode Hex Mixed", '"\\u{1F60a}"', "😊"),
        # Code points written with 2, 4, 5 and 6 hex digits
        ("String Unicode 2 Digits", '"\\u{41}"', "A"),
        ("String Unicode 4 Digits", '"\\u{03B1}"', "α"),
        ("String Unicode 5 Digits", '"\\u{1F600}"', "😀"),
        ("String Unicode 6 Digits", '"\\u{10FFFF}"', "\U0010FFFF"),
    )
    for name, source, expected in cases:
        self.make_success_test(name, source, "string", expected)
|
||
|
||
def generate_direct_unicode_tests(self):
    """Register success cases whose literals contain non-ASCII text directly.

    For every ``(name, text)`` pair the call made is
    ``self.make_success_test(name, '"<text>"', "string", text)``, so the
    expected value is always the text between the quotes, unchanged.
    """
    samples = (
        ("String Direct Emoji", "Hello 😀 World"),
        ("String Multiple Direct Emoji", "😀❤⭐👍"),
        ("String Direct Greek", "αβγδ"),
        ("String Direct Chinese", "你好世界"),
        ("String Direct Arabic", "مرحبا"),
        ("String Direct Cyrillic", "Привет"),
        ("String Direct Math", "∞∑∫√π"),
        ("String Mixed Scripts", "Hello 世界 Привет"),
    )
    for name, text in samples:
        self.make_success_test(name, f'"{text}"', "string", text)
|
||
|
||
def generate_multiline_tests(self):
    """Register success cases for literals holding escaped line breaks.

    Line breaks appear only as backslash escapes (newline and carriage
    return); the expected value passed to ``self.make_success_test`` is
    the same escaped text that sits between the quotes of the source.
    """
    samples = (
        # Lines joined by escaped newlines
        ("String With Escaped Newlines", "line1\\nline2\\nline3"),
        # Paragraph-like text
        ("String Paragraph", "First line.\\nSecond line.\\nThird line."),
        # CRLF, LF and lone CR endings in one literal
        ("String Mixed Line Endings", "Windows\\r\\nUnix\\nMac\\r"),
    )
    for name, text in samples:
        self.make_success_test(name, f'"{text}"', "string", text)
|
||
|
||
def generate_whitespace_tests(self):
    """Register success cases about whitespace around and inside literals.

    Entries are ``(name, source, expected)`` and are forwarded, in table
    order, as ``self.make_success_test(name, source, "string", expected)``.
    """
    cases = (
        # Blanks outside the quotes: the expected value omits them
        ("String Leading Whitespace Outside", ' "hello"', "hello"),
        ("String Trailing Whitespace Outside", '"hello" ', "hello"),
        ("String Both Whitespace Outside", ' "hello" ', "hello"),
        # A real tab character in front of the opening quote
        ("String Tab Before", '\t"hello"', "hello"),
        # Escaped tabs and newlines mixed with text
        ("String Tabs And Newlines Inside",
         '"hello\\t\\tworld\\n\\ntest"', "hello\\t\\tworld\\n\\ntest"),
        # Literals made of nothing but whitespace escapes
        ("String Only Tabs", '"\\t\\t\\t"', "\\t\\t\\t"),
        ("String Only Newlines", '"\\n\\n\\n"', "\\n\\n\\n"),
        ("String Mixed Whitespace", '" \\t\\n\\r "', " \\t\\n\\r "),
    )
    for name, source, expected in cases:
        self.make_success_test(name, source, "string", expected)
|
||
|
||
def generate_long_string_tests(self):
    """Register success cases for longer string literals.

    Each call is ``self.make_success_test(name, source, "string",
    expected)``.  In every case the source is the expected text wrapped in
    double quotes.
    """
    # Sentence
    sentence = "The quick brown fox jumps over the lazy dog."
    self.make_success_test("String Sentence", f'"{sentence}"', "string", sentence)

    # Multiple sentences
    sentences = "First sentence. Second sentence. Third sentence."
    self.make_success_test("String Multiple Sentences",
                           f'"{sentences}"', "string", sentences)

    # Long string with escapes.  The source is derived from the expected
    # text so the two cannot drift apart: the inner quotes used to be
    # unescaped in the source, which ended the literal before "like this"
    # and contradicted the expected value.
    expected = (
        'This is a long string.\\nIt has multiple lines.\\n'
        'And some tabs\\there.\\nPlus quotes \\"like this\\".'
    )
    self.make_success_test("String Long With Escapes",
                           f'"{expected}"', "string", expected)

    # Repetitive string
    self.make_success_test("String Repetitive", '"aaaaaaaaaa"',
                           "string", "aaaaaaaaaa")

    # All ASCII printable characters except the two that would need
    # escaping inside a literal: the double quote and the backslash.
    needs_escape = ('"', '\\')
    printable = "".join(
        ch for ch in map(chr, range(32, 127)) if ch not in needs_escape
    )
    self.make_success_test("String ASCII Printable",
                           f'"{printable}"', "string", printable)
|
||
|
||
def generate_special_content_tests(self):
    """Register success cases whose content resembles other formats.

    For every ``(name, text)`` pair the call made is
    ``self.make_success_test(name, '"<text>"', "string", text)``.  The
    text is written in escaped form (backslash before an inner quote or
    backslash), and the source is that same text wrapped in quotes.
    """
    samples = [
        # Code-like strings
        ("String Code Like", "int main() { return 0; }"),
        # JSON-like strings.  The source used to contain doubled braces
        # and a doubled backslash before every inner quote, so it neither
        # matched the expected text nor stayed a single literal; deriving
        # it from the expected text removes both problems.
        ("String JSON Like", '{\\"key\\": \\"value\\"}'),
        # URL
        ("String URL", "https://example.com/path?query=value"),
        # Email
        ("String Email", "user@example.com"),
        # File path (Unix)
        ("String Unix Path", "/home/user/file.txt"),
        # File path (Windows) -- each separator is an escaped backslash
        ("String Windows Path", "C:\\\\Users\\\\file.txt"),
        # SQL-like
        ("String SQL Like", "SELECT * FROM users WHERE id = 1"),
        # Regular expression
        ("String Regex Like", "[a-zA-Z0-9]+"),
    ]
    for name, text in samples:
        self.make_success_test(name, f'"{text}"', "string", text)
|
||
|
||
def generate_error_tests(self):
    """Register the cases that must be rejected as invalid string literals.

    Each table entry is ``(name, source, message)`` and is forwarded as
    ``self.make_error_test(name, source, message)``.  The Unicode-escape
    cases are only registered when ``self.ENABLE_UNICODE`` is truthy.

    Several sources used to be over-escaped and were therefore perfectly
    valid literals (some identical in shape to success cases elsewhere in
    this class); they now contain the malformed text their names describe.
    """
    general_cases = [
        # Unclosed string
        ("String Unclosed",
         '"hello',
         "Invalid string literal: unclosed string literal."),
        # Unescaped newline: a real line break between the quotes.  The
        # escaped spelling is valid and is the "String Newline" success case.
        ("String Unescaped Newline",
         '"hello\nworld"',
         "Invalid string literal: unescaped newline in string literal."),
        # Invalid escape sequence: one backslash followed by 'q'.
        # NOTE(review): the message now quotes the escape with a single
        # backslash, like the \u{...} message below -- confirm against
        # the parser's actual wording.
        ("String Invalid Escape",
         '"hello\\qworld"',
         "Invalid string literal: unknown escape sequence '\\q'."),
        # Hex escape too short
        ("String Hex Too Short",
         '"\\x4"',
         "Invalid string literal: hexadecimal escape must have exactly 2 digits."),
        # Hex escape too long: a real \x escape followed by three digits.
        # NOTE(review): presumably the parser rejects a third hex digit
        # rather than reading it as ordinary text -- confirm.
        ("String Hex Too Long",
         '"\\x414"',
         "Invalid string literal: hexadecimal escape must have exactly 2 digits."),
        # Invalid hex digits
        ("String Hex Invalid Digit",
         '"\\xGG"',
         "Invalid string literal: invalid hexadecimal digit 'G'."),
    ]
    unicode_cases = [
        # Unicode no braces
        ("String Unicode No Braces",
         '"\\u1F600"',
         "Invalid string literal: Unicode escape must use braces \\u{...}."),
        # Unicode empty
        ("String Unicode Empty",
         '"\\u{}"',
         "Invalid string literal: empty Unicode escape sequence."),
        # Unicode too long
        ("String Unicode Too Long",
         '"\\u{1234567}"',
         "Invalid string literal: Unicode escape sequence too long (max 6 hex digits)."),
        # Unicode invalid code point (surrogate)
        ("String Unicode Surrogate",
         '"\\u{D800}"',
         "Invalid string literal: invalid Unicode code point (surrogate range)."),
        # Unicode out of range
        ("String Unicode Out Of Range",
         '"\\u{110000}"',
         "Invalid string literal: Unicode code point out of range (max 0x10FFFF)."),
        # Unicode invalid hex
        ("String Unicode Invalid Hex",
         '"\\u{GGGG}"',
         "Invalid string literal: invalid hexadecimal digit 'G' in Unicode escape."),
        # Unclosed Unicode escape
        ("String Unicode Unclosed",
         '"\\u{1F600"',
         "Invalid string literal: unclosed Unicode escape sequence."),
    ]
    closing_cases = [
        # Single quotes instead of double
        ("String Single Quotes",
         "'hello'",
         "Invalid string literal: string literals must use double quotes."),
        # Backslash at end: the input stops right after a lone backslash,
        # leaving the escape without its second character.  The previous
        # input ended in an escaped backslash plus a closing quote, which
        # is a complete literal.
        ("String Backslash At End",
         '"hello\\',
         "Invalid string literal: incomplete escape sequence at end."),
    ]

    for name, source, message in general_cases:
        self.make_error_test(name, source, message)

    if self.ENABLE_UNICODE:
        for name, source, message in unicode_cases:
            self.make_error_test(name, source, message)

    for name, source, message in closing_cases:
        self.make_error_test(name, source, message)
|
||
|
||
def generate_edge_case_tests(self):
    """Register success cases for unusual but valid string literals.

    Each table entry is ``(name, source, expected)`` and is forwarded as
    ``self.make_success_test(name, source, "string", expected)``.  The
    cases that rely on braced Unicode escapes are only registered when
    ``self.ENABLE_UNICODE`` is truthy.
    """
    ten_newlines = "\\n" * 10  # ten escaped newlines, back to back

    always = [
        # String with only escape sequences
        ("String Only Escapes", '"\\n\\t\\r"', "\\n\\t\\r"),
        # String with null characters
        ("String With Nulls", '"a\\0b\\0c"', "a\\0b\\0c"),
        # Very long run of escapes
        ("String Many Escapes", f'"{ten_newlines}"', ten_newlines),
    ]
    unicode_only = [
        # Mixed escape types.  The expected newline is spelled as
        # backslash + n, as in every other case of this class; it used to
        # be a raw line-feed character here.
        ("String All Escape Types", '"\\n\\x41\\u{42}test"', "\\nABtest"),
        # Zero-width space
        ("String Zero Width", '"hello\\u{200B}world"', "hello\u200Bworld"),
        # Combining characters: e followed by a combining acute accent
        ("String Combining", '"e\\u{0301}"', "e\u0301"),
        # Right-to-left mark
        ("String RTL Mark", '"\\u{200F}hello"', "\u200Fhello"),
        # Byte order mark
        ("String BOM", '"\\u{FEFF}hello"', "\uFEFFhello"),
    ]

    for name, source, expected in always:
        self.make_success_test(name, source, "string", expected)

    if self.ENABLE_UNICODE:
        for name, source, expected in unicode_only:
            self.make_success_test(name, source, "string", expected)
|
||
|
||
def generate_all_tests(self) -> List[Dict[str, Any]]:
    """Run every string-literal generator and return the collected tests.

    The generators run in a fixed order.  The two Unicode generators are
    skipped unless ``self.ENABLE_UNICODE`` is truthy.

    Returns:
        Whatever ``self.get_tests()`` returns after all generators ran.
    """
    # Basic literals, simple escapes, hexadecimal escapes
    for step in (
        "generate_basic_tests",
        "generate_escape_sequence_tests",
        "generate_hexadecimal_escape_tests",
    ):
        getattr(self, step)()

    # Unicode escapes and direct Unicode content
    if self.ENABLE_UNICODE:
        for step in (
            "generate_unicode_escape_tests",
            "generate_direct_unicode_tests",
        ):
            getattr(self, step)()

    # Multiline, whitespace, long strings, special content, errors, edge cases
    for step in (
        "generate_multiline_tests",
        "generate_whitespace_tests",
        "generate_long_string_tests",
        "generate_special_content_tests",
        "generate_error_tests",
        "generate_edge_case_tests",
    ):
        getattr(self, step)()

    return self.get_tests()
|