"""Generate the SLS C lexer test-suite from a YAML list of test cases.

Usage:
    python3 SLS_Tests/yaml_to_c_tests.py SLS_Tests/cases.yaml SLS_C/tests/lexer_tests.c

Each YAML entry is a mapping with a ``name``, the ``code`` to lex and an
optional list of expected ``tokens`` (each a mapping with ``type`` and
``value``).  One ``static TestResult test_<name>()`` C function is emitted
per entry, followed by a ``run_lexer_tests()`` driver that calls them all.
"""

import re
from pathlib import Path

try:
    import yaml
except ImportError:
    # PyYAML is only needed to *load* the test cases; keeping the import
    # optional lets the pure string-building helpers be imported without it.
    yaml = None


# NOTE(review): the system header names were missing in the copy of this file
# under review (five bare `#include` directives).  <stdlib.h>, <stddef.h> and
# <stdint.h> are required by the generated code (malloc, size_t, uint8_t);
# <stdio.h> and <string.h> are assumed -- confirm against the original.
file_headers = """\
// Kyler Olsen
// YREA SLS
// Lexer Tests
// October 2025

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sls/errors.h"
#include "sls/lexer.h"
#include "sls/string.h"
#include "tests/lexer_test_helpers.h"
#include "tests/tests.h"
"""

main_header = """\
TestsReport run_lexer_tests() {
    TestsReport test_report = (TestsReport) {
        .section = SLS_STR("lexer_tests"),
        .count = NUM_OF_TESTS,
        .tests = (TestResult *)malloc(sizeof(TestResult) * NUM_OF_TESTS),
    };
    size_t i = 0;
"""

# YAML integer token type -> C enumerator used in TestIntegerValue.
_INTEGER_KINDS = {
    "i64": "INTEGER_I64",
    "i32": "INTEGER_I32",
    "i16": "INTEGER_I16",
    "i8": "INTEGER_I8",
    "u64": "INTEGER_U64",
    "u32": "INTEGER_U32",
    "u16": "INTEGER_U16",
    "u8": "INTEGER_U8",
}


# === Helper functions ===

def sanitize_name(name: str) -> str:
    """Convert a test name into a valid C function name.

    Every character outside ``[a-zA-Z0-9_]`` becomes ``_``, runs of
    underscores are collapsed to one, and the result is prefixed with
    ``test_`` (so a name starting with a digit is still a valid identifier).
    """
    name = re.sub(r"[^a-zA-Z0-9_]", "_", name)
    name = re.sub(r"_+", "_", name)
    return f"test_{name}"


def c_string_literal(s: str) -> str:
    """Escape double quotes so *s* can be embedded in a C string literal.

    NOTE(review): only ``"`` is escaped.  Backslashes and newlines are passed
    through unchanged, so the C compiler will interpret any escape sequences
    present in the YAML text.  Existing test cases may rely on that, so it is
    left as is -- confirm before tightening.
    """
    return s.replace('"', '\\"')


def _token_handler_and_value(token: dict, error_prefix: str) -> tuple[str, str]:
    """Map one expected token to its C test handler and value expression.

    This is the single source of truth for the YAML-type -> C mapping, shared
    by top-level token checks and by (nested) token strings.

    Args:
        token: Mapping with a ``type`` and, for most types, a ``value``.
        error_prefix: Text placed before ``: <type>`` in the error raised for
            an unknown type, so each caller keeps its own message.

    Returns:
        ``(handler, value_expr)`` where ``handler`` is the name of the C
        ``test_*`` function and ``value_expr`` is the C expression passed to
        it as the expected value.

    Raises:
        ValueError: If the token type is not recognised.
    """
    ttype = token.get("type")
    value = token.get("value")

    if ttype in _INTEGER_KINDS:
        return (
            "test_integer_value",
            f"&(TestIntegerValue){{{_INTEGER_KINDS[ttype]}, {value}}}",
        )
    if ttype == "f64":
        return "test_double_value", f"&(double){{{value}}}"
    if ttype == "f32":
        return "test_float_value", f"&(float){{{value}}}"
    if ttype == "char":
        # The C side compares a single byte, so emit the code point number.
        return "test_character_value", f"&(uint8_t){{{ord(value)}}}"
    if ttype == "string":
        # str() keeps scalars that YAML parsed as non-strings working.
        return "test_string_value", f'&SLS_STR("{c_string_literal(str(value))}")'
    if ttype == "identifier":
        return (
            "test_identifier_value",
            f'&(TestIdentifierValue){{FALSE, SLS_STR("{value}")}}',
        )
    if ttype == "identifier_literal":
        return (
            "test_identifier_value",
            f'&(TestIdentifierValue){{TRUE, SLS_STR("{value}")}}',
        )
    if ttype == "bool":
        flag = "TRUE" if value else "FALSE"
        return "test_boolean_value", f"&(Boolean){{{flag}}}"
    if ttype == "error":
        # Unlike the other handlers, the message is passed by value (no '&').
        return "test_for_error", f'SLS_STR("{c_string_literal(str(value))}")'
    if ttype == "token_string":
        nested = value if isinstance(value, list) else []
        return "test_token_string_value", _build_token_string_value_struct(nested)

    raise ValueError(f"{error_prefix}: {ttype}")


def _token_to_c_call(token: dict, idx_var="i") -> str:
    """Generate the bare C ``test_*`` call for one top-level expected token.

    Args:
        token: Mapping with ``type`` and ``value``.
        idx_var: Name of the C index variable; it is post-incremented by the
            generated call (except for ``token_string``, see
            ``_token_string_c_call``).

    Raises:
        ValueError: If the token type is not recognised.
    """
    if token.get("type") == "token_string":
        return _token_string_c_call(token, idx_var, token.get("value"))

    handler, value_expr = _token_handler_and_value(token, " Unhandled token type")
    return f"{handler}(&test, result, {idx_var}++, {value_expr})"


def _token_string_c_call(token: dict, idx_var: str, value: list[dict]) -> str:
    """Generate the C call that checks a top-level token string.

    Args:
        token: The ``token_string`` token itself (unused; kept for interface
            compatibility).
        idx_var: Name of the C index variable.
        value: The expected tokens inside the token string.

    Raises:
        ValueError: If an inner token type is not recognised.
    """
    # NOTE(review): every other generated call passes `{idx_var}++`, but this
    # one passes the index without advancing it.  Left unchanged because the
    # C-side contract of test_token_string_value is not visible here --
    # confirm whether the missing `++` is intentional.
    value_struct = _build_token_string_value_struct(
        value, error_prefix="Unhandled inner token type in token_string"
    )
    return f"test_token_string_value(&test, result, {idx_var}, {value_struct})"


def _build_token_string_value_struct(
    nested_tokens: list[dict],
    error_prefix: str = "Unhandled nested token type in token_string",
) -> str:
    """Build the C ``&(TestTokenStringValue){...}`` expression for a token list.

    Token strings nested inside *nested_tokens* are handled recursively.

    Args:
        nested_tokens: Expected tokens, each a mapping with ``type``/``value``.
        error_prefix: Prefix of the error message for an unknown token type
            at this level (deeper levels always use the default).

    Raises:
        ValueError: If a token type is not recognised.
    """
    if not nested_tokens:
        return "&(TestTokenStringValue){0, NULL}"

    entries = []
    for inner_token in nested_tokens:
        handler, value_expr = _token_handler_and_value(inner_token, error_prefix)
        entries.append(f"{{.token_handler = {handler}, .value = {value_expr}}}")

    values_array = f"(TestTokenValue[]){{{', '.join(entries)}}}"
    return f"&(TestTokenStringValue){{{len(nested_tokens)}, {values_array}}}"


def token_to_c_call(token: dict, idx_var="i") -> str:
    """Generate a complete C statement that checks one expected token.

    The statement returns ``test.result`` from the enclosing C test function
    when the underlying ``test_*`` call evaluates to true.
    """
    return f"if ({_token_to_c_call(token, idx_var)}) return test.result;"


def generate_c_test(test: dict) -> str:
    """Convert a single YAML test entry to the source of a C test function.

    Args:
        test: Mapping with ``name`` and ``code`` (both required) and an
            optional ``tokens`` list of expected tokens.

    Returns:
        The C function definition as a string ending in a blank line.

    Raises:
        KeyError: If ``name`` or ``code`` is missing.
        ValueError: If an expected token has an unknown type.
    """
    name = test["name"]
    c_name = sanitize_name(name)
    code = c_string_literal(test["code"])
    # `tokens:` with no value loads as None; treat it like a missing key.
    tokens = test.get("tokens") or []

    # Function header.  The name is escaped too: it lands in a C string literal.
    # NOTE(review): a lexer-level SLS_ERROR returns before any `error` token
    # check below runs -- confirm that is the intended interplay with
    # test_for_error.
    c_code = [
        f"static TestResult {c_name}() {{",
        f'    LexerTest test = start_up_test(SLS_STR("{c_string_literal(name)}"), SLS_STR("{code}"));',
        "    LexerResult result = lexical_analysis(&test.lexer_info);",
        "    if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error);",
        "    size_t i = 0;",
    ]

    # Token checks
    c_code.extend("    " + token_to_c_call(token) for token in tokens)

    # EOF check (skipped when an error token is expected) and return
    if not any(token.get("type") == "error" for token in tokens):
        c_code.append("    if (test_eof_value(&test, result, i++, 0)) return test.result;")
    c_code.append("    return pass_test(&test, result);")
    c_code.append("}\n")

    return "\n".join(c_code)


def yaml_to_c_tests(yaml_path: str, output_path: str):
    """Convert the YAML test cases at *yaml_path* into a C file at *output_path*.

    Raises:
        ImportError: If PyYAML is not installed.
        ValueError: If the YAML document is not a list, or if two test names
            map to the same C function name.
    """
    if yaml is None:
        raise ImportError("PyYAML is required to read the YAML test cases.")

    with open(yaml_path, "r", encoding="utf-8") as f:
        tests = yaml.safe_load(f)

    # Ensure we have a list of tests
    if not isinstance(tests, list):
        raise ValueError("Expected a YAML list of test cases.")

    # Two names that sanitize to the same identifier would emit duplicate
    # static functions; fail here with a message that points at the cause.
    c_names = {}
    for test in tests:
        c_name = sanitize_name(test["name"])
        if c_name in c_names:
            raise ValueError(
                f"Test names {c_names[c_name]!r} and {test['name']!r} "
                f"both map to the C function name {c_name}."
            )
        c_names[c_name] = test["name"]

    c_tests = [generate_c_test(test) for test in tests]

    program = [
        file_headers,
        f"static const size_t NUM_OF_TESTS = {len(tests)};\n",
        "\n".join(c_tests),
        main_header,
    ]
    program.extend(f"    test_report.tests[i++] = {c_name}();" for c_name in c_names)
    program.append("\n    return test_report;\n}\n")

    output_code = "\n".join(program)

    Path(output_path).write_text(output_code, encoding="utf-8")
    print(f" Generated {len(c_tests)} C tests -> {output_path}")


def main() -> None:
    """Command-line entry point: ``yaml_to_c_tests.py INPUT OUTPUT``."""
    import argparse

    parser = argparse.ArgumentParser(description="Convert YAML SLS tests to C lexer tests.")
    parser.add_argument("input", help="Path to input YAML test file")
    parser.add_argument("output", help="Path to output C file")
    args = parser.parse_args()
    yaml_to_c_tests(args.input, args.output)


if __name__ == "__main__":
    main()