"""Generate the C lexer test suite from a YAML list of test cases.

Each YAML entry provides a ``name``, the ``code`` to lex and the ``tokens``
the lexer is expected to produce.  Every entry becomes one
``static TestResult test_<name>()`` C function, and a ``run_lexer_tests()``
driver that calls all of them is appended to the output file.
"""

import re
from pathlib import Path

# python3 SLS_Tests/yaml_to_c_tests.py SLS_Tests/cases.yaml SLS_C/tests/lexer_tests.c

# NOTE(review): the five bare `#include` directives below carry no header
# name in this copy of the script (presumably angle-bracket names such as
# <stdlib.h> were stripped at some point -- TODO confirm and restore them;
# the generated C will not compile with empty #include lines).
file_headers = """\
// Kyler Olsen
// YREA SLS
// Lexer Tests
// October 2025

#include
#include
#include
#include
#include

#include "sls/errors.h"
#include "sls/lexer.h"
#include "sls/string.h"
#include "tests/lexer_test_helpers.h"
#include "tests/tests.h"

"""

main_header = """\
TestsReport run_lexer_tests() {
    TestsReport test_report = (TestsReport) {
        .section = SLS_STR("lexer_tests"),
        .count = NUM_OF_TESTS,
        .tests = (TestResult *)malloc(sizeof(TestResult) * NUM_OF_TESTS),
    };
    size_t i = 0;

"""

# YAML integer token type -> C enumerator placed in TestIntegerValue.
_INTEGER_KINDS = {
    "i64": "INTEGER_I64",
    "i32": "INTEGER_I32",
    "i16": "INTEGER_I16",
    "i8": "INTEGER_I8",
    "u64": "INTEGER_U64",
    "u32": "INTEGER_U32",
    "u16": "INTEGER_U16",
    "u8": "INTEGER_U8",
}

# Cast applied to every handler stored in a TestTokenStringToken entry.
_HANDLER_CAST = "(Boolean (*)(LexerTest *, LexerResult, size_t, void *))"


# === Helper functions ===

def sanitize_name(name: str) -> str:
    """Convert a test name into a valid C function name.

    Every character outside ``[a-zA-Z0-9_]`` becomes ``_``, runs of ``_`` are
    collapsed to one, and the result is prefixed with ``test_``.  Distinct
    names can therefore map to the same identifier (``"a b"`` and ``"a-b"``).
    """
    name = re.sub(r"[^a-zA-Z0-9_]", "_", name)
    name = re.sub(r"_+", "_", name)
    return f"test_{name}"


def c_string_literal(s: str) -> str:
    """Escape *s* for embedding between double quotes in a C string literal.

    Double quotes are escaped, and raw newline / carriage-return characters
    are written as ``\\n`` / ``\\r`` because a C string literal cannot span
    source lines.  Backslashes are passed through unchanged, so an escape
    sequence spelled out in the YAML reaches the C compiler as written.
    """
    return s.replace('"', '\\"').replace("\n", "\\n").replace("\r", "\\r")


def _sls_str(value) -> str:
    """Return an ``SLS_STR("...")`` expression holding *value*, escaped."""
    # str() keeps non-string YAML scalars (e.g. an unquoted number) working.
    return f'SLS_STR("{c_string_literal(str(value))}")'


def _handler_and_value(token: dict, unhandled_msg: str) -> tuple[str, str]:
    """Map one expected token to its C test handler and value expression.

    Returns ``(handler_name, value_expr)`` where *value_expr* is the C
    expression passed as the handler's last argument.  Raises ``ValueError``
    with the message ``f"{unhandled_msg}: {type}"`` for an unknown type.
    """
    ttype = token.get("type")
    value = token.get("value")

    integer_kind = _INTEGER_KINDS.get(ttype) if isinstance(ttype, str) else None
    if integer_kind is not None:
        return (
            "test_integer_value",
            f"&(TestIntegerValue){{{integer_kind}, {value}}}",
        )
    if ttype == "f64":
        return "test_double_value", f"&(double){{{value}}}"
    if ttype == "f32":
        return "test_float_value", f"&(float){{{value}}}"
    if ttype == "char":
        # ord() requires a one-character string; the code point is emitted
        # as a uint8_t initializer.
        return "test_character_value", f"&(uint8_t){{{ord(value)}}}"  # type: ignore
    if ttype == "string":
        return "test_string_value", f"&{_sls_str(value)}"
    if ttype == "identifier":
        return (
            "test_identifier_value",
            f"&(TestIdentifierValue){{FALSE, {_sls_str(value)}}}",
        )
    if ttype == "identifier_literal":
        return (
            "test_identifier_value",
            f"&(TestIdentifierValue){{TRUE, {_sls_str(value)}}}",
        )
    if ttype == "bool":
        c_bool = "TRUE" if value else "FALSE"
        return "test_boolean_value", f"&(Boolean){{{c_bool}}}"
    if ttype == "error":
        return "test_for_error", f"&{_sls_str(value)}"
    if ttype == "token_string":
        # A token string used as a value is always rendered as a nested one.
        return "test_token_string_value", _token_string_value_init(value or [])
    raise ValueError(f"{unhandled_msg}: {ttype}")


def _token_string_init(value: list[dict], unhandled_msg: str) -> str:
    """Render a ``&(TestTokenStringValue){count, tokens}`` expression."""
    if not value:
        return "&(TestTokenStringValue){0, NULL}"

    entries = []
    for inner_token in value:
        handler, val_init = _handler_and_value(inner_token, unhandled_msg)
        entries.append(f"{{{_HANDLER_CAST}{handler}, {val_init}}}")

    tokens_array = f'(TestTokenStringToken[]){{{", ".join(entries)}}}'
    return f"&(TestTokenStringValue){{{len(value)}, {tokens_array}}}"


def _token_to_c_call(token: dict, idx_var="i") -> str:
    """Generate a C 'test_*' call for *token*, indexing with *idx_var*.

    Raises ``ValueError`` if the token's type is not recognised.
    """
    if token.get("type") == "token_string":
        return _token_string_c_call(idx_var, token.get("value"))  # type: ignore
    handler, val_init = _handler_and_value(token, " Unhandled token type")
    return f"{handler}(&test, result, {idx_var}++, {val_init})"


def _token_string_c_call(idx_var: str, value: list[dict]) -> str:
    """Generate the C call that checks a top-level token string value."""
    val_init = _token_string_init(value, "Unhandled token type in token string")
    return f"test_token_string_value(&test, result, {idx_var}++, {val_init})"


def _token_string_value_init(value: list[dict]) -> str:
    """Generate initialization code for a TestTokenStringValue (for nested token strings)."""
    return _token_string_init(
        value, "Unhandled token type in nested token string"
    )


def token_to_c_call(token: dict, idx_var="i") -> str:
    """Generate the C statement that checks *token* and returns on failure."""
    return f"if ({_token_to_c_call(token, idx_var)}) return test.result;"


def generate_c_test(test: dict) -> str:
    """Convert a single YAML test entry to a C test function.

    *test* must contain ``name`` and ``code``; ``tokens`` is optional and a
    missing or empty value means no token checks.  An end-of-file check is
    appended unless one of the expected tokens is of type ``error``.
    """
    name = test["name"]
    c_name = sanitize_name(name)
    code = c_string_literal(test["code"])
    tokens = test.get("tokens") or []  # `tokens:` with no value loads as None

    # Function header
    c_code = [
        f"static TestResult {c_name}() {{",
        f'    LexerTest test = start_up_test(SLS_STR("{c_string_literal(name)}"), SLS_STR("{code}"));',
        "    LexerResult result = lexical_analysis(&test.lexer_info);",
        "    if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error);",
        "    size_t i = 0;",
    ]

    # Token checks
    c_code.extend("    " + token_to_c_call(token) for token in tokens)

    # EOF check and return
    if not any(token.get("type") == "error" for token in tokens):
        c_code.append("    if (test_eof_value(&test, result, i++, 0)) return test.result;")
    c_code.append("    return pass_test(&test, result);")
    c_code.append("}\n")

    return "\n".join(c_code)


def _render_program(tests: list[dict]) -> str:
    """Render the complete C source file for the given list of test cases.

    Raises ``ValueError`` when two test names map to the same C function
    name, since the generated file would define that function twice.
    """
    c_names = [sanitize_name(test["name"]) for test in tests]

    seen = set()
    duplicates = []
    for c_name in c_names:
        if c_name in seen and c_name not in duplicates:
            duplicates.append(c_name)
        seen.add(c_name)
    if duplicates:
        raise ValueError(
            "Test names map to duplicate C function names: "
            + ", ".join(duplicates)
        )

    c_tests = [generate_c_test(test) for test in tests]

    program = [
        file_headers,
        f"static const size_t NUM_OF_TESTS = {len(tests)};\n",
        "\n".join(c_tests),
        main_header,
    ]
    program.extend(f"    test_report.tests[i++] = {c_name}();" for c_name in c_names)
    program.append("\n    return test_report;\n}\n")

    return "\n".join(program)


def yaml_to_c_tests(yaml_path: str, output_path: str) -> None:
    """Convert YAML test cases into C test code.

    Reads *yaml_path*, writes the generated C source to *output_path* and
    prints a one-line summary.  Raises ``ValueError`` if the YAML document
    is not a list.
    """
    # Imported here so the code-generation helpers above can be imported
    # without PyYAML being installed.
    import yaml

    with open(yaml_path, "r", encoding="utf-8") as f:
        tests = yaml.safe_load(f)

    # Ensure we have a list of tests
    if not isinstance(tests, list):
        raise ValueError("Expected a YAML list of test cases.")

    output_code = _render_program(tests)

    Path(output_path).write_text(output_code, encoding="utf-8")
    print(f" Generated {len(tests)} C tests -> {output_path}")


def main() -> None:
    """Parse the command line and run the YAML-to-C conversion."""
    import argparse

    parser = argparse.ArgumentParser(description="Convert YAML SLS tests to C lexer tests.")
    parser.add_argument("input", help="Path to input YAML test file")
    parser.add_argument("output", help="Path to output C file")
    args = parser.parse_args()

    yaml_to_c_tests(args.input, args.output)


# === Example usage ===
if __name__ == "__main__":
    main()