YREA-SLS/SLS_Tests/yaml_to_c_tests.py

306 lines
13 KiB
Python

import yaml
import re
from pathlib import Path
# Usage: python3 SLS_Tests/yaml_to_c_tests.py SLS_Tests/cases.yaml SLS_C/tests/lexer_tests.c
# Preamble of the generated C file: an attribution comment followed by the
# #include lines. yaml_to_c_tests() puts this string first in its output.
# The text is emitted verbatim, so any edit here changes the generated file.
file_headers = """\
// Kyler Olsen
// YREA SLS
// Lexer Tests
// October 2025
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <math.h>
#include "sls/errors.h"
#include "sls/lexer.h"
#include "sls/string.h"
#include "tests/lexer_test_helpers.h"
#include "tests/tests.h"
"""
# Opening lines of the generated run_lexer_tests() C function. It refers to
# NUM_OF_TESTS, which yaml_to_c_tests() emits earlier in the same file, and is
# deliberately left unterminated: yaml_to_c_tests() appends one
# `test_report.tests[i++] = ...();` line per test and then the closing
# `return test_report;` / `}` after this string.
main_header = """\
TestsReport run_lexer_tests() {
TestsReport test_report = (TestsReport) {
.section = SLS_STR("lexer_tests"),
.count = NUM_OF_TESTS,
.tests = (TestResult *)malloc(sizeof(TestResult) * NUM_OF_TESTS),
};
size_t i = 0;
"""
# === Helper functions ===
def sanitize_name(name: str) -> str:
    """Derive a C function name from a free-form test name.

    Every character that is not an ASCII letter, digit or underscore becomes
    an underscore, runs of underscores are merged into a single one, and the
    result is prefixed with ``test_``.
    """
    underscored = re.sub(r"[^a-zA-Z0-9_]", "_", name)
    collapsed = re.sub(r"_+", "_", underscored)
    return f"test_{collapsed}"
def c_string_literal(s: str) -> str:
"""Escape quotes for embedding in C string literals."""
return s.replace('"', '\\"')
def _token_to_c_call(token: dict, idx_var="i") -> str:
"""Generate a C 'test_*_value' call based on token type."""
ttype = token.get("type")
value = token.get("value")
if ttype == "i64":
return f'test_integer_value(&test, result, {idx_var}++, &(TestIntegerValue){{INTEGER_I64, {value}}})'
elif ttype == "i32":
return f'test_integer_value(&test, result, {idx_var}++, &(TestIntegerValue){{INTEGER_I32, {value}}})'
elif ttype == "i16":
return f'test_integer_value(&test, result, {idx_var}++, &(TestIntegerValue){{INTEGER_I16, {value}}})'
elif ttype == "i8":
return f'test_integer_value(&test, result, {idx_var}++, &(TestIntegerValue){{INTEGER_I8, {value}}})'
elif ttype == "u64":
return f'test_integer_value(&test, result, {idx_var}++, &(TestIntegerValue){{INTEGER_U64, {value}}})'
elif ttype == "u32":
return f'test_integer_value(&test, result, {idx_var}++, &(TestIntegerValue){{INTEGER_U32, {value}}})'
elif ttype == "u16":
return f'test_integer_value(&test, result, {idx_var}++, &(TestIntegerValue){{INTEGER_U16, {value}}})'
elif ttype == "u8":
return f'test_integer_value(&test, result, {idx_var}++, &(TestIntegerValue){{INTEGER_U8, {value}}})'
elif ttype == "f64":
return f'test_double_value(&test, result, {idx_var}++, &(double){{{value}}})'
elif ttype == "f32":
return f'test_float_value(&test, result, {idx_var}++, &(float){{{value}}})'
elif ttype == "char":
return f'test_character_value(&test, result, {idx_var}++, &(uint8_t){{{ord(value)}}})' # type: ignore
elif ttype == "string":
return f'test_string_value(&test, result, {idx_var}++, &SLS_STR("{value}"))' # type: ignore
elif ttype == "identifier":
return f'test_identifier_value(&test, result, {idx_var}++, &(TestIdentifierValue){{FALSE, SLS_STR("{value}")}})' # type: ignore
elif ttype == "identifier_literal":
return f'test_identifier_value(&test, result, {idx_var}++, &(TestIdentifierValue){{TRUE, SLS_STR("{value}")}})' # type: ignore
elif ttype == "bool":
return f'test_boolean_value(&test, result, {idx_var}++, &(Boolean){{{"TRUE" if value else "FALSE"}}})' # type: ignore
elif ttype == "error":
return f'test_for_error(&test, result, i++, SLS_STR("{c_string_literal(value)}"))' # type: ignore
elif ttype == "token_string":
return _token_string_c_call(token, idx_var, value) # type: ignore
else:
raise ValueError(f' Unhandled token type: {ttype}')
def _token_string_c_call(token: dict, idx_var: str, value: list[dict]) -> str:
"""Generate C code for testing a token string value."""
# Build the array of TestTokenValue structures
token_values = []
for inner_token in value:
inner_type = inner_token.get("type")
inner_value = inner_token.get("value")
# Map token types to their C test handler and value structure
if inner_type == "i64":
handler = "test_integer_value"
value_struct = f"&(TestIntegerValue){{INTEGER_I64, {inner_value}}}"
elif inner_type == "i32":
handler = "test_integer_value"
value_struct = f"&(TestIntegerValue){{INTEGER_I32, {inner_value}}}"
elif inner_type == "i16":
handler = "test_integer_value"
value_struct = f"&(TestIntegerValue){{INTEGER_I16, {inner_value}}}"
elif inner_type == "i8":
handler = "test_integer_value"
value_struct = f"&(TestIntegerValue){{INTEGER_I8, {inner_value}}}"
elif inner_type == "u64":
handler = "test_integer_value"
value_struct = f"&(TestIntegerValue){{INTEGER_U64, {inner_value}}}"
elif inner_type == "u32":
handler = "test_integer_value"
value_struct = f"&(TestIntegerValue){{INTEGER_U32, {inner_value}}}"
elif inner_type == "u16":
handler = "test_integer_value"
value_struct = f"&(TestIntegerValue){{INTEGER_U16, {inner_value}}}"
elif inner_type == "u8":
handler = "test_integer_value"
value_struct = f"&(TestIntegerValue){{INTEGER_U8, {inner_value}}}"
elif inner_type == "f64":
handler = "test_double_value"
value_struct = f"&(double){{{inner_value}}}"
elif inner_type == "f32":
handler = "test_float_value"
value_struct = f"&(float){{{inner_value}}}"
elif inner_type == "char":
handler = "test_character_value"
value_struct = f"&(uint8_t){{{ord(inner_value)}}}" # type: ignore
elif inner_type == "string":
handler = "test_string_value"
value_struct = f'&SLS_STR("{c_string_literal(inner_value)}")' # type: ignore
elif inner_type == "identifier":
handler = "test_identifier_value"
value_struct = f'&(TestIdentifierValue){{FALSE, SLS_STR("{inner_value}")}}'
elif inner_type == "identifier_literal":
handler = "test_identifier_value"
value_struct = f'&(TestIdentifierValue){{TRUE, SLS_STR("{inner_value}")}}'
elif inner_type == "bool":
handler = "test_boolean_value"
bool_str = "TRUE" if inner_value else "FALSE"
value_struct = f"&(Boolean){{{bool_str}}}"
elif inner_type == "error":
handler = "test_for_error"
value_struct = f'SLS_STR("{c_string_literal(inner_value)}")' # type: ignore
elif inner_type == "token_string":
# Recursive case: nested token string
handler = "test_token_string_value"
nested_tokens = inner_value if isinstance(inner_value, list) else []
value_struct = _build_token_string_value_struct(nested_tokens)
else:
raise ValueError(f'Unhandled inner token type in token_string: {inner_type}')
token_values.append(f"{{.token_handler = {handler}, .value = {value_struct}}}")
# Generate the TestTokenStringValue initialization
num_tokens = len(value)
if num_tokens == 0:
# Empty token string
return f'test_token_string_value(&test, result, {idx_var}, &(TestTokenStringValue){{0, NULL}})'
else:
# Token string with values
values_array = f"(TestTokenValue[]){{{', '.join(token_values)}}}"
return f'test_token_string_value(&test, result, {idx_var}, &(TestTokenStringValue){{{num_tokens}, {values_array}}})'
def _build_token_string_value_struct(nested_tokens: list[dict]) -> str:
"""Helper to build a TestTokenStringValue structure for nested token strings."""
if not nested_tokens:
return "&(TestTokenStringValue){0, NULL}"
token_values = []
for inner_token in nested_tokens:
inner_type = inner_token.get("type")
inner_value = inner_token.get("value")
if inner_type == "i64":
handler = "test_integer_value"
value_struct = f"&(TestIntegerValue){{INTEGER_I64, {inner_value}}}"
elif inner_type == "i32":
handler = "test_integer_value"
value_struct = f"&(TestIntegerValue){{INTEGER_I32, {inner_value}}}"
elif inner_type == "i16":
handler = "test_integer_value"
value_struct = f"&(TestIntegerValue){{INTEGER_I16, {inner_value}}}"
elif inner_type == "i8":
handler = "test_integer_value"
value_struct = f"&(TestIntegerValue){{INTEGER_I8, {inner_value}}}"
elif inner_type == "u64":
handler = "test_integer_value"
value_struct = f"&(TestIntegerValue){{INTEGER_U64, {inner_value}}}"
elif inner_type == "u32":
handler = "test_integer_value"
value_struct = f"&(TestIntegerValue){{INTEGER_U32, {inner_value}}}"
elif inner_type == "u16":
handler = "test_integer_value"
value_struct = f"&(TestIntegerValue){{INTEGER_U16, {inner_value}}}"
elif inner_type == "u8":
handler = "test_integer_value"
value_struct = f"&(TestIntegerValue){{INTEGER_U8, {inner_value}}}"
elif inner_type == "f64":
handler = "test_double_value"
value_struct = f"&(double){{{inner_value}}}"
elif inner_type == "f32":
handler = "test_float_value"
value_struct = f"&(float){{{inner_value}}}"
elif inner_type == "char":
handler = "test_character_value"
value_struct = f"&(uint8_t){{{ord(inner_value)}}}" # type: ignore
elif inner_type == "string":
handler = "test_string_value"
value_struct = f'&SLS_STR("{c_string_literal(inner_value)}")' # type: ignore
elif inner_type == "identifier":
handler = "test_identifier_value"
value_struct = f'&(TestIdentifierValue){{FALSE, SLS_STR("{inner_value}")}}'
elif inner_type == "identifier_literal":
handler = "test_identifier_value"
value_struct = f'&(TestIdentifierValue){{TRUE, SLS_STR("{inner_value}")}}'
elif inner_type == "bool":
handler = "test_boolean_value"
bool_str = "TRUE" if inner_value else "FALSE"
value_struct = f"&(Boolean){{{bool_str}}}"
elif inner_type == "error":
handler = "test_for_error"
value_struct = f'SLS_STR("{c_string_literal(inner_value)}")' # type: ignore
elif inner_type == "token_string":
# Recursive case
handler = "test_token_string_value"
value_struct = _build_token_string_value_struct(inner_value if isinstance(inner_value, list) else [])
else:
raise ValueError(f'Unhandled nested token type in token_string: {inner_type}')
token_values.append(f"{{.token_handler = {handler}, .value = {value_struct}}}")
num_tokens = len(nested_tokens)
values_array = f"(TestTokenValue[]){{{', '.join(token_values)}}}"
return f"&(TestTokenStringValue){{{num_tokens}, {values_array}}}"
def token_to_c_call(token: dict, idx_var="i") -> str:
    """Wrap one token check in a C ``if`` that returns early.

    The check expression comes from ``_token_to_c_call``; when it evaluates
    true the generated test function returns ``test.result`` immediately.
    """
    check_expr = _token_to_c_call(token, idx_var)
    return f"if ({check_expr}) return test.result;"
def generate_c_test(test: dict) -> str:
    """Render one YAML test case as the source of a static C test function.

    The generated function starts the test, runs ``lexical_analysis``, fails
    early if the lexer itself reports ``SLS_ERROR``, checks each expected
    token in order, and finally checks for EOF unless one of the expected
    top-level tokens is an ``error``.

    Args:
        test: Mapping with a ``name`` and the ``code`` to lex, plus an
            optional ``tokens`` list describing the expected lexer output.
            A missing or null ``tokens`` entry means "no tokens".

    Returns:
        The C function definition as a string, ending in a newline.

    Raises:
        KeyError: If ``name`` or ``code`` is missing.
        ValueError: If a token has a type the token helpers do not handle.
    """
    name = test["name"]
    c_name = sanitize_name(name)
    # The name is embedded in a C string literal just like the code, so it
    # needs the same escaping (a quote in a test name would break the output).
    name_literal = c_string_literal(name)
    code_literal = c_string_literal(test["code"])
    # `tokens: null` in YAML yields None rather than the .get() default.
    tokens = test.get("tokens") or []

    # Function header
    c_code = [
        f"static TestResult {c_name}() {{",
        f' LexerTest test = start_up_test(SLS_STR("{name_literal}"), SLS_STR("{code_literal}"));',
        " LexerResult result = lexical_analysis(&test.lexer_info);",
        " if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error);",
        " size_t i = 0;",
    ]

    # Token checks
    c_code.extend(" " + token_to_c_call(token) for token in tokens)

    # EOF check and return; skipped when an error token is expected.
    if not any(token.get("type") == "error" for token in tokens):
        c_code.append(" if (test_eof_value(&test, result, i++, 0)) return test.result;")
    c_code.append(" return pass_test(&test, result);")
    c_code.append("}\n")
    return "\n".join(c_code)
def yaml_to_c_tests(yaml_path: str, output_path: str):
    """Convert a YAML file of lexer test cases into a C test source file.

    The output consists of the ``file_headers`` preamble, a ``NUM_OF_TESTS``
    constant, one static C function per test case, and a ``run_lexer_tests``
    function that calls each of them in YAML order.

    Args:
        yaml_path: Path of the YAML file; its top level must be a list.
        output_path: Path of the C file to write (overwritten if present).

    Raises:
        ValueError: If the YAML top level is not a list, or if two test names
            map to the same C function name.
        KeyError: If a test case lacks a required key.
    """
    with open(yaml_path, "r", encoding="utf-8") as f:
        tests = yaml.safe_load(f)

    # Ensure we have a list of tests
    if not isinstance(tests, list):
        raise ValueError("Expected a YAML list of test cases.")

    # Compute each C function name once and reject collisions up front: two
    # definitions of the same static function would not compile.
    c_names = [sanitize_name(test["name"]) for test in tests]
    first_owner = {}
    for test, c_name in zip(tests, c_names):
        if c_name in first_owner:
            raise ValueError(
                f"Test names {first_owner[c_name]!r} and {test['name']!r} "
                f"both map to the C function name {c_name!r}."
            )
        first_owner[c_name] = test["name"]

    c_tests = [generate_c_test(test) for test in tests]

    program = [
        file_headers,
        f"static const size_t NUM_OF_TESTS = {len(tests)};\n",
        "\n".join(c_tests),
        main_header,
    ]
    program.extend(f" test_report.tests[i++] = {c_name}();" for c_name in c_names)
    program.append("\n return test_report;\n}\n")

    output_code = "\n".join(program)
    Path(output_path).write_text(output_code, encoding="utf-8")
    print(f" Generated {len(c_tests)} C tests -> {output_path}")
# === Command-line entry point ===
if __name__ == "__main__":
    import argparse

    # Two positional arguments: the YAML case file to read, then the C file
    # to write.
    cli = argparse.ArgumentParser(
        description="Convert YAML SLS tests to C lexer tests."
    )
    for arg_name, arg_help in (
        ("input", "Path to input YAML test file"),
        ("output", "Path to output C file"),
    ):
        cli.add_argument(arg_name, help=arg_help)

    options = cli.parse_args()
    yaml_to_c_tests(options.input, options.output)