# YREA-SLS/SLS_Tests/yaml_to_c_tests.py
#
# 293 lines
# 12 KiB
# Python

import yaml
import re
from pathlib import Path
# Usage:
#   python3 SLS_Tests/yaml_to_c_tests.py SLS_Tests/cases.yaml SLS_C/tests/lexer_tests.c
# C text written first in the generated file: a banner comment followed by the
# #include lines. The trailing backslash after the opening quotes keeps the
# string from starting with an empty line.
file_headers = """\
// Kyler Olsen
// YREA SLS
// Lexer Tests
// October 2025
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <math.h>
#include "sls/errors.h"
#include "sls/lexer.h"
#include "sls/string.h"
#include "tests/lexer_test_helpers.h"
#include "tests/tests.h"
"""
# Opening of the generated run_lexer_tests() C function. yaml_to_c_tests()
# emits this after the individual test functions, then appends one
# "test_report.tests[i++] = ...();" line per test and the closing
# "return test_report;" / "}" text. NUM_OF_TESTS is defined by the generator
# earlier in the same output file.
main_header = """\
TestsReport run_lexer_tests() {
TestsReport test_report = (TestsReport) {
.section = SLS_STR("lexer_tests"),
.count = NUM_OF_TESTS,
.tests = (TestResult *)malloc(sizeof(TestResult) * NUM_OF_TESTS),
};
size_t i = 0;
"""
# === Helper functions ===
def sanitize_name(name: str) -> str:
    """Build the C function name used for the test called *name*.

    Every character outside ``[a-zA-Z0-9_]`` is replaced by an underscore,
    runs of underscores are squeezed to a single one, and the result is
    prefixed with ``test_``.
    """
    underscored = re.sub(r"[^a-zA-Z0-9_]", "_", name)
    squeezed = re.sub(r"_+", "_", underscored)
    return "test_" + squeezed
def c_string_literal(s: str) -> str:
    """Escape *s* so it can sit between double quotes in C source.

    Handles everything that would otherwise break or silently alter a C
    string literal:

    * backslash and double quote are backslash-escaped;
    * newline, carriage return and tab become ``\\n``, ``\\r`` and ``\\t``;
    * every other control character (below 0x20, and 0x7F) becomes a
      three-digit octal escape.

    All other characters, including non-ASCII ones, are passed through
    unchanged.

    Args:
        s: The raw text to embed.

    Returns:
        The escaped text, without surrounding quotes.
    """
    # Three-digit octal is used instead of \x.. because a C hex escape has no
    # length limit and would swallow any hex digits that follow it.
    table = {code: f"\\{code:03o}" for code in (*range(0x20), 0x7F)}
    table.update({
        ord("\\"): "\\\\",
        ord('"'): '\\"',
        ord("\n"): "\\n",
        ord("\r"): "\\r",
        ord("\t"): "\\t",
    })
    return s.translate(table)
# C enum constant used as the TestIntegerValue tag for each YAML integer type.
_INTEGER_TAGS = {
    "i64": "INTEGER_I64",
    "i32": "INTEGER_I32",
    "i16": "INTEGER_I16",
    "i8": "INTEGER_I8",
    "u64": "INTEGER_U64",
    "u32": "INTEGER_U32",
    "u16": "INTEGER_U16",
    "u8": "INTEGER_U8",
}

# Function-pointer cast emitted in front of every handler name inside a
# TestTokenStringToken entry.
_HANDLER_CAST = "(Boolean (*)(LexerTest *, LexerResult, size_t, void *))"


def _sls_str(value) -> str:
    """Return an ``SLS_STR("...")`` C expression holding *value*.

    *value* is converted with ``str()`` and passed through
    ``c_string_literal`` so that quotes inside it cannot end the C literal
    early.
    """
    return f'SLS_STR("{c_string_literal(str(value))}")'


def _token_handler_and_value(token: dict, unhandled_prefix: str) -> tuple[str, str]:
    """Map one expected-token dict to its C handler and value expression.

    This is the single source of truth for the YAML type -> C mapping; the
    top-level call generator and both token-string generators use it, so
    every token type is supported at every nesting depth.

    Args:
        token: Dict with a ``"type"`` key and, for most types, a ``"value"``.
        unhandled_prefix: Text placed before ``": <type>"`` in the error
            raised for an unknown type.

    Returns:
        ``(handler, val_init)`` where ``handler`` is the name of the C
        ``test_*`` function and ``val_init`` is the C expression passed to it
        as the expected value (a pointer to a compound literal or string).

    Raises:
        ValueError: If the token type is not recognised.
    """
    ttype = token.get("type")
    value = token.get("value")

    # isinstance guard: an unhashable "type" must still end in ValueError.
    integer_tag = _INTEGER_TAGS.get(ttype) if isinstance(ttype, str) else None
    if integer_tag is not None:
        return "test_integer_value", f"&(TestIntegerValue){{{integer_tag}, {value}}}"
    if ttype == "f64":
        return "test_double_value", f"&(double){{{value}}}"
    if ttype == "f32":
        return "test_float_value", f"&(float){{{value}}}"
    if ttype == "char":
        # The C side compares a uint8_t, so emit the character's code point.
        return "test_character_value", f"&(uint8_t){{{ord(value)}}}"  # type: ignore
    if ttype == "string":
        return "test_string_value", f"&{_sls_str(value)}"
    if ttype == "identifier":
        return "test_identifier_value", f"&(TestIdentifierValue){{FALSE, {_sls_str(value)}}}"
    if ttype == "identifier_literal":
        return "test_identifier_value", f"&(TestIdentifierValue){{TRUE, {_sls_str(value)}}}"
    if ttype == "bool":
        flag = "TRUE" if value else "FALSE"
        return "test_boolean_value", f"&(Boolean){{{flag}}}"
    if ttype == "error":
        return "test_for_error", f"&{_sls_str(value)}"
    if ttype == "token_string":
        # Recurse: a token string may contain further token strings.
        return "test_token_string_value", _token_string_value_init(value)  # type: ignore
    raise ValueError(f"{unhandled_prefix}: {ttype}")


def _token_to_c_call(token: dict, idx_var="i") -> str:
    """Generate the C ``test_*`` call that checks one expected token.

    Args:
        token: Expected-token dict (``"type"`` plus ``"value"``).
        idx_var: Name of the C index variable; it is post-incremented in the
            generated call for every token type, including ``error``.

    Returns:
        A C call expression (no trailing semicolon).

    Raises:
        ValueError: If the token type (or a type inside a token string) is
            not recognised.
    """
    if token.get("type") == "token_string":
        return _token_string_c_call(idx_var, token.get("value"))  # type: ignore
    handler, val_init = _token_handler_and_value(token, " Unhandled token type")
    return f"{handler}(&test, result, {idx_var}++, {val_init})"


def _token_string_c_call(idx_var: str, value: list[dict]) -> str:
    """Generate the C call that checks a token-string token.

    Args:
        idx_var: Name of the C index variable, post-incremented in the call.
        value: Expected inner tokens; an empty or missing list yields a
            ``{0, NULL}`` expectation.

    Returns:
        A ``test_token_string_value(...)`` call expression.

    Raises:
        ValueError: If an inner token has an unrecognised type.
    """
    expected = _token_string_value_init(
        value, _unhandled_prefix="Unhandled token type in token string"
    )
    return f"test_token_string_value(&test, result, {idx_var}++, {expected})"


def _token_string_value_init(
    value: list[dict],
    *,
    _unhandled_prefix: str = "Unhandled token type in nested token string",
) -> str:
    """Generate the ``&(TestTokenStringValue){...}`` expression for *value*.

    Args:
        value: Expected inner tokens of the token string.
        _unhandled_prefix: Private; error-message prefix for an unknown type
            among the direct children of this token string.

    Returns:
        ``&(TestTokenStringValue){0, NULL}`` when *value* is empty or missing,
        otherwise a compound literal holding the token count and a
        ``TestTokenStringToken[]`` array with one ``{handler, value}`` entry
        per inner token.

    Raises:
        ValueError: If an inner token has an unrecognised type.
    """
    if not value:
        return "&(TestTokenStringValue){0, NULL}"

    entries = []
    for inner_token in value:
        handler, val_init = _token_handler_and_value(inner_token, _unhandled_prefix)
        entries.append(f"{{{_HANDLER_CAST}{handler}, {val_init}}}")

    joined = ", ".join(entries)
    tokens_array = f"(TestTokenStringToken[]){{{joined}}}"
    return f"&(TestTokenStringValue){{{len(value)}, {tokens_array}}}"
def token_to_c_call(token: dict, idx_var="i") -> str:
    """Wrap the check for *token* in a C early-return statement.

    The generated statement runs the ``test_*`` call produced by
    ``_token_to_c_call`` and returns ``test.result`` from the C test function
    when that call evaluates to true.
    """
    check_expr = _token_to_c_call(token, idx_var)
    return "if (" + check_expr + ") return test.result;"
def generate_c_test(test: dict) -> str:
    """Convert a single YAML test entry to the source of a C test function.

    Args:
        test: Test case dict with a ``"name"``, the ``"code"`` to lex and an
            optional ``"tokens"`` list of expected-token dicts.

    Returns:
        The text of a ``static TestResult test_<name>()`` function, ending
        with a newline after the closing brace.

    Raises:
        KeyError: If ``"name"`` or ``"code"`` is missing.
        ValueError: If an expected token has an unrecognised type.
    """
    name = test["name"]
    c_name = sanitize_name(name)
    # The display name goes inside a C string literal just like the code, so
    # it needs the same escaping.
    name_literal = c_string_literal(name)
    code = c_string_literal(test["code"])
    # `tokens:` with no value loads as None; treat it like a missing key.
    tokens = test.get("tokens") or []

    # Function header
    c_code = [
        f"static TestResult {c_name}() {{",
        f' LexerTest test = start_up_test(SLS_STR("{name_literal}"), SLS_STR("{code}"));',
        " LexerResult result = lexical_analysis(&test.lexer_info);",
        " if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error);",
        " size_t i = 0;",
    ]

    # Token checks
    c_code.extend(" " + token_to_c_call(token) for token in tokens)

    # The EOF check is only emitted when no error token is expected.
    if not any(token.get("type") == "error" for token in tokens):
        c_code.append(" if (test_eof_value(&test, result, i++, 0)) return test.result;")
    c_code.append(" return pass_test(&test, result);")
    c_code.append("}\n")
    return "\n".join(c_code)
def yaml_to_c_tests(yaml_path: str, output_path: str):
    """Read YAML test cases and write the matching C test file.

    The output consists of the shared file header, a ``NUM_OF_TESTS``
    constant, one C function per test case, and a ``run_lexer_tests()``
    function that calls each of them in order.

    Args:
        yaml_path: Path of the YAML file holding a list of test cases.
        output_path: Path of the C file to (over)write.

    Raises:
        ValueError: If the YAML document is not a list.
    """
    with open(yaml_path, "r", encoding="utf-8") as stream:
        cases = yaml.safe_load(stream)

    # Anything but a top-level list cannot be a set of test cases.
    if not isinstance(cases, list):
        raise ValueError("Expected a YAML list of test cases.")

    functions = [generate_c_test(case) for case in cases]
    runner_calls = [
        f" test_report.tests[i++] = {sanitize_name(case['name'])}();"
        for case in cases
    ]

    sections = [
        file_headers,
        f"static const size_t NUM_OF_TESTS = {len(cases)};\n",
        "\n".join(functions),
        main_header,
        *runner_calls,
        "\n return test_report;\n}\n",
    ]
    Path(output_path).write_text("\n".join(sections), encoding="utf-8")
    print(f" Generated {len(functions)} C tests -> {output_path}")
# === Command-line entry point ===
if __name__ == "__main__":
    import argparse

    # Two positional arguments: the YAML source and the C file to generate.
    cli = argparse.ArgumentParser(
        description="Convert YAML SLS tests to C lexer tests.",
    )
    for arg_name, arg_help in (
        ("input", "Path to input YAML test file"),
        ("output", "Path to output C file"),
    ):
        cli.add_argument(arg_name, help=arg_help)
    options = cli.parse_args()
    yaml_to_c_tests(options.input, options.output)