293 lines
12 KiB
Python
293 lines
12 KiB
Python
import yaml
|
|
import re
|
|
from pathlib import Path
|
|
|
|
# python3 SLS_Tests/yaml_to_c_tests.py SLS_Tests/cases.yaml SLS_C/tests/lexer_tests.c
|
|
|
|
# C preamble (author banner plus #include lines) that yaml_to_c_tests places
# first in the generated test file.
file_headers = """\
|
|
// Kyler Olsen
|
|
// YREA SLS
|
|
// Lexer Tests
|
|
// October 2025
|
|
|
|
#include <stdlib.h>
|
|
#include <stdint.h>
|
|
#include <string.h>
|
|
#include <stdio.h>
|
|
#include <math.h>
|
|
|
|
#include "sls/errors.h"
|
|
#include "sls/lexer.h"
|
|
#include "sls/string.h"
|
|
#include "tests/lexer_test_helpers.h"
|
|
#include "tests/tests.h"
|
|
|
|
"""
|
|
|
|
# Opening of the generated run_lexer_tests() C function. It is deliberately
# left unterminated: yaml_to_c_tests appends one `test_report.tests[i++] = ...`
# line per test and then the closing `return test_report;` / `}`.
main_header = """\
|
|
TestsReport run_lexer_tests() {
|
|
TestsReport test_report = (TestsReport) {
|
|
.section = SLS_STR("lexer_tests"),
|
|
.count = NUM_OF_TESTS,
|
|
.tests = (TestResult *)malloc(sizeof(TestResult) * NUM_OF_TESTS),
|
|
};
|
|
|
|
size_t i = 0;
|
|
"""
|
|
|
|
# === Helper functions ===
|
|
|
|
def sanitize_name(name: str) -> str:
    """Derive the C function name for a test case.

    Every character outside ``[a-zA-Z0-9_]`` becomes an underscore, runs of
    underscores are squeezed to a single one, and the result is prefixed
    with ``test_``.
    """
    substitutions = (
        (r"[^a-zA-Z0-9_]", "_"),  # anything that is not legal in a C identifier
        (r"_+", "_"),  # squeeze the underscore runs left by the first pass
    )
    identifier = name
    for pattern, replacement in substitutions:
        identifier = re.sub(pattern, replacement, identifier)
    return "test_" + identifier
|
|
|
|
def c_string_literal(s: str) -> str:
    """Escape text so it can sit between the quotes of a C string literal.

    Double quotes are backslash-escaped. Raw newline, carriage-return and
    tab characters are rewritten as ``\\n``, ``\\r`` and ``\\t`` so a
    multi-line YAML value no longer splits the literal across source lines,
    which is not valid C.

    Backslashes are intentionally passed through unchanged: existing test
    cases may already contain C escape sequences written out by hand, and
    escaping them here would change what those cases mean.

    Args:
        s: Text to embed.

    Returns:
        The escaped text, without surrounding quotes.
    """
    return (
        s.replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
        .replace("\t", "\\t")
    )
|
|
|
|
def _token_to_c_call(token: dict, idx_var="i") -> str:
|
|
"""Generate a C 'test_*_value' call based on token type."""
|
|
ttype = token.get("type")
|
|
value = token.get("value")
|
|
|
|
if ttype == "i64":
|
|
return f'test_integer_value(&test, result, {idx_var}++, &(TestIntegerValue){{INTEGER_I64, {value}}})'
|
|
elif ttype == "i32":
|
|
return f'test_integer_value(&test, result, {idx_var}++, &(TestIntegerValue){{INTEGER_I32, {value}}})'
|
|
elif ttype == "i16":
|
|
return f'test_integer_value(&test, result, {idx_var}++, &(TestIntegerValue){{INTEGER_I16, {value}}})'
|
|
elif ttype == "i8":
|
|
return f'test_integer_value(&test, result, {idx_var}++, &(TestIntegerValue){{INTEGER_I8, {value}}})'
|
|
elif ttype == "u64":
|
|
return f'test_integer_value(&test, result, {idx_var}++, &(TestIntegerValue){{INTEGER_U64, {value}}})'
|
|
elif ttype == "u32":
|
|
return f'test_integer_value(&test, result, {idx_var}++, &(TestIntegerValue){{INTEGER_U32, {value}}})'
|
|
elif ttype == "u16":
|
|
return f'test_integer_value(&test, result, {idx_var}++, &(TestIntegerValue){{INTEGER_U16, {value}}})'
|
|
elif ttype == "u8":
|
|
return f'test_integer_value(&test, result, {idx_var}++, &(TestIntegerValue){{INTEGER_U8, {value}}})'
|
|
elif ttype == "f64":
|
|
return f'test_double_value(&test, result, {idx_var}++, &(double){{{value}}})'
|
|
elif ttype == "f32":
|
|
return f'test_float_value(&test, result, {idx_var}++, &(float){{{value}}})'
|
|
elif ttype == "char":
|
|
return f'test_character_value(&test, result, {idx_var}++, &(uint8_t){{{ord(value)}}})' # type: ignore
|
|
elif ttype == "string":
|
|
return f'test_string_value(&test, result, {idx_var}++, &SLS_STR("{value}"))' # type: ignore
|
|
elif ttype == "identifier":
|
|
return f'test_identifier_value(&test, result, {idx_var}++, &(TestIdentifierValue){{FALSE, SLS_STR("{value}")}})' # type: ignore
|
|
elif ttype == "identifier_literal":
|
|
return f'test_identifier_value(&test, result, {idx_var}++, &(TestIdentifierValue){{TRUE, SLS_STR("{value}")}})' # type: ignore
|
|
elif ttype == "bool":
|
|
return f'test_boolean_value(&test, result, {idx_var}++, &(Boolean){{{"TRUE" if value else "FALSE"}}})' # type: ignore
|
|
elif ttype == "error":
|
|
return f'test_for_error(&test, result, i++, &SLS_STR("{c_string_literal(value)}"))' # type: ignore
|
|
elif ttype == "token_string":
|
|
return _token_string_c_call(idx_var, value) # type: ignore
|
|
else:
|
|
raise ValueError(f' Unhandled token type: {ttype}')
|
|
|
|
def _token_string_c_call(idx_var: str, value: list[dict]) -> str:
|
|
"""Generate C code for testing a token string value."""
|
|
if not value: # Empty token string
|
|
return (
|
|
f'test_token_string_value(&test, result, {idx_var}++, '
|
|
f'&(TestTokenStringValue){{0, NULL}})'
|
|
)
|
|
|
|
# Generate token handler calls for each token in the string
|
|
token_handlers = []
|
|
for i, inner_token in enumerate(value):
|
|
inner_type = inner_token.get("type")
|
|
inner_value = inner_token.get("value")
|
|
|
|
# Determine the handler function and value initialization
|
|
if inner_type == "i64":
|
|
handler = "test_integer_value"
|
|
val_init = f'&(TestIntegerValue){{INTEGER_I64, {inner_value}}}'
|
|
elif inner_type == "i32":
|
|
handler = "test_integer_value"
|
|
val_init = f'&(TestIntegerValue){{INTEGER_I32, {inner_value}}}'
|
|
elif inner_type == "i16":
|
|
handler = "test_integer_value"
|
|
val_init = f'&(TestIntegerValue){{INTEGER_I16, {inner_value}}}'
|
|
elif inner_type == "i8":
|
|
handler = "test_integer_value"
|
|
val_init = f'&(TestIntegerValue){{INTEGER_I8, {inner_value}}}'
|
|
elif inner_type == "u64":
|
|
handler = "test_integer_value"
|
|
val_init = f'&(TestIntegerValue){{INTEGER_U64, {inner_value}}}'
|
|
elif inner_type == "u32":
|
|
handler = "test_integer_value"
|
|
val_init = f'&(TestIntegerValue){{INTEGER_U32, {inner_value}}}'
|
|
elif inner_type == "u16":
|
|
handler = "test_integer_value"
|
|
val_init = f'&(TestIntegerValue){{INTEGER_U16, {inner_value}}}'
|
|
elif inner_type == "u8":
|
|
handler = "test_integer_value"
|
|
val_init = f'&(TestIntegerValue){{INTEGER_U8, {inner_value}}}'
|
|
elif inner_type == "f64":
|
|
handler = "test_double_value"
|
|
val_init = f'&(double){{{inner_value}}}'
|
|
elif inner_type == "f32":
|
|
handler = "test_float_value"
|
|
val_init = f'&(float){{{inner_value}}}'
|
|
elif inner_type == "char":
|
|
handler = "test_character_value"
|
|
val_init = f'&(uint8_t){{{ord(inner_value)}}}' # type: ignore
|
|
elif inner_type == "string":
|
|
handler = "test_string_value"
|
|
val_init = f'&SLS_STR("{c_string_literal(inner_value)}")' # type: ignore
|
|
elif inner_type == "identifier":
|
|
handler = "test_identifier_value"
|
|
val_init = f'&(TestIdentifierValue){{FALSE, SLS_STR("{inner_value}")}}'
|
|
elif inner_type == "identifier_literal":
|
|
handler = "test_identifier_value"
|
|
val_init = f'&(TestIdentifierValue){{TRUE, SLS_STR("{inner_value}")}}'
|
|
elif inner_type == "bool":
|
|
handler = "test_boolean_value"
|
|
val_init = f'&(Boolean){{{"TRUE" if inner_value else "FALSE"}}}'
|
|
elif inner_type == "error":
|
|
handler = "test_for_error"
|
|
val_init = f'&SLS_STR("{c_string_literal(inner_value)}")' # type: ignore
|
|
elif inner_type == "token_string":
|
|
# Nested token string - recursive call
|
|
handler = "test_token_string_value"
|
|
val_init = _token_string_value_init(inner_token.get("value", []))
|
|
else:
|
|
raise ValueError(f'Unhandled token type in token string: {inner_type}')
|
|
|
|
token_handlers.append(
|
|
f'{{(Boolean (*)(LexerTest *, LexerResult, size_t, void *)){handler}, '
|
|
f'{val_init}}}'
|
|
)
|
|
|
|
# Generate the array initialization
|
|
tokens_array = f'(TestTokenStringToken[]){{{", ".join(token_handlers)}}}'
|
|
|
|
return (
|
|
f'test_token_string_value(&test, result, {idx_var}++, '
|
|
f'&(TestTokenStringValue){{{len(value)}, {tokens_array}}})'
|
|
)
|
|
|
|
def _token_string_value_init(value: list[dict]) -> str:
|
|
"""Generate initialization code for a TestTokenStringValue (for nested token strings)."""
|
|
if not value:
|
|
return '&(TestTokenStringValue){0, NULL}'
|
|
|
|
token_handlers = []
|
|
for inner_token in value:
|
|
inner_type = inner_token.get("type")
|
|
inner_value = inner_token.get("value")
|
|
|
|
if inner_type == "i64":
|
|
handler = "test_integer_value"
|
|
val_init = f'&(TestIntegerValue){{INTEGER_I64, {inner_value}}}'
|
|
elif inner_type == "i32":
|
|
handler = "test_integer_value"
|
|
val_init = f'&(TestIntegerValue){{INTEGER_I32, {inner_value}}}'
|
|
elif inner_type == "f64":
|
|
handler = "test_double_value"
|
|
val_init = f'&(double){{{inner_value}}}'
|
|
elif inner_type == "f32":
|
|
handler = "test_float_value"
|
|
val_init = f'&(float){{{inner_value}}}'
|
|
elif inner_type == "char":
|
|
handler = "test_character_value"
|
|
val_init = f'&(uint8_t){{{ord(inner_value)}}}' # type: ignore
|
|
elif inner_type == "string":
|
|
handler = "test_string_value"
|
|
val_init = f'&SLS_STR("{c_string_literal(inner_value)}")' # type: ignore
|
|
elif inner_type == "identifier":
|
|
handler = "test_identifier_value"
|
|
val_init = f'&(TestIdentifierValue){{FALSE, SLS_STR("{inner_value}")}}'
|
|
elif inner_type == "identifier_literal":
|
|
handler = "test_identifier_value"
|
|
val_init = f'&(TestIdentifierValue){{TRUE, SLS_STR("{inner_value}")}}'
|
|
elif inner_type == "bool":
|
|
handler = "test_boolean_value"
|
|
val_init = f'&(Boolean){{{"TRUE" if inner_value else "FALSE"}}}'
|
|
elif inner_type == "error":
|
|
handler = "test_for_error"
|
|
val_init = f'&SLS_STR("{c_string_literal(inner_value)}")' # type: ignore
|
|
elif inner_type == "token_string":
|
|
handler = "test_token_string_value"
|
|
val_init = _token_string_value_init(inner_token.get("value", []))
|
|
# Add other types as needed
|
|
else:
|
|
raise ValueError(f'Unhandled token type in nested token string: {inner_type}')
|
|
|
|
token_handlers.append(
|
|
f'{{(Boolean (*)(LexerTest *, LexerResult, size_t, void *)){handler}, '
|
|
f'{val_init}}}'
|
|
)
|
|
|
|
tokens_array = f'(TestTokenStringToken[]){{{", ".join(token_handlers)}}}'
|
|
return f'&(TestTokenStringValue){{{len(value)}, {tokens_array}}}'
|
|
|
|
def token_to_c_call(token: dict, idx_var="i") -> str:
    """Wrap the C check for one expected token in an early-return statement.

    The check expression comes from ``_token_to_c_call``; the generated
    statement returns ``test.result`` from the C test function when that
    expression evaluates to true.
    """
    check_expression = _token_to_c_call(token, idx_var)
    return "if (" + check_expression + ") return test.result;"
|
|
|
|
def generate_c_test(test: dict) -> str:
    """Convert a single YAML test entry to a C test function.

    Args:
        test: Mapping with ``name`` and ``code`` entries and an optional
            ``tokens`` list of expected-token mappings. A missing or null
            ``tokens`` entry is treated as an empty list.

    Returns:
        C source for one ``static TestResult test_<name>()`` function,
        followed by a blank line.

    Raises:
        KeyError: If ``name`` or ``code`` is missing.
        ValueError: If an expected token has an unrecognised type.
    """
    name = test["name"]
    c_name = sanitize_name(name)
    # The name is embedded in a C string literal too, so it needs the same
    # escaping as the code under test.
    display_name = c_string_literal(name)
    code = c_string_literal(test["code"])
    tokens = test.get("tokens") or []

    # Function header
    c_code = [
        f"static TestResult {c_name}() {{",
        f' LexerTest test = start_up_test(SLS_STR("{display_name}"), SLS_STR("{code}"));',
        " LexerResult result = lexical_analysis(&test.lexer_info);",
        " if (result.type == SLS_ERROR) return error_fail_test(&test, result, result.error);",
        " size_t i = 0;",
    ]

    # Token checks
    c_code.extend(" " + token_to_c_call(token) for token in tokens)

    # EOF check and return; the EOF check is skipped when a top-level
    # error token is expected.
    if not any(token.get("type") == "error" for token in tokens):
        c_code.append(" if (test_eof_value(&test, result, i++, 0)) return test.result;")
    c_code.append(" return pass_test(&test, result);")
    c_code.append("}\n")

    return "\n".join(c_code)
|
|
|
|
def yaml_to_c_tests(yaml_path: str, output_path: str) -> None:
    """Convert YAML test cases into C test code.

    Reads a YAML list of test cases, generates one C test function per
    case plus a ``run_lexer_tests()`` driver that calls them in order, and
    writes the result to ``output_path``.

    Args:
        yaml_path: Path of the YAML file holding the list of test cases.
        output_path: Path of the C file to write (overwritten if present).

    Raises:
        ValueError: If the YAML document is not a list, if two test names
            map to the same C function name, or if a token type is not
            recognised.
        KeyError: If a test case lacks a required entry.
    """
    with open(yaml_path, "r", encoding="utf-8") as f:
        tests = yaml.safe_load(f)

    # Ensure we have a list of tests
    if not isinstance(tests, list):
        raise ValueError("Expected a YAML list of test cases.")

    # Distinct test names can collapse to the same identifier (for example
    # "a b" and "a-b"); that would emit two C functions with one name, so
    # fail here with a clear message instead of at C compile time.
    c_names = [sanitize_name(test["name"]) for test in tests]
    seen_names = set()
    for test, c_name in zip(tests, c_names):
        if c_name in seen_names:
            raise ValueError(
                f"Duplicate C test function name {c_name!r} (from test {test['name']!r})."
            )
        seen_names.add(c_name)

    c_tests = [generate_c_test(test) for test in tests]

    program = [
        file_headers,
        f"static const size_t NUM_OF_TESTS = {len(tests)};\n",
        "\n".join(c_tests),
        main_header,
    ]
    # One driver line per test, in the same order as the YAML file.
    program.extend(f" test_report.tests[i++] = {c_name}();" for c_name in c_names)
    program.append("\n return test_report;\n}\n")

    output_code = "\n".join(program)
    Path(output_path).write_text(output_code, encoding="utf-8")
    print(f" Generated {len(c_tests)} C tests -> {output_path}")
|
|
|
|
# === Command-line entry point ===
|
|
if __name__ == "__main__":
    import argparse

    # Two positional arguments: the YAML input and the C output path.
    cli = argparse.ArgumentParser(
        description="Convert YAML SLS tests to C lexer tests.",
    )
    cli.add_argument("input", help="Path to input YAML test file")
    cli.add_argument("output", help="Path to output C file")
    options = cli.parse_args()

    yaml_to_c_tests(options.input, options.output)
|