127 lines
5.4 KiB
Python
127 lines
5.4 KiB
Python
import yaml
|
|
import re
|
|
from pathlib import Path
|
|
|
|
"""
|
|
Convert YAML test cases to Rust integration tests for the `sls` crate.

Usage:
    python3 SLS_Tests/yaml_to_rust_tests.py SLS_Tests/cases.yaml SLS_Rust/sls/tests/lexer_tests_generated.rs

This generator produces simple `#[test]` functions that run the lexer and
verify token kinds and values (basic checks). It is intentionally
conservative: it compares token types and lexemes/numeric values where
applicable.
|
|
"""
|
|
|
|
def sanitize_name(name: str) -> str:
    """Build a ``test_``-prefixed identifier from an arbitrary test name.

    Characters outside ``[a-zA-Z0-9_]`` are replaced with underscores, runs
    of underscores are squeezed to one, and underscores at either end are
    removed. If nothing is left, ``unnamed`` is used as the stem.
    """
    underscored = re.sub(r"[^a-zA-Z0-9_]", "_", name)
    stem = re.sub(r"_+", "_", underscored).strip("_")
    # An empty stem (blank or all-punctuation input) falls back to "unnamed".
    return "test_" + (stem or "unnamed")
|
|
|
|
|
|
# Single-pass translation table, so an escape produced for one character is
# never re-escaped by a later replacement.
_RUST_STRING_ESCAPES = str.maketrans({
    "\\": "\\\\",
    '"': '\\"',
    "\n": "\\n",
    "\r": "\\r",
    "\t": "\\t",
    "\0": "\\0",
})


def rust_string_literal(s: str) -> str:
    """Escape *s* so it can sit between the quotes of a Rust string literal.

    Backslashes and double quotes are escaped, as are newline, carriage
    return, tab and NUL. Escaping the line-ending characters matters for
    correctness, not just readability: rustc rejects a bare carriage return
    inside a string literal and normalises a literal CRLF to LF, which would
    change the text handed to the lexer under test.

    Args:
        s: Raw text to embed.

    Returns:
        The escaped text, without the surrounding quotes.
    """
    return s.translate(_RUST_STRING_ESCAPES)
|
|
|
|
|
|
def token_match_expectation(token_var: str, expected: dict) -> str:
    """Return Rust assertion source checking one lexed token.

    The emitted statements read the ``ttype``, ``numeric``, ``float`` and
    ``lexeme`` fields of the Rust expression named by *token_var* and compare
    them with the expectation described by *expected*.

    Args:
        token_var: Rust expression that evaluates to the token, e.g.
            ``got[0]``.
        expected: Expectation mapping; its ``type`` key selects the kind of
            check and its ``value`` key supplies the expected value.

    Returns:
        One or more Rust statements separated by newlines. Unknown types and
        ``token_string`` produce a comment plus a non-empty-lexeme assertion.
    """
    ttype = expected.get('type')
    val = expected.get('value')

    if ttype in ('i64', 'i32', 'i16', 'i8', 'u64', 'u32', 'u16', 'u8'):
        # check numeric field produced by the lexer
        # allow constant names like INT64_MIN to be used directly in generated code
        if isinstance(val, str) and re.match(r"^[A-Za-z_][A-Za-z0-9_]*$", val):
            vexpr = val
        else:
            vexpr = str(val)
        return f"assert_eq!({token_var}.ttype, sls::lexer::TokenType::Int);\n assert!({token_var}.numeric.is_some(), \"expected numeric value\");\n assert_eq!({token_var}.numeric.unwrap(), {vexpr});"

    if ttype in ('f64', 'f32'):
        # YAML parses `value: 3` as an int; Rust will not subtract an integer
        # literal from a float, so spell whole numbers as float literals.
        # bool is excluded because it is an int subclass in Python.
        if isinstance(val, int) and not isinstance(val, bool):
            fexpr = f"{val}.0"
        else:
            fexpr = str(val)
        return f"assert_eq!({token_var}.ttype, sls::lexer::TokenType::Float);\n assert!({token_var}.float.is_some(), \"expected float value\");\n assert!(({token_var}.float.unwrap() - {fexpr}).abs() < 1e-12);"

    if ttype == 'string':
        lexeme = rust_string_literal(str(val))
        return f"assert_eq!({token_var}.ttype, sls::lexer::TokenType::Str);\n assert_eq!({token_var}.lexeme, \"{lexeme}\");"

    if ttype in ('identifier', 'identifier_literal'):
        lexeme = rust_string_literal(str(val))
        return f"assert_eq!({token_var}.ttype, sls::lexer::TokenType::Ident);\n assert_eq!({token_var}.lexeme, \"{lexeme}\");"

    if ttype == 'char':
        # A one-character string is compared by code point; anything else is
        # emitted as given.
        codepoint = ord(val) if isinstance(val, str) and len(val) == 1 else val
        return f"assert_eq!({token_var}.ttype, sls::lexer::TokenType::Int);\n assert!({token_var}.numeric.is_some(), \"expected numeric char code\");\n assert_eq!({token_var}.numeric.unwrap(), {codepoint});"

    if ttype == 'bool':
        # Booleans are checked as identifier tokens spelled true/false.
        word = 'true' if val else 'false'
        return f"assert_eq!({token_var}.ttype, sls::lexer::TokenType::Ident);\n assert_eq!({token_var}.lexeme, \"{word}\");"

    if ttype == 'error':
        # For now, assert that we got an Illegal token
        return f"assert_eq!({token_var}.ttype, sls::lexer::TokenType::Illegal);"

    if ttype == 'token_string':
        # Complex nested token strings are not handled by this simple generator
        return f"// token_string check not implemented; received token: {{:#?}}\n // TODO: implement nested expectations\n // for now just assert we got an Ident or similar\n assert!(!{token_var}.lexeme.is_empty());"

    return f"// Unhandled expected token type: {ttype}\n assert!(!{token_var}.lexeme.is_empty());"
|
|
|
|
|
|
def generate_rust_test(test: dict) -> str:
    """Render one YAML test case as the source of a Rust ``#[test]`` function.

    The generated function lexes the case's ``code`` until an Eof token is
    seen, checks the number of collected tokens, then emits per-token
    assertions via ``token_match_expectation``.

    Args:
        test: Test-case mapping. ``name``, ``code`` and ``tokens`` are read;
            each may be missing or null.

    Returns:
        Rust source text for the test function, ending with a blank line.
    """
    # dict.get only substitutes its default when the key is absent; a key
    # that is present with a YAML null still yields None, so normalise here.
    raw_name = test.get('name')
    name = sanitize_name('unnamed' if raw_name is None else str(raw_name))

    code = test.get('code')
    if code is None:
        # Without this, str(None) would make the lexer input the text "None".
        code = ''

    tokens = test.get('tokens') or []

    fn_lines = [
        "#[test]",
        f"fn {name}() " "{",
        f" let src = \"{rust_string_literal(str(code))}\";",
        " let mut lexer = sls::lexer::Lexer::new(src);",
        " let mut got = vec![];",
        " loop {",
        " let t = lexer.next_token();",
        " if t.ttype == sls::lexer::TokenType::Eof { break; }",
        " got.push(t);",
        " }",
        "",
    ]

    # Basic assertion count vs expected (allow zero expected -> empty)
    if tokens:
        fn_lines.append(f" assert_eq!(got.len(), {len(tokens)}usize, \"token count mismatch\");")
    else:
        fn_lines.append(" assert!(got.is_empty());")

    for i, token in enumerate(tokens):
        expectation = token_match_expectation(f"got[{i}]", token)
        # indent lines of expectation properly
        fn_lines.extend(f" {line}" for line in expectation.split('\n'))

    fn_lines.append("}")
    fn_lines.append("")
    return "\n".join(fn_lines)
|
|
|
|
|
|
def yaml_to_rust_tests(yaml_path: str, output_path: str):
    """Read test cases from a YAML file and write them out as Rust tests.

    Args:
        yaml_path: Path of the YAML file; its top level must be a list.
        output_path: Path of the Rust file to write (UTF-8).

    Raises:
        ValueError: If the YAML document is not a list.
    """
    with open(yaml_path, 'r', encoding='utf-8') as stream:
        cases = yaml.safe_load(stream)

    if not isinstance(cases, list):
        raise ValueError('Expected YAML to be a list of tests')

    rendered = [generate_rust_test(case) for case in cases]

    # File preamble: provenance comment, crate import, and the named
    # constants that expected values in the YAML may refer to.
    preamble = (
        "// Generated tests - do not edit by hand\n"
        "// Use: run `python3 SLS_Tests/yaml_to_rust_tests.py SLS_Tests/cases.yaml tests/lexer_tests_generated.rs`\n"
        "\n"
        "use sls; // crate under test\n"
        "\n"
        "const INT64_MIN: i128 = i64::MIN as i128;\n"
        "const UINT64_MAX: i128 = u64::MAX as i128;\n"
        "\n"
    )

    Path(output_path).write_text(preamble + "\n".join(rendered), encoding='utf-8')
    print(f"Generated {len(rendered)} Rust tests -> {output_path}")
|
|
|
|
|
|
if __name__ == '__main__':
    # Command-line entry point: two positional arguments, the YAML input
    # path followed by the Rust output path.
    import argparse

    cli = argparse.ArgumentParser()
    for positional in ('input', 'output'):
        cli.add_argument(positional)
    options = cli.parse_args()
    yaml_to_rust_tests(options.input, options.output)
|