"""Convert YAML test cases to Rust integration tests for the `sls` crate.

Usage:
    python3 SLS_Tests/yaml_to_rust_tests.py SLS_Tests/cases.yaml SLS_Rust/sls/tests/lexer_tests_generated.rs

This generator produces simple `#[test]` functions that run the lexer and
verify token kinds and values (basic checks). It's intentionally
conservative — it compares token types and lexemes/numeric values where
applicable.
"""

import math
import re
from pathlib import Path
from typing import Optional

try:
    import yaml
except ImportError:  # PyYAML is only needed when a YAML file is actually read
    yaml = None


_INT_TYPES = frozenset({'i64', 'i32', 'i16', 'i8', 'u64', 'u32', 'u16', 'u8'})
_FLOAT_TYPES = frozenset({'f64', 'f32'})
_IDENT_TYPES = frozenset({'identifier', 'identifier_literal'})

# Characters that have a dedicated escape sequence in a Rust string literal.
_RUST_ESCAPES = {
    '\\': '\\\\',
    '"': '\\"',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
    '\0': '\\0',
}

_HEADER = """// Generated tests - do not edit by hand
// Use: run `python3 SLS_Tests/yaml_to_rust_tests.py SLS_Tests/cases.yaml tests/lexer_tests_generated.rs`

use sls; // crate under test

const INT64_MIN: i128 = i64::MIN as i128;
const UINT64_MAX: i128 = u64::MAX as i128;

"""


def sanitize_name(name: str) -> str:
    """Turn an arbitrary test name into a Rust test function identifier.

    Every character outside ``[a-zA-Z0-9_]`` becomes ``_``, runs of
    underscores are collapsed, leading/trailing underscores are stripped and
    the result is lower-cased and prefixed with ``test_``. An empty result
    (or a ``None`` name) yields ``test_unnamed``. Non-string names, such as
    a YAML integer, are converted with ``str`` first.
    """
    text = "" if name is None else str(name)
    text = re.sub(r"[^a-zA-Z0-9_]", "_", text)
    text = re.sub(r"_+", "_", text).strip("_").lower()
    return f"test_{text or 'unnamed'}"


def rust_string_literal(s: str) -> str:
    """Escape *s* for use between the double quotes of a Rust string literal.

    Backslash, double quote, newline, carriage return, tab and NUL use their
    short escapes; any other ASCII control character is written as
    ``\\u{..}``. Escaping newlines matters here because the generated
    assertions are later split on line breaks and re-indented, which would
    otherwise change the contents of a multi-line literal.
    """
    pieces = []
    for ch in s:
        escaped = _RUST_ESCAPES.get(ch)
        if escaped is not None:
            pieces.append(escaped)
        elif ord(ch) < 0x20 or ord(ch) == 0x7F:
            pieces.append(f"\\u{{{ord(ch):x}}}")
        else:
            pieces.append(ch)
    return "".join(pieces)


def _float_value_assertion(token_var: str, val) -> str:
    """Return the Rust statement that checks the float payload of a token."""
    if isinstance(val, bool) or not isinstance(val, (int, float)):
        # Not a number (e.g. a constant name): emit the expression verbatim.
        return f"assert!(({token_var}.float.unwrap() - {val}).abs() < 1e-12);"
    number = float(val)
    if math.isnan(number):
        return f"assert!({token_var}.float.unwrap().is_nan());"
    if math.isinf(number):
        sign = "positive" if number > 0 else "negative"
        return (
            f"assert!({token_var}.float.unwrap().is_infinite() "
            f"&& {token_var}.float.unwrap().is_sign_{sign}());"
        )
    # repr() of a float always carries a '.' or an exponent, so an integer
    # YAML value such as 1 becomes the Rust float literal 1.0.
    return f"assert!(({token_var}.float.unwrap() - {number!r}).abs() < 1e-12);"


def token_match_expectation(token_var: str, expected: dict) -> str:
    """Build the Rust assertions for one expected token.

    Args:
        token_var: Rust expression naming the token under test, e.g.
            ``got[0]``.
        expected: Mapping read from the YAML case; its ``type`` key selects
            the kind of check and ``value`` supplies the expected payload.

    Returns:
        One or more Rust statements/comments separated by ``\\n``, without
        leading indentation.
    """
    ttype = expected.get('type')
    val = expected.get('value')

    if ttype in _INT_TYPES:
        # The value is emitted verbatim, so constant names such as INT64_MIN
        # can be used directly in the generated code.
        lines = [
            f"assert_eq!({token_var}.ttype, sls::lexer::TokenType::Int);",
            f'assert!({token_var}.numeric.is_some(), "expected numeric value");',
            f"assert_eq!({token_var}.numeric.unwrap(), {val});",
        ]
    elif ttype in _FLOAT_TYPES:
        lines = [
            f"assert_eq!({token_var}.ttype, sls::lexer::TokenType::Float);",
            f'assert!({token_var}.float.is_some(), "expected float value");',
            _float_value_assertion(token_var, val),
        ]
    elif ttype == 'string':
        literal = rust_string_literal(str(val))
        lines = [
            f"assert_eq!({token_var}.ttype, sls::lexer::TokenType::Str);",
            f'assert_eq!({token_var}.lexeme, "{literal}");',
        ]
    elif ttype in _IDENT_TYPES:
        literal = rust_string_literal(str(val))
        lines = [
            f"assert_eq!({token_var}.ttype, sls::lexer::TokenType::Ident);",
            f'assert_eq!({token_var}.lexeme, "{literal}");',
        ]
    elif ttype == 'char':
        # A one-character string is compared by code point; anything else is
        # emitted as given.
        if isinstance(val, str) and len(val) == 1:
            codepoint = ord(val)
        else:
            codepoint = val
        lines = [
            f"assert_eq!({token_var}.ttype, sls::lexer::TokenType::Int);",
            f'assert!({token_var}.numeric.is_some(), "expected numeric char code");',
            f"assert_eq!({token_var}.numeric.unwrap(), {codepoint});",
        ]
    elif ttype == 'bool':
        word = 'true' if val else 'false'
        lines = [
            f"assert_eq!({token_var}.ttype, sls::lexer::TokenType::Ident);",
            f'assert_eq!({token_var}.lexeme, "{word}");',
        ]
    elif ttype == 'error':
        # For now, assert that we got an Illegal token.
        lines = [
            f"assert_eq!({token_var}.ttype, sls::lexer::TokenType::Illegal);",
        ]
    elif ttype == 'token_string':
        # Complex nested token strings are not handled by this simple generator.
        lines = [
            "// token_string check not implemented; received token: {:#?}",
            "// TODO: implement nested expectations",
            "// for now just assert we got an Ident or similar",
            f"assert!(!{token_var}.lexeme.is_empty());",
        ]
    else:
        lines = [
            f"// Unhandled expected token type: {ttype}",
            f"assert!(!{token_var}.lexeme.is_empty());",
        ]
    return "\n".join(lines)


def generate_rust_test(test: dict, fn_name: Optional[str] = None) -> str:
    """Render one YAML test case as a Rust ``#[test]`` function.

    Args:
        test: Mapping with optional ``name``, ``code`` and ``tokens`` keys.
            Missing or null ``code``/``tokens`` are treated as empty.
        fn_name: Rust function name to use. When omitted it is derived from
            ``test['name']`` with :func:`sanitize_name`.

    Returns:
        The Rust source of the test function, ending with a newline.
    """
    name = fn_name if fn_name is not None else sanitize_name(test.get('name', 'unnamed'))
    code = test.get('code')
    if code is None:
        code = ''
    tokens = test.get('tokens') or []

    fn_lines = [
        "#[test]",
        f"fn {name}() {{",
        f'    let src = "{rust_string_literal(str(code))}";',
        "    let mut lexer = sls::lexer::Lexer::new(src);",
        "    let mut got = vec![];",
        "    loop {",
        "        let t = lexer.next_token();",
        "        if t.ttype == sls::lexer::TokenType::Eof { break; }",
        "        got.push(t);",
        "    }",
        "",
    ]

    # Basic assertion count vs expected (no expected tokens -> empty result).
    if tokens:
        fn_lines.append(
            f'    assert_eq!(got.len(), {len(tokens)}usize, "token count mismatch");'
        )
    else:
        fn_lines.append("    assert!(got.is_empty());")

    for i, token in enumerate(tokens):
        expectation = token_match_expectation(f"got[{i}]", token)
        fn_lines.extend(f"    {line}" for line in expectation.split('\n'))

    fn_lines.append("}")
    fn_lines.append("")
    return "\n".join(fn_lines)


def _unique_fn_name(base: str, used: set) -> str:
    """Return *base*, or *base* with a numeric suffix, so it is not in *used*."""
    candidate = base
    suffix = 2
    while candidate in used:
        candidate = f"{base}_{suffix}"
        suffix += 1
    used.add(candidate)
    return candidate


def yaml_to_rust_tests(yaml_path: str, output_path: str):
    """Read test cases from *yaml_path* and write Rust tests to *output_path*.

    Test cases whose names sanitize to the same identifier get a numeric
    suffix so the generated file has no duplicate function names.

    Raises:
        ImportError: If PyYAML is not installed.
        ValueError: If the YAML document is not a list, or an entry of the
            list is not a mapping.
    """
    if yaml is None:
        raise ImportError("PyYAML is required to read the YAML test cases")

    with open(yaml_path, 'r', encoding='utf-8') as f:
        tests = yaml.safe_load(f)

    if not isinstance(tests, list):
        raise ValueError('Expected YAML to be a list of tests')

    used_names = set()
    rust_tests = []
    for index, t in enumerate(tests):
        if not isinstance(t, dict):
            raise ValueError(f'Expected test #{index} to be a mapping')
        fn_name = _unique_fn_name(sanitize_name(t.get('name', 'unnamed')), used_names)
        rust_tests.append(generate_rust_test(t, fn_name=fn_name))

    out_text = _HEADER + "\n".join(rust_tests)
    Path(output_path).write_text(out_text, encoding='utf-8')
    print(f"Generated {len(rust_tests)} Rust tests -> {output_path}")


def main() -> None:
    """Parse the command line and run the generator."""
    import argparse

    parser = argparse.ArgumentParser(
        description="Convert YAML lexer test cases to Rust integration tests."
    )
    parser.add_argument('input', help="path to the YAML test cases")
    parser.add_argument('output', help="path of the Rust file to write")
    args = parser.parse_args()
    yaml_to_rust_tests(args.input, args.output)


if __name__ == '__main__':
    main()