"""Combine markdown documents into one file, or split a combined file by H2.

Run as a script:

    python manage_docs.py combine
    python manage_docs.py split
"""

import json
import os
import re
import sys
from pathlib import Path
from typing import Iterable, List, Optional, Set

# Source documents, in the order the "combine" command concatenates them.
filenames = [
    "docs/changes.md",
    "docs/overview.md",
    "docs/lexical_structure.md",
    "docs/primitive_types.md",
    "docs/basic_operations.md",
    "docs/functions.md",
    "docs/control_flow.md",
    "docs/data_structures.md",
    "docs/type_system.md",
    "docs/trait_system.md",
    "docs/generic_programming.md",
    "docs/advanced_topics.md",
    "docs/standard_library.md",
    "docs/complete_trait_reference.md",
    "docs/complete_operator_reference.md",
    "docs/grammar_summary.md",
    "docs/module_system.md",
    "docs/memory_management.md",
    "docs/examples_and_tutorials.md",
]

# Front matter is a "---" line at the very start of the text, the metadata
# body, and a closing "---" line.  The closing fence may be followed by a
# newline or by the end of the text, and CRLF line endings are accepted so
# the helpers also work on text that was not newline-normalised on read.
_FRONT_MATTER_RE = re.compile(r'^---\r?\n(.*?)\r?\n---(?:\r?\n|\Z)', re.DOTALL)


def remove_front_matter(content: str) -> str:
    """Return *content* with a leading YAML front matter block removed.

    Text that does not start with a front matter block is returned unchanged.
    """
    return _FRONT_MATTER_RE.sub('', content, count=1)


def extract_front_matter(content: str) -> Optional[str]:
    """Return the stripped body of the leading front matter block, or None."""
    match = _FRONT_MATTER_RE.match(content)
    return match.group(1).strip() if match else None


def read_file_list(input_arg: str) -> List[str]:
    """Expand one input argument into a list of file paths.

    If *input_arg* names an existing ``.txt`` file, each non-blank line of
    that file is returned (stripped) as a path.  Otherwise the argument
    itself is returned as a single-element list.
    """
    p = Path(input_arg)
    if p.is_file() and p.suffix == '.txt':
        with p.open(encoding='utf-8') as f:
            return [line.strip() for line in f if line.strip()]
    return [input_arg]


def combine_markdown(file_inputs: Iterable[str], output_combined: str,
                     output_meta_json: str) -> None:
    """Concatenate markdown files into one document, dropping front matter.

    Args:
        file_inputs: Paths to markdown files, or to ``.txt`` files listing
            one markdown path per line (see ``read_file_list``).
        output_combined: Path the combined markdown is written to.
        output_meta_json: Path of the JSON file that receives the removed
            front matter, keyed by each source file's base name.

    Missing input files are reported on stdout and skipped.
    """
    files: List[str] = []
    for f in file_inputs:
        files.extend(read_file_list(f))

    combined = []
    meta_info = {}

    for file in files:
        file_path = Path(file)
        if not file_path.exists():
            print(f"⚠️ Skipping missing file: {file}")
            continue

        text = file_path.read_text(encoding='utf-8')
        front_matter = extract_front_matter(text)
        if front_matter:
            # Keyed by base name only: files sharing a name in different
            # directories overwrite each other's entry.
            meta_info[file_path.name] = front_matter

        cleaned = remove_front_matter(text).strip()
        combined.append(f"\n{cleaned}\n\n\n")

    Path(output_combined).write_text("\n".join(combined), encoding='utf-8')
    Path(output_meta_json).write_text(json.dumps(meta_info, indent=2), encoding='utf-8')
    print(f"✅ Combined file saved as: {output_combined}")
    print(f"✅ Metadata JSON saved as: {output_meta_json}")


def _unique_stem(stem: str, used: Set[str]) -> str:
    """Return *stem*, or *stem* with a numeric suffix, not yet in *used*.

    Names are compared case-insensitively so the result is also safe on
    case-insensitive filesystems.  The chosen name is recorded in *used*.
    """
    candidate = stem
    suffix = 2
    while candidate.casefold() in used:
        candidate = f"{stem}_{suffix}"
        suffix += 1
    used.add(candidate.casefold())
    return candidate


def split_by_h2(input_combined: str, output_dir: str) -> None:
    """Split a combined markdown file into one file per H2 section.

    Each section starts at a line beginning with ``## ``; any text before the
    first such line forms its own section.  A section's file name is derived
    from its heading with runs of characters other than word characters and
    ``-`` replaced by ``_``.  Sections without a usable heading are named
    ``part_<n>.md``, and repeated headings get a ``_2``, ``_3``, ... suffix so
    that no section written in this run overwrites another.

    Args:
        input_combined: Path of the markdown file to split.
        output_dir: Directory to write the section files to (created if
            missing).
    """
    content = Path(input_combined).read_text(encoding='utf-8')
    # Zero-width lookahead keeps each "## " heading with the section it opens.
    sections = re.split(r'(?=^## )', content, flags=re.MULTILINE)

    os.makedirs(output_dir, exist_ok=True)
    used_stems: Set[str] = set()

    for i, section in enumerate(sections):
        if not section.strip():
            continue

        stem = ""
        title_match = re.match(r'##\s*(.+)', section)
        if title_match:
            stem = re.sub(r'[^\w\-]+', '_', title_match.group(1)).strip('_')
        if not stem:
            # No heading, or a heading made only of characters that were
            # sanitised away (which would otherwise yield the file ".md").
            stem = f"part_{i+1}"

        filename = _unique_stem(stem, used_stems) + ".md"
        Path(output_dir, filename).write_text(section.strip() + "\n", encoding='utf-8')
        print(f"Created: {filename}")


def main(argv: Optional[List[str]] = None) -> int:
    """Run the command-line interface and return a process exit status.

    Args:
        argv: Full argument vector including the program name; defaults to
            ``sys.argv``.

    Returns:
        0 on success, 1 when the command is missing or unknown.
    """
    args = sys.argv if argv is None else list(argv)

    if len(args) < 2:
        print("Usage:")
        print(" Combine files: python manage_docs.py combine")
        print(" Split by H2: python manage_docs.py split")
        # Stop here: there is no command to dispatch on.
        return 1

    command = args[1].lower()

    if command == "combine":
        combine_markdown(filenames, "stack_lang_spec.md", "metadata.json")
    elif command == "split":
        split_by_h2("stack_lang_spec.md", "docs")
    else:
        print("Unknown command. Use 'combine' or 'split'.")
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())