YREA-SLS/manage_docs.py

125 lines
4.1 KiB
Python

import os
import re
import sys
import json
from pathlib import Path
# Base names (without directory or extension) of the markdown chapters, in the
# order they are handed to combine_markdown by the command-line entry point.
_DOC_STEMS = (
    "changes",
    "overview",
    "lexical_structure",
    "primitive_types",
    "basic_operations",
    "functions",
    "control_flow",
    "data_structures",
    "type_system",
    "trait_system",
    "generic_programming",
    "advanced_topics",
    "standard_library",
    "complete_trait_reference",
    "complete_operator_reference",
    "grammar_summary",
    "module_system",
    "memory_management",
    "examples_and_tutorials",
)

# Relative paths of the chapter files: "docs/<stem>.md" for every stem above.
filenames = [f"docs/{stem}.md" for stem in _DOC_STEMS]
def remove_front_matter(content):
    """Return ``content`` without a leading ``---``-delimited front matter block.

    Only a block that starts at the very first character is removed; the
    opening and closing ``---`` lines must each be followed by a newline.
    Text without such a block is returned unchanged.
    """
    # The pattern is anchored at the start of the string (no MULTILINE), so
    # there is at most one block to drop: everything up to the match end.
    leading_block = re.match(r'^---\n.*?\n---\n', content, flags=re.DOTALL)
    if leading_block is None:
        return content
    return content[leading_block.end():]
def extract_front_matter(content):
    """Return the text inside a leading ``---`` front matter block, or ``None``.

    The block must begin at the first character of ``content``. The returned
    text has surrounding whitespace stripped and excludes the ``---`` lines.
    """
    found = re.match(r'^---\n(.*?)\n---\n', content, flags=re.DOTALL)
    if not found:
        return None
    return found.group(1).strip()
def read_file_list(input_arg):
    """Expand one input argument into a list of file paths.

    If ``input_arg`` names an existing ``.txt`` file, that file is read as
    UTF-8 and every non-blank line (whitespace-stripped) becomes one entry.
    Any other argument is returned as a single-element list.
    """
    p = Path(input_arg)
    if not (p.is_file() and p.suffix == '.txt'):
        return [input_arg]

    entries = []
    with p.open(encoding='utf-8') as handle:
        for raw_line in handle:
            path_text = raw_line.strip()
            if path_text:
                entries.append(path_text)
    return entries
def combine_markdown(file_inputs, output_combined, output_meta_json):
    """Merge markdown files into one document and save their front matter.

    Each entry of ``file_inputs`` is expanded with ``read_file_list``. Every
    existing file is stripped of its front matter and wrapped in
    ``<!-- START name -->`` / ``<!-- END name -->`` markers keyed by the
    file's base name; missing files are reported and skipped. The merged
    text is written to ``output_combined`` and the collected front matter
    (base name -> text) is written as JSON to ``output_meta_json``.
    """
    expanded = [path for item in file_inputs for path in read_file_list(item)]

    sections = []
    front_matter_by_name = {}
    for entry in expanded:
        source = Path(entry)
        if not source.exists():
            print(f"Skipping missing file: {entry}")
            continue

        raw = source.read_text(encoding='utf-8')
        name = source.name

        # Only non-empty front matter is recorded in the metadata.
        header = extract_front_matter(raw)
        if header:
            front_matter_by_name[name] = header

        body = remove_front_matter(raw).strip()
        sections.append(f"<!-- START {name} -->\n{body}\n<!-- END {name} -->\n")

    merged_text = "\n".join(sections)
    Path(output_combined).write_text(merged_text, encoding='utf-8')

    meta_text = json.dumps(front_matter_by_name, indent=2)
    Path(output_meta_json).write_text(meta_text, encoding='utf-8')

    print(f"Combined file saved as: {output_combined}")
    print(f"Metadata JSON saved as: {output_meta_json}")
def split_with_front_matter(input_combined, output_dir, metadata_file):
    """Split a combined markdown file back into files, restoring front matter.

    Args:
        input_combined: Path of the combined markdown file containing
            ``<!-- START name -->`` ... ``<!-- END name -->`` sections.
        output_dir: Directory that receives one file per section; created
            if it does not exist.
        metadata_file: Path of the JSON file mapping section names to the
            front matter text that should be put back on top of each file.

    Raises:
        SystemExit: With status 1 when either input file does not exist.
    """
    combined_path = Path(input_combined)
    metadata_path = Path(metadata_file)

    # Validate both inputs up front so a missing combined file is reported
    # the same way as a missing metadata file instead of raising a traceback.
    if not combined_path.exists():
        print(f"Combined file not found: {input_combined}")
        sys.exit(1)
    if not metadata_path.exists():
        print(f"Metadata file not found: {metadata_file}")
        sys.exit(1)

    content = combined_path.read_text(encoding='utf-8')
    meta_info = json.loads(metadata_path.read_text(encoding='utf-8'))
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    # Each section is delimited by a START/END marker pair carrying the same
    # name; the back-reference (\1) makes sure the pair belongs together.
    section_re = re.compile(r'<!-- START (.*?) -->\n(.*?)\n<!-- END \1 -->', flags=re.DOTALL)

    for section in section_re.finditer(content):
        filename = section.group(1)
        body = section.group(2).strip()
        # NOTE(review): the section name is used as-is to build the output
        # path, so the combined file is assumed to come from a trusted source.
        output_path = Path(output_dir, filename)

        # Put the saved front matter back on top when this section had any.
        if filename in meta_info:
            front_matter = meta_info[filename].strip()
            restored = f"---\n{front_matter}\n---\n\n{body}\n"
        else:
            restored = body + "\n"

        output_path.write_text(restored, encoding='utf-8')
        print(f"Restored: {output_path}")

    print("Split complete.")
if __name__ == "__main__":
    # Command-line entry point: the first argument selects the operation.
    if len(sys.argv) < 2:
        print("Usage:")
        print(" Combine files: python manage_docs.py combine")
        print(" Split files: python manage_docs.py split")
        sys.exit(1)

    # Commands are matched case-insensitively.
    command = sys.argv[1].lower()
    if command == "combine":
        combine_markdown(filenames, "stack_lang_spec.md", "metadata.json")
    elif command == "split":
        split_with_front_matter("stack_lang_spec.md", "docs", "metadata.json")
    else:
        print("Unknown command. Use 'combine' or 'split'.")
        # Exit non-zero, like the missing-argument branch, so shells and
        # build scripts can detect that nothing was done.
        sys.exit(1)