Fixed splitting in manage_docs.py

This commit is contained in:
Kyler Olsen 2025-11-02 01:10:46 -06:00
parent bbec991566
commit c3149cc6d1
2 changed files with 31 additions and 35 deletions

View File

@@ -64,8 +64,7 @@ def combine_markdown(file_inputs, output_combined, output_meta_json):
if front_matter:
meta_info[file_path.name] = front_matter
cleaned = remove_front_matter(text).strip()
combined.append(f"<!-- START {file_path.name} -->\n{cleaned}\n\n<!-- END {file_path.name} -->\n")
# combined.append(f"{cleaned}\n")
combined.append(f"<!-- START {file_path.name} -->\n{cleaned}\n<!-- END {file_path.name} -->\n")
Path(output_combined).write_text("\n".join(combined), encoding='utf-8')
Path(output_meta_json).write_text(json.dumps(meta_info, indent=2), encoding='utf-8')
@@ -73,29 +72,45 @@ def combine_markdown(file_inputs, output_combined, output_meta_json):
print(f"✅ Combined file saved as: {output_combined}")
print(f"✅ Metadata JSON saved as: {output_meta_json}")
def split_by_h2(input_combined, output_dir):
"""Split a combined markdown file by H2 sections (## Heading)."""
def split_with_front_matter(input_combined, output_dir, metadata_file):
"""Split a combined markdown file back into original files, restoring front matter."""
content = Path(input_combined).read_text(encoding='utf-8')
sections = re.split(r'(?=^## )', content, flags=re.MULTILINE)
if not Path(metadata_file).exists():
print(f"⚠️ Metadata file not found: {metadata_file}")
sys.exit(1)
meta_info = json.loads(Path(metadata_file).read_text(encoding='utf-8'))
os.makedirs(output_dir, exist_ok=True)
for i, section in enumerate(sections):
if not section.strip():
continue
title_match = re.match(r'##\s*(.+)', section)
if title_match:
filename = re.sub(r'[^\w\-]+', '_', title_match.group(1)).strip('_') + ".md"
# Find all sections between <!-- START filename.md --> and <!-- END filename.md -->
pattern = r'<!-- START (.*?) -->\n(.*?)\n<!-- END \1 -->'
matches = re.findall(pattern, content, flags=re.DOTALL)
for filename, body in matches:
body = body.strip()
output_path = Path(output_dir, filename)
# Restore front matter if available
if filename in meta_info:
front_matter = meta_info[filename].strip()
restored = f"---\n{front_matter}\n---\n\n{body}\n"
else:
filename = f"part_{i+1}.md"
Path(output_dir, filename).write_text(section.strip() + "\n", encoding='utf-8')
print(f"Created: {filename}")
restored = body + "\n"
output_path.write_text(restored, encoding='utf-8')
print(f"🪶 Restored: {output_path}")
print("✅ Split complete.")
if __name__ == "__main__":
if len(sys.argv) < 2:
print("Usage:")
print(" Combine files: python manage_docs.py combine")
print(" Split by H2: python manage_docs.py split")
print(" Split files: python manage_docs.py split")
sys.exit(1)
command = sys.argv[1].lower()
@@ -103,7 +118,7 @@ if __name__ == "__main__":
combine_markdown(filenames, "stack_lang_spec.md", "metadata.json")
elif command == "split":
split_by_h2("stack_lang_spec.md", "docs")
split_with_front_matter("stack_lang_spec.md", "docs", "metadata.json")
else:
print("Unknown command. Use 'combine' or 'split'.")

View File

@@ -72,7 +72,6 @@
8. **Grammar simplification** - Referenced Implementable trait instead of repeating
---
<!-- END changes.md -->
<!-- START overview.md -->
@@ -147,7 +146,6 @@ This specification is organized to support both learning and reference:
**Reference lookup**: Use Appendices A-C for quick reference to standard library functions, traits, and operators.
---
<!-- END overview.md -->
<!-- START lexical_structure.md -->
@@ -222,7 +220,6 @@ false
```
---
<!-- END lexical_structure.md -->
<!-- START primitive_types.md -->
@@ -259,7 +256,6 @@ Raw pointers (`ptr`) are a future feature. See [Appendix F](./memory_management.
> **Related**: See Section 8 for the complete type system, including composite types and type inference.
---
<!-- END primitive_types.md -->
<!-- START basic_operations.md -->
@@ -390,7 +386,6 @@ Bitwise operations work on integer types:
> **Implementation Details**: Bitwise operators implement the `::Bitwise` trait. See [Appendix B](./complete_trait_reference.html) for the complete trait definition.
---
<!-- END basic_operations.md -->
<!-- START functions.md -->
@@ -526,7 +521,6 @@ The `lambda` operator converts a TokenString into a callable code block that can
> **Related**: See [Section 11.1](advanced_topics.html#111-dynamic-code-evaluation) for the `eval` operator used to execute lambdas.
---
<!-- END functions.md -->
<!-- START control_flow.md -->
@@ -682,7 +676,6 @@ result {
**Pattern Syntax**: Patterns can match enum variants, union variants, or literal values. The matched value (if any) is bound and available in the corresponding block.
---
<!-- END control_flow.md -->
<!-- START data_structures.md -->
@@ -836,7 +829,6 @@ These operations take TokenString arguments containing function bodies:
> **Implementation Details**: Array operations implement various traits including `::ArrayOf<T>`, `::Selectable<T>`, `::Sliceable`, and `::Sized`. See [Appendix B](./complete_trait_reference.html) for complete trait definitions and [Appendix A](./standard_library.html) for the full array operation reference.
---
<!-- END data_structures.md -->
<!-- START type_system.md -->
@@ -991,7 +983,6 @@ Option<Point<f64>> // Option containing a Point of f64s
```
---
<!-- END type_system.md -->
<!-- START trait_system.md -->
@@ -1207,7 +1198,6 @@ This section provides a brief overview of all standard traits. For complete defi
> **Complete Reference**: See [Appendix B](./complete_trait_reference.html) for full trait definitions with all methods, examples, and implementation details.
---
<!-- END trait_system.md -->
<!-- START generic_programming.md -->
@@ -1383,7 +1373,6 @@ When inheriting from generic traits, you must either:
> **Future Enhancement**: See [Appendix F](./memory_management.html) for planned type parameter enforcement at parse time.
---
<!-- END generic_programming.md -->
<!-- START advanced_topics.md -->
@@ -1493,7 +1482,6 @@ The standard library provides I/O, string operations, type conversions, and util
> **Complete Reference**: See [Appendix A](./standard_library.html) for the full standard library reference with all functions, signatures, and examples.
---
<!-- END advanced_topics.md -->
<!-- START standard_library.md -->
@@ -1838,7 +1826,6 @@ This appendix provides a complete alphabetical reference of all standard library
**See Also**: [read](#read)
---
<!-- END standard_library.md -->
<!-- START complete_trait_reference.md -->
@@ -2610,7 +2597,6 @@ This appendix contains all built-in trait definitions with complete documentatio
**See Also**: Section 11.3 (Type Conversion)
---
<!-- END complete_trait_reference.md -->
<!-- START complete_operator_reference.md -->
@@ -3159,7 +3145,6 @@ This appendix provides a complete alphabetical reference of all operators in the
**Section**: 7.4 (Arrays)
---
<!-- END complete_operator_reference.md -->
<!-- START grammar_summary.md -->
@@ -3267,7 +3252,6 @@ Language constructs (fn, struct, trait, impl, enum, union, inher) are defined by
> **Complete Specification**: See Appendix B (`::Implementable` trait) for precise definitions of these construct operators.
---
<!-- END grammar_summary.md -->
<!-- START module_system.md -->
@@ -3316,7 +3300,6 @@ Language constructs (fn, struct, trait, impl, enum, union, inher) are defined by
- Faster compilation (selective imports)
---
<!-- END module_system.md -->
<!-- START memory_management.md -->
@@ -3406,7 +3389,6 @@ Cons: Less granular control, memory held until arena freed
This would provide stronger type safety but add complexity to the type checker.
---
<!-- END memory_management.md -->
<!-- START examples_and_tutorials.md -->
@@ -3836,5 +3818,4 @@ dup ::x get print // Prints: 3.0
```
---
<!-- END examples_and_tutorials.md -->