Refactor combine_markdown and split_with_front_matter functions for improved metadata handling and section processing

This commit is contained in:
Kyler Olsen 2025-11-02 01:34:14 -06:00
parent 75b750b662
commit 63d7af7100
3 changed files with 72 additions and 84 deletions

View File

@ -51,8 +51,8 @@ def combine_markdown(file_inputs, output_combined, output_meta_json):
for f in file_inputs:
files.extend(read_file_list(f))
combined = []
meta_info = {}
combined_parts = []
meta_info = {"order": [], "files": {}}
for file in files:
file_path = Path(file)
@ -63,11 +63,12 @@ def combine_markdown(file_inputs, output_combined, output_meta_json):
text = file_path.read_text(encoding='utf-8')
front_matter = extract_front_matter(text)
if front_matter:
meta_info[file_path.name] = front_matter
meta_info["files"][file_path.name] = front_matter
cleaned = remove_front_matter(text).strip()
combined.append(f"<!-- START {file_path.name} -->\n{cleaned}\n<!-- END {file_path.name} -->\n")
combined_parts.append(cleaned)
meta_info["order"].append(file_path.name)
Path(output_combined).write_text("\n".join(combined), encoding='utf-8')
Path(output_combined).write_text("\n\n".join(combined_parts) + "\n", encoding='utf-8')
Path(output_meta_json).write_text(json.dumps(meta_info, indent=2), encoding='utf-8')
print(f"Combined file saved as: {output_combined}")
@ -105,7 +106,7 @@ def write_with_safety(path, content, force=False, backup=False):
def split_with_front_matter(input_combined, output_dir, metadata_file, force=False, backup=False):
"""Split a combined markdown file back into original files, restoring front matter."""
content = Path(input_combined).read_text(encoding='utf-8')
combined_text = Path(input_combined).read_text(encoding='utf-8')
if not Path(metadata_file).exists():
print(f"Metadata file not found: {metadata_file}")
@ -114,35 +115,37 @@ def split_with_front_matter(input_combined, output_dir, metadata_file, force=Fal
meta_info = json.loads(Path(metadata_file).read_text(encoding='utf-8'))
os.makedirs(output_dir, exist_ok=True)
pattern = r'<!-- START (.*?) -->\n(.*?)\n<!-- END \1 -->'
matches = re.findall(pattern, content, flags=re.DOTALL)
order = meta_info.get("order", [])
frontmatters = meta_info.get("files", {})
total_written = 0
total_skipped = 0
total_backups = 0
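# Split at every H2 heading ("## " at the start of a line). Any text before the
# first H2 (e.g. an H1 title block) becomes its own section. This only maps back
# to files correctly if each original file contributes exactly one such section.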
sections = re.split(r'(?=^## )', combined_text, flags=re.MULTILINE)
sections = [s.strip() for s in sections if s.strip()]
for filename, body in matches:
body = body.strip()
if len(sections) != len(order):
print(f"Warning: {len(sections)} sections found but {len(order)} files listed. "
f"Splitting by simple proportion instead.")
approx_size = len(combined_text) // len(order)
chunks = [combined_text[i*approx_size:(i+1)*approx_size] for i in range(len(order)-1)]
chunks.append(combined_text[(len(order)-1)*approx_size:])
else:
chunks = sections
for i, filename in enumerate(order):
output_path = Path(output_dir, filename)
body = chunks[i].strip() if i < len(chunks) else ""
# Restore front matter if available
if filename in meta_info:
front_matter = meta_info[filename].strip()
restored = f"---\n{front_matter}\n---\n\n{body}\n"
front_matter = frontmatters.get(filename)
if front_matter:
content = f"---\n{front_matter}\n---\n\n{body}\n"
else:
restored = body + "\n"
content = body + "\n"
before = output_path.exists()
write_with_safety(output_path, restored, force=force, backup=backup)
write_with_safety(output_path, content, force=force, backup=backup)
if backup and before:
total_backups += 1
if output_path.exists():
total_written += 1
else:
total_skipped += 1
print(f"Split complete. {total_written} files written, {total_skipped} skipped, {total_backups} backups made.")
print(f"Split complete. {len(order)} files processed.")
if __name__ == "__main__":
if len(sys.argv) < 2:

View File

@ -1,4 +1,26 @@
{
"order": [
"changes.md",
"overview.md",
"lexical_structure.md",
"primitive_types.md",
"basic_operations.md",
"functions.md",
"control_flow.md",
"data_structures.md",
"type_system.md",
"trait_system.md",
"generic_programming.md",
"advanced_topics.md",
"standard_library.md",
"complete_trait_reference.md",
"complete_operator_reference.md",
"grammar_summary.md",
"module_system.md",
"memory_management.md",
"examples_and_tutorials.md"
],
"files": {
"changes.md": "Title: Stack Language Specification\nPrev:\nNext:",
"overview.md": "Title: 1 Overview\nPrev: Index\nNext: Lexical Structure",
"lexical_structure.md": "Title: 2 Lexical Structure\nPrev: Overview\nNext: Primitive Types",
@ -19,3 +41,4 @@
"memory_management.md": "Title: F Memory Management\nPrev: Module System\nNext: Examples and Tutorials",
"examples_and_tutorials.md": "Title: G Examples & Tutorials\nPrev: Memory Management\nNext:"
}
}

View File

@ -1,4 +1,3 @@
<!-- START changes.md -->
# Stack Language Specification
**Version**: 0.8.1
@ -74,9 +73,7 @@
1. Added links
---
<!-- END changes.md -->
<!-- START overview.md -->
## 1. Overview
A statically-typed, stack-based language with pure postfix notation combining the execution model of HP's RPL, the type system of C and Rust, and modern array operations from Uiua.
@ -148,9 +145,7 @@ This specification is organized to support both learning and reference:
**Reference lookup**: Use Appendices A-C for quick reference to standard library functions, traits, and operators.
---
<!-- END overview.md -->
<!-- START lexical_structure.md -->
## 2. Lexical Structure
### 2.1 Comments
@ -222,9 +217,7 @@ false
```
---
<!-- END lexical_structure.md -->
<!-- START primitive_types.md -->
## 3. Primitive Types
The language provides several built-in primitive types for common values:
@ -258,9 +251,7 @@ Raw pointers (`ptr`) are a future feature. See [Appendix F](./memory_management.
> **Related**: See Section 8 for the complete type system, including composite types and type inference.
---
<!-- END primitive_types.md -->
<!-- START basic_operations.md -->
## 4. Basic Operations
### 4.1 Stack Operations
@ -388,9 +379,7 @@ Bitwise operations work on integer types:
> **Implementation Details**: Bitwise operators implement the `::Bitwise` trait. See [Appendix B](./complete_trait_reference.html) for the complete trait definition.
---
<!-- END basic_operations.md -->
<!-- START functions.md -->
## 5. Functions
Functions are user-defined procedures that encapsulate reusable code. They are the primary abstraction mechanism in the language.
@ -523,9 +512,7 @@ The `lambda` operator converts a TokenString into a callable code block that can
> **Related**: See [Section 11.1](advanced_topics.html#111-dynamic-code-evaluation) for the `eval` operator used to execute lambdas.
---
<!-- END functions.md -->
<!-- START control_flow.md -->
## 6. Control Flow
### 6.1 Conditionals
@ -678,9 +665,7 @@ result {
**Pattern Syntax**: Patterns can match enum variants, union variants, or literal values. The matched value (if any) is bound and available in the corresponding block.
---
<!-- END control_flow.md -->
<!-- START data_structures.md -->
## 7. Data Structures
### 7.1 Structs
@ -831,9 +816,7 @@ These operations take TokenString arguments containing function bodies:
> **Implementation Details**: Array operations implement various traits including `::ArrayOf<T>`, `::Selectable<T>`, `::Sliceable`, and `::Sized`. See [Appendix B](./complete_trait_reference.html) for complete trait definitions and [Appendix A](./standard_library.html) for the full array operation reference.
---
<!-- END data_structures.md -->
<!-- START type_system.md -->
## 8. Type System
### 8.1 Types vs Traits
@ -985,9 +968,7 @@ Option<Point<f64>> // Option containing a Point of f64s
```
---
<!-- END type_system.md -->
<!-- START trait_system.md -->
## 9. Trait System
### 9.1 What are Traits
@ -1200,9 +1181,7 @@ This section provides a brief overview of all standard traits. For complete defi
> **Complete Reference**: See [Appendix B](./complete_trait_reference.html) for full trait definitions with all methods, examples, and implementation details.
---
<!-- END trait_system.md -->
<!-- START generic_programming.md -->
## 10. Generic Programming
### 10.1 Type Parameters
@ -1375,9 +1354,7 @@ When inheriting from generic traits, you must either:
> **Future Enhancement**: See [Appendix F](./memory_management.html) for planned type parameter enforcement at parse time.
---
<!-- END generic_programming.md -->
<!-- START advanced_topics.md -->
## 11. Advanced Topics
### 11.1 Dynamic Code Evaluation
@ -1484,9 +1461,7 @@ The standard library provides I/O, string operations, type conversions, and util
> **Complete Reference**: See [Appendix A](./standard_library.html) for the full standard library reference with all functions, signatures, and examples.
---
<!-- END advanced_topics.md -->
<!-- START standard_library.md -->
## Appendix A: Standard Library
This appendix provides a complete alphabetical reference of all standard library functions and operations.
@ -1828,9 +1803,7 @@ This appendix provides a complete alphabetical reference of all standard library
**See Also**: [read](#read)
---
<!-- END standard_library.md -->
<!-- START complete_trait_reference.md -->
## Appendix B: Complete Trait Reference
This appendix contains all built-in trait definitions with complete documentation, organized alphabetically.
@ -2599,9 +2572,7 @@ This appendix contains all built-in trait definitions with complete documentatio
**See Also**: Section 11.3 (Type Conversion)
---
<!-- END complete_trait_reference.md -->
<!-- START complete_operator_reference.md -->
## Appendix C: Complete Operator Reference
This appendix provides a complete alphabetical reference of all operators in the language.
@ -3147,9 +3118,7 @@ This appendix provides a complete alphabetical reference of all operators in the
**Section**: 7.4 (Arrays)
---
<!-- END complete_operator_reference.md -->
<!-- START grammar_summary.md -->
## Appendix D: Grammar Summary
This appendix provides a concise grammar reference. For complete specifications of language constructs (fn, struct, trait, impl, etc.), see the `::Implementable` trait in Appendix B.
@ -3254,9 +3223,7 @@ Language constructs (fn, struct, trait, impl, enum, union, inher) are defined by
> **Complete Specification**: See Appendix B (`::Implementable` trait) for precise definitions of these construct operators.
---
<!-- END grammar_summary.md -->
<!-- START module_system.md -->
## Appendix E: Module System (Future)
**Current State**: All standard library functions and traits are automatically in scope.
@ -3302,9 +3269,7 @@ Language constructs (fn, struct, trait, impl, enum, union, inher) are defined by
- Faster compilation (selective imports)
---
<!-- END module_system.md -->
<!-- START memory_management.md -->
## Appendix F: Memory Management (Future)
The language specification currently does not include heap memory management. This appendix documents potential future approaches.
@ -3391,9 +3356,7 @@ Cons: Less granular control, memory held until arena freed
This would provide stronger type safety but add complexity to the type checker.
---
<!-- END memory_management.md -->
<!-- START examples_and_tutorials.md -->
## Appendix G: Examples & Tutorials
### G.1 Tutorial: First Steps
@ -3820,4 +3783,3 @@ dup ::x get print // Prints: 3.0
```
---
<!-- END examples_and_tutorials.md -->