Refactor combine_markdown and split_with_front_matter functions for improved metadata handling and section processing

This commit is contained in:
Kyler Olsen 2025-11-02 01:34:14 -06:00
parent 75b750b662
commit 63d7af7100
3 changed files with 72 additions and 84 deletions

View File

@@ -51,8 +51,8 @@ def combine_markdown(file_inputs, output_combined, output_meta_json):
for f in file_inputs: for f in file_inputs:
files.extend(read_file_list(f)) files.extend(read_file_list(f))
combined = [] combined_parts = []
meta_info = {} meta_info = {"order": [], "files": {}}
for file in files: for file in files:
file_path = Path(file) file_path = Path(file)
@@ -63,11 +63,12 @@ def combine_markdown(file_inputs, output_combined, output_meta_json):
text = file_path.read_text(encoding='utf-8') text = file_path.read_text(encoding='utf-8')
front_matter = extract_front_matter(text) front_matter = extract_front_matter(text)
if front_matter: if front_matter:
meta_info[file_path.name] = front_matter meta_info["files"][file_path.name] = front_matter
cleaned = remove_front_matter(text).strip() cleaned = remove_front_matter(text).strip()
combined.append(f"<!-- START {file_path.name} -->\n{cleaned}\n<!-- END {file_path.name} -->\n") combined_parts.append(cleaned)
meta_info["order"].append(file_path.name)
Path(output_combined).write_text("\n".join(combined), encoding='utf-8') Path(output_combined).write_text("\n\n".join(combined_parts) + "\n", encoding='utf-8')
Path(output_meta_json).write_text(json.dumps(meta_info, indent=2), encoding='utf-8') Path(output_meta_json).write_text(json.dumps(meta_info, indent=2), encoding='utf-8')
print(f"Combined file saved as: {output_combined}") print(f"Combined file saved as: {output_combined}")
@@ -105,7 +106,7 @@ def write_with_safety(path, content, force=False, backup=False):
def split_with_front_matter(input_combined, output_dir, metadata_file, force=False, backup=False): def split_with_front_matter(input_combined, output_dir, metadata_file, force=False, backup=False):
"""Split a combined markdown file back into original files, restoring front matter.""" """Split a combined markdown file back into original files, restoring front matter."""
content = Path(input_combined).read_text(encoding='utf-8') combined_text = Path(input_combined).read_text(encoding='utf-8')
if not Path(metadata_file).exists(): if not Path(metadata_file).exists():
print(f"Metadata file not found: {metadata_file}") print(f"Metadata file not found: {metadata_file}")
@@ -114,35 +115,37 @@ def split_with_front_matter(input_combined, output_dir, metadata_file, force=Fal
meta_info = json.loads(Path(metadata_file).read_text(encoding='utf-8')) meta_info = json.loads(Path(metadata_file).read_text(encoding='utf-8'))
os.makedirs(output_dir, exist_ok=True) os.makedirs(output_dir, exist_ok=True)
pattern = r'<!-- START (.*?) -->\n(.*?)\n<!-- END \1 -->' order = meta_info.get("order", [])
matches = re.findall(pattern, content, flags=re.DOTALL) frontmatters = meta_info.get("files", {})
total_written = 0 # Split by H2s — each file should start with one or more H2 sections
total_skipped = 0 # and we assume each original file started with an H2 or higher heading.
total_backups = 0 sections = re.split(r'(?=^## )', combined_text, flags=re.MULTILINE)
sections = [s.strip() for s in sections if s.strip()]
for filename, body in matches: if len(sections) != len(order):
body = body.strip() print(f"Warning: {len(sections)} sections found but {len(order)} files listed. "
f"Splitting by simple proportion instead.")
approx_size = len(combined_text) // len(order)
chunks = [combined_text[i*approx_size:(i+1)*approx_size] for i in range(len(order)-1)]
chunks.append(combined_text[(len(order)-1)*approx_size:])
else:
chunks = sections
for i, filename in enumerate(order):
output_path = Path(output_dir, filename) output_path = Path(output_dir, filename)
body = chunks[i].strip() if i < len(chunks) else ""
# Restore front matter if available # Restore front matter if available
if filename in meta_info: front_matter = frontmatters.get(filename)
front_matter = meta_info[filename].strip() if front_matter:
restored = f"---\n{front_matter}\n---\n\n{body}\n" content = f"---\n{front_matter}\n---\n\n{body}\n"
else: else:
restored = body + "\n" content = body + "\n"
before = output_path.exists() write_with_safety(output_path, content, force=force, backup=backup)
write_with_safety(output_path, restored, force=force, backup=backup)
if backup and before: print(f"Split complete. {len(order)} files processed.")
total_backups += 1
if output_path.exists():
total_written += 1
else:
total_skipped += 1
print(f"Split complete. {total_written} files written, {total_skipped} skipped, {total_backups} backups made.")
if __name__ == "__main__": if __name__ == "__main__":
if len(sys.argv) < 2: if len(sys.argv) < 2:

View File

@@ -1,4 +1,26 @@
{ {
"order": [
"changes.md",
"overview.md",
"lexical_structure.md",
"primitive_types.md",
"basic_operations.md",
"functions.md",
"control_flow.md",
"data_structures.md",
"type_system.md",
"trait_system.md",
"generic_programming.md",
"advanced_topics.md",
"standard_library.md",
"complete_trait_reference.md",
"complete_operator_reference.md",
"grammar_summary.md",
"module_system.md",
"memory_management.md",
"examples_and_tutorials.md"
],
"files": {
"changes.md": "Title: Stack Language Specification\nPrev:\nNext:", "changes.md": "Title: Stack Language Specification\nPrev:\nNext:",
"overview.md": "Title: 1 Overview\nPrev: Index\nNext: Lexical Structure", "overview.md": "Title: 1 Overview\nPrev: Index\nNext: Lexical Structure",
"lexical_structure.md": "Title: 2 Lexical Structure\nPrev: Overview\nNext: Primitive Types", "lexical_structure.md": "Title: 2 Lexical Structure\nPrev: Overview\nNext: Primitive Types",
@@ -18,4 +40,5 @@
"module_system.md": "Title: E Module System\nPrev: Grammar Summary\nNext: Memory Management", "module_system.md": "Title: E Module System\nPrev: Grammar Summary\nNext: Memory Management",
"memory_management.md": "Title: F Memory Management\nPrev: Module System\nNext: Examples and Tutorials", "memory_management.md": "Title: F Memory Management\nPrev: Module System\nNext: Examples and Tutorials",
"examples_and_tutorials.md": "Title: G Examples & Tutorials\nPrev: Memory Management\nNext:" "examples_and_tutorials.md": "Title: G Examples & Tutorials\nPrev: Memory Management\nNext:"
}
} }

View File

@@ -1,4 +1,3 @@
<!-- START changes.md -->
# Stack Language Specification # Stack Language Specification
**Version**: 0.8.1 **Version**: 0.8.1
@@ -74,9 +73,7 @@
1. Added links 1. Added links
--- ---
<!-- END changes.md -->
<!-- START overview.md -->
## 1. Overview ## 1. Overview
A statically-typed, stack-based language with pure postfix notation combining the execution model of HP's RPL, the type system of C and Rust, and modern array operations from Uiua. A statically-typed, stack-based language with pure postfix notation combining the execution model of HP's RPL, the type system of C and Rust, and modern array operations from Uiua.
@@ -148,9 +145,7 @@ This specification is organized to support both learning and reference:
**Reference lookup**: Use Appendices A-C for quick reference to standard library functions, traits, and operators. **Reference lookup**: Use Appendices A-C for quick reference to standard library functions, traits, and operators.
--- ---
<!-- END overview.md -->
<!-- START lexical_structure.md -->
## 2. Lexical Structure ## 2. Lexical Structure
### 2.1 Comments ### 2.1 Comments
@@ -222,9 +217,7 @@ false
``` ```
--- ---
<!-- END lexical_structure.md -->
<!-- START primitive_types.md -->
## 3. Primitive Types ## 3. Primitive Types
The language provides several built-in primitive types for common values: The language provides several built-in primitive types for common values:
@@ -258,9 +251,7 @@ Raw pointers (`ptr`) are a future feature. See [Appendix F](./memory_management.
> **Related**: See Section 8 for the complete type system, including composite types and type inference. > **Related**: See Section 8 for the complete type system, including composite types and type inference.
--- ---
<!-- END primitive_types.md -->
<!-- START basic_operations.md -->
## 4. Basic Operations ## 4. Basic Operations
### 4.1 Stack Operations ### 4.1 Stack Operations
@@ -388,9 +379,7 @@ Bitwise operations work on integer types:
> **Implementation Details**: Bitwise operators implement the `::Bitwise` trait. See [Appendix B](./complete_trait_reference.html) for the complete trait definition. > **Implementation Details**: Bitwise operators implement the `::Bitwise` trait. See [Appendix B](./complete_trait_reference.html) for the complete trait definition.
--- ---
<!-- END basic_operations.md -->
<!-- START functions.md -->
## 5. Functions ## 5. Functions
Functions are user-defined procedures that encapsulate reusable code. They are the primary abstraction mechanism in the language. Functions are user-defined procedures that encapsulate reusable code. They are the primary abstraction mechanism in the language.
@@ -523,9 +512,7 @@ The `lambda` operator converts a TokenString into a callable code block that can
> **Related**: See [Section 11.1](advanced_topics.html#111-dynamic-code-evaluation) for the `eval` operator used to execute lambdas. > **Related**: See [Section 11.1](advanced_topics.html#111-dynamic-code-evaluation) for the `eval` operator used to execute lambdas.
--- ---
<!-- END functions.md -->
<!-- START control_flow.md -->
## 6. Control Flow ## 6. Control Flow
### 6.1 Conditionals ### 6.1 Conditionals
@@ -678,9 +665,7 @@ result {
**Pattern Syntax**: Patterns can match enum variants, union variants, or literal values. The matched value (if any) is bound and available in the corresponding block. **Pattern Syntax**: Patterns can match enum variants, union variants, or literal values. The matched value (if any) is bound and available in the corresponding block.
--- ---
<!-- END control_flow.md -->
<!-- START data_structures.md -->
## 7. Data Structures ## 7. Data Structures
### 7.1 Structs ### 7.1 Structs
@@ -831,9 +816,7 @@ These operations take TokenString arguments containing function bodies:
> **Implementation Details**: Array operations implement various traits including `::ArrayOf<T>`, `::Selectable<T>`, `::Sliceable`, and `::Sized`. See [Appendix B](./complete_trait_reference.html) for complete trait definitions and [Appendix A](./standard_library.html) for the full array operation reference. > **Implementation Details**: Array operations implement various traits including `::ArrayOf<T>`, `::Selectable<T>`, `::Sliceable`, and `::Sized`. See [Appendix B](./complete_trait_reference.html) for complete trait definitions and [Appendix A](./standard_library.html) for the full array operation reference.
--- ---
<!-- END data_structures.md -->
<!-- START type_system.md -->
## 8. Type System ## 8. Type System
### 8.1 Types vs Traits ### 8.1 Types vs Traits
@@ -985,9 +968,7 @@ Option<Point<f64>> // Option containing a Point of f64s
``` ```
--- ---
<!-- END type_system.md -->
<!-- START trait_system.md -->
## 9. Trait System ## 9. Trait System
### 9.1 What are Traits ### 9.1 What are Traits
@@ -1200,9 +1181,7 @@ This section provides a brief overview of all standard traits. For complete defi
> **Complete Reference**: See [Appendix B](./complete_trait_reference.html) for full trait definitions with all methods, examples, and implementation details. > **Complete Reference**: See [Appendix B](./complete_trait_reference.html) for full trait definitions with all methods, examples, and implementation details.
--- ---
<!-- END trait_system.md -->
<!-- START generic_programming.md -->
## 10. Generic Programming ## 10. Generic Programming
### 10.1 Type Parameters ### 10.1 Type Parameters
@@ -1375,9 +1354,7 @@ When inheriting from generic traits, you must either:
> **Future Enhancement**: See [Appendix F](./memory_management.html) for planned type parameter enforcement at parse time. > **Future Enhancement**: See [Appendix F](./memory_management.html) for planned type parameter enforcement at parse time.
--- ---
<!-- END generic_programming.md -->
<!-- START advanced_topics.md -->
## 11. Advanced Topics ## 11. Advanced Topics
### 11.1 Dynamic Code Evaluation ### 11.1 Dynamic Code Evaluation
@@ -1484,9 +1461,7 @@ The standard library provides I/O, string operations, type conversions, and util
> **Complete Reference**: See [Appendix A](./standard_library.html) for the full standard library reference with all functions, signatures, and examples. > **Complete Reference**: See [Appendix A](./standard_library.html) for the full standard library reference with all functions, signatures, and examples.
--- ---
<!-- END advanced_topics.md -->
<!-- START standard_library.md -->
## Appendix A: Standard Library ## Appendix A: Standard Library
This appendix provides a complete alphabetical reference of all standard library functions and operations. This appendix provides a complete alphabetical reference of all standard library functions and operations.
@@ -1828,9 +1803,7 @@ This appendix provides a complete alphabetical reference of all standard library
**See Also**: [read](#read) **See Also**: [read](#read)
--- ---
<!-- END standard_library.md -->
<!-- START complete_trait_reference.md -->
## Appendix B: Complete Trait Reference ## Appendix B: Complete Trait Reference
This appendix contains all built-in trait definitions with complete documentation, organized alphabetically. This appendix contains all built-in trait definitions with complete documentation, organized alphabetically.
@@ -2599,9 +2572,7 @@ This appendix contains all built-in trait definitions with complete documentatio
**See Also**: Section 11.3 (Type Conversion) **See Also**: Section 11.3 (Type Conversion)
--- ---
<!-- END complete_trait_reference.md -->
<!-- START complete_operator_reference.md -->
## Appendix C: Complete Operator Reference ## Appendix C: Complete Operator Reference
This appendix provides a complete alphabetical reference of all operators in the language. This appendix provides a complete alphabetical reference of all operators in the language.
@@ -3147,9 +3118,7 @@ This appendix provides a complete alphabetical reference of all operators in the
**Section**: 7.4 (Arrays) **Section**: 7.4 (Arrays)
--- ---
<!-- END complete_operator_reference.md -->
<!-- START grammar_summary.md -->
## Appendix D: Grammar Summary ## Appendix D: Grammar Summary
This appendix provides a concise grammar reference. For complete specifications of language constructs (fn, struct, trait, impl, etc.), see the `::Implementable` trait in Appendix B. This appendix provides a concise grammar reference. For complete specifications of language constructs (fn, struct, trait, impl, etc.), see the `::Implementable` trait in Appendix B.
@@ -3254,9 +3223,7 @@ Language constructs (fn, struct, trait, impl, enum, union, inher) are defined by
> **Complete Specification**: See Appendix B (`::Implementable` trait) for precise definitions of these construct operators. > **Complete Specification**: See Appendix B (`::Implementable` trait) for precise definitions of these construct operators.
--- ---
<!-- END grammar_summary.md -->
<!-- START module_system.md -->
## Appendix E: Module System (Future) ## Appendix E: Module System (Future)
**Current State**: All standard library functions and traits are automatically in scope. **Current State**: All standard library functions and traits are automatically in scope.
@@ -3302,9 +3269,7 @@ Language constructs (fn, struct, trait, impl, enum, union, inher) are defined by
- Faster compilation (selective imports) - Faster compilation (selective imports)
--- ---
<!-- END module_system.md -->
<!-- START memory_management.md -->
## Appendix F: Memory Management (Future) ## Appendix F: Memory Management (Future)
The language specification currently does not include heap memory management. This appendix documents potential future approaches. The language specification currently does not include heap memory management. This appendix documents potential future approaches.
@@ -3391,9 +3356,7 @@ Cons: Less granular control, memory held until arena freed
This would provide stronger type safety but add complexity to the type checker. This would provide stronger type safety but add complexity to the type checker.
--- ---
<!-- END memory_management.md -->
<!-- START examples_and_tutorials.md -->
## Appendix G: Examples & Tutorials ## Appendix G: Examples & Tutorials
### G.1 Tutorial: First Steps ### G.1 Tutorial: First Steps
@@ -3820,4 +3783,3 @@ dup ::x get print // Prints: 3.0
``` ```
--- ---
<!-- END examples_and_tutorials.md -->