Refactor combine_markdown and split_with_front_matter functions for improved metadata handling and section processing
This commit is contained in:
parent
75b750b662
commit
63d7af7100
|
|
@ -51,8 +51,8 @@ def combine_markdown(file_inputs, output_combined, output_meta_json):
|
|||
for f in file_inputs:
|
||||
files.extend(read_file_list(f))
|
||||
|
||||
combined = []
|
||||
meta_info = {}
|
||||
combined_parts = []
|
||||
meta_info = {"order": [], "files": {}}
|
||||
|
||||
for file in files:
|
||||
file_path = Path(file)
|
||||
|
|
@ -63,11 +63,12 @@ def combine_markdown(file_inputs, output_combined, output_meta_json):
|
|||
text = file_path.read_text(encoding='utf-8')
|
||||
front_matter = extract_front_matter(text)
|
||||
if front_matter:
|
||||
meta_info[file_path.name] = front_matter
|
||||
meta_info["files"][file_path.name] = front_matter
|
||||
cleaned = remove_front_matter(text).strip()
|
||||
combined.append(f"<!-- START {file_path.name} -->\n{cleaned}\n<!-- END {file_path.name} -->\n")
|
||||
combined_parts.append(cleaned)
|
||||
meta_info["order"].append(file_path.name)
|
||||
|
||||
Path(output_combined).write_text("\n".join(combined), encoding='utf-8')
|
||||
Path(output_combined).write_text("\n\n".join(combined_parts) + "\n", encoding='utf-8')
|
||||
Path(output_meta_json).write_text(json.dumps(meta_info, indent=2), encoding='utf-8')
|
||||
|
||||
print(f"Combined file saved as: {output_combined}")
|
||||
|
|
@ -105,7 +106,7 @@ def write_with_safety(path, content, force=False, backup=False):
|
|||
|
||||
def split_with_front_matter(input_combined, output_dir, metadata_file, force=False, backup=False):
|
||||
"""Split a combined markdown file back into original files, restoring front matter."""
|
||||
content = Path(input_combined).read_text(encoding='utf-8')
|
||||
combined_text = Path(input_combined).read_text(encoding='utf-8')
|
||||
|
||||
if not Path(metadata_file).exists():
|
||||
print(f"Metadata file not found: {metadata_file}")
|
||||
|
|
@ -114,35 +115,37 @@ def split_with_front_matter(input_combined, output_dir, metadata_file, force=Fal
|
|||
meta_info = json.loads(Path(metadata_file).read_text(encoding='utf-8'))
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
pattern = r'<!-- START (.*?) -->\n(.*?)\n<!-- END \1 -->'
|
||||
matches = re.findall(pattern, content, flags=re.DOTALL)
|
||||
order = meta_info.get("order", [])
|
||||
frontmatters = meta_info.get("files", {})
|
||||
|
||||
total_written = 0
|
||||
total_skipped = 0
|
||||
total_backups = 0
|
||||
# Split by H2s — each file should start with one or more H2 sections
|
||||
# and we assume each original file started with an H2 or higher heading.
|
||||
sections = re.split(r'(?=^## )', combined_text, flags=re.MULTILINE)
|
||||
sections = [s.strip() for s in sections if s.strip()]
|
||||
|
||||
for filename, body in matches:
|
||||
body = body.strip()
|
||||
if len(sections) != len(order):
|
||||
print(f"Warning: {len(sections)} sections found but {len(order)} files listed. "
|
||||
f"Splitting by simple proportion instead.")
|
||||
approx_size = len(combined_text) // len(order)
|
||||
chunks = [combined_text[i*approx_size:(i+1)*approx_size] for i in range(len(order)-1)]
|
||||
chunks.append(combined_text[(len(order)-1)*approx_size:])
|
||||
else:
|
||||
chunks = sections
|
||||
|
||||
for i, filename in enumerate(order):
|
||||
output_path = Path(output_dir, filename)
|
||||
body = chunks[i].strip() if i < len(chunks) else ""
|
||||
|
||||
# Restore front matter if available
|
||||
if filename in meta_info:
|
||||
front_matter = meta_info[filename].strip()
|
||||
restored = f"---\n{front_matter}\n---\n\n{body}\n"
|
||||
front_matter = frontmatters.get(filename)
|
||||
if front_matter:
|
||||
content = f"---\n{front_matter}\n---\n\n{body}\n"
|
||||
else:
|
||||
restored = body + "\n"
|
||||
content = body + "\n"
|
||||
|
||||
before = output_path.exists()
|
||||
write_with_safety(output_path, restored, force=force, backup=backup)
|
||||
write_with_safety(output_path, content, force=force, backup=backup)
|
||||
|
||||
if backup and before:
|
||||
total_backups += 1
|
||||
if output_path.exists():
|
||||
total_written += 1
|
||||
else:
|
||||
total_skipped += 1
|
||||
|
||||
print(f"Split complete. {total_written} files written, {total_skipped} skipped, {total_backups} backups made.")
|
||||
print(f"Split complete. {len(order)} files processed.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) < 2:
|
||||
|
|
|
|||
|
|
@ -1,4 +1,26 @@
|
|||
{
|
||||
"order": [
|
||||
"changes.md",
|
||||
"overview.md",
|
||||
"lexical_structure.md",
|
||||
"primitive_types.md",
|
||||
"basic_operations.md",
|
||||
"functions.md",
|
||||
"control_flow.md",
|
||||
"data_structures.md",
|
||||
"type_system.md",
|
||||
"trait_system.md",
|
||||
"generic_programming.md",
|
||||
"advanced_topics.md",
|
||||
"standard_library.md",
|
||||
"complete_trait_reference.md",
|
||||
"complete_operator_reference.md",
|
||||
"grammar_summary.md",
|
||||
"module_system.md",
|
||||
"memory_management.md",
|
||||
"examples_and_tutorials.md"
|
||||
],
|
||||
"files": {
|
||||
"changes.md": "Title: Stack Language Specification\nPrev:\nNext:",
|
||||
"overview.md": "Title: 1 Overview\nPrev: Index\nNext: Lexical Structure",
|
||||
"lexical_structure.md": "Title: 2 Lexical Structure\nPrev: Overview\nNext: Primitive Types",
|
||||
|
|
@ -18,4 +40,5 @@
|
|||
"module_system.md": "Title: E Module System\nPrev: Grammar Summary\nNext: Memory Management",
|
||||
"memory_management.md": "Title: F Memory Management\nPrev: Module System\nNext: Examples and Tutorials",
|
||||
"examples_and_tutorials.md": "Title: G Examples & Tutorials\nPrev: Memory Management\nNext:"
|
||||
}
|
||||
}
|
||||
|
|
@ -1,4 +1,3 @@
|
|||
<!-- START changes.md -->
|
||||
# Stack Language Specification
|
||||
|
||||
**Version**: 0.8.1
|
||||
|
|
@ -74,9 +73,7 @@
|
|||
1. Added links
|
||||
|
||||
---
|
||||
<!-- END changes.md -->
|
||||
|
||||
<!-- START overview.md -->
|
||||
## 1. Overview
|
||||
|
||||
A statically-typed, stack-based language with pure postfix notation combining the execution model of HP's RPL, the type system of C and Rust, and modern array operations from Uiua.
|
||||
|
|
@ -148,9 +145,7 @@ This specification is organized to support both learning and reference:
|
|||
**Reference lookup**: Use Appendices A-C for quick reference to standard library functions, traits, and operators.
|
||||
|
||||
---
|
||||
<!-- END overview.md -->
|
||||
|
||||
<!-- START lexical_structure.md -->
|
||||
## 2. Lexical Structure
|
||||
|
||||
### 2.1 Comments
|
||||
|
|
@ -222,9 +217,7 @@ false
|
|||
```
|
||||
|
||||
---
|
||||
<!-- END lexical_structure.md -->
|
||||
|
||||
<!-- START primitive_types.md -->
|
||||
## 3. Primitive Types
|
||||
|
||||
The language provides several built-in primitive types for common values:
|
||||
|
|
@ -258,9 +251,7 @@ Raw pointers (`ptr`) are a future feature. See [Appendix F](./memory_management.
|
|||
> **Related**: See Section 8 for the complete type system, including composite types and type inference.
|
||||
|
||||
---
|
||||
<!-- END primitive_types.md -->
|
||||
|
||||
<!-- START basic_operations.md -->
|
||||
## 4. Basic Operations
|
||||
|
||||
### 4.1 Stack Operations
|
||||
|
|
@ -388,9 +379,7 @@ Bitwise operations work on integer types:
|
|||
> **Implementation Details**: Bitwise operators implement the `::Bitwise` trait. See [Appendix B](./complete_trait_reference.html) for the complete trait definition.
|
||||
|
||||
---
|
||||
<!-- END basic_operations.md -->
|
||||
|
||||
<!-- START functions.md -->
|
||||
## 5. Functions
|
||||
|
||||
Functions are user-defined procedures that encapsulate reusable code. They are the primary abstraction mechanism in the language.
|
||||
|
|
@ -523,9 +512,7 @@ The `lambda` operator converts a TokenString into a callable code block that can
|
|||
> **Related**: See [Section 11.1](advanced_topics.html#111-dynamic-code-evaluation) for the `eval` operator used to execute lambdas.
|
||||
|
||||
---
|
||||
<!-- END functions.md -->
|
||||
|
||||
<!-- START control_flow.md -->
|
||||
## 6. Control Flow
|
||||
|
||||
### 6.1 Conditionals
|
||||
|
|
@ -678,9 +665,7 @@ result {
|
|||
**Pattern Syntax**: Patterns can match enum variants, union variants, or literal values. The matched value (if any) is bound and available in the corresponding block.
|
||||
|
||||
---
|
||||
<!-- END control_flow.md -->
|
||||
|
||||
<!-- START data_structures.md -->
|
||||
## 7. Data Structures
|
||||
|
||||
### 7.1 Structs
|
||||
|
|
@ -831,9 +816,7 @@ These operations take TokenString arguments containing function bodies:
|
|||
> **Implementation Details**: Array operations implement various traits including `::ArrayOf<T>`, `::Selectable<T>`, `::Sliceable`, and `::Sized`. See [Appendix B](./complete_trait_reference.html) for complete trait definitions and [Appendix A](./standard_library.html) for the full array operation reference.
|
||||
|
||||
---
|
||||
<!-- END data_structures.md -->
|
||||
|
||||
<!-- START type_system.md -->
|
||||
## 8. Type System
|
||||
|
||||
### 8.1 Types vs Traits
|
||||
|
|
@ -985,9 +968,7 @@ Option<Point<f64>> // Option containing a Point of f64s
|
|||
```
|
||||
|
||||
---
|
||||
<!-- END type_system.md -->
|
||||
|
||||
<!-- START trait_system.md -->
|
||||
## 9. Trait System
|
||||
|
||||
### 9.1 What are Traits
|
||||
|
|
@ -1200,9 +1181,7 @@ This section provides a brief overview of all standard traits. For complete defi
|
|||
> **Complete Reference**: See [Appendix B](./complete_trait_reference.html) for full trait definitions with all methods, examples, and implementation details.
|
||||
|
||||
---
|
||||
<!-- END trait_system.md -->
|
||||
|
||||
<!-- START generic_programming.md -->
|
||||
## 10. Generic Programming
|
||||
|
||||
### 10.1 Type Parameters
|
||||
|
|
@ -1375,9 +1354,7 @@ When inheriting from generic traits, you must either:
|
|||
> **Future Enhancement**: See [Appendix F](./memory_management.html) for planned type parameter enforcement at parse time.
|
||||
|
||||
---
|
||||
<!-- END generic_programming.md -->
|
||||
|
||||
<!-- START advanced_topics.md -->
|
||||
## 11. Advanced Topics
|
||||
|
||||
### 11.1 Dynamic Code Evaluation
|
||||
|
|
@ -1484,9 +1461,7 @@ The standard library provides I/O, string operations, type conversions, and util
|
|||
> **Complete Reference**: See [Appendix A](./standard_library.html) for the full standard library reference with all functions, signatures, and examples.
|
||||
|
||||
---
|
||||
<!-- END advanced_topics.md -->
|
||||
|
||||
<!-- START standard_library.md -->
|
||||
## Appendix A: Standard Library
|
||||
|
||||
This appendix provides a complete alphabetical reference of all standard library functions and operations.
|
||||
|
|
@ -1828,9 +1803,7 @@ This appendix provides a complete alphabetical reference of all standard library
|
|||
**See Also**: [read](#read)
|
||||
|
||||
---
|
||||
<!-- END standard_library.md -->
|
||||
|
||||
<!-- START complete_trait_reference.md -->
|
||||
## Appendix B: Complete Trait Reference
|
||||
|
||||
This appendix contains all built-in trait definitions with complete documentation, organized alphabetically.
|
||||
|
|
@ -2599,9 +2572,7 @@ This appendix contains all built-in trait definitions with complete documentatio
|
|||
**See Also**: Section 11.3 (Type Conversion)
|
||||
|
||||
---
|
||||
<!-- END complete_trait_reference.md -->
|
||||
|
||||
<!-- START complete_operator_reference.md -->
|
||||
## Appendix C: Complete Operator Reference
|
||||
|
||||
This appendix provides a complete alphabetical reference of all operators in the language.
|
||||
|
|
@ -3147,9 +3118,7 @@ This appendix provides a complete alphabetical reference of all operators in the
|
|||
**Section**: 7.4 (Arrays)
|
||||
|
||||
---
|
||||
<!-- END complete_operator_reference.md -->
|
||||
|
||||
<!-- START grammar_summary.md -->
|
||||
## Appendix D: Grammar Summary
|
||||
|
||||
This appendix provides a concise grammar reference. For complete specifications of language constructs (fn, struct, trait, impl, etc.), see the `::Implementable` trait in Appendix B.
|
||||
|
|
@ -3254,9 +3223,7 @@ Language constructs (fn, struct, trait, impl, enum, union, inher) are defined by
|
|||
> **Complete Specification**: See Appendix B (`::Implementable` trait) for precise definitions of these construct operators.
|
||||
|
||||
---
|
||||
<!-- END grammar_summary.md -->
|
||||
|
||||
<!-- START module_system.md -->
|
||||
## Appendix E: Module System (Future)
|
||||
|
||||
**Current State**: All standard library functions and traits are automatically in scope.
|
||||
|
|
@ -3302,9 +3269,7 @@ Language constructs (fn, struct, trait, impl, enum, union, inher) are defined by
|
|||
- Faster compilation (selective imports)
|
||||
|
||||
---
|
||||
<!-- END module_system.md -->
|
||||
|
||||
<!-- START memory_management.md -->
|
||||
## Appendix F: Memory Management (Future)
|
||||
|
||||
The language specification currently does not include heap memory management. This appendix documents potential future approaches.
|
||||
|
|
@ -3391,9 +3356,7 @@ Cons: Less granular control, memory held until arena freed
|
|||
This would provide stronger type safety but add complexity to the type checker.
|
||||
|
||||
---
|
||||
<!-- END memory_management.md -->
|
||||
|
||||
<!-- START examples_and_tutorials.md -->
|
||||
## Appendix G: Examples & Tutorials
|
||||
|
||||
### G.1 Tutorial: First Steps
|
||||
|
|
@ -3820,4 +3783,3 @@ dup ::x get print // Prints: 3.0
|
|||
```
|
||||
|
||||
---
|
||||
<!-- END examples_and_tutorials.md -->
|
||||
|
|
|
|||
Loading…
Reference in New Issue