From 63d7af710053903f6dd13a417c7e399c5410fb1b Mon Sep 17 00:00:00 2001 From: Kyler Date: Sun, 2 Nov 2025 01:34:14 -0600 Subject: [PATCH] Refactor combine_markdown and split_with_front_matter functions for improved metadata handling and section processing --- manage_docs.py | 57 +++++++++++++++++++++++-------------------- metadata.json | 61 +++++++++++++++++++++++++++++++--------------- stack_lang_spec.md | 38 ----------------------------- 3 files changed, 72 insertions(+), 84 deletions(-) diff --git a/manage_docs.py b/manage_docs.py index a555c38..44a5332 100644 --- a/manage_docs.py +++ b/manage_docs.py @@ -51,8 +51,8 @@ def combine_markdown(file_inputs, output_combined, output_meta_json): for f in file_inputs: files.extend(read_file_list(f)) - combined = [] - meta_info = {} + combined_parts = [] + meta_info = {"order": [], "files": {}} for file in files: file_path = Path(file) @@ -63,11 +63,12 @@ def combine_markdown(file_inputs, output_combined, output_meta_json): text = file_path.read_text(encoding='utf-8') front_matter = extract_front_matter(text) if front_matter: - meta_info[file_path.name] = front_matter + meta_info["files"][file_path.name] = front_matter cleaned = remove_front_matter(text).strip() - combined.append(f"\n{cleaned}\n\n") + combined_parts.append(cleaned) + meta_info["order"].append(file_path.name) - Path(output_combined).write_text("\n".join(combined), encoding='utf-8') + Path(output_combined).write_text("\n\n".join(combined_parts) + "\n", encoding='utf-8') Path(output_meta_json).write_text(json.dumps(meta_info, indent=2), encoding='utf-8') print(f"Combined file saved as: {output_combined}") @@ -105,7 +106,7 @@ def write_with_safety(path, content, force=False, backup=False): def split_with_front_matter(input_combined, output_dir, metadata_file, force=False, backup=False): """Split a combined markdown file back into original files, restoring front matter.""" - content = Path(input_combined).read_text(encoding='utf-8') + combined_text = Path(input_combined).read_text(encoding='utf-8') if not Path(metadata_file).exists(): print(f"Metadata file not found: {metadata_file}") @@ -114,35 +115,37 @@ def split_with_front_matter(input_combined, output_dir, metadata_file, force=Fal meta_info = json.loads(Path(metadata_file).read_text(encoding='utf-8')) os.makedirs(output_dir, exist_ok=True) - pattern = r'\n(.*?)\n' - matches = re.findall(pattern, content, flags=re.DOTALL) + order = meta_info.get("order", []) + frontmatters = meta_info.get("files", {}) - total_written = 0 - total_skipped = 0 - total_backups = 0 + # Split by H2s — each file should start with one or more H2 sections + # and we assume each original file started with an H2 or higher heading. + sections = re.split(r'(?=^## )', combined_text, flags=re.MULTILINE) + sections = [s.strip() for s in sections if s.strip()] - for filename, body in matches: - body = body.strip() + if len(sections) != len(order): + print(f"Warning: {len(sections)} sections found but {len(order)} files listed. " + f"Splitting by simple proportion instead.") + approx_size = len(combined_text) // len(order) + chunks = [combined_text[i*approx_size:(i+1)*approx_size] for i in range(len(order)-1)] + chunks.append(combined_text[(len(order)-1)*approx_size:]) + else: + chunks = sections + + for i, filename in enumerate(order): output_path = Path(output_dir, filename) + body = chunks[i].strip() if i < len(chunks) else "" # Restore front matter if available - if filename in meta_info: - front_matter = meta_info[filename].strip() - restored = f"---\n{front_matter}\n---\n\n{body}\n" + front_matter = frontmatters.get(filename) + if front_matter: + content = f"---\n{front_matter}\n---\n\n{body}\n" else: - restored = body + "\n" + content = body + "\n" - before = output_path.exists() - write_with_safety(output_path, restored, force=force, backup=backup) + write_with_safety(output_path, content, force=force, backup=backup) - if backup and before: - total_backups += 1 - if output_path.exists(): - total_written += 1 - else: - total_skipped += 1 - - print(f"Split complete. {total_written} files written, {total_skipped} skipped, {total_backups} backups made.") + print(f"Split complete. {len(order)} files processed.") if __name__ == "__main__": if len(sys.argv) < 2: diff --git a/metadata.json b/metadata.json index e4df97a..2b788b3 100644 --- a/metadata.json +++ b/metadata.json @@ -1,21 +1,44 @@ { - "changes.md": "Title: Stack Language Specification\nPrev:\nNext:", - "overview.md": "Title: 1 Overview\nPrev: Index\nNext: Lexical Structure", - "lexical_structure.md": "Title: 2 Lexical Structure\nPrev: Overview\nNext: Primitive Types", - "primitive_types.md": "Title: 3 Primitive Types\nPrev: Lexical Structure\nNext: Basic Operations", - "basic_operations.md": "Title: 4 Basic Operations\nPrev: Primitive Types\nNext: Functions", - "functions.md": "Title: 5 Functions\nPrev: Basic Operations\nNext: Control Flow", - "control_flow.md": "Title: 6 Control Flow\nPrev: Functions\nNext: Data Structures", - "data_structures.md": "Title: 7 Data Structures\nPrev: Control Flow\nNext: Type System", - "type_system.md": "Title: 8 Type System\nPrev: Data Structures\nNext: Trait System", - "trait_system.md": "Title: 9 Trait System\nPrev: Type System\nNext: Generic Programming", - "generic_programming.md": "Title: 10 Generic Programming\nPrev: Trait System\nNext: Advanced Topics", - "advanced_topics.md": "Title: 11 Advanced Topics\nPrev: Generic Programming\nNext: Standard Library", - "standard_library.md": "Title: A Standard Library\nPrev: Advanced Topics\nNext: Complete Trait Reference", - "complete_trait_reference.md": "Title: B Complete Trait Reference\nPrev: Standard Library\nNext: Complete Operator Reference", - "complete_operator_reference.md": "Title: C Complete Operator Reference\nPrev: Complete Trait Reference\nNext: Grammar Summary", - "grammar_summary.md": "Title: D Grammar Summary\nPrev: Complete Operator Reference\nNext: Module System", - "module_system.md": "Title: E Module System\nPrev: Grammar Summary\nNext: Memory Management", - "memory_management.md": "Title: F Memory Management\nPrev: Module System\nNext: Examples and Tutorials", - "examples_and_tutorials.md": "Title: G Examples & Tutorials\nPrev: Memory Management\nNext:" + "order": [ + "changes.md", + "overview.md", + "lexical_structure.md", + "primitive_types.md", + "basic_operations.md", + "functions.md", + "control_flow.md", + "data_structures.md", + "type_system.md", + "trait_system.md", + "generic_programming.md", + "advanced_topics.md", + "standard_library.md", + "complete_trait_reference.md", + "complete_operator_reference.md", + "grammar_summary.md", + "module_system.md", + "memory_management.md", + "examples_and_tutorials.md" + ], + "files": { + "changes.md": "Title: Stack Language Specification\nPrev:\nNext:", + "overview.md": "Title: 1 Overview\nPrev: Index\nNext: Lexical Structure", + "lexical_structure.md": "Title: 2 Lexical Structure\nPrev: Overview\nNext: Primitive Types", + "primitive_types.md": "Title: 3 Primitive Types\nPrev: Lexical Structure\nNext: Basic Operations", + "basic_operations.md": "Title: 4 Basic Operations\nPrev: Primitive Types\nNext: Functions", + "functions.md": "Title: 5 Functions\nPrev: Basic Operations\nNext: Control Flow", + "control_flow.md": "Title: 6 Control Flow\nPrev: Functions\nNext: Data Structures", + "data_structures.md": "Title: 7 Data Structures\nPrev: Control Flow\nNext: Type System", + "type_system.md": "Title: 8 Type System\nPrev: Data Structures\nNext: Trait System", + "trait_system.md": "Title: 9 Trait System\nPrev: Type System\nNext: Generic Programming", + "generic_programming.md": "Title: 10 Generic Programming\nPrev: Trait System\nNext: Advanced Topics", + "advanced_topics.md": "Title: 11 Advanced Topics\nPrev: Generic Programming\nNext: Standard Library", + "standard_library.md": "Title: A Standard Library\nPrev: Advanced Topics\nNext: Complete Trait Reference", + "complete_trait_reference.md": "Title: B Complete Trait Reference\nPrev: Standard Library\nNext: Complete Operator Reference", + "complete_operator_reference.md": "Title: C Complete Operator Reference\nPrev: Complete Trait Reference\nNext: Grammar Summary", + "grammar_summary.md": "Title: D Grammar Summary\nPrev: Complete Operator Reference\nNext: Module System", + "module_system.md": "Title: E Module System\nPrev: Grammar Summary\nNext: Memory Management", + "memory_management.md": "Title: F Memory Management\nPrev: Module System\nNext: Examples and Tutorials", + "examples_and_tutorials.md": "Title: G Examples & Tutorials\nPrev: Memory Management\nNext:" + } } \ No newline at end of file diff --git a/stack_lang_spec.md b/stack_lang_spec.md index 9509ad5..a046754 100644 --- a/stack_lang_spec.md +++ b/stack_lang_spec.md @@ -1,4 +1,3 @@ - # Stack Language Specification **Version**: 0.8.1 @@ -74,9 +73,7 @@ 1. Added links --- - - ## 1. Overview A statically-typed, stack-based language with pure postfix notation combining the execution model of HP's RPL, the type system of C and Rust, and modern array operations from Uiua. @@ -148,9 +145,7 @@ This specification is organized to support both learning and reference: **Reference lookup**: Use Appendices A-C for quick reference to standard library functions, traits, and operators. --- - - ## 2. Lexical Structure ### 2.1 Comments @@ -222,9 +217,7 @@ false ``` --- - - ## 3. Primitive Types The language provides several built-in primitive types for common values: @@ -258,9 +251,7 @@ Raw pointers (`ptr`) are a future feature. See [Appendix F](./memory_management. > **Related**: See Section 8 for the complete type system, including composite types and type inference. --- - - ## 4. Basic Operations ### 4.1 Stack Operations @@ -388,9 +379,7 @@ Bitwise operations work on integer types: > **Implementation Details**: Bitwise operators implement the `::Bitwise` trait. See [Appendix B](./complete_trait_reference.html) for the complete trait definition. --- - - ## 5. Functions Functions are user-defined procedures that encapsulate reusable code. They are the primary abstraction mechanism in the language. @@ -523,9 +512,7 @@ The `lambda` operator converts a TokenString into a callable code block that can > **Related**: See [Section 11.1](advanced_topics.html#111-dynamic-code-evaluation) for the `eval` operator used to execute lambdas. --- - - ## 6. Control Flow ### 6.1 Conditionals @@ -678,9 +665,7 @@ result { **Pattern Syntax**: Patterns can match enum variants, union variants, or literal values. The matched value (if any) is bound and available in the corresponding block. --- - - ## 7. Data Structures ### 7.1 Structs @@ -831,9 +816,7 @@ These operations take TokenString arguments containing function bodies: > **Implementation Details**: Array operations implement various traits including `::ArrayOf`, `::Selectable`, `::Sliceable`, and `::Sized`. See [Appendix B](./complete_trait_reference.html) for complete trait definitions and [Appendix A](./standard_library.html) for the full array operation reference. --- - - ## 8. Type System ### 8.1 Types vs Traits @@ -985,9 +968,7 @@ Option> // Option containing a Point of f64s ``` --- - - ## 9. Trait System ### 9.1 What are Traits @@ -1200,9 +1181,7 @@ This section provides a brief overview of all standard traits. For complete defi > **Complete Reference**: See [Appendix B](./complete_trait_reference.html) for full trait definitions with all methods, examples, and implementation details. --- - - ## 10. Generic Programming ### 10.1 Type Parameters @@ -1375,9 +1354,7 @@ When inheriting from generic traits, you must either: > **Future Enhancement**: See [Appendix F](./memory_management.html) for planned type parameter enforcement at parse time. --- - - ## 11. Advanced Topics ### 11.1 Dynamic Code Evaluation @@ -1484,9 +1461,7 @@ The standard library provides I/O, string operations, type conversions, and util > **Complete Reference**: See [Appendix A](./standard_library.html) for the full standard library reference with all functions, signatures, and examples. --- - - ## Appendix A: Standard Library This appendix provides a complete alphabetical reference of all standard library functions and operations. @@ -1828,9 +1803,7 @@ This appendix provides a complete alphabetical reference of all standard library **See Also**: [read](#read) --- - - ## Appendix B: Complete Trait Reference This appendix contains all built-in trait definitions with complete documentation, organized alphabetically. @@ -2599,9 +2572,7 @@ This appendix contains all built-in trait definitions with complete documentatio **See Also**: Section 11.3 (Type Conversion) --- - - ## Appendix C: Complete Operator Reference This appendix provides a complete alphabetical reference of all operators in the language. @@ -3147,9 +3118,7 @@ This appendix provides a complete alphabetical reference of all operators in the **Section**: 7.4 (Arrays) --- - - ## Appendix D: Grammar Summary This appendix provides a concise grammar reference. For complete specifications of language constructs (fn, struct, trait, impl, etc.), see the `::Implementable` trait in Appendix B. @@ -3254,9 +3223,7 @@ Language constructs (fn, struct, trait, impl, enum, union, inher) are defined by > **Complete Specification**: See Appendix B (`::Implementable` trait) for precise definitions of these construct operators. --- - - ## Appendix E: Module System (Future) **Current State**: All standard library functions and traits are automatically in scope. @@ -3302,9 +3269,7 @@ Language constructs (fn, struct, trait, impl, enum, union, inher) are defined by - Faster compilation (selective imports) --- - - ## Appendix F: Memory Management (Future) The language specification currently does not include heap memory management. This appendix documents potential future approaches. @@ -3391,9 +3356,7 @@ Cons: Less granular control, memory held until arena freed This would provide stronger type safety but add complexity to the type checker. --- - - ## Appendix G: Examples & Tutorials ### G.1 Tutorial: First Steps @@ -3820,4 +3783,3 @@ dup ::x get print // Prints: 3.0 ``` --- -