feat(pdf): template storage layer (load/save/list/import/export)

Phase 2/6. Persists "how to read this bank's statements" as JSON files under ``~/.datatools/pdf_templates/<slug>.json`` so an accountant can build one template per source and reuse it across every statement that follows the same layout.

Public API:
- ``new_template(name)`` — blank with sensible defaults
- ``save_template(t)`` — validate + atomic write (temp + rename)
- ``load_template(slug)`` / ``delete_template(slug)``
- ``list_templates()`` — sorted summaries, skips corrupt files
- ``template_to_json`` / ``template_from_json`` — portability
- ``validate_template(t)`` — returns ``(ok, errors)`` for the GUI

Schema is documented in the module docstring. Versioned via ``schema_version: 1`` so future fields don't break saved files silently — ``load_template`` refuses unknown versions instead of limping along with missing keys.

Validation contract enforces:
- non-empty name + slug (lowercase alphanumeric + hyphens)
- at least two output columns
- at least one column mapped to ``date``
- either one ``amount`` column OR both ``amount_debit`` + ``amount_credit``
- column boundary count consistent with source-column count

Storage is atomic: ``_atomic_write`` goes through a temp file + ``os.replace`` so a crashed save can't leave a half-written JSON at the canonical path. The GUI's build flow saves on most visual-picker changes, so this matters more here than for a "save button" workflow.

24 tests cover slugify, defaults, validation branches, round-trip load/save, missing/corrupt file handling, delete, list (incl. skipping corrupt files), atomic-write rollback, and import/export.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-19 22:46:44 +00:00
parent b8aff862ed
commit aea520d2f7
2 changed files with 646 additions and 0 deletions
--- a/tests/test_pdf_templates.py
+++ b/tests/test_pdf_templates.py
@@ -0,0 +1,239 @@
+"""Tests for the PDF template storage layer."""
+
+from __future__ import annotations
+
+import json
+
+import pytest
+
+from src.pdf_templates import (
+    SCHEMA_VERSION,
+    delete_template,
+    list_templates,
+    load_template,
+    new_template,
+    save_template,
+    slugify,
+    template_from_json,
+    template_path,
+    templates_dir,
+    template_to_json,
+    validate_template,
+)
+
+
+@pytest.fixture
+def isolated_templates(monkeypatch, tmp_path):
+    """Redirect the templates directory into ``tmp_path``."""
+    monkeypatch.setenv("DATATOOLS_PDF_TEMPLATES_DIR", str(tmp_path))
+    yield tmp_path
+
+
+class TestSlugify:
+    def test_basic(self):
+        assert slugify("Chase Personal Checking") == "chase-personal-checking"
+
+    def test_strips_punctuation(self):
+        assert slugify("BofA: Business (USD)") == "bofa-business-usd"
+
+    def test_empty_falls_back(self):
+        assert slugify("") == "untitled"
+        assert slugify("   ") == "untitled"
+
+
+class TestNewTemplate:
+    def test_has_schema_version(self):
+        t = new_template("Sample")
+        assert t["schema_version"] == SCHEMA_VERSION
+
+    def test_slug_derived_from_name(self):
+        t = new_template("Sample Bank")
+        assert t["slug"] == "sample-bank"
+        assert t["name"] == "Sample Bank"
+
+    def test_timestamps_present(self):
+        t = new_template("X")
+        assert t["created_at"]
+        assert t["updated_at"]
+
+
+class TestValidateTemplate:
+    def _valid(self) -> dict:
+        return {
+            "schema_version": SCHEMA_VERSION,
+            "slug": "x",
+            "name": "X",
+            "pages": {"range": "all"},
+            "table": {"column_boundaries": [100, 200]},
+            "columns": [
+                {"source": 0, "target": "date"},
+                {"source": 1, "target": "description"},
+                {"source": 2, "target": "amount"},
+            ],
+            "parse": {},
+        }
+
+    def test_valid_passes(self):
+        ok, errs = validate_template(self._valid())
+        assert ok, errs
+
+    def test_missing_name_fails(self):
+        t = self._valid()
+        t["name"] = ""
+        ok, errs = validate_template(t)
+        assert not ok
+        assert any("name" in e for e in errs)
+
+    def test_bad_schema_version(self):
+        t = self._valid()
+        t["schema_version"] = 999
+        ok, errs = validate_template(t)
+        assert not ok
+
+    def test_requires_date_column(self):
+        t = self._valid()
+        t["columns"] = [
+            {"source": 0, "target": "description"},
+            {"source": 1, "target": "amount"},
+        ]
+        ok, errs = validate_template(t)
+        assert not ok
+        assert any("date" in e for e in errs)
+
+    def test_requires_amount_or_debit_credit(self):
+        t = self._valid()
+        t["columns"] = [
+            {"source": 0, "target": "date"},
+            {"source": 1, "target": "description"},
+        ]
+        ok, errs = validate_template(t)
+        assert not ok
+        assert any("amount" in e for e in errs)
+
+    def test_debit_credit_pair_is_valid(self):
+        t = self._valid()
+        t["columns"] = [
+            {"source": 0, "target": "date"},
+            {"source": 1, "target": "description"},
+            {"source": 2, "target": "amount_debit"},
+            {"source": 3, "target": "amount_credit"},
+        ]
+        t["table"]["column_boundaries"] = [100, 200, 300]
+        ok, errs = validate_template(t)
+        assert ok, errs
+
+
+class TestPersistence:
+    def test_round_trip(self, isolated_templates):
+        t = new_template("Round Trip Bank")
+        t["columns"] = [
+            {"source": 0, "target": "date"},
+            {"source": 1, "target": "description"},
+            {"source": 2, "target": "amount"},
+        ]
+        t["table"]["column_boundaries"] = [100, 200]
+        slug = save_template(t)
+        assert slug == "round-trip-bank"
+
+        path = template_path(slug)
+        assert path.exists()
+        loaded = load_template(slug)
+        assert loaded["name"] == "Round Trip Bank"
+        assert loaded["columns"][0]["target"] == "date"
+
+    def test_save_rejects_invalid(self, isolated_templates):
+        with pytest.raises(ValueError):
+            save_template({"schema_version": 1, "name": ""})
+
+    def test_load_missing_raises(self, isolated_templates):
+        with pytest.raises(FileNotFoundError):
+            load_template("does-not-exist")
+
+    def test_load_corrupt_raises(self, isolated_templates, tmp_path):
+        bad = tmp_path / "bad.json"
+        bad.write_text("not json", encoding="utf-8")
+        with pytest.raises(ValueError):
+            load_template("bad")
+
+    def test_delete(self, isolated_templates):
+        t = new_template("To Delete")
+        t["columns"] = [
+            {"source": 0, "target": "date"},
+            {"source": 1, "target": "amount"},
+        ]
+        t["table"]["column_boundaries"] = [100]
+        save_template(t)
+        assert delete_template("to-delete") is True
+        assert delete_template("to-delete") is False
+
+    def test_list_returns_summaries(self, isolated_templates):
+        for name in ["Alpha", "Bravo"]:
+            t = new_template(name)
+            t["columns"] = [
+                {"source": 0, "target": "date"},
+                {"source": 1, "target": "amount"},
+            ]
+            t["table"]["column_boundaries"] = [100]
+            save_template(t)
+        rows = list_templates()
+        assert {r["slug"] for r in rows} == {"alpha", "bravo"}
+
+    def test_list_skips_corrupt(self, isolated_templates, tmp_path):
+        (tmp_path / "broken.json").write_text("nope", encoding="utf-8")
+        # The only file present is corrupt; it must be skipped, so the list is empty.
+        rows = list_templates()
+        assert rows == []
+
+    def test_atomic_save_no_partial_file_on_failure(
+        self, isolated_templates, monkeypatch
+    ):
+        """If the write step fails mid-way, no half-written JSON survives
+        at the target path. Tests the temp-file-rename safety pattern."""
+        t = new_template("Atomic")
+        t["columns"] = [
+            {"source": 0, "target": "date"},
+            {"source": 1, "target": "amount"},
+        ]
+        t["table"]["column_boundaries"] = [100]
+
+        # Make json.dumps blow up to simulate a failure during write.
+        # save_template already validated before this step, so the
+        # crash is "after validation, during write".
+        import src.pdf_templates as mod
+        original_dumps = mod.json.dumps
+
+        def boom(*a, **kw):
+            raise IOError("disk full")
+
+        monkeypatch.setattr(mod.json, "dumps", boom)
+        with pytest.raises(IOError):
+            save_template(t)
+        monkeypatch.setattr(mod.json, "dumps", original_dumps)
+
+        assert not template_path("atomic").exists()
+
+
+class TestImportExport:
+    def test_round_trip_via_json(self):
+        t = new_template("Exported")
+        t["columns"] = [
+            {"source": 0, "target": "date"},
+            {"source": 1, "target": "amount"},
+        ]
+        payload = template_to_json(t)
+        loaded = template_from_json(payload)
+        assert loaded["name"] == "Exported"
+
+    def test_import_rejects_bad_schema(self):
+        bad = json.dumps({"schema_version": 999, "name": "X"})
+        with pytest.raises(ValueError):
+            template_from_json(bad)
+
+    def test_import_rejects_non_object(self):
+        with pytest.raises(ValueError):
+            template_from_json('["not", "an", "object"]')
+
+
+def test_templates_dir_env_override(monkeypatch, tmp_path):
+    monkeypatch.setenv("DATATOOLS_PDF_TEMPLATES_DIR", str(tmp_path))
+    assert templates_dir() == tmp_path