feat(pdf): template storage layer (load/save/list/import/export)
Phase 2/6. Persists "how to read this bank's statements" as JSON files under
``~/.datatools/pdf_templates/<slug>.json`` so an accountant can build one
template per source and reuse it across every statement that follows the same
layout.

Public API:
- ``new_template(name)`` — blank template with sensible defaults
- ``save_template(t)`` — validate + atomic write (temp file + rename)
- ``load_template(slug)`` / ``delete_template(slug)``
- ``list_templates()`` — sorted summaries, skips corrupt files
- ``template_to_json`` / ``template_from_json`` — portability
- ``validate_template(t)`` — returns ``(ok, errors)`` for the GUI

Schema is documented in the module docstring. Versioned via
``schema_version: 1`` so future fields don't break saved files silently —
``load_template`` refuses unknown versions instead of limping along with
missing keys.

Validation contract enforces:
- non-empty name + slug (lowercase alphanumeric + hyphens)
- at least two output columns
- at least one column mapped to ``date``
- either one ``amount`` column OR both ``amount_debit`` + ``amount_credit``
- column boundary count consistent with source-column count

Storage is atomic: ``_atomic_write`` goes through a temp file + ``os.replace``
so a crashed save can't leave a half-written JSON at the canonical path. The
GUI's build flow saves on most visual-picker changes, so this matters more
here than for a "save button" workflow.

24 tests cover slugify, defaults, validation branches, round-trip load/save,
missing/corrupt file handling, delete, list (incl. skipping corrupt files),
atomic-write rollback, and import/export.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
239
tests/test_pdf_templates.py
Normal file
239
tests/test_pdf_templates.py
Normal file
@@ -0,0 +1,239 @@
|
||||
"""Tests for the PDF template storage layer."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
from src.pdf_templates import (
|
||||
SCHEMA_VERSION,
|
||||
delete_template,
|
||||
list_templates,
|
||||
load_template,
|
||||
new_template,
|
||||
save_template,
|
||||
slugify,
|
||||
template_from_json,
|
||||
template_path,
|
||||
templates_dir,
|
||||
template_to_json,
|
||||
validate_template,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
def isolated_templates(monkeypatch, tmp_path):
    """Point the template store at pytest's per-test ``tmp_path``.

    Sets ``DATATOOLS_PDF_TEMPLATES_DIR`` via ``monkeypatch`` and yields the
    directory so tests can inspect or pre-populate it.
    """
    sandbox = tmp_path
    monkeypatch.setenv("DATATOOLS_PDF_TEMPLATES_DIR", str(sandbox))
    yield sandbox
|
||||
|
||||
|
||||
class TestSlugify:
    """``slugify`` turns a display name into a file-safe slug."""

    def test_basic(self):
        slug = slugify("Chase Personal Checking")
        assert slug == "chase-personal-checking"

    def test_strips_punctuation(self):
        slug = slugify("BofA: Business (USD)")
        assert slug == "bofa-business-usd"

    def test_empty_falls_back(self):
        # Empty and whitespace-only names both get the fallback slug.
        for blank in ("", " "):
            assert slugify(blank) == "untitled"
|
||||
|
||||
|
||||
class TestNewTemplate:
    """Defaults filled in by ``new_template``."""

    def test_has_schema_version(self):
        template = new_template("Sample")
        assert template["schema_version"] == SCHEMA_VERSION

    def test_slug_derived_from_name(self):
        template = new_template("Sample Bank")
        assert template["slug"] == "sample-bank"
        assert template["name"] == "Sample Bank"

    def test_timestamps_present(self):
        template = new_template("X")
        # Both timestamps must be populated (truthy), whatever their format.
        for field in ("created_at", "updated_at"):
            assert template[field]
|
||||
|
||||
|
||||
class TestValidateTemplate:
    """Exercise ``validate_template`` one rule at a time."""

    @staticmethod
    def _columns(*targets):
        """Map source indexes 0..n-1 onto ``targets``, in order."""
        return [
            {"source": index, "target": target}
            for index, target in enumerate(targets)
        ]

    def _valid(self) -> dict:
        """Return a fresh baseline template that the tests below mutate."""
        return {
            "schema_version": SCHEMA_VERSION,
            "slug": "x",
            "name": "X",
            "pages": {"range": "all"},
            "table": {"column_boundaries": [100, 200]},
            "columns": self._columns("date", "description", "amount"),
            "parse": {},
        }

    def test_valid_passes(self):
        is_valid, problems = validate_template(self._valid())
        assert is_valid, problems

    def test_missing_name_fails(self):
        template = {**self._valid(), "name": ""}
        is_valid, problems = validate_template(template)
        assert not is_valid
        assert any("name" in message for message in problems)

    def test_bad_schema_version(self):
        template = {**self._valid(), "schema_version": 999}
        is_valid, problems = validate_template(template)
        assert not is_valid

    def test_requires_date_column(self):
        template = self._valid()
        template["columns"] = self._columns("description", "amount")
        is_valid, problems = validate_template(template)
        assert not is_valid
        assert any("date" in message for message in problems)

    def test_requires_amount_or_debit_credit(self):
        template = self._valid()
        template["columns"] = self._columns("date", "description")
        is_valid, problems = validate_template(template)
        assert not is_valid
        assert any("amount" in message for message in problems)

    def test_debit_credit_pair_is_valid(self):
        template = self._valid()
        template["columns"] = self._columns(
            "date", "description", "amount_debit", "amount_credit"
        )
        # Four source columns, so three boundaries.
        template["table"]["column_boundaries"] = [100, 200, 300]
        is_valid, problems = validate_template(template)
        assert is_valid, problems
|
||||
|
||||
|
||||
class TestPersistence:
    """Save / load / delete / list against an isolated templates directory."""

    @staticmethod
    def _template(name, *targets):
        """Build a new template mapping sources 0..n-1 onto ``targets``.

        Column boundaries are placed at 100, 200, ... with one fewer
        boundary than columns, which is the shape every test here uses.
        """
        template = new_template(name)
        template["columns"] = [
            {"source": index, "target": target}
            for index, target in enumerate(targets)
        ]
        template["table"]["column_boundaries"] = [
            100 * position for position in range(1, len(targets))
        ]
        return template

    def test_round_trip(self, isolated_templates):
        template = self._template(
            "Round Trip Bank", "date", "description", "amount"
        )
        slug = save_template(template)
        assert slug == "round-trip-bank"

        assert template_path(slug).exists()
        loaded = load_template(slug)
        assert loaded["name"] == "Round Trip Bank"
        assert loaded["columns"][0]["target"] == "date"

    def test_save_rejects_invalid(self, isolated_templates):
        with pytest.raises(ValueError):
            save_template({"schema_version": 1, "name": ""})

    def test_load_missing_raises(self, isolated_templates):
        with pytest.raises(FileNotFoundError):
            load_template("does-not-exist")

    def test_load_corrupt_raises(self, isolated_templates, tmp_path):
        (tmp_path / "bad.json").write_text("not json", encoding="utf-8")
        with pytest.raises(ValueError):
            load_template("bad")

    def test_delete(self, isolated_templates):
        save_template(self._template("To Delete", "date", "amount"))
        # First delete removes the file; the second finds nothing to remove.
        assert delete_template("to-delete") is True
        assert delete_template("to-delete") is False

    def test_list_returns_summaries(self, isolated_templates):
        for name in ("Alpha", "Bravo"):
            save_template(self._template(name, "date", "amount"))
        rows = list_templates()
        assert {row["slug"] for row in rows} == {"alpha", "bravo"}

    def test_list_skips_corrupt(self, isolated_templates, tmp_path):
        (tmp_path / "broken.json").write_text("nope", encoding="utf-8")
        # The only file present is unreadable, so the listing must be empty
        # rather than raising.
        assert list_templates() == []

    def test_atomic_save_no_partial_file_on_failure(
        self, isolated_templates, monkeypatch
    ):
        """If the write step fails mid-way, no half-written JSON survives
        at the target path. Tests the temp-file-rename safety pattern."""
        template = self._template("Atomic", "date", "amount")

        import src.pdf_templates as mod

        def boom(*args, **kwargs):
            raise IOError("disk full")

        # Make json.dumps blow up to simulate a failure while saving.
        # ``monkeypatch.context()`` restores the real ``json.dumps`` as soon
        # as the block exits, so no manual save/restore is needed before the
        # final assertion.
        with monkeypatch.context() as scoped:
            scoped.setattr(mod.json, "dumps", boom)
            with pytest.raises(IOError):
                save_template(template)

        assert not template_path("atomic").exists()
|
||||
|
||||
|
||||
class TestImportExport:
    """JSON export/import helpers used for template portability."""

    def test_round_trip_via_json(self):
        original = new_template("Exported")
        original["columns"] = [
            {"source": index, "target": target}
            for index, target in enumerate(("date", "amount"))
        ]
        restored = template_from_json(template_to_json(original))
        assert restored["name"] == "Exported"

    def test_import_rejects_bad_schema(self):
        payload = json.dumps({"schema_version": 999, "name": "X"})
        with pytest.raises(ValueError):
            template_from_json(payload)

    def test_import_rejects_non_object(self):
        # Valid JSON, but the top level is an array rather than an object.
        array_payload = '["not", "an", "object"]'
        with pytest.raises(ValueError):
            template_from_json(array_payload)
|
||||
|
||||
|
||||
def test_templates_dir_env_override(monkeypatch, tmp_path):
    """``templates_dir`` honours the ``DATATOOLS_PDF_TEMPLATES_DIR`` variable."""
    override = str(tmp_path)
    monkeypatch.setenv("DATATOOLS_PDF_TEMPLATES_DIR", override)
    resolved = templates_dir()
    assert resolved == tmp_path
|
||||
Reference in New Issue
Block a user