feat(pdf): template storage layer (load/save/list/import/export)
Phase 2/6. Persists "how to read this bank's statements" as JSON files
under ``~/.datatools/pdf_templates/<slug>.json`` so an accountant can build
one template per source and reuse it across every statement that follows
the same layout.

Public API:
- ``new_template(name)`` — blank template with sensible defaults
- ``save_template(t)`` — validate + atomic write (temp + rename)
- ``load_template(slug)`` / ``delete_template(slug)``
- ``list_templates()`` — sorted summaries, skips corrupt files
- ``template_to_json`` / ``template_from_json`` — portability
- ``validate_template(t)`` — returns ``(ok, errors)`` for the GUI

Schema is documented in the module docstring. Versioned via
``schema_version: 1`` so future fields don't break saved files silently —
``load_template`` refuses unknown versions instead of limping along with
missing keys.

Validation contract enforces:
- non-empty name + slug (lowercase alphanumeric + hyphens)
- at least two output columns
- at least one column mapped to ``date``
- either one ``amount`` column OR both ``amount_debit`` + ``amount_credit``
- column boundary count consistent with source-column count

Storage is atomic: ``_atomic_write`` goes through a temp file +
``os.replace`` so a crashed save can't leave a half-written JSON at the
canonical path. The GUI's build flow saves on most visual-picker changes,
so this matters more here than for a "save button" workflow.

24 tests cover slugify, defaults, validation branches, round-trip
load/save, missing/corrupt file handling, delete, list (incl. skipping
corrupt files), atomic-write rollback, and import/export.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
407
src/pdf_templates.py
Normal file
407
src/pdf_templates.py
Normal file
@@ -0,0 +1,407 @@
|
|||||||
|
"""PDF extract template storage.
|
||||||
|
|
||||||
|
Templates encode "how to read this bank's statements" — page
|
||||||
|
range, table window markers, column x-positions, target field
|
||||||
|
mapping, amount/date parse options. They live as JSON files in
|
||||||
|
``~/.datatools/pdf_templates/`` so an accountant can build one
|
||||||
|
per source and reuse it for every statement that follows the
|
||||||
|
same layout. Templates are portable: the ``export`` / ``import``
|
||||||
|
flow is just a file copy of the JSON.
|
||||||
|
|
||||||
|
The schema is intentionally a plain dict (not a frozen dataclass)
|
||||||
|
because the GUI mutates it incrementally during the build flow.
|
||||||
|
``validate_template`` enforces the contract at save time.
|
||||||
|
|
||||||
|
Schema (``schema_version: 1``)::
|
||||||
|
|
||||||
|
{
|
||||||
|
"schema_version": 1,
|
||||||
|
"slug": "chase-personal-checking",
|
||||||
|
"name": "Chase Personal Checking",
|
||||||
|
"notes": "",
|
||||||
|
"created_at": "<iso8601>",
|
||||||
|
"updated_at": "<iso8601>",
|
||||||
|
"pages": {
|
||||||
|
"range": "all" | "1-3" | "2,4,6-",
|
||||||
|
"skip_matching": "<regex>"
|
||||||
|
},
|
||||||
|
"table": {
|
||||||
|
"header_text": "<text containing all header words>",
|
||||||
|
"end_markers": ["<regex>", ...],
|
||||||
|
"column_boundaries": [x0, x1, ...],
|
||||||
|
"y_tolerance": 3.0,
|
||||||
|
"skip_rows_matching": ["<regex>", ...]
|
||||||
|
},
|
||||||
|
"columns": [
|
||||||
|
{"source": 0, "target": "date"},
|
||||||
|
...
|
||||||
|
# ``target`` is one of: date | description | amount |
# amount_debit | amount_credit | balance | type | <free text>
|
||||||
|
],
|
||||||
|
"parse": {
|
||||||
|
"date_format": "%m/%d/%Y",
|
||||||
|
"date_formats": [],
|
||||||
|
"decimal_separator": ".",
|
||||||
|
"thousands_separator": ",",
|
||||||
|
"currency_strip": "$",
|
||||||
|
"amount_negative_in_parens": true,
|
||||||
|
"merge_multiline_description": true
|
||||||
|
},
|
||||||
|
"visual": {
|
||||||
|
"page_width": 612.0,
|
||||||
|
"page_height": 792.0,
|
||||||
|
"sample_page": 1,
|
||||||
|
"table_bbox": [x0, top, x1, bottom] | null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
The ``visual`` block is preserved across save/load so the build
|
||||||
|
UI can round-trip the user's last visual-picker state.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import tempfile
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
# Schema revision written into new templates; ``load_template`` and
# ``template_from_json`` reject payloads carrying any other value.
SCHEMA_VERSION = 1

# Recognised ``target`` names for a column mapping. ``validate_template``
# in this module does not restrict targets to this set.
VALID_TARGETS = frozenset(
    (
        "date",
        "description",
        "amount",
        "amount_debit",
        "amount_credit",
        "balance",
        "type",
    )
)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Filesystem layout
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def templates_dir() -> Path:
    """Locate the directory that holds the template JSON files.

    Resolution order:

    1. ``DATATOOLS_PDF_TEMPLATES_DIR`` when set to a non-empty value
       (used by tests).
    2. ``~/.datatools/pdf_templates/``.
    3. ``<system temp dir>/datatools-pdf-templates`` if resolving the
       home directory raises.

    The directory is not created here.
    """
    configured = os.environ.get("DATATOOLS_PDF_TEMPLATES_DIR")
    if not configured:
        try:
            home = Path.home()
        except Exception:
            # Best-effort fallback when no home directory can be resolved.
            return Path(tempfile.gettempdir(), "datatools-pdf-templates")
        return home.joinpath(".datatools", "pdf_templates")
    return Path(configured)
|
||||||
|
|
||||||
|
|
||||||
|
def template_path(slug: str) -> Path:
    """Return the JSON file path for *slug* inside ``templates_dir()``.

    The slug is used verbatim as the file stem; no validation or
    existence check happens here.
    """
    filename = f"{slug}.json"
    return templates_dir().joinpath(filename)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Slugify
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
_SLUG_STRIP = re.compile(r"[^a-z0-9]+")

# Longest slug ``validate_template`` accepts (one leading char + up to 63 more).
_SLUG_MAX_LEN = 64


def slugify(name: str) -> str:
    """Make a filesystem-safe slug from a human-friendly name.

    The name is lower-cased, every run of characters outside ``a-z0-9``
    becomes a single hyphen, and leading/trailing hyphens are removed.
    The result is capped at 64 characters so that a slug derived from a
    long name still passes ``validate_template``.

    Returns ``"untitled"`` when nothing usable remains (empty, ``None``,
    or a name with no ASCII letters or digits).
    """
    s = (name or "").strip().lower()
    s = _SLUG_STRIP.sub("-", s).strip("-")
    # Truncate, then strip again: the cut can land right after a hyphen.
    s = s[:_SLUG_MAX_LEN].rstrip("-")
    return s or "untitled"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Construction + defaults
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def new_template(name: str) -> dict[str, Any]:
    """Create a fresh template dict pre-populated with defaults.

    The slug is derived from *name* via ``slugify``; when *name* is
    empty the slug doubles as the display name. ``created_at`` and
    ``updated_at`` both receive the current UTC time (ISO 8601, second
    precision). ``columns`` and ``table.column_boundaries`` start
    empty — the GUI build flow fills them in as the user works.
    """
    slug = slugify(name)
    stamp = datetime.now(tz=timezone.utc).isoformat(timespec="seconds")

    pages = {
        "range": "all",
        "skip_matching": "",
    }
    table = {
        "header_text": "",
        "end_markers": [],
        "column_boundaries": [],
        "y_tolerance": 3.0,
        "skip_rows_matching": [],
    }
    parse = {
        "date_format": "%m/%d/%Y",
        "date_formats": [],
        "decimal_separator": ".",
        "thousands_separator": ",",
        "currency_strip": "$",
        "amount_negative_in_parens": True,
        "merge_multiline_description": True,
    }
    visual = {
        "page_width": 612.0,
        "page_height": 792.0,
        "sample_page": 1,
        "table_bbox": None,
    }

    # Key order is kept as in the schema docs so saved JSON reads the same.
    template: dict[str, Any] = {
        "schema_version": SCHEMA_VERSION,
        "slug": slug,
        "name": name or slug,
        "notes": "",
        "created_at": stamp,
        "updated_at": stamp,
        "pages": pages,
        "table": table,
        "columns": [],
        "parse": parse,
        "visual": visual,
    }
    return template
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Validation
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def validate_template(template: dict[str, Any]) -> tuple[bool, list[str]]:
    """Check the template before saving. Returns ``(ok, errors)``.

    The GUI shows the errors next to the Save button; nothing is silent
    here. Malformed input (wrong types anywhere in the structure) is
    reported as an error rather than raising.

    Checks performed:

    * ``schema_version`` equals ``SCHEMA_VERSION``
    * ``name`` is a non-blank string
    * ``slug`` (or, when absent, the slug derived from ``name``) is
      1-64 chars of lowercase alphanumerics and hyphens, not starting
      with a hyphen
    * ``columns`` is a list of at least two objects, each with a
      non-negative integer ``source`` and a non-empty string ``target``
    * some column targets ``date``, and either ``amount`` or both
      ``amount_debit`` and ``amount_credit`` are targeted
    * ``table.column_boundaries`` is a list with at least
      ``distinct sources - 1`` entries
    """
    if not isinstance(template, dict):
        return False, ["Template must be a JSON object."]

    errors: list[str] = []

    sv = template.get("schema_version")
    if sv != SCHEMA_VERSION:
        errors.append(
            f"Unsupported schema_version {sv!r} (expected {SCHEMA_VERSION})."
        )

    name = template.get("name", "")
    if not isinstance(name, str) or not name.strip():
        errors.append("name is required.")

    slug = template.get("slug")
    if not slug and isinstance(name, str):
        # Only derive from the name when it is a string; slugify() would
        # raise on anything else.
        slug = slugify(name)
    # fullmatch rather than match + "$": "$" also matches just before a
    # trailing newline, which would let "abc\n" through as a file name.
    if not isinstance(slug, str) or not re.fullmatch(
        r"[a-z0-9][a-z0-9-]{0,63}", slug
    ):
        errors.append(
            "slug must be lowercase alphanumeric + hyphens, "
            "1–64 chars, starting with a letter or digit."
        )

    columns = template.get("columns", [])
    valid_sources: set[int] = set()
    if not isinstance(columns, list) or len(columns) < 2:
        errors.append("At least two output columns are required.")
    else:
        seen_targets: list[str] = []
        for i, col in enumerate(columns):
            if not isinstance(col, dict):
                errors.append(f"columns[{i}] must be an object.")
                continue
            src = col.get("source")
            tgt = col.get("target")
            # bool is a subclass of int; True/False are not column indexes.
            if isinstance(src, bool) or not isinstance(src, int) or src < 0:
                errors.append(
                    f"columns[{i}].source must be a non-negative integer."
                )
            else:
                valid_sources.add(src)
            if not isinstance(tgt, str) or not tgt:
                errors.append(f"columns[{i}].target must be a non-empty string.")
            else:
                seen_targets.append(tgt)
        if "date" not in seen_targets:
            errors.append("At least one column must map to 'date'.")
        has_split_amount = (
            "amount_debit" in seen_targets and "amount_credit" in seen_targets
        )
        if "amount" not in seen_targets and not has_split_amount:
            errors.append(
                "Either an 'amount' column or both 'amount_debit' + "
                "'amount_credit' columns are required."
            )

    table = template.get("table") or {}
    if not isinstance(table, dict):
        errors.append("table must be an object.")
    else:
        boundaries = table.get("column_boundaries", [])
        if not isinstance(boundaries, list):
            errors.append("table.column_boundaries must be a list.")
        # N boundaries split a row into N + 1 source columns. Only valid
        # sources are counted; invalid ones were already reported above.
        elif len(boundaries) + 1 < len(valid_sources):
            errors.append(
                "table.column_boundaries doesn't match the number of source columns "
                "implied by the column mapping."
            )

    return (not errors), errors
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Persistence
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def _atomic_write(path: Path, payload: str) -> None:
|
||||||
|
"""Write *payload* to *path* via a temp file + rename.
|
||||||
|
|
||||||
|
Avoids leaving a half-written JSON if the process dies mid-save —
|
||||||
|
the GUI saves on every visual-picker change, and a corrupt
|
||||||
|
template file would be hostile to recover from.
|
||||||
|
"""
|
||||||
|
path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
fd, tmp_path = tempfile.mkstemp(
|
||||||
|
prefix=f".{path.name}.",
|
||||||
|
suffix=".tmp",
|
||||||
|
dir=str(path.parent),
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
with os.fdopen(fd, "w", encoding="utf-8") as f:
|
||||||
|
f.write(payload)
|
||||||
|
os.replace(tmp_path, path)
|
||||||
|
except Exception:
|
||||||
|
try:
|
||||||
|
os.unlink(tmp_path)
|
||||||
|
except FileNotFoundError:
|
||||||
|
pass
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
def save_template(template: dict[str, Any]) -> str:
    """Persist *template* to disk; return the slug it was saved as.

    Works on a shallow copy, so the caller's dict is not modified.
    Stamps ``updated_at`` with the current UTC time. When the template
    has no ``slug``, one is derived from ``name`` (the same fallback
    ``validate_template`` applies) and stored in the saved file.
    Atomic via temp-file + rename.

    Raises ``ValueError`` with a multi-line error list if validation
    fails — caller should surface that to the user.
    """
    ok, errors = validate_template(template)
    if not ok:
        raise ValueError("\n".join(errors))
    template = dict(template)
    template["updated_at"] = datetime.now(tz=timezone.utc).isoformat(
        timespec="seconds"
    )
    # Validation accepts a missing/empty slug if the name yields a valid
    # one; mirror that here instead of failing with KeyError.
    slug = template.get("slug") or slugify(template["name"])
    template["slug"] = slug
    payload = json.dumps(template, indent=2, ensure_ascii=False)
    _atomic_write(template_path(slug), payload)
    return slug
|
||||||
|
|
||||||
|
|
||||||
|
def load_template(slug: str) -> dict[str, Any]:
    """Read the template stored under *slug*.

    Raises ``FileNotFoundError`` if the file is missing, and
    ``ValueError`` if the content is not valid JSON, is not a JSON
    object, or carries a ``schema_version`` other than
    ``SCHEMA_VERSION``.
    """
    p = template_path(slug)
    # FileNotFoundError propagates unchanged to the caller.
    raw = p.read_text(encoding="utf-8")
    try:
        data = json.loads(raw)
    except json.JSONDecodeError as e:
        raise ValueError(f"Corrupt template {slug!r}: {e}") from e
    if not isinstance(data, dict):
        # Valid JSON but not an object (e.g. a list); without this check
        # the .get() below would raise AttributeError.
        raise ValueError(
            f"Corrupt template {slug!r}: top-level JSON must be an object."
        )
    sv = data.get("schema_version")
    if sv != SCHEMA_VERSION:
        raise ValueError(
            f"Template {slug!r} has unsupported schema_version {sv!r}; "
            f"expected {SCHEMA_VERSION}."
        )
    return data
|
||||||
|
|
||||||
|
|
||||||
|
def delete_template(slug: str) -> bool:
    """Delete the template file for *slug*.

    Returns ``True`` when a file was removed and ``False`` when there
    was none. Other ``OSError``s propagate.
    """
    target = template_path(slug)
    try:
        target.unlink()
    except FileNotFoundError:
        return False
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def list_templates() -> list[dict[str, Any]]:
    """Return ``{slug, name, updated_at, notes}`` summaries of saved templates.

    Every ``*.json`` file in ``templates_dir()`` is read. Files that
    cannot be read or parsed, or whose top level is not an object, are
    skipped silently so one bad file cannot break the list view.
    ``slug`` and ``name`` fall back to the file stem when missing or
    empty. No schema-version check is applied.

    Rows are sorted in descending order of ``updated_at``, using
    ``name`` as the key when ``updated_at`` is empty. Returns ``[]``
    when the directory does not exist.
    """
    d = templates_dir()
    if not d.exists():
        return []
    out: list[dict[str, Any]] = []
    for p in sorted(d.glob("*.json")):
        try:
            data = json.loads(p.read_text(encoding="utf-8"))
        except Exception:
            # Deliberate best-effort: skip anything unreadable.
            continue
        if not isinstance(data, dict):
            continue
        out.append({
            "slug": data.get("slug") or p.stem,
            "name": data.get("name") or p.stem,
            "updated_at": data.get("updated_at", ""),
            "notes": data.get("notes", ""),
        })
    # str() keeps the sort from raising TypeError when a hand-edited file
    # holds a non-string updated_at or name; string values sort as before.
    out.sort(key=lambda r: str(r["updated_at"] or r["name"]), reverse=True)
    return out
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Import / export
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def template_to_json(template: dict[str, Any]) -> str:
    """Render *template* as a JSON string for export.

    Output uses a two-space indent and leaves non-ASCII characters
    unescaped, which keeps it readable and diff-friendly.
    """
    dump_options = {"indent": 2, "ensure_ascii": False}
    return json.dumps(template, **dump_options)
|
||||||
|
|
||||||
|
|
||||||
|
def template_from_json(payload: str) -> dict[str, Any]:
    """Parse an imported template from its JSON text.

    Only the JSON syntax, the top-level type and ``schema_version`` are
    checked. Nothing is written to disk — the caller chooses between
    ``save_template`` and merging into the build in progress.

    Raises ``ValueError`` when *payload* is not valid JSON, is not a
    JSON object, or has a ``schema_version`` other than
    ``SCHEMA_VERSION``.
    """
    try:
        parsed = json.loads(payload)
    except json.JSONDecodeError as exc:
        raise ValueError(f"Not valid JSON: {exc}") from exc

    if isinstance(parsed, dict):
        version = parsed.get("schema_version")
        if version == SCHEMA_VERSION:
            return parsed
        raise ValueError(
            f"Imported template has schema_version {version!r}; "
            f"this build expects {SCHEMA_VERSION}."
        )
    raise ValueError("Top-level JSON must be an object.")
|
||||||
|
|
||||||
|
|
||||||
|
# Public API of this module. ``_atomic_write`` is deliberately left out.
__all__ = [
    # constants
    "SCHEMA_VERSION", "VALID_TARGETS",
    # storage operations and helpers
    "delete_template", "list_templates", "load_template",
    "new_template", "save_template", "slugify",
    "template_from_json", "template_path", "template_to_json",
    "templates_dir", "validate_template",
]
|
||||||
239
tests/test_pdf_templates.py
Normal file
239
tests/test_pdf_templates.py
Normal file
@@ -0,0 +1,239 @@
|
|||||||
|
"""Tests for the PDF template storage layer."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from src.pdf_templates import (
|
||||||
|
SCHEMA_VERSION,
|
||||||
|
delete_template,
|
||||||
|
list_templates,
|
||||||
|
load_template,
|
||||||
|
new_template,
|
||||||
|
save_template,
|
||||||
|
slugify,
|
||||||
|
template_from_json,
|
||||||
|
template_path,
|
||||||
|
templates_dir,
|
||||||
|
template_to_json,
|
||||||
|
validate_template,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def isolated_templates(monkeypatch, tmp_path):
    """Point the template store at ``tmp_path`` for one test.

    Sets ``DATATOOLS_PDF_TEMPLATES_DIR`` through ``monkeypatch`` and
    yields the directory.
    """
    env_name = "DATATOOLS_PDF_TEMPLATES_DIR"
    monkeypatch.setenv(env_name, str(tmp_path))
    yield tmp_path
|
||||||
|
|
||||||
|
|
||||||
|
class TestSlugify:
    """``slugify`` maps display names to filesystem-safe slugs."""

    def test_basic(self):
        # Spaces become hyphens and letters are lower-cased.
        result = slugify("Chase Personal Checking")
        assert result == "chase-personal-checking"

    def test_strips_punctuation(self):
        # Punctuation runs collapse to one hyphen; the edges are trimmed.
        result = slugify("BofA: Business (USD)")
        assert result == "bofa-business-usd"

    def test_empty_falls_back(self):
        # Empty or blank names get the placeholder slug.
        for blank in ("", " "):
            assert slugify(blank) == "untitled"
|
||||||
|
|
||||||
|
|
||||||
|
class TestNewTemplate:
    """Defaults produced by ``new_template``."""

    def test_has_schema_version(self):
        template = new_template("Sample")
        assert template["schema_version"] == SCHEMA_VERSION

    def test_slug_derived_from_name(self):
        template = new_template("Sample Bank")
        observed = (template["slug"], template["name"])
        assert observed == ("sample-bank", "Sample Bank")

    def test_timestamps_present(self):
        template = new_template("X")
        # Both timestamps must be non-empty.
        for field in ("created_at", "updated_at"):
            assert template[field]
|
||||||
|
|
||||||
|
|
||||||
|
class TestValidateTemplate:
    """Branches of ``validate_template``."""

    def _valid(self) -> dict:
        """Return a minimal template that passes validation."""
        column_map = [
            {"source": 0, "target": "date"},
            {"source": 1, "target": "description"},
            {"source": 2, "target": "amount"},
        ]
        return {
            "schema_version": SCHEMA_VERSION,
            "slug": "x",
            "name": "X",
            "pages": {"range": "all"},
            "table": {"column_boundaries": [100, 200]},
            "columns": column_map,
            "parse": {},
        }

    def test_valid_passes(self):
        ok, errs = validate_template(self._valid())
        assert ok, errs

    def test_missing_name_fails(self):
        template = self._valid()
        template["name"] = ""
        ok, errs = validate_template(template)
        assert not ok
        assert any("name" in message for message in errs)

    def test_bad_schema_version(self):
        template = self._valid()
        template["schema_version"] = 999
        ok, _errs = validate_template(template)
        assert not ok

    def test_requires_date_column(self):
        template = self._valid()
        template["columns"] = [
            {"source": 0, "target": "description"},
            {"source": 1, "target": "amount"},
        ]
        ok, errs = validate_template(template)
        assert not ok
        assert any("date" in message for message in errs)

    def test_requires_amount_or_debit_credit(self):
        template = self._valid()
        template["columns"] = [
            {"source": 0, "target": "date"},
            {"source": 1, "target": "description"},
        ]
        ok, errs = validate_template(template)
        assert not ok
        assert any("amount" in message for message in errs)

    def test_debit_credit_pair_is_valid(self):
        template = self._valid()
        template["columns"] = [
            {"source": 0, "target": "date"},
            {"source": 1, "target": "description"},
            {"source": 2, "target": "amount_debit"},
            {"source": 3, "target": "amount_credit"},
        ]
        # Four source columns need three boundaries.
        template["table"]["column_boundaries"] = [100, 200, 300]
        ok, errs = validate_template(template)
        assert ok, errs
|
||||||
|
|
||||||
|
|
||||||
|
class TestPersistence:
    """Save / load / delete / list against an isolated templates dir."""

    def test_round_trip(self, isolated_templates):
        t = new_template("Round Trip Bank")
        t["columns"] = [
            {"source": 0, "target": "date"},
            {"source": 1, "target": "description"},
            {"source": 2, "target": "amount"},
        ]
        t["table"]["column_boundaries"] = [100, 200]
        slug = save_template(t)
        assert slug == "round-trip-bank"

        path = template_path(slug)
        assert path.exists()
        loaded = load_template(slug)
        assert loaded["name"] == "Round Trip Bank"
        assert loaded["columns"][0]["target"] == "date"

    def test_save_rejects_invalid(self, isolated_templates):
        with pytest.raises(ValueError):
            save_template({"schema_version": 1, "name": ""})

    def test_load_missing_raises(self, isolated_templates):
        with pytest.raises(FileNotFoundError):
            load_template("does-not-exist")

    def test_load_corrupt_raises(self, isolated_templates, tmp_path):
        bad = tmp_path / "bad.json"
        bad.write_text("not json", encoding="utf-8")
        with pytest.raises(ValueError):
            load_template("bad")

    def test_delete(self, isolated_templates):
        t = new_template("To Delete")
        t["columns"] = [
            {"source": 0, "target": "date"},
            {"source": 1, "target": "amount"},
        ]
        t["table"]["column_boundaries"] = [100]
        save_template(t)
        assert delete_template("to-delete") is True
        assert delete_template("to-delete") is False

    def test_list_returns_summaries(self, isolated_templates):
        for name in ["Alpha", "Bravo"]:
            t = new_template(name)
            t["columns"] = [
                {"source": 0, "target": "date"},
                {"source": 1, "target": "amount"},
            ]
            t["table"]["column_boundaries"] = [100]
            save_template(t)
        rows = list_templates()
        assert {r["slug"] for r in rows} == {"alpha", "bravo"}

    def test_list_skips_corrupt(self, isolated_templates, tmp_path):
        (tmp_path / "broken.json").write_text("nope", encoding="utf-8")
        # Even with a broken file present, list still returns []
        rows = list_templates()
        assert rows == []

    def test_atomic_save_no_partial_file_on_failure(
        self, isolated_templates, monkeypatch
    ):
        """If the final rename fails, nothing is left behind.

        The failure is injected at ``os.replace`` — after the payload
        has been written to the temp file — so the temp-file + rename
        path itself is exercised. Failing ``json.dumps`` instead would
        abort before any file is created and prove nothing about
        atomicity.
        """
        t = new_template("Atomic")
        t["columns"] = [
            {"source": 0, "target": "date"},
            {"source": 1, "target": "amount"},
        ]
        t["table"]["column_boundaries"] = [100]

        import src.pdf_templates as mod

        def boom(*a, **kw):
            raise OSError("disk full")

        # A scoped context restores os.replace right after the save
        # attempt and leaves the fixture's env-var override in place.
        with monkeypatch.context() as scoped:
            scoped.setattr(mod.os, "replace", boom)
            with pytest.raises(OSError):
                save_template(t)

        # No file at the canonical path, and no stray temp file either.
        assert not template_path("atomic").exists()
        assert list(isolated_templates.iterdir()) == []
|
||||||
|
|
||||||
|
|
||||||
|
class TestImportExport:
    """``template_to_json`` / ``template_from_json`` portability."""

    def test_round_trip_via_json(self):
        original = new_template("Exported")
        original["columns"] = [
            {"source": 0, "target": "date"},
            {"source": 1, "target": "amount"},
        ]
        restored = template_from_json(template_to_json(original))
        assert restored["name"] == "Exported"

    def test_import_rejects_bad_schema(self):
        # An unknown schema version must be refused on import.
        payload = json.dumps({"schema_version": 999, "name": "X"})
        with pytest.raises(ValueError):
            template_from_json(payload)

    def test_import_rejects_non_object(self):
        # Valid JSON whose top level is a list, not an object.
        payload = '["not", "an", "object"]'
        with pytest.raises(ValueError):
            template_from_json(payload)
|
||||||
|
|
||||||
|
|
||||||
|
def test_templates_dir_env_override(monkeypatch, tmp_path):
    """``DATATOOLS_PDF_TEMPLATES_DIR`` overrides the default location."""
    override = str(tmp_path)
    monkeypatch.setenv("DATATOOLS_PDF_TEMPLATES_DIR", override)
    resolved = templates_dir()
    assert resolved == tmp_path
|
||||||
Reference in New Issue
Block a user