"""Tests for the PDF template storage layer.""" from __future__ import annotations import json import pytest from src.pdf_templates import ( SCHEMA_VERSION, delete_template, list_templates, load_template, new_template, save_template, slugify, template_from_json, template_path, templates_dir, template_to_json, validate_template, ) @pytest.fixture def isolated_templates(monkeypatch, tmp_path): """Redirect the templates directory into ``tmp_path``.""" monkeypatch.setenv("DATATOOLS_PDF_TEMPLATES_DIR", str(tmp_path)) yield tmp_path class TestSlugify: def test_basic(self): assert slugify("Chase Personal Checking") == "chase-personal-checking" def test_strips_punctuation(self): assert slugify("BofA: Business (USD)") == "bofa-business-usd" def test_empty_falls_back(self): assert slugify("") == "untitled" assert slugify(" ") == "untitled" class TestNewTemplate: def test_has_schema_version(self): t = new_template("Sample") assert t["schema_version"] == SCHEMA_VERSION def test_slug_derived_from_name(self): t = new_template("Sample Bank") assert t["slug"] == "sample-bank" assert t["name"] == "Sample Bank" def test_timestamps_present(self): t = new_template("X") assert t["created_at"] assert t["updated_at"] class TestValidateTemplateRowHeuristic: """Row-heuristic mode is the v2 default.""" def _valid(self) -> dict: return { "schema_version": SCHEMA_VERSION, "slug": "x", "name": "X", "mode": "row_heuristic", "row_detection": { "min_amounts_per_row": 1, "max_amounts_per_row": 3, }, "amounts": {"shape": "single"}, "date": {"format": "%m/%d/%Y"}, } def test_valid_passes(self): ok, errs = validate_template(self._valid()) assert ok, errs def test_missing_name_fails(self): t = self._valid() t["name"] = "" ok, errs = validate_template(t) assert not ok def test_bad_mode_fails(self): t = self._valid() t["mode"] = "magic" ok, errs = validate_template(t) assert not ok assert any("mode" in e for e in errs) def test_bad_shape_fails(self): t = self._valid() t["amounts"]["shape"] = "telepathic" ok, errs = validate_template(t) assert not ok assert any("shape" in e for e in errs) def test_inverted_amount_range_fails(self): t = self._valid() t["row_detection"]["min_amounts_per_row"] = 5 t["row_detection"]["max_amounts_per_row"] = 2 ok, errs = validate_template(t) assert not ok def test_does_not_require_columns_in_row_mode(self): """Key point: row mode doesn't need ``columns`` populated. That's what makes the GUI's primary path simpler than v1.""" t = self._valid() # No columns key at all. ok, errs = validate_template(t) assert ok, errs class TestValidateTemplateColumnVisual: """Legacy column-visual mode keeps its own contract.""" def _valid(self) -> dict: return { "schema_version": SCHEMA_VERSION, "slug": "x", "name": "X", "mode": "column_visual", "pages": {"range": "all"}, "table": {"column_boundaries": [100, 200]}, "columns": [ {"source": 0, "target": "date"}, {"source": 1, "target": "description"}, {"source": 2, "target": "amount"}, ], "parse": {}, } def test_valid_passes(self): ok, errs = validate_template(self._valid()) assert ok, errs def test_requires_date_column(self): t = self._valid() t["columns"] = [ {"source": 0, "target": "description"}, {"source": 1, "target": "amount"}, ] ok, errs = validate_template(t) assert not ok assert any("date" in e for e in errs) def test_requires_amount_or_debit_credit(self): t = self._valid() t["columns"] = [ {"source": 0, "target": "date"}, {"source": 1, "target": "description"}, ] ok, errs = validate_template(t) assert not ok assert any("amount" in e for e in errs) def test_debit_credit_pair_is_valid(self): t = self._valid() t["columns"] = [ {"source": 0, "target": "date"}, {"source": 1, "target": "description"}, {"source": 2, "target": "amount_debit"}, {"source": 3, "target": "amount_credit"}, ] t["table"]["column_boundaries"] = [100, 200, 300] ok, errs = validate_template(t) assert ok, errs class TestV1Migration: """v1 templates load with mode='column_visual' auto-injected; the file on disk stays v1 until the user re-saves.""" def test_loads_v1_template(self, isolated_templates, tmp_path): import json v1_payload = { "schema_version": 1, "slug": "legacy", "name": "Legacy Bank", "pages": {"range": "all"}, "table": {"column_boundaries": [100, 200]}, "columns": [ {"source": 0, "target": "date"}, {"source": 1, "target": "description"}, {"source": 2, "target": "amount"}, ], "parse": {}, } (tmp_path / "legacy.json").write_text( json.dumps(v1_payload), encoding="utf-8", ) loaded = load_template("legacy") # In-memory migration adds mode + bumps schema_version assert loaded["mode"] == "column_visual" assert loaded["schema_version"] == SCHEMA_VERSION # Original keys still intact assert loaded["columns"][0]["target"] == "date" class TestPersistence: def test_round_trip(self, isolated_templates): t = new_template("Round Trip Bank") t["columns"] = [ {"source": 0, "target": "date"}, {"source": 1, "target": "description"}, {"source": 2, "target": "amount"}, ] t["table"]["column_boundaries"] = [100, 200] slug = save_template(t) assert slug == "round-trip-bank" path = template_path(slug) assert path.exists() loaded = load_template(slug) assert loaded["name"] == "Round Trip Bank" assert loaded["columns"][0]["target"] == "date" def test_save_rejects_invalid(self, isolated_templates): with pytest.raises(ValueError): save_template({"schema_version": 1, "name": ""}) def test_load_missing_raises(self, isolated_templates): with pytest.raises(FileNotFoundError): load_template("does-not-exist") def test_load_corrupt_raises(self, isolated_templates, tmp_path): bad = tmp_path / "bad.json" bad.write_text("not json", encoding="utf-8") with pytest.raises(ValueError): load_template("bad") def test_delete(self, isolated_templates): t = new_template("To Delete") t["columns"] = [ {"source": 0, "target": "date"}, {"source": 1, "target": "amount"}, ] t["table"]["column_boundaries"] = [100] save_template(t) assert delete_template("to-delete") is True assert delete_template("to-delete") is False def test_list_returns_summaries(self, isolated_templates): for name in ["Alpha", "Bravo"]: t = new_template(name) t["columns"] = [ {"source": 0, "target": "date"}, {"source": 1, "target": "amount"}, ] t["table"]["column_boundaries"] = [100] save_template(t) rows = list_templates() assert {r["slug"] for r in rows} == {"alpha", "bravo"} def test_list_skips_corrupt(self, isolated_templates, tmp_path): (tmp_path / "broken.json").write_text("nope", encoding="utf-8") # Even with a broken file present, list still returns [] rows = list_templates() assert rows == [] def test_atomic_save_no_partial_file_on_failure( self, isolated_templates, monkeypatch ): """If the write step fails mid-way, no half-written JSON survives at the target path. Tests the temp-file-rename safety pattern.""" t = new_template("Atomic") t["columns"] = [ {"source": 0, "target": "date"}, {"source": 1, "target": "amount"}, ] t["table"]["column_boundaries"] = [100] # Make json.dumps blow up to simulate a failure during write. # save_template already validated before this step, so the # crash is "after validation, during write". import src.pdf_templates as mod original_dumps = mod.json.dumps def boom(*a, **kw): raise IOError("disk full") monkeypatch.setattr(mod.json, "dumps", boom) with pytest.raises(IOError): save_template(t) monkeypatch.setattr(mod.json, "dumps", original_dumps) assert not template_path("atomic").exists() class TestImportExport: def test_round_trip_via_json(self): t = new_template("Exported") t["columns"] = [ {"source": 0, "target": "date"}, {"source": 1, "target": "amount"}, ] payload = template_to_json(t) loaded = template_from_json(payload) assert loaded["name"] == "Exported" def test_import_rejects_bad_schema(self): bad = json.dumps({"schema_version": 999, "name": "X"}) with pytest.raises(ValueError): template_from_json(bad) def test_import_rejects_non_object(self): with pytest.raises(ValueError): template_from_json('["not", "an", "object"]') def test_templates_dir_env_override(monkeypatch, tmp_path): monkeypatch.setenv("DATATOOLS_PDF_TEMPLATES_DIR", str(tmp_path)) assert templates_dir() == tmp_path