test: fix v3 branding drift, add reconcile CLI + registry coverage

GUI/lang-pack tests were asserting against pre-v3 strings ("Data
Cleaning Mastery", "Maestría en limpieza…") that the brand refresh
replaced with "UNALOGIX DataTools" + "Clean. Normalize. Transform."
Updated assertions to the current copy and switched the findings
panel tests to the redesigned flat-list layout (per-finding "Open
Tool →" buttons instead of per-tool expanders).

New coverage:
- tests/test_cli_reconcile.py (13) — preview/apply, tolerance flags,
  sign inversion, key flags, error paths, Excel input.
- tests/test_tools_registry.py (27) — unique tool_ids, page_slug →
  real file, valid sections/tiers, localized accessor fallbacks,
  explicit pins for PDF Extractor + Reconciler entries.
- tests/test_reconcile.py — one-side-empty, key-pass tagging,
  additional validation cases, input-DataFrame immutability.
- tests/gui/test_smoke.py — PAGE_SLUGS now includes 10_PDF_Extractor
  and 11_Reconciler in both en/es.
- tests/gui/test_workflows.py — TestPdfExtractorWorkflow and
  TestReconcilerWorkflow render checks.

Net: 2317 passed → 2418 passed, 0 failures.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-22 19:30:02 +00:00
parent ea99e292d2
commit 6627895a10
9 changed files with 737 additions and 80 deletions

View File

@@ -85,8 +85,8 @@ class TestGatePassesWithTrialLicense:
home_app.run()
text = collected_text(home_app)
# With a valid license, the activation form should NOT be the
# primary content; we should see the home title + tool cards.
assert "Data Cleaning Mastery" in text
# primary content; we should see the home tagline + tool cards.
assert "Clean. Normalize. Transform." in text
assert "Activate DataTools" not in text # form not shown inline
def test_sidebar_shows_active_status(self, trial_license, home_app):
@@ -150,7 +150,7 @@ class TestActivationFormSubmission:
# After activation the page reruns and the activation form
# should be gone — we should see the home page proper.
text = collected_text(home_app)
assert "Data Cleaning Mastery" in text
assert "Clean. Normalize. Transform." in text
def test_trial_button_absent_paid_only(self, no_license_env, home_app):
"""v1.6 dropped the user-facing trial flow — paid licenses only.

View File

@@ -59,7 +59,7 @@ class TestLanguageSwitch:
lang = home_app.session_state["ui_lang"] if "ui_lang" in home_app.session_state else "en"
assert lang == "en"
text = collected_text(home_app)
assert "Data Cleaning Mastery" in text
assert "Clean. Normalize. Transform." in text
def test_selecting_spanish_persists_in_session(self, home_app):
home_app.run()
@@ -72,22 +72,22 @@ class TestLanguageSwitch:
selector = home_app.sidebar.selectbox[0]
selector.select("es").run()
text = collected_text(home_app)
assert "Maestría" in text, (
"after selecting Spanish, the home title should switch to "
f"'🧹 DataTools — Maestría…'; got:\n{text[:300]}"
assert "Limpia. Normaliza. Transforma." in text, (
"after selecting Spanish, the home tagline should switch to "
f"'Limpia. Normaliza. Transforma.'; got:\n{text[:300]}"
)
def test_selecting_back_to_english_reverts(self, home_app):
# Start in Spanish, then flip back.
with_language(home_app, "es")
home_app.run()
assert "Maestría" in collected_text(home_app)
assert "Limpia. Normaliza. Transforma." in collected_text(home_app)
selector = home_app.sidebar.selectbox[0]
selector.select("en").run()
text = collected_text(home_app)
assert "Data Cleaning Mastery" in text
assert "Maestría" not in text
assert "Clean. Normalize. Transform." in text
assert "Limpia. Normaliza. Transforma." not in text
# ---------------------------------------------------------------------------
@@ -96,26 +96,34 @@ class TestLanguageSwitch:
class TestLocalizedChrome:
"""A spot-check on the parts of the chrome that aren't the selector:
the bottom footer caption and the home-page hero text. Other strings
are pinned indirectly by ``TestEveryPageRenders.test_expected_*``."""
the home-page privacy pill (visible to AppTest) and the upload
section heading. The sticky footer caption is rendered via a
component-iframe and isn't visible through ``collected_text``."""
def test_footer_english(self, home_app):
def test_privacy_pill_english(self, home_app):
home_app.run()
text = collected_text(home_app)
assert "Your data never leaves" in text
assert "Runs 100% locally" in text
def test_footer_spanish(self, home_app):
def test_privacy_pill_spanish(self, home_app):
with_language(home_app, "es")
home_app.run()
text = collected_text(home_app)
assert "Tus datos nunca salen" in text
assert "Se ejecuta 100% en local" in text
def test_upload_section_heading_localizes(self, home_app):
with_language(home_app, "es")
home_app.run()
text = collected_text(home_app)
# ``📤 Sube uno o más archivos para empezar`` from the es pack.
assert "Sube uno o más archivos" in text
# The visible "Files" section heading is hard-coded English
# in the redesigned home page; what's still localized is the
# file_uploader widget's label (``upload.uploader_label_multi``).
# AppTest exposes uploaders separately from the text-bearing
# widget collections, so we check the uploader's label
# attribute directly.
labels = [u.label for u in home_app.file_uploader]
assert any("Importa archivos" in lbl for lbl in labels), (
f"Spanish uploader label missing; got: {labels}"
)
# ---------------------------------------------------------------------------

View File

@@ -98,11 +98,19 @@ class TestHeader:
# ---------------------------------------------------------------------------
# Per-tool grouping → one expander per tool id
# Per-finding row → one "Open Tool" button per targeted finding
# ---------------------------------------------------------------------------
#
# The findings panel was redesigned (mockup-v2): it now renders ONE
# severity-sorted flat list rather than per-tool expanders. Each finding
# with a known tool id gets a tertiary button labelled
# ``"{Tool display name} →"`` that switches pages on click. Findings
# with no tool id (file-level CSV-shape warnings, encoding flags, etc.)
# render without a button — the description still shows so the user
# isn't blind to them.
class TestGrouping:
def test_findings_grouped_into_per_tool_expanders(self):
class TestRowsRenderForFindings:
def test_one_button_per_targeted_finding(self):
findings = [
_make_finding(tool="02_text_cleaner", id="whitespace_padding"),
_make_finding(tool="02_text_cleaner", id="nbsp_padding"),
@@ -110,96 +118,96 @@ class TestGrouping:
]
app = _harness(findings)
app.run()
labels = [e.label for e in app.expander]
# Two unique tools → two expanders. Each label carries the
# tool's display name + finding count.
text_cleaner_expanders = [lbl for lbl in labels if "Clean Text" in lbl]
format_expanders = [lbl for lbl in labels if "Standardize Formats" in lbl]
assert len(text_cleaner_expanders) == 1, (
f"expected one Clean Text expander; got: {labels}"
labels = [b.label for b in app.button]
# Each targeted finding gets its own "Open Tool" button — three
# findings → three buttons (two pointing at Clean Text, one at
# Standardize Formats).
clean_text_buttons = [l for l in labels if l == "Clean Text →"]
format_buttons = [l for l in labels if l == "Standardize Formats →"]
assert len(clean_text_buttons) == 2, (
f"expected 2 Clean Text buttons; got: {labels}"
)
assert len(format_expanders) == 1, (
f"expected one Standardize Formats expander; got: {labels}"
assert len(format_buttons) == 1, (
f"expected 1 Standardize Formats button; got: {labels}"
)
def test_tool_names_localize_in_spanish(self):
findings = [_make_finding(tool="02_text_cleaner")]
app = _harness(findings, lang="es")
app.run()
labels = [e.label for e in app.expander]
labels = [b.label for b in app.button]
assert any("Limpiar texto" in lbl for lbl in labels), (
f"Spanish tool name missing; expanders: {labels}"
)
def test_finding_count_in_expander_label(self):
findings = [
_make_finding(tool="02_text_cleaner", id=f"f{i}")
for i in range(3)
]
app = _harness(findings)
app.run()
labels = [e.label for e in app.expander]
# Pack template: "{tool} — {n} finding(s)"
text_cleaner_label = next(l for l in labels if "Clean Text" in l)
assert "3" in text_cleaner_label, (
f"expected count '3' in expander label; got {text_cleaner_label!r}"
f"Spanish tool name missing; buttons: {labels}"
)
# ---------------------------------------------------------------------------
# Open-tool button localizes
# Open-tool button labels — confirm the arrow + name format
# ---------------------------------------------------------------------------
class TestOpenToolButton:
"""Each tool section has an ``st.page_link`` to jump to that tool's
page. AppTest exposes page_links as ``app.button`` entries with
label ``"Open {tool}"`` (English) / ``"Abrir {tool}"`` (Spanish)."""
"""Each finding with a known tool gets a tertiary button labelled
``"{Tool name} →"``. The arrow + spacing is the affordance that
distinguishes the row's primary action from the title text."""
def test_open_tool_label_english(self):
findings = [_make_finding(tool="02_text_cleaner")]
app = _harness(findings)
app.run()
# ``st.page_link`` may show up under ``app.button`` or in the
# raw markdown. We probe both.
text = collected_text(app)
# Pack template: "Open {tool} →"
assert "Open Clean Text" in text
labels = [b.label for b in app.button]
assert "Clean Text →" in labels, (
f"expected 'Clean Text →' button; got: {labels}"
)
def test_open_tool_label_spanish(self):
findings = [_make_finding(tool="02_text_cleaner")]
app = _harness(findings, lang="es")
app.run()
text = collected_text(app)
# Pack template: "Abrir {tool} →"
assert "Abrir Limpiar texto" in text
labels = [b.label for b in app.button]
assert "Limpiar texto →" in labels, (
f"expected 'Limpiar texto →' button; got: {labels}"
)
# ---------------------------------------------------------------------------
# Untargeted findings (file-level) go in the "Other" expander
# Untargeted findings (file-level) render without an action button
# ---------------------------------------------------------------------------
class TestUntargetedFindings:
def test_untargeted_goes_to_other_expander_en(self):
"""A finding with ``tool=""`` (e.g., CSV BOM stripped at read time)
is file-level — no tool page to jump to — and the redesigned panel
renders the description without a button. We assert that the row
contributes nothing to ``app.button`` while still appearing in the
rendered markdown."""
def test_untargeted_renders_no_button_en(self):
findings = [
_make_finding(tool="", id="csv_bom_stripped"),
_make_finding(tool="", id="csv_bom_stripped", description="BOM stripped"),
_make_finding(tool="02_text_cleaner", id="nbsp_padding"),
]
app = _harness(findings)
app.run()
labels = [e.label for e in app.expander]
# Pack template: "Other / file-level — {n} finding(s)"
assert any("Other / file-level" in lbl for lbl in labels), (
f"untargeted expander missing; got: {labels}"
labels = [b.label for b in app.button]
# Only the targeted finding contributed a button.
assert "Clean Text →" in labels
# The BOM finding's description must still be visible somewhere.
all_md = "\n".join(
m.body for m in app.markdown if hasattr(m, "body")
)
assert "BOM stripped" in all_md, (
"untargeted finding's description should still render"
)
def test_untargeted_label_spanish(self):
findings = [_make_finding(tool="", id="csv_bom_stripped")]
def test_untargeted_renders_no_button_es(self):
findings = [_make_finding(
tool="", id="csv_bom_stripped", description="BOM eliminado",
)]
app = _harness(findings, lang="es")
app.run()
labels = [e.label for e in app.expander]
# Spanish pack: "Otros / a nivel de archivo — {n} hallazgo(s)"
assert any("Otros / a nivel de archivo" in lbl for lbl in labels), (
f"Spanish 'Other' expander missing; got: {labels}"
labels = [b.label for b in app.button]
# Tool-jump buttons are labelled "{Tool name} →", so the arrow is the
# marker to look for. An empty-string needle would be a substring of
# every label and would reduce this check to "no buttons of any kind",
# failing on unrelated buttons the harness might render.
assert not any("→" in lbl for lbl in labels), (
    f"untargeted finding should not render a tool button; got: {labels}"
)

View File

@@ -34,6 +34,8 @@ PAGE_SLUGS = [
"7_Multi_File_Merger",
"8_Validator_Reporter",
"9_Pipeline_Runner",
"10_PDF_Extractor",
"11_Reconciler",
"99_Close",
]
@@ -61,17 +63,28 @@ EXPECTED_SUBSTRINGS: dict[str, dict[str, str]] = {
"7_Multi_File_Merger": {"en": "Combine Files", "es": "Combinar archivos"},
"8_Validator_Reporter": {"en": "Quality Check", "es": "Verificación de calidad"},
"9_Pipeline_Runner": {"en": "Automated", "es": "Flujos automatizados"},
# The PDF Extractor and Reconciler pages are English-only today
# (translations tracked as a follow-up). The smoke test value is
# still that the page *renders at all* in 'es'; the substring is
# the same English hero text under both languages.
"10_PDF_Extractor": {"en": "PDF to CSV", "es": "PDF to CSV"},
"11_Reconciler": {"en": "Reconcile", "es": "Reconcile"},
"99_Close": {"en": "Shutting down", "es": "Cerrando"},
}
class TestHomePageRenders:
"""The home page is the only one with full EN/ES coverage in v1.6.
Pin it independently so its translation is non-regressable."""
"""Pin the home hero in both languages.
Since the v3 brand refresh the title is the literal wordmark
("UNALOGIX DataTools") in both packs; the localized tagline is
what shifts between en and es. We assert against the tagline
string, which lives in ``home.caption`` of each pack.
"""
@pytest.mark.parametrize("lang,expected", [
("en", "DataTools — Data Cleaning Mastery"),
("es", "DataTools — Maestría en limpieza de datos"),
("en", "Clean. Normalize. Transform."),
("es", "Limpia. Normaliza. Transforma."),
])
def test_home_renders_in_language(self, home_app, lang, expected):
with_language(home_app, lang)
@@ -81,11 +94,15 @@ class TestHomePageRenders:
)
assert expected in collected_text(home_app)
def test_home_renders_footer_in_es(self, home_app):
def test_home_renders_privacy_pill_in_es(self, home_app):
# The footer caption is rendered via a component-iframe so
# ``collected_text`` can't see it. The privacy pill on the
# home header IS visible to AppTest and carries the same
# locality story, so we pin that instead.
with_language(home_app, "es")
home_app.run()
text = collected_text(home_app)
assert "Tus datos nunca salen" in text or "Se ejecuta localmente" in text
assert "Se ejecuta 100% en local" in text
class TestEveryPageRenders:
"""Parametrize over (page, language). Failure tells you exactly which

View File

@@ -152,6 +152,48 @@ class TestPipelineRunnerWorkflow:
# ---------------------------------------------------------------------------
# PDF to CSV — file-uploader-driven so we can't fully exercise the
# scan flow through AppTest. Pin the initial render (which carries the
# dep-status banner when deps are missing) so a future regression in
# the dep guard shows up here.
# ---------------------------------------------------------------------------
class TestPdfExtractorWorkflow:
    """Initial-render check for the PDF to CSV page with nothing uploaded."""

    def test_page_renders_without_upload(self, app_factory):
        # Run the page as-is: it must not raise, and the "PDF to CSV"
        # text must be part of what gets rendered.
        page = app_factory("10_PDF_Extractor")
        page.run()
        assert not page.exception
        assert "PDF to CSV" in collected_text(page)
# ---------------------------------------------------------------------------
# Reconcile Two Files — early-exits at ``st.stop()`` without both
# uploads. Pin both the no-upload state and the title.
# ---------------------------------------------------------------------------
class TestReconcilerWorkflow:
    """Render checks for the Reconciler page when no files are uploaded."""

    def test_page_renders_without_uploads(self, app_factory):
        page = app_factory("11_Reconciler")
        page.run()
        assert not page.exception
        assert "Reconcile" in collected_text(page)

    def test_prompts_for_both_uploads_when_empty(self, app_factory):
        # With no files supplied the page is expected to show an info
        # box containing "Upload both files"; that prompt is the only
        # hint the user gets about what to do next, so it is pinned.
        page = app_factory("11_Reconciler")
        page.run()
        info_messages = []
        for box in page.info:
            if hasattr(box, "body"):
                info_messages.append(box.body)
        assert any("Upload both files" in m for m in info_messages), (
            f"missing 'Upload both files' prompt; got: {info_messages}"
        )
# ---------------------------------------------------------------------------
# Coming-Soon pages still render (just a stub) — pinned so we know if a
# Coming-Soon goes from "stub renders" to "import error".

284
tests/test_cli_reconcile.py Normal file
View File

@@ -0,0 +1,284 @@
"""Tests for src.cli_reconcile — Typer CLI for two-source reconciliation.
The reconciliation engine itself is covered by ``test_reconcile.py``;
this file exercises the CLI surface around it: argument parsing
(comma-separated keys, optional dates), preview vs. apply modes, the
four output files, and error paths for bad inputs.
"""
from __future__ import annotations
import sys
from pathlib import Path
import pandas as pd
import pytest
from typer.testing import CliRunner
from src.cli_reconcile import app
runner = CliRunner()
def _write_bank(path: Path) -> None:
    """Write a bank-feed-shaped CSV (header plus two transactions) to *path*."""
    lines = [
        "date,amount,desc\n",
        "2026-01-05,100.00,ACME\n",
        "2026-01-06,250.00,WIDGET CO\n",
    ]
    path.write_text("".join(lines))
def _write_ledger(path: Path) -> None:
    """Write a ledger-shaped CSV to *path*.

    The rows carry the same two dates and amounts as the bank fixture
    but under different column names (``posted``/``amt``/``memo``).
    """
    lines = [
        "posted,amt,memo\n",
        "2026-01-05,100.00,Acme Inc\n",
        "2026-01-06,250.00,Widget\n",
    ]
    path.write_text("".join(lines))
class TestPreviewMode:
    """Default mode (no ``--apply``): print stats only, write nothing."""

    def test_basic_preview_succeeds(self, tmp_path):
        """A preview over the two fixture files exits 0 and prints stats."""
        bank = tmp_path / "bank.csv"
        ledger = tmp_path / "ledger.csv"
        _write_bank(bank)
        _write_ledger(ledger)
        result = runner.invoke(app, [
            str(bank), str(ledger),
            "--left-amount", "amount", "--right-amount", "amt",
            "--left-date", "date", "--right-date", "posted",
        ])
        assert result.exit_code == 0, result.stdout
        assert "Matched:" in result.stdout
        assert "Unmatched left:" in result.stdout
        # Two-of-two match in the fixture.
        assert "Matched: 2" in result.stdout
        # The reminder banner is part of the preview UX.
        assert "Add --apply" in result.stdout

    def test_preview_does_not_write_files(self, tmp_path):
        """A successful preview leaves no output files beside the input."""
        bank = tmp_path / "bank.csv"
        ledger = tmp_path / "ledger.csv"
        _write_bank(bank)
        _write_ledger(ledger)
        result = runner.invoke(app, [
            str(bank), str(ledger),
            "--left-amount", "amount", "--right-amount", "amt",
            "--left-date", "date", "--right-date", "posted",
        ])
        # Guard against a vacuous pass: a run that failed before doing
        # any work would also leave no files behind, so require success
        # before checking for the absence of outputs.
        assert result.exit_code == 0, result.stdout
        # None of the four output suffixes should land beside the input.
        for suffix in ("matched", "unmatched_left", "unmatched_right", "review"):
            assert not (tmp_path / f"bank_{suffix}.csv").exists()
class TestApplyMode:
    """``--apply``: write the four output files beside the LEFT input."""

    def test_apply_writes_four_files(self, tmp_path):
        left_csv = tmp_path / "bank.csv"
        right_csv = tmp_path / "ledger.csv"
        _write_bank(left_csv)
        _write_ledger(right_csv)
        argv = [
            str(left_csv), str(right_csv),
            "--left-amount", "amount", "--right-amount", "amt",
            "--left-date", "date", "--right-date", "posted",
            "--apply",
        ]
        outcome = runner.invoke(app, argv)
        assert outcome.exit_code == 0, outcome.stdout
        # Each output shares the left input's directory and stem.
        expected_outputs = [
            tmp_path / f"bank_{suffix}.csv"
            for suffix in ("matched", "unmatched_left", "unmatched_right", "review")
        ]
        for out in expected_outputs:
            assert out.exists(), f"missing {out.name}"
        # The fixture holds two pairs, so the matched file has two rows.
        assert len(pd.read_csv(tmp_path / "bank_matched.csv")) == 2

    def test_apply_with_unmatched_rows(self, tmp_path):
        left_csv = tmp_path / "bank.csv"
        right_csv = tmp_path / "ledger.csv"
        # One shared transaction plus one row unique to each side.
        left_csv.write_text(
            "date,amount,desc\n"
            "2026-01-05,100.00,ACME\n"
            "2026-01-07,99.99,LEFT-ONLY\n"
        )
        right_csv.write_text(
            "posted,amt,memo\n"
            "2026-01-05,100.00,Acme\n"
            "2026-01-08,500.00,RIGHT-ONLY\n"
        )
        argv = [
            str(left_csv), str(right_csv),
            "--left-amount", "amount", "--right-amount", "amt",
            "--left-date", "date", "--right-date", "posted",
            "--apply",
        ]
        outcome = runner.invoke(app, argv)
        assert outcome.exit_code == 0
        left_only = pd.read_csv(tmp_path / "bank_unmatched_left.csv")
        right_only = pd.read_csv(tmp_path / "bank_unmatched_right.csv")
        assert "LEFT-ONLY" in left_only["desc"].tolist()
        assert "RIGHT-ONLY" in right_only["memo"].tolist()
class TestToleranceFlags:
    """``--amount-tolerance`` and ``--date-tolerance`` widen what counts as a match."""

    def test_amount_tolerance_absorbs_rounding(self, tmp_path):
        left_csv = tmp_path / "bank.csv"
        right_csv = tmp_path / "ledger.csv"
        # Same date, amounts two cents apart.
        left_csv.write_text(
            "date,amount,desc\n"
            "2026-01-05,100.00,ACME\n"
        )
        right_csv.write_text(
            "posted,amt,memo\n"
            "2026-01-05,100.02,Acme\n"
        )
        base_argv = [
            str(left_csv), str(right_csv),
            "--left-amount", "amount", "--right-amount", "amt",
            "--left-date", "date", "--right-date", "posted",
        ]
        # No tolerance flag: the pair is not matched.
        strict = runner.invoke(app, base_argv)
        assert "Matched: 0" in strict.stdout
        # A 0.05 tolerance covers the 0.02 gap: one match.
        lenient = runner.invoke(app, [*base_argv, "--amount-tolerance", "0.05"])
        assert "Matched: 1" in lenient.stdout

    def test_date_tolerance_allows_drift(self, tmp_path):
        left_csv = tmp_path / "bank.csv"
        right_csv = tmp_path / "ledger.csv"
        left_csv.write_text(
            "date,amount,desc\n"
            "2026-01-05,100.00,ACME\n"
        )
        # Ledger posting is two days after the bank date.
        right_csv.write_text(
            "posted,amt,memo\n"
            "2026-01-07,100.00,Acme\n"
        )
        argv = [
            str(left_csv), str(right_csv),
            "--left-amount", "amount", "--right-amount", "amt",
            "--left-date", "date", "--right-date", "posted",
            "--date-tolerance", "3",
        ]
        outcome = runner.invoke(app, argv)
        assert "Matched: 1" in outcome.stdout
class TestSignInversion:
    """``--invert-right-sign`` lets opposite-signed amounts pair off."""

    def test_invert_right_sign(self, tmp_path):
        left_csv = tmp_path / "bank.csv"
        right_csv = tmp_path / "ledger.csv"
        left_csv.write_text(
            "date,amount,desc\n"
            "2026-01-05,100.00,ACME\n"
        )
        # The ledger records the same transaction with the sign flipped.
        right_csv.write_text(
            "posted,amt,memo\n"
            "2026-01-05,-100.00,Acme\n"
        )
        argv = [
            str(left_csv), str(right_csv),
            "--left-amount", "amount", "--right-amount", "amt",
            "--left-date", "date", "--right-date", "posted",
            "--invert-right-sign",
        ]
        outcome = runner.invoke(app, argv)
        assert "Matched: 1" in outcome.stdout
class TestKeyFlags:
    """``--left-keys`` / ``--right-keys`` pair rows by a shared identifier."""

    def test_comma_separated_keys_pair_off(self, tmp_path):
        # Both rows carry check number 1042 but their dates are a week
        # apart; the test expects the key columns to produce the match.
        left_csv = tmp_path / "bank.csv"
        right_csv = tmp_path / "ledger.csv"
        left_csv.write_text(
            "date,amount,desc,check_no\n"
            "2026-01-05,100.00,ACME,1042\n"
        )
        right_csv.write_text(
            "posted,amt,memo,ref\n"
            "2026-01-12,100.00,Acme,1042\n"
        )
        argv = [
            str(left_csv), str(right_csv),
            "--left-amount", "amount", "--right-amount", "amt",
            "--left-date", "date", "--right-date", "posted",
            "--left-keys", "check_no",
            "--right-keys", "ref",
        ]
        outcome = runner.invoke(app, argv)
        assert "Matched: 1" in outcome.stdout
class TestErrorPaths:
    """Bad inputs: each case must exit non-zero with a readable message.

    Message checks read ``result.output`` instead of probing
    ``result.stdout`` and ``result.stderr`` separately. On Click
    releases before 8.2 the ``stderr`` property raises ``ValueError``
    unless the runner captures stderr separately, which would turn an
    ordinary assertion failure into a confusing error. ``output``
    carries the combined text on both old and new releases.
    """

    def test_missing_left_file(self, tmp_path):
        """A nonexistent left path fails and mentions "not found"."""
        ledger = tmp_path / "ledger.csv"
        _write_ledger(ledger)
        result = runner.invoke(app, [
            str(tmp_path / "nope.csv"), str(ledger),
            "--left-amount", "amount", "--right-amount", "amt",
        ])
        assert result.exit_code != 0
        assert "not found" in result.output.lower(), result.output

    def test_missing_right_file(self, tmp_path):
        """A nonexistent right path fails with a non-zero exit code."""
        bank = tmp_path / "bank.csv"
        _write_bank(bank)
        result = runner.invoke(app, [
            str(bank), str(tmp_path / "nope.csv"),
            "--left-amount", "amount", "--right-amount", "amt",
        ])
        assert result.exit_code != 0

    def test_unknown_amount_column_surfaces_value_error(self, tmp_path):
        """An unknown amount column exits 1 and prints an "Error" banner."""
        # The reconcile engine raises ValueError on unknown column names;
        # the CLI is expected to catch it and exit 1 with a readable banner.
        bank = tmp_path / "bank.csv"
        ledger = tmp_path / "ledger.csv"
        _write_bank(bank)
        _write_ledger(ledger)
        result = runner.invoke(app, [
            str(bank), str(ledger),
            "--left-amount", "NOT_A_COLUMN", "--right-amount", "amt",
        ])
        assert result.exit_code == 1
        # Banner format: "Error: <message>"
        assert "Error" in result.output, result.output

    def test_help_renders(self):
        """``--help`` exits 0 and mentions "reconcile"."""
        result = runner.invoke(app, ["--help"])
        assert result.exit_code == 0
        assert "reconcile" in result.output.lower()
class TestExcelInput:
    """Input may be CSV, TSV, or Excel — read_file dispatches by suffix."""

    def test_excel_left_file_reads(self, tmp_path):
        # Left side is a one-row .xlsx workbook; right side is the
        # regular two-row CSV ledger fixture.
        workbook = tmp_path / "bank.xlsx"
        pd.DataFrame({
            "date": ["2026-01-05"],
            "amount": [100.00],
            "desc": ["ACME"],
        }).to_excel(workbook, index=False)
        ledger_csv = tmp_path / "ledger.csv"
        _write_ledger(ledger_csv)
        argv = [
            str(workbook), str(ledger_csv),
            "--left-amount", "amount", "--right-amount", "amt",
            "--left-date", "date", "--right-date", "posted",
        ]
        outcome = runner.invoke(app, argv)
        assert outcome.exit_code == 0, outcome.stdout
        # The single left row pairs with one of the two ledger rows.
        assert "Matched: 1" in outcome.stdout

View File

@@ -39,10 +39,16 @@ def _load_pack(code: str) -> dict:
class TestLookup:
def test_returns_english_value_by_default(self):
assert t("home.title", "en").startswith("🧹 DataTools")
# Hero title is "UNALOGIX DataTools" since the v3 rebrand. The
# Spanish value is identical (proper noun); the localized
# tagline lives under ``home.caption`` instead.
assert t("home.title", "en") == "UNALOGIX DataTools"
def test_returns_spanish_value(self):
assert "Maestría" in t("home.title", "es")
# Title stays "UNALOGIX DataTools" in es too; the localized
# tagline is what differs.
assert t("home.title", "es") == "UNALOGIX DataTools"
assert "Limpia" in t("home.caption", "es")
def test_missing_key_falls_back_to_english(self):
# ``tools.99_pipeline_runner.name`` doesn't exist; the pipeline

View File

@@ -315,3 +315,117 @@ class TestResultShape:
assert result.matched.empty
assert result.unmatched_left.empty
assert result.unmatched_right.empty
def test_one_side_empty_keeps_other_unmatched(self):
    # Reconciling against an empty ledger must not crash: both bank
    # rows are expected to be reported as unmatched-left and nothing
    # else counted. Only the empty-right direction is exercised here.
    bank_rows = _bank([
        ("2026-01-05", 100.00, "ACME"),
        ("2026-01-06", 250.00, "WIDGET"),
    ])
    empty_ledger = _ledger([])
    opts = ReconcileOptions(
        left_amount="amount", right_amount="amt",
        left_date="date", right_date="posted",
    )
    stats = reconcile(bank_rows, empty_ledger, opts).stats
    assert stats["matched"] == 0
    assert stats["unmatched_left"] == 2
    assert stats["unmatched_right"] == 0
def test_match_pass_tagged_for_key_pass(self):
    # Each matched row carries a ``match_pass`` label naming the pass
    # that accepted the pair. The dates here are far apart while the
    # key columns agree, and the label is expected to read "key".
    bank_side = pd.DataFrame({
        "date": ["2026-01-05"],
        "amount": [100.00],
        "check_no": ["1042"],
    })
    ledger_side = pd.DataFrame({
        "posted": ["2099-12-31"],
        "amt": [100.00],
        "ref": ["1042"],
    })
    opts = ReconcileOptions(
        left_amount="amount", right_amount="amt",
        left_date="date", right_date="posted",
        left_keys=["check_no"], right_keys=["ref"],
    )
    outcome = reconcile(bank_side, ledger_side, opts)
    assert outcome.stats["matched"] == 1
    first_match = outcome.matched.iloc[0]
    assert first_match["match_pass"] == "key"
class TestAdditionalValidation:
    """Boundary cases for ``_validate_options`` not pinned elsewhere.

    Every case feeds the same one-row frames and expects a
    ``ValueError`` whose message matches the given pattern. The options
    object is built inside the ``pytest.raises`` block so the error is
    caught whether it surfaces at construction or inside ``reconcile``.
    """

    def test_unknown_left_amount_column_raises(self):
        bank_side = pd.DataFrame({"date": ["2026-01-05"], "amount": [1.0]})
        ledger_side = pd.DataFrame({"posted": ["2026-01-05"], "amt": [1.0]})
        with pytest.raises(ValueError, match="not in left DataFrame"):
            reconcile(bank_side, ledger_side, ReconcileOptions(
                left_amount="NOT_A_COLUMN", right_amount="amt",
            ))

    def test_unknown_right_amount_column_raises(self):
        bank_side = pd.DataFrame({"date": ["2026-01-05"], "amount": [1.0]})
        ledger_side = pd.DataFrame({"posted": ["2026-01-05"], "amt": [1.0]})
        with pytest.raises(ValueError, match="not in right DataFrame"):
            reconcile(bank_side, ledger_side, ReconcileOptions(
                left_amount="amount", right_amount="NOT_A_COLUMN",
            ))

    def test_unknown_left_key_column_raises(self):
        bank_side = pd.DataFrame({"date": ["2026-01-05"], "amount": [1.0]})
        ledger_side = pd.DataFrame({"posted": ["2026-01-05"], "amt": [1.0]})
        with pytest.raises(ValueError, match="left key column"):
            reconcile(bank_side, ledger_side, ReconcileOptions(
                left_amount="amount", right_amount="amt",
                left_keys=["nope"], right_keys=["nope"],
            ))

    def test_negative_date_tolerance_rejected(self):
        bank_side = pd.DataFrame({"date": ["2026-01-05"], "amount": [1.0]})
        ledger_side = pd.DataFrame({"posted": ["2026-01-05"], "amt": [1.0]})
        with pytest.raises(ValueError, match="date_tolerance_days"):
            reconcile(bank_side, ledger_side, ReconcileOptions(
                left_amount="amount", right_amount="amt",
                left_date="date", right_date="posted",
                date_tolerance_days=-1,
            ))

    def test_desc_min_score_out_of_range_rejected(self):
        bank_side = pd.DataFrame({"date": ["2026-01-05"], "amount": [1.0]})
        ledger_side = pd.DataFrame({"posted": ["2026-01-05"], "amt": [1.0]})
        with pytest.raises(ValueError, match="desc_min_score"):
            reconcile(bank_side, ledger_side, ReconcileOptions(
                left_amount="amount", right_amount="amt",
                desc_min_score=150,
            ))
class TestImmutability:
    """The engine must NOT mutate the caller's DataFrames — callers
    rely on holding onto their input frames after the call (the GUI
    Reconciler page re-renders previews from them)."""

    def test_left_df_columns_unchanged(self):
        """The left input frame is identical before and after the call."""
        left = _bank([("2026-01-05", 100.00, "ACME")])
        right = _ledger([("2026-01-05", 100.00, "Acme Inc")])
        # Deep snapshot taken before the call. Comparing ``id(left)``
        # before and after cannot detect mutation, because the local
        # name is never rebound and therefore always refers to the same
        # object; comparing contents against a copy can.
        snapshot = left.copy(deep=True)
        reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
        ))
        assert list(left.columns) == list(snapshot.columns)
        # Columns, dtypes, index and values must all be untouched.
        pd.testing.assert_frame_equal(left, snapshot)

    def test_amounts_preserved_when_invert_right_sign_set(self):
        """``invert_right_sign`` must not flip amounts in the caller's frame."""
        left = _bank([("2026-01-05", 100.00, "A")])
        right = _ledger([("2026-01-05", -100.00, "X")])
        original_right_amts = right["amt"].tolist()
        reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
            invert_right_sign=True,
        ))
        # The check is on the input frame itself, not on the result.
        assert right["amt"].tolist() == original_right_amts

View File

@@ -0,0 +1,178 @@
"""Tests for src.gui.tools_registry — the per-tool manifest.
The registry is loaded at import time by the home page sidebar nav,
the home grid, and the findings panel's "Open Tool" links. A broken
entry would surface as a sidebar disappearance, a missing card, or a
``KeyError`` in the findings rendering. We pin the invariants those
call sites rely on:
- Every page_slug points at a file that actually exists.
- Every tool_id is unique (the analyzer keys findings on it).
- Every section is one of the declared literals.
- ``tool_by_id`` round-trips, ``display_name`` falls back gracefully.
- ``section_label`` resolves localized labels.
"""
from __future__ import annotations
from pathlib import Path
from typing import get_args
import pytest
from src.gui.tools_registry import (
SECTION_LABELS,
TOOLS,
Section,
Tier,
Tool,
display_name,
section_label,
tool_by_id,
tool_description,
tool_name,
tools_for_tier,
tools_in_section,
)
PAGES_DIR = Path(__file__).resolve().parent.parent / "src" / "gui" / "pages"
class TestRegistryInvariants:
    """Structural invariants over ``TOOLS`` and ``SECTION_LABELS``."""

    def test_all_tool_ids_are_unique(self):
        """No two registry entries share a ``tool_id``."""
        ids = [t.tool_id for t in TOOLS]
        # On failure report only the repeated ids rather than the whole
        # list, so the offending entry is visible at a glance.
        assert len(ids) == len(set(ids)), (
            "duplicate tool_id in TOOLS: "
            f"{sorted({i for i in ids if ids.count(i) > 1})}"
        )

    def test_all_page_slugs_point_at_real_files(self):
        """Every ``page_slug`` names an existing ``.py`` file in PAGES_DIR."""
        for tool in TOOLS:
            page_file = PAGES_DIR / f"{tool.page_slug}.py"
            assert page_file.exists(), (
                f"{tool.tool_id}: page file {page_file} does not exist"
            )

    def test_all_sections_are_declared_literals(self):
        """Each tool's ``section`` is one of the ``Section`` literals."""
        valid = set(get_args(Section))
        for tool in TOOLS:
            assert tool.section in valid, (
                f"{tool.tool_id} has unknown section {tool.section!r}; "
                f"valid: {sorted(valid)}"
            )

    def test_all_tiers_are_declared_literals(self):
        """Each tool's ``tier`` is one of the ``Tier`` literals."""
        valid = set(get_args(Tier))
        for tool in TOOLS:
            assert tool.tier in valid, (
                f"{tool.tool_id} has unknown tier {tool.tier!r}; "
                f"valid: {sorted(valid)}"
            )

    def test_every_section_has_a_display_label(self):
        """Every ``Section`` literal has a ``SECTION_LABELS`` entry."""
        for section in get_args(Section):
            assert section in SECTION_LABELS, (
                f"section {section!r} has no SECTION_LABELS entry"
            )

    def test_no_orphan_section_labels(self):
        """Every ``SECTION_LABELS`` key is a declared ``Section`` literal."""
        # The other direction: a SECTION_LABELS key that isn't a
        # declared Section literal is dead config.
        valid = set(get_args(Section))
        for key in SECTION_LABELS:
            assert key in valid, (
                f"SECTION_LABELS has stray key {key!r} not in Section"
            )
class TestToolLookups:
    """``tool_by_id`` and ``display_name`` for known and unknown ids."""

    def test_tool_by_id_round_trips_every_entry(self):
        # Looking an entry up by its own id must hand back that very object.
        for entry in TOOLS:
            looked_up = tool_by_id(entry.tool_id)
            assert looked_up is entry, (
                f"tool_by_id({entry.tool_id!r}) returned {looked_up!r}"
            )

    def test_tool_by_id_returns_none_for_unknown(self):
        missing = tool_by_id("not_a_real_tool_id")
        assert missing is None

    def test_display_name_falls_back_to_id(self):
        # An unknown id is echoed back unchanged, so a bad id shows up
        # in the UI instead of raising.
        unknown_id = "not_a_real_tool_id"
        assert display_name(unknown_id) == unknown_id

    def test_display_name_resolves_known_tool(self):
        # The text cleaner is used as the representative known tool.
        shown = display_name("02_text_cleaner")
        assert shown == "Clean Text"
class TestTierAndSectionFilters:
    """``tools_for_tier`` and ``tools_in_section`` filtering behavior."""

    def test_tools_for_tier_empty_returns_all(self):
        everything = list(TOOLS)
        assert tools_for_tier() == everything

    def test_tools_for_tier_filters(self):
        # The expectations below assume every registered tool is
        # tier="core": a "core" filter yields the full registry and a
        # "pro" filter yields nothing.
        core_only = tools_for_tier("core")
        pro_only = tools_for_tier("pro")
        assert core_only == list(TOOLS)
        assert pro_only == []

    def test_tools_in_section_preserves_registry_order(self):
        expected_order = [t for t in TOOLS if t.section == "cleaners"]
        assert tools_in_section("cleaners") == expected_order

    @pytest.mark.parametrize("section", list(get_args(Section)))
    def test_every_section_has_at_least_one_tool(self, section):
        members = tools_in_section(section)
        assert members, (
            f"section {section!r} has zero tools — sidebar group would be empty"
        )
class TestLocalizedAccessors:
    """``tool_name``, ``tool_description`` and ``section_label`` lookups."""

    def test_tool_name_falls_back_to_registry_default(self):
        # An unknown id comes back as the literal id rather than raising.
        unknown_id = "not_a_real_tool_id"
        assert tool_name(unknown_id) == unknown_id

    def test_tool_name_returns_localized_when_pack_has_key(self):
        # The exact wording is not pinned here; the helper only has to
        # return something non-empty that is not the raw lookup key.
        resolved = tool_name("02_text_cleaner")
        assert resolved
        assert resolved != "tools.02_text_cleaner.name"

    def test_tool_description_returns_localized_or_fallback(self):
        resolved = tool_description("02_text_cleaner")
        assert resolved
        assert resolved != "tools.02_text_cleaner.description"

    def test_tool_description_for_unknown_returns_empty(self):
        # Unknown ids yield an empty string, not a lookup-key string.
        fallback = tool_description("not_a_real_tool_id")
        assert fallback == ""

    @pytest.mark.parametrize("section", list(get_args(Section)))
    def test_section_label_returns_non_empty(self, section):
        resolved = section_label(section)
        assert resolved
        assert resolved != f"nav.section_{section}"
class TestReconcilerAndPdfArePresent:
    """Explicit pins for the PDF Extractor and Reconciler registry
    entries, so that either one going missing fails loudly."""

    def test_pdf_extractor_present(self):
        entry = tool_by_id("10_pdf_extractor")
        assert entry is not None
        assert entry.page_slug == "10_PDF_Extractor"
        assert entry.status == "Ready"

    def test_reconciler_present(self):
        entry = tool_by_id("11_reconciler")
        assert entry is not None
        assert entry.page_slug == "11_Reconciler"
        assert entry.status == "Ready"
        # The Reconciler is expected to live in the "analysis" section.
        assert entry.section == "analysis"