Files
datatools-dev/tests/test_lang_packs.py
Michael 4955fb239b test: cover help_md keys, header smoke, and bilingual ES smoke
Two stale Spanish smoke assertions still expected English page titles
for PDF Extractor and Reconciler — the i18n work landed real
translations ("PDF a CSV", "Reconciliar dos archivos"), so refresh the
expected substrings and the surrounding comment.

Add new coverage for the help-popover feature:
- TestHelpPopoverKeys (test_lang_packs): every tool_id resolves a
  non-empty tools.<id>.help_md in BOTH packs; help.button_label and
  help.missing_body resolve in both.
- TestDescriptionCopy (test_tools_registry): every Tool.description
  non-empty and under 120 chars — pins the post-jargon-scrub copy
  so future drift back into multi-clause prose is loud.
- TestRenderToolHeaderSmoke: render_tool_header is callable, listed
  in components.__all__, and every i18n key it touches resolves in
  both packs. Runs without a Streamlit script context.

Suite: 2427 passed (+9 new), 91 skipped.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-02 18:07:19 +00:00

210 lines
7.5 KiB
Python

"""Tests for the GUI language-pack i18n module.
Covers:
- t() basic lookup, missing-key fallback to English, then to the key.
- str.format kwargs interpolation and tolerant handling of missing keys.
- Parity between English and Spanish packs so a new key in en.json
doesn't silently regress to English when es is active.
- The JS-escape helper used by the farewell overlay.
"""
from __future__ import annotations
import json
from pathlib import Path
import pytest
from src.i18n import LANGUAGES, available_languages, t
# Loaded once for the parity test.
_PACK_DIR = Path(__file__).resolve().parent.parent / "src" / "i18n" / "packs"
def _flatten(obj, prefix=""):
"""Yield dotted-key paths from a nested dict pack."""
if isinstance(obj, dict):
for k, v in obj.items():
path = f"{prefix}.{k}" if prefix else k
yield from _flatten(v, path)
else:
yield prefix
def _load_pack(code: str) -> dict:
    """Read ``<code>.json`` from the pack directory and return the parsed JSON."""
    pack_path = _PACK_DIR / f"{code}.json"
    raw = pack_path.read_text(encoding="utf-8")
    return json.loads(raw)
class TestLookup:
    """Basic ``t()`` resolution and its fallback chain."""

    def test_returns_english_value_by_default(self):
        # Since the v3 rebrand the hero title is "UNALOGIX DataTools".
        # It is a proper noun, so the Spanish value is the same; the
        # localized tagline is stored under ``home.caption``.
        title = t("home.title", "en")
        assert title == "UNALOGIX DataTools"

    def test_returns_spanish_value(self):
        # The title is unchanged in es; the caption is the part that
        # is actually translated.
        title = t("home.title", "es")
        assert title == "UNALOGIX DataTools"
        caption = t("home.caption", "es")
        assert "Limpia" in caption

    def test_missing_key_falls_back_to_english(self):
        # A key that resolves in no pack falls all the way through to
        # the literal dotted key, so the mistake is visible rather
        # than silent.
        bogus_key = "definitely.not.a.real.key"
        assert t(bogus_key, "es") == bogus_key

    def test_spanish_missing_key_falls_back_to_english(self, tmp_path, monkeypatch):
        # Scenario: the key is present in en.json but absent from
        # es.json. A Spanish lookup must resolve through the English
        # pack instead of returning the dotted key.
        from src import i18n as i18n_mod

        english_pack = {"a": {"b": "english-only"}}
        spanish_pack = {}

        # Drop anything cached from the real packs before redirecting
        # the loader to a temp dir holding a sparse Spanish pack.
        i18n_mod._load_pack.cache_clear()
        monkeypatch.setattr(i18n_mod, "_PACK_DIR", tmp_path)
        tmp_path.joinpath("en.json").write_text(
            json.dumps(english_pack), encoding="utf-8",
        )
        tmp_path.joinpath("es.json").write_text(
            json.dumps(spanish_pack), encoding="utf-8",
        )
        try:
            resolved = i18n_mod.t("a.b", "es")
            assert resolved == "english-only"
        finally:
            # Never leave the temp packs cached for later tests.
            i18n_mod._load_pack.cache_clear()
class TestInterpolation:
    """Keyword-argument interpolation performed by ``t()``."""

    def test_named_placeholder(self):
        # The ``upload.using_session_file`` template carries a
        # ``{name}`` placeholder.
        rendered = t("upload.using_session_file", "en", name="data.csv")
        assert "data.csv" in rendered

    def test_missing_placeholder_is_tolerated(self):
        # When the caller omits the placeholder value, the raw
        # template comes back instead of the UI crashing.
        template = t("upload.using_session_file", "en")
        assert "{name}" in template
class TestPackParity:
    """The English and Spanish packs must define the same set of keys.

    A key present only in en.json means Spanish users see an English
    fallback for a string the translator was never told about; CI
    should surface that translation gap. The reverse (a key only in
    es.json) is stale dead weight and is flagged as well.
    """

    @staticmethod
    def _key_sets():
        """Return ``(en_keys, es_keys)`` as sets of dotted key paths."""
        english, spanish = (
            set(_flatten(_load_pack(code))) for code in ("en", "es")
        )
        return english, spanish

    def test_es_mirrors_en(self):
        english, spanish = self._key_sets()
        absent = english.difference(spanish)
        assert not absent, f"Spanish pack missing keys: {sorted(absent)}"

    def test_no_orphan_keys_in_es(self):
        # Opposite direction: Spanish keys whose English counterpart
        # no longer exists.
        english, spanish = self._key_sets()
        stale = spanish.difference(english)
        assert not stale, f"Spanish pack has stale keys: {sorted(stale)}"
class TestRegistry:
    """The language registry and the pack files on disk must agree."""

    def test_languages_listed(self):
        # Both shipped languages must be advertised by the registry.
        registered = {lang["code"] for lang in available_languages()}
        assert registered.issuperset({"en", "es"})

    def test_every_registered_lang_has_a_pack(self):
        # Each registered language needs a ``<code>.json`` pack file.
        for lang in LANGUAGES:
            pack_file = _PACK_DIR / f"{lang['code']}.json"
            assert pack_file.exists()
class TestFarewellEscape:
    """Pin the contract of the escape helper used by the farewell overlay.

    The overlay interpolates pack strings into a JS payload, so a quote
    or angle bracket in a translation (malicious or accidental) must
    not be able to break out of the JS string or the surrounding HTML.
    The helper is exercised directly.
    """

    def test_escapes_quotes_and_html(self):
        from src.gui.components._legacy import _js_html_safe

        escaped = _js_html_safe("Cerrando 'app' <script>x</script>")

        # Single quotes must come out backslash-escaped so they cannot
        # terminate the JS string literal wrapping the payload ...
        assert "\\'" in escaped
        # ... and once the escaped ones are removed, no bare quote may
        # remain.
        without_escaped_quotes = escaped.replace("\\'", "")
        assert "'" not in without_escaped_quotes

        # Angle brackets must be HTML-escaped.
        assert "<script>" not in escaped
        assert "&lt;script&gt;" in escaped

    def test_backslash_doubled(self):
        from src.gui.components._legacy import _js_html_safe

        # A lone backslash in the input is doubled in the output.
        escaped = _js_html_safe("a\\b")
        assert escaped == "a\\\\b"
class TestKeyCoverage:
    """Spot-check a handful of keys the GUI depends on, so that a rename
    made in one place cannot silently vanish from the other."""

    # Keys that must resolve to real copy in every shipped pack.
    _SPOT_CHECK_KEYS = (
        "home.title",
        "home.caption",
        "chrome.footer",
        "chrome.language_label",
        "upload.heading",
        "upload.run_button",
        "upload.skip_button",
        "findings.header",
        "findings.none",
        "gate.warning",
        "gate.open_review",
        "quit.button",
        "quit.shutting_down",
        "quit.farewell_title",
        "quit.farewell_subtitle",
        "close_page.title",
        "close_page.button",
        "status.ready",
        "status.coming_soon",
        "tools.01_deduplicator.name",
        "tools.09_pipeline_runner.description",
    )

    @pytest.mark.parametrize("key", _SPOT_CHECK_KEYS)
    def test_key_resolves_in_both_packs(self, key):
        # A lookup that returns nothing, or echoes the key back,
        # means the key did not resolve in that pack.
        for lang in ("en", "es"):
            resolved = t(key, lang)
            assert resolved and resolved != key, f"missing {key!r} in {lang}"
class TestHelpPopoverKeys:
    """Guard the i18n keys behind each tool's inline Help popover.

    ``render_tool_header`` takes the popover copy from
    ``tools.<id>.help_md`` plus the shared labels ``help.button_label``
    and ``help.missing_body``. If one of those keys is missing, the
    lookup falls back to the literal key and that string is rendered in
    the popover instead of helpful content.
    """

    @pytest.mark.parametrize("lang", ["en", "es"])
    def test_help_shared_keys_present(self, lang):
        shared_keys = ("help.button_label", "help.missing_body")
        for key in shared_keys:
            resolved = t(key, lang)
            assert resolved and resolved != key, f"missing {key!r} in {lang!r}"

    @pytest.mark.parametrize("lang", ["en", "es"])
    def test_every_tool_has_help_md(self, lang):
        # Lazy import keeps this file importable without the GUI.
        from src.gui.tools_registry import TOOLS

        def lacks_help(tool_id):
            """True when the tool's help_md is empty, blank, or unresolved."""
            key = f"tools.{tool_id}.help_md"
            text = t(key, lang)
            has_copy = bool(text) and text != key and bool(text.strip())
            return not has_copy

        missing: list[str] = [
            tool.tool_id for tool in TOOLS if lacks_help(tool.tool_id)
        ]
        assert not missing, (
            f"language {lang!r} is missing help_md for: {missing}"
        )