Files
datatools-dev/tests/test_lang_packs.py
Michael c4ce86bd64 feat(i18n): add language-pack scaffold with English and Spanish
Introduces ``src/i18n`` with a tiny JSON-backed t() lookup, an in-session
language preference, and a sidebar selector wired through
``hide_streamlit_chrome`` so every page picks up the same picker. Covers
home, tool cards, findings panel, gate, shutdown, and pickup banner
strings. Tests pin pack parity and the farewell-overlay JS escape so
future packs can't silently regress.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-13 15:11:30 +00:00

175 lines
6.0 KiB
Python

"""Tests for the GUI language-pack i18n module.
Covers:
- t() basic lookup, missing-key fallback to English, then to the key.
- str.format kwargs interpolation and tolerant handling of missing keys.
- Parity between English and Spanish packs so a new key in en.json
doesn't silently regress to English when es is active.
- The JS-escape helper used by the farewell overlay.
"""
from __future__ import annotations
import json
from pathlib import Path
import pytest
from src.i18n import LANGUAGES, available_languages, t
# Directory holding the JSON language packs, resolved from this test
# file's location: two directory levels up, then ``src/i18n/packs``.
_PACK_DIR = Path(__file__).resolve().parents[1] / "src" / "i18n" / "packs"
def _flatten(obj, prefix: str = ""):
    """Yield the dotted-key path of every leaf in a nested dict pack.

    Args:
        obj: A pack, or a sub-tree of one. Dicts are descended into; any
            other value is treated as a leaf.
        prefix: Dotted path accumulated so far; empty at the top level.

    Yields:
        One dotted path per leaf value, e.g. ``"home.title"``. An empty
        dict contributes no paths.
    """
    if not isinstance(obj, dict):
        # Leaf value: the accumulated prefix is its complete path.
        yield prefix
        return
    for key, value in obj.items():
        # Only join with a dot once there is a parent path to join onto.
        path = f"{prefix}.{key}" if prefix else key
        yield from _flatten(value, path)
def _load_pack(code: str) -> dict:
    """Read and parse ``<code>.json`` from ``_PACK_DIR`` as UTF-8 JSON."""
    pack_path = _PACK_DIR / f"{code}.json"
    return json.loads(pack_path.read_text(encoding="utf-8"))
class TestLookup:
    """Basic ``t()`` lookups and the fallback chain for missing keys."""

    def test_returns_english_value_by_default(self):
        assert t("home.title", "en").startswith("🧹 DataTools")

    def test_returns_spanish_value(self):
        assert "Maestría" in t("home.title", "es")

    def test_missing_key_falls_back_to_english(self):
        # The key below exists in neither pack, so the Spanish lookup
        # falls through the English fallback and ends at the literal key
        # string. Returning the key keeps a wrong key visible, not silent.
        out = t("definitely.not.a.real.key", "es")
        assert out == "definitely.not.a.real.key"

    def test_spanish_missing_key_falls_back_to_english(self, tmp_path, monkeypatch):
        # Simulate: a key exists in en.json but not in es.json. The Spanish
        # lookup should resolve via the English fallback rather than
        # returning the dotted key.
        from src import i18n as i18n_mod

        # Drop any packs cached from the real pack directory before the
        # loader is redirected.
        i18n_mod._load_pack.cache_clear()
        # Point the loader at a temp dir with a sparse Spanish pack.
        monkeypatch.setattr(i18n_mod, "_PACK_DIR", tmp_path)
        (tmp_path / "en.json").write_text(
            json.dumps({"a": {"b": "english-only"}}), encoding="utf-8",
        )
        (tmp_path / "es.json").write_text(json.dumps({}), encoding="utf-8")
        try:
            assert i18n_mod.t("a.b", "es") == "english-only"
        finally:
            # Clear again so the temp packs cannot leak into later tests.
            i18n_mod._load_pack.cache_clear()
class TestInterpolation:
    """Keyword interpolation of pack templates through ``t()``."""

    def test_named_placeholder(self):
        # The ``upload.using_session_file`` template contains ``{name}``.
        rendered = t("upload.using_session_file", "en", name="data.csv")
        assert "data.csv" in rendered

    def test_missing_placeholder_is_tolerated(self):
        # Omitting the kwarg must hand back the raw template instead of
        # raising, so a forgetful caller cannot crash the UI.
        raw_template = t("upload.using_session_file", "en")
        assert "{name}" in raw_template
class TestPackParity:
    """Every key in en.json must exist in every other registered pack.

    A divergence means a user with that language sees an English
    fallback for a string the translator hasn't been told about, which
    is a translation gap we want CI to surface.
    """

    def test_es_mirrors_en(self):
        en_keys = set(_flatten(_load_pack("en")))
        es_keys = set(_flatten(_load_pack("es")))
        missing = en_keys - es_keys
        assert not missing, f"Spanish pack missing keys: {sorted(missing)}"

    def test_no_orphan_keys_in_es(self):
        # The other direction: stale Spanish keys that no longer exist
        # in English are dead weight; flag them too.
        en_keys = set(_flatten(_load_pack("en")))
        es_keys = set(_flatten(_load_pack("es")))
        orphans = es_keys - en_keys
        assert not orphans, f"Spanish pack has stale keys: {sorted(orphans)}"

    def test_every_registered_pack_matches_en(self):
        # The two tests above hard-code Spanish. Walk the registry as well
        # so a newly registered language is held to the same contract
        # without anyone having to remember to add a test for it.
        en_keys = set(_flatten(_load_pack("en")))
        problems = {}
        for entry in LANGUAGES:
            code = entry["code"]
            if code == "en":
                continue
            keys = set(_flatten(_load_pack(code)))
            missing = en_keys - keys
            orphans = keys - en_keys
            if missing or orphans:
                problems[code] = {
                    "missing": sorted(missing),
                    "stale": sorted(orphans),
                }
        assert not problems, f"Packs out of sync with en.json: {problems}"
class TestRegistry:
    """The language registry and the pack files on disk must agree."""

    def test_languages_listed(self):
        codes = {entry["code"] for entry in available_languages()}
        assert {"en", "es"} <= codes

    def test_every_registered_lang_has_a_pack(self):
        # Collect every offender so a single run reports all registered
        # languages lacking a pack file, rather than stopping at the first.
        missing = [
            entry["code"]
            for entry in LANGUAGES
            if not (_PACK_DIR / f"{entry['code']}.json").exists()
        ]
        assert not missing, f"registered languages without a pack file: {missing}"
class TestFarewellEscape:
    """Pin the escaping contract of the farewell overlay's helper.

    The farewell overlay interpolates pack strings into a JS payload, so
    a stray quote or angle bracket in a translation must not be able to
    terminate the JS string or break the surrounding HTML. The helper is
    exercised directly rather than through the overlay.
    """

    def test_escapes_quotes_and_html(self):
        from src.gui.components._legacy import _js_html_safe

        escaped = _js_html_safe("Cerrando 'app' <script>x</script>")
        # A bare single quote would close the JS string literal wrapping
        # the payload, so quotes may only appear backslash-escaped.
        assert "\\'" in escaped
        without_escaped_quotes = escaped.replace("\\'", "")
        assert "'" not in without_escaped_quotes
        # The raw tag must be gone and its entity-encoded form present.
        assert "<script>" not in escaped
        assert "&lt;script&gt;" in escaped

    def test_backslash_doubled(self):
        from src.gui.components._legacy import _js_html_safe

        source, expected = "a\\b", "a\\\\b"
        assert _js_html_safe(source) == expected
class TestKeyCoverage:
    """Spot-check a few keys the GUI relies on so a rename in one place
    doesn't silently disappear from the other."""

    @pytest.mark.parametrize("key", [
        "home.title",
        "home.caption",
        "chrome.footer",
        "chrome.language_label",
        "upload.heading",
        "upload.run_button",
        "upload.skip_button",
        "findings.header",
        "findings.none",
        "gate.warning",
        "gate.open_review",
        "quit.button",
        "quit.shutting_down",
        "quit.farewell_title",
        "quit.farewell_subtitle",
        "close_page.title",
        "close_page.button",
        "status.ready",
        "status.coming_soon",
        "tools.01_deduplicator.name",
        "tools.09_pipeline_runner.description",
    ])
    def test_key_resolves_in_both_packs(self, key):
        for lang in ("en", "es"):
            # ``t()`` falls back to English when the requested pack lacks
            # a key, so a successful lookup alone cannot prove the key is
            # in ``lang``'s own pack. Check the pack file directly first.
            pack_keys = set(_flatten(_load_pack(lang)))
            assert key in pack_keys, f"missing {key!r} in {lang}"
            # Then confirm the lookup returns a non-empty value that is
            # not just the key echoed back.
            value = t(key, lang)
            assert value and value != key, f"missing {key!r} in {lang}"