feat(i18n): add language-pack scaffold with English and Spanish

Introduces ``src/i18n`` with a tiny JSON-backed t() lookup, an in-session language preference, and a sidebar selector wired through ``hide_streamlit_chrome`` so every page picks up the same picker. Covers home, tool cards, findings panel, gate, shutdown, and pickup banner strings. Tests pin pack parity and the farewell-overlay JS escape so future packs can't silently regress. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-13 15:11:30 +00:00
parent 4706ed571e
commit c4ce86bd64
8 changed files with 649 additions and 75 deletions
--- a/src/gui/app.py
+++ b/src/gui/app.py
@@ -26,13 +26,16 @@ from src.gui.components import (
    hide_streamlit_chrome,
    upload_and_analyze_section,
 )
+from src.i18n import t

 st.set_page_config(
-    page_title="DataTools — Data Cleaning Mastery",
+    page_title=t("home.page_title"),
    page_icon="🧹",
    layout="wide",
 )

+# ``hide_streamlit_chrome`` also renders the sidebar language selector,
+# so every page that hides chrome picks up the same picker.
 hide_streamlit_chrome()


@@ -40,8 +43,8 @@ hide_streamlit_chrome()
 # Home page
 # ---------------------------------------------------------------------------

-st.title("🧹 DataTools — Data Cleaning Mastery")
-st.caption("A 9-tool suite for cleaning, standardizing, and validating tabular data. Runs 100% locally.")
+st.title(t("home.title"))
+st.caption(t("home.caption"))

 st.divider()

@@ -57,7 +60,7 @@ st.divider()
 # Tool cards
 # ---------------------------------------------------------------------------

-from src.gui.tools_registry import TOOLS
+from src.gui.tools_registry import TOOLS, tool_description, tool_name

 # Render tool cards in a 3-column grid. Cards picked up by the analyzer get a
 # coloured "N findings" badge so the user can see at a glance which tools
@@ -70,15 +73,17 @@ for row_start in range(0, len(TOOLS), 3):
            break
        tool = TOOLS[idx]
        with col:
+            status_key = "status.ready" if tool.status == "Ready" else "status.coming_soon"
            status_color = "green" if tool.status == "Ready" else "orange"
            badge = ""
            n = findings_count_for_tool(tool.tool_id)
            if n:
-                badge = f" :red-background[**{n} finding{'s' if n != 1 else ''}**]"
+                badge_key = "home.findings_badge_one" if n == 1 else "home.findings_badge_other"
+                badge = f" :red-background[**{t(badge_key, n=n)}**]"
            st.markdown(
-                f"### {tool.icon} {tool.name}{badge}\n\n"
-                f"{tool.description}\n\n"
-                f":{status_color}[**{tool.status}**]"
+                f"### {tool.icon} {tool_name(tool.tool_id)}{badge}\n\n"
+                f"{tool_description(tool.tool_id)}\n\n"
+                f":{status_color}[**{t(status_key)}**]"
            )


@@ -87,7 +92,4 @@ for row_start in range(0, len(TOOLS), 3):
 # ---------------------------------------------------------------------------

 st.divider()
-st.caption(
-    "Runs locally. Your data never leaves this computer. "
-    "| DataTools v3.0"
-)
+st.caption(t("chrome.footer"))
--- a/src/gui/components/_legacy.py
+++ b/src/gui/components/_legacy.py
@@ -11,6 +11,7 @@ from typing import Optional
 import pandas as pd
 import streamlit as st

+from src.i18n import t as _t
 from src.core.dedup import (
    Algorithm,
    ColumnMatchStrategy,
@@ -72,15 +73,26 @@ footer {


 def hide_streamlit_chrome() -> None:
-    """Inject CSS to hide Streamlit's default header, menu, and footer."""
+    """Inject CSS to hide Streamlit's default header, menu, and footer.
+
+    Also renders the sidebar language selector, since every entrypoint
+    that hides the default chrome wants the picker visible in the
+    same place. Pages that want a clean chrome without the selector can
+    inject ``_HIDE_CHROME_CSS`` themselves instead of calling this.
+    """
    st.markdown(_HIDE_CHROME_CSS, unsafe_allow_html=True)
+    # NOTE(review): ``src.i18n`` is already imported at the top of this
+    # module (``t as _t``), so this function-local import does not keep
+    # the module importable without i18n; it only scopes the name here.
+    from src.i18n import render_language_selector
+    render_language_selector()


 # ---------------------------------------------------------------------------
 # Clean shutdown
 # ---------------------------------------------------------------------------

-_FAREWELL_SCRIPT = """
+_FAREWELL_SCRIPT_TEMPLATE = """
 <script>
  (function () {
    // Strategy: append a full-screen overlay directly to the parent's
@@ -104,8 +116,8 @@ _FAREWELL_SCRIPT = """
        '<div style="text-align:center;padding:32px 40px;border:1px solid #252a36;' +
        'border-radius:12px;background:#161922;max-width:480px;">' +
        '<h1 style="margin:0 0 8px 0;font-weight:600;letter-spacing:-0.01em;">' +
-        'DataTools has shut down</h1>' +
-        '<p style="opacity:0.7;margin:0;">You can close this window.</p>' +
+        '__TITLE__</h1>' +
+        '<p style="opacity:0.7;margin:0;">__SUBTITLE__</p>' +
        '</div>';
      return overlay;
    }
@@ -127,7 +139,32 @@ _FAREWELL_SCRIPT = """
 """


-def quit_button(label: str = "Quit app", *, key: str = "quit_app_button") -> None:
+def _js_html_safe(s: str) -> str:
+    """Escape *s* so it can be embedded inside the farewell overlay's
+    JS-single-quoted, innerHTML-bound payload.
+
+    The text needs both JS-string and HTML escaping. Order matters:
+    backslash first (so the escapes added afterwards are not themselves
+    re-escaped), then the JS string terminator and raw line breaks (a
+    literal CR/LF inside a quoted JS string is a syntax error that would
+    stop the whole overlay script from running), then the HTML-special
+    characters.
+    """
+    return (
+        s.replace("\\", "\\\\")
+         .replace("'", "\\'")
+         .replace("\r", "\\r")
+         .replace("\n", "\\n")
+         .replace("&", "&amp;")
+         .replace("<", "&lt;")
+         .replace(">", "&gt;")
+    )
+
+
+def _farewell_script() -> str:
+    """Return the farewell overlay script localized for the active language.
+
+    Fills the ``__TITLE__`` and ``__SUBTITLE__`` placeholders of
+    ``_FAREWELL_SCRIPT_TEMPLATE`` with the escaped ``quit.farewell_*``
+    strings.
+    """
+    title = _js_html_safe(_t("quit.farewell_title"))
+    subtitle = _js_html_safe(_t("quit.farewell_subtitle"))
+    script = _FAREWELL_SCRIPT_TEMPLATE.replace("__TITLE__", title)
+    return script.replace("__SUBTITLE__", subtitle)
+
+
+def quit_button(label: str | None = None, *, key: str = "quit_app_button") -> None:
    """Render a Quit button that terminates the Streamlit server.

    Streamlit has no first-class shutdown hook, and signalling the
@@ -140,10 +177,13 @@ def quit_button(label: str = "Quit app", *, key: str = "quit_app_button") -> Non
    a self-contained "App closed" page so the user never sees
    Streamlit's red connection-error overlay.
    """
+    if label is None:
+        label = _t("quit.button")
+
    if st.session_state.get("_app_shutting_down"):
        from streamlit.components.v1 import html as _components_html
-        _components_html(_FAREWELL_SCRIPT, height=0)
-        st.success("Shutting down… you can close this window.")
+        _components_html(_farewell_script(), height=0)
+        st.success(_t("quit.shutting_down"))
        st.stop()

    if st.button(label, key=key, type="secondary"):
@@ -824,15 +864,25 @@ _TOOL_PAGE_PATHS: dict[str, str] = {


 def tool_display_name(tool_id: str) -> str:
-    """Map a stable tool id to its GUI display name; falls back to the id."""
-    return TOOL_DISPLAY_NAMES.get(tool_id, tool_id) if tool_id else "Informational"
+    """Map a stable tool id to its GUI display name; falls back to the id.
+
+    Routes through the active language pack so the home grid, findings
+    panel headers, and "Open tool" buttons all stay in sync with the
+    sidebar's language selection.
+    """
+    if not tool_id:
+        return _t("findings.untargeted_label")
+    translated = _t(f"tools.{tool_id}.name")
+    if translated != f"tools.{tool_id}.name":
+        return translated
+    return TOOL_DISPLAY_NAMES.get(tool_id, tool_id)


 def _tool_page_slug(tool_id: str) -> str:
    return _TOOL_PAGE_PATHS.get(tool_id, "")


-def render_findings_panel(findings, *, header: str = "Detected issues") -> None:
+def render_findings_panel(findings, *, header: str | None = None) -> None:
    """Render a list of :class:`Finding` objects grouped by tool.

    Each tool gets a header with the count, an open-tool button, and a list
@@ -842,8 +892,11 @@ def render_findings_panel(findings, *, header: str = "Detected issues") -> None:
    from src.core.analyze import findings_by_tool  # local import to avoid cycle
    from src.core.text_clean import hidden_char_css

+    if header is None:
+        header = _t("findings.header")
+
    if not findings:
-        st.success("No issues detected. Open any tool below to start working.")
+        st.success(_t("findings.none"))
        return

    # Inject the hidden-char badge styles once so every sample value below
@@ -854,7 +907,10 @@ def render_findings_panel(findings, *, header: str = "Detected issues") -> None:
    for f in findings:
        by_sev[f.severity] = by_sev.get(f.severity, 0) + 1
    sev_summary = " · ".join(
-        f"{_SEVERITY_ICON[s]} {by_sev[s]} {s}"
+        _t(
+            "findings.severity_summary_segment",
+            icon=_SEVERITY_ICON[s], n=by_sev[s], severity=s,
+        )
        for s in ("error", "warn", "info") if by_sev.get(s)
    )
    st.markdown(f"### {header}")
@@ -865,8 +921,9 @@ def render_findings_panel(findings, *, header: str = "Detected issues") -> None:

    for tool_id in sorted(grouped):
        items = grouped[tool_id]
+        name = tool_display_name(tool_id)
        with st.expander(
-            f"{tool_display_name(tool_id)} — {len(items)} finding(s)",
+            _t("findings.tool_section_label", tool=name, n=len(items)),
            expanded=any(f.severity == "error" for f in items),
        ):
            for f in items:
@@ -876,11 +933,11 @@ def render_findings_panel(findings, *, header: str = "Detected issues") -> None:
                # Streamlit resolves page paths relative to the entrypoint
                # (src/gui/app.py), so a leading ``src/gui/`` would point
                # outside the allowed page tree on Windows.
-                st.page_link(page_slug, label=f"Open {tool_display_name(tool_id)} →")
+                st.page_link(page_slug, label=_t("findings.open_tool", tool=name))

    if untargeted:
        with st.expander(
-            f"Other / file-level — {len(untargeted)} finding(s)",
+            _t("findings.other_section_label", n=len(untargeted)),
            expanded=False,
        ):
            for f in untargeted:
@@ -1066,28 +1123,15 @@ def upload_and_analyze_section() -> None:
    own uploader. Each tool page already has its own uploader today, so
    this is purely additive.
    """
-    st.markdown("### 📤 Upload a file to start")
-    st.caption(
-        "Optional: scan an uploaded file for data quality issues and see "
-        "which tools can fix each one. Skip if you already know what you need."
-    )
-    st.caption(
-        "**Up to 1 GB.** Formats: CSV, TSV, XLSX, XLS. "
-        "Delimiters auto-detected: comma, tab, semicolon, pipe. "
-        "Encodings auto-detected: UTF-8 (with/without BOM), UTF-16, "
-        "cp1252, Latin-1/9, cp1250, ISO-8859-2, cp1251, KOI8-R, "
-        "Mac Roman, Shift_JIS, GB18030, Big5, EUC-KR — and override on the Review page."
-    )
+    st.markdown(f"### {_t('upload.heading')}")
+    st.caption(_t("upload.intro"))
+    st.caption(_t("upload.limits"))

    uploaded = st.file_uploader(
-        "Upload CSV or Excel",
+        _t("upload.uploader_label"),
        type=["csv", "tsv", "xlsx", "xls"],
        key="home_upload",
-        help=(
-            "Up to 1 GB. Comma / tab / semicolon / pipe delimiters all "
-            "auto-detected. Encoding auto-detected with override on the "
-            "Review page if needed."
-        ),
+        help=_t("upload.uploader_help"),
    )
    if uploaded is None:
        return
@@ -1106,16 +1150,16 @@ def upload_and_analyze_section() -> None:

    col_run, col_skip, _ = st.columns([1, 1, 4])
    with col_run:
-        run_clicked = st.button("Run analysis", type="primary", key="home_run_analysis")
+        run_clicked = st.button(_t("upload.run_button"), type="primary", key="home_run_analysis")
    with col_skip:
-        skip_clicked = st.button("Skip", key="home_skip_analysis")
+        skip_clicked = st.button(_t("upload.skip_button"), key="home_skip_analysis")

    if skip_clicked:
        st.session_state["home_findings"] = []
        st.session_state["home_skipped"] = True

    if run_clicked:
-        with st.spinner("Scanning…"):
+        with st.spinner(_t("upload.scanning")):
            findings = _run_analysis_on_upload(uploaded)
        st.session_state["home_findings"] = findings
        st.session_state["home_skipped"] = False
@@ -1125,7 +1169,7 @@ def upload_and_analyze_section() -> None:
        return

    if st.session_state.get("home_skipped"):
-        st.info("Analysis skipped. Open any tool below to start working.")
+        st.info(_t("upload.skipped_notice"))
        return

    st.divider()
@@ -1230,13 +1274,9 @@ def require_normalization_gate() -> None:
    if matched:
        return

-    name = st.session_state.get("home_uploaded_name", "the uploaded file")
-    st.warning(
-        f"**{name}** must pass the CSV-normalization gate before you can "
-        f"use this tool. Open the Review page to apply the fixes our "
-        f"analyzer recommends."
-    )
-    if st.button("Go to Review & Normalize", type="primary"):
+    name = st.session_state.get("home_uploaded_name") or _t("gate.default_name")
+    st.warning(_t("gate.warning", name=name))
+    if st.button(_t("gate.open_review"), type="primary"):
        st.switch_page("pages/0_Review.py")
    st.stop()

@@ -1269,27 +1309,22 @@ def pickup_or_upload(
    use_session = has_session_upload and not st.session_state.get(override_key, False)

    if use_session:
-        name = st.session_state.get("home_uploaded_name", "uploaded file")
-        st.info(f"Using **{name}** from the upload screen.")
-        if st.button("Use a different file", key=f"{key}__pick_diff"):
+        name = st.session_state.get("home_uploaded_name") or _t("gate.default_name")
+        st.info(_t("upload.using_session_file", name=name))
+        if st.button(_t("upload.use_different_file"), key=f"{key}__pick_diff"):
            st.session_state[override_key] = True
            st.rerun()
        return _StashedUpload(name, st.session_state["home_uploaded_bytes"])

    if {"csv", "tsv", "xlsx", "xls"} & set(types):
-        st.caption(
-            "Up to 1 GB. Delimiters auto-detected: comma, tab, semicolon, pipe. "
-            "Encoding auto-detected (UTF-8 / UTF-16 / cp1252 / Latin-1 family / "
-            "cp1250 / cp1251 / KOI8-R / Mac Roman / Shift_JIS / GB18030 / Big5 / "
-            "EUC-KR), with override on the Review page."
-        )
+        st.caption(_t("upload.pickup_caption"))
    uploaded = st.file_uploader(label, type=types, key=key, help=help)
    if uploaded is not None and st.session_state.get(override_key):
        # User has uploaded their own file on this page; clear the override
        # so the next visit to a tool page starts fresh.
        pass
    if uploaded is None and st.session_state.get(override_key) and has_session_upload:
-        if st.button("Switch back to upload-screen file", key=f"{key}__switch_back"):
+        if st.button(_t("upload.switch_back"), key=f"{key}__switch_back"):
            st.session_state[override_key] = False
            st.rerun()
    return uploaded
--- a/src/gui/pages/99_Close.py
+++ b/src/gui/pages/99_Close.py
@@ -18,24 +18,21 @@ if str(_project_root) not in sys.path:
    sys.path.insert(0, str(_project_root))

 from src.gui.components import hide_streamlit_chrome, quit_button
+from src.i18n import t

 st.set_page_config(
-    page_title="DataTools — Close",
+    page_title=t("close_page.page_title"),
    page_icon="🛑",
    layout="wide",
 )

 hide_streamlit_chrome()

-st.title("🛑 Close DataTools")
-st.caption("Shut down the local app and free the terminal.")
+st.title(t("close_page.title"))
+st.caption(t("close_page.caption"))
 st.divider()

-st.markdown(
-    "Clicking the button below will terminate the DataTools server. "
-    "Any unsaved work in other tools will be lost. Once the app shuts "
-    "down you can close this window."
-)
+st.markdown(t("close_page.body"))

 st.write("")
-quit_button(label="Close the app", key="quit_app_button_page")
+quit_button(label=t("close_page.button"), key="quit_app_button_page")
--- a/src/gui/tools_registry.py
+++ b/src/gui/tools_registry.py
@@ -150,3 +150,20 @@ def display_name(tool_id: str) -> str:
    """Return the human-readable name; fall back to the id when unknown."""
    t = tool_by_id(tool_id)
    return t.name if t else tool_id
+
+
+def tool_name(tool_id: str) -> str:
+    """Return the tool's name from the active language pack.
+
+    The lookup returns the key itself when no pack defines it, so an
+    echoed key means "no translation" and the registry's
+    ``display_name`` result is used instead.
+    """
+    from src.i18n import t as _t
+    key = f"tools.{tool_id}.name"
+    fallback = display_name(tool_id)
+    translated = _t(key)
+    if translated == key:
+        return fallback
+    return translated
+
+
+def tool_description(tool_id: str) -> str:
+    """Return the tool's description from the active language pack.
+
+    Falls back to the registry entry's ``description`` (or ``""`` when
+    ``tool_by_id`` finds nothing) if the lookup just echoes the key back.
+    """
+    from src.i18n import t as _t
+    key = f"tools.{tool_id}.description"
+    entry = tool_by_id(tool_id)
+    fallback = entry.description if entry else ""
+    translated = _t(key)
+    if translated == key:
+        return fallback
+    return translated
--- a/src/i18n/__init__.py
+++ b/src/i18n/__init__.py
@@ -0,0 +1,155 @@
+"""Language packs for the DataTools GUI.
+
+A language pack is a JSON file under ``src/i18n/packs/`` keyed by ISO 639-1
+language code (``en.json``, ``es.json``, …). Keys are dotted paths
+(``home.title``, ``tools.deduplicator.name``); values are the translated
+strings. The English pack is canonical — missing keys in other packs fall
+back to the English value, and missing keys in English fall back to the
+key itself so a typo surfaces as a visible string instead of a crash.
+
+Adding a language: drop a new ``<code>.json`` next to ``en.json`` mirroring
+its key tree, then add a one-line entry to ``LANGUAGES``. The sidebar
+selector picks it up automatically.
+
+Translation lookup is intentionally tiny — no gettext, no babel, no
+po-file pipeline. ``t()`` resolves the key and, when callers pass keyword
+arguments, interpolates them into the value with ``str.format``.
+"""
+
+from __future__ import annotations
+
+import json
+from functools import lru_cache
+from pathlib import Path
+from typing import Any
+
+# Directory holding the ``<code>.json`` pack files, next to this module.
+_PACK_DIR = Path(__file__).resolve().parent / "packs"
+# Canonical language: the default and the lookup fallback.
+_DEFAULT_LANG = "en"
+# ``st.session_state`` key under which the active language code is kept.
+_SESSION_KEY = "ui_lang"
+
+
+# Display registry. ``label`` is what the sidebar shows; ``code`` is the
+# JSON filename stem. Keep this list short — every entry must have a
+# fully-translated pack in ``packs/``.
+LANGUAGES: list[dict[str, str]] = [
+    {"code": "en", "label": "English"},
+    {"code": "es", "label": "Español"},
+]
+
+
+def available_languages() -> list[dict[str, str]]:
+    """Return a copy of ``LANGUAGES``: a new list of new dicts per call."""
+    return [{**entry} for entry in LANGUAGES]
+
+
+@lru_cache(maxsize=8)
+def _load_pack(lang: str) -> dict[str, Any]:
+    """Parse ``<lang>.json`` from the pack directory; ``{}`` if it is absent.
+
+    Results are memoized per language code, so a pack is not reparsed
+    while its entry stays in the cache.
+    """
+    pack_file = _PACK_DIR / f"{lang}.json"
+    if pack_file.exists():
+        with pack_file.open("r", encoding="utf-8") as handle:
+            return json.load(handle)
+    return {}
+
+
+def _resolve(pack: dict[str, Any], key: str) -> Any:
+    """Follow the dot-separated *key* down through *pack*.
+
+    Returns the value found at the end of the path, or ``None`` as soon as
+    a segment is missing or the current node is not a dict.
+    """
+    current: Any = pack
+    for segment in key.split("."):
+        if isinstance(current, dict) and segment in current:
+            current = current[segment]
+            continue
+        return None
+    return current
+
+
+def t(key: str, lang: str | None = None, /, **kwargs: Any) -> str:
+    """Look up *key* in a language pack and return the display string.
+
+    Fallback chain: requested lang → English → the key itself. *lang*
+    defaults to ``current_language()``. ``key`` and ``lang`` are
+    positional-only, so those names stay free for use as placeholders.
+
+    ``kwargs`` are passed through ``str.format`` so call sites can use
+    named placeholders (``t("upload.using", name=name)``). Formatting only
+    runs when at least one keyword argument is given. A template that
+    cannot be formatted — missing placeholder, unbalanced brace, bad
+    format spec, attribute/index access on an unsuitable value — is
+    returned unformatted (raw braces remain) rather than raising, because
+    a translation file shouldn't be able to crash the UI.
+
+    A non-string value (e.g. *key* names a subtree of the pack) is
+    returned via ``str()``.
+    """
+    if lang is None:
+        lang = current_language()
+
+    value = _resolve(_load_pack(lang), key)
+    if value is None and lang != _DEFAULT_LANG:
+        value = _resolve(_load_pack(_DEFAULT_LANG), key)
+    if value is None:
+        value = key
+
+    if not isinstance(value, str):
+        return str(value)
+    if not kwargs:
+        return value
+
+    try:
+        return value.format(**kwargs)
+    except (KeyError, IndexError, ValueError, AttributeError, TypeError):
+        # str.format raises ValueError for malformed templates (a lone
+        # "{", a bad format spec) and AttributeError/TypeError for
+        # "{x.attr}"/"{x[0]}" on unsuitable values; none of these should
+        # take the page down.
+        return value
+
+
+def current_language() -> str:
+    """Return the language code stored on the Streamlit session.
+
+    Looks up ``_SESSION_KEY`` in ``st.session_state`` and yields
+    ``_DEFAULT_LANG`` when nothing is stored. If importing Streamlit
+    fails for any reason (e.g. it is not installed in a test
+    environment), the default is returned as well, so this module can be
+    imported and used without Streamlit.
+    """
+    try:
+        import streamlit as st
+    except Exception:
+        return _DEFAULT_LANG
+    else:
+        session = st.session_state
+        return session.get(_SESSION_KEY, _DEFAULT_LANG)
+
+
+def set_language(lang: str) -> None:
+    """Store *lang* under ``_SESSION_KEY`` in the Streamlit session state.
+
+    Does nothing when Streamlit cannot be imported.
+    """
+    try:
+        import streamlit
+    except Exception:
+        return
+    streamlit.session_state[_SESSION_KEY] = lang
+
+
+def render_language_selector(*, location: str = "sidebar") -> None:
+    """Render the language picker.
+
+    ``location`` is either ``"sidebar"`` (default) or ``"inline"``. The
+    sidebar form is what the home page wires up so every tool page picks
+    up the same selector through Streamlit's shared sidebar.
+
+    When the selected code differs from the active one, the choice is
+    stored with ``set_language`` and the script is rerun.
+    """
+    import streamlit as st
+
+    # Any value other than "sidebar" falls through to the top-level
+    # ``st`` API, i.e. the "inline" form.
+    target = st.sidebar if location == "sidebar" else st
+    codes = [entry["code"] for entry in LANGUAGES]
+    labels = {entry["code"]: entry["label"] for entry in LANGUAGES}
+    current = current_language()
+    # A stored code that is not in the registry would make
+    # ``codes.index`` raise below, so treat it as the default language.
+    if current not in codes:
+        current = _DEFAULT_LANG
+
+    choice = target.selectbox(
+        t("chrome.language_label"),
+        codes,
+        index=codes.index(current),
+        # Options are language codes; display their registry labels.
+        format_func=lambda c: labels.get(c, c),
+        key="_ui_lang_select",
+    )
+    if choice != current:
+        set_language(choice)
+        # Rerun the script: ``t()`` calls made before this point read the
+        # previous language.
+        st.rerun()
+
+
+__all__ = [
+    "LANGUAGES",
+    "available_languages",
+    "current_language",
+    "render_language_selector",
+    "set_language",
+    "t",
+]
--- a/src/i18n/packs/en.json
+++ b/src/i18n/packs/en.json
@@ -0,0 +1,97 @@
+{
+  "chrome": {
+    "language_label": "Language",
+    "footer": "Runs locally. Your data never leaves this computer. | DataTools v3.0"
+  },
+  "home": {
+    "page_title": "DataTools — Data Cleaning Mastery",
+    "title": "🧹 DataTools — Data Cleaning Mastery",
+    "caption": "A 9-tool suite for cleaning, standardizing, and validating tabular data. Runs 100% locally.",
+    "findings_badge_one": "{n} finding",
+    "findings_badge_other": "{n} findings"
+  },
+  "status": {
+    "ready": "Ready",
+    "coming_soon": "Coming Soon"
+  },
+  "upload": {
+    "heading": "📤 Upload a file to start",
+    "intro": "Optional: scan an uploaded file for data quality issues and see which tools can fix each one. Skip if you already know what you need.",
+    "limits": "**Up to 1 GB.** Formats: CSV, TSV, XLSX, XLS. Delimiters auto-detected: comma, tab, semicolon, pipe. Encodings auto-detected: UTF-8 (with/without BOM), UTF-16, cp1252, Latin-1/9, cp1250, ISO-8859-2, cp1251, KOI8-R, Mac Roman, Shift_JIS, GB18030, Big5, EUC-KR — and override on the Review page.",
+    "uploader_label": "Upload CSV or Excel",
+    "uploader_help": "Up to 1 GB. Comma / tab / semicolon / pipe delimiters all auto-detected. Encoding auto-detected with override on the Review page if needed.",
+    "run_button": "Run analysis",
+    "skip_button": "Skip",
+    "scanning": "Scanning…",
+    "skipped_notice": "Analysis skipped. Open any tool below to start working.",
+    "using_session_file": "Using **{name}** from the upload screen.",
+    "use_different_file": "Use a different file",
+    "switch_back": "Switch back to upload-screen file",
+    "pickup_caption": "Up to 1 GB. Delimiters auto-detected: comma, tab, semicolon, pipe. Encoding auto-detected (UTF-8 / UTF-16 / cp1252 / Latin-1 family / cp1250 / cp1251 / KOI8-R / Mac Roman / Shift_JIS / GB18030 / Big5 / EUC-KR), with override on the Review page."
+  },
+  "findings": {
+    "header": "Detected issues",
+    "none": "No issues detected. Open any tool below to start working.",
+    "severity_summary_segment": "{icon} {n} {severity}",
+    "tool_section_label": "{tool} — {n} finding(s)",
+    "other_section_label": "Other / file-level — {n} finding(s)",
+    "open_tool": "Open {tool} →",
+    "untargeted_label": "Informational"
+  },
+  "gate": {
+    "warning": "**{name}** must pass the CSV-normalization gate before you can use this tool. Open the Review page to apply the fixes our analyzer recommends.",
+    "default_name": "the uploaded file",
+    "open_review": "Go to Review & Normalize"
+  },
+  "quit": {
+    "button": "Quit app",
+    "shutting_down": "Shutting down… you can close this window.",
+    "farewell_title": "DataTools has shut down",
+    "farewell_subtitle": "You can close this window."
+  },
+  "close_page": {
+    "page_title": "DataTools — Close",
+    "title": "🛑 Close DataTools",
+    "caption": "Shut down the local app and free the terminal.",
+    "body": "Clicking the button below will terminate the DataTools server. Any unsaved work in other tools will be lost. Once the app shuts down you can close this window.",
+    "button": "Close the app"
+  },
+  "tools": {
+    "01_deduplicator": {
+      "name": "Deduplicator",
+      "description": "Fuzzy matching, normalization, survivor selection, and interactive review."
+    },
+    "02_text_cleaner": {
+      "name": "Text Cleaner",
+      "description": "Whitespace trim, multi-space collapse, Unicode normalization, BOM and line-ending handling."
+    },
+    "03_format_standardizer": {
+      "name": "Format Standardizer",
+      "description": "Standardize dates, currencies, names, phone numbers, and addresses."
+    },
+    "04_missing_handler": {
+      "name": "Missing Value Handler",
+      "description": "Detect disguised nulls, missingness analysis, and imputation strategies."
+    },
+    "05_column_mapper": {
+      "name": "Column Mapper",
+      "description": "Rename columns, enforce a target schema, and coerce types."
+    },
+    "06_outlier_detector": {
+      "name": "Outlier Detector",
+      "description": "Z-score, IQR, and MAD detection with domain-rule violations and winsorization."
+    },
+    "07_multi_file_merger": {
+      "name": "Multi-File Merger",
+      "description": "Combine multiple CSV/Excel files with schema alignment."
+    },
+    "08_validator_reporter": {
+      "name": "Validator & Reporter",
+      "description": "Validate against rules and generate PDF/Excel quality reports."
+    },
+    "09_pipeline_runner": {
+      "name": "Pipeline Runner",
+      "description": "Chain tools in recommended order and pass output between steps."
+    }
+  }
+}
--- a/src/i18n/packs/es.json
+++ b/src/i18n/packs/es.json
@@ -0,0 +1,97 @@
+{
+  "chrome": {
+    "language_label": "Idioma",
+    "footer": "Se ejecuta localmente. Tus datos nunca salen de este equipo. | DataTools v3.0"
+  },
+  "home": {
+    "page_title": "DataTools — Maestría en limpieza de datos",
+    "title": "🧹 DataTools — Maestría en limpieza de datos",
+    "caption": "Conjunto de 9 herramientas para limpiar, estandarizar y validar datos tabulares. Se ejecuta 100% en local.",
+    "findings_badge_one": "{n} hallazgo",
+    "findings_badge_other": "{n} hallazgos"
+  },
+  "status": {
+    "ready": "Listo",
+    "coming_soon": "Próximamente"
+  },
+  "upload": {
+    "heading": "📤 Sube un archivo para empezar",
+    "intro": "Opcional: analiza un archivo para detectar problemas de calidad de datos y ver qué herramientas pueden corregir cada uno. Sáltalo si ya sabes lo que necesitas.",
+    "limits": "**Hasta 1 GB.** Formatos: CSV, TSV, XLSX, XLS. Delimitadores detectados automáticamente: coma, tabulador, punto y coma, barra vertical. Codificaciones detectadas automáticamente: UTF-8 (con/sin BOM), UTF-16, cp1252, Latin-1/9, cp1250, ISO-8859-2, cp1251, KOI8-R, Mac Roman, Shift_JIS, GB18030, Big5, EUC-KR — y se pueden sustituir desde la página Revisar.",
+    "uploader_label": "Sube un archivo CSV o Excel",
+    "uploader_help": "Hasta 1 GB. Delimitadores coma / tabulador / punto y coma / barra vertical detectados automáticamente. Codificación detectada automáticamente, con opción de sustituirla en la página Revisar.",
+    "run_button": "Ejecutar análisis",
+    "skip_button": "Omitir",
+    "scanning": "Analizando…",
+    "skipped_notice": "Análisis omitido. Abre cualquier herramienta de abajo para empezar a trabajar.",
+    "using_session_file": "Usando **{name}** de la pantalla de carga.",
+    "use_different_file": "Usar otro archivo",
+    "switch_back": "Volver al archivo de la pantalla de carga",
+    "pickup_caption": "Hasta 1 GB. Delimitadores detectados automáticamente: coma, tabulador, punto y coma, barra vertical. Codificación detectada automáticamente (UTF-8 / UTF-16 / cp1252 / familia Latin-1 / cp1250 / cp1251 / KOI8-R / Mac Roman / Shift_JIS / GB18030 / Big5 / EUC-KR), con opción de sustituirla en la página Revisar."
+  },
+  "findings": {
+    "header": "Problemas detectados",
+    "none": "No se detectaron problemas. Abre cualquier herramienta de abajo para empezar a trabajar.",
+    "severity_summary_segment": "{icon} {n} {severity}",
+    "tool_section_label": "{tool} — {n} hallazgo(s)",
+    "other_section_label": "Otros / a nivel de archivo — {n} hallazgo(s)",
+    "open_tool": "Abrir {tool} →",
+    "untargeted_label": "Informativo"
+  },
+  "gate": {
+    "warning": "**{name}** debe pasar la verificación de normalización CSV antes de poder usar esta herramienta. Abre la página Revisar para aplicar las correcciones recomendadas por el analizador.",
+    "default_name": "el archivo cargado",
+    "open_review": "Ir a Revisar y Normalizar"
+  },
+  "quit": {
+    "button": "Cerrar app",
+    "shutting_down": "Cerrando… ya puedes cerrar esta ventana.",
+    "farewell_title": "DataTools se ha cerrado",
+    "farewell_subtitle": "Ya puedes cerrar esta ventana."
+  },
+  "close_page": {
+    "page_title": "DataTools — Cerrar",
+    "title": "🛑 Cerrar DataTools",
+    "caption": "Detén la aplicación local y libera la terminal.",
+    "body": "Al pulsar el botón de abajo se cerrará el servidor de DataTools. Cualquier trabajo sin guardar en otras herramientas se perderá. Una vez cerrada la app, puedes cerrar esta ventana.",
+    "button": "Cerrar la app"
+  },
+  "tools": {
+    "01_deduplicator": {
+      "name": "Eliminador de duplicados",
+      "description": "Coincidencia difusa, normalización, selección de superviviente y revisión interactiva."
+    },
+    "02_text_cleaner": {
+      "name": "Limpiador de texto",
+      "description": "Recorte de espacios, colapso de espacios múltiples, normalización Unicode, manejo de BOM y de finales de línea."
+    },
+    "03_format_standardizer": {
+      "name": "Estandarizador de formatos",
+      "description": "Estandariza fechas, monedas, nombres, números de teléfono y direcciones."
+    },
+    "04_missing_handler": {
+      "name": "Gestor de valores faltantes",
+      "description": "Detecta nulos disfrazados, analiza la ausencia de datos y aplica estrategias de imputación."
+    },
+    "05_column_mapper": {
+      "name": "Mapeador de columnas",
+      "description": "Renombra columnas, aplica un esquema objetivo y fuerza tipos de datos."
+    },
+    "06_outlier_detector": {
+      "name": "Detector de valores atípicos",
+      "description": "Detección por Z-score, IQR y MAD con reglas de dominio y winsorización."
+    },
+    "07_multi_file_merger": {
+      "name": "Combinador de varios archivos",
+      "description": "Combina varios archivos CSV/Excel alineando sus esquemas."
+    },
+    "08_validator_reporter": {
+      "name": "Validador e informes",
+      "description": "Valida contra reglas y genera informes de calidad en PDF/Excel."
+    },
+    "09_pipeline_runner": {
+      "name": "Ejecutor de canalizaciones",
+      "description": "Encadena herramientas en el orden recomendado y pasa la salida entre pasos."
+    }
+  }
+}
--- a/tests/test_lang_packs.py
+++ b/tests/test_lang_packs.py
@@ -0,0 +1,174 @@
+"""Tests for the GUI language-pack i18n module.
+
+Covers:
+- t() basic lookup, missing-key fallback to English, then to the key.
+- str.format kwargs interpolation and tolerant handling of missing placeholders.
+- Parity between English and Spanish packs so a new key in en.json
+  doesn't silently regress to English when es is active.
+- The JS-escape helper used by the farewell overlay.
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import pytest
+
+from src.i18n import LANGUAGES, available_languages, t
+
+
+# Directory holding the JSON packs, resolved relative to this test file.
+_PACK_DIR = Path(__file__).resolve().parents[1] / "src" / "i18n" / "packs"
+
+
+def _flatten(obj, prefix=""):
+    """Recursively yield one dotted path per leaf value of a nested pack."""
+    if not isinstance(obj, dict):
+        yield prefix
+        return
+    for name, child in obj.items():
+        dotted = name if not prefix else f"{prefix}.{name}"
+        yield from _flatten(child, dotted)
+
+
+def _load_pack(code: str) -> dict:
+    """Parse ``<code>.json`` from the pack directory straight off disk."""
+    return json.loads((_PACK_DIR / f"{code}.json").read_text(encoding="utf-8"))
+
+
+class TestLookup:
+    """Direct ``t()`` lookups against the shipped and synthetic packs."""
+
+    def test_returns_english_value_by_default(self):
+        title = t("home.title", "en")
+        assert title.startswith("🧹 DataTools")
+
+    def test_returns_spanish_value(self):
+        assert "Maestría" in t("home.title", "es")
+
+    def test_missing_key_falls_back_to_english(self):
+        # A key that no pack defines must come back verbatim, so a
+        # wrong key is visible in the output rather than silent.
+        bogus = "definitely.not.a.real.key"
+        assert t(bogus, "es") == bogus
+
+    def test_spanish_missing_key_falls_back_to_english(self, tmp_path, monkeypatch):
+        # English defines ``a.b``; the Spanish pack is empty. Asking for
+        # it in Spanish must yield the English text, not the dotted key.
+        from src import i18n as i18n_mod
+
+        en_pack = json.dumps({"a": {"b": "english-only"}})
+        (tmp_path / "en.json").write_text(en_pack, encoding="utf-8")
+        (tmp_path / "es.json").write_text(json.dumps({}), encoding="utf-8")
+        # Drop cached packs, then aim the loader at the temp directory.
+        i18n_mod._load_pack.cache_clear()
+        monkeypatch.setattr(i18n_mod, "_PACK_DIR", tmp_path)
+        try:
+            assert i18n_mod.t("a.b", "es") == "english-only"
+        finally:
+            i18n_mod._load_pack.cache_clear()
+
+
+class TestInterpolation:
+    """Keyword arguments passed to ``t()`` fill ``{placeholders}``."""
+
+    def test_named_placeholder(self):
+        rendered = t("upload.using_session_file", "en", name="data.csv")
+        assert "data.csv" in rendered
+
+    def test_missing_placeholder_is_tolerated(self):
+        # Omitting ``name`` must leave ``{name}`` in the text, not raise.
+        template = t("upload.using_session_file", "en")
+        assert "{name}" in template
+
+
+class TestPackParity:
+    """The English and Spanish packs must define the same keys.
+
+    A key found only in en.json means Spanish users silently get
+    the English fallback for it; a key found only in es.json is a
+    stale leftover. Each direction has its own test so a failure
+    says which kind of drift occurred.
+    """
+
+    def test_es_mirrors_en(self):
+        en_keys, es_keys = (set(_flatten(_load_pack(c))) for c in ("en", "es"))
+        missing = sorted(en_keys - es_keys)
+        assert not missing, f"Spanish pack missing keys: {missing}"
+
+    def test_no_orphan_keys_in_es(self):
+        # Reverse direction: keys the Spanish pack still carries that
+        # English no longer defines are dead weight, so flag them too.
+        english = set(_flatten(_load_pack("en")))
+        spanish = set(_flatten(_load_pack("es")))
+        orphans = sorted(spanish.difference(english))
+        assert not orphans, f"Spanish pack has stale keys: {orphans}"
+
+
+class TestRegistry:
+    def test_languages_listed(self):
+        # ``en`` and ``es`` must both be reported by available_languages().
+        assert {"en", "es"}.issubset(e["code"] for e in available_languages())
+
+    def test_every_registered_lang_has_a_pack(self):
+        for pack_file in (_PACK_DIR / f"{e['code']}.json" for e in LANGUAGES):
+            assert pack_file.exists()
+
+
+class TestFarewellEscape:
+    """Pin the escaping contract of the farewell overlay's helper.
+
+    Pack strings are interpolated into a JS payload, so a stray or
+    hostile quote or angle bracket in a translation must not be able
+    to close the JS string or break out into the surrounding HTML.
+    """
+
+    def test_escapes_quotes_and_html(self):
+        from src.gui.components._legacy import _js_html_safe
+
+        escaped = _js_html_safe("Cerrando 'app' <script>x</script>")
+        # Once every backslash-quote pair is removed no bare quote may
+        # remain: an unescaped one would end the JS string literal early.
+        assert "\\'" in escaped
+        assert "'" not in escaped.replace("\\'", "")
+        assert "<script>" not in escaped
+        assert "&lt;script&gt;" in escaped
+
+    def test_backslash_doubled(self):
+        from src.gui.components._legacy import _js_html_safe
+
+        assert _js_html_safe("a\\b") == "a\\\\b"
+
+
+class TestKeyCoverage:
+    """Spot-check that keys the GUI relies on exist in every pack."""
+
+    @pytest.mark.parametrize("key", [
+        "home.title",
+        "home.caption",
+        "chrome.footer",
+        "chrome.language_label",
+        "upload.heading",
+        "upload.run_button",
+        "upload.skip_button",
+        "findings.header",
+        "findings.none",
+        "gate.warning",
+        "gate.open_review",
+        "quit.button",
+        "quit.shutting_down",
+        "quit.farewell_title",
+        "quit.farewell_subtitle",
+        "close_page.title",
+        "close_page.button",
+        "status.ready",
+        "status.coming_soon",
+        "tools.01_deduplicator.name",
+        "tools.09_pipeline_runner.description",
+    ])
+    def test_key_resolves_in_both_packs(self, key):
+        for lang in ("en", "es"):
+            # t() falls back to English, so gate on the pack's own keys.
+            value = t(key, lang) if key in set(_flatten(_load_pack(lang))) else ""
+            assert value and value != key, f"missing {key!r} in {lang}"