feat(i18n): add language-pack scaffold with English and Spanish

Introduces ``src/i18n`` with a tiny JSON-backed t() lookup, an in-session
language preference, and a sidebar selector wired through
``hide_streamlit_chrome`` so every page picks up the same picker. Covers
home, tool cards, upload section, findings panel, normalization gate,
shutdown (quit button and Close page), and pickup banner strings. Tests
pin pack parity and the farewell-overlay JS escape so future packs can't
silently regress.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-13 15:11:30 +00:00
parent 4706ed571e
commit c4ce86bd64
8 changed files with 649 additions and 75 deletions

View File

@@ -26,13 +26,16 @@ from src.gui.components import (
hide_streamlit_chrome,
upload_and_analyze_section,
)
from src.i18n import t
st.set_page_config(
page_title="DataTools — Data Cleaning Mastery",
page_title=t("home.page_title"),
page_icon="🧹",
layout="wide",
)
# ``hide_streamlit_chrome`` also renders the sidebar language selector,
# so every page that hides chrome picks up the same picker.
hide_streamlit_chrome()
@@ -40,8 +43,8 @@ hide_streamlit_chrome()
# Home page
# ---------------------------------------------------------------------------
st.title("🧹 DataTools — Data Cleaning Mastery")
st.caption("A 9-tool suite for cleaning, standardizing, and validating tabular data. Runs 100% locally.")
st.title(t("home.title"))
st.caption(t("home.caption"))
st.divider()
@@ -57,7 +60,7 @@ st.divider()
# Tool cards
# ---------------------------------------------------------------------------
from src.gui.tools_registry import TOOLS
from src.gui.tools_registry import TOOLS, tool_description, tool_name
# Render tool cards in a 3-column grid. Cards picked up by the analyzer get a
# coloured "N findings" badge so the user can see at a glance which tools
@@ -70,15 +73,17 @@ for row_start in range(0, len(TOOLS), 3):
break
tool = TOOLS[idx]
with col:
status_key = "status.ready" if tool.status == "Ready" else "status.coming_soon"
status_color = "green" if tool.status == "Ready" else "orange"
badge = ""
n = findings_count_for_tool(tool.tool_id)
if n:
badge = f" :red-background[**{n} finding{'s' if n != 1 else ''}**]"
badge_key = "home.findings_badge_one" if n == 1 else "home.findings_badge_other"
badge = f" :red-background[**{t(badge_key, n=n)}**]"
st.markdown(
f"### {tool.icon} {tool.name}{badge}\n\n"
f"{tool.description}\n\n"
f":{status_color}[**{tool.status}**]"
f"### {tool.icon} {tool_name(tool.tool_id)}{badge}\n\n"
f"{tool_description(tool.tool_id)}\n\n"
f":{status_color}[**{t(status_key)}**]"
)
@@ -87,7 +92,4 @@ for row_start in range(0, len(TOOLS), 3):
# ---------------------------------------------------------------------------
st.divider()
st.caption(
"Runs locally. Your data never leaves this computer. "
"| DataTools v3.0"
)
st.caption(t("chrome.footer"))

View File

@@ -11,6 +11,7 @@ from typing import Optional
import pandas as pd
import streamlit as st
from src.i18n import t as _t
from src.core.dedup import (
Algorithm,
ColumnMatchStrategy,
@@ -72,15 +73,26 @@ footer {
def hide_streamlit_chrome() -> None:
"""Inject CSS to hide Streamlit's default header, menu, and footer."""
"""Inject CSS to hide Streamlit's default header, menu, and footer.
Also renders the sidebar language selector, since every entrypoint
that hides the default chrome wants the picker visible in the
same place. Pages that want a clean chrome without the selector can
inject ``_HIDE_CHROME_CSS`` themselves instead of calling this.
"""
st.markdown(_HIDE_CHROME_CSS, unsafe_allow_html=True)
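# Imported inside the function rather than at module top.
# NOTE(review): this module already imports ``t`` from ``src.i18n`` at
# import time, so deferring this import does not keep the module
# importable when the i18n package is missing. Confirm the intended
# reason (e.g. avoiding an import cycle) or hoist it to the top-level import.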
from src.i18n import render_language_selector
render_language_selector()
# ---------------------------------------------------------------------------
# Clean shutdown
# ---------------------------------------------------------------------------
_FAREWELL_SCRIPT = """
_FAREWELL_SCRIPT_TEMPLATE = """
<script>
(function () {
// Strategy: append a full-screen overlay directly to the parent's
@@ -104,8 +116,8 @@ _FAREWELL_SCRIPT = """
'<div style="text-align:center;padding:32px 40px;border:1px solid #252a36;' +
'border-radius:12px;background:#161922;max-width:480px;">' +
'<h1 style="margin:0 0 8px 0;font-weight:600;letter-spacing:-0.01em;">' +
'DataTools has shut down</h1>' +
'<p style="opacity:0.7;margin:0;">You can close this window.</p>' +
'__TITLE__</h1>' +
'<p style="opacity:0.7;margin:0;">__SUBTITLE__</p>' +
'</div>';
return overlay;
}
@@ -127,7 +139,32 @@ _FAREWELL_SCRIPT = """
"""
def quit_button(label: str = "Quit app", *, key: str = "quit_app_button") -> None:
def _js_html_safe(s: str) -> str:
"""Escape *s* so it can be embedded inside the farewell overlay's
JS-single-quoted, innerHTML-bound payload.
Order matters: backslash first (so subsequent escapes don't get
re-escaped), then the JS string-terminator, then HTML-special chars.
"""
return (
s.replace("\\", "\\\\")
.replace("'", "\\'")
.replace("&", "&amp;")
.replace("<", "&lt;")
.replace(">", "&gt;")
)
def _farewell_script() -> str:
    """Build the farewell overlay script for the active language.

    Looks up the title and subtitle strings, escapes each with
    ``_js_html_safe``, and substitutes them for the ``__TITLE__`` and
    ``__SUBTITLE__`` placeholders in ``_FAREWELL_SCRIPT_TEMPLATE``.

    Returns:
        The script markup with both placeholders filled in.
    """
    title = _js_html_safe(_t("quit.farewell_title"))
    script = _FAREWELL_SCRIPT_TEMPLATE.replace("__TITLE__", title)
    subtitle = _js_html_safe(_t("quit.farewell_subtitle"))
    return script.replace("__SUBTITLE__", subtitle)
def quit_button(label: str | None = None, *, key: str = "quit_app_button") -> None:
"""Render a Quit button that terminates the Streamlit server.
Streamlit has no first-class shutdown hook, and signalling the
@@ -140,10 +177,13 @@ def quit_button(label: str = "Quit app", *, key: str = "quit_app_button") -> Non
a self-contained "App closed" page so the user never sees
Streamlit's red connection-error overlay.
"""
if label is None:
label = _t("quit.button")
if st.session_state.get("_app_shutting_down"):
from streamlit.components.v1 import html as _components_html
_components_html(_FAREWELL_SCRIPT, height=0)
st.success("Shutting down… you can close this window.")
_components_html(_farewell_script(), height=0)
st.success(_t("quit.shutting_down"))
st.stop()
if st.button(label, key=key, type="secondary"):
@@ -824,15 +864,25 @@ _TOOL_PAGE_PATHS: dict[str, str] = {
def tool_display_name(tool_id: str) -> str:
"""Map a stable tool id to its GUI display name; falls back to the id."""
return TOOL_DISPLAY_NAMES.get(tool_id, tool_id) if tool_id else "Informational"
"""Map a stable tool id to its GUI display name; falls back to the id.
Routes through the active language pack so the home grid, findings
panel headers, and "Open tool" buttons all stay in sync with the
sidebar's language selection.
"""
if not tool_id:
return _t("findings.untargeted_label")
translated = _t(f"tools.{tool_id}.name")
if translated != f"tools.{tool_id}.name":
return translated
return TOOL_DISPLAY_NAMES.get(tool_id, tool_id)
def _tool_page_slug(tool_id: str) -> str:
    """Return the page path registered for *tool_id*, or ``""`` if there is none."""
    slug = _TOOL_PAGE_PATHS.get(tool_id, "")
    return slug
def render_findings_panel(findings, *, header: str = "Detected issues") -> None:
def render_findings_panel(findings, *, header: str | None = None) -> None:
"""Render a list of :class:`Finding` objects grouped by tool.
Each tool gets a header with the count, an open-tool button, and a list
@@ -842,8 +892,11 @@ def render_findings_panel(findings, *, header: str = "Detected issues") -> None:
from src.core.analyze import findings_by_tool # local import to avoid cycle
from src.core.text_clean import hidden_char_css
if header is None:
header = _t("findings.header")
if not findings:
st.success("No issues detected. Open any tool below to start working.")
st.success(_t("findings.none"))
return
# Inject the hidden-char badge styles once so every sample value below
@@ -854,7 +907,10 @@ def render_findings_panel(findings, *, header: str = "Detected issues") -> None:
for f in findings:
by_sev[f.severity] = by_sev.get(f.severity, 0) + 1
sev_summary = " · ".join(
f"{_SEVERITY_ICON[s]} {by_sev[s]} {s}"
_t(
"findings.severity_summary_segment",
icon=_SEVERITY_ICON[s], n=by_sev[s], severity=s,
)
for s in ("error", "warn", "info") if by_sev.get(s)
)
st.markdown(f"### {header}")
@@ -865,8 +921,9 @@ def render_findings_panel(findings, *, header: str = "Detected issues") -> None:
for tool_id in sorted(grouped):
items = grouped[tool_id]
name = tool_display_name(tool_id)
with st.expander(
f"{tool_display_name(tool_id)}{len(items)} finding(s)",
_t("findings.tool_section_label", tool=name, n=len(items)),
expanded=any(f.severity == "error" for f in items),
):
for f in items:
@@ -876,11 +933,11 @@ def render_findings_panel(findings, *, header: str = "Detected issues") -> None:
# Streamlit resolves page paths relative to the entrypoint
# (src/gui/app.py), so a leading ``src/gui/`` would point
# outside the allowed page tree on Windows.
st.page_link(page_slug, label=f"Open {tool_display_name(tool_id)}")
st.page_link(page_slug, label=_t("findings.open_tool", tool=name))
if untargeted:
with st.expander(
f"Other / file-level — {len(untargeted)} finding(s)",
_t("findings.other_section_label", n=len(untargeted)),
expanded=False,
):
for f in untargeted:
@@ -1066,28 +1123,15 @@ def upload_and_analyze_section() -> None:
own uploader. Each tool page already has its own uploader today, so
this is purely additive.
"""
st.markdown("### 📤 Upload a file to start")
st.caption(
"Optional: scan an uploaded file for data quality issues and see "
"which tools can fix each one. Skip if you already know what you need."
)
st.caption(
"**Up to 1 GB.** Formats: CSV, TSV, XLSX, XLS. "
"Delimiters auto-detected: comma, tab, semicolon, pipe. "
"Encodings auto-detected: UTF-8 (with/without BOM), UTF-16, "
"cp1252, Latin-1/9, cp1250, ISO-8859-2, cp1251, KOI8-R, "
"Mac Roman, Shift_JIS, GB18030, Big5, EUC-KR — and override on the Review page."
)
st.markdown(f"### {_t('upload.heading')}")
st.caption(_t("upload.intro"))
st.caption(_t("upload.limits"))
uploaded = st.file_uploader(
"Upload CSV or Excel",
_t("upload.uploader_label"),
type=["csv", "tsv", "xlsx", "xls"],
key="home_upload",
help=(
"Up to 1 GB. Comma / tab / semicolon / pipe delimiters all "
"auto-detected. Encoding auto-detected with override on the "
"Review page if needed."
),
help=_t("upload.uploader_help"),
)
if uploaded is None:
return
@@ -1106,16 +1150,16 @@ def upload_and_analyze_section() -> None:
col_run, col_skip, _ = st.columns([1, 1, 4])
with col_run:
run_clicked = st.button("Run analysis", type="primary", key="home_run_analysis")
run_clicked = st.button(_t("upload.run_button"), type="primary", key="home_run_analysis")
with col_skip:
skip_clicked = st.button("Skip", key="home_skip_analysis")
skip_clicked = st.button(_t("upload.skip_button"), key="home_skip_analysis")
if skip_clicked:
st.session_state["home_findings"] = []
st.session_state["home_skipped"] = True
if run_clicked:
with st.spinner("Scanning"):
with st.spinner(_t("upload.scanning")):
findings = _run_analysis_on_upload(uploaded)
st.session_state["home_findings"] = findings
st.session_state["home_skipped"] = False
@@ -1125,7 +1169,7 @@ def upload_and_analyze_section() -> None:
return
if st.session_state.get("home_skipped"):
st.info("Analysis skipped. Open any tool below to start working.")
st.info(_t("upload.skipped_notice"))
return
st.divider()
@@ -1230,13 +1274,9 @@ def require_normalization_gate() -> None:
if matched:
return
name = st.session_state.get("home_uploaded_name", "the uploaded file")
st.warning(
f"**{name}** must pass the CSV-normalization gate before you can "
f"use this tool. Open the Review page to apply the fixes our "
f"analyzer recommends."
)
if st.button("Go to Review & Normalize", type="primary"):
name = st.session_state.get("home_uploaded_name") or _t("gate.default_name")
st.warning(_t("gate.warning", name=name))
if st.button(_t("gate.open_review"), type="primary"):
st.switch_page("pages/0_Review.py")
st.stop()
@@ -1269,27 +1309,22 @@ def pickup_or_upload(
use_session = has_session_upload and not st.session_state.get(override_key, False)
if use_session:
name = st.session_state.get("home_uploaded_name", "uploaded file")
st.info(f"Using **{name}** from the upload screen.")
if st.button("Use a different file", key=f"{key}__pick_diff"):
name = st.session_state.get("home_uploaded_name") or _t("gate.default_name")
st.info(_t("upload.using_session_file", name=name))
if st.button(_t("upload.use_different_file"), key=f"{key}__pick_diff"):
st.session_state[override_key] = True
st.rerun()
return _StashedUpload(name, st.session_state["home_uploaded_bytes"])
if {"csv", "tsv", "xlsx", "xls"} & set(types):
st.caption(
"Up to 1 GB. Delimiters auto-detected: comma, tab, semicolon, pipe. "
"Encoding auto-detected (UTF-8 / UTF-16 / cp1252 / Latin-1 family / "
"cp1250 / cp1251 / KOI8-R / Mac Roman / Shift_JIS / GB18030 / Big5 / "
"EUC-KR), with override on the Review page."
)
st.caption(_t("upload.pickup_caption"))
uploaded = st.file_uploader(label, type=types, key=key, help=help)
if uploaded is not None and st.session_state.get(override_key):
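# User has uploaded their own file on this page.
# NOTE(review): this branch is currently a no-op, so the override flag is
# NOT cleared here; in the code visible here it is only reset by the
# switch-back button below. Confirm whether it should be reset so the
# next visit to a tool page starts fresh.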
pass
if uploaded is None and st.session_state.get(override_key) and has_session_upload:
if st.button("Switch back to upload-screen file", key=f"{key}__switch_back"):
if st.button(_t("upload.switch_back"), key=f"{key}__switch_back"):
st.session_state[override_key] = False
st.rerun()
return uploaded

View File

@@ -18,24 +18,21 @@ if str(_project_root) not in sys.path:
sys.path.insert(0, str(_project_root))
from src.gui.components import hide_streamlit_chrome, quit_button
from src.i18n import t
st.set_page_config(
page_title="DataTools — Close",
page_title=t("close_page.page_title"),
page_icon="🛑",
layout="wide",
)
hide_streamlit_chrome()
st.title("🛑 Close DataTools")
st.caption("Shut down the local app and free the terminal.")
st.title(t("close_page.title"))
st.caption(t("close_page.caption"))
st.divider()
st.markdown(
"Clicking the button below will terminate the DataTools server. "
"Any unsaved work in other tools will be lost. Once the app shuts "
"down you can close this window."
)
st.markdown(t("close_page.body"))
st.write("")
quit_button(label="Close the app", key="quit_app_button_page")
quit_button(label=t("close_page.button"), key="quit_app_button_page")

View File

@@ -150,3 +150,20 @@ def display_name(tool_id: str) -> str:
"""Return the human-readable name; fall back to the id when unknown."""
t = tool_by_id(tool_id)
return t.name if t else tool_id
def tool_name(tool_id: str) -> str:
    """Return the tool's name from the active language pack.

    The lookup key is ``tools.<tool_id>.name``. When the lookup hands back
    the key itself, the name is treated as untranslated and the registry's
    ``display_name`` result is returned instead.
    """
    # Function-scope import, as in the original.
    from src.i18n import t as _t

    fallback = display_name(tool_id)
    key = f"tools.{tool_id}.name"
    localized = _t(key)
    if localized == key:
        return fallback
    return localized
def tool_description(tool_id: str) -> str:
    """Return the tool's description from the active language pack.

    The lookup key is ``tools.<tool_id>.description``. When the lookup
    hands back the key itself, the description is treated as untranslated
    and the registry entry's ``description`` is used, or ``""`` when the
    tool id is not in the registry.
    """
    # Function-scope import, as in the original.
    from src.i18n import t as _t

    entry = tool_by_id(tool_id)
    if entry:
        fallback = entry.description
    else:
        fallback = ""
    key = f"tools.{tool_id}.description"
    localized = _t(key)
    if localized == key:
        return fallback
    return localized