feat(gui): wire analyzer into home page with findings panel and tool badges

Home page (src/gui/app.py) gains an upload + analyze section above the tool
grid: file uploader, "Run analysis" / "Skip" buttons, and a findings panel
grouped by destination tool. Each tool card now carries an "N findings" badge
when the active session's findings reference that tool, so the user sees
at a glance which tools their just-uploaded file would benefit from.

src/gui/components.py adds the shared GUI surface:
  - TOOL_DISPLAY_NAMES + tool_display_name() — single source of truth for
    GUI labels, keeping detector tool ids decoupled from the UI.
  - render_findings_panel(findings) — severity icons, expander per tool,
    open-tool page link, sample-cells dataframe.
  - upload_and_analyze_section() — the home-page widget; stashes file
    bytes and findings in session_state so future tool pages can pick up
    the existing upload instead of re-prompting.
  - findings_count_for_tool(tool_id) — used by app.py to badge cards.

CSV/TSV uploads run through repair_bytes() before analysis, so the user
also sees csv_bom_stripped / csv_smart_quotes_folded findings synthesized
from the pre-parse repair pass. Excel uploads skip that step.

The Text Cleaner tool card flips from "Coming Soon" to "Ready" — the tool
has been ready since the v3.0 implementation; the home page just hadn't
been updated to say so.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-29 15:53:22 +00:00
parent 5c62fb6117
commit a8943f29eb
2 changed files with 248 additions and 4 deletions

View File

@@ -21,7 +21,11 @@ if str(_project_root) not in sys.path:
# Page config
# ---------------------------------------------------------------------------
from src.gui.components import hide_streamlit_chrome
from src.gui.components import (
findings_count_for_tool,
hide_streamlit_chrome,
upload_and_analyze_section,
)
st.set_page_config(
page_title="DataTools — Data Cleaning Mastery",
@@ -41,6 +45,14 @@ st.caption("A 9-tool suite for cleaning, standardizing, and validating tabular d
st.divider()
# ---------------------------------------------------------------------------
# Upload & analyze (optional onboarding step)
# ---------------------------------------------------------------------------
upload_and_analyze_section()
st.divider()
# ---------------------------------------------------------------------------
# Tool cards
# ---------------------------------------------------------------------------
@@ -52,13 +64,15 @@ TOOLS = [
"description": "Fuzzy matching, normalization, survivor selection, and interactive review.",
"status": "Ready",
"page": "1_Deduplicator",
"tool_id": "01_deduplicator",
},
{
"icon": "✂️",
"name": "Text Cleaner",
"description": "Whitespace trim, multi-space collapse, Unicode normalization, BOM and line-ending handling.",
"status": "Coming Soon",
"status": "Ready",
"page": "2_Text_Cleaner",
"tool_id": "02_text_cleaner",
},
{
"icon": "📐",
@@ -66,6 +80,7 @@ TOOLS = [
"description": "Standardize dates, currencies, names, phone numbers, and addresses.",
"status": "Coming Soon",
"page": "3_Format_Standardizer",
"tool_id": "03_format_standardizer",
},
{
"icon": "🕳️",
@@ -73,6 +88,7 @@ TOOLS = [
"description": "Detect disguised nulls, missingness analysis, and imputation strategies.",
"status": "Coming Soon",
"page": "4_Missing_Values",
"tool_id": "04_missing_handler",
},
{
"icon": "🗂️",
@@ -80,6 +96,7 @@ TOOLS = [
"description": "Rename columns, enforce a target schema, and coerce types.",
"status": "Coming Soon",
"page": "5_Column_Mapper",
"tool_id": "05_column_mapper",
},
{
"icon": "📊",
@@ -87,6 +104,7 @@ TOOLS = [
"description": "Z-score, IQR, and MAD detection with domain-rule violations and winsorization.",
"status": "Coming Soon",
"page": "6_Outlier_Detector",
"tool_id": "06_outlier_detector",
},
{
"icon": "📎",
@@ -94,6 +112,7 @@ TOOLS = [
"description": "Combine multiple CSV/Excel files with schema alignment.",
"status": "Coming Soon",
"page": "7_Multi_File_Merger",
"tool_id": "07_multi_file_merger",
},
{
"icon": "",
@@ -101,6 +120,7 @@ TOOLS = [
"description": "Validate against rules and generate PDF/Excel quality reports.",
"status": "Coming Soon",
"page": "8_Validator_Reporter",
"tool_id": "08_validator_reporter",
},
{
"icon": "⚙️",
@@ -108,10 +128,13 @@ TOOLS = [
"description": "Chain tools in recommended order and pass output between steps.",
"status": "Coming Soon",
"page": "9_Pipeline_Runner",
"tool_id": "09_pipeline_runner",
},
]
# Render tool cards in a 3-column grid
# Render tool cards in a 3-column grid. A card whose tool is referenced by the
# current findings gets a coloured "N findings" badge so the user can see at a
# glance which tools would help with the just-uploaded file.
for row_start in range(0, len(TOOLS), 3):
cols = st.columns(3)
for i, col in enumerate(cols):
@@ -121,8 +144,12 @@ for row_start in range(0, len(TOOLS), 3):
tool = TOOLS[idx]
with col:
status_color = "green" if tool["status"] == "Ready" else "orange"
badge = ""
n = findings_count_for_tool(tool.get("tool_id", ""))
if n:
badge = f" :red-background[**{n} finding{'s' if n != 1 else ''}**]"
st.markdown(
f"### {tool['icon']} {tool['name']}\n\n"
f"### {tool['icon']} {tool['name']}{badge}\n\n"
f"{tool['description']}\n\n"
f":{status_color}[**{tool['status']}**]"
)