refactor(gui): tool registry + components package for per-tool builds
Two low-risk seam moves to enable selling per-tool subsets without
breaking the existing all-in-one bundle. Behaviour identical; every
existing import still resolves; the full pytest suite passes and every
page returns HTTP 200.
1. **Tool registry** (src/gui/tools_registry.py) — replaces the
inline list of dicts in app.py with a Tool dataclass and a TOOLS
list. Adds a tier field ("core" today, "pro" / "enterprise" later)
and tools_for_tier() / tool_by_id() / display_name() helpers. A
per-tool build slices TOOLS at import time without code changes.
2. **components package** (src/gui/components/) — converts the former
single components.py into a package with:
_legacy.py — original file, unchanged.
__init__.py — re-exports the legacy surface; existing
"from src.gui.components import …" calls
continue to work.
shared.py — hide_streamlit_chrome, pickup_or_upload
(every build needs these).
gate.py — require_normalization_gate (Pro / Suite SKUs).
findings.py — analyzer-finding widgets (drops out of a
standalone-Dedup build).
dedup_review.py — match-group cards + apply pipeline (drops out
of a non-dedup build).
The seam modules are narrow re-exports today. As code migrates out
of _legacy.py into the focused modules, the public import path
stays stable via the shim.
E2E: 765 passed, 17 xfailed (unchanged); the home page, all 9 tool pages,
and the Review page return HTTP 200; full pipeline (analyze → auto_fix →
apply_decisions → output bytes) round-trips on the kitchen-sink
fixture with zero high-confidence findings remaining post-fix.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -57,80 +57,7 @@ st.divider()
|
||||
# Tool cards
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
TOOLS = [
|
||||
{
|
||||
"icon": "🔍",
|
||||
"name": "Deduplicator",
|
||||
"description": "Fuzzy matching, normalization, survivor selection, and interactive review.",
|
||||
"status": "Ready",
|
||||
"page": "1_Deduplicator",
|
||||
"tool_id": "01_deduplicator",
|
||||
},
|
||||
{
|
||||
"icon": "✂️",
|
||||
"name": "Text Cleaner",
|
||||
"description": "Whitespace trim, multi-space collapse, Unicode normalization, BOM and line-ending handling.",
|
||||
"status": "Ready",
|
||||
"page": "2_Text_Cleaner",
|
||||
"tool_id": "02_text_cleaner",
|
||||
},
|
||||
{
|
||||
"icon": "📐",
|
||||
"name": "Format Standardizer",
|
||||
"description": "Standardize dates, currencies, names, phone numbers, and addresses.",
|
||||
"status": "Coming Soon",
|
||||
"page": "3_Format_Standardizer",
|
||||
"tool_id": "03_format_standardizer",
|
||||
},
|
||||
{
|
||||
"icon": "🕳️",
|
||||
"name": "Missing Value Handler",
|
||||
"description": "Detect disguised nulls, missingness analysis, and imputation strategies.",
|
||||
"status": "Coming Soon",
|
||||
"page": "4_Missing_Values",
|
||||
"tool_id": "04_missing_handler",
|
||||
},
|
||||
{
|
||||
"icon": "🗂️",
|
||||
"name": "Column Mapper",
|
||||
"description": "Rename columns, enforce a target schema, and coerce types.",
|
||||
"status": "Coming Soon",
|
||||
"page": "5_Column_Mapper",
|
||||
"tool_id": "05_column_mapper",
|
||||
},
|
||||
{
|
||||
"icon": "📊",
|
||||
"name": "Outlier Detector",
|
||||
"description": "Z-score, IQR, and MAD detection with domain-rule violations and winsorization.",
|
||||
"status": "Coming Soon",
|
||||
"page": "6_Outlier_Detector",
|
||||
"tool_id": "06_outlier_detector",
|
||||
},
|
||||
{
|
||||
"icon": "📎",
|
||||
"name": "Multi-File Merger",
|
||||
"description": "Combine multiple CSV/Excel files with schema alignment.",
|
||||
"status": "Coming Soon",
|
||||
"page": "7_Multi_File_Merger",
|
||||
"tool_id": "07_multi_file_merger",
|
||||
},
|
||||
{
|
||||
"icon": "✅",
|
||||
"name": "Validator & Reporter",
|
||||
"description": "Validate against rules and generate PDF/Excel quality reports.",
|
||||
"status": "Coming Soon",
|
||||
"page": "8_Validator_Reporter",
|
||||
"tool_id": "08_validator_reporter",
|
||||
},
|
||||
{
|
||||
"icon": "⚙️",
|
||||
"name": "Pipeline Runner",
|
||||
"description": "Chain tools in recommended order and pass output between steps.",
|
||||
"status": "Coming Soon",
|
||||
"page": "9_Pipeline_Runner",
|
||||
"tool_id": "09_pipeline_runner",
|
||||
},
|
||||
]
|
||||
from src.gui.tools_registry import TOOLS
|
||||
|
||||
# Render tool cards in a 3-column grid. Cards picked up by the analyzer get a
|
||||
# coloured "N findings" badge so the user can see at a glance which tools
|
||||
@@ -143,15 +70,15 @@ for row_start in range(0, len(TOOLS), 3):
|
||||
break
|
||||
tool = TOOLS[idx]
|
||||
with col:
|
||||
status_color = "green" if tool["status"] == "Ready" else "orange"
|
||||
status_color = "green" if tool.status == "Ready" else "orange"
|
||||
badge = ""
|
||||
n = findings_count_for_tool(tool.get("tool_id", ""))
|
||||
n = findings_count_for_tool(tool.tool_id)
|
||||
if n:
|
||||
badge = f" :red-background[**{n} finding{'s' if n != 1 else ''}**]"
|
||||
st.markdown(
|
||||
f"### {tool['icon']} {tool['name']}{badge}\n\n"
|
||||
f"{tool['description']}\n\n"
|
||||
f":{status_color}[**{tool['status']}**]"
|
||||
f"### {tool.icon} {tool.name}{badge}\n\n"
|
||||
f"{tool.description}\n\n"
|
||||
f":{status_color}[**{tool.status}**]"
|
||||
)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user