# datatools-dev/src/gui/tools_registry.py

"""Per-tool manifest registry.

Single source of truth for what tools exist, their display strings, and
the tier (which controls whether a tool ships in a given build SKU). The
home-page sidebar consumes this list; future per-tool packaging will
filter it via the ``tier`` field.

Adding a tool: append one ``Tool`` entry. Page filenames must match the
``page_slug`` so Streamlit's automatic page discovery picks them up.

Selling subsets: builds can filter ``TOOLS`` by tier or tool_id at
import time — no other code changes required, since pages key off
``tool_id`` for findings badges and the home grid renders whatever's in
the filtered list.
"""

from __future__ import annotations

from dataclasses import dataclass
from typing import Literal


# Build tiers a tool can be assigned to; the tier controls whether a tool
# ships in a given build SKU.
Tier = Literal["core", "pro", "enterprise"]
# Availability states for a tool; the value drives the card badge color.
Status = Literal["Ready", "Coming Soon"]


@dataclass(frozen=True)
class Tool:
    """Immutable manifest record describing a single tool.

    Attributes:
        tool_id: Stable identifier matching the analyzer's tool field.
        icon: Single-glyph icon for the home grid card.
        name: Display name (sidebar and card title).
        description: One-sentence card body.
        page_slug: Streamlit page filename without ".py"
            (e.g. "1_Deduplicator").
        status: "Ready" or "Coming Soon"; drives the card badge color.
        tier: Build-time gating hook; every tool is "core" today.
    """

    tool_id: str
    icon: str
    name: str
    description: str
    page_slug: str
    status: Status
    tier: Tier = "core"


# Ordered manifest of every tool. List order is the numbering order used by
# the tool ids and page slugs below.
TOOLS: list[Tool] = [
    # --- Tools marked "Ready" ---
    Tool(
        tool_id="01_deduplicator",
        page_slug="1_Deduplicator",
        name="Deduplicator",
        icon="🔍",
        status="Ready",
        description=(
            "Fuzzy matching, normalization, survivor selection, "
            "and interactive review."
        ),
    ),
    Tool(
        tool_id="02_text_cleaner",
        page_slug="2_Text_Cleaner",
        name="Text Cleaner",
        icon="✂️",
        status="Ready",
        description=(
            "Whitespace trim, multi-space collapse, Unicode "
            "normalization, BOM and line-ending handling."
        ),
    ),
    Tool(
        tool_id="03_format_standardizer",
        page_slug="3_Format_Standardizer",
        name="Format Standardizer",
        icon="📐",
        status="Ready",
        description=(
            "Standardize dates, currencies, names, phone numbers, "
            "and addresses."
        ),
    ),
    # --- Tools marked "Coming Soon" ---
    Tool(
        tool_id="04_missing_handler",
        page_slug="4_Missing_Values",
        name="Missing Value Handler",
        icon="🕳️",
        status="Coming Soon",
        description=(
            "Detect disguised nulls, missingness analysis, "
            "and imputation strategies."
        ),
    ),
    Tool(
        tool_id="05_column_mapper",
        page_slug="5_Column_Mapper",
        name="Column Mapper",
        icon="🗂️",
        status="Coming Soon",
        description=(
            "Rename columns, enforce a target schema, "
            "and coerce types."
        ),
    ),
    Tool(
        tool_id="06_outlier_detector",
        page_slug="6_Outlier_Detector",
        name="Outlier Detector",
        icon="📊",
        status="Coming Soon",
        description=(
            "Z-score, IQR, and MAD detection with domain-rule violations "
            "and winsorization."
        ),
    ),
    Tool(
        tool_id="07_multi_file_merger",
        page_slug="7_Multi_File_Merger",
        name="Multi-File Merger",
        icon="📎",
        status="Coming Soon",
        description=(
            "Combine multiple CSV/Excel files "
            "with schema alignment."
        ),
    ),
    Tool(
        tool_id="08_validator_reporter",
        page_slug="8_Validator_Reporter",
        name="Validator & Reporter",
        icon="✅",
        status="Coming Soon",
        description=(
            "Validate against rules and generate "
            "PDF/Excel quality reports."
        ),
    ),
    Tool(
        tool_id="09_pipeline_runner",
        page_slug="9_Pipeline_Runner",
        name="Pipeline Runner",
        icon="⚙️",
        status="Coming Soon",
        description=(
            "Chain tools in recommended order "
            "and pass output between steps."
        ),
    ),
]


def tools_for_tier(*tiers: Tier) -> list[Tool]:
    """Return the tools whose ``tier`` is one of *tiers*.

    Intended for build-time slicing, so per-tool packaging can ship only
    the relevant subset of pages and home-grid cards. Calling it with no
    arguments yields every tool.

    The result is always a new list, in ``TOOLS`` order.
    """
    wanted = frozenset(tiers)
    if wanted:
        return [tool for tool in TOOLS if tool.tier in wanted]
    # No tiers requested: hand back a shallow copy of the full manifest.
    return list(TOOLS)


def tool_by_id(tool_id: str) -> Tool | None:
    """Look up a manifest entry by its ``tool_id``.

    Returns the first entry in ``TOOLS`` whose ``tool_id`` equals the
    argument, or ``None`` when no entry matches.
    """
    for candidate in TOOLS:
        if candidate.tool_id == tool_id:
            return candidate
    return None


def display_name(tool_id: str) -> str:
    """Map a tool id to its human-readable name.

    An id with no manifest entry is returned unchanged, so callers always
    receive a string.
    """
    entry = tool_by_id(tool_id)
    if entry is None:
        return tool_id
    return entry.name