docs+code: rename tool labels everywhere
Sweep follow-up to 93e43fc. Display labels are now consistent across docs,
landing pages, CLI output, code comments, docstrings, and test prose.
Five parallel surfaces touched:
- docs (EN + ES): README, USER-GUIDE, CLI-REFERENCE, and 11 internal
design/planning docs
- landing pages: index + bookkeeper/revops/shopify-pet
- src: CLI module docstrings, _TOOL_DISPLAY dicts in cli_analyze.py
and gui/components/_legacy.py, core module headers, every tool
page's module docstring
- tests: class/method/module docstrings and section-header comments
- test-cases READMEs
Page slugs (1_Deduplicator etc.), tool_id strings (01_deduplicator
etc.), Python class names (TestDeduplicatorWorkflow, FeatureFlag.*),
URL paths, anchor IDs, CSS classes, and asset filenames were left
intact since they're code identifiers / structural references.
All 2033 tests pass.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -45,15 +45,15 @@ app = typer.Typer(
|
||||
# Tool id -> friendly display name. Kept in the CLI module since the GUI has
|
||||
# its own version; both stay in lockstep with the actual script lineup.
|
||||
_TOOL_DISPLAY = {
|
||||
"01_deduplicator": "Deduplicator",
|
||||
"02_text_cleaner": "Text Cleaner",
|
||||
"03_format_standardizer": "Format Standardizer",
|
||||
"04_missing_handler": "Missing Value Handler",
|
||||
"05_column_mapper": "Column Mapper",
|
||||
"06_outlier_detector": "Outlier Detector",
|
||||
"07_multi_file_merger": "Multi-File Merger",
|
||||
"08_validator_reporter": "Validator & Reporter",
|
||||
"09_pipeline_runner": "Pipeline Runner",
|
||||
"01_deduplicator": "Find Duplicates",
|
||||
"02_text_cleaner": "Clean Text",
|
||||
"03_format_standardizer": "Standardize Formats",
|
||||
"04_missing_handler": "Fix Missing Values",
|
||||
"05_column_mapper": "Map Columns",
|
||||
"06_outlier_detector": "Find Unusual Values",
|
||||
"07_multi_file_merger": "Combine Files",
|
||||
"08_validator_reporter": "Quality Check",
|
||||
"09_pipeline_runner": "Automated Workflows",
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""CLI for the DataTools Column Mapper (script 05).
|
||||
"""CLI for the DataTools Map Columns tool (script 05).
|
||||
|
||||
Usage:
|
||||
python -m src.cli_column_map input.csv # auto-mapping preview
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""CLI for the DataTools Format Standardizer (script 03).
|
||||
"""CLI for the DataTools Standardize Formats tool (script 03).
|
||||
|
||||
Usage:
|
||||
python -m src.cli_format input.csv \\
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""CLI for the DataTools Missing Value Handler (script 04).
|
||||
"""CLI for the DataTools Fix Missing Values tool (script 04).
|
||||
|
||||
Usage:
|
||||
python -m src.cli_missing input.csv # profile only
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""CLI for the DataTools Pipeline Runner (script 09).
|
||||
"""CLI for the DataTools Automated Workflows tool (script 09).
|
||||
|
||||
Usage:
|
||||
# Run the recommended default pipeline (text → format → missing → dedup):
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""DataTools Column Mapper.
|
||||
"""DataTools Map Columns.
|
||||
|
||||
Rename columns, enforce a target schema, coerce types, drop / add /
|
||||
reorder columns. Designed for the three buyer profiles the toolkit
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""DataTools Missing Value Handler.
|
||||
"""DataTools Fix Missing Values.
|
||||
|
||||
Detects disguised nulls, profiles missingness per column, and applies
|
||||
imputation or drop strategies with a full audit trail.
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""DataTools Pipeline Runner.
|
||||
"""DataTools Automated Workflows.
|
||||
|
||||
Chain the cleaning tools (text-clean, format-standardize, missing,
|
||||
column-map, dedup) into a single orchestrated workflow. The pipeline
|
||||
|
||||
@@ -1 +1 @@
|
||||
"""Streamlit GUI for the DataTools Deduplicator."""
|
||||
"""Streamlit GUI for DataTools."""
|
||||
|
||||
@@ -16,7 +16,7 @@ they need without dragging the entire kitchen-sink module:
|
||||
dedup_review.py ← dedup match-group cards + review pipeline
|
||||
shared.py ← chrome / file-pickup helpers used by every tool
|
||||
|
||||
A standalone Deduplicator build, for example, can ship without
|
||||
A standalone Find Duplicates build, for example, can ship without
|
||||
``findings.py`` and ``gate.py`` — those modules import the analyzer /
|
||||
gate code that the Lite SKU does not include.
|
||||
|
||||
|
||||
@@ -847,15 +847,15 @@ def _build_match_groups_csv(
|
||||
# Tool id -> friendly display name. Single source of truth for the GUI; the
|
||||
# CLI keeps its own copy so each entrypoint stays self-contained.
|
||||
TOOL_DISPLAY_NAMES: dict[str, str] = {
|
||||
"01_deduplicator": "Deduplicator",
|
||||
"02_text_cleaner": "Text Cleaner",
|
||||
"03_format_standardizer": "Format Standardizer",
|
||||
"04_missing_handler": "Missing Value Handler",
|
||||
"05_column_mapper": "Column Mapper",
|
||||
"06_outlier_detector": "Outlier Detector",
|
||||
"07_multi_file_merger": "Multi-File Merger",
|
||||
"08_validator_reporter": "Validator & Reporter",
|
||||
"09_pipeline_runner": "Pipeline Runner",
|
||||
"01_deduplicator": "Find Duplicates",
|
||||
"02_text_cleaner": "Clean Text",
|
||||
"03_format_standardizer": "Standardize Formats",
|
||||
"04_missing_handler": "Fix Missing Values",
|
||||
"05_column_mapper": "Map Columns",
|
||||
"06_outlier_detector": "Find Unusual Values",
|
||||
"07_multi_file_merger": "Combine Files",
|
||||
"08_validator_reporter": "Quality Check",
|
||||
"09_pipeline_runner": "Automated Workflows",
|
||||
}
|
||||
|
||||
_SEVERITY_ICON: dict[str, str] = {
|
||||
@@ -1016,7 +1016,7 @@ def render_hidden_aware_preview(
|
||||
) -> None:
|
||||
"""Render a DataFrame preview that shows hidden characters in every cell.
|
||||
|
||||
Used for the Text Cleaner's "before" and "after" previews so the user
|
||||
Used for the Clean Text tool's "before" and "after" previews so the user
|
||||
can actually see the leading/trailing whitespace, NBSP padding,
|
||||
zero-width characters, and smart punctuation that the cleaner is going
|
||||
to remove (or just removed). A plain ``st.dataframe`` collapses outer
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""DataTools Deduplicator — full working tool page."""
|
||||
"""DataTools Find Duplicates — full working tool page."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""DataTools Text Cleaner — Streamlit page."""
|
||||
"""DataTools Clean Text — Streamlit page."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""DataTools Format Standardizer — Streamlit page."""
|
||||
"""DataTools Standardize Formats — Streamlit page."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""DataTools Missing Value Handler — Streamlit page."""
|
||||
"""DataTools Fix Missing Values — Streamlit page."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""DataTools Column Mapper — Streamlit page."""
|
||||
"""DataTools Map Columns — Streamlit page."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""DataTools Outlier Detector — stub page."""
|
||||
"""DataTools Find Unusual Values — stub page."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""DataTools Multi-File Merger — stub page."""
|
||||
"""DataTools Combine Files — stub page."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""DataTools Validator & Reporter — stub page."""
|
||||
"""DataTools Quality Check — stub page."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""DataTools Pipeline Runner — Streamlit page."""
|
||||
"""DataTools Automated Workflows — Streamlit page."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
Reference in New Issue
Block a user