diff --git a/src/gui/components/pipeline_modules.py b/src/gui/components/pipeline_modules.py index f9c116c..ed7fe2a 100644 --- a/src/gui/components/pipeline_modules.py +++ b/src/gui/components/pipeline_modules.py @@ -67,6 +67,135 @@ def step_caption(tool: str) -> str: return _STEP_CAPTIONS.get(tool, "") +# --------------------------------------------------------------------------- +# Plain-English result phrasing +# --------------------------------------------------------------------------- +# +# Each adapter returns a stats dict (see ``TOOL_ADAPTERS`` in +# ``src/core/pipeline.py``). ``step_phrase`` turns that dict into the one-line +# sentence the mockup shows in the Results table ("312 duplicates removed across +# 147 groups …"); ``step_status`` derives the status pill + an optional inline +# detail line for steps that warn (e.g. unparseable cells) or error. + + +def _fmt_cols(cols: list) -> str: + """Join column names for prose: 'name', 'name & city', 'a, b & 2 more'.""" + cols = [str(c) for c in cols] + if not cols: + return "" + if len(cols) == 1: + return cols[0] + if len(cols) == 2: + return f"{cols[0]} & {cols[1]}" + if len(cols) == 3: + return f"{cols[0]}, {cols[1]} & {cols[2]}" + return f"{cols[0]}, {cols[1]} & {len(cols) - 2} more" + + +def _in_cols(cols: list) -> str: + label = _fmt_cols(cols) + return f" in {label}" if label else "" + + +def _n(count: int, noun: str) -> str: + """'1 column' / '3 columns' — naive but covers every noun used here.""" + return f"{count:,} {noun}" if count == 1 else f"{count:,} {noun}s" + + +def step_phrase(tool: str, summary: dict) -> str: + """A plain-English, one-line summary of what a step did.""" + s = summary or {} + + if tool == "text_clean": + changed = s.get("cells_changed", 0) + if not changed: + return "No changes needed." 
+ return f"{_n(changed, 'cell')} cleaned{_in_cols(s.get('columns_processed', []))}" + + if tool == "format_standardize": + changed = s.get("cells_changed", 0) + bad = s.get("cells_unparseable", 0) + if not changed and not bad: + return "Nothing to standardize." + base = f"{_n(changed, 'cell')} standardized{_in_cols(s.get('columns_processed', []))}" + return base if not bad else f"{base} ({bad:,} left unchanged)" + + if tool == "missing": + parts: list[str] = [] + if s.get("cells_filled"): + parts.append(f"{_n(s['cells_filled'], 'cell')} filled") + if s.get("rows_dropped"): + parts.append(f"{_n(s['rows_dropped'], 'row')} dropped") + if s.get("columns_dropped"): + parts.append(f"{_n(len(s['columns_dropped']), 'column')} dropped") + if not parts and s.get("sentinels_standardized"): + parts.append(f"{_n(s['sentinels_standardized'], 'blank cell')} flagged") + return ", ".join(parts) if parts else "No missing values to handle." + + if tool == "column_map": + parts = [] + if s.get("columns_renamed"): + parts.append(f"{_n(s['columns_renamed'], 'column')} renamed") + if s.get("columns_added"): + parts.append(f"{_n(len(s['columns_added']), 'column')} added") + if s.get("columns_dropped"): + parts.append(f"{_n(len(s['columns_dropped']), 'column')} dropped") + return ", ".join(parts) if parts else "Columns already aligned." + + if tool == "dedup": + removed = s.get("duplicates_removed", 0) + if not removed: + return "No duplicates found." + return ( + f"{_n(removed, 'duplicate')} removed across {_n(s.get('groups', 0), 'group')} " + f"({s.get('input_rows', 0):,} → {s.get('output_rows', 0):,} rows)" + ) + + return ", ".join(f"{k}: {v}" for k, v in s.items()) + + +def step_status( + tool: str, summary: dict, *, skipped: bool = False, error: Optional[str] = None, +) -> tuple[str, str, str]: + """Return ``(pill_label, level, detail)`` for a step result. + + ``level`` is one of ``ok`` / ``warn`` / ``error`` / ``skipped``. 
``detail`` + is a longer inline explanation for warn/error rows (else ""). + """ + if error: + return "✗ error", "error", error.splitlines()[0] + if skipped: + return "⏭ skipped", "skipped", "" + + s = summary or {} + if tool == "format_standardize" and s.get("cells_unparseable"): + n = s["cells_unparseable"] + return ( + f"⚠ ok · {n:,} skipped", "warn", + f"{n:,} values didn't match a known pattern and were left " + "unchanged. The step still completed — review them in the output " + "preview if needed.", + ) + if tool == "column_map": + fails = s.get("coercion_failures") or {} + n_fail = sum(fails.values()) if isinstance(fails, dict) else 0 + missing_req = s.get("missing_required_targets") or [] + if missing_req: + return ( + "⚠ ok · missing targets", "warn", + "Required target columns had no source match: " + + ", ".join(map(str, missing_req)) + ".", + ) + if n_fail: + return ( + f"⚠ ok · {n_fail:,} not coerced", "warn", + f"{n_fail:,} values couldn't be coerced to their target type " + "and were left as-is.", + ) + + return "✓ ok", "ok", "" + + # --------------------------------------------------------------------------- # Per-tool config renderers # --------------------------------------------------------------------------- diff --git a/src/gui/pages/9_Pipeline_Runner.py b/src/gui/pages/9_Pipeline_Runner.py index 0652213..71eb907 100644 --- a/src/gui/pages/9_Pipeline_Runner.py +++ b/src/gui/pages/9_Pipeline_Runner.py @@ -32,7 +32,12 @@ from src.core.pipeline import ( run_pipeline, validate_pipeline, ) -from src.gui.components.pipeline_modules import render_step_card, step_label +from src.gui.components.pipeline_modules import ( + render_step_card, + step_label, + step_phrase, + step_status, +) from src.license import FeatureFlag hide_streamlit_chrome() @@ -380,22 +385,38 @@ m3.metric("Steps run", sum(1 for s in result.step_results if not s.skipped)) m4.metric("Elapsed", f"{result.total_elapsed:.2f} s") st.markdown("**Per-step summary**") +# Plain-English 
# --- src/gui/pages/9_Pipeline_Runner.py: per-step results --------------------
# Each step gets a status pill and a plain-English summary phrase (mockup
# §Results). The at-a-glance table stays scannable; any warn/error step also
# gets an inline detail callout directly below it, so a non-fatal issue
# surfaces in context without a dedicated always-empty column.
#
# The status tuple is computed once per step and shared by the table and the
# callouts so the two views cannot disagree.
step_outcomes = [
    (
        sr,
        step_status(
            sr.step.tool, sr.summary, skipped=sr.skipped, error=sr.error,
        ),
    )
    for sr in result.step_results
]

step_df = pd.DataFrame([
    {
        "step": step_label(sr.step.tool),
        "status": label,
        "elapsed": f"{int(sr.elapsed_seconds * 1000)} ms",
        # A skipped or failed step has no trustworthy stats; phrasing an empty
        # or partial summary could read "No changes needed." beside an error
        # pill, so both show a dash instead.
        "summary": (
            "—" if sr.skipped or sr.error
            else step_phrase(sr.step.tool, sr.summary)
        ),
    }
    for sr, (label, _level, _detail) in step_outcomes
])
st.dataframe(step_df, width="stretch", hide_index=True)

# Inline callouts: only steps that produced a detail line (warn/error).
for sr, (_label, level, detail) in step_outcomes:
    if not detail:
        continue
    name = step_label(sr.step.tool)
    if level == "error":
        st.error(f"**{name}** — {detail}")
    else:
        st.warning(f"**{name}** — {detail}")

st.markdown("**Output preview (first 10 rows)**")
st.dataframe(result.final_df.head(10), width="stretch")


# --- tests/gui/test_pipeline_builder.py ---------------------------------------
# (The tail of test_run_produces_results_with_friendly_names is unchanged diff
# context whose definition starts outside this view; it is not reproduced here.)


def test_step_phrase_is_plain_english_not_json():
    """step_phrase/step_status produce readable prose and the right status level."""
    from src.gui.components.pipeline_modules import step_phrase, step_status

    # dedup phrasing mirrors the design mockup wording exactly.
    phrase = step_phrase("dedup", {
        "input_rows": 18442, "output_rows": 18130,
        "duplicates_removed": 312, "groups": 147,
    })
    assert phrase == "312 duplicates removed across 147 groups (18,442 → 18,130 rows)"

    # text_clean lists affected columns in prose, with thousands separators.
    assert step_phrase("text_clean", {
        "cells_changed": 1204, "columns_processed": ["name", "city"],
    }) == "1,204 cells cleaned in name & city"

    # singular nouns pluralize correctly
    assert step_phrase("missing", {"rows_dropped": 1, "columns_dropped": ["x"]}) == \
        "1 row dropped, 1 column dropped"

    # unparseable cells downgrade the pill to warn with an inline detail
    label, level, detail = step_status(
        "format_standardize", {"cells_changed": 100, "cells_unparseable": 141},
    )
    assert level == "warn" and "141 skipped" in label and detail

    # a clean step is "ok" with no detail
    assert step_status("text_clean", {"cells_changed": 5})[1] == "ok"