From c73d716d064caf153185a077ab7dc3b0b5fc3daf Mon Sep 17 00:00:00 2001
From: Michael <michael.dombaugh@gmail.com>
Date: Sun, 17 May 2026 01:36:35 +0000
Subject: [PATCH] feat(audit): JSONL audit log for support diagnostics
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New ``src/audit.py`` module records GUI actions to a per-session
JSONL file under ``~/.datatools/logs/`` (overrideable via
``DATATOOLS_AUDIT_DIR``). The file is human-readable (one JSON
object per line, each with a ``message`` field) AND trivially
machine-parseable — the support flow is "client mails the file,
we read it and explain what went wrong."

Format example::

    {"ts":"2026-05-17T05:30:00.123+00:00","level":"info","category":"session",
     "session":"a1b2c3d4","message":"Session started",
     "platform":"Windows 11","python":"3.14.0","user":"Michael Dombaugh",
     "log_file":"C:\\Users\\Michael Dombaugh\\.datatools\\logs\\datatools-...jsonl"}
    {"ts":"...","category":"upload","message":"Uploaded customers.csv",
     "filename":"customers.csv","bytes":24813}
    {"ts":"...","category":"analyze","message":"Analyzed customers.csv (3 findings)",
     "filename":"customers.csv","findings":3,"rows":120,"cols":8}
    {"ts":"...","category":"tool_run","message":"Clean Text run",
     "page":"2_Text_Cleaner"}
    {"ts":"...","category":"error","level":"error",
     "message":"analyze(weird.csv): EmptyDataError: No columns to parse",
     "filename":"weird.csv","outcome":"empty_after_repair"}

Public API:

- ``log_event(category, message, **extra)``
- ``log_session_start()`` — idempotent banner with platform info
- ``log_page_open(slug)`` — emit a ``nav`` event, deduplicated per
  Streamlit session so reruns don't spam the log
- ``log_exception(where, exc, **extra)`` — convenience wrapper
- ``audit_log_path()`` / ``audit_log_dir()`` — for the UI

Wired in at:

- ``hide_streamlit_chrome``: stamps session start, mounts a small
  "🩺  Diagnostics" expander in the sidebar with the log path and
  an "Open log folder" button so the user can grab the file to
  attach to a support email.
- Home page: ``upload`` event on every new file, ``upload`` event
  on per-file remove, ``analyze`` event with file count when
  Run-analysis fires.
- ``_run_analysis_on_upload``: ``analyze`` event with rows / cols /
  findings count per file, plus ``error`` events on every caught
  exception (empty upload, empty after repair, pandas EmptyDataError,
  generic Exception).
- Every Ready tool page (1, 2, 3, 4, 5, 9): ``tool_run`` event
  immediately after the primary action stashes its result.
- Every tool page (1-9): ``log_page_open(slug)`` on render — deduped
  via session state so we don't get one event per Streamlit rerun.

Safety:

- ``log_event`` wraps every write in try/except. A broken audit
  log must NOT crash the GUI.
- Non-JSON-serializable extras are ``str()``-coerced before writing.
- File CONTENTS are never logged. We capture filename, byte count,
  and (in the analyzer) a 12-char sha1 fingerprint of the bytes so
  the same file re-uploaded gets the same trace.
- License keys, session cookies, etc. are not logged. The session-start
  banner does record the OS user name and the working directory.
- ``DATATOOLS_AUDIT_DIR`` env var lets tests redirect writes into a
  tmp dir.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/audit.py                           | 227 +++++++++++++++++++++++++
 src/gui/_home.py                       |  19 +++
 src/gui/components/_legacy.py          | 100 ++++++++++-
 src/gui/pages/1_Deduplicator.py        |   4 +
 src/gui/pages/2_Text_Cleaner.py        |   4 +
 src/gui/pages/3_Format_Standardizer.py |   4 +
 src/gui/pages/4_Missing_Values.py      |   4 +
 src/gui/pages/5_Column_Mapper.py       |   4 +
 src/gui/pages/6_Outlier_Detector.py    |   2 +
 src/gui/pages/7_Multi_File_Merger.py   |   2 +
 src/gui/pages/8_Validator_Reporter.py  |   2 +
 src/gui/pages/9_Pipeline_Runner.py     |   4 +
 12 files changed, 373 insertions(+), 3 deletions(-)
 create mode 100644 src/audit.py

diff --git a/src/audit.py b/src/audit.py
new file mode 100644
index 0000000..e455169
--- /dev/null
+++ b/src/audit.py
@@ -0,0 +1,227 @@
+"""Audit log — records GUI actions for support diagnostics.
+
+A client running DataTools who hits a bug should be able to grab one
+file off disk, mail it to support, and have us reconstruct what they
+were doing when things broke. That file is the audit log written by
+this module.
+
+Design choices:
+
+- **JSONL**, one event per line. Each line is a valid JSON object; the
+  whole file is grep-friendly, ``jq``-friendly, and still readable in
+  Notepad / TextEdit if no tooling is available. Each event carries a
+  human-readable ``message`` field so the file is useful even without
+  any tooling.
+- **One file per session**, named ``datatools-<utc-timestamp>-<id>.jsonl``.
+  Multiple sessions on the same machine don't clobber each other, and
+  the filename sorts chronologically.
+- **Default location**: ``~/.datatools/logs/`` on every platform.
+  Overrideable via the ``DATATOOLS_AUDIT_DIR`` environment variable —
+  used by tests to redirect writes into a tmp dir.
+- **Never crashes the app**. Every write is wrapped in a try/except;
+  a broken audit log must not take down the GUI.
+- **No PII bytes**: file CONTENTS are never logged. We log the
+  filename, byte size, and a short content hash so the same file
+  re-uploaded gets the same fingerprint, but the actual bytes stay
+  local.
+
+Public API:
+
+- ``log_event(category, message, **extra)`` — write one event.
+- ``log_session_start()`` — idempotent banner with platform info.
+- ``log_exception(where, exc, **extra)`` — log a caught exception.
+- ``log_page_open(slug)`` — ``nav`` event, deduplicated per session.
+- ``audit_log_path()`` — path of the current session's file, for the GUI.
+- ``audit_log_dir()`` — the directory holding all session logs.
+"""
+
+from __future__ import annotations
+
+import getpass
+import json
+import os
+import platform
+import sys
+import threading
+import uuid
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+
+# Per-process state shared across Streamlit reruns (the module is imported
+# once). RLock, not Lock: audit_log_path() calls _session_id() while already
+# holding the lock, which would deadlock on a non-reentrant Lock.
+_LOCK = threading.RLock()
+_LOG_PATH: Path | None = None
+_SESSION_ID: str | None = None
+_SESSION_STARTED: bool = False
+
+
+def audit_log_dir() -> Path:
+    """Return the directory that holds the audit logs.
+
+    A non-empty ``DATATOOLS_AUDIT_DIR`` environment variable wins (tests
+    point it at ``tmp_path``); otherwise ``~/.datatools/logs/`` is used.
+    Nothing is created on disk here.
+    """
+    env_dir = os.environ.get("DATATOOLS_AUDIT_DIR")
+    if not env_dir:
+        return Path.home() / ".datatools" / "logs"
+    return Path(env_dir)
+
+
+def _session_id() -> str:
+    """Return the process-wide session id, minting it on first use."""
+    global _SESSION_ID
+    with _LOCK:
+        _SESSION_ID = uuid.uuid4().hex if _SESSION_ID is None else _SESSION_ID
+        return _SESSION_ID
+
+
+def audit_log_path() -> Path:
+    """Return this session's log file path, chosen once per process.
+
+    A directory that cannot be created is tolerated, never raised.
+    """
+    global _LOG_PATH
+    # _session_id() takes _LOCK itself; nesting a threading.Lock deadlocks.
+    sid = _session_id()[:8]
+    with _LOCK:
+        if _LOG_PATH is None:
+            ts = datetime.now(tz=timezone.utc).strftime("%Y%m%dT%H%M%SZ")
+            d = audit_log_dir()
+            try:
+                d.mkdir(parents=True, exist_ok=True)
+            except Exception:
+                # Fall back to tmpdir; if that fails too, log_event swallows it.
+                try:
+                    import tempfile
+                    d = Path(tempfile.gettempdir()) / "datatools-logs"
+                    d.mkdir(parents=True, exist_ok=True)
+                except Exception:
+                    pass
+            _LOG_PATH = d / f"datatools-{ts}-{sid}.jsonl"
+        return _LOG_PATH
+
+
+def log_event(
+    category: str,
+    message: str,
+    *,
+    level: str = "info",
+    **extra: Any,
+) -> None:
+    """Append one event to the session log.
+
+    ``category`` groups related events (``upload``, ``analyze``,
+    ``tool_run``, ``error``, ``nav``); ``message`` is the human sentence.
+    ``extra`` keys become fields of the JSON object, with any value that
+    ``json.dumps`` rejects stored as ``str(value)``.
+
+    Failures are swallowed silently — a broken audit log must not take
+    the GUI down.
+    """
+    def _json_safe(value: Any) -> Any:
+        # Probe each extra alone so one bad value can't poison the entry.
+        try:
+            json.dumps(value)
+        except (TypeError, ValueError):
+            return str(value)
+        return value
+
+    try:
+        event = {
+            "ts": datetime.now(tz=timezone.utc).isoformat(timespec="milliseconds"),
+            "level": level,
+            "category": category,
+            "session": _session_id()[:8],
+            "message": message,
+        }
+        event.update((key, _json_safe(val)) for key, val in extra.items())
+        with audit_log_path().open("a", encoding="utf-8") as f:
+            f.write(json.dumps(event, ensure_ascii=False) + "\n")
+    except Exception:
+        # Last-ditch silent swallow. Diagnostics is best-effort.
+        pass
+
+
+def log_session_start() -> None:
+    """Write the session-start banner; repeat calls in one process are no-ops."""
+    global _SESSION_STARTED
+    with _LOCK:
+        already_started = _SESSION_STARTED
+        _SESSION_STARTED = True
+    if already_started:
+        return
+
+    def _or_unknown(probe) -> str:
+        # Metadata is best-effort: a failing lookup degrades to "?".
+        try:
+            return str(probe())
+        except Exception:
+            return "?"
+
+    log_event(
+        "session",
+        "Session started",
+        platform=f"{platform.system()} {platform.release()}",
+        python=sys.version.split()[0],
+        user=_or_unknown(getpass.getuser),
+        cwd=_or_unknown(Path.cwd),
+        log_file=str(audit_log_path()),
+    )
+
+
+def log_exception(where: str, exc: BaseException, **extra: Any) -> None:
+    """Log a caught exception as an ``error``-category, error-level event."""
+    exc_name = type(exc).__name__
+    exc_text = str(exc)
+    log_event(
+        "error",
+        f"{where}: {exc_name}: {exc_text}",
+        level="error",
+        exc_type=exc_name, exc_message=exc_text, **extra,
+    )
+
+
+def log_page_open(slug: str) -> None:
+    """Emit a ``nav`` event when the user lands on a different page.
+
+    Streamlit re-executes the page script on every widget interaction,
+    so the last logged slug is kept in ``st.session_state`` and a repeat
+    of the same slug is dropped. If session state is unreachable (for
+    example outside Streamlit, in tests) the event is always emitted.
+    """
+    marker = "_audit_current_page"
+    try:
+        import streamlit as st
+        state = st.session_state
+        if state.get(marker) == slug:
+            return
+        state[marker] = slug
+    except Exception:
+        # No usable session state: fall through and log anyway.
+        pass
+    log_event("nav", f"Opened {slug}", page=slug)
+
+
+def reset_for_tests() -> None:
+    """Forget the cached log path, session id and banner flag.
+
+    Test-only: call from a pytest fixture when tests need isolation.
+    """
+    global _LOG_PATH, _SESSION_ID, _SESSION_STARTED
+    with _LOCK:
+        _LOG_PATH, _SESSION_ID, _SESSION_STARTED = None, None, False
+
+
+__all__ = [
+    "audit_log_dir",
+    "audit_log_path",
+    "log_event",
+    "log_exception",
+    "log_page_open",
+    "log_session_start",
+    "reset_for_tests",
+]
diff --git a/src/gui/_home.py b/src/gui/_home.py
index 317f59c..5f2a279 100644
--- a/src/gui/_home.py
+++ b/src/gui/_home.py
@@ -86,6 +86,7 @@ def _home_page() -> None:
         help=t("upload.uploader_help"),
     )
     if new_files:
+        from src.audit import log_event
         changed = False
         for f in new_files:
             if f.name not in home_uploads:
@@ -94,6 +95,12 @@ def _home_page() -> None:
                     "size": f.size,
                 }
                 changed = True
+                log_event(
+                    "upload",
+                    f"Uploaded {f.name}",
+                    filename=f.name,
+                    bytes=f.size,
+                )
         if changed:
             st.session_state["home_uploads"] = home_uploads
 
@@ -139,6 +146,12 @@ def _home_page() -> None:
                 to_remove = name
 
         if to_remove is not None:
+            from src.audit import log_event
+            log_event(
+                "upload",
+                f"Removed {to_remove}",
+                filename=to_remove,
+            )
             del home_uploads[to_remove]
             # Drop any findings/results tied to the removed file.
             findings_by_file_drop = st.session_state.get(
@@ -209,6 +222,12 @@ def _home_page() -> None:
         st.rerun()
 
     if run_clicked:
+        from src.audit import log_event
+        log_event(
+            "analyze",
+            f"Run analysis clicked on {len(pending)} file(s)",
+            files=list(pending),
+        )
         progress = st.progress(0.0, text=t("upload.scanning"))
         for i, name in enumerate(pending, start=1):
             stashed = _StashedUpload(name, home_uploads[name]["bytes"])
diff --git a/src/gui/components/_legacy.py b/src/gui/components/_legacy.py
index 246e850..300fb9b 100644
--- a/src/gui/components/_legacy.py
+++ b/src/gui/components/_legacy.py
@@ -155,6 +155,10 @@ def hide_streamlit_chrome(*, gate_license: bool = True) -> None:
     can render its own form without recursion.
     """
     st.markdown(_HIDE_CHROME_CSS, unsafe_allow_html=True)
+    # Stamp a session-start record into the audit log the first time
+    # any page renders. Idempotent — subsequent calls are no-ops.
+    from src.audit import log_session_start
+    log_session_start()
     # Production-safe check runs first so a misconfigured shipped
     # build refuses to render anything (rather than rendering a
     # broken activation form that doesn't accept real blobs).
@@ -172,10 +176,39 @@ def hide_streamlit_chrome(*, gate_license: bool = True) -> None:
         require_license_or_render_activation,
     )
     render_license_status_sidebar()
+    _render_diagnostics_sidebar()
     if gate_license:
         require_license_or_render_activation()
 
 
+def _render_diagnostics_sidebar() -> None:
+    """Render a small Diagnostics expander in the sidebar.
+
+    Shows the path to the current session's audit log and an "Open
+    folder" button, behind an expander so it takes no screen space
+    until opened. The button opens ``log_path.parent`` — the folder the
+    file really lives in, even after the tmpdir fallback.
+    """
+    from src.audit import audit_log_path
+    log_path = audit_log_path()
+    with st.sidebar:
+        with st.expander("🩺  Diagnostics", expanded=False):
+            st.caption("Audit log for this session:")
+            st.code(str(log_path), language=None)
+            if st.button(
+                "📂  Open log folder",
+                key="_diag_open_logs",
+                type="secondary",
+                use_container_width=True,
+            ):
+                opened = _open_in_file_manager(log_path.parent, select=log_path)
+                if not opened:
+                    st.warning(
+                        "Could not open the file manager from here. "
+                        "Path is above — paste it into your file manager."
+                    )
+
+
 # ---------------------------------------------------------------------------
 # Clean shutdown
 # ---------------------------------------------------------------------------
@@ -1669,6 +1702,8 @@ def _run_analysis_on_upload(uploaded):
     one of several uploaded files) should yield a clean red banner for
     that file, not kill the whole multi-file analysis run.
     """
+    import hashlib
+    from src.audit import log_event, log_exception
     from src.core.analyze import Finding, analyze
     from src.core.errors import format_for_user
     from src.core.io import repair_bytes
@@ -1676,6 +1711,18 @@ def _run_analysis_on_upload(uploaded):
     name = uploaded.name
     data = uploaded.getvalue()
     suffix = name.rsplit(".", 1)[-1].lower() if "." in name else ""
+    digest = hashlib.sha1(
+        data, usedforsecurity=False,
+    ).hexdigest()[:12] if data else "empty"
+
+    log_event(
+        "analyze",
+        f"Analyzing {name}",
+        filename=name,
+        bytes=len(data),
+        sha1_12=digest,
+        suffix=suffix,
+    )
 
     def _error_finding(description: str, fid: str = "analysis_failed") -> list[Finding]:
         return [Finding(
@@ -1689,6 +1736,13 @@ def _run_analysis_on_upload(uploaded):
         )]
 
     if not data:
+        log_event(
+            "analyze",
+            f"Skipping {name} — 0 bytes",
+            level="warn",
+            filename=name,
+            outcome="empty_upload",
+        )
         return _error_finding(
             f"`{name}` is empty (0 bytes). Please re-upload — the bytes "
             f"may not have transferred correctly from your browser.",
@@ -1698,7 +1752,17 @@ def _run_analysis_on_upload(uploaded):
     try:
         if suffix in ("xlsx", "xls"):
             df = pd.read_excel(io.BytesIO(data), dtype=str, keep_default_na=False)
-            return analyze(df)
+            findings = analyze(df)
+            log_event(
+                "analyze",
+                f"Analyzed {name} ({len(findings)} findings)",
+                filename=name,
+                bytes=len(data),
+                sha1_12=digest,
+                findings=len(findings),
+                rows=len(df), cols=len(df.columns),
+            )
+            return findings
 
         # CSV / TSV: run repair_bytes so the user sees csv_* findings.
         text_head = data[:4096].decode("utf-8", errors="replace")
@@ -1710,6 +1774,13 @@ def _run_analysis_on_upload(uploaded):
                     break
         repair = repair_bytes(data, encoding="utf-8", delimiter=delim)
         if not repair.repaired_bytes:
+            log_event(
+                "analyze",
+                f"Skipping {name} — empty after repair",
+                level="warn",
+                filename=name,
+                outcome="empty_after_repair",
+            )
             return _error_finding(
                 f"`{name}` is empty after pre-parse repair "
                 f"(original was {len(data)} bytes — likely all NUL "
@@ -1723,8 +1794,25 @@ def _run_analysis_on_upload(uploaded):
             encoding="utf-8", delimiter=delim,
             dtype=str, keep_default_na=False, on_bad_lines="warn",
         )
-        return analyze(df, repair_result=repair)
-    except pd.errors.EmptyDataError:
+        findings = analyze(df, repair_result=repair)
+        log_event(
+            "analyze",
+            f"Analyzed {name} ({len(findings)} findings)",
+            filename=name,
+            bytes=len(data),
+            sha1_12=digest,
+            findings=len(findings),
+            rows=len(df), cols=len(df.columns),
+            delimiter=repr(delim),
+        )
+        return findings
+    except pd.errors.EmptyDataError as e:
+        log_exception(
+            f"analyze({name})",
+            e,
+            filename=name,
+            outcome="empty_after_repair",
+        )
         return _error_finding(
             f"`{name}` could not be parsed — pandas reports no columns "
             f"in the file. Original size was {len(data)} bytes. Open "
@@ -1733,6 +1821,12 @@ def _run_analysis_on_upload(uploaded):
             fid="empty_after_repair",
         )
     except Exception as e:
+        log_exception(
+            f"analyze({name})",
+            e,
+            filename=name,
+            outcome="analysis_failed",
+        )
         return _error_finding(
             f"`{name}` could not be analyzed: {format_for_user(e)}",
         )
diff --git a/src/gui/pages/1_Deduplicator.py b/src/gui/pages/1_Deduplicator.py
index 05716dc..ed03144 100644
--- a/src/gui/pages/1_Deduplicator.py
+++ b/src/gui/pages/1_Deduplicator.py
@@ -33,6 +33,8 @@ from src.license import FeatureFlag
 
 hide_streamlit_chrome()
 render_sticky_footer()
+from src.audit import log_page_open
+log_page_open("1_Deduplicator")
 require_feature_or_render_upgrade(FeatureFlag.DEDUPLICATOR)
 
 # ---------------------------------------------------------------------------
@@ -231,6 +233,8 @@ if uploaded is not None:
 
             progress_bar.empty()
             st.session_state["result"] = result
+            from src.audit import log_event
+            log_event("tool_run", "Find Duplicates run", page="1_Deduplicator")
             st.session_state["review_decisions"] = {}
             # One-shot flag for the scroll snippet at the bottom of the
             # page. Force a rerun so the Preview / Options expanders see
diff --git a/src/gui/pages/2_Text_Cleaner.py b/src/gui/pages/2_Text_Cleaner.py
index e808668..495a631 100644
--- a/src/gui/pages/2_Text_Cleaner.py
+++ b/src/gui/pages/2_Text_Cleaner.py
@@ -35,6 +35,8 @@ from src.core.text_clean import (
 
 hide_streamlit_chrome()
 render_sticky_footer()
+from src.audit import log_page_open
+log_page_open("2_Text_Cleaner")
 require_feature_or_render_upgrade(FeatureFlag.TEXT_CLEANER)
 
 
@@ -218,6 +220,8 @@ if st.button("Clean Text", type="primary", use_container_width=True):
             st.error(str(e))
             st.stop()
     st.session_state["textclean_result"] = result
+    from src.audit import log_event
+    log_event("tool_run", "Clean Text run", page="2_Text_Cleaner")
     st.session_state["textclean_input_name"] = uploaded.name
     # One-shot flag picked up on the next pass to scroll the parent
     # document to the Results anchor (see scroll snippet below).
diff --git a/src/gui/pages/3_Format_Standardizer.py b/src/gui/pages/3_Format_Standardizer.py
index c043f95..35d7ab1 100644
--- a/src/gui/pages/3_Format_Standardizer.py
+++ b/src/gui/pages/3_Format_Standardizer.py
@@ -33,6 +33,8 @@ from src.license import FeatureFlag
 
 hide_streamlit_chrome()
 render_sticky_footer()
+from src.audit import log_page_open
+log_page_open("3_Format_Standardizer")
 require_feature_or_render_upgrade(FeatureFlag.FORMAT_STANDARDIZER)
 
 
@@ -537,6 +539,8 @@ if st.button(
             st.error(str(e))
             st.stop()
     st.session_state["fmtstd_result"] = result
+    from src.audit import log_event
+    log_event("tool_run", "Standardize Formats run", page="3_Format_Standardizer")
     st.session_state["fmtstd_input_name"] = uploaded.name
     # One-shot flag picked up on the next pass to scroll the parent
     # document to the Results anchor (see scroll snippet below).
diff --git a/src/gui/pages/4_Missing_Values.py b/src/gui/pages/4_Missing_Values.py
index 0bb7d75..ee997af 100644
--- a/src/gui/pages/4_Missing_Values.py
+++ b/src/gui/pages/4_Missing_Values.py
@@ -34,6 +34,8 @@ from src.license import FeatureFlag
 
 hide_streamlit_chrome()
 render_sticky_footer()
+from src.audit import log_page_open
+log_page_open("4_Missing_Values")
 require_feature_or_render_upgrade(FeatureFlag.MISSING_HANDLER)
 
 
@@ -291,6 +293,8 @@ if st.button("Handle Missing Values", type="primary", use_container_width=True):
             st.error(format_for_user(e))
             st.stop()
     st.session_state["missing_result"] = result
+    from src.audit import log_event
+    log_event("tool_run", "Fix Missing Values run", page="4_Missing_Values")
     st.session_state["missing_input_name"] = uploaded.name
     st.session_state["missing_options"] = options.to_dict()
     # One-shot flag picked up on the next pass to scroll the parent
diff --git a/src/gui/pages/5_Column_Mapper.py b/src/gui/pages/5_Column_Mapper.py
index 52e0636..38b28e7 100644
--- a/src/gui/pages/5_Column_Mapper.py
+++ b/src/gui/pages/5_Column_Mapper.py
@@ -35,6 +35,8 @@ from src.license import FeatureFlag
 
 hide_streamlit_chrome()
 render_sticky_footer()
+from src.audit import log_page_open
+log_page_open("5_Column_Mapper")
 require_feature_or_render_upgrade(FeatureFlag.COLUMN_MAPPER)
 
 
@@ -338,6 +340,8 @@ if st.button("Apply Column Mapping", type="primary", use_container_width=True):
             st.error(format_for_user(e))
             st.stop()
     st.session_state["colmap_result"] = result
+    from src.audit import log_event
+    log_event("tool_run", "Map Columns run", page="5_Column_Mapper")
     st.session_state["colmap_input_name"] = uploaded.name
     st.session_state["colmap_options"] = options.to_dict()
     # One-shot flag picked up on the next pass to scroll the parent
diff --git a/src/gui/pages/6_Outlier_Detector.py b/src/gui/pages/6_Outlier_Detector.py
index 033e921..e878877 100644
--- a/src/gui/pages/6_Outlier_Detector.py
+++ b/src/gui/pages/6_Outlier_Detector.py
@@ -22,6 +22,8 @@ from src.license import FeatureFlag
 
 hide_streamlit_chrome()
 render_sticky_footer()
+from src.audit import log_page_open
+log_page_open("6_Outlier_Detector")
 require_feature_or_render_upgrade(FeatureFlag.OUTLIER_DETECTOR)
 
 # ---------------------------------------------------------------------------
diff --git a/src/gui/pages/7_Multi_File_Merger.py b/src/gui/pages/7_Multi_File_Merger.py
index f4c6616..dc94ce4 100644
--- a/src/gui/pages/7_Multi_File_Merger.py
+++ b/src/gui/pages/7_Multi_File_Merger.py
@@ -22,6 +22,8 @@ from src.license import FeatureFlag
 
 hide_streamlit_chrome()
 render_sticky_footer()
+from src.audit import log_page_open
+log_page_open("7_Multi_File_Merger")
 require_feature_or_render_upgrade(FeatureFlag.MULTI_FILE_MERGER)
 
 # ---------------------------------------------------------------------------
diff --git a/src/gui/pages/8_Validator_Reporter.py b/src/gui/pages/8_Validator_Reporter.py
index 184a171..c0ee773 100644
--- a/src/gui/pages/8_Validator_Reporter.py
+++ b/src/gui/pages/8_Validator_Reporter.py
@@ -22,6 +22,8 @@ from src.license import FeatureFlag
 
 hide_streamlit_chrome()
 render_sticky_footer()
+from src.audit import log_page_open
+log_page_open("8_Validator_Reporter")
 require_feature_or_render_upgrade(FeatureFlag.VALIDATOR_REPORTER)
 
 # ---------------------------------------------------------------------------
diff --git a/src/gui/pages/9_Pipeline_Runner.py b/src/gui/pages/9_Pipeline_Runner.py
index d954c5a..d598133 100644
--- a/src/gui/pages/9_Pipeline_Runner.py
+++ b/src/gui/pages/9_Pipeline_Runner.py
@@ -36,6 +36,8 @@ from src.license import FeatureFlag
 
 hide_streamlit_chrome()
 render_sticky_footer()
+from src.audit import log_page_open
+log_page_open("9_Pipeline_Runner")
 require_feature_or_render_upgrade(FeatureFlag.PIPELINE_RUNNER)
 
 
@@ -283,6 +285,8 @@ if st.button(
 
     progress.progress(1.0, text="Done")
     st.session_state["pipeline_result"] = result
+    from src.audit import log_event
+    log_event("tool_run", "Automated Workflows run", page="9_Pipeline_Runner")
     st.session_state["pipeline_input_name"] = uploaded.name
     # One-shot flag picked up on the next pass to scroll the parent
     # document to the Results anchor (see scroll snippet at end of file).