feat(audit): JSONL audit log for support diagnostics
New ``src/audit.py`` module records GUI actions to a per-session
JSONL file under ``~/.datatools/logs/`` (overridable via
``DATATOOLS_AUDIT_DIR``). The file is human-readable (one JSON
object per line, each with a ``message`` field) AND trivially
machine-parseable — the support flow is "client mails the file,
we read it and explain what went wrong."
Format example::
{"ts":"2026-05-17T05:30:00.123+00:00","level":"info","category":"session",
"session":"a1b2c3d4","message":"Session started",
"platform":"Windows 11","python":"3.14.0","user":"Michael Dombaugh",
"log_file":"C:\\Users\\Michael Dombaugh\\.datatools\\logs\\datatools-...jsonl"}
{"ts":"...","category":"upload","message":"Uploaded customers.csv",
"filename":"customers.csv","bytes":24813}
{"ts":"...","category":"analyze","message":"Analyzed customers.csv (3 findings)",
"filename":"customers.csv","findings":3,"rows":120,"cols":8}
{"ts":"...","category":"tool_run","message":"Clean Text run",
"page":"2_Text_Cleaner"}
{"ts":"...","category":"error","level":"error",
"message":"analyze(weird.csv): EmptyDataError: No columns to parse",
"filename":"weird.csv","outcome":"empty_after_repair"}
Public API:
- ``log_event(category, message, **extra)``
- ``log_session_start()`` — idempotent banner with platform info
- ``log_page_open(slug)`` — emit a ``nav`` event, deduplicated per
Streamlit session so reruns don't spam the log
- ``log_exception(where, exc, **extra)`` — convenience wrapper
- ``audit_log_path()`` / ``audit_log_dir()`` — for the UI
Wired in at:
- ``hide_streamlit_chrome``: stamps session start, mounts a small
"🩺 Diagnostics" expander in the sidebar with the log path and
an "Open log folder" button so the user can grab the file to
attach to a support email.
- Home page: ``upload`` event on every new file, ``upload`` event
on per-file remove, ``analyze`` event with file count when
Run-analysis fires.
- ``_run_analysis_on_upload``: ``analyze`` event with rows / cols /
findings count per file, plus ``error`` events on every caught
exception (empty upload, empty after repair, pandas EmptyDataError,
generic Exception).
- Every Ready tool page (1, 2, 3, 4, 5, 9): ``tool_run`` event
immediately after the primary action stashes its result.
- Every tool page (1-9): ``log_page_open(slug)`` on render — deduped
via session state so we don't get one event per Streamlit rerun.
Safety:
- ``log_event`` wraps every write in try/except. A broken audit
log must NOT crash the GUI.
- Non-JSON-serializable extras are ``str()``-coerced before writing.
- File CONTENTS are never logged. We capture filename, byte count,
and (in the analyzer) a 12-char sha1 fingerprint of the bytes so
the same file re-uploaded gets the same trace.
- License keys, session cookies, etc. are not logged.
- ``DATATOOLS_AUDIT_DIR`` env var lets tests redirect writes into a
tmp dir.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
227
src/audit.py
Normal file
227
src/audit.py
Normal file
@@ -0,0 +1,227 @@
|
||||
"""Audit log — records GUI actions for support diagnostics.
|
||||
|
||||
A client running DataTools who hits a bug should be able to grab one
|
||||
file off disk, mail it to support, and have us reconstruct what they
|
||||
were doing when things broke. That file is the audit log written by
|
||||
this module.
|
||||
|
||||
Design choices:
|
||||
|
||||
- **JSONL**, one event per line. Each line is a valid JSON object; the
|
||||
whole file is grep-friendly, ``jq``-friendly, and still readable in
|
||||
Notepad / TextEdit if no tooling is available. Each event carries a
|
||||
human-readable ``message`` field so the file is useful even without
|
||||
any tooling.
|
||||
- **One file per session**, named ``datatools-<utc-timestamp>-<id>.jsonl``.
|
||||
Multiple sessions on the same machine don't clobber each other, and
|
||||
the filename sorts chronologically.
|
||||
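- **Default location**: ``~/.datatools/logs/`` on every platform.
  Overridable via the ``DATATOOLS_AUDIT_DIR`` environment variable —
  used by tests to redirect writes into a tmp dir. If the directory
  cannot be created, the log falls back to a ``datatools-logs`` folder
  under the system temp directory.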
|
||||
- **Never crashes the app**. Every write is wrapped in a try/except;
|
||||
a broken audit log must not take down the GUI.
|
||||
- **No PII bytes**: file CONTENTS are never logged. We log the
|
||||
filename, byte size, and a short content hash so the same file
|
||||
re-uploaded gets the same fingerprint, but the actual bytes stay
|
||||
local.
|
||||
|
||||
Public API:
|
||||
|
||||
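- ``log_event(category, message, **extra)`` — write one event.
- ``log_session_start()`` — emit a session-start record with platform
  info. Idempotent within one process.
- ``log_exception(where, exc, **extra)`` — write an ``error`` event for
  a caught exception.
- ``log_page_open(slug)`` — emit a ``nav`` event, deduplicated per
  Streamlit session.
- ``audit_log_path()`` — return the path to the current session's file
  so the GUI can show it to the user.
- ``audit_log_dir()`` — return the directory holding all session logs.
- ``reset_for_tests()`` — clear the module-level session state (tests
  only).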
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import getpass
|
||||
import json
|
||||
import os
|
||||
import platform
|
||||
import sys
|
||||
import threading
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
# Module-level cache for per-session state. Streamlit reruns the script
|
||||
# many times per session but the module is imported once, so these
|
||||
# survive across reruns within the same Python process.
|
||||
_LOCK = threading.Lock()
|
||||
_LOG_PATH: Path | None = None
|
||||
_SESSION_ID: str | None = None
|
||||
_SESSION_STARTED: bool = False
|
||||
|
||||
|
||||
def audit_log_dir() -> Path:
|
||||
"""Return the directory where audit logs are written.
|
||||
|
||||
Defaults to ``~/.datatools/logs/``. Overrideable via the
|
||||
``DATATOOLS_AUDIT_DIR`` environment variable so tests can redirect
|
||||
writes into ``tmp_path``.
|
||||
"""
|
||||
override = os.environ.get("DATATOOLS_AUDIT_DIR")
|
||||
if override:
|
||||
return Path(override)
|
||||
return Path.home() / ".datatools" / "logs"
|
||||
|
||||
|
||||
def _session_id() -> str:
|
||||
global _SESSION_ID
|
||||
with _LOCK:
|
||||
if _SESSION_ID is None:
|
||||
_SESSION_ID = uuid.uuid4().hex
|
||||
return _SESSION_ID
|
||||
|
||||
|
||||
def audit_log_path() -> Path:
|
||||
"""Return this session's log file path.
|
||||
|
||||
The path is created the first time it's queried so each Python
|
||||
process gets a single file regardless of how many Streamlit
|
||||
reruns happen.
|
||||
"""
|
||||
global _LOG_PATH
|
||||
with _LOCK:
|
||||
if _LOG_PATH is None:
|
||||
ts = datetime.now(tz=timezone.utc).strftime("%Y%m%dT%H%M%SZ")
|
||||
sid = _session_id()[:8]
|
||||
d = audit_log_dir()
|
||||
try:
|
||||
d.mkdir(parents=True, exist_ok=True)
|
||||
except Exception:
|
||||
# If we can't create the dir, fall back to a tmpdir
|
||||
# location so we never crash the app for the audit
|
||||
# log's sake.
|
||||
import tempfile
|
||||
d = Path(tempfile.gettempdir()) / "datatools-logs"
|
||||
d.mkdir(parents=True, exist_ok=True)
|
||||
_LOG_PATH = d / f"datatools-{ts}-{sid}.jsonl"
|
||||
return _LOG_PATH
|
||||
|
||||
|
||||
def log_event(
    category: str,
    message: str,
    *,
    level: str = "info",
    **extra: Any,
) -> None:
    """Append one event to the session log.

    Args:
        category: Groups related events (e.g. ``upload``, ``analyze``,
            ``tool_run``, ``error``, ``nav``).
        message: The human sentence that lands in the file.
        level: Severity label stored with the event; defaults to
            ``"info"``.
        **extra: Structured context (filename, byte counts, finding
            counts, timings). Each value is stored as-is when it is
            strictly JSON-serialisable; otherwise its ``str()`` form is
            stored instead.

    Failures are swallowed silently — a broken audit log must not take
    the GUI down.
    """
    try:
        event: dict[str, Any] = {
            "ts": datetime.now(tz=timezone.utc).isoformat(timespec="milliseconds"),
            "level": level,
            "category": category,
            "session": _session_id()[:8],
            "message": message,
        }
        # Attach extras with serialization safety so one bad value can't
        # poison the whole entry.
        for key, value in extra.items():
            try:
                # allow_nan=False: by default NaN/Infinity are emitted as
                # bare tokens, which are not valid JSON and would break
                # the one-valid-object-per-line guarantee.
                json.dumps(value, allow_nan=False)
            except Exception:
                try:
                    value = str(value)
                except Exception:
                    # Even str() failed (broken __str__, runaway
                    # recursion): keep the key with a placeholder.
                    value = f"<unprintable {type(value).__name__}>"
            event[key] = value

        line = json.dumps(event, ensure_ascii=False, allow_nan=False)
        with audit_log_path().open("a", encoding="utf-8") as f:
            f.write(line + "\n")
    except Exception:
        # Last-ditch silent swallow. Diagnostics is best-effort.
        pass
|
||||
|
||||
|
||||
def log_session_start() -> None:
    """Write the session-start banner. Idempotent within one process.

    The banner records platform, Python version, user name, working
    directory and the log file path. Every piece of metadata is
    gathered best-effort: a lookup that fails is recorded as ``"?"``
    instead of propagating into the GUI.
    """
    global _SESSION_STARTED
    with _LOCK:
        if _SESSION_STARTED:
            return
        _SESSION_STARTED = True

    def _probe(getter: Any) -> str:
        # Run one metadata lookup; never let its failure escape.
        try:
            return str(getter())
        except Exception:
            return "?"

    # The lock is released before writing so the event write never runs
    # while this function holds it.
    log_event(
        "session",
        "Session started",
        platform=_probe(lambda: f"{platform.system()} {platform.release()}"),
        python=_probe(lambda: sys.version.split()[0]),
        user=_probe(getpass.getuser),
        cwd=_probe(Path.cwd),
        log_file=_probe(audit_log_path),
    )
|
||||
|
||||
|
||||
def log_exception(where: str, exc: BaseException, **extra: Any) -> None:
    """Record a caught exception as an ``error`` event.

    Args:
        where: Short label for the place the exception was caught; it
            prefixes the message.
        exc: The caught exception. Its class name and text are stored in
            ``exc_type`` / ``exc_message`` and in the message.
        **extra: Additional structured context. Keys that would collide
            with fields this function or ``log_event`` sets itself
            (``category``, ``message``, ``level``, ``exc_type``,
            ``exc_message``) are stored under an ``extra_`` prefix
            rather than raising ``TypeError`` for a duplicate keyword.

    Like the rest of the module this never raises because of the values
    it is given.
    """
    exc_name = type(exc).__name__
    try:
        exc_text = str(exc)
    except Exception:
        # A broken __str__ must not turn error reporting into a crash.
        exc_text = "<unprintable exception>"

    reserved = {"category", "message", "level", "exc_type", "exc_message"}
    fields = {
        (f"extra_{key}" if key in reserved else key): value
        for key, value in extra.items()
    }
    fields["exc_type"] = exc_name
    fields["exc_message"] = exc_text

    log_event(
        "error",
        f"{where}: {exc_name}: {exc_text}",
        level="error",
        **fields,
    )
|
||||
|
||||
|
||||
def log_page_open(slug: str) -> None:
    """Emit a ``nav`` event for *slug*, skipping repeats of the same page.

    Every widget interaction makes Streamlit rerun the page script, so
    the last logged page is remembered in session state under
    ``_audit_current_page``; an event is written only when the slug
    differs from it. If session state cannot be used (for example when
    running outside Streamlit in tests), the event is always written.
    """
    same_page = False
    try:
        import streamlit as st

        state = st.session_state
        if state.get("_audit_current_page") == slug:
            same_page = True
        else:
            state["_audit_current_page"] = slug
    except Exception:
        # Session state unreachable: fall through and log anyway.
        pass

    if same_page:
        return
    log_event("nav", f"Opened {slug}", page=slug)
|
||||
|
||||
|
||||
def reset_for_tests() -> None:
    """Clear the cached log path, session id and banner flag.

    Test-only: call from a pytest fixture when tests need isolation
    from each other's module-level state.
    """
    global _LOG_PATH, _SESSION_ID, _SESSION_STARTED
    with _LOCK:
        _SESSION_STARTED = False
        _LOG_PATH = _SESSION_ID = None
|
||||
|
||||
|
||||
# Names exported by ``from audit import *``.
__all__ = [
    # Locations
    "audit_log_dir",
    "audit_log_path",
    # Event writers
    "log_event",
    "log_exception",
    "log_page_open",
    "log_session_start",
    # Test support
    "reset_for_tests",
]
|
||||
Reference in New Issue
Block a user