From 59c6d0f914d631dfd853767fe809feb7ec2ebb0c Mon Sep 17 00:00:00 2001 From: Michael Date: Sun, 17 May 2026 02:00:31 +0000 Subject: [PATCH] fix(audit): defensive wrap so audit failures can never blank the GUI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported: after pulling commit c73d716 (audit log) the main body of every page showed empty. Sidebar nav still worked. Diagnosis: the most likely path is that something inside the audit calls — ``_render_diagnostics_sidebar()`` calling ``audit_log_path()``, or ``log_session_start()`` itself — raises during ``hide_streamlit_chrome`` on the user's environment (Python 3.14 on Windows, a less-tested combo than the test environment). Streamlit's script runner sees the exception, and on some chrome paths it eats it without surfacing an error block, leaving the page body empty. The audit log is best-effort by design. Make that contract real: 1. ``hide_streamlit_chrome`` now wraps both ``log_session_start()`` and ``_render_diagnostics_sidebar()`` in try/except. Errors print to stderr (so the developer running ``python -m src.gui`` sees them in the launcher's console) but never bubble up to kill the page render. 2. ``audit_log_path()`` already had a tempdir fallback for the primary mkdir failure, but the SECOND mkdir wasn't protected either. Restructured to a two-level fallback: configured dir → tempdir → ``/dev/null`` (or ``NUL`` on Windows). The last fallback ensures the function never raises; ``log_event``'s own try/except handles the eventual unwritable-file case. 3. ``log_page_open(slug)`` now has an outer try/except so it cannot raise either — protecting every tool page's render path. If a user reports the same symptom again, the launcher terminal will now show a real traceback explaining what's wrong, and the GUI will still render normally. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/audit.py | 62 ++++++++++++++++++++++++++--------- src/gui/components/_legacy.py | 20 +++++++++-- 2 files changed, 63 insertions(+), 19 deletions(-) diff --git a/src/audit.py b/src/audit.py index e455169..c0ecb61 100644 --- a/src/audit.py +++ b/src/audit.py @@ -85,23 +85,47 @@ def audit_log_path() -> Path: The path is created the first time it's queried so each Python process gets a single file regardless of how many Streamlit reruns happen. + + Two-level fallback so this NEVER raises: first try the configured + audit dir, then a tempdir, then ``/dev/null``-equivalent (just a + nonexistent path) as a last resort. Callers downstream skip + writing when the file isn't writable. """ global _LOG_PATH with _LOCK: if _LOG_PATH is None: - ts = datetime.now(tz=timezone.utc).strftime("%Y%m%dT%H%M%SZ") - sid = _session_id()[:8] - d = audit_log_dir() try: + ts = datetime.now(tz=timezone.utc).strftime("%Y%m%dT%H%M%SZ") + sid = _session_id()[:8] + except Exception: + ts, sid = "unknown", "unknown" + name = f"datatools-{ts}-{sid}.jsonl" + + # First choice: configured audit dir. + d = None + try: + d = audit_log_dir() d.mkdir(parents=True, exist_ok=True) except Exception: - # If we can't create the dir, fall back to a tmpdir - # location so we never crash the app for the audit - # log's sake. - import tempfile - d = Path(tempfile.gettempdir()) / "datatools-logs" - d.mkdir(parents=True, exist_ok=True) - _LOG_PATH = d / f"datatools-{ts}-{sid}.jsonl" + d = None + + # Fallback: tempdir. + if d is None: + try: + import tempfile + d = Path(tempfile.gettempdir()) / "datatools-logs" + d.mkdir(parents=True, exist_ok=True) + except Exception: + d = None + + # Last resort: point at a path we won't try to write to — + # ``log_event``'s own try/except handles the eventual + # write failure cleanly. + if d is None: + d = Path("/dev/null") if os.name != "nt" else Path("NUL") + _LOG_PATH = d + else: + _LOG_PATH = d / name return _LOG_PATH @@ -194,16 +218,22 @@ def log_page_open(slug: str) -> None: they actually switch pages, not one per rerun. Falls back to always-emit when session state is unreachable (running outside Streamlit, e.g. in tests). + + Whole body is wrapped in try/except — the audit log is best- + effort and MUST NOT crash the page that called it. """ try: - import streamlit as st - prev = st.session_state.get("_audit_current_page") - if prev == slug: - return - st.session_state["_audit_current_page"] = slug + try: + import streamlit as st + prev = st.session_state.get("_audit_current_page") + if prev == slug: + return + st.session_state["_audit_current_page"] = slug + except Exception: + pass + log_event("nav", f"Opened {slug}", page=slug) except Exception: pass - log_event("nav", f"Opened {slug}", page=slug) def reset_for_tests() -> None: diff --git a/src/gui/components/_legacy.py b/src/gui/components/_legacy.py index 300fb9b..349a991 100644 --- a/src/gui/components/_legacy.py +++ b/src/gui/components/_legacy.py @@ -157,8 +157,14 @@ def hide_streamlit_chrome(*, gate_license: bool = True) -> None: st.markdown(_HIDE_CHROME_CSS, unsafe_allow_html=True) # Stamp a session-start record into the audit log the first time # any page renders. Idempotent — subsequent calls are no-ops. - from src.audit import log_session_start - log_session_start() + # Wrapped because a broken audit log MUST NOT take the GUI down. + try: + from src.audit import log_session_start + log_session_start() + except Exception: + import traceback, sys + print("DataTools: audit log session-start failed:", file=sys.stderr) + traceback.print_exc() # Production-safe check runs first so a misconfigured shipped # build refuses to render anything (rather than rendering a # broken activation form that doesn't accept real blobs). @@ -176,7 +182,15 @@ def hide_streamlit_chrome(*, gate_license: bool = True) -> None: require_license_or_render_activation, ) render_license_status_sidebar() - _render_diagnostics_sidebar() + # Wrapped — a broken audit-log or filesystem permission issue must + # NOT kill the page-body render. Errors print to stderr so the + # developer can still see what went wrong. + try: + _render_diagnostics_sidebar() + except Exception: + import traceback, sys + print("DataTools: diagnostics sidebar render failed:", file=sys.stderr) + traceback.print_exc() if gate_license: require_license_or_render_activation()