feat(audit): daily filename + 7-day retention sweep
Replaces the per-session ``datatools-<ts>-<sid>.jsonl`` filename with a single daily file ``datatools-YYYY-MM-DD.jsonl`` (local date). Sessions on the same calendar day share a file via the writer thread's per-batch open+append; multiple DataTools instances running concurrently on the same day fan into the same file (append-mode small writes are atomic on POSIX, safe-enough on Windows under realistic load). Drops the ``_LOG_PATH`` module global and the lock around it — ``audit_log_path()`` is now pure date math, recomputed on every call so a session that crosses midnight follows the rollover into the next day's file. Adds ``_sweep_old_logs()``, invoked once per process at writer-thread start. It deletes any ``datatools-*.jsonl`` whose mtime is older than 7 days. The glob deliberately matches the legacy per-session filename too, so users upgrading from the previous build don't keep a permanent backlog of pre-retention files. Event ``ts`` fields stay UTC; only the filename uses local date, because users go looking for "today's log" on their wall clock. Tests cover: daily filename shape, sweep removes stale files, sweep keeps fresh files, sweep also clears legacy filenames. Rollback: ``git revert HEAD`` restores the per-session filename and removes the sweep. No data migration needed either way — existing files keep working as JSONL. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -174,6 +174,63 @@ class TestKillSwitchContract:
|
||||
audit.reset_for_tests()
|
||||
|
||||
|
||||
class TestDailyFileRetention:
    """Daily filename + 7-day retention sweep on writer-thread start."""

    @staticmethod
    def _write_aged_log(path, marker, age_days):
        """Create ``path`` with one JSONL line and backdate its mtime.

        Args:
            path: File to create (a ``pathlib.Path`` under ``tmp_path``).
            marker: Value stored in the line's ``ts`` field, so a surviving
                file can be told apart when debugging a failure.
            age_days: How many days in the past to set atime/mtime.

        Returns:
            ``path``, for convenient assignment at the call site.
        """
        import os

        path.write_text(f'{{"ts": "{marker}"}}\n', encoding="utf-8")
        backdated = time.time() - age_days * 86400
        os.utime(path, (backdated, backdated))
        return path

    def test_filename_is_daily_local_date(self, isolated_audit, tmp_path):
        """A logged event lands in ``datatools-<local date>.jsonl``."""
        from datetime import datetime

        audit = isolated_audit
        # Sample the local date on both sides of the write: if the run
        # straddles midnight the file may legitimately carry either date,
        # and checking only the "after" date would fail spuriously.
        day_before = datetime.now().strftime("%Y-%m-%d")
        audit.log_event("test", "today")
        audit.flush_audit_log(timeout_s=2.0)
        day_after = datetime.now().strftime("%Y-%m-%d")

        # dict.fromkeys de-duplicates while keeping order (usually one date).
        candidates = [
            tmp_path / f"datatools-{day}.jsonl"
            for day in dict.fromkeys((day_before, day_after))
        ]
        assert any(path.exists() for path in candidates), (
            f"expected daily file {[p.name for p in candidates]}, got "
            f"{[p.name for p in tmp_path.iterdir()]}"
        )

    def test_sweep_deletes_files_older_than_retention(
        self, isolated_audit, tmp_path,
    ):
        """Files with mtime > _RETENTION_DAYS old get pruned when the
        writer thread starts. Files within the window survive."""
        audit = isolated_audit
        stale = self._write_aged_log(
            tmp_path / "datatools-2025-01-01.jsonl",
            "stale",
            audit._RETENTION_DAYS + 1,
        )
        fresh = self._write_aged_log(
            tmp_path / "datatools-2026-05-18.jsonl",
            "fresh",
            1,
        )

        # Logging an event is what kicks the writer (and thus the sweep).
        audit.log_event("test", "kick the writer")
        audit.flush_audit_log(timeout_s=2.0)

        assert not stale.exists(), "Stale log should have been swept."
        assert fresh.exists(), "Fresh log must not be swept."

    def test_sweep_also_clears_legacy_per_session_files(
        self, isolated_audit, tmp_path,
    ):
        """Old ``datatools-<ts>-<sid>.jsonl`` filenames must also match
        the sweep glob, so upgrading users don't keep a permanent
        backlog from before the retention switch."""
        audit = isolated_audit
        legacy = self._write_aged_log(
            tmp_path / "datatools-20250101T120000Z-abc12345.jsonl",
            "legacy",
            audit._RETENTION_DAYS + 1,
        )

        audit.log_event("test", "kick the writer")
        audit.flush_audit_log(timeout_s=2.0)

        assert not legacy.exists(), "Legacy per-session file not swept."
|
||||
|
||||
|
||||
class TestSerializationSafety:
|
||||
def test_non_json_extras_get_str_coerced(self, isolated_audit, tmp_path):
|
||||
audit = isolated_audit
|
||||
|
||||
Reference in New Issue
Block a user