fix: clear all latent deprecation + resource warnings

Three real issues surfaced when running the suite with strict warnings:

1. src/core/format_standardize.py: ``datetime.utcfromtimestamp`` is
   deprecated in CPython 3.12 and slated for removal. Replace with
   ``datetime.fromtimestamp(ts, tz=timezone.utc)``. Output for the
   date-only format codes we use is byte-identical.

2. src/core/io.py: ``list_sheets`` leaked the openpyxl file handle by
   returning ``xl.sheet_names`` from an unclosed ``pd.ExcelFile``.
   Wrap in a ``with`` block so the FD closes deterministically — also
   prevents the Windows-only "file is locked" repro path.

3. tests/test_corpus.py: ``TestXlsxPollution.workbook`` fixture
   returned the bare ``pd.ExcelFile`` instead of yielding + closing.
   Convert to a yield-and-finally pattern so the class-scoped handle
   isn't leaked across the whole test file.

Also harden pytest.ini's warning policy: escalate
``ResourceWarning`` from ``src`` to an error, alongside the existing
``DeprecationWarning`` rule. Third-party warnings stay filtered — we
can't fix pandas/openpyxl/streamlit churn from here.

All 1916 tests pass under the strict filter; full and split runs
(``pytest``, ``pytest -m 'not gui'``, ``pytest -m gui``) all clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-13 16:28:48 +00:00
parent 070e3c9f06
commit b2c7b94fe9
4 changed files with 32 additions and 8 deletions

View File

@@ -14,8 +14,12 @@ markers =
fixture_sweep: parametrized sweep over the test-cases/ folder
gui: Streamlit AppTest-driven tests (live in tests/gui/)
# Warnings discipline: fail on unexpected DeprecationWarning from our own
# code, but tolerate third-party deprecations that we can't fix.
# Warnings discipline: fail on any DeprecationWarning *or* ResourceWarning
# from our own ``src`` package so a leaked file handle or stale stdlib call
# can't slip in unnoticed. Tolerate third-party deprecations / resource
# warnings — we can't fix pandas / openpyxl / streamlit churn from here.
# Order matters: pytest applies the *last* matching filter, so the blanket
# ``ignore`` rules must come first and the ``src``-scoped ``error`` rules
# last — otherwise the ignores override the errors and nothing escalates.
filterwarnings =
    ignore::DeprecationWarning
    ignore::ResourceWarning
    error::DeprecationWarning:src
    error::ResourceWarning:src

View File

@@ -26,7 +26,7 @@ import re
from loguru import logger
from dataclasses import asdict, dataclass, field
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from enum import Enum
from pathlib import Path
from typing import Any, Iterable, Literal, Optional
@@ -350,7 +350,12 @@ def _try_unix_timestamp(s: str, output_format: str) -> Optional[str]:
else:
return None
try:
return datetime.utcfromtimestamp(seconds).strftime(output_format)
# ``utcfromtimestamp`` is deprecated in 3.12 and slated for
# removal — use a timezone-aware UTC datetime and format it
# the same way. The output is identical for ``%Y-%m-%d`` and
# friends: both calls yield the same UTC calendar fields, and
# ``strftime`` only consults tzinfo for ``%z`` / ``%Z``.
return datetime.fromtimestamp(seconds, tz=timezone.utc).strftime(output_format)
except (OverflowError, ValueError, OSError):
return None

View File

@@ -368,9 +368,15 @@ def _looks_like_header(value: str) -> bool:
# ---------------------------------------------------------------------------
def list_sheets(path: Path) -> list[str]:
"""Return sheet names from an Excel workbook."""
xl = pd.ExcelFile(path, engine="openpyxl")
return xl.sheet_names
"""Return sheet names from an Excel workbook.
Uses a ``with`` block so the underlying openpyxl workbook closes
its file handle deterministically. Leaving it to garbage collection
surfaces as a ``ResourceWarning`` under strict warning filters and
can briefly hold a lock on the source file on Windows.
"""
with pd.ExcelFile(path, engine="openpyxl") as xl:
return list(xl.sheet_names)
# ---------------------------------------------------------------------------

View File

@@ -204,8 +204,17 @@ class TestXlsxPollution:
@pytest.fixture(scope="class")
def workbook(self):
# ``ExcelFile`` is a context manager — using ``yield`` + close in
# a finally lets us share one open handle across the class while
# still closing it deterministically. Returning the object bare
# leaks the file descriptor and trips strict resource-warning
# filters under recent pytest versions.
path = TEST_DATA / "21_excel_pollution.xlsx"
return pd.ExcelFile(path, engine="openpyxl")
xl = pd.ExcelFile(path, engine="openpyxl")
try:
yield xl
finally:
xl.close()
def test_sheets_present(self, workbook):
names = set(workbook.sheet_names)