From b2c7b94fe92efbb050595053b8b15828947cd359 Mon Sep 17 00:00:00 2001
From: Michael
Date: Wed, 13 May 2026 16:28:48 +0000
Subject: [PATCH] fix: clear all latent deprecation + resource warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three real issues surfaced when running the suite with strict warnings:

1. src/core/format_standardize.py: ``datetime.utcfromtimestamp`` is
   deprecated in CPython 3.12 and slated for removal. Replace with
   ``datetime.fromtimestamp(ts, tz=timezone.utc)``. Output for the
   date-only format codes we use is byte-identical.

2. src/core/io.py: ``list_sheets`` leaked the openpyxl file handle by
   returning ``xl.sheet_names`` from an unclosed ``pd.ExcelFile``. Wrap
   in a ``with`` block so the FD closes deterministically — also
   prevents the Windows-only "file is locked" repro path.

3. tests/test_corpus.py: ``TestXlsxPollution.workbook`` fixture returned
   the bare ``pd.ExcelFile`` instead of yielding + closing. Convert to a
   yield-and-finally pattern so the class-scoped handle isn't leaked
   across the whole test file.

Also harden pytest.ini's warning policy: escalate ``ResourceWarning``
from ``src`` to an error, alongside the existing ``DeprecationWarning``
rule. Third-party warnings stay filtered — we can't fix
pandas/openpyxl/streamlit churn from here.

All 1916 tests pass under the strict filter; full and split runs
(``pytest``, ``pytest -m 'not gui'``, ``pytest -m gui``) all clean.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 pytest.ini                     | 13 ++++++++++---
 src/core/format_standardize.py |  9 +++++++--
 src/core/io.py                 | 12 +++++++++---
 tests/test_corpus.py           | 11 ++++++++++-
 4 files changed, 36 insertions(+), 9 deletions(-)

diff --git a/pytest.ini b/pytest.ini
index 09177d9..ae27693 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -14,8 +14,15 @@ markers =
     fixture_sweep: parametrized sweep over the test-cases/ folder
     gui: Streamlit AppTest-driven tests (live in tests/gui/)
 
-# Warnings discipline: fail on unexpected DeprecationWarning from our own
-# code, but tolerate third-party deprecations that we can't fix.
+# Warnings discipline: fail on any DeprecationWarning *or* ResourceWarning
+# from our own ``src`` package so a leaked file handle or stale stdlib call
+# can't slip in unnoticed. Tolerate third-party deprecations / resource
+# warnings — we can't fix pandas / openpyxl / streamlit churn from here.
+#
+# Order matters: when several filters match, pytest applies the LAST one
+# listed, so the blanket ``ignore`` rules go first and ``src`` rules last.
 filterwarnings =
-    error::DeprecationWarning:src
     ignore::DeprecationWarning
+    ignore::ResourceWarning
+    error::DeprecationWarning:src
+    error::ResourceWarning:src
diff --git a/src/core/format_standardize.py b/src/core/format_standardize.py
index 28f86ed..83a9492 100644
--- a/src/core/format_standardize.py
+++ b/src/core/format_standardize.py
@@ -26,7 +26,7 @@ import re
 from loguru import logger
 
 from dataclasses import asdict, dataclass, field
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 from enum import Enum
 from pathlib import Path
 from typing import Any, Iterable, Literal, Optional
@@ -350,7 +350,12 @@ def _try_unix_timestamp(s: str, output_format: str) -> Optional[str]:
     else:
         return None
     try:
-        return datetime.utcfromtimestamp(seconds).strftime(output_format)
+        # ``utcfromtimestamp`` is deprecated in 3.12 and slated for
+        # removal — use a timezone-aware UTC datetime and format it
+        # the same way. The output is identical for ``%Y-%m-%d`` and
+        # friends because ``strftime`` ignores tzinfo on date-only
+        # format codes.
+        return datetime.fromtimestamp(seconds, tz=timezone.utc).replace(tzinfo=None).strftime(output_format)
     except (OverflowError, ValueError, OSError):
         return None
 
diff --git a/src/core/io.py b/src/core/io.py
index 6f432a6..6562e1d 100644
--- a/src/core/io.py
+++ b/src/core/io.py
@@ -368,9 +368,15 @@ def _looks_like_header(value: str) -> bool:
 # ---------------------------------------------------------------------------
 
 def list_sheets(path: Path) -> list[str]:
-    """Return sheet names from an Excel workbook."""
-    xl = pd.ExcelFile(path, engine="openpyxl")
-    return xl.sheet_names
+    """Return sheet names from an Excel workbook.
+
+    Uses a ``with`` block so the underlying openpyxl workbook closes
+    its file handle deterministically. Leaving it to garbage collection
+    surfaces as a ``ResourceWarning`` under strict warning filters and
+    can briefly hold a lock on the source file on Windows.
+    """
+    with pd.ExcelFile(path, engine="openpyxl") as xl:
+        return list(xl.sheet_names)
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/test_corpus.py b/tests/test_corpus.py
index e776444..241e94a 100644
--- a/tests/test_corpus.py
+++ b/tests/test_corpus.py
@@ -204,8 +204,17 @@ class TestXlsxPollution:
 
     @pytest.fixture(scope="class")
     def workbook(self):
+        # ``ExcelFile`` is a context manager — using ``yield`` + close in
+        # a finally lets us share one open handle across the class while
+        # still closing it deterministically. Returning the object bare
+        # leaks the file descriptor and trips strict resource-warning
+        # filters under recent pytest versions.
         path = TEST_DATA / "21_excel_pollution.xlsx"
-        return pd.ExcelFile(path, engine="openpyxl")
+        xl = pd.ExcelFile(path, engine="openpyxl")
+        try:
+            yield xl
+        finally:
+            xl.close()
 
     def test_sheets_present(self, workbook):
         names = set(workbook.sheet_names)