fix: clear all latent deprecation + resource warnings
Three real issues surfaced when running the suite with strict warnings:

1. src/core/format_standardize.py: ``datetime.utcfromtimestamp`` is deprecated since Python 3.12 and slated for removal. Replace it with ``datetime.fromtimestamp(ts, tz=timezone.utc)``. Output for the date-only format codes we use is byte-identical.

2. src/core/io.py: ``list_sheets`` leaked the openpyxl file handle by returning ``xl.sheet_names`` from an unclosed ``pd.ExcelFile``. Wrap it in a ``with`` block so the file descriptor closes deterministically — this also prevents the Windows-only "file is locked" repro path.

3. tests/test_corpus.py: the ``TestXlsxPollution.workbook`` fixture returned the bare ``pd.ExcelFile`` instead of yielding and closing it. Convert it to a yield-and-finally pattern so the class-scoped handle isn't leaked across the whole test file.

Also harden pytest.ini's warning policy: escalate ``ResourceWarning`` from ``src`` to an error, alongside the existing ``DeprecationWarning`` rule. Third-party warnings stay filtered — we can't fix pandas/openpyxl/streamlit churn from here.

All 1916 tests pass under the strict filter; the full and split runs (``pytest``, ``pytest -m 'not gui'``, ``pytest -m gui``) are all clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -14,8 +14,12 @@ markers =
|
||||
fixture_sweep: parametrized sweep over the test-cases/ folder
|
||||
gui: Streamlit AppTest-driven tests (live in tests/gui/)
|
||||
|
||||
# Warnings discipline: fail on unexpected DeprecationWarning from our own
|
||||
# code, but tolerate third-party deprecations that we can't fix.
|
||||
# Warnings discipline: fail on any DeprecationWarning *or* ResourceWarning
|
||||
# from our own ``src`` package so a leaked file handle or stale stdlib call
|
||||
# can't slip in unnoticed. Tolerate third-party deprecations / resource
|
||||
# warnings — we can't fix pandas / openpyxl / streamlit churn from here.
|
||||
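# Order matters: pytest applies the LAST matching entry, so the broad
# ignores are listed first and the ``src``-specific errors last.
filterwarnings =
|
||||
ignore::DeprecationWarning
|
||||
ignore::ResourceWarning
|
||||
error::DeprecationWarning:src
|
||||
error::ResourceWarning:src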
|
||||
|
||||
@@ -26,7 +26,7 @@ import re
|
||||
|
||||
from loguru import logger
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from datetime import datetime, timedelta
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterable, Literal, Optional
|
||||
@@ -350,7 +350,12 @@ def _try_unix_timestamp(s: str, output_format: str) -> Optional[str]:
|
||||
else:
|
||||
return None
|
||||
try:
|
||||
return datetime.utcfromtimestamp(seconds).strftime(output_format)
|
||||
# ``utcfromtimestamp`` is deprecated in 3.12 and slated for
|
||||
# removal — use a timezone-aware UTC datetime and format it
|
||||
# the same way. The output is identical for every format code
|
||||
# except ``%z`` / ``%Z``: both calls yield the same UTC wall-clock
|
||||
# fields, and only those two codes consult tzinfo.
|
||||
return datetime.fromtimestamp(seconds, tz=timezone.utc).strftime(output_format)
|
||||
except (OverflowError, ValueError, OSError):
|
||||
return None
|
||||
|
||||
|
||||
@@ -368,9 +368,15 @@ def _looks_like_header(value: str) -> bool:
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def list_sheets(path: Path) -> list[str]:
|
||||
"""Return sheet names from an Excel workbook."""
|
||||
xl = pd.ExcelFile(path, engine="openpyxl")
|
||||
return xl.sheet_names
|
||||
"""Return sheet names from an Excel workbook.
|
||||
|
||||
Uses a ``with`` block so the underlying openpyxl workbook closes
|
||||
its file handle deterministically. Leaving it to garbage collection
|
||||
surfaces as a ``ResourceWarning`` under strict warning filters and
|
||||
can briefly hold a lock on the source file on Windows.
|
||||
"""
|
||||
with pd.ExcelFile(path, engine="openpyxl") as xl:
|
||||
return list(xl.sheet_names)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -204,8 +204,17 @@ class TestXlsxPollution:
|
||||
|
||||
@pytest.fixture(scope="class")
|
||||
def workbook(self):
|
||||
# ``ExcelFile`` is a context manager — using ``yield`` + close in
|
||||
# a finally lets us share one open handle across the class while
|
||||
# still closing it deterministically. Returning the object bare
|
||||
# leaks the file descriptor and trips strict resource-warning
|
||||
# filters under recent pytest versions.
|
||||
path = TEST_DATA / "21_excel_pollution.xlsx"
|
||||
return pd.ExcelFile(path, engine="openpyxl")
|
||||
xl = pd.ExcelFile(path, engine="openpyxl")
|
||||
try:
|
||||
yield xl
|
||||
finally:
|
||||
xl.close()
|
||||
|
||||
def test_sheets_present(self, workbook):
|
||||
names = set(workbook.sheet_names)
|
||||
|
||||
Reference in New Issue
Block a user