fix: clear all latent deprecation + resource warnings
Three real issues surfaced when running the suite with strict warnings:

1. src/core/format_standardize.py: ``datetime.utcfromtimestamp`` is deprecated in CPython 3.12 and slated for removal. Replace it with ``datetime.fromtimestamp(ts, tz=timezone.utc)``. Output for the date-only format codes we use is byte-identical.

2. src/core/io.py: ``list_sheets`` leaked the openpyxl file handle by returning ``xl.sheet_names`` from an unclosed ``pd.ExcelFile``. Wrap it in a ``with`` block so the file descriptor closes deterministically — this also prevents the Windows-only "file is locked" repro path.

3. tests/test_corpus.py: the ``TestXlsxPollution.workbook`` fixture returned the bare ``pd.ExcelFile`` instead of yielding and closing it. Convert it to a yield-and-finally pattern so the class-scoped handle isn't leaked across the whole test file.

Also harden pytest.ini's warning policy: escalate ``ResourceWarning`` from ``src`` to an error, alongside the existing ``DeprecationWarning`` rule. Third-party warnings stay filtered — we can't fix pandas/openpyxl/streamlit churn from here.

All 1916 tests pass under the strict filter; the full and split runs (``pytest``, ``pytest -m 'not gui'``, ``pytest -m gui``) are all clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -14,8 +14,12 @@ markers =
|
|||||||
fixture_sweep: parametrized sweep over the test-cases/ folder
|
fixture_sweep: parametrized sweep over the test-cases/ folder
|
||||||
gui: Streamlit AppTest-driven tests (live in tests/gui/)
|
gui: Streamlit AppTest-driven tests (live in tests/gui/)
|
||||||
|
|
||||||
# Warnings discipline: fail on unexpected DeprecationWarning from our own
|
# Warnings discipline: fail on any DeprecationWarning *or* ResourceWarning
|
||||||
# code, but tolerate third-party deprecations that we can't fix.
|
# from our own ``src`` package so a leaked file handle or stale stdlib call
|
||||||
|
# can't slip in unnoticed. Tolerate third-party deprecations / resource
|
||||||
|
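# warnings — we can't fix pandas / openpyxl / streamlit churn from here.
# NOTE(review): pytest applies the *last* matching ``filterwarnings`` entry,
# so a blanket ``ignore::`` rule listed after an ``error::...:src`` rule may
# override it — confirm the ordering below actually escalates ``src`` warnings.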
|
||||||
filterwarnings =
|
filterwarnings =
|
||||||
error::DeprecationWarning:src
|
error::DeprecationWarning:src
|
||||||
|
error::ResourceWarning:src
|
||||||
ignore::DeprecationWarning
|
ignore::DeprecationWarning
|
||||||
|
ignore::ResourceWarning
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ import re
|
|||||||
|
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from dataclasses import asdict, dataclass, field
|
from dataclasses import asdict, dataclass, field
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta, timezone
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Iterable, Literal, Optional
|
from typing import Any, Iterable, Literal, Optional
|
||||||
@@ -350,7 +350,12 @@ def _try_unix_timestamp(s: str, output_format: str) -> Optional[str]:
|
|||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
try:
|
try:
|
||||||
return datetime.utcfromtimestamp(seconds).strftime(output_format)
|
# ``utcfromtimestamp`` is deprecated in 3.12 and slated for
|
||||||
|
# removal — use a timezone-aware UTC datetime and format it
|
||||||
|
# the same way. The output is identical for ``%Y-%m-%d`` and
|
||||||
|
# friends because ``strftime`` ignores tzinfo on date-only
|
||||||
|
# format codes.
|
||||||
|
return datetime.fromtimestamp(seconds, tz=timezone.utc).strftime(output_format)
|
||||||
except (OverflowError, ValueError, OSError):
|
except (OverflowError, ValueError, OSError):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|||||||
@@ -368,9 +368,15 @@ def _looks_like_header(value: str) -> bool:
|
|||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
def list_sheets(path: Path) -> list[str]:
|
def list_sheets(path: Path) -> list[str]:
|
||||||
"""Return sheet names from an Excel workbook."""
|
"""Return sheet names from an Excel workbook.
|
||||||
xl = pd.ExcelFile(path, engine="openpyxl")
|
|
||||||
return xl.sheet_names
|
Uses a ``with`` block so the underlying openpyxl workbook closes
|
||||||
|
its file handle deterministically. Leaving it to garbage collection
|
||||||
|
surfaces as a ``ResourceWarning`` under strict warning filters and
|
||||||
|
can briefly hold a lock on the source file on Windows.
|
||||||
|
"""
|
||||||
|
with pd.ExcelFile(path, engine="openpyxl") as xl:
|
||||||
|
return list(xl.sheet_names)
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|||||||
@@ -204,8 +204,17 @@ class TestXlsxPollution:
|
|||||||
|
|
||||||
@pytest.fixture(scope="class")
|
@pytest.fixture(scope="class")
|
||||||
def workbook(self):
|
def workbook(self):
|
||||||
|
# ``ExcelFile`` is a context manager — using ``yield`` + close in
|
||||||
|
# a finally lets us share one open handle across the class while
|
||||||
|
# still closing it deterministically. Returning the object bare
|
||||||
|
# leaks the file descriptor and trips strict resource-warning
|
||||||
|
# filters under recent pytest versions.
|
||||||
path = TEST_DATA / "21_excel_pollution.xlsx"
|
path = TEST_DATA / "21_excel_pollution.xlsx"
|
||||||
return pd.ExcelFile(path, engine="openpyxl")
|
xl = pd.ExcelFile(path, engine="openpyxl")
|
||||||
|
try:
|
||||||
|
yield xl
|
||||||
|
finally:
|
||||||
|
xl.close()
|
||||||
|
|
||||||
def test_sheets_present(self, workbook):
|
def test_sheets_present(self, workbook):
|
||||||
names = set(workbook.sheet_names)
|
names = set(workbook.sheet_names)
|
||||||
|
|||||||
Reference in New Issue
Block a user