fix(pdf): consistent 2-decimal amount precision in display and CSV

User reported amounts losing trailing zeros — 4.50 rendering as 4.5, 1000.00 as 1000 — on the same statement. Classic float display issue: Python's default float formatting drops trailing zeros (``repr(4.5)`` is ``'4.5'``, never ``'4.50'``), and pandas / Streamlit happily show that inconsistency cell-by-cell. Two layers of fix; the internal type stays ``float`` for arithmetic: **Display.** ``st.column_config.NumberColumn(format="%.2f")`` is applied programmatically to every ``amount_*`` column on the data_editor. Every numeric amount now shows with exactly two decimal places regardless of trailing zeros. **CSV export.** Pandas' default float-to-CSV writer also drops trailing zeros (the same issue an accountant would see when opening the file in Excel). Before serialising, each amount column is mapped through the new ``format_amount`` helper, which returns ``f"{v:.2f}"`` for numerics, empty string for None/NaN/inf, ``str(value)`` for booleans (guards the ``True → "1.00"`` foot-gun since ``bool`` is an ``int`` subclass), and passes through any string the scanner kept because parsing failed (e.g. ``(4.50)`` when parens-negative is off — user can correct in the editor before re-exporting). ``format_amount`` lives in ``src/pdf_extract.py`` so it's testable in isolation (the page module can't easily be unit tested because of its Streamlit import chain). 8 new tests cover the trailing-zeros case, negatives, None/empty, string-passthrough, bool guard, NaN/inf, and the ``places`` parameter. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-20 01:27:16 +00:00
parent 6f2ad57490
commit ad7c22d7fb
3 changed files with 91 additions and 1 deletions
--- a/tests/test_pdf_extract.py
+++ b/tests/test_pdf_extract.py
@@ -22,6 +22,7 @@ from src.pdf_extract import (
    _infer_year_for_short_date,
    cluster_rows,
    extract_statement_metadata,
+    format_amount,
    format_date,
    parse_amount,
    parse_date,
@@ -214,6 +215,43 @@ class TestFindAmountTokens:
 # the building blocks; smoke tests pin the wiring.


+class TestFormatAmount:
+    """Pin ``format_amount`` string output: fixed decimal places (default 2)."""
+
+    def test_drops_no_trailing_zeros(self):
+        # Regression for the reported bug: trailing zeros must be kept
+        # (4.5 -> "4.50"), and an int input is padded the same way.
+        assert format_amount(4.5) == "4.50"
+        assert format_amount(12.0) == "12.00"
+        assert format_amount(1000) == "1000.00"
+
+    def test_negatives(self):
+        assert format_amount(-40.0) == "-40.00"
+        assert format_amount(-4.5) == "-4.50"
+
+    def test_none_and_empty(self):
+        assert format_amount(None) == ""
+        assert format_amount("") == ""
+
+    def test_string_passthrough(self):
+        # Unparsed text such as ``(4.50)`` (kept by the scanner when
+        # parsing failed) is returned unchanged for the user to fix.
+        assert format_amount("(4.50)") == "(4.50)"
+
+    def test_bool_doesnt_render_as_number(self):
+        # bool is an int subclass; expect str(value), not "1.00"/"0.00".
+        assert format_amount(True) == "True"
+        assert format_amount(False) == "False"
+
+    def test_nan_inf_become_empty(self):
+        assert format_amount(float("nan")) == ""
+        assert format_amount(float("inf")) == ""
+
+    def test_custom_places(self):
+        assert format_amount(4.5, places=4) == "4.5000"
+        assert format_amount(4.567, places=0) == "5"
+
+
 class TestFormatDate:
    def test_yyyymmdd(self):
        assert format_date("2026-01-13", "%Y%m%d") == "20260113"