{
  "steps": [
    {
      "tool": "text_clean",
      "options": {},
      "enabled": true,
      "name": "1. Clean text (header whitespace, smart quotes, em-dash)"
    },
    {
      "tool": "format_standardize",
      "options": {
        "column_types": {
          "Date": "date",
          "Amount": "currency",
          "Balance": "currency",
          "Vendor": "name"
        },
        "currency_decimal": "auto",
        "currency_preserve_code": false,
        "currency_decimals": 2,
        "date_output_format": "%Y-%m-%d"
      },
      "enabled": true,
      "name": "2. ISO dates · numeric amounts (parens-negative) · vendor casing"
    },
    {
      "tool": "missing",
      "options": {
        "strategy": "none",
        "standardize_sentinels": true,
        "sentinels": [
          "N/A",
          "n/a",
          "—",
          "-",
          "?",
          "(blank)",
          "(none)",
          "unknown",
          "#N/A"
        ]
      },
      "enabled": true,
      "name": "3. Standardize disguised nulls (— / N/A / (blank))"
    },
    {
      "tool": "dedup",
      "options": {
        "survivor_rule": "most_complete",
        "merge": false,
        "date_column": "Date",
        "strategies": [
          {
            "columns": [
              {
                "column": "Date",
                "algorithm": "exact",
                "threshold": 100
              },
              {
                "column": "Amount",
                "algorithm": "exact",
                "threshold": 100
              },
              {
                "column": "Vendor",
                "algorithm": "jaro_winkler",
                "threshold": 80
              }
            ]
          }
        ]
      },
      "enabled": true,
      "name": "4. Dedup transactions on Date+Amount+fuzzy Vendor"
    }
  ]
}