demo: reconstruct sales demos for an accounting audience
Replaces the Shopify / RevOps / Bookkeeper demo trio with three accounting personas that share one buyer, each entering through a workflow where a messy export costs money — all running the same saved 4-step pipeline: - bank_reconciliation.csv (Bookkeeper): 26 -> 20 rows, 6 double-posted transactions caught after date+amount standardization. - vendor_1099.csv (AP / 1099): 24 records -> 8 vendors, 7 missing EINs recovered via dedup merge — the 1099-complete story. - ar_open_invoices.csv (AR): 26 -> 21 rows, 5 double-entered invoices removed, blank status backfilled from the twin row. Every number is validated against the live engine and pinned by tests/test_demo_pipelines.py (read path mirrors app_demo._load_demo: dtype=str, keep_default_na=False). Rewires src/gui/app_demo.py PERSONAS (keys bookkeeper / ap-1099 / ar-aging, accounting H1/sub/CTA) and rewrites docs/DEMO-PLAN.md sections 3/4/7 with the validated outcomes. (Repo hygiene forced by a partial-clone gap: finalizes the already-deleted, unreferenced samples/messy_text.csv whose blob was unrecoverable.) Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,31 +0,0 @@
|
||||
Lead ID,First Name,Last Name,Company,Title,Email,Phone,Country,Source,Score,Last Activity,Tags
|
||||
HUB-001,Alice,Johnson,Acme Corp,VP Marketing,alice@acme.com,(415) 555-1234,USA,HubSpot,87,2025-12-04,Enterprise
|
||||
HUB-002,bob,smith,Beta LLC,Director Growth,bob@beta.com,N/A,United States,HubSpot,N/A,2025-11-22,SMB
|
||||
HUB-003,Carlos,Garcia,Gamma Inc,CEO,carlos@gamma.io,+34 91 411 1111,Spain,HubSpot,82,2025-10-30,Enterprise
|
||||
HUB-004,DIANA,LEE,Delta Co,Marketing Manager,diana@delta.com,020 7946 0958,United Kingdom,HubSpot,74,2025-12-15,Mid-Market
|
||||
HUB-005,Eve,Martinez,Epsilon Group,VP Ops,eve@epsilon.com,(none),Mexico,HubSpot,(blank),2025-09-15,SMB
|
||||
LIN-006,Alice,Johnson,Acme Corporation,VP of Marketing,Alice.Johnson@acme.com,4155551234,US,LinkedIn,—,2025-12-04,Enterprise
|
||||
LIN-007,Frank,Brown,Foxtrot Ltd,Head Sales,frank@foxtrot.de,+49 30 12345678,Germany,LinkedIn,68,2025-12-01,Mid-Market
|
||||
LIN-008,Grace,Davis,Golf Industries,Marketing Lead,grace@golfind.com,+44 20 7946 0958,UK,LinkedIn,79,2025-11-08,Mid-Market
|
||||
LIN-009,henry,wilson,Hotel Logistics,COO,henry@hotellog.com,+86 10 1234 5678,China,LinkedIn,91,2025-12-12,Enterprise
|
||||
LIN-010,IVY CHEN,,India Tech,CTO,ivy@indiatech.in,+91 11 2345 6789,IN,LinkedIn,88,2025-11-30,Enterprise
|
||||
LIN-011,Jack,Taylor,Juliet & Co,Founder,jack@juliet.co,unknown,United States,LinkedIn,?,(unknown),SMB
|
||||
SCR-012,Diana,Lee,Delta Company,Marketing Manager,diana@delta.com,020-7946-0958,UK,Manual Scrape,74,12/15/2025,Mid-Market
|
||||
SCR-013,kate,o'neil,Kilo Ventures,Partner,kate@kilo.vc,+1 415 555 2222,USA,Manual Scrape,N/A,?,Investor
|
||||
SCR-014,Carlos,García,Gamma Incorporated,CEO,Carlos@gamma.io,+34-91-411-1111,Spain,Manual Scrape,82,Oct 30 2025,Enterprise
|
||||
SCR-015,Liam,Park,Lima Solutions,Director Marketing,liam@limasol.kr,+82 2 2287 0114,South Korea,Manual Scrape,77,2025-11-20,Enterprise
|
||||
SCR-016,Mia,nguyen,Mike Corp,VP Marketing,mia@mikecorp.com.au,02 9374 4000,Australia,Manual Scrape,72,2025-10-05,Mid-Market
|
||||
SCR-017,Noah,Brown,November Inc,Head of Growth,noah@november.com,(555) 444-5555,US,Manual Scrape,—,#N/A,SMB
|
||||
HUB-018,Frank,Brown,Foxtrot,Head of Sales,Frank@Foxtrot.de,+49-30-12345678,Germany,HubSpot,68,2025-12-01,Mid-Market
|
||||
HUB-019,Olivia,Rossi,Oscar Italia,CMO,olivia@oscar.it,+39 06 6982,Italy,HubSpot,85,2025-12-08,Enterprise
|
||||
HUB-020,papa,wong,Papa Trading,Founder,papa@papatrading.hk,+852 2123 4567,Hong Kong,HubSpot,69,2025-11-15,SMB
|
||||
LIN-021,Quinn,Reyes,Quebec Group,VP Sales,quinn@quebec.mx,+52 55 5555 0000,Mexico,LinkedIn,80,2025-12-05,Mid-Market
|
||||
LIN-022,Robert,Tan,Romeo Logistics,Director,r.tan@romeo.sg,+65 6123 4567,Singapore,LinkedIn,76,2025-11-28,Mid-Market
|
||||
SCR-023,Sara,Khan,Sierra Foods,Head Marketing,sara@sierra.in,+91-22-1234-5678,India,Manual Scrape,73,2025-12-02,SMB
|
||||
SCR-024,bob,Smith,Beta,Director Growth,Bob@Beta.com,(none),United States,Manual Scrape,(unknown),(unknown),SMB
|
||||
HUB-025,Tara,Levi,Tango Tech,VP Product,tara@tango.il,+972 3 6957 0000,Israel,HubSpot,82,2025-12-10,Enterprise
|
||||
HUB-026,Uma,Patel,Uniform Health,CMO,uma at uniform dot com,+44 20 7946 8888,United Kingdom,HubSpot,71,2025-12-12,Enterprise
|
||||
LIN-027,Victor,Lee,Victor Co,Director,victor@@victorco.com,+1 415 555 8888,USA,LinkedIn,69,2025-11-30,SMB
|
||||
SCR-028,Wendy,Akin,Whiskey Inc,CMO,wendy@whiskey.tr,+90 212 252 1111,Turkey,Manual Scrape,77,2025-12-04,Mid-Market
|
||||
SCR-029,Xander,Ng,Xray Group,Founder,xander@xray.sg,+65 6234 5678,Singapore,Manual Scrape,65,2025-11-15,Suppressed
|
||||
HUB-030,Yara,Costa,Yankee Foods,Marketing Lead,yara@yankee.br,+55 11 3071 2222,Brazil,HubSpot,—,2025-12-15,Opted Out
|
||||
|
@@ -1,74 +0,0 @@
|
||||
{
|
||||
"steps": [
|
||||
{
|
||||
"tool": "text_clean",
|
||||
"options": {},
|
||||
"enabled": true,
|
||||
"name": "1. Clean text (whitespace + smart quotes from copy-paste)"
|
||||
},
|
||||
{
|
||||
"tool": "format_standardize",
|
||||
"options": {
|
||||
"column_types": {
|
||||
"First Name": "name",
|
||||
"Last Name": "name",
|
||||
"Company": "name",
|
||||
"Email": "email",
|
||||
"Phone": "phone"
|
||||
},
|
||||
"phone_country_column": "Country",
|
||||
"phone_format": "E164",
|
||||
"email_gmail_canonical": true
|
||||
},
|
||||
"enabled": true,
|
||||
"name": "2. E.164 phones (per-row country) · canonical emails · name casing"
|
||||
},
|
||||
{
|
||||
"tool": "missing",
|
||||
"options": {
|
||||
"strategy": "none",
|
||||
"standardize_sentinels": true,
|
||||
"sentinels": ["N/A", "n/a", "—", "?", "(unknown)", "unknown", "(blank)", "(none)", "TBD", "#N/A"]
|
||||
},
|
||||
"enabled": true,
|
||||
"name": "3. Standardize sentinels across vendor exports"
|
||||
},
|
||||
{
|
||||
"tool": "column_map",
|
||||
"options": {
|
||||
"schema": {
|
||||
"fields": [
|
||||
{"name": "Lead ID", "dtype": "string", "required": true},
|
||||
{"name": "First Name", "dtype": "string"},
|
||||
{"name": "Last Name", "dtype": "string"},
|
||||
{"name": "Company", "dtype": "string"},
|
||||
{"name": "Title", "dtype": "string"},
|
||||
{"name": "Email", "dtype": "string"},
|
||||
{"name": "Phone", "dtype": "string"},
|
||||
{"name": "Country", "dtype": "string"},
|
||||
{"name": "Source", "dtype": "string"},
|
||||
{"name": "Score", "dtype": "integer"},
|
||||
{"name": "Last Activity", "dtype": "date"},
|
||||
{"name": "Tags", "dtype": "string"}
|
||||
]
|
||||
},
|
||||
"auto_infer": true,
|
||||
"unmapped": "keep",
|
||||
"coerce_types": true,
|
||||
"reorder_to_schema": true,
|
||||
"enforce_required": false
|
||||
},
|
||||
"enabled": true,
|
||||
"name": "4. Coerce types · reorder to canonical schema"
|
||||
},
|
||||
{
|
||||
"tool": "dedup",
|
||||
"options": {
|
||||
"survivor_rule": "most_complete",
|
||||
"merge": true
|
||||
},
|
||||
"enabled": true,
|
||||
"name": "5. Dedup leads across HubSpot / LinkedIn / Manual Scrape (fuzzy + merge)"
|
||||
}
|
||||
]
|
||||
}
|
||||
27
samples/demo/ar_open_invoices.csv
Normal file
27
samples/demo/ar_open_invoices.csv
Normal file
@@ -0,0 +1,27 @@
|
||||
Invoice,Client,Email,Invoice_Date,Due_Date,Amount,Status
|
||||
INV-1007,ACME LLC,AP@Acme.com,03/04/2025,04/03/2025,"$1,250.00",Open
|
||||
INV-1007, Acme LLC ,ap@acme.com,2025-03-04,2025-04-03,"1,250.00",(blank)
|
||||
INV-1001,northwind traders,billing@northwind.com,Mar 6 2025,04/05/2025,$980,Overdue
|
||||
INV-1002,Globex Corp,AR@Globex.com,3/11/25,4/10/25,"2,400.50",Sent
|
||||
INV-1011,initech,accounts@initech.com,04/01/2025,05/01/2025,"$ 1,100.00",?
|
||||
INV-1011,Initech,Accounts@Initech.com,2025-04-01,2025-05-01,1100,Open
|
||||
INV-1003,Stark Industries,ap@stark.com,Mar 6 2025,Apr 6 2025,$75.00,Open
|
||||
INV-1004,Wayne Enterprises,ar@wayne.com,03/15/2025,04/14/2025,($300.00),—
|
||||
INV-1015,Hooli,billing@hooli.com,3/11/25,4/10/25,"$4,300.00",Overdue
|
||||
INV-1015,hooli,Billing@Hooli.com,2025-03-11,2025-04-10,4300,(none)
|
||||
INV-1005,Soylent Corp,ap@soylent.com,2025-03-20,2025-04-19,"$1,875.25",Sent
|
||||
INV-1006,Umbrella Co,ar@umbrella.com,03/22/2025,04/21/2025,$640.00,TBD
|
||||
INV-1019,Cyberdyne Systems,ap@cyberdyne.com,Mar 25 2025,04/24/2025,"$2,050.00",unknown
|
||||
INV-1019,cyberdyne systems,AP@Cyberdyne.com,2025-03-25,2025-04-24,"2,050.00",Open
|
||||
INV-1008,Vandelay Industries,ar@vandelay.com,3/28/25,4/27/25,$915.00,Overdue
|
||||
INV-1009,Gekko & Co,billing@gekko.com,2025-03-30,2025-04-29,"$3,120.75",Open
|
||||
INV-1010,Pied Piper,ap@piedpiper.com,04/02/2025,05/02/2025,$180,Sent
|
||||
INV-1023,Tyrell Corp,ar@tyrell.com,04/05/2025,05/05/2025,($300.00),(blank)
|
||||
INV-1023,Tyrell Corp,AR@Tyrell.com,2025-04-05,2025-05-05,-300.00,Open
|
||||
INV-1012,Oscorp,ap@oscorp.com,Apr 8 2025,05/08/2025,"$5,000.00",Overdue
|
||||
INV-1013,Nakatomi Trading,ar@nakatomi.com,4/9/25,5/9/25,$725.50,Sent
|
||||
INV-1014,Bluth Company,billing@bluth.com,2025-04-10,2025-05-10,"$1,420.00",Open
|
||||
INV-1016,Dunder Mifflin,ap@dundermifflin.com,04/12/2025,05/12/2025,$960.00,Overdue
|
||||
INV-1017,Prestige Worldwide,ar@prestige.com,Apr 14 2025,05/14/2025,"$2,680.00",Sent
|
||||
INV-1018,Sterling Cooper,billing@sterlingcooper.com,4/15/25,5/15/25,"$3,950.00",Open
|
||||
INV-1020,Wonka Industries,ap@wonka.com,2025-04-18,2025-05-18,"$1,050.00",Overdue
|
||||
|
50
samples/demo/ar_open_invoices_pipeline.json
Normal file
50
samples/demo/ar_open_invoices_pipeline.json
Normal file
@@ -0,0 +1,50 @@
|
||||
{
|
||||
"steps": [
|
||||
{
|
||||
"tool": "text_clean",
|
||||
"enabled": true,
|
||||
"options": {
|
||||
"trim": true,
|
||||
"collapse_whitespace": true,
|
||||
"fold_smart_chars": true,
|
||||
"strip_zero_width": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"tool": "format_standardize",
|
||||
"enabled": true,
|
||||
"options": {
|
||||
"column_types": {
|
||||
"Invoice_Date": "date",
|
||||
"Due_Date": "date",
|
||||
"Amount": "currency",
|
||||
"Email": "email"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"tool": "missing",
|
||||
"enabled": true,
|
||||
"options": {
|
||||
"strategy": "none",
|
||||
"standardize_sentinels": true,
|
||||
"sentinels": ["—", "-", "?", "(blank)", "TBD", "unknown", "(none)", "N/A", "#N/A"]
|
||||
}
|
||||
},
|
||||
{
|
||||
"tool": "dedup",
|
||||
"enabled": true,
|
||||
"options": {
|
||||
"survivor_rule": "most_complete",
|
||||
"merge": true,
|
||||
"strategies": [
|
||||
{
|
||||
"columns": [
|
||||
{"column": "Invoice", "algorithm": "exact", "threshold": 100}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
27
samples/demo/bank_reconciliation.csv
Normal file
27
samples/demo/bank_reconciliation.csv
Normal file
@@ -0,0 +1,27 @@
|
||||
Date,Description,Vendor,Category,Amount,Account
|
||||
01/15/2025,“Stripe payout — weekly”,Stripe,Income,"+$3,450.00",Business Checking
|
||||
2025-01-15,Verizon business line,Verizon,—,($89.50),Business Checking
|
||||
Jan 18 2025,Adobe Creative Cloud ,Adobe,(blank),-$129.99,Business Checking
|
||||
1/27/25,Office supplies,Amazon,Supplies,-$74.20,Business Checking
|
||||
02/03/2025, Monthly office rent,Highland Properties,Rent,"$1,200.00",Business Checking
|
||||
Feb 5 2025,Account service fee,First National Bank,?,(50.00),Business Checking
|
||||
2025-01-09,Shipping labels,amazon.com,unknown,-$18.40,Business Checking
|
||||
1/22/25,Contractor — landing page,Bright Lane Design,TBD,- $599.88,Business Checking
|
||||
Jan 30 2025,Late fee adjustment,verizon,Utilities,-$12.00,Business Checking
|
||||
2025-01-11,Packaging tape,AMAZON.COM,Supplies,-$31.75,Business Checking
|
||||
01/06/2025,Client deposit — ACME Co,ACME Co,Income,"$2,500.00",Business Checking
|
||||
2025-01-20,Google Workspace,Google,Software,-$36.00,Business Checking
|
||||
Jan 24 2025,Fuel — delivery van,Shell,Vehicle,-$58.63,Business Checking
|
||||
1/28/25,QuickBooks subscription,Intuit,Software,-$80.00,Business Checking
|
||||
2025-01-15,Stripe payout weekly,Stripe,Income,3450.00,Business Checking
|
||||
01/15/2025,Verizon business line,Verizon,Utilities,-89.50,Business Checking
|
||||
2025-01-18,Adobe Creative Cloud,Adobe,Software,-129.99,Business Checking
|
||||
2025-02-03,Monthly office rent,Highland Properties,Rent,1200.00,Business Checking
|
||||
2025-02-05,Account service fee,First National Bank,Bank Fees,-50.00,Business Checking
|
||||
2025-01-22,Contractor landing page,Bright Lane Design,Contractors,-599.88,Business Checking
|
||||
02/10/2025,Client deposit — Globex,Globex,Income,"$1,800.00",Business Checking
|
||||
2025-02-12,Slack subscription,Slack,Software,-$96.00,Business Checking
|
||||
Feb 14 2025,Coffee — client meeting,Blue Bottle,Meals,-$23.10,Business Checking
|
||||
2/18/25,Insurance premium,Hartford,Insurance,-$240.50,Business Checking
|
||||
02/21/2025,Refund — returned printer,Staples,Supplies,$210.99,Business Checking
|
||||
Feb 25 2025,Domain renewal,Namecheap,Software,-$13.98,Business Checking
|
||||
|
6
samples/demo/bank_reconciliation_pipeline.json
Normal file
6
samples/demo/bank_reconciliation_pipeline.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{"steps":[
|
||||
{"tool":"text_clean","enabled":true,"options":{"trim":true,"collapse_whitespace":true,"fold_smart_chars":true,"strip_zero_width":true}},
|
||||
{"tool":"format_standardize","enabled":true,"options":{"column_types":{"Date":"date","Amount":"currency"}}},
|
||||
{"tool":"missing","enabled":true,"options":{"strategy":"none","standardize_sentinels":true,"sentinels":["—","(blank)","?","unknown","TBD","N/A","#N/A","(none)"]}},
|
||||
{"tool":"dedup","enabled":true,"options":{"survivor_rule":"most_complete","merge":true,"strategies":[{"columns":[{"column":"Date","algorithm":"exact","threshold":100},{"column":"Amount","algorithm":"exact","threshold":100}]}]}}
|
||||
]}
|
||||
@@ -1,56 +0,0 @@
|
||||
{
|
||||
"steps": [
|
||||
{
|
||||
"tool": "text_clean",
|
||||
"options": {},
|
||||
"enabled": true,
|
||||
"name": "1. Clean text (header whitespace, smart quotes, em-dash)"
|
||||
},
|
||||
{
|
||||
"tool": "format_standardize",
|
||||
"options": {
|
||||
"column_types": {
|
||||
"Date": "date",
|
||||
"Amount": "currency",
|
||||
"Balance": "currency",
|
||||
"Vendor": "name"
|
||||
},
|
||||
"currency_decimal": "auto",
|
||||
"currency_preserve_code": false,
|
||||
"currency_decimals": 2,
|
||||
"date_output_format": "%Y-%m-%d"
|
||||
},
|
||||
"enabled": true,
|
||||
"name": "2. ISO dates · numeric amounts (parens-negative) · vendor casing"
|
||||
},
|
||||
{
|
||||
"tool": "missing",
|
||||
"options": {
|
||||
"strategy": "none",
|
||||
"standardize_sentinels": true,
|
||||
"sentinels": ["N/A", "n/a", "—", "-", "?", "(blank)", "(none)", "unknown", "#N/A"]
|
||||
},
|
||||
"enabled": true,
|
||||
"name": "3. Standardize disguised nulls (— / N/A / (blank))"
|
||||
},
|
||||
{
|
||||
"tool": "dedup",
|
||||
"options": {
|
||||
"survivor_rule": "most_complete",
|
||||
"merge": false,
|
||||
"date_column": "Date",
|
||||
"strategies": [
|
||||
{
|
||||
"columns": [
|
||||
{"column": "Date", "algorithm": "exact", "threshold": 100},
|
||||
{"column": "Amount", "algorithm": "exact", "threshold": 100},
|
||||
{"column": "Vendor", "algorithm": "jaro_winkler", "threshold": 80}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"enabled": true,
|
||||
"name": "4. Dedup transactions on Date+Amount+fuzzy Vendor"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -1,31 +0,0 @@
|
||||
Txn ID,Date ,Description,Amount,Balance,Account,Vendor,Category
|
||||
TXN-2401,01/15/2025," AMAZON.COM*4F2X9 PURCHASE",-$129.99,"$2,450.01",Checking,Amazon,Office Supplies
|
||||
TXN-2402,2025-01-15,"AMAZON.COM*4F2X9 PURCHASE",-$129.99,"2450.01",Checking,amazon.com,Office Supplies
|
||||
TXN-2403,Jan 18 2025,"STAPLES #4422 — paper, toner",($89.50),$2360.51,Checking,STAPLES,Office Supplies
|
||||
TXN-2404,01/22/2025,"Verizon Wireless ""autopay""",-$120.00,"$2,240.51",Checking,Verizon,Utilities
|
||||
TXN-2405,2025-01-22,Verizon Wireless autopay,-120.00,"2,240.51",Checking,verizon,Utilities
|
||||
TXN-2406,01-25-2025,"Stripe Payout — invoice #1077","+$3,450.00","$5,690.51",Checking,Stripe,Income
|
||||
TXN-2407,1/27/25,"Office Lease - Suite 204",-1500.00,"$4,190.51",Checking,Acme Realty,Rent
|
||||
TXN-2408,02/01/2025,"Wire — Acme Realty Mgmt","-$1,500.00","$2,690.51",Checking,acme realty,Rent
|
||||
TXN-2409,2025-02-03,"Adobe Creative Cloud annual","- $599.88","$2,090.63",Credit Card,Adobe Inc.,Software
|
||||
TXN-2410,02/03/2025,"ADOBE CREATIVE CLOUD ANN",-599.88,2090.63,Credit Card,adobe,Software
|
||||
TXN-2411,Feb 5 2025,"FedEx — overnight to client A",-$32.50,"$2,058.13",Checking,FedEx,Shipping
|
||||
TXN-2412,02/07/2025,"Square fee — invoice #1078","-$3.20","$2,054.93",Checking,Square,Fees
|
||||
TXN-2413,02/10/2025,"Stripe Payout invoice #1079","+ $1,200.00","$3,254.93",Checking,Stripe,Income
|
||||
TXN-2414,2025-02-12,"USPS PRIORITY — to vendor B","-12.40","$3,242.53",Checking,USPS,Shipping
|
||||
TXN-2415,02/14/2025,"Zoom Video Comms — annual","-$149.90","$3,092.63",Credit Card,Zoom,Software
|
||||
TXN-2416,2/14/25,"Zoom Video Communications","-149.90","3092.63",Credit Card,zoom,Software
|
||||
TXN-2417,02/18/2025,"Costco Whse #421 — supplies","-$237.84","$2,854.79",Checking,Costco,Office Supplies
|
||||
TXN-2418,2025-02-18,COSTCO WHSE #421,-237.84,"2,854.79",Checking,costco,Office Supplies
|
||||
TXN-2419,02/22/2025,"Bank fee — int'l wire","-$45.00","$2,809.79",Checking,Bank Fee,Fees
|
||||
TXN-2420,02/24/2025,"Stripe Payout — invoice #1080","+$2,100.00","$4,909.79",Checking,Stripe,Income
|
||||
TXN-2421,02/28/2025," Refund — overcharge ","+$45.00","$4,954.79",Checking,—,Refunds
|
||||
TXN-2422,Feb 28 2025,REFUND OVERCHARGE,45.00,4954.79,Checking,N/A,Refunds
|
||||
TXN-2423,03/01/2025,"Office Lease — Suite 204","-$1,500.00","$3,454.79",Checking,Acme Realty,Rent
|
||||
TXN-2424,2025-03-03,"Slack Technologies — annual","-$840.00","$2,614.79",Credit Card,Slack,Software
|
||||
TXN-2425,03/05/2025,"Stripe Payout — invoice #1081","+$1,875.00","$4,489.79",Checking,Stripe,Income
|
||||
TXN-2426,03/08/2025,"Wire — Berlin office rent (EUR vendor)","-€1.450,00","$2,989.79",Checking,Mietverwaltung GmbH,Rent
|
||||
TXN-2427,03/10/2025,"London supplier invoice (GBP)","-£950.00","$1,939.79",Checking,Stationery Co Ltd,Office Supplies
|
||||
TXN-2428,03/12/2025,"São Paulo agency retainer","-R$ 1.299,90","$1,679.79",Credit Card,Estúdio Ágil,Software
|
||||
TXN-2429,03/14/2025,"VAT MOSS prep — multi-EU sales","($89.00)","$1,768.79",Checking,EU VAT Service,Fees
|
||||
TXN-2430,03/14/2025,"VAT MOSS prep multi EU sales",-89.00,"1,768.79",Checking,eu vat service,Fees
|
||||
|
@@ -1,21 +0,0 @@
|
||||
Customer ID,First Name,Last Name,Email,Phone,Address,City,State,ZIP,Country,Total Orders,Lifetime Value,Last Order Date,Tags
|
||||
SHOP-1001, Alice ,Johnson,alice@petshop.com,(415) 555-1234,"123 Main St., Apt 4B",San Francisco,CA,94102,US,12,$1,240.50,2025-12-04,VIP
|
||||
SHOP-1002,Bob,SMITH,Bob@PetShop.com,415.555.1234,"123 Main St, Apt 4B",San Francisco,CA,94102,US,12,"$1,240.50",N/A,VIP
|
||||
SHOP-1003,carlos,garcia,carlos@petshop.com,5559876543,"742 Evergreen Terrace",Springfield,IL,62704,US,5,420.00,12/15/2025,Wholesale
|
||||
SHOP-1004,Diana,Lee,diana@petshop.com,(555) 222-3344,"PO Box 12, Sherwood Forest",Nottingham,,NG1 5BA,GB,8,£890.25,2025-10-30,VIP|Wholesale
|
||||
SHOP-1005,EVE MARTINEZ,,eve.martinez@petshop.com,555-9988,"Calle Mayor 45","Madrid",,"28013",ES,3,€180,2025-09-15,
|
||||
SHOP-1006,Frank,Brown,frank@petshop.com,, ,"Berlin",BE,10115,DE,15,€2.410,75,(blank),Wholesale
|
||||
SHOP-1007,Grace,Davis,grace@petshop.com,+1 555-111-1111,"888 Maple Ave",Toronto,ON,M5V 3A8,CA,1,$49.99,#N/A,New
|
||||
SHOP-1008,henry,wilson,Henry@PetShop.com,5551111111,"888 Maple Avenue","Toronto",ON,M5V 3A8,CA,1,$49.99,2025-12-01,New
|
||||
SHOP-1009,Ivy,Chen,IVY@petshop.com,+1 (555) 777-7777,"550 Elm Street, Suite 200",Brooklyn,NY,11201,US,4,"$320.50 ",10/12/2025,
|
||||
SHOP-1010,Jack,Taylor,jack@petshop.com,(none),"550 elm street, suite 200",brooklyn,NY,11201,US,4,$320.50,2025-10-12,
|
||||
SHOP-1011,kate,o'neil,kate.oneil@petshop.com,415-555-2222,"99 King's Rd","London",,SW3 4LX,GB,7,£675.00,?,VIP
|
||||
SHOP-1012,luis,rodriguez,LUIS@petshop.com,+34 91 411 1111,"Avenida de la Paz 12, 3°D",Madrid,,28013,ES,2,"€89,99",unknown,
|
||||
SHOP-1013,Mia,Park,mia@petshop.com,02-9374-4000,"Sydney Opera House Drive","Sydney",NSW,2000,AU,9,"A$ 1,299.00",2025-11-20,Wholesale
|
||||
SHOP-1014,Noah,nguyen,noah@petshop.com,+81 3 3210 7000,"丸の内 2-7-3","Tokyo",,100-0005,JP,6,"¥75000",2025-12-10,VIP
|
||||
SHOP-1015,Olivia,Brown,OLIVIA@PETSHOP.COM,(555) 333-4444,"742 evergreen terrace",springfield,IL,62704,US,3,$180.00,(none),
|
||||
SHOP-1016,Pavel,Novak,pavel@petshop.com,+44 20 7946 1234,"22 Baker Street",London,,W1U 6AB,United Kingdom,4,£412.00,2025-11-18,VIP
|
||||
SHOP-1017,Quinn,Murphy,quinn@petshop.com,+44 20 7946 5678,"5 Princes Street",Edinburgh,,EH2 2DA,U.K.,2,£189.50,2025-12-09,
|
||||
SHOP-1018,Rachel,O'Brien,rachel@petshop.com,02-9374-9999,"100 George Street","Sydney",NSW,2000,UK,1,£75.00,?,New
|
||||
SHOP-1019,Sam,Klein,sam@petshop.com,+49 30 99887766,"Friedrichstraße 100","Berlin",,10117,Germany,11,"€1.890,40",2025-12-11,VIP|Wholesale
|
||||
SHOP-1020,Tara,Gianni,tara@petshop.com,+39 06 6982 4567,"Via del Corso 250",Roma,,00186,Italia,5,"€649,99",2025-12-03,
|
||||
|
@@ -1,49 +0,0 @@
|
||||
{
|
||||
"steps": [
|
||||
{
|
||||
"tool": "text_clean",
|
||||
"options": {},
|
||||
"enabled": true,
|
||||
"name": "1. Clean text (whitespace, smart quotes, NBSP, BOM)"
|
||||
},
|
||||
{
|
||||
"tool": "format_standardize",
|
||||
"options": {
|
||||
"column_types": {
|
||||
"First Name": "name",
|
||||
"Last Name": "name",
|
||||
"Email": "email",
|
||||
"Phone": "phone",
|
||||
"Address": "address",
|
||||
"Lifetime Value": "currency",
|
||||
"Last Order Date": "date"
|
||||
},
|
||||
"phone_country_column": "Country",
|
||||
"address_country_column": "Country",
|
||||
"currency_preserve_code": true,
|
||||
"currency_decimal": "auto",
|
||||
"email_gmail_canonical": false
|
||||
},
|
||||
"enabled": true,
|
||||
"name": "2. Standardize phones, addresses, dates, currencies, names"
|
||||
},
|
||||
{
|
||||
"tool": "missing",
|
||||
"options": {
|
||||
"strategy": "none",
|
||||
"standardize_sentinels": true
|
||||
},
|
||||
"enabled": true,
|
||||
"name": "3. Standardize disguised nulls (N/A, -, (blank), ?, #N/A)"
|
||||
},
|
||||
{
|
||||
"tool": "dedup",
|
||||
"options": {
|
||||
"survivor_rule": "most_complete",
|
||||
"merge": true
|
||||
},
|
||||
"enabled": true,
|
||||
"name": "4. Dedup customers (fuzzy match, merge missing fields)"
|
||||
}
|
||||
]
|
||||
}
|
||||
25
samples/demo/vendor_1099.csv
Normal file
25
samples/demo/vendor_1099.csv
Normal file
@@ -0,0 +1,25 @@
|
||||
Vendor,Contact,Email,Phone,EIN,Address,Total_Paid
|
||||
Acme Realty,Bob Stein,acme.ap@acmerealty.com,(212) 555-0100,12-3456789,(blank),"$12,400.00"
|
||||
acme realty llc,Bob Stein, ACME.AP@AcmeRealty.com ,,—,"118 Canal St, New York, NY 10013","$8,250"
|
||||
ACME REALTY,R. Stein,Acme.AP@acmerealty.com,212.555.0100,N/A,TBD,"1,999.99"
|
||||
Bright Books Bookkeeping,Dana Cole,hello@brightbooks.com,,98-7654321,(blank),"$6,000.00"
|
||||
bright books,Dana Cole,HELLO@brightbooks.com,(415) 555-0142,unknown,"50 Market St, San Francisco, CA 94105","$6,000"
|
||||
"Bright Books, LLC",D. Cole, hello@BrightBooks.com,4155550142,98-7654321,unknown,"5,500.00"
|
||||
Northwind Logistics,Sam Reyes,ap@northwindlog.com,(312) 555-0198,—,(blank),"$22,750.00"
|
||||
northwind logistics inc,Sam Reyes,AP@NorthwindLog.com,,45-6789012,"900 W Loop, Chicago, IL 60607","$22,750"
|
||||
Pearl Design Studio,“Jo” Marsh,billing@pearldesign.co,,33-2211000,(blank),"$3,200.00"
|
||||
pearl design,Jo Marsh,Billing@PearlDesign.co,(206) 555-0167,TBD,"77 Pike St, Seattle, WA 98101","$3,200"
|
||||
PEARL DESIGN STUDIO,J. Marsh, billing@pearldesign.co ,206.555.0167,33-2211000,unknown,"2,800.00"
|
||||
Cooper Plumbing,Lee Cooper,office@cooperplumb.com,(617) 555-0133,—,(blank),"$1,450.00"
|
||||
cooper plumbing co,Lee Cooper,OFFICE@cooperplumb.com,,TBD,"12 Beacon St, Boston, MA 02108","$1,450"
|
||||
COOPER PLUMBING,L. Cooper, office@CooperPlumb.com,6175550133,N/A,unknown,900.00
|
||||
Vertex Marketing,Pat Nguyen,accounts@vertexmktg.com,(404) 555-0119,77-8899001,(blank),"$15,000.00"
|
||||
vertex marketing group,Pat Nguyen,ACCOUNTS@VertexMktg.com,,unknown,"300 Peachtree St, Atlanta, GA 30308","$15,000"
|
||||
Summit Consulting,Ray Brooks,invoices@summitconsult.net,,21-0099887,(blank),"$9,800.00"
|
||||
summit consulting llc,Ray Brooks,INVOICES@summitconsult.net,(303) 555-0175,—,"1100 17th St, Denver, CO 80202","$9,800"
|
||||
SUMMIT CONSULTING,R. Brooks, invoices@SummitConsult.net ,303.555.0175,21-0099887,TBD,"7,250.00"
|
||||
Garcia Catering,Mia Garcia,ap@garciacatering.com,(305) 555-0188,—,(blank),"$4,600.00"
|
||||
garcia catering services,Mia Garcia,AP@GarciaCatering.com,,66-1234509,"450 Ocean Dr, Miami, FL 33139",$600.00
|
||||
Northwind Logistics,S. Reyes, ap@northwindlog.com ,312.555.0198,45-6789012,TBD,"21,000.00"
|
||||
VERTEX MARKETING,P. Nguyen, accounts@vertexmktg.com ,404.555.0119,77-8899001,TBD,"14,500.00"
|
||||
GARCIA CATERING,M. Garcia,ap@GARCIACATERING.com,305.555.0188,66-1234509,unknown,"4,200.00"
|
||||
|
49
samples/demo/vendor_1099_pipeline.json
Normal file
49
samples/demo/vendor_1099_pipeline.json
Normal file
@@ -0,0 +1,49 @@
|
||||
{
|
||||
"steps": [
|
||||
{
|
||||
"tool": "text_clean",
|
||||
"enabled": true,
|
||||
"options": {
|
||||
"trim": true,
|
||||
"collapse_whitespace": true,
|
||||
"fold_smart_chars": true,
|
||||
"strip_zero_width": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"tool": "format_standardize",
|
||||
"enabled": true,
|
||||
"options": {
|
||||
"column_types": {
|
||||
"Phone": "phone",
|
||||
"Email": "email",
|
||||
"Total_Paid": "currency"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"tool": "missing",
|
||||
"enabled": true,
|
||||
"options": {
|
||||
"strategy": "none",
|
||||
"standardize_sentinels": true,
|
||||
"sentinels": ["—", "-", "--", "(blank)", "TBD", "unknown", "N/A", "#N/A", "(none)"]
|
||||
}
|
||||
},
|
||||
{
|
||||
"tool": "dedup",
|
||||
"enabled": true,
|
||||
"options": {
|
||||
"survivor_rule": "most_complete",
|
||||
"merge": true,
|
||||
"strategies": [
|
||||
{
|
||||
"columns": [
|
||||
{"column": "Email", "algorithm": "exact", "threshold": 100, "normalizer": "email"}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user