DataTools is local-first — "Upload" reads like "send data somewhere
remote", which contradicts the product positioning. Sweep replaces
the user-visible term throughout the UI:
- ``src/i18n/packs/en.json`` + ``es.json``: all ``upload.*`` strings
(heading, intro, uploader labels, empty state, switch-back, etc.)
and ``gate.default_name``. The ``intro_multi`` "no upload anywhere"
phrasing dropped the verb entirely — now reads "nothing leaves
this computer".
- All 9 tool pages: ``st.file_uploader(label="Upload …")`` →
``"Import …"``; matching ``st.info("Upload a …")`` empty-state
banners; ``help="Upload …"`` strings on disabled uploaders.
- ``9_Pipeline_Runner`` + ``5_Column_Mapper``: radio-option text
``"Upload schema/pipeline JSON"`` → ``"Import …"`` plus the
``.startswith("Upload")`` branch guards that read those values.
- ``_home.py``: "**Uploaded files**" → "**Imported files**".
- ``app_demo.py``: "Uploaded file is …" → "Imported file is …".
Internal identifiers left untouched: function names
(``pickup_or_upload``, ``_StashedUpload``), session-state keys
(``home_upload``, ``home_uploads``, ``home_uploaded_*``,
``merger_file_upload``), audit-log event category (``"upload"``),
Streamlit testid CSS selectors. None of those are visible to the
user.
The file_uploader's dropzone button text is a baked-in React
literal that Streamlit's ``label=`` doesn't reach; rewritten at the
DOM level with a small ``_RENAME_UPLOAD_BUTTON_JS`` snippet shipped
through ``st.iframe`` (same pattern the sticky footer uses to mount
on ``<body>``). A ``MutationObserver`` on the parent document
re-applies the swap when Streamlit remounts the dropzone after file
add/remove or page navigation, throttled via ``requestAnimationFrame``.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
98 lines
3.2 KiB
Python
98 lines
3.2 KiB
Python
"""DataTools Combine Files — stub page."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
import streamlit as st
|
|
|
|
# Put the directory four levels above this file on sys.path (once) so the
# absolute ``src.*`` imports that follow can be resolved.
_project_root = Path(__file__).resolve().parent.parent.parent.parent
_project_root_entry = str(_project_root)
if _project_root_entry not in sys.path:
    sys.path.insert(0, _project_root_entry)
|
|
|
|
from src.gui.components import (
|
|
back_to_home_link,
|
|
render_sticky_footer,
|
|
hide_streamlit_chrome,
|
|
require_feature_or_render_upgrade,
|
|
)
|
|
from src.i18n import t
|
|
from src.license import FeatureFlag
|
|
|
|
# Identifier this page reports to the audit log.
_AUDIT_PAGE_ID = "7_Multi_File_Merger"

# Shared page chrome, mounted before any page content is drawn.
hide_streamlit_chrome()
render_sticky_footer()
back_to_home_link()

# Record the page visit. The import sits here (after the sys.path bootstrap)
# exactly as in the original script.
from src.audit import log_page_open

log_page_open(_AUDIT_PAGE_ID)

# License gate for this tool.
# NOTE(review): presumably renders an upgrade prompt and halts the page when
# the feature is not licensed — confirm in src.gui.components.
require_feature_or_render_upgrade(FeatureFlag.MULTI_FILE_MERGER)
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Header
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Title and caption come from the i18n packs; the keys stay as plain literals
# so they remain greppable.
_page_title = t("tools.07_multi_file_merger.page_title")
_page_caption = t("tools.07_multi_file_merger.page_caption")

st.title(_page_title)
st.caption(_page_caption)

# Status banner: the page is a stub (this string is not routed through t()).
st.info("This tool is under development.")
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# What this tool will do
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Markdown bullet list describing what the finished tool is meant to do.
_PLANNED_FEATURES_MD = """
**Features:**
- Import multiple CSV/Excel files at once
- Automatic schema alignment (matching columns by name)
- Append mode: stack files vertically (union)
- Join mode: merge files on shared key columns
- Handle mismatched columns (fill missing with nulls or drop)
- Source file tracking column
"""

st.markdown(_PLANNED_FEATURES_MD)

# Visual break between the description and the interactive section.
st.divider()
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Multi-file upload (functional)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
_EXCEL_SUFFIXES = (".xlsx", ".xls")
_PREVIEW_ROWS = 5
_MAX_LISTED_COLUMNS = 10


def _read_preview_frame(upload):
    """Parse one imported file into a DataFrame, picking the reader by extension.

    The extension check is case-insensitive so a file named ``REPORT.XLSX`` is
    not handed to the CSV parser, and ``.tsv`` files are split on tabs rather
    than on the default comma.

    Args:
        upload: File-like object with a ``name`` attribute, as yielded by
            ``st.file_uploader``.

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        Whatever the underlying pandas reader raises on unreadable input.
    """
    import pandas as pd  # imported lazily, only once a file needs parsing

    filename = upload.name.lower()
    if filename.endswith(_EXCEL_SUFFIXES):
        return pd.read_excel(upload)
    if filename.endswith(".tsv"):
        return pd.read_csv(upload, sep="\t")
    return pd.read_csv(upload)


def _describe_columns(columns, limit=_MAX_LISTED_COLUMNS):
    """Return the first *limit* column labels comma-joined, plus ``...`` if cut."""
    # Labels are not guaranteed to be strings (e.g. integer headers), so
    # stringify each one before joining instead of letting str.join raise.
    listed = ", ".join(str(label) for label in columns[:limit])
    return f"{listed}..." if len(columns) > limit else listed


uploaded_files = st.file_uploader(
    "Import CSV or Excel files",
    type=["csv", "tsv", "xlsx", "xls"],
    accept_multiple_files=True,
    help="Import multiple files to preview. Processing is not yet available.",
    key="merger_file_upload",
)

if uploaded_files:
    for f in uploaded_files:
        # Each file is handled independently: a failure on one file is
        # reported in place and the loop moves on to the next file.
        try:
            df = _read_preview_frame(f)
            st.subheader(f"Preview: {f.name}")
            st.caption(
                f"{len(df)} rows, {len(df.columns)} columns — "
                f"Columns: {_describe_columns(df.columns)}"
            )
            st.dataframe(df.head(_PREVIEW_ROWS), width="stretch")
        except Exception as e:
            # UI boundary: surface the problem to the user rather than
            # crashing the page.
            from src.core.errors import format_for_user

            st.error(
                f"**Could not read `{f.name}`**\n\n"
                f"```\n{format_for_user(e)}\n```"
            )
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Placeholder options
|
|
# ---------------------------------------------------------------------------
|
|
|
|
st.subheader("Merge Strategy")

# Every control below is rendered with disabled=True: the options are shown
# but cannot be changed or triggered.
_MERGE_MODE_OPTIONS = [
    "Append (stack vertically)",
    "Join on key columns",
    "Schema alignment (smart merge)",
]
_MISMATCH_OPTIONS = [
    "Fill with null",
    "Drop non-shared columns",
    "Error",
]

st.selectbox("Mode", _MERGE_MODE_OPTIONS, disabled=True)
st.selectbox("Mismatched columns", _MISMATCH_OPTIONS, disabled=True)
st.checkbox("Add source filename column", value=True, disabled=True)

st.divider()

st.button("Merge Files", type="primary", width="stretch", disabled=True)
|
|
|