Sidebar nav now groups tools under Data Review / Data Cleaners / Transformations / Automations via st.navigation, replacing the flat auto-discovered list. Tool display names switch to action-first phrasing (Find Duplicates, Fix Missing Values, Find Unusual Values, Standardize Formats, Clean Text, Quality Check, Map Columns, Combine Files, Automated Workflows) in EN + ES packs and on each page's H1. The Data Cleaners section follows the requested order: Missing Values → Outliers → Text Cleaner → Format Standardizer → Deduplicator → Quality Check. (Text Cleaner kept inside cleaners since the request didn't list it but the tool still ships.) Registry now carries a section field; helpers added: tools_in_section(), section_label(). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
104 lines
3.3 KiB
Python
104 lines
3.3 KiB
Python
"""DataTools Outlier Detector — stub page."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
import streamlit as st
|
|
|
|
# Put the directory four levels above this file on sys.path (at the front,
# and only once) before the ``src.*`` imports that follow.
# NOTE(review): presumably that directory is the repository root that
# contains ``src`` — confirm against the project layout.
_this_file = Path(__file__).resolve()
_project_root = _this_file.parent.parent.parent.parent
_root_entry = str(_project_root)
if _root_entry not in sys.path:
    sys.path.insert(0, _root_entry)
|
|
|
|
from src.gui.components import (
|
|
hide_streamlit_chrome,
|
|
require_feature_or_render_upgrade,
|
|
require_normalization_gate,
|
|
)
|
|
from src.license import FeatureFlag
|
|
|
|
# ---------------------------------------------------------------------------
# Page gates
# ---------------------------------------------------------------------------

# These helpers come from src.gui.components and run before anything is drawn.
# NOTE(review): judging by their names they hide Streamlit's default chrome,
# gate the page on the OUTLIER_DETECTOR licence feature, and enforce the
# normalization prerequisite — confirm in src.gui.components. The call order
# is kept exactly as in the original.
hide_streamlit_chrome()
require_feature_or_render_upgrade(FeatureFlag.OUTLIER_DETECTOR)
require_normalization_gate()

# ---------------------------------------------------------------------------
# Header
# ---------------------------------------------------------------------------

_page_title = "📊 Find Unusual Values"
_page_tagline = "Detect and handle outliers in numeric columns."
_status_notice = "This tool is under development."

st.title(_page_title)
st.caption(_page_tagline)

# The page is a stub: tell the user up front that processing is not available.
st.info(_status_notice)
|
|
|
|
# ---------------------------------------------------------------------------
# What this tool will do
# ---------------------------------------------------------------------------

# One entry per bullet shown under the "Features:" heading.
_planned_features = (
    "Z-score detection (configurable threshold)",
    "IQR (interquartile range) detection",
    "MAD (median absolute deviation) detection",
    "Domain-rule violations (e.g., age < 0, price > $1M)",
    "Visual outlier highlighting in data preview",
    "Handling: flag only, remove, cap/winsorize to bounds",
)

# Leading newline, bold heading, then one "- item" line per feature, each
# terminated by a newline.
_features_markdown = "\n**Features:**\n" + "".join(
    f"- {feature}\n" for feature in _planned_features
)
st.markdown(_features_markdown)

st.divider()
|
|
|
|
# ---------------------------------------------------------------------------
# File upload (functional)
# ---------------------------------------------------------------------------

# Upload widget: the only functional part of this stub page. The selected file
# is parsed with pandas and its first rows are previewed; nothing is processed.
uploaded = st.file_uploader(
    "Upload CSV or Excel file",
    type=["csv", "tsv", "xlsx", "xls"],
    help="Upload a file to preview. Processing is not yet available.",
    key="outlier_file_upload",
)

if uploaded is not None:
    # Imported here rather than at module top, so pandas is only loaded once
    # a file has actually been provided.
    import pandas as pd

    # Match the extension case-insensitively so a name such as "DATA.XLSX"
    # is routed to the Excel reader instead of falling through to the CSV one.
    file_name = uploaded.name.lower()

    try:
        if file_name.endswith((".xlsx", ".xls")):
            df = pd.read_excel(uploaded)
        elif file_name.endswith(".tsv"):
            # The uploader accepts .tsv files; they are tab-delimited, and the
            # default comma separator would collapse every row into a single
            # column.
            df = pd.read_csv(uploaded, sep="\t")
        else:
            df = pd.read_csv(uploaded)

        # Rendering stays inside the try block, as in the original, so any
        # failure while displaying the frame is reported through the same
        # user-facing error message.
        st.subheader(f"Preview: {uploaded.name}")
        st.caption(f"{len(df)} rows, {len(df.columns)} columns")
        st.dataframe(df.head(10), use_container_width=True)
    except Exception as e:
        # Top-level UI boundary: show a readable message instead of a traceback.
        from src.core.errors import format_for_user

        st.error(
            f"**Could not read `{uploaded.name}`**\n\n"
            f"```\n{format_for_user(e)}\n```"
        )
|
|
|
|
# ---------------------------------------------------------------------------
# Placeholder options
# ---------------------------------------------------------------------------

# Every widget below is rendered with disabled=True: the controls are shown
# but cannot be changed or clicked.

st.subheader("Detection Method")

_detection_methods = [
    "Z-Score",
    "IQR (Interquartile Range)",
    "MAD (Median Absolute Deviation)",
]
st.selectbox("Method", _detection_methods, disabled=True)
st.slider(
    "Z-Score threshold",
    min_value=1.0,
    max_value=5.0,
    value=3.0,
    step=0.1,
    disabled=True,
)
st.slider(
    "IQR multiplier",
    min_value=1.0,
    max_value=3.0,
    value=1.5,
    step=0.1,
    disabled=True,
)

st.subheader("Handling")

_handling_actions = [
    "Flag only (add column)",
    "Remove outlier rows",
    "Cap / Winsorize to bounds",
]
st.selectbox("Action", _handling_actions, disabled=True)

st.divider()
st.button(
    "Detect Outliers",
    type="primary",
    use_container_width=True,
    disabled=True,
)
|
|
|
|
# ---------------------------------------------------------------------------
# Footer
# ---------------------------------------------------------------------------

# Privacy note and product version, shown as a caption under a divider.
_footer_text = "Runs locally. Your data never leaves this computer. | DataTools v3.0"

st.divider()
st.caption(_footer_text)
|