Files
lifeos-prod/routers/files.py
Michael 27e07fefe1 feat: file search, type/tag filters, and pagination
Sync from dev: tsvector search, type/tag filters, pagination,
dropdown folder picker.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-03 01:05:20 +00:00

475 lines
16 KiB
Python

"""Files: upload, download, list, preview, folder-aware storage, and WebDAV sync."""
import os
import mimetypes
from pathlib import Path
from fastapi import APIRouter, Request, Form, Depends, UploadFile, File as FastAPIFile
from fastapi.templating import Jinja2Templates
from fastapi.responses import RedirectResponse, FileResponse, HTMLResponse
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import text
from typing import Optional
from core.database import get_db
from core.base_repository import BaseRepository
from core.sidebar import get_sidebar_data
# Router for every /files endpoint; the tag groups them in the OpenAPI docs.
router = APIRouter(prefix="/files", tags=["files"])
templates = Jinja2Templates(directory="templates")
# Root directory for stored files; per the module docstring this tree is also
# exposed over WebDAV, so the sync logic below reconciles external changes.
FILE_STORAGE_PATH = os.getenv("FILE_STORAGE_PATH", "/opt/lifeos/webdav")
# Ensure storage dir exists at import time so uploads/walks never fail on a fresh host
Path(FILE_STORAGE_PATH).mkdir(parents=True, exist_ok=True)
# MIME types that can be previewed inline (checked by the preview route)
PREVIEWABLE = {
    "image/jpeg", "image/png", "image/gif", "image/webp", "image/svg+xml",
    "application/pdf", "text/plain", "text/html", "text/csv",
}
# OS/editor junk files to skip during sync so they never get DB records
SKIP_FILES = {".DS_Store", "Thumbs.db", ".gitkeep", "desktop.ini"}
def _resolve_path(item):
    """Return the absolute on-disk path for a DB file record.

    The record stores `storage_path` relative to FILE_STORAGE_PATH.
    """
    rel = item["storage_path"]
    return os.path.join(FILE_STORAGE_PATH, rel)
def get_folders():
    """Return a sorted list of folder paths relative to FILE_STORAGE_PATH."""
    found = []
    for dirpath, dirnames, _ in os.walk(FILE_STORAGE_PATH):
        # Prune hidden directories in place so os.walk never descends into them
        dirnames[:] = [d for d in dirnames if not d.startswith(".")]
        rel = os.path.relpath(dirpath, FILE_STORAGE_PATH)
        # relpath of the root itself is "."; everything else is a real folder
        if rel != ".":
            found.append(rel)
    return sorted(found)
def resolve_collision(folder_abs, filename):
    """Return a name that does not clash with existing files in folder_abs.

    The original name is kept when free; otherwise " (2)", " (3)", ... is
    inserted before the extension until an unused name is found.
    """
    if not os.path.exists(os.path.join(folder_abs, filename)):
        return filename
    stem, ext = os.path.splitext(filename)
    n = 2
    while os.path.exists(os.path.join(folder_abs, f"{stem} ({n}){ext}")):
        n += 1
    return f"{stem} ({n}){ext}"
async def sync_files(db: AsyncSession):
    """Reconcile filesystem state with the `files` table.

    Three cases are handled:
    - File on disk with no DB record at all → create a record.
    - Soft-deleted record whose file reappeared on disk → restore it.
    - Active record whose file is gone from disk → soft-delete it.

    Returns a dict {"added": int, "removed": int}; restores count as added.
    """
    added = 0
    removed = 0
    # Hoisted out of the loops below: one repository serves every operation.
    repo = BaseRepository("files", db)
    # Collect every relative file path currently on disk, skipping hidden
    # directories/files and well-known OS junk.
    disk_files = set()
    for dirpath, dirnames, filenames in os.walk(FILE_STORAGE_PATH):
        dirnames[:] = [d for d in dirnames if not d.startswith(".")]
        for fname in filenames:
            if fname in SKIP_FILES or fname.startswith("."):
                continue
            abs_path = os.path.join(dirpath, fname)
            disk_files.add(os.path.relpath(abs_path, FILE_STORAGE_PATH))
    # Fetch ALL records (soft-deleted included) so a deliberately deleted
    # file is not resurrected as a brand-new record.
    result = await db.execute(text(
        "SELECT id, storage_path, is_deleted FROM files"
    ))
    db_records = [dict(r._mapping) for r in result]
    # Lookup maps for the three reconciliation cases
    all_db_paths = {r["storage_path"] for r in db_records}
    active_db_paths = {r["storage_path"] for r in db_records if not r["is_deleted"]}
    deleted_on_disk = {r["storage_path"]: r for r in db_records
                       if r["is_deleted"] and r["storage_path"] in disk_files}
    # New on disk, not in DB at all → create record
    for rel_path in disk_files - all_db_paths:
        abs_path = os.path.join(FILE_STORAGE_PATH, rel_path)
        filename = os.path.basename(rel_path)
        try:
            size_bytes = os.path.getsize(abs_path)
        except OSError:
            # File vanished between the walk and now; skip it this round.
            continue
        await repo.create({
            "filename": filename,
            "original_filename": filename,
            "storage_path": rel_path,
            "mime_type": mimetypes.guess_type(filename)[0],
            "size_bytes": size_bytes,
        })
        added += 1
    # Soft-deleted in DB but still on disk → restore
    for record in deleted_on_disk.values():
        await repo.restore(record["id"])
        added += 1
    # Active in DB but missing from disk → soft-delete
    missing_files = active_db_paths - disk_files
    for record in db_records:
        if record["storage_path"] in missing_files and not record["is_deleted"]:
            await repo.soft_delete(record["id"])
            removed += 1
    return {"added": added, "removed": removed}
# Whitelisted ORDER BY fragments keyed by the `sort` query param; interpolating
# only these fixed values keeps the dynamic SQL below injection-safe.
SORT_OPTIONS = {
    "path": "storage_path ASC",
    "path_desc": "storage_path DESC",
    "name": "original_filename ASC",
    "name_desc": "original_filename DESC",
    "date": "created_at DESC",
    "date_asc": "created_at ASC",
}
# Map type filter values to mime prefixes/patterns (SQL LIKE wildcards;
# e.g. "spreadsheet" matches any application/vnd.*sheet* MIME type)
TYPE_FILTERS = {
    "image": "image/%",
    "document": "application/pdf",
    "text": "text/%",
    "spreadsheet": "application/vnd.%sheet%",
    "archive": "application/%zip%",
}
# Page size for the file list
PER_PAGE = 50
@router.get("/")
async def list_files(
    request: Request,
    folder: Optional[str] = None,
    sort: Optional[str] = None,
    q: Optional[str] = None,
    file_type: Optional[str] = None,
    tag: Optional[str] = None,
    page: int = 1,
    context_type: Optional[str] = None,
    context_id: Optional[str] = None,
    db: AsyncSession = Depends(get_db),
):
    """Render the paginated file list with search and filters.

    Query params:
        folder: " " (space) = root only, ""/None = all folders, else a prefix.
        sort: key into SORT_OPTIONS (default: path ascending).
        q: whitespace-separated full-text search terms (prefix-matched).
        file_type: key into TYPE_FILTERS (mime LIKE pattern).
        tag: exact match against the tags array.
        page: 1-based page number, PER_PAGE rows per page.
        context_type/context_id: restrict to files mapped to that context.
    """
    sidebar = await get_sidebar_data(db)
    # Auto-sync on page load so externally (WebDAV) changed files show up
    sync_result = await sync_files(db)
    folders = get_folders()
    # order_by comes from a fixed whitelist, so f-string interpolation is safe
    order_by = SORT_OPTIONS.get(sort, "storage_path ASC")
    # Normalize folder param from form: " " (space) = root, "" = all, None = all
    if folder is not None:
        if folder.strip() == "" and folder != "":
            # Space-only value means root folder
            folder = ""
        elif folder.strip() == "":
            # Empty string means "all folders" (no filter)
            folder = None
    # Build dynamic WHERE clauses; user values only travel as bind params
    where_clauses = ["f.is_deleted = false"]
    params = {}
    if context_type and context_id:
        where_clauses.append("fm.context_type = :ct AND fm.context_id = :cid")
        params["ct"] = context_type
        params["cid"] = context_id
    if folder is not None:
        if folder == "":
            # Root folder = storage paths with no directory separator
            where_clauses.append("f.storage_path NOT LIKE '%/%'")
        else:
            where_clauses.append("f.storage_path LIKE :prefix")
            params["prefix"] = folder + "/%"
    if q and q.strip():
        # Strip tsquery metacharacters (' & | ! : parens) from each term:
        # they make to_tsquery() raise a syntax error even when the string is
        # passed as a bind parameter. Keep word characters only, then
        # prefix-match each surviving term ANDed together.
        terms = ["".join(ch for ch in t if ch.isalnum() or ch == "_")
                 for t in q.strip().split()]
        terms = [t for t in terms if t]
        if terms:
            tsquery = " & ".join(f"{t}:*" for t in terms)
            where_clauses.append("f.search_vector @@ to_tsquery('english', :tsquery)")
            params["tsquery"] = tsquery
    if file_type and file_type in TYPE_FILTERS:
        where_clauses.append("f.mime_type LIKE :mime_pattern")
        params["mime_pattern"] = TYPE_FILTERS[file_type]
    if tag and tag.strip():
        where_clauses.append(":tag = ANY(f.tags)")
        params["tag"] = tag.strip()
    where_sql = " AND ".join(where_clauses)
    # Count total for pagination (the join is only needed when context-filtered)
    if context_type and context_id:
        count_sql = f"""
            SELECT COUNT(*) FROM files f
            JOIN file_mappings fm ON fm.file_id = f.id
            WHERE {where_sql}
        """
    else:
        count_sql = f"SELECT COUNT(*) FROM files f WHERE {where_sql}"
    total = (await db.execute(text(count_sql), params)).scalar()
    # Clamp page and derive offset / page count
    if page < 1:
        page = 1
    offset = (page - 1) * PER_PAGE
    total_pages = max(1, (total + PER_PAGE - 1) // PER_PAGE)
    # Fetch the page of rows
    if context_type and context_id:
        query_sql = f"""
            SELECT f.*, fm.context_type, fm.context_id
            FROM files f
            JOIN file_mappings fm ON fm.file_id = f.id
            WHERE {where_sql}
            ORDER BY {order_by}
            LIMIT :lim OFFSET :off
        """
    else:
        query_sql = f"""
            SELECT f.* FROM files f
            WHERE {where_sql}
            ORDER BY {order_by}
            LIMIT :lim OFFSET :off
        """
    params["lim"] = PER_PAGE
    params["off"] = offset
    result = await db.execute(text(query_sql), params)
    items = [dict(r._mapping) for r in result]
    # Add derived folder field for display
    for item in items:
        dirname = os.path.dirname(item["storage_path"])
        item["folder"] = dirname if dirname else "/"
    # Get all unique tags for the tag filter dropdown
    tag_result = await db.execute(text(
        "SELECT DISTINCT unnest(tags) AS tag FROM files WHERE is_deleted = false AND tags IS NOT NULL ORDER BY tag"
    ))
    all_tags = [r._mapping["tag"] for r in tag_result]
    return templates.TemplateResponse("files.html", {
        "request": request, "sidebar": sidebar, "items": items,
        "folders": folders, "current_folder": folder,
        "current_sort": sort or "path",
        "current_q": q or "",
        "current_type": file_type or "",
        "current_tag": tag or "",
        "current_page": page,
        "total_pages": total_pages,
        "total_files": total,
        "all_tags": all_tags,
        "sync_result": sync_result,
        "context_type": context_type or "",
        "context_id": context_id or "",
        "page_title": "Files", "active_nav": "files",
    })
@router.get("/upload")
async def upload_form(
    request: Request,
    folder: Optional[str] = None,
    context_type: Optional[str] = None,
    context_id: Optional[str] = None,
    db: AsyncSession = Depends(get_db),
):
    """Show the upload form, optionally pre-selecting a folder and context."""
    sidebar = await get_sidebar_data(db)
    context = {
        "request": request,
        "sidebar": sidebar,
        "folders": get_folders(),
        "prefill_folder": folder or "",
        "context_type": context_type or "",
        "context_id": context_id or "",
        "page_title": "Upload File",
        "active_nav": "files",
    }
    return templates.TemplateResponse("file_upload.html", context)
@router.post("/upload")
async def upload_file(
    request: Request,
    file: UploadFile = FastAPIFile(...),
    description: Optional[str] = Form(None),
    tags: Optional[str] = Form(None),
    folder: Optional[str] = Form(None),
    new_folder: Optional[str] = Form(None),
    context_type: Optional[str] = Form(None),
    context_id: Optional[str] = Form(None),
    db: AsyncSession = Depends(get_db),
):
    """Save an upload under FILE_STORAGE_PATH and insert its DB record.

    `new_folder` (free text) takes priority over `folder` (dropdown value).
    Both are untrusted form input, so empty / "." / ".." path segments are
    dropped to keep the write inside the storage root. Filename collisions
    get a " (2)" style suffix. If context_type/context_id are supplied, a
    file_mappings row links the file to that context.
    """
    # Determine target folder
    target_folder = ""
    if new_folder and new_folder.strip():
        target_folder = new_folder.strip().strip("/")
    elif folder and folder.strip():
        target_folder = folder.strip()
    # Sanitize: drop empty, "." and ".." segments so crafted input like
    # "../../etc" cannot traverse out of FILE_STORAGE_PATH.
    segments = [s for s in target_folder.replace("\\", "/").split("/")
                if s and s not in (".", "..")]
    target_folder = "/".join(segments)
    # Build absolute folder path and ensure it exists
    if target_folder:
        folder_abs = os.path.join(FILE_STORAGE_PATH, target_folder)
    else:
        folder_abs = FILE_STORAGE_PATH
    os.makedirs(folder_abs, exist_ok=True)
    # Use original filename with separators neutralized; handle collisions
    original = file.filename or "unknown"
    safe_name = original.replace("/", "_").replace("\\", "_")
    if safe_name in (".", ".."):
        # A bare dot name would resolve to a directory, not a file
        safe_name = "unknown"
    final_name = resolve_collision(folder_abs, safe_name)
    # Build relative storage path
    if target_folder:
        storage_path = os.path.join(target_folder, final_name)
    else:
        storage_path = final_name
    abs_path = os.path.join(FILE_STORAGE_PATH, storage_path)
    # Save to disk (whole file buffered in memory; fine for typical sizes)
    content = await file.read()
    with open(abs_path, "wb") as f:
        f.write(content)
    size_bytes = len(content)
    # Insert file record
    repo = BaseRepository("files", db)
    data = {
        "filename": final_name,
        "original_filename": original,
        "storage_path": storage_path,
        "mime_type": file.content_type,
        "size_bytes": size_bytes,
        "description": description,
    }
    if tags and tags.strip():
        data["tags"] = [t.strip() for t in tags.split(",") if t.strip()]
    new_file = await repo.create(data)
    # Create file mapping if context provided
    if context_type and context_type.strip() and context_id and context_id.strip():
        await db.execute(text("""
            INSERT INTO file_mappings (file_id, context_type, context_id)
            VALUES (:fid, :ct, :cid)
            ON CONFLICT DO NOTHING
        """), {"fid": new_file["id"], "ct": context_type, "cid": context_id})
    # Redirect back to context or file list
    if context_type and context_id:
        return RedirectResponse(
            url=f"/files?context_type={context_type}&context_id={context_id}",
            status_code=303,
        )
    return RedirectResponse(url="/files", status_code=303)
@router.post("/sync")
async def manual_sync(request: Request, db: AsyncSession = Depends(get_db)):
    """Manually re-run the filesystem/DB sync, then redirect to the file list."""
    await sync_files(db)
    return RedirectResponse(url="/files", status_code=303)
@router.get("/{file_id}/download")
async def download_file(file_id: str, db: AsyncSession = Depends(get_db)):
    """Send the stored file as an attachment; bounce to the list if missing."""
    item = await BaseRepository("files", db).get(file_id)
    if not item:
        return RedirectResponse(url="/files", status_code=303)
    abs_path = _resolve_path(item)
    if not os.path.exists(abs_path):
        # DB record exists but the file is gone from disk
        return RedirectResponse(url="/files", status_code=303)
    media_type = item.get("mime_type") or "application/octet-stream"
    return FileResponse(
        path=abs_path,
        filename=item["original_filename"],
        media_type=media_type,
    )
@router.get("/{file_id}/preview")
async def preview_file(file_id: str, request: Request, db: AsyncSession = Depends(get_db)):
    """Render the preview page; inline rendering only for PREVIEWABLE types."""
    repo = BaseRepository("files", db)
    sidebar = await get_sidebar_data(db)
    item = await repo.get(file_id)
    if not item:
        return RedirectResponse(url="/files", status_code=303)
    parent = os.path.dirname(item["storage_path"])
    context = {
        "request": request,
        "sidebar": sidebar,
        "item": item,
        "can_preview": item.get("mime_type", "") in PREVIEWABLE,
        "folder": parent if parent else "/",
        "page_title": item["original_filename"],
        "active_nav": "files",
    }
    return templates.TemplateResponse("file_preview.html", context)
@router.get("/{file_id}/serve")
async def serve_file(file_id: str, db: AsyncSession = Depends(get_db)):
    """Serve file inline (for img src, iframe, etc).

    Text files are wrapped in a minimal HTML shell with a forced light theme
    so they render legibly inside the preview iframe; everything else is
    streamed as-is with its stored MIME type.
    """
    repo = BaseRepository("files", db)
    item = await repo.get(file_id)
    if not item:
        return RedirectResponse(url="/files", status_code=303)
    abs_path = _resolve_path(item)
    if not os.path.exists(abs_path):
        return RedirectResponse(url="/files", status_code=303)
    mime = item.get("mime_type") or "application/octet-stream"
    # Wrap text files in HTML with forced white background / dark text
    if mime.startswith("text/"):
        try:
            # Decode as UTF-8 explicitly: the HTML shell below declares
            # charset=utf-8, so relying on the platform locale encoding would
            # garble non-ASCII content. Undecodable bytes are replaced.
            with open(abs_path, "r", encoding="utf-8", errors="replace") as f:
                text_content = f.read()
        except Exception:
            # Fall back to raw delivery if the file can't be read as text
            return FileResponse(path=abs_path, media_type=mime)
        from html import escape
        html = (
            '<!DOCTYPE html><html><head><meta charset="utf-8">'
            '<style>body{background:#fff;color:#1a1a1a;font-family:monospace;'
            'font-size:14px;padding:16px;margin:0;white-space:pre-wrap;'
            'word-wrap:break-word;}</style></head><body>'
            f'{escape(text_content)}</body></html>'
        )
        return HTMLResponse(content=html)
    return FileResponse(path=abs_path, media_type=mime)
@router.post("/{file_id}/delete")
async def delete_file(file_id: str, request: Request, db: AsyncSession = Depends(get_db)):
    """Soft-delete the file record, then redirect back to the referring page."""
    await BaseRepository("files", db).soft_delete(file_id)
    back = request.headers.get("referer", "/files")
    return RedirectResponse(url=back, status_code=303)