Coverage for src / local_deep_research / security / filename_sanitizer.py: 100%
25 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
"""Filename sanitization for file uploads.

Wraps werkzeug's secure_filename with additional safety checks.
All file upload endpoints should use sanitize_filename() instead of
importing secure_filename directly.
"""
8from __future__ import annotations
10from typing import Optional
12from werkzeug.utils import secure_filename
# Maximum filename length (including extension).
# Used as the default for sanitize_filename()'s ``max_length`` parameter.
MAX_FILENAME_LENGTH = 255
class UnsafeFilenameError(ValueError):
    """Raised when a filename cannot be sanitized to a safe value.

    Subclasses ValueError, so callers that already handle ValueError
    for bad upload input will also catch this exception.
    """
def sanitize_filename(
    filename: Optional[str],
    *,
    allowed_extensions: Optional[set[str]] = None,
    max_length: int = MAX_FILENAME_LENGTH,
) -> str:
    """Sanitize an uploaded filename for safe filesystem storage.

    The name is stripped of null bytes, passed through werkzeug's
    ``secure_filename``, shortened to ``max_length`` characters, and
    finally checked against the optional extension allowlist.

    Args:
        filename: Raw filename from the upload.
        allowed_extensions: Optional set of allowed extensions
            (with dot, e.g. {".pdf", ".txt"}). Compared
            case-insensitively. If None, all extensions are allowed.
        max_length: Maximum allowed filename length, including the
            extension. Must be at least 1.

    Returns:
        Sanitized filename safe for filesystem use, never longer than
        ``max_length`` characters.

    Raises:
        UnsafeFilenameError: If the filename is empty, becomes empty
            after sanitization, or has a disallowed extension.
        ValueError: If ``max_length`` is smaller than 1.
    """
    if not filename:
        raise UnsafeFilenameError("No filename provided")

    # A non-positive limit cannot yield any valid name; negative values
    # would also turn the truncation slices below into "drop from the end".
    if max_length < 1:
        raise ValueError("max_length must be a positive integer")

    # Strip null bytes before passing to secure_filename
    cleaned = filename.replace("\x00", "")

    # Apply werkzeug's path traversal protection
    safe_name = secure_filename(cleaned)
    if not safe_name:
        raise UnsafeFilenameError(
            "Filename contains no safe characters after sanitization"
        )

    # Enforce length limit
    if len(safe_name) > max_length:
        dot_idx = safe_name.rfind(".")
        ext = safe_name[dot_idx:] if dot_idx > 0 else ""
        stem_budget = max_length - len(ext)
        if ext and stem_budget >= 1:
            # Preserve extension when truncating
            safe_name = safe_name[:stem_budget] + ext
        else:
            # No extension, or the extension alone would fill or exceed
            # the limit: a non-positive stem budget would either leave a
            # bare ".ext" name or (when negative) fail to shorten the
            # name to max_length, so truncate the whole name instead.
            safe_name = safe_name[:max_length]

    # Validate extension if allowlist provided. This runs on the final
    # (possibly truncated) name, i.e. the name that will actually be stored.
    if allowed_extensions is not None:
        dot_idx = safe_name.rfind(".")
        # A leading dot (dot_idx == 0) is not treated as an extension.
        ext = safe_name[dot_idx:].lower() if dot_idx > 0 else ""
        # Normalize allowlist for case-insensitive comparison
        normalized = {e.lower() for e in allowed_extensions}
        if ext not in normalized:
            raise UnsafeFilenameError("File type not allowed")

    return safe_name