Coverage for src / local_deep_research / security / filename_sanitizer.py: 100%

25 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-14 23:55 +0000

1"""Filename sanitization for file uploads. 

2 

3Wraps werkzeug's secure_filename with additional safety checks. 

4All file upload endpoints should use sanitize_filename() instead of 

5importing secure_filename directly. 

6""" 

7 

8from __future__ import annotations 

9 

10from typing import Optional 

11 

12from werkzeug.utils import secure_filename 

13 

# Maximum length of a sanitized filename, extension included.
# Used as the default for sanitize_filename()'s ``max_length`` argument,
# which compares it against ``len()`` of the sanitized name.
# NOTE(review): 255 presumably mirrors the per-name limit of common
# filesystems — confirm against the storage backend actually used.
MAX_FILENAME_LENGTH = 255

16 

17 

class UnsafeFilenameError(ValueError):
    """Raised when a filename cannot be sanitized to a safe value.

    Subclasses ``ValueError``, so callers that already handle
    ``ValueError`` for bad input will also catch this exception.
    """

20 

21 

def sanitize_filename(
    filename: Optional[str],
    *,
    allowed_extensions: Optional[set[str]] = None,
    max_length: int = MAX_FILENAME_LENGTH,
) -> str:
    """Sanitize an uploaded filename for safe filesystem storage.

    The name is stripped of null bytes, passed through werkzeug's
    ``secure_filename``, shortened to ``max_length`` (keeping the
    extension intact), and finally checked against the extension
    allowlist when one is given.

    Args:
        filename: Raw filename from the upload.
        allowed_extensions: Optional set of allowed extensions
            (with dot, e.g. {".pdf", ".txt"}); compared
            case-insensitively. Include "" to allow names without an
            extension. If None, all extensions are allowed.
        max_length: Maximum allowed filename length, extension included.

    Returns:
        Sanitized filename safe for filesystem use, never longer than
        ``max_length``.

    Raises:
        UnsafeFilenameError: If the filename is empty, becomes empty
            after sanitization, cannot be shortened to ``max_length``
            while keeping its extension and at least one leading
            character, or has a disallowed extension.
    """
    if not filename:
        raise UnsafeFilenameError("No filename provided")

    # Strip null bytes before passing to secure_filename
    cleaned = filename.replace("\x00", "")

    # Apply werkzeug's path traversal protection
    safe_name = secure_filename(cleaned)

    if not safe_name:
        raise UnsafeFilenameError(
            "Filename contains no safe characters after sanitization"
        )

    # Enforce length limit, preserving the extension when truncating
    if len(safe_name) > max_length:
        dot_idx = safe_name.rfind(".")
        ext = safe_name[dot_idx:] if dot_idx > 0 else ""
        stem_budget = max_length - len(ext)
        # A budget below 1 means the extension alone fills (or exceeds)
        # the limit, or max_length itself is not positive. Slicing with
        # a zero/negative budget would return a bare ".ext" dotfile, an
        # empty string, or a name that is still longer than max_length,
        # so reject instead of returning an unsafe result.
        if stem_budget < 1:
            raise UnsafeFilenameError(
                "Filename cannot be shortened to the maximum length"
            )
        safe_name = safe_name[:stem_budget] + ext

    # Validate extension if allowlist provided
    if allowed_extensions is not None:
        dot_idx = safe_name.rfind(".")
        ext = safe_name[dot_idx:].lower() if dot_idx > 0 else ""
        # Normalize allowlist for case-insensitive comparison
        normalized = {e.lower() for e in allowed_extensions}
        if ext not in normalized:
            raise UnsafeFilenameError("File type not allowed")

    return safe_name