Coverage for src / local_deep_research / library / download_management / filters / resource_filter.py: 29%

32 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-01-11 00:51 +0000

1""" 

2Resource Filter 

3 

4Smart filtering logic for downloadable resources based on failure history, 

5cooldowns, and retry policies. Replaces simple file existence checks with 

6intelligent retry management. 

7""" 

8 

9from typing import List, Optional 

10 

11from loguru import logger 

12 

13from ..retry_manager import RetryManager, ResourceFilterResult, FilterSummary 

14 

15 

class ResourceFilter:
    """Filter resources for download based on history and policies.

    Every retry decision is delegated to the ``RetryManager`` created in
    ``__init__``; this class only forwards calls and reshapes the results
    for callers such as the UI.
    """

    def __init__(self, username: str, password: Optional[str] = None):
        """
        Initialize the resource filter.

        Args:
            username: Username for database access
            password: Optional password for encrypted database
        """
        self.username = username
        self.retry_manager = RetryManager(username, password)
        logger.info(f"Initialized for user: {username}")

    def filter_downloadable_resources(
        self, resources: List, check_files: bool = True
    ) -> List[ResourceFilterResult]:
        """
        Filter resources that are available for download.

        Args:
            resources: List of ResearchResource objects to filter
            check_files: Whether to also run the legacy file existence check.
                That check is currently a pass-through, so this flag does
                not change the returned results.

        Returns:
            List of ResourceFilterResult objects with retry decisions
        """
        logger.info(f"Filtering {len(resources)} resources")

        # The retry manager decides based on failure history.
        results = self.retry_manager.filter_resources(resources)

        # Optional legacy file existence check.
        if check_files:
            results = self._apply_legacy_file_check(results)

        return results

    def _apply_legacy_file_check(
        self, results: List[ResourceFilterResult]
    ) -> List[ResourceFilterResult]:
        """
        Apply legacy file existence checking to filter results.

        Placeholder: the intended implementation would consult the download
        service for already-existing files. For now the results are returned
        unchanged because the retry manager handles the main logic.

        Args:
            results: Existing filter results

        Returns:
            The same ``results`` object, unmodified
        """
        return results

    def get_filter_summary(
        self, resources: List, check_files: bool = True
    ) -> FilterSummary:
        """
        Filter ``resources`` and summarize the outcome.

        Args:
            resources: List of resources to filter
            check_files: Forwarded to ``filter_downloadable_resources``

        Returns:
            FilterSummary produced by the retry manager for the results
        """
        results = self.filter_downloadable_resources(resources, check_files)
        return self.retry_manager.get_filter_summary(results)

    def get_skipped_resources_info(self, resources: List) -> dict:
        """
        Get detailed information about skipped resources for UI display.

        A resource counts as skipped when its filter result has a falsy
        ``can_retry``.

        Args:
            resources: List of all resources

        Returns:
            Dictionary with the keys ``total_skipped``,
            ``permanently_failed``, ``temporarily_failed``,
            ``other_skipped`` and ``skipped_resources``. Each entry holds
            ``resource_id``, ``status``, ``reason``,
            ``estimated_wait_minutes`` (whole minutes, or ``None`` when no
            wait estimate exists) and ``status_info``.
        """
        results = self.filter_downloadable_resources(resources)

        skipped_resources = []
        permanently_failed = []
        temporarily_failed = []
        other_skipped = []

        for result in results:
            if result.can_retry:
                continue

            status_info = (
                self.retry_manager.status_tracker.get_resource_status(
                    result.resource_id
                )
            )

            # Compare against None rather than relying on truthiness: a
            # zero-length wait is falsy but is still a known estimate and
            # must be reported as 0 minutes, not as "unknown".
            # NOTE(review): presumably a datetime.timedelta - confirm
            # against ResourceFilterResult.
            wait = result.estimated_wait
            wait_minutes = (
                wait.total_seconds() // 60 if wait is not None else None
            )

            entry = {
                "resource_id": result.resource_id,
                "status": result.status,
                "reason": result.reason,
                "estimated_wait_minutes": wait_minutes,
                "status_info": status_info,
            }
            skipped_resources.append(entry)

            # Bucket in the same pass instead of re-scanning the list once
            # per category.
            if entry["status"] == "permanently_failed":
                permanently_failed.append(entry)
            elif entry["status"] == "temporarily_failed":
                temporarily_failed.append(entry)
            else:
                other_skipped.append(entry)

        return {
            "total_skipped": len(skipped_resources),
            "permanently_failed": permanently_failed,
            "temporarily_failed": temporarily_failed,
            "other_skipped": other_skipped,
            "skipped_resources": skipped_resources,
        }

    def should_skip_resource(self, resource_id: int) -> tuple[bool, str]:
        """
        Quick check if a specific resource should be skipped.

        Args:
            resource_id: Resource identifier

        Returns:
            Tuple of (should_skip, reason). ``should_skip`` is the negation
            of the retry decision's ``can_retry``. ``reason`` falls back to
            a generic message whenever the decision carries no reason.
        """
        decision = self.retry_manager.should_retry_resource(resource_id)
        return (
            not decision.can_retry,
            decision.reason or "Resource not available for retry",
        )

    def get_retry_statistics(self) -> dict:
        """
        Get retry statistics for monitoring and debugging.

        Returns:
            Whatever the retry manager's ``get_retry_statistics`` returns
        """
        return self.retry_manager.get_retry_statistics()