Coverage for src / local_deep_research / library / download_management / filters / resource_filter.py: 29%
32 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
"""
Resource Filter

Smart filtering logic for downloadable resources based on failure history,
cooldowns, and retry policies. Replaces simple file existence checks with
intelligent retry management.
"""
9from typing import List, Optional
11from loguru import logger
13from ..retry_manager import RetryManager, ResourceFilterResult, FilterSummary
class ResourceFilter:
    """Filter resources for download based on history and policies.

    Every decision (retry eligibility, summaries, statistics) is delegated to
    the ``RetryManager`` held in ``self.retry_manager``; this class only
    orchestrates those calls and reshapes the results for callers.
    """

    # Status values that get a dedicated bucket in get_skipped_resources_info().
    _PERMANENTLY_FAILED = "permanently_failed"
    _TEMPORARILY_FAILED = "temporarily_failed"

    def __init__(self, username: str, password: Optional[str] = None):
        """
        Initialize the resource filter.

        Args:
            username: Username for database access (forwarded to RetryManager)
            password: Optional password for encrypted database (forwarded to
                RetryManager)
        """
        self.username = username
        self.retry_manager = RetryManager(username, password)
        logger.info(f"Initialized for user: {username}")

    def filter_downloadable_resources(
        self, resources: List, check_files: bool = True
    ) -> "List[ResourceFilterResult]":
        """
        Filter resources that are available for download.

        Args:
            resources: List of ResearchResource objects to filter
            check_files: Whether to run the legacy file-existence hook on the
                results. That hook is currently a pass-through, so this flag
                does not change the returned list.

        Returns:
            List of ResourceFilterResult objects with retry decisions
        """
        logger.info(f"Filtering {len(resources)} resources")

        # The retry manager decides based on failure history.
        results = self.retry_manager.filter_resources(resources)

        # Optional legacy file existence check (no-op for now, see the hook).
        if check_files:
            results = self._apply_legacy_file_check(results)

        return results

    def _apply_legacy_file_check(
        self, results: "List[ResourceFilterResult]"
    ) -> "List[ResourceFilterResult]":
        """
        Placeholder hook for the legacy file existence check.

        Args:
            results: Existing filter results

        Returns:
            The same ``results`` object, unchanged. No file lookup is
            performed yet; the retry manager handles the main logic.
        """
        # This would get the download service instance to check for existing
        # files. For now it is skipped on purpose.
        return results

    def get_filter_summary(
        self, resources: List, check_files: bool = True
    ) -> "FilterSummary":
        """
        Get a summary of filtering results.

        The resources are filtered again on every call; nothing is cached.

        Args:
            resources: List of resources to filter and summarize
            check_files: Forwarded to filter_downloadable_resources()

        Returns:
            FilterSummary object produced by the retry manager
        """
        results = self.filter_downloadable_resources(resources, check_files)
        return self.retry_manager.get_filter_summary(results)

    def get_skipped_resources_info(self, resources: List) -> dict:
        """
        Get detailed information about skipped resources for UI display.

        A resource counts as skipped when its filter result has a falsy
        ``can_retry``.

        Args:
            resources: List of all resources

        Returns:
            Dictionary with the keys ``total_skipped``, ``permanently_failed``,
            ``temporarily_failed``, ``other_skipped`` and ``skipped_resources``.
            Each list holds per-resource dicts with ``resource_id``,
            ``status``, ``reason``, ``estimated_wait_minutes`` (whole minutes
            as a float, or None when no wait is set) and ``status_info``.
        """
        results = self.filter_downloadable_resources(resources)

        skipped_resources = []
        permanently_failed = []
        temporarily_failed = []
        other_skipped = []

        for result in results:
            if result.can_retry:
                continue

            status_info = self.retry_manager.status_tracker.get_resource_status(
                result.resource_id
            )

            # Compare against None explicitly: a zero-length wait is falsy and
            # would otherwise be reported as "unknown" instead of 0 minutes.
            wait = result.estimated_wait
            wait_minutes = wait.total_seconds() // 60 if wait is not None else None

            entry = {
                "resource_id": result.resource_id,
                "status": result.status,
                "reason": result.reason,
                "estimated_wait_minutes": wait_minutes,
                "status_info": status_info,
            }
            skipped_resources.append(entry)

            # Bucket in the same pass; equality (not hashing) keeps this
            # working for any status type that compares equal to the strings.
            if entry["status"] == self._PERMANENTLY_FAILED:
                permanently_failed.append(entry)
            elif entry["status"] == self._TEMPORARILY_FAILED:
                temporarily_failed.append(entry)
            else:
                other_skipped.append(entry)

        return {
            "total_skipped": len(skipped_resources),
            "permanently_failed": permanently_failed,
            "temporarily_failed": temporarily_failed,
            "other_skipped": other_skipped,
            "skipped_resources": skipped_resources,
        }

    def should_skip_resource(self, resource_id: int) -> tuple[bool, str]:
        """
        Quick check if a specific resource should be skipped.

        Args:
            resource_id: Resource identifier

        Returns:
            Tuple of (should_skip, reason). When the retry decision carries no
            reason, the generic text "Resource not available for retry" is
            returned, including when ``should_skip`` is False, so callers
            should only show the reason for skipped resources.
        """
        decision = self.retry_manager.should_retry_resource(resource_id)
        should_skip = not decision.can_retry
        reason = decision.reason or "Resource not available for retry"
        return should_skip, reason

    def get_retry_statistics(self) -> dict:
        """
        Get retry statistics for monitoring and debugging.

        Returns:
            Dictionary with retry statistics, as returned by the retry manager
        """
        return self.retry_manager.get_retry_statistics()