Coverage for src / local_deep_research / web / services / research_sources_service.py: 98%
71 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
1"""
2Service for managing research sources/resources in the database.
4This service handles saving and retrieving sources from research
5in a proper relational way using the ResearchResource table.
6"""
8from typing import List, Dict, Any, Optional
9from datetime import datetime, UTC
10from loguru import logger
12from ...database.models import ResearchResource, ResearchHistory
13from ...database.session_context import get_user_db_session
16class ResearchSourcesService:
17 """Service for managing research sources in the database."""
19 @staticmethod
20 def save_research_sources(
21 research_id: str,
22 sources: List[Dict[str, Any]],
23 username: Optional[str] = None,
24 ) -> int:
25 """
26 Save sources from research to the ResearchResource table.
28 Args:
29 research_id: The UUID of the research
30 sources: List of source dictionaries with url, title, snippet, etc.
31 username: Username for database access
33 Returns:
34 Number of sources saved
35 """
36 if not sources:
37 logger.info(f"No sources to save for research {research_id}")
38 return 0
40 saved_count = 0
42 try:
43 with get_user_db_session(username) as db_session:
44 # First check if resources already exist for this research
45 existing = (
46 db_session.query(ResearchResource)
47 .filter_by(research_id=research_id)
48 .count()
49 )
51 if existing > 0:
52 logger.info(
53 f"Research {research_id} already has {existing} resources, skipping save"
54 )
55 return int(existing)
57 # Save each source as a ResearchResource
58 for source in sources:
59 try:
60 # Extract fields from various possible formats
61 url = source.get("url", "") or source.get("link", "")
62 title = source.get("title", "") or source.get(
63 "name", ""
64 )
65 snippet = (
66 source.get("snippet", "")
67 or source.get("content_preview", "")
68 or source.get("description", "")
69 )
70 source_type = source.get("source_type", "web")
72 # Skip if no URL
73 if not url:
74 continue
76 # Create resource record
77 resource = ResearchResource(
78 research_id=research_id,
79 title=title or "Untitled",
80 url=url,
81 content_preview=snippet[:1000]
82 if snippet
83 else None, # Limit preview length
84 source_type=source_type,
85 resource_metadata={
86 "added_at": datetime.now(UTC).isoformat(),
87 "original_data": source, # Keep original data for reference
88 },
89 created_at=datetime.now(UTC).isoformat(),
90 )
92 db_session.add(resource)
93 saved_count += 1
95 except Exception:
96 logger.warning(
97 f"Failed to save source {source.get('url', 'unknown')}"
98 )
99 continue
101 # Commit all resources
102 if saved_count > 0: 102 ↛ 112line 102 didn't jump to line 112
103 db_session.commit()
104 logger.info(
105 f"Saved {saved_count} sources for research {research_id}"
106 )
108 except Exception:
109 logger.exception("Error saving research sources")
110 raise
112 return saved_count
114 @staticmethod
115 def get_research_sources(
116 research_id: str, username: Optional[str] = None
117 ) -> List[Dict[str, Any]]:
118 """
119 Get all sources for a research from the database.
121 Args:
122 research_id: The UUID of the research
123 username: Username for database access
125 Returns:
126 List of source dictionaries
127 """
128 sources = []
130 try:
131 with get_user_db_session(username) as db_session:
132 resources = (
133 db_session.query(ResearchResource)
134 .filter_by(research_id=research_id)
135 .order_by(ResearchResource.id.asc())
136 .all()
137 )
139 for resource in resources:
140 sources.append(
141 {
142 "id": resource.id,
143 "url": resource.url,
144 "title": resource.title,
145 "snippet": resource.content_preview,
146 "content_preview": resource.content_preview,
147 "source_type": resource.source_type,
148 "metadata": resource.resource_metadata or {},
149 "created_at": resource.created_at,
150 }
151 )
153 logger.info(
154 f"Retrieved {len(sources)} sources for research {research_id}"
155 )
157 except Exception:
158 logger.exception("Error retrieving research sources")
159 raise
161 return sources
163 @staticmethod
164 def update_research_with_sources(
165 research_id: str,
166 all_links_of_system: List[Dict[str, Any]],
167 username: Optional[str] = None,
168 ) -> bool:
169 """
170 Update a completed research with its sources.
171 This should be called when research completes.
173 Args:
174 research_id: The UUID of the research
175 all_links_of_system: List of all sources found during research
176 username: Username for database access
178 Returns:
179 True if successful
180 """
181 try:
182 # Save sources to ResearchResource table
183 saved_count = ResearchSourcesService.save_research_sources(
184 research_id, all_links_of_system, username
185 )
187 # Also update the research metadata to include source count
188 with get_user_db_session(username) as db_session:
189 research = (
190 db_session.query(ResearchHistory)
191 .filter_by(id=research_id)
192 .first()
193 )
195 if research:
196 if not research.research_meta: 196 ↛ 200line 196 didn't jump to line 200 because the condition on line 196 was always true
197 research.research_meta = {}
199 # Update metadata with source information
200 research.research_meta["sources_count"] = saved_count
201 research.research_meta["has_sources"] = saved_count > 0
203 db_session.commit()
204 logger.info(
205 f"Updated research {research_id} with {saved_count} sources"
206 )
207 return True
208 logger.warning(
209 f"Research {research_id} not found for source update"
210 )
211 return False
213 except Exception:
214 logger.exception("Error updating research with sources")
215 return False