Coverage for src / local_deep_research / web / services / research_sources_service.py: 98%

71 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-14 23:55 +0000

1""" 

2Service for managing research sources/resources in the database. 

3 

4This service handles saving and retrieving sources from research 

5in a proper relational way using the ResearchResource table. 

6""" 

7 

8from typing import List, Dict, Any, Optional 

9from datetime import datetime, UTC 

10from loguru import logger 

11 

12from ...database.models import ResearchResource, ResearchHistory 

13from ...database.session_context import get_user_db_session 

14 

15 

class ResearchSourcesService:
    """Service for managing research sources in the database.

    All methods are static; each one opens its own database session via
    ``get_user_db_session(username)``.
    """

    @staticmethod
    def save_research_sources(
        research_id: str,
        sources: List[Dict[str, Any]],
        username: Optional[str] = None,
    ) -> int:
        """
        Save sources from research to the ResearchResource table.

        Saving is skipped when the research already has at least one
        resource row. Individual sources that cannot be converted into a
        resource are logged and skipped rather than aborting the whole
        batch.

        Args:
            research_id: The UUID of the research
            sources: List of source dictionaries with url, title, snippet, etc.
            username: Username for database access

        Returns:
            Number of sources saved, or the number of already existing
            resources when the save was skipped.

        Raises:
            Exception: Any error raised by the database session is logged
                and re-raised.
        """
        if not sources:
            logger.info(f"No sources to save for research {research_id}")
            return 0

        saved_count = 0

        try:
            with get_user_db_session(username) as db_session:
                # First check if resources already exist for this research
                existing = (
                    db_session.query(ResearchResource)
                    .filter_by(research_id=research_id)
                    .count()
                )

                if existing > 0:
                    logger.info(
                        f"Research {research_id} already has {existing} resources, skipping save"
                    )
                    return int(existing)

                # Save each source as a ResearchResource
                for source in sources:
                    try:
                        # Extract fields from various possible formats
                        url = source.get("url", "") or source.get("link", "")
                        title = source.get("title", "") or source.get(
                            "name", ""
                        )
                        snippet = (
                            source.get("snippet", "")
                            or source.get("content_preview", "")
                            or source.get("description", "")
                        )
                        source_type = source.get("source_type", "web")

                        # Skip if no URL
                        if not url:
                            continue

                        # One timestamp per source so that "added_at" and
                        # "created_at" always agree.
                        timestamp = datetime.now(UTC).isoformat()

                        # Create resource record
                        resource = ResearchResource(
                            research_id=research_id,
                            title=title or "Untitled",
                            url=url,
                            # Limit preview length
                            content_preview=snippet[:1000] if snippet else None,
                            source_type=source_type,
                            resource_metadata={
                                "added_at": timestamp,
                                # Keep original data for reference
                                "original_data": source,
                            },
                            created_at=timestamp,
                        )

                        db_session.add(resource)
                        saved_count += 1

                    except Exception:
                        # The entry may not be a dict at all (that is one
                        # way to end up here), so the handler must not call
                        # ``.get`` on it unconditionally; otherwise a single
                        # malformed entry would abort the whole batch.
                        failed_url = (
                            source.get("url", "unknown")
                            if isinstance(source, dict)
                            else "unknown"
                        )
                        logger.warning(f"Failed to save source {failed_url}")
                        continue

                # Commit all resources
                if saved_count > 0:
                    db_session.commit()
                    logger.info(
                        f"Saved {saved_count} sources for research {research_id}"
                    )

        except Exception:
            logger.exception("Error saving research sources")
            raise

        return saved_count

    @staticmethod
    def get_research_sources(
        research_id: str, username: Optional[str] = None
    ) -> List[Dict[str, Any]]:
        """
        Get all sources for a research from the database.

        Args:
            research_id: The UUID of the research
            username: Username for database access

        Returns:
            List of source dictionaries ordered by ascending resource id.
            The preview text is exposed under both ``snippet`` and
            ``content_preview``.

        Raises:
            Exception: Any error raised while querying is logged and
                re-raised.
        """
        sources: List[Dict[str, Any]] = []

        try:
            with get_user_db_session(username) as db_session:
                resources = (
                    db_session.query(ResearchResource)
                    .filter_by(research_id=research_id)
                    .order_by(ResearchResource.id.asc())
                    .all()
                )

                # Convert to plain dicts while the session is still open.
                sources = [
                    {
                        "id": resource.id,
                        "url": resource.url,
                        "title": resource.title,
                        "snippet": resource.content_preview,
                        "content_preview": resource.content_preview,
                        "source_type": resource.source_type,
                        "metadata": resource.resource_metadata or {},
                        "created_at": resource.created_at,
                    }
                    for resource in resources
                ]

                logger.info(
                    f"Retrieved {len(sources)} sources for research {research_id}"
                )

        except Exception:
            logger.exception("Error retrieving research sources")
            raise

        return sources

    @staticmethod
    def update_research_with_sources(
        research_id: str,
        all_links_of_system: List[Dict[str, Any]],
        username: Optional[str] = None,
    ) -> bool:
        """
        Update a completed research with its sources.
        This should be called when research completes.

        Saves the sources through ``save_research_sources`` and then
        records ``sources_count`` and ``has_sources`` in the research's
        ``research_meta``.

        Args:
            research_id: The UUID of the research
            all_links_of_system: List of all sources found during research
            username: Username for database access

        Returns:
            True if the research row was found and updated; False if it
            was not found or if any error occurred (errors are logged,
            not raised).
        """
        try:
            # Save sources to ResearchResource table
            saved_count = ResearchSourcesService.save_research_sources(
                research_id, all_links_of_system, username
            )

            # Also update the research metadata to include source count
            with get_user_db_session(username) as db_session:
                research = (
                    db_session.query(ResearchHistory)
                    .filter_by(id=research_id)
                    .first()
                )

                if not research:
                    logger.warning(
                        f"Research {research_id} not found for source update"
                    )
                    return False

                # Assign a fresh dict instead of mutating the existing one:
                # the ORM does not detect in-place changes on a plain JSON
                # column, so an in-place update of already populated
                # metadata could be dropped on commit. (The column
                # definition is not visible here; re-assignment is correct
                # whether or not mutation tracking is enabled.)
                research.research_meta = {
                    **(research.research_meta or {}),
                    "sources_count": saved_count,
                    "has_sources": saved_count > 0,
                }

                db_session.commit()
                logger.info(
                    f"Updated research {research_id} with {saved_count} sources"
                )
                return True

        except Exception:
            logger.exception("Error updating research with sources")
            return False