Coverage for src / local_deep_research / database / library_init.py: 100%

82 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-14 23:55 +0000

1""" 

2Database initialization for Library - Unified Document Architecture. 

3 

4This module handles: 

5- Seeding source_types table with predefined types 

6- Creating the default "Library" collection 

7- Must be called on app startup for each user 

8""" 

9 

10import uuid 

11from loguru import logger 

12from sqlalchemy.exc import IntegrityError 

13 

14from .models import SourceType, Collection 

15from .session_context import get_user_db_session 

16from ..constants import ( 

17 RESEARCH_HISTORY_COLLECTION_NAME, 

18 RESEARCH_HISTORY_COLLECTION_DESCRIPTION, 

19) 

20 

21 

def seed_source_types(username: str, password: str = None) -> None:
    """
    Populate the source_types table with the built-in document source types.

    Each predefined type is looked up by name; a row is inserted only when
    no row with that name is found. All inserts are committed together once
    the loop finishes.

    Args:
        username: User whose database is seeded
        password: User's password (optional, uses session context)

    Raises:
        Exception: Any error other than IntegrityError is logged and
            re-raised. IntegrityError is logged as a warning and swallowed.
    """
    # Each row is (name, display_name, description, icon).
    catalog = (
        (
            "research_download",
            "Research Download",
            "Documents downloaded from research sessions (arXiv, PubMed, etc.)",
            "download",
        ),
        (
            "user_upload",
            "User Upload",
            "Documents manually uploaded by the user",
            "upload",
        ),
        (
            "manual_entry",
            "Manual Entry",
            "Documents manually created or entered",
            "edit",
        ),
        (
            "research_report",
            "Research Report",
            "Generated research reports (markdown) for semantic search",
            "file-alt",
        ),
        (
            "research_source",
            "Research Source",
            "Sources discovered during research with content for semantic search",
            "link",
        ),
    )

    try:
        with get_user_db_session(username, password) as session:
            for type_name, display_name, description, icon in catalog:
                # Skip types that are already present under this name
                found = (
                    session.query(SourceType).filter_by(name=type_name).first()
                )
                if found:
                    continue

                session.add(
                    SourceType(
                        id=str(uuid.uuid4()),
                        name=type_name,
                        display_name=display_name,
                        description=description,
                        icon=icon,
                    )
                )
                logger.info(f"Created source type: {type_name}")

            # One commit covers every row added in the loop above
            session.commit()
            logger.info("Source types seeded successfully")

    except IntegrityError:
        logger.warning("Source types may already exist")
    except Exception:
        logger.exception("Error seeding source types")
        raise

86 

87 

def ensure_default_library_collection(
    username: str, password: str = None
) -> str:
    """
    Return the ID of the user's default "Library" collection.

    Looks for a collection flagged is_default=True. When none is found, a
    new collection named "Library" is created with a fresh UUID and
    committed.

    Args:
        username: User to check/create library for
        password: User's password (optional, uses session context)

    Returns:
        ID of the existing or newly created Library collection

    Raises:
        Exception: Any failure is logged and re-raised.
    """
    try:
        with get_user_db_session(username, password) as session:
            existing = (
                session.query(Collection).filter_by(is_default=True).first()
            )

            if not existing:
                # No default collection yet: create one
                new_id = str(uuid.uuid4())
                session.add(
                    Collection(
                        id=new_id,
                        name="Library",
                        description="Default collection for research downloads and documents",
                        collection_type="default_library",
                        is_default=True,
                    )
                )
                session.commit()

                logger.info(f"Created default Library collection: {new_id}")
                return new_id

            logger.debug(f"Default Library collection exists: {existing.id}")
            return existing.id

    except Exception:
        logger.exception("Error ensuring default Library collection")
        raise

131 

132 

def ensure_research_history_collection(
    username: str, password: str = None
) -> str:
    """
    Return the ID of the user's "Research History" collection.

    The collection is identified by collection_type="research_history".
    When no such row is found, one is created from the
    RESEARCH_HISTORY_COLLECTION_* constants with a fresh UUID and committed.

    Args:
        username: User to check/create collection for
        password: User's password (optional, uses session context)

    Returns:
        ID of the existing or newly created Research History collection

    Raises:
        Exception: Any failure is logged and re-raised.
    """
    history_type = "research_history"

    try:
        with get_user_db_session(username, password) as session:
            lookup = session.query(Collection).filter_by(
                collection_type=history_type
            )
            found = lookup.first()

            # Reuse the existing collection when there is one
            if found:
                logger.debug(f"Research History collection exists: {found.id}")
                return found.id

            fresh_id = str(uuid.uuid4())
            new_collection = Collection(
                id=fresh_id,
                name=RESEARCH_HISTORY_COLLECTION_NAME,
                description=RESEARCH_HISTORY_COLLECTION_DESCRIPTION,
                collection_type=history_type,
                is_default=False,
            )
            session.add(new_collection)
            session.commit()

            logger.info(f"Created Research History collection: {fresh_id}")
            return fresh_id

    except Exception:
        logger.exception("Error ensuring Research History collection")
        raise

181 

182 

def initialize_library_for_user(username: str, password: str = None) -> dict:
    """
    Run the full library setup sequence for one user.

    Steps run in order: seed source types, ensure the default Library
    collection, ensure the Research History collection. A failure in any
    step is logged and recorded in the result instead of being raised.

    Args:
        username: User to initialize for
        password: User's password (optional, uses session context)

    Returns:
        Dict with keys "source_types_seeded", "library_collection_id",
        "research_history_collection_id" and "success"; an "error" key
        holding the exception text is added when a step fails.
    """
    outcome = {
        "source_types_seeded": False,
        "library_collection_id": None,
        "research_history_collection_id": None,
        "success": False,
    }

    try:
        # Step 1: source types
        seed_source_types(username, password)
        outcome["source_types_seeded"] = True

        # Step 2: default Library collection
        outcome["library_collection_id"] = ensure_default_library_collection(
            username, password
        )

        # Step 3: Research History collection
        outcome["research_history_collection_id"] = (
            ensure_research_history_collection(username, password)
        )

        outcome["success"] = True
        logger.info(f"Library initialization complete for user: {username}")

    except Exception as exc:
        # Steps completed before the failure keep their recorded values
        logger.exception(f"Library initialization failed for {username}")
        outcome["error"] = str(exc)

    return outcome

225 

226 

def get_default_library_id(username: str, password: str = None) -> str:
    """
    Look up the default Library collection ID for a user.

    Delegates to ensure_default_library_collection, so the collection is
    created when it does not exist yet.

    Args:
        username: User to get library for
        password: User's password (optional, uses session context)

    Returns:
        ID of the Library collection
    """
    library_id = ensure_default_library_collection(username, password)
    return library_id

240 

241 

def get_source_type_id(
    username: str, type_name: str, password: str = None
) -> str:
    """
    Resolve a source type name to its ID.

    Args:
        username: User whose database is queried
        type_name: Name of source type (e.g., 'research_download', 'user_upload')
        password: User's password (optional, uses session context)

    Returns:
        ID of the matching source type

    Raises:
        ValueError: If no source type with that name is found
        Exception: Any other failure; every error is logged before being
            re-raised.
    """
    try:
        with get_user_db_session(username, password) as session:
            by_name = session.query(SourceType).filter_by(name=type_name)
            match = by_name.first()

            if match:
                return match.id

            raise ValueError(f"Source type not found: {type_name}")  # noqa: TRY301 — inside db session context, except logs and re-raises

    except Exception:
        logger.exception("Error getting source type ID")
        raise