Coverage for src/local_deep_research/database/library_init.py: 100%

95 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-03 23:15 +0000

1""" 

2Database initialization for Library - Unified Document Architecture. 

3 

4This module handles: 

5- Seeding source_types table with predefined types 

6- Creating the default "Library" collection 

7- Must be called on app startup for each user 

8""" 

9 

10import threading 

11import uuid 

12from loguru import logger 

13from sqlalchemy.exc import IntegrityError 

14 

15from .models import SourceType, Collection 

16from .session_context import get_user_db_session 

17from ..constants import ( 

18 RESEARCH_HISTORY_COLLECTION_NAME, 

19 RESEARCH_HISTORY_COLLECTION_DESCRIPTION, 

20) 

21 

22 

# Registry of per-user locks guarding the check-then-insert sections of the
# collection initialisers below.
#
# Why it exists: with IMMEDIATE isolation the database serialised these
# sections for us. With DEFERRED isolation, two concurrent invocations for
# the same user (for example the same user logging in from two browser
# tabs) can both observe "row absent" and both insert, which leaves
# duplicate default collections behind. Serialising in the application is
# cheap and avoids a migration that adds a partial UNIQUE constraint.
#
# ``_user_init_locks_lock`` protects the registry dict itself;
# ``_user_init_locks`` maps a username to that user's lock.
_user_init_locks_lock = threading.Lock()
_user_init_locks: dict[str, threading.Lock] = {}

31 

32 

def _get_user_init_lock(username: str) -> threading.Lock:
    """Return the init lock registered for ``username``.

    The lock is created and registered on first request. Lookup and
    creation both happen while holding the registry lock, so concurrent
    callers asking for the same username receive the same lock object.

    Args:
        username: Key under which the lock is stored in the registry.

    Returns:
        The ``threading.Lock`` that serialises the check-then-insert
        collection initialisers for this user.
    """
    with _user_init_locks_lock:
        try:
            return _user_init_locks[username]
        except KeyError:
            # First request for this user: register a fresh lock.
            fresh_lock = threading.Lock()
            _user_init_locks[username] = fresh_lock
            return fresh_lock

43 

44 

def pop_user_init_lock(username: str) -> None:
    """Remove the per-user init lock for ``username`` from the registry.

    Called from the user-close path (``db_manager.close_user_database``
    callers in ``web/auth/connection_cleanup.py`` and ``web/auth/routes.py``)
    so the module-level dict doesn't accumulate one entry per username
    across the process lifetime. The next login lazily re-creates the
    lock, which is fine — the lock has no state that needs to persist
    across login/logout.

    The entry is left in place while the lock is currently held. Dropping
    it at that moment would let the next caller register a *second* lock
    for the same user and run an initialiser concurrently with the thread
    still inside the critical section, which is exactly the duplicate-insert
    race the lock exists to prevent. A skipped entry is removed by a later
    call once the lock is free.

    NOTE(review): a thread that has already fetched the lock but not yet
    acquired it is not detected here; closing that window would need
    reference counting on the registry entries.

    Args:
        username: Registry key to discard. Unknown usernames are ignored.
    """
    with _user_init_locks_lock:
        lock = _user_init_locks.get(username)
        if lock is None or lock.locked():
            # Nothing registered, or an initialiser is mid-flight: keep it.
            return
        del _user_init_locks[username]

57 

58 

def seed_source_types(username: str, password: str = None) -> None:
    """
    Insert any missing predefined document source types for a user.

    Each predefined type is looked up by name and added only when no row
    with that name is found; all additions are committed together.

    An ``IntegrityError`` is logged as a warning and swallowed. Any other
    exception is logged with its traceback and re-raised.

    Args:
        username: User to seed types for
        password: User's password (optional, uses session context)
    """
    # Columns: (name, display_name, description, icon)
    catalogue = (
        (
            "research_download",
            "Research Download",
            "Documents downloaded from research sessions (arXiv, PubMed, etc.)",
            "download",
        ),
        (
            "user_upload",
            "User Upload",
            "Documents manually uploaded by the user",
            "upload",
        ),
        (
            "manual_entry",
            "Manual Entry",
            "Documents manually created or entered",
            "edit",
        ),
        (
            "research_report",
            "Research Report",
            "Generated research reports (markdown) for semantic search",
            "file-alt",
        ),
        (
            "research_source",
            "Research Source",
            "Sources discovered during research with content for semantic search",
            "link",
        ),
    )

    try:
        with get_user_db_session(username, password) as session:
            for name, display_name, description, icon in catalogue:
                already_present = (
                    session.query(SourceType).filter_by(name=name).first()
                )
                if already_present:
                    continue

                session.add(
                    SourceType(
                        id=str(uuid.uuid4()),
                        name=name,
                        display_name=display_name,
                        description=description,
                        icon=icon,
                    )
                )
                logger.info(f"Created source type: {name}")

            # Single commit for every row added above.
            session.commit()
            logger.info("Source types seeded successfully")

    except IntegrityError:
        logger.warning("Source types may already exist")
    except Exception:
        logger.exception("Error seeding source types")
        raise

123 

124 

def ensure_default_library_collection(
    username: str, password: str = None
) -> str:
    """
    Return the id of the user's default "Library" collection, creating the
    collection first when no row with ``is_default=True`` is found.

    The lookup and the insert run while holding the per-user init lock.
    Any exception is logged with its traceback and re-raised.

    Args:
        username: User to check/create library for
        password: User's password (optional, uses session context)

    Returns:
        UUID of the Library collection
    """
    try:
        # Lock first, then open the session: same order as a two-item `with`.
        with _get_user_init_lock(username):
            with get_user_db_session(username, password) as db:
                found = db.query(Collection).filter_by(is_default=True).first()

                if not found:
                    new_id = str(uuid.uuid4())
                    db.add(
                        Collection(
                            id=new_id,
                            name="Library",
                            description="Default collection for research downloads and documents",
                            collection_type="default_library",
                            is_default=True,
                        )
                    )
                    db.commit()

                    logger.info(f"Created default Library collection: {new_id}")
                    return new_id

                logger.debug(f"Default Library collection exists: {found.id}")
                return found.id

    except Exception:
        logger.exception("Error ensuring default Library collection")
        raise

171 

172 

def ensure_research_history_collection(
    username: str, password: str = None
) -> str:
    """
    Return the id of the user's "Research History" collection, creating
    it first when no collection of type ``research_history`` is found.

    This collection is used for semantic search over research reports and
    sources. The lookup and the insert run while holding the per-user init
    lock. Any exception is logged with its traceback and re-raised.

    Args:
        username: User to check/create collection for
        password: User's password (optional, uses session context)

    Returns:
        UUID of the Research History collection
    """
    try:
        # Lock first, then open the session: same order as a two-item `with`.
        with _get_user_init_lock(username):
            with get_user_db_session(username, password) as db:
                history_query = db.query(Collection).filter_by(
                    collection_type="research_history"
                )
                found = history_query.first()

                if found:
                    logger.debug(
                        f"Research History collection exists: {found.id}"
                    )
                    return found.id

                # Nothing stored yet: create the collection.
                new_id = str(uuid.uuid4())
                db.add(
                    Collection(
                        id=new_id,
                        name=RESEARCH_HISTORY_COLLECTION_NAME,
                        description=RESEARCH_HISTORY_COLLECTION_DESCRIPTION,
                        collection_type="research_history",
                        is_default=False,
                    )
                )
                db.commit()

                logger.info(f"Created Research History collection: {new_id}")
                return new_id

    except Exception:
        logger.exception("Error ensuring Research History collection")
        raise

224 

225 

def initialize_library_for_user(username: str, password: str = None) -> dict:
    """
    Run the full library initialisation for a user and report the outcome.

    Steps, in order: seed the source types, ensure the default Library
    collection, ensure the Research History collection. A failure in any
    step is logged and recorded in the result instead of being raised, so
    this function always returns a dict.

    Args:
        username: User to initialize for
        password: User's password (optional, uses session context)

    Returns:
        Dict with keys ``source_types_seeded``, ``library_collection_id``,
        ``research_history_collection_id`` and ``success``; an ``error``
        key holding the exception text is added when a step fails.
    """
    results = dict(
        source_types_seeded=False,
        library_collection_id=None,
        research_history_collection_id=None,
        success=False,
    )

    try:
        # Step 1: source types
        seed_source_types(username, password)
        results["source_types_seeded"] = True

        # Step 2: default Library collection
        results["library_collection_id"] = ensure_default_library_collection(
            username, password
        )

        # Step 3: Research History collection
        results["research_history_collection_id"] = (
            ensure_research_history_collection(username, password)
        )

        results["success"] = True
        logger.info(f"Library initialization complete for user: {username}")

    except Exception as exc:
        logger.exception(f"Library initialization failed for {username}")
        results["error"] = str(exc)

    return results

268 

269 

def get_default_library_id(username: str, password: str = None) -> str:
    """
    Look up the default Library collection ID for a user.

    Delegates to ``ensure_default_library_collection``, so the collection
    is created when it does not exist yet.

    Args:
        username: User to get library for
        password: User's password (optional, uses session context)

    Returns:
        UUID of the Library collection
    """
    library_id = ensure_default_library_collection(
        username=username, password=password
    )
    return library_id

283 

284 

def get_source_type_id(
    username: str, type_name: str, password: str = None
) -> str:
    """
    Resolve a source type name to its ID.

    Any exception, including the not-found ``ValueError``, is logged with
    its traceback and re-raised.

    Args:
        username: User to query for
        type_name: Name of source type (e.g., 'research_download', 'user_upload')
        password: User's password (optional, uses session context)

    Returns:
        UUID of the source type

    Raises:
        ValueError: If source type not found
    """
    try:
        with get_user_db_session(username, password) as db:
            match = db.query(SourceType).filter_by(name=type_name).first()

            if match:
                return match.id

            raise ValueError(f"Source type not found: {type_name}")  # noqa: TRY301 — inside db session context, except logs and re-raises

    except Exception:
        logger.exception("Error getting source type ID")
        raise