Coverage for src/local_deep_research/database/library_init.py: 100%
95 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-03 23:15 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-03 23:15 +0000
1"""
Database initialization for Library - Unified Document Architecture.

This module handles:
- Seeding the source_types table with predefined types
- Creating the default "Library" collection
- Creating the "Research History" collection

Initialization must be run on app startup for each user.
8"""
10import threading
11import uuid
12from loguru import logger
13from sqlalchemy.exc import IntegrityError
15from .models import SourceType, Collection
16from .session_context import get_user_db_session
17from ..constants import (
18 RESEARCH_HISTORY_COLLECTION_NAME,
19 RESEARCH_HISTORY_COLLECTION_DESCRIPTION,
20)
# Registry of per-user locks that serialise the check-then-insert critical
# sections further down in this module.
#
# Why it exists: under IMMEDIATE isolation no such lock was needed. Under
# DEFERRED isolation, two concurrent invocations for the same user (e.g. the
# same user logging in from two browser tabs) could both observe the missing
# row and both insert it, producing duplicate default collections. Locking
# at the application level is cheap and simpler than shipping a migration
# that adds a partial UNIQUE constraint.
#
# ``_user_init_locks_lock`` guards the registry dict itself;
# ``_user_init_locks`` maps a username to that user's lock.
_user_init_locks_lock = threading.Lock()
_user_init_locks: dict[str, threading.Lock] = dict()
def _get_user_init_lock(username: str) -> threading.Lock:
    """Return the init lock registered for ``username``.

    The lock is created and stored in ``_user_init_locks`` the first time a
    username is requested; later calls hand back that same object. It is
    used to serialise the check-then-insert idempotent collection
    initialisers.

    Args:
        username: Key under which the lock is registered.

    Returns:
        The ``threading.Lock`` associated with ``username``.
    """
    # The registry is only touched while holding its guard lock, so two
    # concurrent first-time callers cannot each register their own lock.
    with _user_init_locks_lock:
        try:
            return _user_init_locks[username]
        except KeyError:
            fresh_lock = threading.Lock()
            _user_init_locks[username] = fresh_lock
            return fresh_lock
def pop_user_init_lock(username: str) -> None:
    """Drop ``username``'s init lock from the module-level registry.

    Intended for the user-close path (``db_manager.close_user_database``
    callers in ``web/auth/connection_cleanup.py`` and
    ``web/auth/routes.py``) so that the registry does not keep one entry
    per username for the whole lifetime of the process. A later login
    simply gets a freshly created lock; nothing stored on the lock has to
    survive a logout/login cycle.

    Removing a username that has no registered lock is a no-op.

    NOTE(review): if the removed lock is held by another thread at this
    moment, the next ``_get_user_init_lock`` call for the same username
    creates a different lock object, so the two holders would not exclude
    each other. Confirm the close path cannot overlap with initialisation.

    Args:
        username: Key whose lock should be forgotten.
    """
    with _user_init_locks_lock:
        if username in _user_init_locks:
            del _user_init_locks[username]
def seed_source_types(username: str, password: str = None) -> None:
    """
    Populate the source_types table with the predefined source types.

    Each predefined type is looked up by name and inserted only when no row
    with that name exists yet; everything is committed once at the end.

    An ``IntegrityError`` is logged as a warning and swallowed. Any other
    exception is logged and re-raised.

    Args:
        username: User to seed types for
        password: User's password (optional, uses session context)
    """
    # Column order for every row in ``seed_rows`` below.
    columns = ("name", "display_name", "description", "icon")
    seed_rows = (
        (
            "research_download",
            "Research Download",
            "Documents downloaded from research sessions (arXiv, PubMed, etc.)",
            "download",
        ),
        (
            "user_upload",
            "User Upload",
            "Documents manually uploaded by the user",
            "upload",
        ),
        (
            "manual_entry",
            "Manual Entry",
            "Documents manually created or entered",
            "edit",
        ),
        (
            "research_report",
            "Research Report",
            "Generated research reports (markdown) for semantic search",
            "file-alt",
        ),
        (
            "research_source",
            "Research Source",
            "Sources discovered during research with content for semantic search",
            "link",
        ),
    )

    try:
        with get_user_db_session(username, password) as session:
            for row in seed_rows:
                attrs = dict(zip(columns, row))

                # Skip types that are already present.
                by_name = session.query(SourceType).filter_by(
                    name=attrs["name"]
                )
                if by_name.first():
                    continue

                session.add(SourceType(id=str(uuid.uuid4()), **attrs))
                logger.info(f"Created source type: {attrs['name']}")

            session.commit()
            logger.info("Source types seeded successfully")

    except IntegrityError:
        logger.warning("Source types may already exist")
    except Exception:
        logger.exception("Error seeding source types")
        raise
def ensure_default_library_collection(
    username: str, password: str = None
) -> str:
    """
    Return the ID of the user's default "Library" collection, creating the
    collection first when none is flagged as default.

    The lookup and the insert both run while holding the per-user init lock
    so that concurrent callers for the same user do not both insert.
    Any exception is logged and re-raised.

    Args:
        username: User to check/create library for
        password: User's password (optional, uses session context)

    Returns:
        UUID of the Library collection
    """
    try:
        # Take the per-user lock first, then open the database session.
        with _get_user_init_lock(username):
            with get_user_db_session(username, password) as session:
                existing = (
                    session.query(Collection)
                    .filter_by(is_default=True)
                    .first()
                )

                if not existing:
                    # No default collection yet: create and persist one.
                    new_id = str(uuid.uuid4())
                    session.add(
                        Collection(
                            id=new_id,
                            name="Library",
                            description="Default collection for research downloads and documents",
                            collection_type="default_library",
                            is_default=True,
                        )
                    )
                    session.commit()

                    logger.info(
                        f"Created default Library collection: {new_id}"
                    )
                    return new_id

                logger.debug(
                    f"Default Library collection exists: {existing.id}"
                )
                return existing.id

    except Exception:
        logger.exception("Error ensuring default Library collection")
        raise
def ensure_research_history_collection(
    username: str, password: str = None
) -> str:
    """
    Return the ID of the user's "Research History" collection, creating the
    collection first when no collection of type ``research_history`` exists.
    This collection is used for semantic search over research reports and
    sources.

    The lookup and the insert both run while holding the per-user init lock
    so that concurrent callers for the same user do not both insert.
    Any exception is logged and re-raised.

    Args:
        username: User to check/create collection for
        password: User's password (optional, uses session context)

    Returns:
        UUID of the Research History collection
    """
    try:
        # Take the per-user lock first, then open the database session.
        with _get_user_init_lock(username):
            with get_user_db_session(username, password) as session:
                history_query = session.query(Collection).filter_by(
                    collection_type="research_history"
                )
                found = history_query.first()

                if found:
                    logger.debug(
                        f"Research History collection exists: {found.id}"
                    )
                    return found.id

                # Nothing found: build the collection and persist it.
                new_id = str(uuid.uuid4())
                history = Collection(
                    id=new_id,
                    name=RESEARCH_HISTORY_COLLECTION_NAME,
                    description=RESEARCH_HISTORY_COLLECTION_DESCRIPTION,
                    collection_type="research_history",
                    is_default=False,
                )
                session.add(history)
                session.commit()

                logger.info(f"Created Research History collection: {new_id}")
                return new_id

    except Exception:
        logger.exception("Error ensuring Research History collection")
        raise
def initialize_library_for_user(username: str, password: str = None) -> dict:
    """
    Run the full library initialization for one user.

    Steps, in order: seed the source types, ensure the default Library
    collection, ensure the Research History collection. Failures are not
    propagated: the exception is logged and its text is stored under the
    ``"error"`` key of the returned dict, whose ``"success"`` stays False.

    Args:
        username: User to initialize for
        password: User's password (optional, uses session context)

    Returns:
        Dict with keys ``source_types_seeded``, ``library_collection_id``,
        ``research_history_collection_id`` and ``success``, plus ``error``
        when a step raised
    """
    results = dict(
        source_types_seeded=False,
        library_collection_id=None,
        research_history_collection_id=None,
        success=False,
    )

    try:
        # Step 1: source types.
        seed_source_types(username, password)
        results["source_types_seeded"] = True

        # Step 2: default Library collection.
        results["library_collection_id"] = ensure_default_library_collection(
            username, password
        )

        # Step 3: Research History collection.
        results["research_history_collection_id"] = (
            ensure_research_history_collection(username, password)
        )

        # Only reached when every step above completed.
        results["success"] = True
        logger.info(f"Library initialization complete for user: {username}")

    except Exception as exc:
        logger.exception(f"Library initialization failed for {username}")
        results["error"] = str(exc)

    return results
def get_default_library_id(username: str, password: str = None) -> str:
    """
    Look up the ID of the user's default Library collection.

    Delegates to ``ensure_default_library_collection``, so the collection
    is created when it does not exist yet.

    Args:
        username: User to get library for
        password: User's password (optional, uses session context)

    Returns:
        UUID of the Library collection
    """
    library_id = ensure_default_library_collection(username, password)
    return library_id
def get_source_type_id(
    username: str, type_name: str, password: str = None
) -> str:
    """
    Resolve a source type name to its ID.

    Any exception, including the ``ValueError`` for an unknown name, is
    logged and re-raised.

    Args:
        username: User to query for
        type_name: Name of source type (e.g., 'research_download', 'user_upload')
        password: User's password (optional, uses session context)

    Returns:
        UUID of the source type

    Raises:
        ValueError: If source type not found
    """
    try:
        with get_user_db_session(username, password) as session:
            by_name = session.query(SourceType).filter_by(name=type_name)
            match = by_name.first()

            if match:
                return match.id

            raise ValueError(f"Source type not found: {type_name}")  # noqa: TRY301 — inside db session context, except logs and re-raises

    except Exception:
        logger.exception("Error getting source type ID")
        raise