Coverage for src / local_deep_research / database / library_init.py: 100%
82 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
1"""
2Database initialization for Library - Unified Document Architecture.
4This module handles:
5- Seeding source_types table with predefined types
6- Creating the default "Library" and "Research History" collections
7- Must be called on app startup for each user
8"""
10import uuid
11from loguru import logger
12from sqlalchemy.exc import IntegrityError
14from .models import SourceType, Collection
15from .session_context import get_user_db_session
16from ..constants import (
17 RESEARCH_HISTORY_COLLECTION_NAME,
18 RESEARCH_HISTORY_COLLECTION_DESCRIPTION,
19)
def seed_source_types(username: str, password: str = None) -> None:
    """
    Insert the predefined document source types that are not present yet.

    Every predefined type is looked up by name; a new row with a fresh
    UUID is added only when the lookup finds nothing. All additions are
    committed together at the end.

    An IntegrityError is logged as a warning and not propagated. Any other
    exception is logged with its traceback and re-raised.

    Args:
        username: User to seed types for
        password: User's password (optional, uses session context)
    """
    # Column order for the rows below; dict(zip(...)) keeps this key order.
    columns = ("name", "display_name", "description", "icon")
    rows = (
        (
            "research_download",
            "Research Download",
            "Documents downloaded from research sessions (arXiv, PubMed, etc.)",
            "download",
        ),
        (
            "user_upload",
            "User Upload",
            "Documents manually uploaded by the user",
            "upload",
        ),
        (
            "manual_entry",
            "Manual Entry",
            "Documents manually created or entered",
            "edit",
        ),
        (
            "research_report",
            "Research Report",
            "Generated research reports (markdown) for semantic search",
            "file-alt",
        ),
        (
            "research_source",
            "Research Source",
            "Sources discovered during research with content for semantic search",
            "link",
        ),
    )
    predefined_types = [dict(zip(columns, row)) for row in rows]

    try:
        with get_user_db_session(username, password) as session:
            for spec in predefined_types:
                type_name = spec["name"]

                # Skip types that already have a row with this name.
                already_present = (
                    session.query(SourceType)
                    .filter_by(name=type_name)
                    .first()
                )
                if already_present:
                    continue

                session.add(SourceType(id=str(uuid.uuid4()), **spec))
                logger.info(f"Created source type: {type_name}")

            session.commit()
            logger.info("Source types seeded successfully")

    except IntegrityError:
        logger.warning("Source types may already exist")
    except Exception:
        logger.exception("Error seeding source types")
        raise
def ensure_default_library_collection(
    username: str, password: str = None
) -> str:
    """
    Return the id of the user's default "Library" collection.

    Looks for a collection flagged ``is_default=True``. When none is
    found, a new one is created with a fresh UUID and committed.
    Any exception is logged with its traceback and re-raised.

    Args:
        username: User to check/create library for
        password: User's password (optional, uses session context)

    Returns:
        UUID of the Library collection
    """
    try:
        with get_user_db_session(username, password) as session:
            found = (
                session.query(Collection).filter_by(is_default=True).first()
            )

            if not found:
                # Nothing flagged as default yet: create it now.
                new_id = str(uuid.uuid4())
                attributes = {
                    "id": new_id,
                    "name": "Library",
                    "description": "Default collection for research downloads and documents",
                    "collection_type": "default_library",
                    "is_default": True,
                }
                session.add(Collection(**attributes))
                session.commit()

                logger.info(f"Created default Library collection: {new_id}")
                return new_id

            logger.debug(f"Default Library collection exists: {found.id}")
            return found.id

    except Exception:
        logger.exception("Error ensuring default Library collection")
        raise
def ensure_research_history_collection(
    username: str, password: str = None
) -> str:
    """
    Return the id of the user's "Research History" collection.

    This collection is used for semantic search over research reports and
    sources. It is looked up by ``collection_type="research_history"``;
    when no such collection is found, one is created with a fresh UUID
    and committed. Any exception is logged with its traceback and
    re-raised.

    Args:
        username: User to check/create collection for
        password: User's password (optional, uses session context)

    Returns:
        UUID of the Research History collection
    """
    kind = "research_history"

    try:
        with get_user_db_session(username, password) as session:
            lookup = session.query(Collection).filter_by(collection_type=kind)
            current = lookup.first()

            if current:
                logger.debug(
                    f"Research History collection exists: {current.id}"
                )
                return current.id

            # No collection of this type yet: create it (not a default).
            fresh_id = str(uuid.uuid4())
            session.add(
                Collection(
                    id=fresh_id,
                    name=RESEARCH_HISTORY_COLLECTION_NAME,
                    description=RESEARCH_HISTORY_COLLECTION_DESCRIPTION,
                    collection_type=kind,
                    is_default=False,
                )
            )
            session.commit()

            logger.info(f"Created Research History collection: {fresh_id}")
            return fresh_id

    except Exception:
        logger.exception("Error ensuring Research History collection")
        raise
def initialize_library_for_user(username: str, password: str = None) -> dict:
    """
    Run the full library setup for a user and report what happened.

    Steps, in order: seed source types, ensure the default Library
    collection, ensure the Research History collection. Exceptions are
    not propagated: a failure is logged and its message is stored under
    the "error" key, with "success" left False. Entries set before the
    failing step keep their values.

    Args:
        username: User to initialize for
        password: User's password (optional, uses session context)

    Returns:
        Dict with keys "source_types_seeded", "library_collection_id",
        "research_history_collection_id" and "success", plus "error"
        when a step failed
    """
    outcome = {
        "source_types_seeded": False,
        "library_collection_id": None,
        "research_history_collection_id": None,
        "success": False,
    }

    try:
        # Step 1: source types
        seed_source_types(username, password)
        outcome["source_types_seeded"] = True

        # Step 2: default Library collection
        outcome["library_collection_id"] = ensure_default_library_collection(
            username, password
        )

        # Step 3: Research History collection
        outcome["research_history_collection_id"] = (
            ensure_research_history_collection(username, password)
        )

        outcome["success"] = True
        logger.info(f"Library initialization complete for user: {username}")

    except Exception as e:
        logger.exception(f"Library initialization failed for {username}")
        outcome["error"] = str(e)

    return outcome
def get_default_library_id(username: str, password: str = None) -> str:
    """
    Look up the default Library collection ID for a user.

    Delegates to ensure_default_library_collection, so the collection is
    created when it does not exist yet.

    Args:
        username: User to get library for
        password: User's password (optional, uses session context)

    Returns:
        UUID of the Library collection
    """
    library_id = ensure_default_library_collection(username, password)
    return library_id
def get_source_type_id(
    username: str, type_name: str, password: str = None
) -> str:
    """
    Resolve a source type name to its ID.

    Any exception, including the ValueError for a missing type, is logged
    with its traceback and then re-raised.

    Args:
        username: User to query for
        type_name: Name of source type (e.g., 'research_download', 'user_upload')
        password: User's password (optional, uses session context)

    Returns:
        UUID of the source type

    Raises:
        ValueError: If source type not found
    """
    try:
        with get_user_db_session(username, password) as session:
            by_name = session.query(SourceType).filter_by(name=type_name)
            match = by_name.first()

            if match:
                return match.id

            raise ValueError(f"Source type not found: {type_name}")  # noqa: TRY301 — inside db session context, except logs and re-raises

    except Exception:
        logger.exception("Error getting source type ID")
        raise