Coverage for src / local_deep_research / web / api.py: 95%
163 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
1"""
2REST API for Local Deep Research.
3Provides HTTP access to programmatic search and research capabilities.
4"""
6import time
7from functools import wraps
8from typing import Dict, Any
10from flask import Blueprint, jsonify, request, Response
11from loguru import logger
13from ..api.research_functions import analyze_documents
14from ..database.session_context import get_user_db_session
15from ..security.decorators import require_json_body
16from ..utilities.db_utils import get_settings_manager
17from ..security.rate_limiter import (
18 API_RATE_LIMIT_DEFAULT,
19 api_rate_limit,
20 get_current_username,
21)
# Blueprint that groups every REST endpoint defined in this module; all routes
# registered on it are served under the /api/v1 URL prefix.
api_blueprint = Blueprint("api_v1", __name__, url_prefix="/api/v1")
def api_access_control(f):
    """
    Decorator that gates an API view behind authentication and the per-user
    API toggle.

    The wrapped view only runs when:
    - a username can be resolved for the current request (otherwise a 401
      JSON error is returned), and
    - the user's "app.enable_api" setting is truthy (otherwise a 403 JSON
      error is returned).

    While the settings are being read, the user's "app.api_rate_limit" value
    is also stored on ``g._api_rate_limit`` so the rate limiter can reuse it
    instead of reading the database a second time.
    """

    @wraps(f)
    def decorated_function(*args, **kwargs):
        from flask import g

        current_user = get_current_username()
        if not current_user:
            return jsonify({"error": "Authentication required"}), 401

        # Stays enabled when no database session could be obtained.
        enabled = True
        with get_user_db_session(current_user) as session:
            if session:
                # One session serves both lookups.
                manager = get_settings_manager(session, current_user)
                enabled = manager.get_setting("app.enable_api", True)
                g._api_rate_limit = manager.get_setting(
                    "app.api_rate_limit", API_RATE_LIMIT_DEFAULT
                )

        if enabled:
            return f(*args, **kwargs)
        return jsonify({"error": "API access is disabled"}), 403

    return decorated_function
@api_blueprint.route("/", methods=["GET"])
@api_access_control
@api_rate_limit
def api_documentation():
    """
    Return a JSON description of the v1 API and its POST endpoints.

    Each endpoint entry lists its path, HTTP method, a short description and
    the request parameters it accepts.
    """
    quick_summary_params = {
        "query": "Research query (required)",
        "search_tool": "Search engine to use (optional)",
        "iterations": "Number of search iterations (optional)",
        "temperature": "LLM temperature (optional)",
    }
    generate_report_params = {
        "query": "Research query (required)",
        "output_file": "Path to save report (optional)",
        "searches_per_section": "Searches per report section (optional)",
        "model_name": "LLM model to use (optional)",
        "temperature": "LLM temperature (optional)",
    }
    analyze_documents_params = {
        "query": "Search query (required)",
        "collection_name": "Local collection name (required)",
        "max_results": "Maximum results to return (optional)",
        "temperature": "LLM temperature (optional)",
        "force_reindex": "Force collection reindexing (optional)",
    }

    # (path, description, parameters) for every documented endpoint; all of
    # them are POST endpoints.
    endpoint_specs = (
        (
            "/api/v1/quick_summary",
            "Generate a quick research summary",
            quick_summary_params,
        ),
        (
            "/api/v1/generate_report",
            "Generate a comprehensive research report",
            generate_report_params,
        ),
        (
            "/api/v1/analyze_documents",
            "Search and analyze documents in a local collection",
            analyze_documents_params,
        ),
    )

    endpoints = [
        {
            "path": path,
            "method": "POST",
            "description": description,
            "parameters": parameters,
        }
        for path, description, parameters in endpoint_specs
    ]

    return jsonify(
        {
            "api_version": "v1",
            "description": "REST API for Local Deep Research",
            "endpoints": endpoints,
        }
    )
@api_blueprint.route("/health", methods=["GET"])
def health_check():
    """Liveness probe: report that the API is up, with the current Unix time."""
    payload = {
        "status": "ok",
        "message": "API is running",
        "timestamp": time.time(),
    }
    return jsonify(payload)
@api_blueprint.route("/quick_summary_test", methods=["POST"])
@api_access_control
@api_rate_limit
@require_json_body(error_message="Query parameter is required")
def api_quick_summary_test():
    """
    Test endpoint using programmatic access with minimal parameters for fast testing.

    Expects a JSON body containing a string "query". The search always runs
    against Wikipedia with a single iteration, regardless of any other keys
    in the body.

    Returns:
        The JSON-serialized result of ``quick_summary`` on success, a 400
        JSON error when "query" is missing or not a string, or a generic 500
        JSON error when the research call raises.
    """
    data = request.json
    if "query" not in data:
        return jsonify({"error": "Query parameter is required"}), 400

    # Reject non-string queries up front, matching /quick_summary, instead of
    # letting them fail inside the research call as a 500.
    query = data["query"]
    if not isinstance(query, str):
        return jsonify({"error": "Query must be a string"}), 400

    try:
        # Import here to avoid circular imports
        from ..api.research_functions import quick_summary

        logger.info(f"Processing quick_summary_test request: query='{query}'")

        # Use minimal parameters for faster testing
        result = quick_summary(
            query=query,
            search_tool="wikipedia",  # Use fast Wikipedia search for testing
            iterations=1,  # Single iteration for speed
            temperature=0.7,
        )

        return jsonify(result)
    except Exception:
        # Full details go to the log; the client only sees a generic message.
        logger.exception("Error in quick_summary_test API")
        return (
            jsonify(
                {
                    "error": "An internal error has occurred. Please try again later."
                }
            ),
            500,
        )
def _serialize_results(results: Dict[str, Any]) -> Response:
    """
    Build a JSON response from a results dictionary.

    Entries under ``findings[*]["documents"]`` are replaced by plain dicts
    holding each document's ``metadata`` and ``page_content`` so they can be
    JSON-encoded. The conversion works on fresh containers; ``results`` and
    the findings/documents nested in it are left unmodified, so the same
    results object can safely be serialized more than once.

    Args:
        results: The results dictionary.

    Returns:
        A Flask ``Response`` carrying the JSON-encoded results.

    """
    converted_results = dict(results)

    # Only rebuild "findings" when the key is present, so a results dict
    # without findings is passed through without gaining an empty list.
    if "findings" in converted_results:
        converted_findings = []
        for finding in converted_results["findings"]:
            converted_finding = dict(finding)
            if "documents" in converted_finding:
                # The main thing that needs to be handled here is the
                # `Document` instances.
                converted_finding["documents"] = [
                    {
                        "metadata": document.metadata,
                        "content": document.page_content,
                    }
                    for document in converted_finding["documents"]
                ]
            converted_findings.append(converted_finding)
        converted_results["findings"] = converted_findings

    return jsonify(converted_results)
def _get_api_settings_snapshot(username):
    """
    Read all of a user's settings and flatten them to a key -> value mapping.

    Args:
        username: User whose settings database should be read.

    Returns:
        A dict mapping each setting key to its value (entries shaped like
        ``{"value": ...}`` are unwrapped, anything else is kept as is), or
        ``None`` when no database session is available for the user.
    """
    with get_user_db_session(username) as db_session:
        if not db_session:
            logger.warning(f"No database session for user: {username}")
            return None

        settings_manager = get_settings_manager(db_session, username)
        all_settings = settings_manager.get_all_settings()
        # Extract just the values for the settings snapshot
        return {
            key: (
                setting["value"]
                if isinstance(setting, dict) and "value" in setting
                else setting
            )
            for key, setting in all_settings.items()
        }


@api_blueprint.route("/quick_summary", methods=["POST"])
@api_access_control
@api_rate_limit
@require_json_body(error_message="Query parameter is required")
def api_quick_summary():
    """
    Generate a quick research summary via REST API.

    POST /api/v1/quick_summary
    {
        "query": "Advances in fusion energy research",
        "search_tool": "auto",  # Optional: search engine to use
        "iterations": 2,        # Optional: number of search iterations
        "temperature": 0.7      # Optional: LLM temperature
    }

    Every other key in the body is forwarded to ``quick_summary`` as a
    keyword argument, except "username" and "settings_snapshot", which are
    always filled in server-side.

    Returns:
        The serialized research result on success; a 400 JSON error when
        "query" is missing or not a string; a 504 JSON error on
        ``TimeoutError``; a generic 500 JSON error on any other exception.
    """
    logger.debug("API quick_summary endpoint called")
    data = request.json
    logger.debug(f"Request data keys: {list(data.keys())}")

    if "query" not in data:
        logger.debug("Missing query parameter")
        return jsonify({"error": "Query parameter is required"}), 400

    # Extract query and validate type
    query = data.get("query")
    if not isinstance(query, str):
        return jsonify({"error": "Query must be a string"}), 400

    # "username" and "settings_snapshot" are derived from the authenticated
    # session below; never accept them from the request body.
    server_controlled = ("query", "username", "settings_snapshot")
    params = {k: v for k, v in data.items() if k not in server_controlled}
    logger.debug(
        f"Query length: {len(query) if query else 0}, params keys: {list(params.keys()) if params else 'None'}"
    )

    username = get_current_username()
    if username:
        params["username"] = username

    try:
        # Import here to avoid circular imports
        from ..api.research_functions import quick_summary

        logger.info(
            f"Processing quick_summary request: query='{query}' for user='{username}'"
        )

        # Set reasonable defaults for API use
        params.setdefault("temperature", 0.7)
        params.setdefault("search_tool", "auto")
        params.setdefault("iterations", 1)

        # Get settings snapshot for the user
        if username:
            try:
                logger.debug(f"Getting settings snapshot for user: {username}")
                settings_snapshot = _get_api_settings_snapshot(username)
                # None means there was no DB session; in that case no
                # snapshot is passed at all.
                if settings_snapshot is not None:
                    params["settings_snapshot"] = settings_snapshot
                    logger.debug(
                        f"Got settings snapshot with {len(settings_snapshot)} settings"
                    )
            except Exception:
                # Keep the warning level but attach the traceback so the
                # cause is not lost.
                logger.opt(exception=True).warning(
                    "Failed to get settings snapshot"
                )
                # Continue with empty snapshot rather than failing
                params["settings_snapshot"] = {}
        else:
            logger.debug("No username in session, skipping settings snapshot")
            params["settings_snapshot"] = {}

        # Call the actual research function
        result = quick_summary(query, **params)

        return _serialize_results(result)
    except TimeoutError:
        logger.exception("Request timed out")
        return (
            jsonify(
                {
                    "error": "Request timed out. Please try with a simpler query or fewer iterations."
                }
            ),
            504,
        )
    except Exception:
        # Full details go to the log; the client only sees a generic message.
        logger.exception("Error in quick_summary API")
        return (
            jsonify(
                {
                    "error": "An internal error has occurred. Please try again later."
                }
            ),
            500,
        )
@api_blueprint.route("/generate_report", methods=["POST"])
@api_access_control
@api_rate_limit
@require_json_body(error_message="Query parameter is required")
def api_generate_report():
    """
    Generate a comprehensive research report via REST API.

    POST /api/v1/generate_report
    {
        "query": "Impact of climate change on agriculture",
        "output_file": "/path/to/save/report.md",  # Optional
        "searches_per_section": 2,                 # Optional
        "model_name": "gpt-4",                     # Optional
        "temperature": 0.5                         # Optional
    }

    Every key other than "query" is forwarded to ``generate_report`` as a
    keyword argument. When the resulting "content" string is longer than
    10000 characters, the response carries only its first 2000 characters
    plus a truncation marker, and "content_truncated" is set to true.

    Returns:
        The JSON-serialized report on success; a 400 JSON error when "query"
        is missing or not a string; a 504 JSON error on ``TimeoutError``; a
        generic 500 JSON error on any other exception.
    """
    data = request.json
    if "query" not in data:
        return jsonify({"error": "Query parameter is required"}), 400

    # Reject non-string queries up front, matching /quick_summary, instead of
    # letting them fail inside report generation as a 500.
    query = data.get("query")
    if not isinstance(query, str):
        return jsonify({"error": "Query must be a string"}), 400

    # NOTE(review): "output_file" is client-controlled and is forwarded
    # unvalidated; if generate_report writes to that path, this lets an API
    # caller choose where the server writes. Confirm and restrict if so.
    params = {k: v for k, v in data.items() if k != "query"}

    try:
        # Import here to avoid circular imports
        from ..api.research_functions import generate_report

        # Set reasonable defaults for API use
        params.setdefault("searches_per_section", 1)
        params.setdefault("temperature", 0.7)

        logger.info(
            f"Processing generate_report request: query='{query}', params={params}"
        )

        result = generate_report(query, **params)

        # Don't return the full content for large reports
        if (
            result
            and "content" in result
            and isinstance(result["content"], str)
            and len(result["content"]) > 10000
        ):
            # Include a summary of the report content
            content_preview = (
                result["content"][:2000] + "... [Content truncated]"
            )
            result["content"] = content_preview
            result["content_truncated"] = True

        return jsonify(result)
    except TimeoutError:
        logger.exception("Request timed out")
        return (
            jsonify(
                {"error": "Request timed out. Please try with a simpler query."}
            ),
            504,
        )
    except Exception:
        # Full details go to the log; the client only sees a generic message.
        logger.exception("Error in generate_report API")
        return (
            jsonify(
                {
                    "error": "An internal error has occurred. Please try again later."
                }
            ),
            500,
        )
@api_blueprint.route("/analyze_documents", methods=["POST"])
@api_access_control
@api_rate_limit
@require_json_body(
    error_message="Both query and collection_name parameters are required"
)
def api_analyze_documents():
    """
    Search and analyze documents in a local collection via REST API.

    POST /api/v1/analyze_documents
    {
        "query": "neural networks in medicine",
        "collection_name": "my_collection",  # Required: local collection name
        "max_results": 20,                   # Optional: max results to return
        "temperature": 0.7,                  # Optional: LLM temperature
        "force_reindex": false               # Optional: force reindexing
    }

    Any key besides "query" and "collection_name" is passed through to
    ``analyze_documents`` as a keyword argument. A missing required key
    yields a 400 JSON error; any exception from the analysis yields a
    generic 500 JSON error.
    """
    payload = request.json
    required_keys = ("query", "collection_name")

    if any(key not in payload for key in required_keys):
        missing_response = jsonify(
            {
                "error": "Both query and collection_name parameters are required"
            }
        )
        return missing_response, 400

    # Everything that is not a required key becomes an extra keyword argument.
    extra_options = {
        key: value
        for key, value in payload.items()
        if key not in required_keys
    }
    query = payload.get("query")
    collection_name = payload.get("collection_name")

    try:
        analysis = analyze_documents(query, collection_name, **extra_options)
        return jsonify(analysis)
    except Exception:
        logger.exception("Error in analyze_documents API")
        failure_response = jsonify(
            {
                "error": "An internal error has occurred. Please try again later."
            }
        )
        return failure_response, 500