Coverage for src / local_deep_research / web / api.py: 95%

163 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-14 23:55 +0000

1""" 

2REST API for Local Deep Research. 

3Provides HTTP access to programmatic search and research capabilities. 

4""" 

5 

6import time 

7from functools import wraps 

8from typing import Dict, Any 

9 

10from flask import Blueprint, jsonify, request, Response 

11from loguru import logger 

12 

13from ..api.research_functions import analyze_documents 

14from ..database.session_context import get_user_db_session 

15from ..security.decorators import require_json_body 

16from ..utilities.db_utils import get_settings_manager 

17from ..security.rate_limiter import ( 

18 API_RATE_LIMIT_DEFAULT, 

19 api_rate_limit, 

20 get_current_username, 

21) 

22 

# Blueprint that groups every v1 REST endpoint under the /api/v1 prefix.
api_blueprint = Blueprint(
    name="api_v1",
    import_name=__name__,
    url_prefix="/api/v1",
)

25 

26 

def api_access_control(f):
    """
    Guard an API view behind authentication and the per-user API switch.

    The wrapped view is only invoked when:
    - a username can be resolved for the current request (otherwise a
      401 JSON error is returned), and
    - the user's ``app.enable_api`` setting is truthy (otherwise a 403
      JSON error is returned).

    While the settings are being read, the user's ``app.api_rate_limit``
    value is also stored on ``g._api_rate_limit``; this is intended to let
    the rate limiter reuse it instead of doing a second DB read.

    If no database session is available, the API is treated as enabled and
    nothing is cached on ``g``.
    """

    @wraps(f)
    def wrapper(*args, **kwargs):
        from flask import g

        current_user = get_current_username()
        if not current_user:
            return jsonify({"error": "Authentication required"}), 401

        # Default to "enabled" so a missing DB session does not block access.
        enabled = True

        # A single DB session serves both setting lookups.
        with get_user_db_session(current_user) as session:
            if session:
                manager = get_settings_manager(session, current_user)
                enabled = manager.get_setting("app.enable_api", True)
                # Cached for the rate limiter (see docstring).
                g._api_rate_limit = manager.get_setting(
                    "app.api_rate_limit", API_RATE_LIMIT_DEFAULT
                )

        if enabled:
            return f(*args, **kwargs)

        return jsonify({"error": "API access is disabled"}), 403

    return wrapper

63 

64 

@api_blueprint.route("/", methods=["GET"])
@api_access_control
@api_rate_limit
def api_documentation():
    """
    Return a JSON description of the public v1 API endpoints.

    The payload lists, for each endpoint, its path, HTTP method, a short
    description and a name -> description map of accepted parameters.
    """
    quick_summary_doc = {
        "path": "/api/v1/quick_summary",
        "method": "POST",
        "description": "Generate a quick research summary",
        "parameters": {
            "query": "Research query (required)",
            "search_tool": "Search engine to use (optional)",
            "iterations": "Number of search iterations (optional)",
            "temperature": "LLM temperature (optional)",
        },
    }

    generate_report_doc = {
        "path": "/api/v1/generate_report",
        "method": "POST",
        "description": "Generate a comprehensive research report",
        "parameters": {
            "query": "Research query (required)",
            "output_file": "Path to save report (optional)",
            "searches_per_section": "Searches per report section (optional)",
            "model_name": "LLM model to use (optional)",
            "temperature": "LLM temperature (optional)",
        },
    }

    analyze_documents_doc = {
        "path": "/api/v1/analyze_documents",
        "method": "POST",
        "description": "Search and analyze documents in a local collection",
        "parameters": {
            "query": "Search query (required)",
            "collection_name": "Local collection name (required)",
            "max_results": "Maximum results to return (optional)",
            "temperature": "LLM temperature (optional)",
            "force_reindex": "Force collection reindexing (optional)",
        },
    }

    return jsonify(
        {
            "api_version": "v1",
            "description": "REST API for Local Deep Research",
            "endpoints": [
                quick_summary_doc,
                generate_report_doc,
                analyze_documents_doc,
            ],
        }
    )

115 

116 

@api_blueprint.route("/health", methods=["GET"])
def health_check():
    """Liveness probe: reply with a fixed OK payload plus the current epoch time."""
    payload = {
        "status": "ok",
        "message": "API is running",
        "timestamp": time.time(),
    }
    return jsonify(payload)

123 

124 

@api_blueprint.route("/quick_summary_test", methods=["POST"])
@api_access_control
@api_rate_limit
@require_json_body(error_message="Query parameter is required")
def api_quick_summary_test():
    """
    Test endpoint that runs ``quick_summary`` with fixed, minimal parameters.

    Expects a JSON body containing a string ``query``. The search always
    uses the ``wikipedia`` tool, one iteration and temperature 0.7 so the
    call stays fast.

    Returns:
        The JSON-encoded result of ``quick_summary`` on success; a 400 JSON
        error when ``query`` is missing or not a string; a generic 500 JSON
        error when the research call raises.
    """
    data = request.json
    if "query" not in data:
        return jsonify({"error": "Query parameter is required"}), 400

    # Validate the type up front, matching /quick_summary, so a malformed
    # query is reported as a client error instead of a 500.
    query = data.get("query")
    if not isinstance(query, str):
        return jsonify({"error": "Query must be a string"}), 400

    try:
        # Import here to avoid circular imports
        from ..api.research_functions import quick_summary

        logger.info(f"Processing quick_summary_test request: query='{query}'")

        # Use minimal parameters for faster testing
        result = quick_summary(
            query=query,
            search_tool="wikipedia",  # Use fast Wikipedia search for testing
            iterations=1,  # Single iteration for speed
            temperature=0.7,
        )

        return jsonify(result)
    except Exception:
        # Log the full traceback server-side; never leak details to the client.
        logger.exception("Error in quick_summary_test API")
        return (
            jsonify(
                {
                    "error": "An internal error has occurred. Please try again later."
                }
            ),
            500,
        )

162 

163 

def _serialize_results(results: Dict[str, Any]) -> Response:
    """
    Build a JSON response from a research results dictionary.

    Each entry under ``findings[*]["documents"]`` is replaced by a plain
    dict holding the document's ``metadata`` and ``page_content`` (exposed
    as ``"content"``), since those document objects are the part of the
    results that needs converting before ``jsonify`` is called.

    The input dictionary and its nested findings are not modified: new
    containers are built for everything that changes.

    Args:
        results: The results dictionary. A missing or empty ``findings``
            entry is passed through unchanged.

    Returns:
        A Flask ``Response`` with the JSON-encoded results.
    """
    # Shallow copy of the top level only; findings are rebuilt below so the
    # caller's nested lists are never rewritten in place.
    converted_results = dict(results)

    findings = results.get("findings")
    if findings:
        converted_findings = []
        for finding in findings:
            if "documents" not in finding:
                # Nothing to convert; reuse the finding as-is.
                converted_findings.append(finding)
                continue

            converted_finding = dict(finding)
            converted_finding["documents"] = [
                {
                    "metadata": document.metadata,
                    "content": document.page_content,
                }
                for document in finding["documents"]
            ]
            converted_findings.append(converted_finding)

        converted_results["findings"] = converted_findings

    return jsonify(converted_results)

185 

186 

def _load_settings_snapshot(username):
    """Return a flat ``{key: value}`` snapshot of the user's settings, or ``{}`` on any failure."""
    if not username:
        logger.debug("No username in session, skipping settings snapshot")
        return {}

    # Resolved at call time, as the endpoint originally did.
    from ..database.session_context import get_user_db_session
    from ..utilities.db_utils import get_settings_manager

    settings_manager = None
    try:
        logger.debug(f"Getting settings snapshot for user: {username}")
        with get_user_db_session(username) as db_session:
            if not db_session:
                logger.warning(f"No database session for user: {username}")
                return {}

            settings_manager = get_settings_manager(db_session, username)
            all_settings = settings_manager.get_all_settings()

            # Settings may be stored as {"value": ...} records or as bare
            # values; keep only the value in either case.
            settings_snapshot = {
                key: (
                    setting["value"]
                    if isinstance(setting, dict) and "value" in setting
                    else setting
                )
                for key, setting in all_settings.items()
            }
            logger.debug(
                f"Got settings snapshot with {len(settings_snapshot)} settings"
            )
            return settings_snapshot
    except AttributeError as ae:
        # Keep the detailed diagnostic for a malformed settings manager.
        logger.exception(
            f"SettingsManager attribute error: {ae}. "
            f"Type: {type(settings_manager) if settings_manager is not None else 'Unknown'}"
        )
        logger.warning("Failed to get settings snapshot")
    except Exception:
        logger.warning("Failed to get settings snapshot")

    # Continue with empty snapshot rather than failing
    return {}


@api_blueprint.route("/quick_summary", methods=["POST"])
@api_access_control
@api_rate_limit
@require_json_body(error_message="Query parameter is required")
def api_quick_summary():
    """
    Generate a quick research summary via REST API.

    POST /api/v1/quick_summary
    {
        "query": "Advances in fusion energy research",
        "search_tool": "auto",  # Optional: search engine to use
        "iterations": 2,        # Optional: number of search iterations
        "temperature": 0.7      # Optional: LLM temperature
    }

    Every other key in the body is forwarded to ``quick_summary`` as a
    keyword argument, except ``username`` and ``settings_snapshot``, which
    are always determined server-side.

    Returns:
        The serialized research result on success; a 400 JSON error when
        ``query`` is missing or not a string; a 504 JSON error on
        ``TimeoutError``; a generic 500 JSON error on any other exception.
    """
    logger.debug("API quick_summary endpoint called")
    data = request.json
    logger.debug(f"Request data keys: {list(data.keys())}")

    if "query" not in data:
        logger.debug("Missing query parameter")
        return jsonify({"error": "Query parameter is required"}), 400

    # Extract query and validate type
    query = data.get("query")
    if not isinstance(query, str):
        return jsonify({"error": "Query must be a string"}), 400
    params = {k: v for k, v in data.items() if k != "query"}
    logger.debug(
        f"Query length: {len(query) if query else 0}, params keys: {list(params.keys()) if params else 'None'}"
    )

    # The acting user comes from the authenticated request, never from the
    # request body.
    username = get_current_username()
    if username:
        params["username"] = username
    else:
        params.pop("username", None)

    try:
        # Import here to avoid circular imports
        from ..api.research_functions import quick_summary

        logger.info(
            f"Processing quick_summary request: query='{query}' for user='{username}'"
        )

        # Set reasonable defaults for API use
        params.setdefault("temperature", 0.7)
        params.setdefault("search_tool", "auto")
        params.setdefault("iterations", 1)

        # Always overwrite: a client-supplied "settings_snapshot" must not
        # reach the research function, even when no DB session is available.
        params["settings_snapshot"] = _load_settings_snapshot(username)

        # Call the actual research function
        result = quick_summary(query, **params)

        return _serialize_results(result)
    except TimeoutError:
        logger.exception("Request timed out")
        return (
            jsonify(
                {
                    "error": "Request timed out. Please try with a simpler query or fewer iterations."
                }
            ),
            504,
        )
    except Exception:
        logger.exception("Error in quick_summary API")
        return (
            jsonify(
                {
                    "error": "An internal error has occurred. Please try again later."
                }
            ),
            500,
        )

306 

307 

@api_blueprint.route("/generate_report", methods=["POST"])
@api_access_control
@api_rate_limit
@require_json_body(error_message="Query parameter is required")
def api_generate_report():
    """
    Generate a comprehensive research report via REST API.

    POST /api/v1/generate_report
    {
        "query": "Impact of climate change on agriculture",
        "output_file": "/path/to/save/report.md",  # Optional
        "searches_per_section": 2,                  # Optional
        "model_name": "gpt-4",                      # Optional
        "temperature": 0.5                          # Optional
    }

    Every key other than ``query`` is forwarded to ``generate_report`` as a
    keyword argument. When the resulting ``content`` string is longer than
    10000 characters, only the first 2000 characters are returned and
    ``content_truncated`` is set to ``True``.

    Returns:
        The JSON-encoded report on success; a 400 JSON error when ``query``
        is missing or not a string; a 504 JSON error on ``TimeoutError``;
        a generic 500 JSON error on any other exception.
    """
    data = request.json
    if "query" not in data:
        return jsonify({"error": "Query parameter is required"}), 400

    # Validate the type up front, matching /quick_summary, so a malformed
    # query is reported as a client error instead of a 500.
    query = data.get("query")
    if not isinstance(query, str):
        return jsonify({"error": "Query must be a string"}), 400

    # NOTE(review): "output_file" is a client-controlled filesystem path that
    # is forwarded unchanged; confirm generate_report restricts where it may
    # write before exposing this endpoint to untrusted callers.
    params = {k: v for k, v in data.items() if k != "query"}

    try:
        # Import here to avoid circular imports
        from ..api.research_functions import generate_report

        # Set reasonable defaults for API use
        params.setdefault("searches_per_section", 1)
        params.setdefault("temperature", 0.7)

        logger.info(
            f"Processing generate_report request: query='{query}', params={params}"
        )

        result = generate_report(query, **params)

        # Don't return the full content for large reports
        if (
            result
            and "content" in result
            and isinstance(result["content"], str)
            and len(result["content"]) > 10000
        ):
            # Include a summary of the report content
            content_preview = (
                result["content"][:2000] + "... [Content truncated]"
            )
            result["content"] = content_preview
            result["content_truncated"] = True

        return jsonify(result)
    except TimeoutError:
        logger.exception("Request timed out")
        return (
            jsonify(
                {"error": "Request timed out. Please try with a simpler query."}
            ),
            504,
        )
    except Exception:
        # Log the full traceback server-side; never leak details to the client.
        logger.exception("Error in generate_report API")
        return (
            jsonify(
                {
                    "error": "An internal error has occurred. Please try again later."
                }
            ),
            500,
        )

379 

380 

@api_blueprint.route("/analyze_documents", methods=["POST"])
@api_access_control
@api_rate_limit
@require_json_body(
    error_message="Both query and collection_name parameters are required"
)
def api_analyze_documents():
    """
    Search and analyze documents in a local collection via REST API.

    POST /api/v1/analyze_documents
    {
        "query": "neural networks in medicine",
        "collection_name": "my_collection",  # Required: local collection name
        "max_results": 20,                   # Optional: max results to return
        "temperature": 0.7,                  # Optional: LLM temperature
        "force_reindex": false               # Optional: force reindexing
    }

    Every key other than ``query`` and ``collection_name`` is forwarded to
    ``analyze_documents`` as a keyword argument.

    Returns:
        The JSON-encoded analysis result on success; a 400 JSON error when
        either required parameter is missing or not a string; a generic
        500 JSON error when the analysis raises.
    """
    data = request.json
    if "query" not in data or "collection_name" not in data:
        return (
            jsonify(
                {
                    "error": "Both query and collection_name parameters are required"
                }
            ),
            400,
        )

    query = data.get("query")
    collection_name = data.get("collection_name")

    # Reject non-string values as a client error, in line with the type
    # check performed by /quick_summary, instead of failing later with a 500.
    if not isinstance(query, str) or not isinstance(collection_name, str):
        return (
            jsonify(
                {"error": "Both query and collection_name must be strings"}
            ),
            400,
        )

    params = {
        k: v for k, v in data.items() if k not in ("query", "collection_name")
    }

    try:
        result = analyze_documents(query, collection_name, **params)
        return jsonify(result)
    except Exception:
        # Log the full traceback server-side; never leak details to the client.
        logger.exception("Error in analyze_documents API")
        return (
            jsonify(
                {
                    "error": "An internal error has occurred. Please try again later."
                }
            ),
            500,
        )