Coverage for src/local_deep_research/web/app_factory.py: 87%

391 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-03 23:15 +0000

1# import logging - replaced with loguru 

2import os 

3from pathlib import Path 

4from importlib import resources as importlib_resources 

5 

6from flask import ( 

7 Flask, 

8 Request, 

9 abort, 

10 jsonify, 

11 make_response, 

12 request, 

13 send_from_directory, 

14) 

15from flask_wtf.csrf import CSRFProtect 

16from werkzeug.middleware.proxy_fix import ProxyFix 

17from loguru import logger 

18from local_deep_research.settings.logger import log_settings 

19 

20from ..utilities.log_utils import InterceptHandler 

21from ..security import SecurityHeaders, get_security_default 

22from ..security.rate_limiter import limiter 

23from ..security.file_upload_validator import FileUploadValidator 

24from ..security.web_middleware import ( 

25 SecureCookieMiddleware, 

26 ServerHeaderMiddleware, 

27) 

28 

29# Removed DB_PATH import - using per-user databases now 

30from .services.socket_service import SocketIOService 

31 

32 

class DiskSpoolingRequest(Request):
    """Flask ``Request`` subclass that sets ``max_form_memory_size`` to 5 MiB.

    ``create_app()`` installs this class as ``app.request_class`` so the
    limit applies to every incoming request.

    Background (security fix for issue #1176): a multipart upload sitting
    right at the per-file x per-request limits could otherwise consume tens
    of GB of memory in a single request. The intent is to keep memory use
    bounded no matter how high the per-file cap
    (``FileUploadValidator.MAX_FILE_SIZE``) is configured, by having large
    uploads land in temporary files on disk rather than in memory.

    NOTE(review): Werkzeug's documentation describes
    ``max_form_memory_size`` as the maximum size of a non-file form field
    (exceeding it raises 413), while file parts are spooled by the form
    parser's stream factory. Confirm that this attribute really provides
    the spool-to-disk threshold described above.
    """

    # 5 MiB, expressed in bytes.
    max_form_memory_size = 5 * 1024 * 1024

48 

49 

def _attach_intercept_handler(logger_name):
    """Cap a stdlib logger at WARNING and route it through loguru.

    The ``InterceptHandler`` is only added when the logger does not already
    carry one, so calling ``create_app()`` several times (e.g. in tests)
    does not duplicate handlers.

    Args:
        logger_name: Name of the stdlib logger (e.g. ``"werkzeug"``).
    """
    import logging

    stdlib_logger = logging.getLogger(logger_name)
    stdlib_logger.setLevel(logging.WARNING)
    if not any(
        isinstance(h, InterceptHandler) for h in stdlib_logger.handlers
    ):
        stdlib_logger.addHandler(InterceptHandler())


def _load_or_create_secret_key(secret_key_file):
    """Return the per-installation SECRET_KEY, creating the key file if needed.

    A fresh key is written with ``O_CREAT | O_EXCL`` and mode ``0o600`` so
    that only one process can create the file. If the file already exists
    its stripped contents are used instead. Whenever the stored key cannot
    be used (unreadable, undecodable, or empty) or the new key cannot be
    saved, a freshly generated in-memory key is returned so the app still
    starts; such a key does not survive a restart.

    Args:
        secret_key_file: ``pathlib.Path`` of the key file.

    Returns:
        str: A non-empty secret key.
    """
    import secrets

    new_key = secrets.token_hex(32)
    try:
        fd = os.open(
            str(secret_key_file), os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o600
        )
        try:
            os.write(fd, new_key.encode())
        finally:
            os.close(fd)
    except FileExistsError:
        try:
            stored_key = secret_key_file.read_text(encoding="utf-8").strip()
        except (OSError, ValueError):
            # ValueError covers UnicodeDecodeError from a corrupted file.
            logger.warning("Could not read secret key file")
            return new_key
        if not stored_key:
            # The file exists but holds no key (interrupted write, or another
            # process created it and has not written yet). An empty
            # SECRET_KEY would break session signing, so never return it.
            logger.warning(
                "Secret key file is empty - using a temporary key for this process"
            )
            return new_key
        return stored_key
    except OSError:
        logger.warning("Could not save secret key file")
        return new_key

    logger.info("Generated new SECRET_KEY for this installation")
    return new_key


def create_app():
    """
    Create and configure the Flask application.

    Besides building the app object this also starts background work: the
    news scheduler (when enabled) and the research queue processor.

    Returns:
        tuple: (app, socket_service) - The configured Flask app and the
        SocketIOService instance created for it
    """
    # Route stdlib loggers through loguru via InterceptHandler.
    # werkzeug: WARNING suppresses verbose per-request logs.
    _attach_intercept_handler("werkzeug")

    # APScheduler logs job execution results (success/failure) to its own
    # logger hierarchy. Without an InterceptHandler the WARNING+ messages
    # only reach Python's lastResort handler as unformatted stderr.
    # Level is WARNING (not INFO) because job functions already log their
    # own progress via loguru — APScheduler's INFO messages would be redundant.
    _attach_intercept_handler("apscheduler")

    logger.info("Initializing Local Deep Research application...")

    try:
        # Get directories based on package installation
        package_web_dir = (
            importlib_resources.files("local_deep_research") / "web"
        )
        with importlib_resources.as_file(package_web_dir) as package_dir:
            static_path = (package_dir / "static").as_posix()
            template_path = (package_dir / "templates").as_posix()

        # static_folder=None disables Flask's built-in static handling;
        # the custom /static route (see register_blueprints) serves files
        # so that the dist folder can be handled.
        app = Flask(__name__, static_folder=None, template_folder=template_path)
        # Store static dir for custom handling
        app.config["STATIC_DIR"] = static_path
        logger.debug(f"Using package static path: {static_path}")
        logger.debug(f"Using package template path: {template_path}")
    except Exception:
        # Fallback for development
        logger.exception("Package directories not found, using fallback paths")
        app = Flask(
            __name__,
            static_folder=None,
            template_folder=str(Path("templates").resolve()),
        )
        # Store static dir for custom handling
        app.config["STATIC_DIR"] = str(Path("static").resolve())

    # Use custom Request class that spools large uploads to disk
    # This prevents memory exhaustion from large file uploads (issue #1176)
    app.request_class = DiskSpoolingRequest

    # Middleware stack (wrapped innermost -> outermost; runs in reverse at
    # request time):
    #   1. SecureCookieMiddleware: adds Secure flag iff wsgi.url_scheme=https.
    #      Wrapped INSIDE ProxyFix so it reads the post-rewrite scheme.
    #   2. ProxyFix: translates X-Forwarded-* into REMOTE_ADDR / wsgi.url_scheme.
    #   3. ServerHeaderMiddleware: strips Server header (outermost).
    app.wsgi_app = SecureCookieMiddleware(app.wsgi_app, app)  # type: ignore[method-assign]
    app.wsgi_app = ProxyFix(  # type: ignore[method-assign]
        app.wsgi_app,
        x_for=1,  # Trust 1 proxy for X-Forwarded-For
        x_proto=1,  # Trust 1 proxy for X-Forwarded-Proto (http/https)
        x_host=0,  # Don't trust X-Forwarded-Host (security)
        x_port=0,  # Don't trust X-Forwarded-Port (security)
        x_prefix=0,  # Don't trust X-Forwarded-Prefix (security)
    )
    app.wsgi_app = ServerHeaderMiddleware(app.wsgi_app)  # type: ignore[method-assign]

    # App configuration
    # Generate or load a unique SECRET_KEY per installation
    from ..config.paths import get_data_directory

    secret_key_file = Path(get_data_directory()) / ".secret_key"
    secret_key_file.parent.mkdir(parents=True, exist_ok=True)
    app.config["SECRET_KEY"] = _load_or_create_secret_key(secret_key_file)

    # Session cookie security settings
    # The Secure flag is added dynamically per request by
    # SecureCookieMiddleware (wrapped around the app above). This allows
    # localhost HTTP to work for development while keeping production secure.
    #
    # Check if explicitly in testing mode (for backwards compatibility)
    is_testing = (
        os.getenv("CI")
        or os.getenv("TESTING")
        or os.getenv("PYTEST_CURRENT_TEST")
        or app.debug
    )
    # Always False here because the Secure flag is added dynamically.
    # Exception: if TESTING mode is active, the Secure flag is never added.
    app.config["SESSION_COOKIE_SECURE"] = False
    # NOTE(review): presumably consumed by SecureCookieMiddleware — confirm.
    app.config["LDR_TESTING_MODE"] = bool(is_testing)
    app.config["SESSION_COOKIE_HTTPONLY"] = (
        True  # Prevent JavaScript access (XSS mitigation)
    )
    app.config["SESSION_COOKIE_SAMESITE"] = "Lax"  # CSRF protection
    # Set max cookie lifetime for permanent sessions (when session.permanent=True).
    # This applies to "remember me" sessions; non-permanent sessions expire on browser close.
    remember_me_days = get_security_default(
        "security.session_remember_me_days", 30
    )
    app.config["PERMANENT_SESSION_LIFETIME"] = remember_me_days * 24 * 3600
    # PREFERRED_URL_SCHEME affects URL generation (url_for), not request.is_secure
    app.config["PREFERRED_URL_SCHEME"] = "https"

    # File upload security limits - calculated from FileUploadValidator constants
    app.config["MAX_CONTENT_LENGTH"] = (
        FileUploadValidator.MAX_FILES_PER_REQUEST
        * FileUploadValidator.MAX_FILE_SIZE
    )

    # Initialize CSRF protection
    # Explicitly enable CSRF protection (don't rely on implicit Flask-WTF behavior)
    app.config["WTF_CSRF_ENABLED"] = True
    CSRFProtect(app)
    # Note: CSRF exemptions for API blueprints are applied after blueprint
    # registration (search for "CSRF exemptions" in this file).

    # Initialize security headers middleware
    SecurityHeaders(app)

    # Initialize rate limiting for security (brute force protection)
    # Uses imported limiter from security.rate_limiter module
    # Rate limiting is disabled in CI via enabled callable in rate_limiter.py
    # Also set app config to ensure Flask-Limiter respects our settings
    from ..settings.env_registry import is_rate_limiting_enabled

    app.config["RATELIMIT_ENABLED"] = is_rate_limiting_enabled()
    app.config["RATELIMIT_STRATEGY"] = "moving-window"
    limiter.init_app(app)

    # Custom error handler for rate limit exceeded (429)
    @app.errorhandler(429)
    def ratelimit_handler(e):
        # Import here to avoid circular imports
        from ..security.rate_limiter import get_client_ip

        # Audit logging for security monitoring
        # Use get_client_ip() to get the real IP behind proxies
        logger.warning(
            f"Rate limit exceeded: endpoint={request.endpoint} "
            f"ip={get_client_ip()} "
            f"user_agent={request.headers.get('User-Agent', 'unknown')}"
        )
        return jsonify(
            error="Too many requests",
            message="Too many attempts. Please try again later.",
        ), 429

    # Database configuration - Using per-user databases now
    # No shared database configuration needed
    app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False
    app.config["SQLALCHEMY_ECHO"] = False

    # Per-user databases are created automatically via encrypted_db.py

    # Log data location and security information
    from ..database.encrypted_db import db_manager

    data_dir = get_data_directory()
    banner = "=" * 60
    logger.info(banner)
    logger.info("DATA STORAGE INFORMATION")
    logger.info(banner)
    logger.info(f"Data directory: {data_dir}")
    logger.info(
        "Databases: Per-user encrypted databases in encrypted_databases/"
    )

    # Check if using custom location
    from local_deep_research.settings.manager import SettingsManager

    settings_manager = SettingsManager()
    custom_data_dir = settings_manager.get_setting("bootstrap.data_dir")
    if custom_data_dir:
        logger.info(
            f"Using custom data location via LDR_DATA_DIR: {custom_data_dir}"
        )
    else:
        logger.info("Using default platform-specific data location")

    # Display security status based on actual SQLCipher availability
    if db_manager.has_encryption:
        logger.info(
            "SECURITY: Databases are encrypted with SQLCipher. Ensure appropriate file system permissions are set on the data directory."
        )
    else:
        logger.warning(
            "SECURITY NOTICE: SQLCipher is not available - databases are NOT encrypted. "
            "Install SQLCipher for database encryption. Ensure appropriate file system permissions are set on the data directory."
        )

    logger.info(
        "TIP: You can change the data location by setting the LDR_DATA_DIR environment variable."
    )
    logger.info(banner)

    # Initialize Vite helper for asset management
    from .utils.vite_helper import vite

    vite.init_app(app)

    # Initialize Theme helper for auto-detecting themes from CSS
    from .utils.theme_helper import theme_helper

    theme_helper.init_app(app)

    # Generate combined themes.css from individual theme files
    from .themes import theme_registry

    # Computed outside the try so the PermissionError message below can
    # always refer to it.
    themes_css_path = (
        Path(app.config.get("STATIC_DIR", "static")) / "css" / "themes.css"
    )
    try:
        combined_css = theme_registry.get_combined_css()
        themes_css_path.write_text(combined_css, encoding="utf-8")
        logger.debug(
            f"Generated themes.css with {len(theme_registry.themes)} themes"
        )
    except PermissionError:
        logger.warning(
            f"Cannot write themes.css to {themes_css_path}. "
            "Theme CSS will need to be pre-generated."
        )
    except Exception:
        logger.exception("Error generating combined themes.css")

    # Register socket service
    socket_service = SocketIOService(app=app)

    # Initialize news subscription scheduler
    try:
        # News tables are now created per-user in their encrypted databases
        logger.info(
            "News tables will be created in per-user encrypted databases"
        )

        # Check if scheduler is enabled BEFORE importing/initializing
        # Use env registry which handles both env vars and settings
        from ..settings.env_registry import get_env_setting

        scheduler_enabled = get_env_setting("news.scheduler.enabled", True)
        logger.info(f"News scheduler enabled: {scheduler_enabled}")

        if scheduler_enabled:
            # Only import and initialize if enabled
            from ..scheduler.background import (
                get_background_job_scheduler,
            )

            # Reuse the SettingsManager created above for the scheduler
            # configuration.
            scheduler = get_background_job_scheduler()
            scheduler.initialize_with_settings(settings_manager)
            scheduler.set_app(app)
            scheduler.start()
            app.background_job_scheduler = scheduler  # type: ignore[attr-defined]
            logger.info("News scheduler started with activity-based tracking")
        else:
            # Don't initialize scheduler if disabled
            app.background_job_scheduler = None  # type: ignore[attr-defined]
            logger.info("News scheduler disabled - not initializing")
    except Exception:
        # Best effort: the app must still start without the scheduler.
        logger.exception("Failed to initialize news scheduler")
        app.background_job_scheduler = None  # type: ignore[attr-defined]

    # Apply middleware
    logger.info("Applying middleware...")
    apply_middleware(app)
    logger.info("Middleware applied successfully")

    # Register blueprints
    logger.info("Registering blueprints...")
    register_blueprints(app)
    logger.info("Blueprints registered successfully")

    # Register error handlers
    logger.info("Registering error handlers...")
    register_error_handlers(app)
    logger.info("Error handlers registered successfully")

    # Start the queue processor v2 (uses encrypted databases)
    # Always start the processor - it will handle per-user queue modes
    logger.info("Starting queue processor v2...")
    from .queue.processor_v2 import queue_processor

    queue_processor.start()
    logger.info("Started research queue processor v2")

    logger.info("App factory completed successfully")

    return app, socket_service

386 

387 

def apply_middleware(app):
    """Attach request hooks, template context and teardown logic to *app*.

    Registers, in order, the ``before_request`` hooks for session cleanup,
    per-user database access, current-user injection, research cleanup and
    queue handling; a Jinja context processor exposing ``ResearchStatus``
    values; an app-context teardown that releases database sessions; and a
    ``before_request`` hook for ``/socket.io`` paths.
    """
    # The hook modules are imported lazily, one log line per import, so a
    # hanging or failing import is easy to spot in the startup log.
    logger.info("Importing cleanup_middleware...")
    from .auth.cleanup_middleware import cleanup_completed_research

    logger.info("Importing database_middleware...")
    from .auth.database_middleware import ensure_user_database

    logger.info("Importing decorators...")
    from .auth.decorators import inject_current_user

    logger.info("Importing queue_middleware...")
    from .auth.queue_middleware import process_pending_queue_operations

    logger.info("Importing queue_middleware_v2...")
    from .auth.queue_middleware_v2 import notify_queue_processor

    logger.info("Importing session_cleanup...")
    from .auth.session_cleanup import cleanup_stale_sessions

    logger.info("All middleware imports completed")

    # Flask runs before_request hooks in registration order, so the tuple
    # order below is significant.
    ordered_hooks = (
        cleanup_stale_sessions,  # first clean up stale sessions
        ensure_user_database,  # then open the DB for authenticated users
        inject_current_user,  # then inject the current user into g
        cleanup_completed_research,  # clean up completed research records
        process_pending_queue_operations,  # pending queue ops (direct mode)
        notify_queue_processor,  # signal user activity (queue mode)
    )
    for hook in ordered_hooks:
        app.before_request(hook)

    logger.info("All middleware registered")

    # Note: log-queue flushing is handled asynchronously by the
    # background daemon started in web/app.py::main() (see
    # start_log_queue_processor). We deliberately do NOT register
    # flush_log_queue as a before_request handler: doing so makes
    # every request (including GET /auth/login) synchronously wait
    # on _write_log_to_database, which opens a user DB session and
    # blocks if the shared connection pool is wedged. At-exit
    # draining is still handled via atexit in web/app.py.

    # Inject backend constants into Jinja2 templates for frontend JS.
    # This is the Flask-documented pattern for sharing Python enums with JavaScript.
    # Source of truth: src/local_deep_research/constants.py::ResearchStatus
    # Frontend helpers: src/local_deep_research/web/static/js/config/constants.js
    # Template injection: src/local_deep_research/web/templates/base.html
    from ..constants import ResearchStatus

    @app.context_processor
    def inject_frontend_constants():
        """Expose ResearchStatus names/values and the terminal states."""
        terminal_states = (
            ResearchStatus.COMPLETED,
            ResearchStatus.SUSPENDED,
            ResearchStatus.FAILED,
            ResearchStatus.ERROR,
            ResearchStatus.CANCELLED,
        )
        status_by_name = {
            member.name: member.value for member in ResearchStatus
        }
        return {
            "research_status_enum": status_by_name,
            "research_terminal_states": [
                str(state) for state in terminal_states
            ],
        }

    # Clean up database sessions after each request
    @app.teardown_appcontext
    def cleanup_db_session(exception=None):
        """Clean up database session after each request to avoid cross-thread issues."""
        from flask import g

        session = g.pop("db_session", None)
        if session is not None:
            # Roll back first, then close; a failure in one step must not
            # prevent the other from being attempted.
            teardown_steps = (
                ("rollback", "Error rolling back request session during cleanup"),
                ("close", "Error closing request session during cleanup"),
            )
            for method_name, failure_message in teardown_steps:
                try:
                    getattr(session, method_name)()
                except Exception:
                    logger.warning(failure_message)

        # Sweep credential entries for dead threads. Multiple trigger
        # points (here, processor_v2, and connection_cleanup scheduler)
        # ensure sweeps happen regardless of traffic patterns.
        try:
            from ..database.thread_local_session import cleanup_dead_threads

            cleanup_dead_threads()
        except Exception:
            logger.debug("Error during dead thread sweep", exc_info=True)

        # Clean up any thread-local database session that may have been created
        # via get_metrics_session() fallback in session_context.py (e.g. background
        # threads or error paths where g.db_session was unavailable).
        try:
            from ..database.thread_local_session import cleanup_current_thread

            cleanup_current_thread()
        except Exception:
            logger.debug(
                "Error during thread-local session cleanup", exc_info=True
            )

    # Add a middleware layer to handle abrupt disconnections
    @app.before_request
    def handle_websocket_requests():
        """Short-circuit /socket.io requests only if preprocessing fails."""
        if not request.path.startswith("/socket.io"):
            return None

        try:
            if not request.environ.get("werkzeug.socket"):
                return None
        except Exception:
            logger.exception("WebSocket preprocessing error")
            # Return empty response to prevent further processing
            return "", 200
        return None

    # Note: CORS headers for API routes are now handled by SecurityHeaders middleware
    # (see src/local_deep_research/security/security_headers.py)

514 

515 

def register_blueprints(app):
    """Register all blueprints and app-level routes on *app*.

    Besides the blueprints this defines three app-level routes: ``/``
    (research page, or redirect to login), ``/favicon.ico`` and
    ``/static/<path>``. It also exempts the ``api_v1`` blueprint from CSRF
    protection.
    """
    # Blueprint modules are imported lazily with a log line each so that a
    # slow or failing import is visible in the startup log.
    logger.info("Importing blueprints...")

    # Benchmark blueprint
    from ..benchmarks.web_api.benchmark_routes import benchmark_bp

    logger.info("Importing API blueprint...")
    from .api import api_blueprint

    logger.info("Importing auth blueprint...")
    from .auth import auth_bp

    logger.info("Importing API routes blueprint...")
    from .routes.api_routes import api_bp

    logger.info("Importing context overflow API...")
    from .routes.context_overflow_api import context_overflow_bp

    logger.info("Importing history routes...")
    from .routes.history_routes import history_bp

    logger.info("Importing metrics routes...")
    from .routes.metrics_routes import metrics_bp

    logger.info("Importing research routes...")
    from .routes.research_routes import research_bp

    logger.info("Importing settings routes...")
    from .routes.settings_routes import settings_bp

    logger.info("All core blueprints imported successfully")

    # Template key -> positional arguments for SettingsManager.get_setting.
    # A one-element tuple means the setting is read without a default.
    research_page_settings = (
        ("llm_provider", ("llm.provider", "ollama")),
        ("llm_model", ("llm.model", "")),
        ("llm_openai_endpoint_url", ("llm.openai_endpoint.url", "")),
        ("llm_ollama_url", ("llm.ollama.url",)),
        ("llm_lmstudio_url", ("llm.lmstudio.url",)),
        (
            "llm_local_context_window_size",
            ("llm.local_context_window_size",),
        ),
        ("search_tool", ("search.tool", "")),
        ("search_iterations", ("search.iterations", 3)),
        (
            "search_questions_per_iteration",
            ("search.questions_per_iteration", 2),
        ),
        ("search_strategy", ("search.search_strategy", "source-based")),
    )

    # Add root route
    @app.route("/")
    def index():
        """Root route - redirect to login if not authenticated"""
        from flask import redirect, session, url_for

        from ..constants import get_available_strategies
        from ..database.session_context import get_user_db_session
        from ..utilities.db_utils import get_settings_manager
        from .utils.templates import render_template_with_defaults

        # Unauthenticated visitors go to the login page.
        if "username" not in session:
            return redirect(url_for("auth.login"))

        # Load current settings from database using proper session context
        username = session.get("username")
        page_settings = {}
        show_all_strategies = False
        with get_user_db_session(username) as db_session:
            if db_session:
                manager = get_settings_manager(db_session, username)
                page_settings = {
                    key: manager.get_setting(*args)
                    for key, args in research_page_settings
                }
                show_all_strategies = manager.get_setting(
                    "search.show_all_strategies", False
                )

        # Debug logging
        log_settings(page_settings, "Research page settings loaded")

        return render_template_with_defaults(
            "pages/research.html",
            settings=page_settings,
            strategies=get_available_strategies(
                show_all=bool(show_all_strategies)
            ),
        )

    # Register auth blueprint FIRST (so login page is accessible)
    app.register_blueprint(auth_bp)  # Already has url_prefix="/auth"

    # Register other blueprints
    app.register_blueprint(research_bp)
    app.register_blueprint(history_bp)  # Already has url_prefix="/history"
    app.register_blueprint(metrics_bp)
    app.register_blueprint(settings_bp)  # Already has url_prefix="/settings"
    app.register_blueprint(api_bp, url_prefix="/research/api")
    app.register_blueprint(benchmark_bp)
    app.register_blueprint(context_overflow_bp, url_prefix="/metrics")

    # Register news API routes
    from .routes import news_routes

    app.register_blueprint(news_routes.bp)
    logger.info("News API routes registered successfully")

    # Register chat routes
    from ..chat.routes import chat_bp

    app.register_blueprint(chat_bp)
    logger.info("Chat routes registered successfully")

    # Register follow-up research routes
    from ..followup_research.routes import followup_bp

    app.register_blueprint(followup_bp)
    logger.info("Follow-up research routes registered successfully")

    # Register news page blueprint
    from ..news.web import create_news_blueprint

    news_bp = create_news_blueprint()
    app.register_blueprint(news_bp, url_prefix="/news")
    logger.info("News page routes registered successfully")

    # Register API v1 blueprint
    app.register_blueprint(api_blueprint)  # Already has url_prefix='/api/v1'

    # Register Research Library blueprint
    from ..research_library import library_bp, rag_bp, delete_bp

    app.register_blueprint(library_bp)  # Already has url_prefix='/library'
    logger.info("Research Library routes registered successfully")

    # Register RAG Management blueprint
    app.register_blueprint(rag_bp)  # Already has url_prefix='/library'
    logger.info("RAG Management routes registered successfully")

    # Register Deletion Management blueprint
    app.register_blueprint(delete_bp)  # Already has url_prefix='/library/api'
    logger.info("Deletion Management routes registered successfully")

    # Register Semantic Search blueprint
    from ..research_library.search import search_bp

    app.register_blueprint(search_bp)  # url_prefix='/library'
    logger.info("Semantic Search routes registered successfully")

    # Register Document Scheduler blueprint
    from ..research_scheduler.routes import scheduler_bp

    app.register_blueprint(scheduler_bp)
    logger.info("Document Scheduler routes registered successfully")

    # CSRF exemptions — Flask-WTF requires Blueprint objects (not strings)
    # to populate _exempt_blueprints. Passing strings only populates
    # _exempt_views, which compares against module-qualified names and
    # silently fails to match Flask endpoint names.
    if hasattr(app, "extensions") and "csrf" in app.extensions:
        csrf_protect = app.extensions["csrf"]
        # Only api_v1 is exempt: it's a programmatic REST API used by
        # external clients. The api, benchmark, and research blueprints
        # are browser-facing and the frontend already sends CSRF tokens.
        for exempt_name in ("api_v1",):
            exempt_bp = app.blueprints.get(exempt_name)
            if exempt_bp is not None:
                csrf_protect.exempt(exempt_bp)

    # Add favicon route
    # Exempt favicon from rate limiting
    @app.route("/favicon.ico")
    @limiter.exempt
    def favicon():
        return send_from_directory(
            app.config.get("STATIC_DIR", "static"),
            "favicon.ico",
            mimetype="image/x-icon",
        )

    # Add static route at the app level for compatibility
    # Exempt static files from rate limiting
    import re

    # A dot-delimited run of 8+ URL-safe characters, e.g. "app.3f9a1c2b.js".
    _HASHED_FILENAME_RE = re.compile(r"\.[A-Za-z0-9_-]{8,}\.")

    def _try_send_static(directory, relative_path, allow_immutable):
        """Return a response for *relative_path* under *directory*, or None.

        None means the path failed validation (ValueError) or does not
        exist, so the caller should try its next candidate location.
        """
        from ..security.path_validator import PathValidator

        try:
            safe_path = PathValidator.validate_safe_path(
                relative_path,
                directory,
                allow_absolute=False,
                required_extensions=None,
            )
            if not (safe_path and safe_path.exists()):
                return None

            response = make_response(
                send_from_directory(str(directory), relative_path)
            )
            if allow_immutable and _HASHED_FILENAME_RE.search(relative_path):
                # Content-hashed files are safe for immutable caching
                cache_policy = "public, max-age=31536000, immutable"
            else:
                # Non-hashed files must revalidate on each request
                cache_policy = "public, max-age=0, must-revalidate"
            response.headers["Cache-Control"] = cache_policy
            return response
        except ValueError:
            # Path validation failed
            return None

    @app.route("/static/<path:path>")
    @limiter.exempt
    def app_serve_static(path):
        static_dir = Path(app.config.get("STATIC_DIR", "static"))
        dist_dir = static_dir / "dist"
        dist_prefix = "dist/"

        # Candidate locations, tried in order; the first hit wins.
        candidates = []
        if path.startswith(dist_prefix):
            # Flask captures path as "dist/js/app.abc.js", so strip the
            # "dist/" prefix before joining with dist_dir to avoid a
            # double-dist path (static/dist/dist/...).
            candidates.append((dist_dir, path[len(dist_prefix) :], True))
        # Vite uses base: '/static/' so CSS references /static/fonts/...
        # but the files live in static/dist/fonts/...
        candidates.append((dist_dir, path, True))
        # Finally the regular static folder, which always revalidates.
        candidates.append((static_dir, path, False))

        for directory, relative_path, allow_immutable in candidates:
            response = _try_send_static(
                directory, relative_path, allow_immutable
            )
            if response is not None:
                return response

        abort(404)

803 

804 

def register_error_handlers(app):
    """Install the app-wide HTTP and exception handlers on *app*.

    For 404, 500 and 413 the response is JSON when the request path is an
    API path (per ``_is_api_path``) and plain text otherwise. A 401 yields
    JSON for API paths and a redirect to login otherwise. ``WebAPIException``,
    ``CSRFError`` and ``NewsAPIException`` are always rendered as JSON.
    """
    from .auth.decorators import _is_api_path

    def _json_or_text(message, status):
        # API clients get {"error": message}; everything else gets the
        # bare message as the response body.
        if _is_api_path(request.path):
            return make_response(jsonify({"error": message}), status)
        return make_response(message, status)

    @app.errorhandler(404)
    def not_found(error):
        return _json_or_text("Not found", 404)

    @app.errorhandler(500)
    def server_error(error):
        return _json_or_text("Server error", 500)

    @app.errorhandler(401)
    def handle_unauthorized(error):
        if not _is_api_path(request.path):
            from .auth.decorators import _safe_redirect_to_login

            return _safe_redirect_to_login()
        return make_response(
            jsonify({"error": "Authentication required"}),
            401,
        )

    @app.errorhandler(413)
    def handle_request_too_large(error):
        return _json_or_text("Request too large", 413)

    from .exceptions import WebAPIException

    @app.errorhandler(WebAPIException)
    def handle_web_api_exception(error):
        """Handle WebAPIException and return JSON."""
        logger.error(
            "Web API error: {} (status {})", error.error_code, error.status_code
        )
        payload = error.to_dict()
        return jsonify(payload), error.status_code

    # Handle CSRF validation errors as JSON
    try:
        from flask_wtf.csrf import CSRFError
    except ImportError:
        pass
    else:

        @app.errorhandler(CSRFError)
        def handle_csrf_error(error):
            body = jsonify({"error": str(error.description)})
            return make_response(body, 400)

    # Handle News API exceptions globally
    try:
        from ..news.exceptions import NewsAPIException
    except ImportError:
        # News module not available
        pass
    else:

        @app.errorhandler(NewsAPIException)
        def handle_news_api_exception(error):
            """Handle NewsAPIException and convert to JSON response."""
            # Function-local import retained from the original handler.
            from loguru import logger

            logger.error(
                "News API error: {} (status {})",
                error.error_code,
                error.status_code,
            )
            payload = error.to_dict()
            return jsonify(payload), error.status_code

881 

882 

def create_database(app):
    """
    DEPRECATED no-op kept so existing callers keep working.

    Database creation is now handled per-user via encrypted_db.py; the
    *app* argument is accepted but ignored and nothing is returned.
    """
    return None