Coverage for src / local_deep_research / web / app_factory.py: 85%

343 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-01-11 00:51 +0000

1# import logging - replaced with loguru 

2import ipaddress 

3import os 

4from pathlib import Path 

5from importlib import resources as importlib_resources 

6 

7from flask import ( 

8 Flask, 

9 Request, 

10 jsonify, 

11 make_response, 

12 request, 

13 send_from_directory, 

14) 

15from flask_wtf.csrf import CSRFProtect 

16from werkzeug.middleware.proxy_fix import ProxyFix 

17from loguru import logger 

18from local_deep_research.settings.logger import log_settings 

19 

20from ..utilities.log_utils import InterceptHandler 

21from ..security import SecurityHeaders 

22from .utils.rate_limiter import limiter 

23from ..security.file_upload_validator import FileUploadValidator 

24 

25# Removed DB_PATH import - using per-user databases now 

26from .services.socket_service import SocketIOService 

27 

28 

29def _is_private_ip(ip_str: str) -> bool: 

30 """Check if IP is a private/local network address (RFC 1918 + localhost). 

31 

32 This allows LAN access over HTTP without requiring HTTPS, matching the 

33 behavior of other self-hosted applications like Jellyfin and Home Assistant. 

34 

35 Private ranges: 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16, plus localhost. 

36 """ 

37 try: 

38 ip = ipaddress.ip_address(ip_str) 

39 return ip.is_private or ip.is_loopback 

40 except ValueError: 

41 return False 

42 

43 

class DiskSpoolingRequest(Request):
    """Request subclass that bounds in-memory form data for uploads.

    Intended as the security fix for issue #1176: with 200 files at a 50MB
    limit each, a single multipart request could otherwise hold 10GB+ in
    memory. The stated intent is that uploads larger than
    ``max_form_memory_size`` are written to temporary files on disk rather
    than kept in memory.

    NOTE(review): Werkzeug documents ``max_form_memory_size`` as the cap on
    in-memory (non-file) form data - confirm that this attribute produces
    the disk-spooling behaviour described here.
    """

    # 5MB threshold, expressed in bytes
    max_form_memory_size = 5 * 1024**2

57 

58 

def _load_or_create_secret_key(secret_key_file):
    """Return the per-installation SECRET_KEY stored at ``secret_key_file``.

    Args:
        secret_key_file: ``Path`` of the key file inside the data directory.

    Returns:
        str: The stored key when the file exists and is non-empty; otherwise
        a freshly generated 64-hex-character key. A new key is persisted on a
        best-effort basis - failures to read or write are logged and a key is
        still returned so the application can start.
    """
    import secrets

    if secret_key_file.exists():
        try:
            with open(secret_key_file, "r") as f:
                stored_key = f.read().strip()
        except Exception as e:
            logger.warning(f"Could not read secret key file: {e}")
            # Do not overwrite a file we could not read; use a transient key.
            return secrets.token_hex(32)
        if stored_key:
            return stored_key
        # An empty key would leave Flask sessions unusable, so treat an
        # empty/whitespace-only file like a missing one and regenerate.
        logger.warning("Secret key file is empty - generating a new SECRET_KEY")

    new_key = secrets.token_hex(32)
    try:
        secret_key_file.parent.mkdir(parents=True, exist_ok=True)
        # Create the file with owner-only permissions from the start so the
        # key is never briefly readable under the default umask.
        fd = os.open(
            secret_key_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600
        )
        with os.fdopen(fd, "w") as f:
            f.write(new_key)
        # Also tighten permissions if the file already existed.
        secret_key_file.chmod(0o600)
        logger.info("Generated new SECRET_KEY for this installation")
    except Exception as e:
        logger.warning(f"Could not save secret key file: {e}")
    return new_key


def create_app():
    """
    Create and configure the Flask application.

    Sets up, in order: logging interception for Werkzeug, the Flask app with
    package template/static paths, upload spooling, proxy handling, dynamic
    cookie security, SECRET_KEY, session/CSRF/rate-limit configuration,
    data-location logging, Vite assets, Socket.IO, the news scheduler,
    request middleware, blueprints, error handlers and the queue processor.

    Returns:
        tuple: (app, socketio) - The configured Flask app and SocketIO instance
    """
    # Set Werkzeug logger to WARNING level to suppress Socket.IO polling logs
    import logging

    logging.getLogger("werkzeug").setLevel(logging.WARNING)
    logging.getLogger("werkzeug").addHandler(InterceptHandler())

    logger.info("Initializing Local Deep Research application...")

    try:
        # Get directories based on package installation
        PACKAGE_DIR = importlib_resources.files("local_deep_research") / "web"
        with importlib_resources.as_file(PACKAGE_DIR) as package_dir:
            STATIC_DIR = (package_dir / "static").as_posix()
            TEMPLATE_DIR = (package_dir / "templates").as_posix()

        # Initialize Flask app with package directories
        # Set static_folder to None to disable Flask's built-in static handling
        # We'll use our custom static route instead to handle dist folder
        app = Flask(__name__, static_folder=None, template_folder=TEMPLATE_DIR)
        # Store static dir for custom handling
        app.config["STATIC_DIR"] = STATIC_DIR
        logger.debug(f"Using package static path: {STATIC_DIR}")
        logger.debug(f"Using package template path: {TEMPLATE_DIR}")
    except Exception:
        # Fallback for development
        logger.exception("Package directories not found, using fallback paths")
        # Set static_folder to None to disable Flask's built-in static handling
        app = Flask(
            __name__,
            static_folder=None,
            template_folder=str(Path("templates").resolve()),
        )
        # Store static dir for custom handling
        app.config["STATIC_DIR"] = str(Path("static").resolve())

    # Use custom Request class that spools large uploads to disk
    # This prevents memory exhaustion from large file uploads (issue #1176)
    app.request_class = DiskSpoolingRequest

    # Add proxy support for deployments behind load balancers/reverse proxies
    # This ensures X-Forwarded-For and X-Forwarded-Proto headers are properly handled
    # Important for rate limiting and security (gets real client IP, not proxy IP)
    app.wsgi_app = ProxyFix(
        app.wsgi_app,
        x_for=1,  # Trust 1 proxy for X-Forwarded-For
        x_proto=1,  # Trust 1 proxy for X-Forwarded-Proto (http/https)
        x_host=0,  # Don't trust X-Forwarded-Host (security)
        x_port=0,  # Don't trust X-Forwarded-Port (security)
        x_prefix=0,  # Don't trust X-Forwarded-Prefix (security)
    )

    # WSGI middleware for dynamic cookie security
    # It wraps ProxyFix (outermost layer), so it sees the untouched REMOTE_ADDR
    # Must be WSGI level because Flask session cookies are set after after_request handlers
    class SecureCookieMiddleware:
        """WSGI middleware to add Secure flag to cookies based on request context.

        Security model (see ``_should_add_secure_flag``):
        - TESTING mode: Never add Secure flag (for CI/development)
        - HTTPS request: Always add Secure flag
        - HTTP from a private/loopback connection IP: Skip Secure flag
        - HTTP from a public connection IP: Add Secure flag (will fail, by
          design - use HTTPS)

        Only the actual connection address (REMOTE_ADDR) is consulted; the
        value of X-Forwarded-For is never trusted, so it cannot be spoofed
        to bypass the check.
        """

        def __init__(self, wsgi_app, flask_app):
            self.wsgi_app = wsgi_app
            self.flask_app = flask_app

        def __call__(self, environ, start_response):
            # Decide once per request whether cookies need the Secure flag
            should_add_secure = self._should_add_secure_flag(environ)

            def custom_start_response(status, headers, exc_info=None):
                if should_add_secure:
                    # Rebuild the header list, appending "; Secure" to every
                    # Set-Cookie header that does not already carry it.
                    # The attribute name is matched case-insensitively.
                    headers = [
                        (name, value + "; Secure")
                        if name.lower() == "set-cookie"
                        and "; secure" not in value.lower()
                        else (name, value)
                        for name, value in headers
                    ]
                return start_response(status, headers, exc_info)

            return self.wsgi_app(environ, custom_start_response)

        def _should_add_secure_flag(self, environ):
            """Determine if Secure flag should be added based on request context.

            Security model:
            - Check the ACTUAL connection IP (REMOTE_ADDR), not X-Forwarded-For header
            - SecureCookieMiddleware is outer wrapper, so we see original REMOTE_ADDR
            - If connection comes from private IP (client or proxy), allow HTTP
            - If connection comes from public IP, require HTTPS

            This is safe because:
            - We never trust X-Forwarded-For header values (can be spoofed)
            - We only check the actual TCP connection source IP
            - Spoofing X-Forwarded-For from public IP doesn't bypass this check
            - Local proxies (nginx on localhost/LAN) have private REMOTE_ADDR

            NOTE(review): ``wsgi.url_scheme`` is also read before ProxyFix
            runs, so X-Forwarded-Proto presumably is not reflected here -
            confirm this is intended for TLS-terminating proxies.

            Returns:
                bool: True when Set-Cookie headers should get the Secure flag.
            """
            # Skip if in explicit testing mode
            if self.flask_app.config.get("LDR_TESTING_MODE"):
                return False

            # Check actual connection source IP (before ProxyFix modifies it)
            # This is either:
            # - Direct client IP (if no proxy)
            # - Proxy server IP (if behind proxy)
            # Local proxies (nginx on localhost, Traefik on LAN) have private IPs
            remote_addr = environ.get("REMOTE_ADDR", "")
            is_private = _is_private_ip(remote_addr)

            # Check if HTTPS
            is_https = environ.get("wsgi.url_scheme") == "https"

            # Add Secure flag if:
            # - Using HTTPS (always secure over HTTPS)
            # - OR connection is from public IP (require HTTPS for public access)
            return is_https or not is_private

    # Wrap the app with our cookie security middleware
    app.wsgi_app = SecureCookieMiddleware(app.wsgi_app, app)

    # App configuration
    # Generate or load a unique SECRET_KEY per installation
    from ..config.paths import get_data_directory

    data_dir = get_data_directory()
    app.config["SECRET_KEY"] = _load_or_create_secret_key(
        Path(data_dir) / ".secret_key"
    )

    # Session cookie security settings
    # SECURE flag is added dynamically based on request context
    # (see SecureCookieMiddleware above)
    # This allows localhost HTTP to work for development while keeping production secure
    #
    # Check if explicitly in testing mode (for backwards compatibility)
    is_testing = (
        os.getenv("CI")
        or os.getenv("TESTING")
        or os.getenv("PYTEST_CURRENT_TEST")
        or app.debug
    )
    # Set to False - we add Secure flag dynamically in SecureCookieMiddleware
    # Exception: if TESTING mode is active, we never add Secure flag
    app.config["SESSION_COOKIE_SECURE"] = False
    # Read by SecureCookieMiddleware on every request
    app.config["LDR_TESTING_MODE"] = bool(is_testing)
    app.config["SESSION_COOKIE_HTTPONLY"] = (
        True  # Prevent JavaScript access (XSS mitigation)
    )
    app.config["SESSION_COOKIE_SAMESITE"] = "Lax"  # CSRF protection
    app.config["PERMANENT_SESSION_LIFETIME"] = 7200  # 2 hours in seconds
    # PREFERRED_URL_SCHEME affects URL generation (url_for), not request.is_secure
    app.config["PREFERRED_URL_SCHEME"] = "https"

    # File upload security limits - calculated from FileUploadValidator constants
    app.config["MAX_CONTENT_LENGTH"] = (
        FileUploadValidator.MAX_FILES_PER_REQUEST
        * FileUploadValidator.MAX_FILE_SIZE
    )

    # Initialize CSRF protection
    # Explicitly enable CSRF protection (don't rely on implicit Flask-WTF behavior)
    app.config["WTF_CSRF_ENABLED"] = True
    CSRFProtect(app)
    # Exempt Socket.IO from CSRF protection
    # Note: Flask-SocketIO handles CSRF internally, so we don't need to exempt specific views

    # Initialize security headers middleware
    SecurityHeaders(app)

    # Initialize rate limiting for security (brute force protection)
    # Uses imported limiter from utils.rate_limiter module
    # Rate limiting is disabled in CI via enabled callable in rate_limiter.py
    # Also set app config to ensure Flask-Limiter respects our settings
    from ..settings.env_registry import is_rate_limiting_enabled

    app.config["RATELIMIT_ENABLED"] = is_rate_limiting_enabled()
    limiter.init_app(app)

    # Custom error handler for rate limit exceeded (429)
    @app.errorhandler(429)
    def ratelimit_handler(e):
        # Import here to avoid circular imports
        from .utils.rate_limiter import get_client_ip

        # Audit logging for security monitoring
        # Use get_client_ip() to get the real IP behind proxies
        logger.warning(
            f"Rate limit exceeded: endpoint={request.endpoint} "
            f"ip={get_client_ip()} "
            f"user_agent={request.headers.get('User-Agent', 'unknown')}"
        )
        return jsonify(
            error="Too many requests",
            message="Too many attempts. Please try again later.",
        ), 429

    # Note: Dynamic cookie security is handled by SecureCookieMiddleware (WSGI level)
    # This is necessary because Flask's session cookies are set AFTER after_request handlers
    # The middleware wrapping happens above, right after ProxyFix

    # Disable CSRF for API routes
    @app.before_request
    def disable_csrf_for_api():
        if request.path.startswith(
            ("/api/v1/", "/research/api/", "/benchmark/api/")
        ):
            # Mark this request as exempt from CSRF
            request.environ["csrf_exempt"] = True

    # Database configuration - Using per-user databases now
    # No shared database configuration needed
    app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False
    app.config["SQLALCHEMY_ECHO"] = False

    # Per-user databases are created automatically via encrypted_db.py

    # Log data location and security information
    from ..database.encrypted_db import db_manager

    logger.info("=" * 60)
    logger.info("DATA STORAGE INFORMATION")
    logger.info("=" * 60)
    logger.info(f"Data directory: {data_dir}")
    logger.info(
        "Databases: Per-user encrypted databases in encrypted_databases/"
    )

    # Check if using custom location
    from local_deep_research.settings.manager import SettingsManager

    settings_manager = SettingsManager()
    custom_data_dir = settings_manager.get_setting("bootstrap.data_dir")
    if custom_data_dir:
        logger.info(
            f"Using custom data location via LDR_DATA_DIR: {custom_data_dir}"
        )
    else:
        logger.info("Using default platform-specific data location")

    # Display security status based on actual SQLCipher availability
    if db_manager.has_encryption:
        logger.info(
            "SECURITY: Databases are encrypted with SQLCipher. Ensure appropriate file system permissions are set on the data directory."
        )
    else:
        logger.warning(
            "SECURITY NOTICE: SQLCipher is not available - databases are NOT encrypted. "
            "Install SQLCipher for database encryption. Ensure appropriate file system permissions are set on the data directory."
        )

    logger.info(
        "TIP: You can change the data location by setting the LDR_DATA_DIR environment variable."
    )
    logger.info("=" * 60)

    # Initialize Vite helper for asset management
    from .utils.vite_helper import vite

    vite.init_app(app)

    # Register socket service
    socket_service = SocketIOService(app=app)

    # Initialize news subscription scheduler
    # News is always initialized for now - per-user enabling is handled in routes
    try:
        # News tables are now created per-user in their encrypted databases
        logger.info(
            "News tables will be created in per-user encrypted databases"
        )

        # Check if scheduler is enabled BEFORE importing/initializing
        # Use env registry which handles both env vars and settings
        from ..settings.env_registry import get_env_setting

        scheduler_enabled = get_env_setting("news.scheduler.enabled", True)
        logger.info(f"News scheduler enabled: {scheduler_enabled}")

        if scheduler_enabled:
            # Only import and initialize if enabled
            from ..news.subscription_manager.scheduler import (
                get_news_scheduler,
            )

            # Get scheduler instance and initialize with the system settings
            # manager created above
            scheduler = get_news_scheduler()
            scheduler.initialize_with_settings(settings_manager)
            scheduler.start()
            app.news_scheduler = scheduler
            logger.info("News scheduler started with activity-based tracking")
        else:
            # Don't initialize scheduler if disabled
            app.news_scheduler = None
            logger.info("News scheduler disabled - not initializing")
    except Exception:
        # Scheduler problems must not prevent the web app from starting
        logger.exception("Failed to initialize news scheduler")
        app.news_scheduler = None

    # Apply middleware
    logger.info("Applying middleware...")
    apply_middleware(app)
    logger.info("Middleware applied successfully")

    # Register blueprints
    logger.info("Registering blueprints...")
    register_blueprints(app)
    logger.info("Blueprints registered successfully")

    # Register error handlers
    logger.info("Registering error handlers...")
    register_error_handlers(app)
    logger.info("Error handlers registered successfully")

    # Start the queue processor v2 (uses encrypted databases)
    # Always start the processor - it will handle per-user queue modes
    logger.info("Starting queue processor v2...")
    from .queue.processor_v2 import queue_processor

    queue_processor.start()
    logger.info("Started research queue processor v2")

    logger.info("App factory completed successfully")

    return app, socket_service

433 

434 

def apply_middleware(app):
    """Attach the request lifecycle hooks to ``app``.

    Registers, in this order, the ``before_request`` hooks for session
    cleanup, user database access, current-user injection, research cleanup
    and queue handling, then the log-queue flush hook, a teardown hook that
    closes ``g.db_session``, and finally a ``/socket.io`` pre-processing
    hook.
    """

    # Hook modules are imported lazily, logging before each import
    logger.info("Importing cleanup_middleware...")
    from .auth.cleanup_middleware import cleanup_completed_research

    logger.info("Importing database_middleware...")
    from .auth.database_middleware import ensure_user_database

    logger.info("Importing decorators...")
    from .auth.decorators import inject_current_user

    logger.info("Importing queue_middleware...")
    from .auth.queue_middleware import process_pending_queue_operations

    logger.info("Importing queue_middleware_v2...")
    from .auth.queue_middleware_v2 import notify_queue_processor

    logger.info("Importing session_cleanup...")
    from .auth.session_cleanup import cleanup_stale_sessions

    logger.info("All middleware imports completed")

    # Registration order is the execution order for each request
    ordered_hooks = (
        cleanup_stale_sessions,  # first clean up stale sessions
        ensure_user_database,  # then open the database for authenticated users
        inject_current_user,  # then inject current user into g
        cleanup_completed_research,  # clean up completed research records
        process_pending_queue_operations,  # pending queue ops (direct mode)
        notify_queue_processor,  # signal user activity (queue mode)
    )
    for hook in ordered_hooks:
        app.before_request(hook)

    logger.info("All middleware registered")

    # Flush any queued logs from background threads
    logger.info("Importing log_utils...")
    from ..utilities.log_utils import flush_log_queue

    app.before_request(flush_log_queue)
    logger.info("Log flushing middleware registered")

    # Clean up database sessions after each request
    @app.teardown_appcontext
    def cleanup_db_session(exception=None):
        """Close and clear ``g.db_session`` to avoid cross-thread issues."""
        from flask import g

        if not hasattr(g, "db_session"):
            return

        try:
            if g.db_session:
                g.db_session.close()
        except Exception:
            # Deliberately best-effort: errors while closing are ignored
            pass
        finally:
            g.db_session = None

    # Add a middleware layer to handle abrupt disconnections
    @app.before_request
    def handle_websocket_requests():
        if not request.path.startswith("/socket.io"):
            return None

        try:
            if not request.environ.get("werkzeug.socket"):
                return None
        except Exception:
            logger.exception("WebSocket preprocessing error")
            # Return empty response to prevent further processing
            return "", 200
        return None

    # Note: CORS headers for API routes are now handled by SecurityHeaders middleware
    # (see src/local_deep_research/security/security_headers.py)

511 

512 

def register_blueprints(app):
    """Wire every blueprint and the app-level routes into ``app``.

    Besides the blueprints, this defines the root route (research page or
    redirect to login), exempts the API endpoints from CSRF protection, and
    adds the ``/favicon.ico`` and ``/static/<path>`` routes, both exempt
    from rate limiting.
    """

    logger.info("Importing blueprints...")

    # Benchmark blueprint
    from ..benchmarks.web_api.benchmark_routes import benchmark_bp

    logger.info("Importing API blueprint...")
    from .api import api_blueprint

    logger.info("Importing auth blueprint...")
    from .auth import auth_bp

    logger.info("Importing API routes blueprint...")
    from .routes.api_routes import api_bp

    logger.info("Importing context overflow API...")
    from .routes.context_overflow_api import context_overflow_bp

    logger.info("Importing history routes...")
    from .routes.history_routes import history_bp

    logger.info("Importing metrics routes...")
    from .routes.metrics_routes import metrics_bp

    logger.info("Importing research routes...")
    from .routes.research_routes import research_bp

    logger.info("Importing settings routes...")
    from .routes.settings_routes import settings_bp

    logger.info("All core blueprints imported successfully")

    # Template key -> positional arguments for SettingsManager.get_setting.
    # One-element tuples mean the lookup is made without an explicit default.
    index_setting_lookups = (
        ("llm_provider", ("llm.provider", "ollama")),
        ("llm_model", ("llm.model", "")),
        ("llm_openai_endpoint_url", ("llm.openai_endpoint.url", "")),
        ("llm_ollama_url", ("llm.ollama.url",)),
        ("llm_lmstudio_url", ("llm.lmstudio.url",)),
        (
            "llm_local_context_window_size",
            ("llm.local_context_window_size",),
        ),
        ("search_tool", ("search.tool", "")),
        ("search_iterations", ("search.iterations", 2)),
        (
            "search_questions_per_iteration",
            ("search.questions_per_iteration", 3),
        ),
        ("search_strategy", ("search.search_strategy", "source-based")),
    )

    # Add root route
    @app.route("/")
    def index():
        """Root route - redirect to login if not authenticated"""
        from flask import redirect, session, url_for

        from ..database.session_context import get_user_db_session
        from ..utilities.db_utils import get_settings_manager
        from .utils.templates import render_template_with_defaults

        # Unauthenticated visitors go to the login page
        if "username" not in session:
            return redirect(url_for("auth.login"))

        # Load current settings from database using proper session context
        username = session.get("username")
        settings = {}
        with get_user_db_session(username) as db_session:
            if db_session:
                manager = get_settings_manager(db_session, username)
                settings = {
                    template_key: manager.get_setting(*lookup_args)
                    for template_key, lookup_args in index_setting_lookups
                }

        # Debug logging
        log_settings(settings, "Research page settings loaded")

        return render_template_with_defaults(
            "pages/research.html", settings=settings
        )

    # Auth blueprint goes FIRST so the login page is accessible
    # (it already has url_prefix="/auth")
    app.register_blueprint(auth_bp)

    # Remaining core blueprints, in their original registration order
    app.register_blueprint(research_bp)
    app.register_blueprint(history_bp)  # already has url_prefix="/history"
    app.register_blueprint(metrics_bp)
    app.register_blueprint(settings_bp)  # already has url_prefix="/settings"
    app.register_blueprint(api_bp, url_prefix="/research/api")
    app.register_blueprint(benchmark_bp)
    app.register_blueprint(context_overflow_bp, url_prefix="/metrics")

    # News API routes
    from .routes import news_routes

    app.register_blueprint(news_routes.bp)
    logger.info("News API routes registered successfully")

    # Follow-up research routes
    from ..followup_research.routes import followup_bp

    app.register_blueprint(followup_bp)
    logger.info("Follow-up research routes registered successfully")

    # News page blueprint
    from ..news.web import create_news_blueprint

    news_bp = create_news_blueprint()
    app.register_blueprint(news_bp, url_prefix="/news")
    logger.info("News page routes registered successfully")

    # API v1 blueprint (already has url_prefix='/api/v1')
    app.register_blueprint(api_blueprint)

    # Research Library, RAG Management and Deletion Management blueprints
    from ..research_library import library_bp, rag_bp, delete_bp

    app.register_blueprint(library_bp)  # already has url_prefix='/library'
    logger.info("Research Library routes registered successfully")

    app.register_blueprint(rag_bp)  # already has url_prefix='/library'
    logger.info("RAG Management routes registered successfully")

    app.register_blueprint(delete_bp)  # already has url_prefix='/library/api'
    logger.info("Deletion Management routes registered successfully")

    # Document Scheduler blueprint
    from ..research_scheduler.routes import scheduler_bp

    app.register_blueprint(scheduler_bp)
    logger.info("Document Scheduler routes registered successfully")

    # After registration, update CSRF exemptions
    if "csrf" in getattr(app, "extensions", {}):
        csrf = app.extensions["csrf"]
        # Exempt the API blueprints and each of their actual endpoints
        csrf.exempt("api_v1")
        csrf.exempt("api")
        api_endpoint_prefixes = ("api_v1.", "api.")
        for rule in app.url_map.iter_rules():
            endpoint = rule.endpoint
            if endpoint and endpoint.startswith(api_endpoint_prefixes):
                csrf.exempt(endpoint)

    # Add favicon route
    # Exempt favicon from rate limiting
    @app.route("/favicon.ico")
    @limiter.exempt
    def favicon():
        return send_from_directory(
            app.config.get("STATIC_DIR", "static"),
            "favicon.ico",
            mimetype="image/x-icon",
        )

    # Add static route at the app level for compatibility
    # Exempt static files from rate limiting
    @app.route("/static/<path:path>")
    @limiter.exempt
    def app_serve_static(path):
        from ..security.path_validator import PathValidator

        static_root = Path(app.config.get("STATIC_DIR", "static"))

        # Built assets in dist/ take precedence over the plain static folder
        for base_dir in (static_root / "dist", static_root):
            try:
                # Use PathValidator to safely validate the path
                candidate = PathValidator.validate_safe_path(
                    path,
                    base_dir,
                    allow_absolute=False,
                    required_extensions=None,  # any file type is allowed
                )
                if candidate and candidate.exists():
                    return send_from_directory(str(base_dir), path)
            except Exception:
                # Validation or serving failed - try the next location
                continue

        return make_response(jsonify({"error": "Not found"}), 404)

728 

729 

def register_error_handlers(app):
    """Install JSON error handlers on ``app``.

    Covers 404 and 500 responses, CSRF validation failures (with extra
    guidance when the failure looks like a Secure-cookie-over-HTTP problem)
    and ``NewsAPIException``. The CSRF and news handlers are skipped when
    their exception classes cannot be imported.
    """

    def _json_error(message, status_code):
        # Shared shape for the simple {"error": ...} responses
        return make_response(jsonify({"error": message}), status_code)

    @app.errorhandler(404)
    def not_found(error):
        return _json_error("Not found", 404)

    @app.errorhandler(500)
    def server_error(error):
        return _json_error("Server error", 500)

    # Handle CSRF validation errors with helpful message
    try:
        from flask_wtf.csrf import CSRFError

        @app.errorhandler(CSRFError)
        def handle_csrf_error(error):
            """Handle CSRF errors with helpful debugging info."""
            # Gather hints that this is a Secure cookie issue over HTTP
            is_http = not request.is_secure
            is_private = _is_private_ip(request.remote_addr or "")
            is_proxied = request.headers.get("X-Forwarded-For") is not None

            error_msg = str(error.description)

            # Direct access from a private address is the only HTTP case that
            # does not get the detailed help below
            direct_private_access = is_private and not is_proxied
            if is_http and not direct_private_access:
                logger.warning(
                    f"CSRF validation failed - likely due to Secure cookie over HTTP. "
                    f"remote_addr={request.remote_addr}, proxied={is_proxied}, "
                    f"host={request.host}"
                )
                error_msg = (
                    "Session cookie error: You're accessing over HTTP from a "
                    "public IP address or through a proxy. "
                    "This is blocked for security reasons.\n\n"
                    "Solutions:\n"
                    "1. Use HTTPS with a reverse proxy (recommended for production)\n"
                    "2. Access from your local network (LAN IPs like 192.168.x.x work over HTTP)\n"
                    "3. Access directly from localhost (http://127.0.0.1:5000)\n"
                    "4. Use SSH tunnel: ssh -L 5000:localhost:5000 user@server, "
                    "then access http://localhost:5000\n\n"
                    "Note: LAN access (192.168.x.x, 10.x.x.x, 172.16-31.x.x) works over HTTP. "
                    "Only public internet access requires HTTPS."
                )

            return _json_error(error_msg, 400)
    except ImportError:
        pass

    # Handle News API exceptions globally
    try:
        from ..news.exceptions import NewsAPIException

        @app.errorhandler(NewsAPIException)
        def handle_news_api_exception(error):
            """Handle NewsAPIException and convert to JSON response."""
            logger.error(
                f"News API error: {error.message} (code: {error.error_code})"
            )
            payload = jsonify(error.to_dict())
            return payload, error.status_code
    except ImportError:
        # News module not available
        pass

796 

797 

def create_database(app):
    """Do nothing; retained only for backward compatibility.

    DEPRECATED: Database creation is now handled per-user via
    encrypted_db.py. The ``app`` argument is accepted and ignored, and the
    function returns ``None``.
    """