Coverage for src / local_deep_research / web / app_factory.py: 84%

365 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-25 01:07 +0000

1# import logging - replaced with loguru 

2import ipaddress 

3import os 

4from pathlib import Path 

5from importlib import resources as importlib_resources 

6 

7from flask import ( 

8 Flask, 

9 Request, 

10 abort, 

11 jsonify, 

12 make_response, 

13 request, 

14 send_from_directory, 

15) 

16from flask_wtf.csrf import CSRFProtect 

17from werkzeug.middleware.proxy_fix import ProxyFix 

18from loguru import logger 

19from local_deep_research.settings.logger import log_settings 

20 

21from ..utilities.log_utils import InterceptHandler 

22from ..security import SecurityHeaders, get_security_default 

23from .utils.rate_limiter import limiter 

24from ..security.file_upload_validator import FileUploadValidator 

25 

26# Removed DB_PATH import - using per-user databases now 

27from .services.socket_service import SocketIOService 

28 

29 

def _is_private_ip(ip_str: str) -> bool:
    """Check if IP is a private/local network address (RFC 1918 + localhost).

    This allows LAN access over HTTP without requiring HTTPS, matching the
    behavior of other self-hosted applications like Jellyfin and Home Assistant.

    Private ranges: 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16, plus localhost
    (and anything else the ``ipaddress`` module classifies as private).

    IPv4-mapped IPv6 addresses (``::ffff:a.b.c.d``) are judged by the IPv4
    address they embed, so ``::ffff:192.168.1.5`` is private while
    ``::ffff:8.8.8.8`` is not.

    Args:
        ip_str: Textual IPv4 or IPv6 address.

    Returns:
        True if the address is private or loopback; False otherwise,
        including when ``ip_str`` cannot be parsed as an IP address.
    """
    try:
        ip = ipaddress.ip_address(ip_str)
    except ValueError:
        return False

    # Classify the embedded IPv4 address ourselves instead of relying on the
    # interpreter's IPv6 table: how ``is_private`` treats ::ffff:0:0/96 has
    # differed between Python versions. IPv4Address has no ``ipv4_mapped``.
    mapped_ipv4 = getattr(ip, "ipv4_mapped", None)
    if mapped_ipv4 is not None:
        ip = mapped_ipv4

    return ip.is_private or ip.is_loopback

43 

44 

class DiskSpoolingRequest(Request):
    """Request subclass installed by the app factory to bound upload memory.

    The intent is that multipart uploads larger than ``max_form_memory_size``
    are written to temporary files on disk rather than held in memory.

    Security fix for issue #1176: with 200 files x 50MB allowed per request,
    the default behavior could consume 10GB+ of memory for a single request.

    NOTE(review): Werkzeug documents ``max_form_memory_size`` as the size
    limit for non-file form fields - confirm that overriding it is what
    actually triggers disk spooling of uploaded files.
    """

    # 5 MiB threshold, expressed in bytes
    max_form_memory_size = 5 * 1024**2

58 

59 

def _open_owner_only(path, flags):
    """Opener for ``open()`` that creates the file with mode 0o600."""
    return os.open(path, flags, 0o600)


def _load_or_create_secret_key(secret_key_file):
    """Return the per-installation Flask SECRET_KEY stored in *secret_key_file*.

    Args:
        secret_key_file: ``Path`` of the key file inside the data directory.

    Returns:
        str: The stored key when the file is readable and non-empty.
        Otherwise a freshly generated key, which is persisted when possible.
        If the file exists but cannot be read, the new key is used for this
        process only and the file is left untouched.
    """
    import secrets

    if secret_key_file.exists():
        try:
            with open(secret_key_file, "r") as f:
                stored_key = f.read().strip()
        except Exception as e:
            logger.warning(f"Could not read secret key file: {e}")
            return secrets.token_hex(32)
        if stored_key:
            return stored_key
        # An empty key would leave Flask without a usable SECRET_KEY,
        # so treat an empty file like a missing one.
        logger.warning("Secret key file is empty; generating a new SECRET_KEY")

    # Generate a new key on first run
    new_key = secrets.token_hex(32)
    try:
        secret_key_file.parent.mkdir(parents=True, exist_ok=True)
        # Create the file owner-only from the start so the key is never
        # briefly readable with default permissions.
        with open(secret_key_file, "w", opener=_open_owner_only) as f:
            f.write(new_key)
        # Also tighten permissions when the file already existed.
        secret_key_file.chmod(0o600)
        logger.info("Generated new SECRET_KEY for this installation")
    except Exception as e:
        logger.warning(f"Could not save secret key file: {e}")
    return new_key


def create_app():
    """
    Create and configure the Flask application.

    Builds the Flask app, wraps it in the WSGI middleware stack (ProxyFix,
    dynamic Secure-cookie handling, Server header removal), configures
    sessions, CSRF, security headers and rate limiting, initializes asset and
    theme helpers, the Socket.IO service and the optional news scheduler,
    registers request middleware, blueprints and error handlers, and starts
    the research queue processor.

    Returns:
        tuple: (app, socket_service) - The configured Flask app and the
        SocketIOService created for it
    """
    # Set Werkzeug logger to WARNING level to suppress Socket.IO polling logs
    import logging

    logging.getLogger("werkzeug").setLevel(logging.WARNING)
    logging.getLogger("werkzeug").addHandler(InterceptHandler())

    logger.info("Initializing Local Deep Research application...")

    try:
        # Get directories based on package installation
        PACKAGE_DIR = importlib_resources.files("local_deep_research") / "web"
        with importlib_resources.as_file(PACKAGE_DIR) as package_dir:
            STATIC_DIR = (package_dir / "static").as_posix()
            TEMPLATE_DIR = (package_dir / "templates").as_posix()

        # Initialize Flask app with package directories
        # Set static_folder to None to disable Flask's built-in static handling
        # We'll use our custom static route instead to handle dist folder
        app = Flask(__name__, static_folder=None, template_folder=TEMPLATE_DIR)
        # Store static dir for custom handling
        app.config["STATIC_DIR"] = STATIC_DIR
        logger.debug(f"Using package static path: {STATIC_DIR}")
        logger.debug(f"Using package template path: {TEMPLATE_DIR}")
    except Exception:
        # Fallback for development
        logger.exception("Package directories not found, using fallback paths")
        # Set static_folder to None to disable Flask's built-in static handling
        app = Flask(
            __name__,
            static_folder=None,
            template_folder=str(Path("templates").resolve()),
        )
        # Store static dir for custom handling
        app.config["STATIC_DIR"] = str(Path("static").resolve())

    # Use custom Request class that spools large uploads to disk
    # This prevents memory exhaustion from large file uploads (issue #1176)
    app.request_class = DiskSpoolingRequest

    # Add proxy support for deployments behind load balancers/reverse proxies
    # This ensures X-Forwarded-For and X-Forwarded-Proto headers are properly handled
    # Important for rate limiting and security (gets real client IP, not proxy IP)
    app.wsgi_app = ProxyFix(
        app.wsgi_app,
        x_for=1,  # Trust 1 proxy for X-Forwarded-For
        x_proto=1,  # Trust 1 proxy for X-Forwarded-Proto (http/https)
        x_host=0,  # Don't trust X-Forwarded-Host (security)
        x_port=0,  # Don't trust X-Forwarded-Port (security)
        x_prefix=0,  # Don't trust X-Forwarded-Prefix (security)
    )

    # WSGI middleware for dynamic cookie security
    # This wraps ProxyFix, so it runs first and sees the original REMOTE_ADDR
    # Must be WSGI level because Flask session cookies are set after after_request handlers
    class SecureCookieMiddleware:
        """WSGI middleware to add Secure flag to cookies based on request context.

        Security model (implemented in _should_add_secure_flag):
        - LDR_TESTING_MODE: Never add Secure flag (for CI/development)
        - HTTPS request: Always add Secure flag
        - HTTP from a private/loopback REMOTE_ADDR: Skip Secure flag
        - HTTP from a public REMOTE_ADDR: Add Secure flag (the cookie will
          not be sent back over HTTP, by design - use HTTPS)

        X-Forwarded-For is never consulted, so spoofing that header cannot
        influence the decision.
        """

        def __init__(self, wsgi_app, flask_app):
            self.wsgi_app = wsgi_app
            self.flask_app = flask_app

        def __call__(self, environ, start_response):
            # Decide up front, before the wrapped app touches environ
            should_add_secure = self._should_add_secure_flag(environ)

            def custom_start_response(status, headers, exc_info=None):
                if should_add_secure:
                    # Rewrite Set-Cookie headers; pass everything else through
                    headers = [
                        (name, self._with_secure_attribute(value))
                        if name.lower() == "set-cookie"
                        else (name, value)
                        for name, value in headers
                    ]
                return start_response(status, headers, exc_info)

            return self.wsgi_app(environ, custom_start_response)

        @staticmethod
        def _with_secure_attribute(cookie):
            """Return *cookie* with a Secure attribute, adding one if absent.

            Attributes are compared case-insensitively and independent of
            the whitespace after ';', so "; secure", ";Secure" and
            "; SECURE" are all recognized as already present.
            """
            attributes = [
                part.strip().lower() for part in cookie.split(";")[1:]
            ]
            if "secure" in attributes:
                return cookie
            return cookie + "; Secure"

        def _should_add_secure_flag(self, environ):
            """Determine if Secure flag should be added based on request context.

            Security model:
            - Check the ACTUAL connection IP (REMOTE_ADDR), not X-Forwarded-For header
            - SecureCookieMiddleware is outer wrapper, so we see original REMOTE_ADDR
            - If connection comes from private IP (client or proxy), allow HTTP
            - If connection comes from public IP, require HTTPS

            This is safe because:
            - We never trust X-Forwarded-For header values (can be spoofed)
            - We only check the actual TCP connection source IP
            - Spoofing X-Forwarded-For from public IP doesn't bypass this check
            - Local proxies (nginx on localhost/LAN) have private REMOTE_ADDR

            NOTE(review): wsgi.url_scheme is read here before the wrapped
            ProxyFix runs, so it presumably does not reflect
            X-Forwarded-Proto - confirm that HTTPS terminated at a local
            proxy is meant to skip the Secure flag.
            """
            # Skip if in explicit testing mode
            if self.flask_app.config.get("LDR_TESTING_MODE"):
                return False

            # Check actual connection source IP (before ProxyFix modifies it)
            # This is either:
            # - Direct client IP (if no proxy)
            # - Proxy server IP (if behind proxy)
            # Local proxies (nginx on localhost, Traefik on LAN) have private IPs
            remote_addr = environ.get("REMOTE_ADDR", "")
            is_private = _is_private_ip(remote_addr)

            # Check if HTTPS
            is_https = environ.get("wsgi.url_scheme") == "https"

            # Add Secure flag if:
            # - Using HTTPS (always secure over HTTPS)
            # - OR connection is from public IP (require HTTPS for public access)
            return is_https or not is_private

    # Wrap the app with our cookie security middleware
    app.wsgi_app = SecureCookieMiddleware(app.wsgi_app, app)

    # WSGI middleware to remove Server header
    # This must be the outermost wrapper to catch headers added by Werkzeug
    class ServerHeaderMiddleware:
        """WSGI middleware to remove Server header from all responses.

        Prevents information disclosure about the underlying web server.
        Must be outermost middleware to catch headers added by WSGI layer.
        """

        def __init__(self, wsgi_app):
            self.wsgi_app = wsgi_app

        def __call__(self, environ, start_response):
            def custom_start_response(status, headers, exc_info=None):
                filtered_headers = [
                    (name, value)
                    for name, value in headers
                    if name.lower() != "server"
                ]
                return start_response(status, filtered_headers, exc_info)

            return self.wsgi_app(environ, custom_start_response)

    # Apply ServerHeaderMiddleware as outermost wrapper
    app.wsgi_app = ServerHeaderMiddleware(app.wsgi_app)

    # App configuration
    # Generate or load a unique SECRET_KEY per installation
    from ..config.paths import get_data_directory

    secret_key_file = Path(get_data_directory()) / ".secret_key"
    app.config["SECRET_KEY"] = _load_or_create_secret_key(secret_key_file)

    # Session cookie security settings
    # SECURE flag is added dynamically based on request context
    # (see SecureCookieMiddleware above)
    # This allows localhost HTTP to work for development while keeping production secure
    #
    # Check if explicitly in testing mode (for backwards compatibility)
    # NOTE(review): any non-empty value of these variables (even "false" or
    # "0") enables testing mode - confirm this is intended.
    is_testing = (
        os.getenv("CI")
        or os.getenv("TESTING")
        or os.getenv("PYTEST_CURRENT_TEST")
        or app.debug
    )
    # Set to False - we add Secure flag dynamically in SecureCookieMiddleware
    # Exception: if TESTING mode is active, we never add Secure flag
    app.config["SESSION_COOKIE_SECURE"] = False
    # Read by SecureCookieMiddleware on every request
    app.config["LDR_TESTING_MODE"] = bool(is_testing)
    app.config["SESSION_COOKIE_HTTPONLY"] = (
        True  # Prevent JavaScript access (XSS mitigation)
    )
    app.config["SESSION_COOKIE_SAMESITE"] = "Lax"  # CSRF protection
    # Set max cookie lifetime for permanent sessions (when session.permanent=True).
    # This applies to "remember me" sessions; non-permanent sessions expire on browser close.
    remember_me_days = get_security_default(
        "security.session_remember_me_days", 30
    )
    app.config["PERMANENT_SESSION_LIFETIME"] = remember_me_days * 24 * 3600
    # PREFERRED_URL_SCHEME affects URL generation (url_for), not request.is_secure
    app.config["PREFERRED_URL_SCHEME"] = "https"

    # File upload security limits - calculated from FileUploadValidator constants
    app.config["MAX_CONTENT_LENGTH"] = (
        FileUploadValidator.MAX_FILES_PER_REQUEST
        * FileUploadValidator.MAX_FILE_SIZE
    )

    # Initialize CSRF protection
    # Explicitly enable CSRF protection (don't rely on implicit Flask-WTF behavior)
    app.config["WTF_CSRF_ENABLED"] = True
    CSRFProtect(app)
    # Exempt Socket.IO from CSRF protection
    # Note: Flask-SocketIO handles CSRF internally, so we don't need to exempt specific views

    # Initialize security headers middleware
    SecurityHeaders(app)

    # Initialize rate limiting for security (brute force protection)
    # Uses imported limiter from utils.rate_limiter module
    # Rate limiting is disabled in CI via enabled callable in rate_limiter.py
    # Also set app config to ensure Flask-Limiter respects our settings
    from ..settings.env_registry import is_rate_limiting_enabled

    app.config["RATELIMIT_ENABLED"] = is_rate_limiting_enabled()
    limiter.init_app(app)

    # Custom error handler for rate limit exceeded (429)
    @app.errorhandler(429)
    def ratelimit_handler(e):
        # Import here to avoid circular imports
        from .utils.rate_limiter import get_client_ip

        # Audit logging for security monitoring
        # Use get_client_ip() to get the real IP behind proxies
        logger.warning(
            f"Rate limit exceeded: endpoint={request.endpoint} "
            f"ip={get_client_ip()} "
            f"user_agent={request.headers.get('User-Agent', 'unknown')}"
        )
        return jsonify(
            error="Too many requests",
            message="Too many attempts. Please try again later.",
        ), 429

    # Note: Dynamic cookie security is handled by SecureCookieMiddleware (WSGI level)
    # This is necessary because Flask's session cookies are set AFTER after_request handlers
    # The middleware wrapping happens above, right after ProxyFix

    # Note: CSRF exemptions for API blueprints are applied after blueprint
    # registration in register_blueprints() (search for "CSRF exemptions").

    # Database configuration - Using per-user databases now
    # No shared database configuration needed
    app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False
    app.config["SQLALCHEMY_ECHO"] = False

    # Per-user databases are created automatically via encrypted_db.py

    # Log data location and security information
    from ..database.encrypted_db import db_manager

    data_dir = get_data_directory()
    logger.info("=" * 60)
    logger.info("DATA STORAGE INFORMATION")
    logger.info("=" * 60)
    logger.info(f"Data directory: {data_dir}")
    logger.info(
        "Databases: Per-user encrypted databases in encrypted_databases/"
    )

    # Check if using custom location
    from local_deep_research.settings.manager import SettingsManager

    settings_manager = SettingsManager()
    custom_data_dir = settings_manager.get_setting("bootstrap.data_dir")
    if custom_data_dir:
        logger.info(
            f"Using custom data location via LDR_DATA_DIR: {custom_data_dir}"
        )
    else:
        logger.info("Using default platform-specific data location")

    # Display security status based on actual SQLCipher availability
    if db_manager.has_encryption:
        logger.info(
            "SECURITY: Databases are encrypted with SQLCipher. Ensure appropriate file system permissions are set on the data directory."
        )
    else:
        logger.warning(
            "SECURITY NOTICE: SQLCipher is not available - databases are NOT encrypted. "
            "Install SQLCipher for database encryption. Ensure appropriate file system permissions are set on the data directory."
        )

    logger.info(
        "TIP: You can change the data location by setting the LDR_DATA_DIR environment variable."
    )
    logger.info("=" * 60)

    # Initialize Vite helper for asset management
    from .utils.vite_helper import vite

    vite.init_app(app)

    # Initialize Theme helper for auto-detecting themes from CSS
    from .utils.theme_helper import theme_helper

    theme_helper.init_app(app)

    # Generate combined themes.css from individual theme files
    from .themes import theme_registry

    try:
        static_dir = Path(app.config.get("STATIC_DIR", "static"))
        themes_css_path = static_dir / "css" / "themes.css"
        combined_css = theme_registry.get_combined_css()
        themes_css_path.write_text(combined_css, encoding="utf-8")
        logger.debug(
            f"Generated themes.css with {len(theme_registry.themes)} themes"
        )
    except PermissionError:
        # Not writable (e.g. read-only install): keep going without it
        logger.warning(
            f"Cannot write themes.css to {themes_css_path}. "
            "Theme CSS will need to be pre-generated."
        )
    except Exception:
        logger.exception("Error generating combined themes.css")

    # Register socket service
    socket_service = SocketIOService(app=app)

    # Initialize news subscription scheduler
    try:
        # News tables are now created per-user in their encrypted databases
        logger.info(
            "News tables will be created in per-user encrypted databases"
        )

        # Check if scheduler is enabled BEFORE importing/initializing
        # Use env registry which handles both env vars and settings
        from ..settings.env_registry import get_env_setting

        scheduler_enabled = get_env_setting("news.scheduler.enabled", True)
        logger.info(f"News scheduler enabled: {scheduler_enabled}")

        if scheduler_enabled:
            # Only import and initialize if enabled
            from ..news.subscription_manager.scheduler import (
                get_news_scheduler,
            )

            # Get scheduler instance and initialize it with the system
            # settings manager created above
            scheduler = get_news_scheduler()
            scheduler.initialize_with_settings(settings_manager)
            scheduler.start()
            app.news_scheduler = scheduler
            logger.info("News scheduler started with activity-based tracking")
        else:
            # Don't initialize scheduler if disabled
            app.news_scheduler = None
            logger.info("News scheduler disabled - not initializing")
    except Exception:
        # A broken scheduler must not prevent the web app from starting
        logger.exception("Failed to initialize news scheduler")
        app.news_scheduler = None

    # Apply middleware
    logger.info("Applying middleware...")
    apply_middleware(app)
    logger.info("Middleware applied successfully")

    # Register blueprints
    logger.info("Registering blueprints...")
    register_blueprints(app)
    logger.info("Blueprints registered successfully")

    # Register error handlers
    logger.info("Registering error handlers...")
    register_error_handlers(app)
    logger.info("Error handlers registered successfully")

    # Start the queue processor v2 (uses encrypted databases)
    # Always start the processor - it will handle per-user queue modes
    logger.info("Starting queue processor v2...")
    from .queue.processor_v2 import queue_processor

    queue_processor.start()
    logger.info("Started research queue processor v2")

    logger.info("App factory completed successfully")

    return app, socket_service

472 

473 

def apply_middleware(app):
    """Apply middleware to the Flask app.

    Registers the per-request hooks in the order they must run (session
    cleanup, user database, current user, research cleanup, queue handling,
    log flushing, Socket.IO pre-check) plus an app-context teardown that
    releases the request's database session.

    Args:
        app: The Flask application to register the hooks on.
    """

    # Import auth decorators and middleware
    logger.info("Importing cleanup_middleware...")
    from .auth.cleanup_middleware import cleanup_completed_research

    logger.info("Importing database_middleware...")
    from .auth.database_middleware import ensure_user_database

    logger.info("Importing decorators...")
    from .auth.decorators import inject_current_user

    logger.info("Importing queue_middleware...")
    from .auth.queue_middleware import process_pending_queue_operations

    logger.info("Importing queue_middleware_v2...")
    from .auth.queue_middleware_v2 import notify_queue_processor

    logger.info("Importing session_cleanup...")
    from .auth.session_cleanup import cleanup_stale_sessions

    logger.info("All middleware imports completed")

    # Register authentication middleware
    # First clean up stale sessions
    app.before_request(cleanup_stale_sessions)
    # Then ensure database is open for authenticated users
    app.before_request(ensure_user_database)
    # Then inject current user into g
    app.before_request(inject_current_user)
    # Clean up completed research records
    app.before_request(cleanup_completed_research)
    # Process any pending queue operations for this user (direct mode)
    app.before_request(process_pending_queue_operations)
    # Notify queue processor of user activity (queue mode)
    app.before_request(notify_queue_processor)

    logger.info("All middleware registered")

    # Flush any queued logs from background threads
    logger.info("Importing log_utils...")
    from ..utilities.log_utils import flush_log_queue

    app.before_request(flush_log_queue)
    logger.info("Log flushing middleware registered")

    # Clean up database sessions after each request
    @app.teardown_appcontext
    def cleanup_db_session(exception=None):
        """Clean up database session after each request to avoid cross-thread issues.

        Rolls back when the context is torn down with an exception, then
        closes the session. Cleanup is best-effort: failures are logged at
        debug level and never raised.
        """
        from flask import g

        session = g.pop("db_session", None)
        if session is None:
            return

        if exception:
            try:
                session.rollback()
            except Exception as e:
                logger.debug(
                    f"Ignoring rollback error during session cleanup: {e}"
                )

        # Always attempt to close, even if the rollback above failed;
        # otherwise the session would be left open.
        try:
            session.close()
        except Exception as e:
            logger.debug(f"Ignoring close error during session cleanup: {e}")

    # Add a middleware layer to handle abrupt disconnections
    @app.before_request
    def handle_websocket_requests():
        """Pre-check /socket.io requests without blocking normal handling.

        Returns None (request continues) unless reading the WSGI environ
        raises; in that case the error is logged and an empty 200 response
        short-circuits the request.
        """
        if request.path.startswith("/socket.io"):
            try:
                if not request.environ.get("werkzeug.socket"):
                    return
            except Exception:
                logger.exception("WebSocket preprocessing error")
                # Return empty response to prevent further processing
                return "", 200

    # Note: CORS headers for API routes are now handled by SecurityHeaders middleware
    # (see src/local_deep_research/security/security_headers.py)

550 

551 

def register_blueprints(app):
    """Attach every blueprint and the app-level routes to ``app``.

    Imports happen lazily, with a log line before each, and blueprints are
    registered in a fixed order with auth first so the login page is
    reachable. Also defines the ``/``, ``/favicon.ico`` and
    ``/static/<path>`` routes and applies the CSRF exemptions for the API
    blueprints.

    Args:
        app: The Flask application being assembled.
    """

    logger.info("Importing blueprints...")

    # Benchmark blueprint
    from ..benchmarks.web_api.benchmark_routes import benchmark_bp

    logger.info("Importing API blueprint...")
    from .api import api_blueprint

    logger.info("Importing auth blueprint...")
    from .auth import auth_bp

    logger.info("Importing API routes blueprint...")
    from .routes.api_routes import api_bp

    logger.info("Importing context overflow API...")
    from .routes.context_overflow_api import context_overflow_bp

    logger.info("Importing history routes...")
    from .routes.history_routes import history_bp

    logger.info("Importing metrics routes...")
    from .routes.metrics_routes import metrics_bp

    logger.info("Importing research routes...")
    from .routes.research_routes import research_bp

    logger.info("Importing settings routes...")
    from .routes.settings_routes import settings_bp

    logger.info("All core blueprints imported successfully")

    @app.route("/")
    def index():
        """Root route - redirect to login if not authenticated"""
        from flask import redirect, session, url_for

        from ..database.session_context import get_user_db_session
        from ..utilities.db_utils import get_settings_manager
        from .utils.templates import render_template_with_defaults

        # Anonymous visitors go to the login page
        if "username" not in session:
            return redirect(url_for("auth.login"))

        # (template key, positional arguments for get_setting); entries
        # without a second argument rely on get_setting's own default
        setting_specs = (
            ("llm_provider", ("llm.provider", "ollama")),
            ("llm_model", ("llm.model", "")),
            ("llm_openai_endpoint_url", ("llm.openai_endpoint.url", "")),
            ("llm_ollama_url", ("llm.ollama.url",)),
            ("llm_lmstudio_url", ("llm.lmstudio.url",)),
            (
                "llm_local_context_window_size",
                ("llm.local_context_window_size",),
            ),
            ("search_tool", ("search.tool", "")),
            ("search_iterations", ("search.iterations", 3)),
            (
                "search_questions_per_iteration",
                ("search.questions_per_iteration", 2),
            ),
            ("search_strategy", ("search.search_strategy", "source-based")),
        )

        # Read the user's current settings through their own database session
        username = session.get("username")
        page_settings = {}
        with get_user_db_session(username) as db_session:
            if db_session:
                manager = get_settings_manager(db_session, username)
                page_settings = {
                    template_key: manager.get_setting(*args)
                    for template_key, args in setting_specs
                }

        # Debug logging
        log_settings(page_settings, "Research page settings loaded")

        return render_template_with_defaults(
            "pages/research.html", settings=page_settings
        )

    # Core blueprints in registration order. Auth comes FIRST so the login
    # page is accessible; blueprints with empty options carry their own
    # url_prefix (or none).
    core_blueprints = (
        (auth_bp, {}),  # url_prefix="/auth"
        (research_bp, {}),
        (history_bp, {}),  # url_prefix="/history"
        (metrics_bp, {}),
        (settings_bp, {}),  # url_prefix="/settings"
        (api_bp, {"url_prefix": "/research/api"}),
        (benchmark_bp, {}),
        (context_overflow_bp, {"url_prefix": "/metrics"}),
    )
    for blueprint, options in core_blueprints:
        app.register_blueprint(blueprint, **options)

    # News API routes
    from .routes import news_routes

    app.register_blueprint(news_routes.bp)
    logger.info("News API routes registered successfully")

    # Follow-up research routes
    from ..followup_research.routes import followup_bp

    app.register_blueprint(followup_bp)
    logger.info("Follow-up research routes registered successfully")

    # News page blueprint
    from ..news.web import create_news_blueprint

    news_bp = create_news_blueprint()
    app.register_blueprint(news_bp, url_prefix="/news")
    logger.info("News page routes registered successfully")

    # API v1 blueprint (already has url_prefix='/api/v1')
    app.register_blueprint(api_blueprint)

    # Research Library, RAG Management and Deletion Management blueprints
    # (url_prefix '/library', '/library' and '/library/api' respectively)
    from ..research_library import library_bp, rag_bp, delete_bp

    library_blueprints = (
        (library_bp, "Research Library routes registered successfully"),
        (rag_bp, "RAG Management routes registered successfully"),
        (delete_bp, "Deletion Management routes registered successfully"),
    )
    for blueprint, done_message in library_blueprints:
        app.register_blueprint(blueprint)
        logger.info(done_message)

    # Document Scheduler blueprint
    from ..research_scheduler.routes import scheduler_bp

    app.register_blueprint(scheduler_bp)
    logger.info("Document Scheduler routes registered successfully")

    # CSRF exemptions — Flask-WTF requires Blueprint objects (not strings)
    # to populate _exempt_blueprints. Passing strings only populates
    # _exempt_views, which compares against module-qualified names and
    # silently fails to match Flask endpoint names.
    if "csrf" in getattr(app, "extensions", ()):
        csrf_protect = app.extensions["csrf"]
        for exempt_name in ("api_v1", "api", "benchmark", "research"):
            exempt_bp = app.blueprints.get(exempt_name)
            if exempt_bp is None:
                continue
            csrf_protect.exempt(exempt_bp)

    # Favicon route, exempt from rate limiting
    @app.route("/favicon.ico")
    @limiter.exempt
    def favicon():
        return send_from_directory(
            app.config.get("STATIC_DIR", "static"),
            "favicon.ico",
            mimetype="image/x-icon",
        )

    # App-level static route for compatibility, exempt from rate limiting
    @app.route("/static/<path:path>")
    @limiter.exempt
    def app_serve_static(path):
        from ..security.path_validator import PathValidator

        static_root = Path(app.config.get("STATIC_DIR", "static"))

        # Built assets in dist/ take precedence over the plain static folder
        for base_dir in (static_root / "dist", static_root):
            try:
                # Use PathValidator to safely validate the path; any file
                # type is allowed for static assets
                candidate = PathValidator.validate_safe_path(
                    path,
                    base_dir,
                    allow_absolute=False,
                    required_extensions=None,
                )
                if candidate and candidate.exists():
                    return send_from_directory(str(base_dir), path)
            except ValueError:
                # Path validation failed for this root; try the next one
                continue

        abort(404)

765 

766 

def register_error_handlers(app):
    """Install the application's error handlers on ``app``.

    Covers 404 and 500 (JSON for paths under ``/api/``, plain text
    otherwise), CSRF validation failures, and NewsAPIException. The last
    two are skipped when their import fails.
    """

    def _respond(message, status):
        # JSON body for API paths, plain text for everything else
        if request.path.startswith("/api/"):
            return make_response(jsonify({"error": message}), status)
        return make_response(message, status)

    @app.errorhandler(404)
    def not_found(error):
        return _respond("Not found", 404)

    @app.errorhandler(500)
    def server_error(error):
        return _respond("Server error", 500)

    # CSRF validation errors get a more helpful message
    try:
        from flask_wtf.csrf import CSRFError

        @app.errorhandler(CSRFError)
        def handle_csrf_error(error):
            """Answer a CSRF failure with HTTP 400 and a JSON error message."""
            # Gather the signals that suggest the Secure cookie was dropped
            # because the browser is talking plain HTTP
            plain_http = not request.is_secure
            from_private_ip = _is_private_ip(request.remote_addr or "")
            via_proxy = request.headers.get("X-Forwarded-For") is not None

            message = str(error.description)

            likely_cookie_issue = plain_http and (
                via_proxy or not from_private_ip
            )
            if not likely_cookie_issue:
                return make_response(jsonify({"error": message}), 400)

            # HTTP + public IP or proxied scenario: explain the way out
            logger.warning(
                f"CSRF validation failed - likely due to Secure cookie over HTTP. "
                f"remote_addr={request.remote_addr}, proxied={via_proxy}, "
                f"host={request.host}"
            )
            message = (
                "Session cookie error: You're accessing over HTTP from a "
                "public IP address or through a proxy. "
                "This is blocked for security reasons.\n\n"
                "Solutions:\n"
                "1. Use HTTPS with a reverse proxy (recommended for production)\n"
                "2. Access from your local network (LAN IPs like 192.168.x.x work over HTTP)\n"
                "3. Access directly from localhost (http://127.0.0.1:5000)\n"
                "4. Use SSH tunnel: ssh -L 5000:localhost:5000 user@server, "
                "then access http://localhost:5000\n\n"
                "Note: LAN access (192.168.x.x, 10.x.x.x, 172.16-31.x.x) works over HTTP. "
                "Only public internet access requires HTTPS."
            )
            return make_response(jsonify({"error": message}), 400)

    except ImportError:
        pass

    # News API exceptions are converted to JSON application-wide
    try:
        from ..news.exceptions import NewsAPIException

        @app.errorhandler(NewsAPIException)
        def handle_news_api_exception(error):
            """Log the NewsAPIException and return it as a JSON response."""
            from loguru import logger

            logger.error(
                f"News API error: {error.message} (code: {error.error_code})"
            )
            payload = jsonify(error.to_dict())
            return payload, error.status_code

    except ImportError:
        # News module not available
        pass

837 

838 

def create_database(app):
    """
    DEPRECATED no-op kept so existing callers keep working.

    Database creation is now handled per-user via encrypted_db.py; the
    ``app`` argument is ignored and nothing is created here.
    """
    return None