Coverage for src / local_deep_research / web / app_factory.py: 88%
436 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
1# import logging - replaced with loguru
2import ipaddress
3import os
4from pathlib import Path
5from importlib import resources as importlib_resources
7from flask import (
8 Flask,
9 Request,
10 abort,
11 jsonify,
12 make_response,
13 request,
14 send_from_directory,
15)
16from flask_wtf.csrf import CSRFProtect
17from werkzeug.middleware.proxy_fix import ProxyFix
18from loguru import logger
19from local_deep_research.settings.logger import log_settings
21from ..utilities.log_utils import InterceptHandler
22from ..security import SecurityHeaders, get_security_default
23from ..security.rate_limiter import limiter
24from ..security.file_upload_validator import FileUploadValidator
26# Removed DB_PATH import - using per-user databases now
27from .services.socket_service import SocketIOService
30def _is_private_ip(ip_str: str) -> bool:
31 """Check if IP is a private/local network address (RFC 1918 + localhost).
33 This allows LAN access over HTTP without requiring HTTPS, matching the
34 behavior of other self-hosted applications like Jellyfin and Home Assistant.
36 Private ranges: 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16, plus localhost.
37 """
38 try:
39 ip = ipaddress.ip_address(ip_str)
40 return ip.is_private or ip.is_loopback
41 except ValueError:
42 return False
class DiskSpoolingRequest(Request):
    """Flask ``Request`` subclass with a raised in-memory form-data threshold.

    Intended to keep large multipart uploads from exhausting memory by
    spilling data above ``max_form_memory_size`` to temporary files on disk
    rather than buffering it in RAM.

    Security fix for issue #1176: With 200 files × 50MB limit, the default
    behavior could consume 10GB+ of memory per request.

    NOTE(review): Werkzeug documents ``max_form_memory_size`` as a limit on
    non-file form fields held in memory — confirm that it also governs file
    spooling for the Werkzeug version in use.
    """

    # 5 MiB threshold, expressed in bytes.
    max_form_memory_size = 5 * 1024**2
def create_app():
    """
    Create and configure the Flask application.

    Sets up logging interception, the Flask app and its WSGI middleware
    stack (ProxyFix -> SecureCookieMiddleware -> ServerHeaderMiddleware),
    the per-installation secret key, session/CSRF/rate-limit configuration,
    theme assets, the Socket.IO service, the optional news scheduler,
    request middleware, blueprints, error handlers and the queue processor.

    Returns:
        tuple: (app, socket_service) - The configured Flask app and the
        SocketIOService instance bound to it.
    """
    # Route stdlib loggers through loguru via InterceptHandler.
    # Guard against handler duplication when create_app() is called multiple
    # times (e.g. in tests).
    import logging

    # werkzeug: suppress verbose per-request logs.
    # apscheduler: it logs job execution results to its own logger hierarchy.
    # Without an InterceptHandler the WARNING+ messages only reach Python's
    # lastResort handler as unformatted stderr. Level is WARNING (not INFO)
    # because job functions already log their own progress via loguru.
    for stdlib_logger_name in ("werkzeug", "apscheduler"):
        stdlib_logger = logging.getLogger(stdlib_logger_name)
        stdlib_logger.setLevel(logging.WARNING)
        if not any(
            isinstance(h, InterceptHandler) for h in stdlib_logger.handlers
        ):
            stdlib_logger.addHandler(InterceptHandler())

    logger.info("Initializing Local Deep Research application...")

    try:
        # Get directories based on package installation
        PACKAGE_DIR = importlib_resources.files("local_deep_research") / "web"
        with importlib_resources.as_file(PACKAGE_DIR) as package_dir:
            STATIC_DIR = (package_dir / "static").as_posix()
            TEMPLATE_DIR = (package_dir / "templates").as_posix()

        # Initialize Flask app with package directories
        # Set static_folder to None to disable Flask's built-in static handling
        # We'll use our custom static route instead to handle dist folder
        app = Flask(__name__, static_folder=None, template_folder=TEMPLATE_DIR)
        # Store static dir for custom handling
        app.config["STATIC_DIR"] = STATIC_DIR
        logger.debug(f"Using package static path: {STATIC_DIR}")
        logger.debug(f"Using package template path: {TEMPLATE_DIR}")
    except Exception:
        # Fallback for development
        logger.exception("Package directories not found, using fallback paths")
        # Set static_folder to None to disable Flask's built-in static handling
        app = Flask(
            __name__,
            static_folder=None,
            template_folder=str(Path("templates").resolve()),
        )
        # Store static dir for custom handling
        app.config["STATIC_DIR"] = str(Path("static").resolve())

    # Use custom Request class that spools large uploads to disk
    # This prevents memory exhaustion from large file uploads (issue #1176)
    app.request_class = DiskSpoolingRequest

    # Add proxy support for deployments behind load balancers/reverse proxies
    # This ensures X-Forwarded-For and X-Forwarded-Proto headers are properly handled
    # Important for rate limiting and security (gets real client IP, not proxy IP)
    app.wsgi_app = ProxyFix(  # type: ignore[method-assign]
        app.wsgi_app,
        x_for=1,  # Trust 1 proxy for X-Forwarded-For
        x_proto=1,  # Trust 1 proxy for X-Forwarded-Proto (http/https)
        x_host=0,  # Don't trust X-Forwarded-Host (security)
        x_port=0,  # Don't trust X-Forwarded-Port (security)
        x_prefix=0,  # Don't trust X-Forwarded-Prefix (security)
    )

    # WSGI middleware for dynamic cookie security.
    # It wraps ProxyFix (i.e. it is the outer layer), so it sees the
    # connection's original REMOTE_ADDR before ProxyFix rewrites it.
    # Must be WSGI level because Flask session cookies are set after after_request handlers
    class SecureCookieMiddleware:
        """WSGI middleware that appends the Secure flag to Set-Cookie headers.

        Security model (see ``_should_add_secure_flag``):
        - LDR_TESTING_MODE: never add the Secure flag (for CI/development)
        - HTTPS request: always add the Secure flag
        - HTTP from a private/loopback connection address: skip the flag
        - HTTP from a public connection address: add the flag (the cookie
          will not work over plain HTTP, by design - use HTTPS)

        The decision uses the TCP peer address (REMOTE_ADDR), never the value
        of X-Forwarded-For, so a spoofed header cannot relax the policy.
        """

        def __init__(self, wsgi_app, flask_app):
            self.wsgi_app = wsgi_app
            self.flask_app = flask_app

        @staticmethod
        def _has_secure_attribute(cookie_value):
            """Return True if a Set-Cookie value already carries ``Secure``.

            Attributes are compared case-insensitively and independent of
            the whitespace after ``;``. The first segment (``name=value``)
            is skipped.
            """
            attributes = cookie_value.split(";")[1:]
            return any(attr.strip().lower() == "secure" for attr in attributes)

        def __call__(self, environ, start_response):
            # Check if we should add Secure flag
            should_add_secure = self._should_add_secure_flag(environ)

            def custom_start_response(status, headers, exc_info=None):
                if should_add_secure:
                    # Modify Set-Cookie headers to add Secure flag
                    new_headers = []
                    for name, value in headers:
                        if (
                            name.lower() == "set-cookie"
                            and not self._has_secure_attribute(value)
                        ):
                            value = value + "; Secure"
                        new_headers.append((name, value))
                    headers = new_headers
                return start_response(status, headers, exc_info)

            return self.wsgi_app(environ, custom_start_response)

        def _should_add_secure_flag(self, environ):
            """Determine if Secure flag should be added based on request context.

            Security model:
            - Check the ACTUAL connection IP (REMOTE_ADDR), not X-Forwarded-For header
            - SecureCookieMiddleware is outer wrapper, so we see original REMOTE_ADDR
            - If connection comes from private IP (client or proxy), allow HTTP
            - If connection comes from public IP, require HTTPS

            This is safe because:
            - We never trust X-Forwarded-For header values (can be spoofed)
            - We only check the actual TCP connection source IP
            - Spoofing X-Forwarded-For from public IP doesn't bypass this check
            - Local proxies (nginx on localhost/LAN) have private REMOTE_ADDR

            NOTE(review): this runs before ProxyFix processes the request,
            so ``wsgi.url_scheme`` presumably does not yet reflect
            X-Forwarded-Proto from a TLS-terminating local proxy - confirm
            whether cookies should be marked Secure in that deployment.
            """
            # Skip if in explicit testing mode
            if self.flask_app.config.get("LDR_TESTING_MODE"):
                return False

            # Check actual connection source IP (before ProxyFix modifies it)
            # This is either:
            # - Direct client IP (if no proxy)
            # - Proxy server IP (if behind proxy)
            # Local proxies (nginx on localhost, Traefik on LAN) have private IPs
            remote_addr = environ.get("REMOTE_ADDR", "")
            is_private = _is_private_ip(remote_addr)

            # Check if HTTPS
            is_https = environ.get("wsgi.url_scheme") == "https"

            # Add Secure flag if:
            # - Using HTTPS (always secure over HTTPS)
            # - OR connection is from public IP (require HTTPS for public access)
            return is_https or not is_private

    # Wrap the app with our cookie security middleware
    app.wsgi_app = SecureCookieMiddleware(app.wsgi_app, app)  # type: ignore[method-assign]

    # WSGI middleware to remove Server header
    # This must be the outermost wrapper to catch headers added by Werkzeug
    class ServerHeaderMiddleware:
        """WSGI middleware to remove Server header from all responses.

        Prevents information disclosure about the underlying web server.
        Must be outermost middleware to catch headers added by WSGI layer.
        """

        def __init__(self, wsgi_app):
            self.wsgi_app = wsgi_app

        def __call__(self, environ, start_response):
            def custom_start_response(status, headers, exc_info=None):
                filtered_headers = [
                    (name, value)
                    for name, value in headers
                    if name.lower() != "server"
                ]
                return start_response(status, filtered_headers, exc_info)

            return self.wsgi_app(environ, custom_start_response)

    # Apply ServerHeaderMiddleware as outermost wrapper
    app.wsgi_app = ServerHeaderMiddleware(app.wsgi_app)  # type: ignore[method-assign]

    # App configuration
    # Generate or load a unique SECRET_KEY per installation
    import secrets
    from ..config.paths import get_data_directory

    secret_key_file = Path(get_data_directory()) / ".secret_key"
    secret_key_file.parent.mkdir(parents=True, exist_ok=True)
    new_key = secrets.token_hex(32)
    try:
        # O_EXCL makes creation atomic: exactly one process wins the race to
        # create the file; everyone else lands in FileExistsError below.
        fd = os.open(
            str(secret_key_file), os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o600
        )
        try:
            os.write(fd, new_key.encode())
        finally:
            os.close(fd)
        app.config["SECRET_KEY"] = new_key
        logger.info("Generated new SECRET_KEY for this installation")
    except FileExistsError:
        stored_key = ""
        try:
            with open(secret_key_file, "r", encoding="utf-8") as f:
                stored_key = f.read().strip()
        except Exception:
            logger.warning("Could not read secret key file")
        else:
            if not stored_key:
                # An empty file would otherwise yield SECRET_KEY == "",
                # which Flask treats as "no secret key set".
                logger.warning(
                    "Secret key file is empty; using a temporary key for this process"
                )
        app.config["SECRET_KEY"] = stored_key or new_key
    except OSError:
        logger.warning("Could not save secret key file")
        app.config["SECRET_KEY"] = new_key

    # Session cookie security settings
    # SECURE flag is added dynamically based on request context
    # (see SecureCookieMiddleware above).
    # This allows localhost HTTP to work for development while keeping production secure
    #
    # Check if explicitly in testing mode (for backwards compatibility)
    is_testing = (
        os.getenv("CI")
        or os.getenv("TESTING")
        or os.getenv("PYTEST_CURRENT_TEST")
        or app.debug
    )
    # Set to False - the Secure flag is added dynamically by SecureCookieMiddleware.
    # Exception: if TESTING mode is active, we never add Secure flag
    app.config["SESSION_COOKIE_SECURE"] = False
    # Read by SecureCookieMiddleware._should_add_secure_flag
    app.config["LDR_TESTING_MODE"] = bool(is_testing)
    app.config["SESSION_COOKIE_HTTPONLY"] = (
        True  # Prevent JavaScript access (XSS mitigation)
    )
    app.config["SESSION_COOKIE_SAMESITE"] = "Lax"  # CSRF protection
    # Set max cookie lifetime for permanent sessions (when session.permanent=True).
    # This applies to "remember me" sessions; non-permanent sessions expire on browser close.
    remember_me_days = get_security_default(
        "security.session_remember_me_days", 30
    )
    app.config["PERMANENT_SESSION_LIFETIME"] = remember_me_days * 24 * 3600
    # PREFERRED_URL_SCHEME affects URL generation (url_for), not request.is_secure
    app.config["PREFERRED_URL_SCHEME"] = "https"

    # File upload security limits - calculated from FileUploadValidator constants
    app.config["MAX_CONTENT_LENGTH"] = (
        FileUploadValidator.MAX_FILES_PER_REQUEST
        * FileUploadValidator.MAX_FILE_SIZE
    )

    # Initialize CSRF protection
    # Explicitly enable CSRF protection (don't rely on implicit Flask-WTF behavior)
    app.config["WTF_CSRF_ENABLED"] = True
    CSRFProtect(app)
    # Exempt Socket.IO from CSRF protection
    # Note: Flask-SocketIO handles CSRF internally, so we don't need to exempt specific views

    # Initialize security headers middleware
    SecurityHeaders(app)

    # Initialize rate limiting for security (brute force protection)
    # Uses imported limiter from security.rate_limiter module
    # Rate limiting is disabled in CI via enabled callable in rate_limiter.py
    # Also set app config to ensure Flask-Limiter respects our settings
    from ..settings.env_registry import is_rate_limiting_enabled

    app.config["RATELIMIT_ENABLED"] = is_rate_limiting_enabled()
    app.config["RATELIMIT_STRATEGY"] = "moving-window"
    limiter.init_app(app)

    # Custom error handler for rate limit exceeded (429)
    @app.errorhandler(429)
    def ratelimit_handler(e):
        # Import here to avoid circular imports
        from ..security.rate_limiter import get_client_ip

        # Audit logging for security monitoring
        # Use get_client_ip() to get the real IP behind proxies
        logger.warning(
            f"Rate limit exceeded: endpoint={request.endpoint} "
            f"ip={get_client_ip()} "
            f"user_agent={request.headers.get('User-Agent', 'unknown')}"
        )
        return jsonify(
            error="Too many requests",
            message="Too many attempts. Please try again later.",
        ), 429

    # Note: Dynamic cookie security is handled by SecureCookieMiddleware (WSGI level)
    # This is necessary because Flask's session cookies are set AFTER after_request handlers
    # The middleware wrapping happens above, right after ProxyFix

    # Note: CSRF exemptions for API blueprints are applied after blueprint
    # registration (search for "CSRF exemptions" in this file).

    # Database configuration - Using per-user databases now
    # No shared database configuration needed
    app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False
    app.config["SQLALCHEMY_ECHO"] = False

    # Per-user databases are created automatically via encrypted_db.py

    # Log data location and security information
    from ..database.encrypted_db import db_manager

    data_dir = get_data_directory()
    logger.info("=" * 60)
    logger.info("DATA STORAGE INFORMATION")
    logger.info("=" * 60)
    logger.info(f"Data directory: {data_dir}")
    logger.info(
        "Databases: Per-user encrypted databases in encrypted_databases/"
    )

    # Check if using custom location
    from local_deep_research.settings.manager import SettingsManager

    settings_manager = SettingsManager()
    custom_data_dir = settings_manager.get_setting("bootstrap.data_dir")
    if custom_data_dir:
        logger.info(
            f"Using custom data location via LDR_DATA_DIR: {custom_data_dir}"
        )
    else:
        logger.info("Using default platform-specific data location")

    # Display security status based on actual SQLCipher availability
    if db_manager.has_encryption:
        logger.info(
            "SECURITY: Databases are encrypted with SQLCipher. Ensure appropriate file system permissions are set on the data directory."
        )
    else:
        logger.warning(
            "SECURITY NOTICE: SQLCipher is not available - databases are NOT encrypted. "
            "Install SQLCipher for database encryption. Ensure appropriate file system permissions are set on the data directory."
        )

    logger.info(
        "TIP: You can change the data location by setting the LDR_DATA_DIR environment variable."
    )
    logger.info("=" * 60)

    # Initialize Vite helper for asset management
    from .utils.vite_helper import vite

    vite.init_app(app)

    # Initialize Theme helper for auto-detecting themes from CSS
    from .utils.theme_helper import theme_helper

    theme_helper.init_app(app)

    # Generate combined themes.css from individual theme files
    from .themes import theme_registry

    try:
        static_dir = Path(app.config.get("STATIC_DIR", "static"))
        themes_css_path = static_dir / "css" / "themes.css"
        combined_css = theme_registry.get_combined_css()
        themes_css_path.write_text(combined_css, encoding="utf-8")
        logger.debug(
            f"Generated themes.css with {len(theme_registry.themes)} themes"
        )
    except PermissionError:
        logger.warning(
            f"Cannot write themes.css to {themes_css_path}. "
            "Theme CSS will need to be pre-generated."
        )
    except Exception:
        logger.exception("Error generating combined themes.css")

    # Register socket service
    socket_service = SocketIOService(app=app)

    # Initialize news subscription scheduler
    try:
        # News tables are now created per-user in their encrypted databases
        logger.info(
            "News tables will be created in per-user encrypted databases"
        )

        # Check if scheduler is enabled BEFORE importing/initializing
        # Use env registry which handles both env vars and settings
        from ..settings.env_registry import get_env_setting

        scheduler_enabled = get_env_setting("news.scheduler.enabled", True)
        logger.info(f"News scheduler enabled: {scheduler_enabled}")

        if scheduler_enabled:
            # Only import and initialize if enabled
            from ..news.subscription_manager.scheduler import (
                get_news_scheduler,
            )

            # Get scheduler instance and initialize it with the system
            # settings manager created above.
            scheduler = get_news_scheduler()
            scheduler.initialize_with_settings(settings_manager)
            scheduler.set_app(app)
            scheduler.start()
            app.news_scheduler = scheduler  # type: ignore[attr-defined]
            logger.info("News scheduler started with activity-based tracking")
        else:
            # Don't initialize scheduler if disabled
            app.news_scheduler = None  # type: ignore[attr-defined]
            logger.info("News scheduler disabled - not initializing")
    except Exception:
        # Scheduler failure must not prevent the web app from starting.
        logger.exception("Failed to initialize news scheduler")
        app.news_scheduler = None  # type: ignore[attr-defined]

    # Apply middleware
    logger.info("Applying middleware...")
    apply_middleware(app)
    logger.info("Middleware applied successfully")

    # Register blueprints
    logger.info("Registering blueprints...")
    register_blueprints(app)
    logger.info("Blueprints registered successfully")

    # Register error handlers
    logger.info("Registering error handlers...")
    register_error_handlers(app)
    logger.info("Error handlers registered successfully")

    # Start the queue processor v2 (uses encrypted databases)
    # Always start the processor - it will handle per-user queue modes
    logger.info("Starting queue processor v2...")
    from .queue.processor_v2 import queue_processor

    queue_processor.start()
    logger.info("Started research queue processor v2")

    logger.info("App factory completed successfully")

    return app, socket_service
def apply_middleware(app):
    """Apply middleware to the Flask app.

    Registers, in order, the ``before_request`` hooks for session cleanup,
    per-user database access, current-user injection, research cleanup and
    queue handling, then the log-queue flush hook, a template context
    processor exposing research-status constants, an app-context teardown
    that releases database sessions, and a ``/socket.io`` pre-processing hook.

    Args:
        app: The Flask application to register the hooks on.
    """

    # Import auth decorators and middleware
    logger.info("Importing cleanup_middleware...")
    from .auth.cleanup_middleware import cleanup_completed_research

    logger.info("Importing database_middleware...")
    from .auth.database_middleware import ensure_user_database

    logger.info("Importing decorators...")
    from .auth.decorators import inject_current_user

    logger.info("Importing queue_middleware...")
    from .auth.queue_middleware import process_pending_queue_operations

    logger.info("Importing queue_middleware_v2...")
    from .auth.queue_middleware_v2 import notify_queue_processor

    logger.info("Importing session_cleanup...")
    from .auth.session_cleanup import cleanup_stale_sessions

    logger.info("All middleware imports completed")

    # Register authentication middleware
    # First clean up stale sessions
    app.before_request(cleanup_stale_sessions)
    # Then ensure database is open for authenticated users
    app.before_request(ensure_user_database)
    # Then inject current user into g
    app.before_request(inject_current_user)
    # Clean up completed research records
    app.before_request(cleanup_completed_research)
    # Process any pending queue operations for this user (direct mode)
    app.before_request(process_pending_queue_operations)
    # Notify queue processor of user activity (queue mode)
    app.before_request(notify_queue_processor)

    logger.info("All middleware registered")

    # Flush any queued logs from background threads
    logger.info("Importing log_utils...")
    from ..utilities.log_utils import flush_log_queue

    app.before_request(flush_log_queue)
    logger.info("Log flushing middleware registered")

    # Inject backend constants into Jinja2 templates for frontend JS.
    # This is the Flask-documented pattern for sharing Python enums with JavaScript.
    # Source of truth: src/local_deep_research/constants.py::ResearchStatus
    # Frontend helpers: src/local_deep_research/web/static/js/config/constants.js
    # Template injection: src/local_deep_research/web/templates/base.html
    from ..constants import ResearchStatus

    @app.context_processor
    def inject_frontend_constants():
        terminal = [
            ResearchStatus.COMPLETED,
            ResearchStatus.SUSPENDED,
            ResearchStatus.FAILED,
            ResearchStatus.ERROR,
            ResearchStatus.CANCELLED,
        ]
        return {
            "research_status_enum": {m.name: m.value for m in ResearchStatus},
            "research_terminal_states": [str(s) for s in terminal],
        }

    # Clean up database sessions after each request
    @app.teardown_appcontext
    def cleanup_db_session(exception=None):
        """Clean up database session after each request to avoid cross-thread issues."""
        from flask import g

        session = g.pop("db_session", None)
        if session is not None:
            # Rollback and close are attempted independently so a failing
            # rollback does not prevent the session from being closed.
            try:
                session.rollback()
            except Exception:
                logger.warning(
                    "Error rolling back request session during cleanup"
                )
            try:
                session.close()
            except Exception:
                logger.warning("Error closing request session during cleanup")

        # Sweep credential entries for dead threads. Multiple trigger
        # points (here, processor_v2, and connection_cleanup scheduler)
        # ensure sweeps happen regardless of traffic patterns.
        try:
            from ..database.thread_local_session import cleanup_dead_threads

            cleanup_dead_threads()
        except Exception:
            # loguru does not honour the stdlib ``exc_info=True`` keyword;
            # ``opt(exception=True)`` is what attaches the traceback.
            logger.opt(exception=True).debug("Error during dead thread sweep")

        # Clean up any thread-local database session that may have been created
        # via get_metrics_session() fallback in session_context.py (e.g. background
        # threads or error paths where g.db_session was unavailable).
        try:
            from ..database.thread_local_session import cleanup_current_thread

            cleanup_current_thread()
        except Exception:
            logger.opt(exception=True).debug(
                "Error during thread-local session cleanup"
            )

    # Add a middleware layer to handle abrupt disconnections
    @app.before_request
    def handle_websocket_requests():
        if request.path.startswith("/socket.io"):
            try:
                if not request.environ.get("werkzeug.socket"):
                    return None
            except Exception:
                logger.exception("WebSocket preprocessing error")
                # Return empty response to prevent further processing
                return "", 200
        # Returning None lets Flask continue with normal request handling.
        return None

    # Note: CORS headers for API routes are now handled by SecurityHeaders middleware
    # (see src/local_deep_research/security/security_headers.py)
def register_blueprints(app):
    """Register blueprints with the Flask app.

    Besides the blueprints themselves, this also defines the app-level
    routes ``/`` (research page), ``/favicon.ico`` and ``/static/<path>``,
    and applies the CSRF exemption for the ``api_v1`` blueprint.

    Args:
        app: The Flask application to register routes and blueprints on.
    """

    # Import blueprints
    logger.info("Importing blueprints...")

    # Import benchmark blueprint
    from ..benchmarks.web_api.benchmark_routes import benchmark_bp

    logger.info("Importing API blueprint...")
    from .api import api_blueprint  # Import the API blueprint

    logger.info("Importing auth blueprint...")
    from .auth import auth_bp  # Import the auth blueprint

    logger.info("Importing API routes blueprint...")
    from .routes.api_routes import api_bp  # Import the API blueprint

    logger.info("Importing context overflow API...")
    from .routes.context_overflow_api import (
        context_overflow_bp,
    )  # Import context overflow API

    logger.info("Importing history routes...")
    from .routes.history_routes import history_bp

    logger.info("Importing metrics routes...")
    from .routes.metrics_routes import metrics_bp

    logger.info("Importing research routes...")
    from .routes.research_routes import research_bp

    logger.info("Importing settings routes...")
    from .routes.settings_routes import settings_bp

    logger.info("All core blueprints imported successfully")

    # Add root route
    @app.route("/")
    def index():
        """Root route - redirect to login if not authenticated"""
        from flask import redirect, session, url_for

        from ..constants import get_available_strategies
        from ..database.session_context import get_user_db_session
        from ..utilities.db_utils import get_settings_manager
        from .utils.templates import render_template_with_defaults

        # Check if user is authenticated
        if "username" not in session:
            return redirect(url_for("auth.login"))

        # Load current settings from database using proper session context
        username = session.get("username")
        settings = {}
        show_all = False
        with get_user_db_session(username) as db_session:
            if db_session:
                settings_manager = get_settings_manager(db_session, username)
                settings = {
                    "llm_provider": settings_manager.get_setting(
                        "llm.provider", "ollama"
                    ),
                    "llm_model": settings_manager.get_setting("llm.model", ""),
                    "llm_openai_endpoint_url": settings_manager.get_setting(
                        "llm.openai_endpoint.url", ""
                    ),
                    "llm_ollama_url": settings_manager.get_setting(
                        "llm.ollama.url"
                    ),
                    "llm_lmstudio_url": settings_manager.get_setting(
                        "llm.lmstudio.url"
                    ),
                    "llm_local_context_window_size": settings_manager.get_setting(
                        "llm.local_context_window_size"
                    ),
                    "search_tool": settings_manager.get_setting(
                        "search.tool", ""
                    ),
                    "search_iterations": settings_manager.get_setting(
                        "search.iterations", 3
                    ),
                    "search_questions_per_iteration": settings_manager.get_setting(
                        "search.questions_per_iteration", 2
                    ),
                    "search_strategy": settings_manager.get_setting(
                        "search.search_strategy", "source-based"
                    ),
                }
                show_all = settings_manager.get_setting(
                    "search.show_all_strategies", False
                )

        # Debug logging
        log_settings(settings, "Research page settings loaded")

        return render_template_with_defaults(
            "pages/research.html",
            settings=settings,
            strategies=get_available_strategies(show_all=bool(show_all)),
        )

    # Register auth blueprint FIRST (so login page is accessible)
    app.register_blueprint(auth_bp)  # Already has url_prefix="/auth"

    # Register other blueprints
    app.register_blueprint(research_bp)
    app.register_blueprint(history_bp)  # Already has url_prefix="/history"
    app.register_blueprint(metrics_bp)
    app.register_blueprint(settings_bp)  # Already has url_prefix="/settings"
    app.register_blueprint(
        api_bp, url_prefix="/research/api"
    )  # Register API blueprint with prefix
    app.register_blueprint(benchmark_bp)  # Register benchmark blueprint
    app.register_blueprint(
        context_overflow_bp, url_prefix="/metrics"
    )  # Register context overflow API

    # Register news API routes
    from .routes import news_routes

    app.register_blueprint(news_routes.bp)
    logger.info("News API routes registered successfully")

    # Register follow-up research routes
    from ..followup_research.routes import followup_bp

    app.register_blueprint(followup_bp)
    logger.info("Follow-up research routes registered successfully")

    # Register news page blueprint
    from ..news.web import create_news_blueprint

    news_bp = create_news_blueprint()
    app.register_blueprint(news_bp, url_prefix="/news")
    logger.info("News page routes registered successfully")

    # Register API v1 blueprint
    app.register_blueprint(api_blueprint)  # Already has url_prefix='/api/v1'

    # Register Research Library blueprint
    from ..research_library import library_bp, rag_bp, delete_bp

    app.register_blueprint(library_bp)  # Already has url_prefix='/library'
    logger.info("Research Library routes registered successfully")

    # Register RAG Management blueprint
    app.register_blueprint(rag_bp)  # Already has url_prefix='/library'
    logger.info("RAG Management routes registered successfully")

    # Register Deletion Management blueprint
    app.register_blueprint(delete_bp)  # Already has url_prefix='/library/api'
    logger.info("Deletion Management routes registered successfully")

    # Register Semantic Search blueprint
    from ..research_library.search import search_bp

    app.register_blueprint(search_bp)  # url_prefix='/library'
    logger.info("Semantic Search routes registered successfully")

    # Register Document Scheduler blueprint
    from ..research_scheduler.routes import scheduler_bp

    app.register_blueprint(scheduler_bp)
    logger.info("Document Scheduler routes registered successfully")

    # CSRF exemptions — Flask-WTF requires Blueprint objects (not strings)
    # to populate _exempt_blueprints. Passing strings only populates
    # _exempt_views, which compares against module-qualified names and
    # silently fails to match Flask endpoint names.
    if hasattr(app, "extensions") and "csrf" in app.extensions:
        csrf = app.extensions["csrf"]
        # Only api_v1 is exempt: it's a programmatic REST API used by
        # external clients. The api, benchmark, and research blueprints
        # are browser-facing and the frontend already sends CSRF tokens.
        for bp_name in ("api_v1",):
            bp_obj = app.blueprints.get(bp_name)
            if bp_obj is not None:
                csrf.exempt(bp_obj)

    # Add favicon route
    # Exempt favicon from rate limiting
    @app.route("/favicon.ico")
    @limiter.exempt
    def favicon():
        static_dir = app.config.get("STATIC_DIR", "static")
        return send_from_directory(
            static_dir, "favicon.ico", mimetype="image/x-icon"
        )

    # Add static route at the app level for compatibility
    # Exempt static files from rate limiting
    import re

    # A dot-delimited run of 8+ URL-safe characters is treated as a content
    # hash (e.g. "app.3f9a1c2b.js").
    _HASHED_FILENAME_RE = re.compile(r"\.[A-Za-z0-9_-]{8,}\.")
    _CACHE_IMMUTABLE = "public, max-age=31536000, immutable"
    _CACHE_REVALIDATE = "public, max-age=0, must-revalidate"

    def _try_send_static(base_dir, relative_path, *, allow_immutable):
        """Serve ``relative_path`` from ``base_dir`` if it validates and exists.

        Returns the response with its Cache-Control header set, or None when
        the path fails validation (ValueError) or does not exist, so the
        caller can try the next location.
        """
        from ..security.path_validator import PathValidator

        try:
            validated_path = PathValidator.validate_safe_path(
                relative_path,
                base_dir,
                allow_absolute=False,
                required_extensions=None,
            )
            if not (validated_path and validated_path.exists()):
                return None
            response = make_response(
                send_from_directory(str(base_dir), relative_path)
            )
        except ValueError:
            # Path validation failed
            return None

        if allow_immutable and _HASHED_FILENAME_RE.search(relative_path):
            # Content-hashed files are safe for immutable caching
            response.headers["Cache-Control"] = _CACHE_IMMUTABLE
        else:
            # Non-hashed files must revalidate on each request
            response.headers["Cache-Control"] = _CACHE_REVALIDATE
        return response

    @app.route("/static/<path:path>")
    @limiter.exempt
    def app_serve_static(path):
        static_dir = Path(app.config.get("STATIC_DIR", "static"))
        dist_dir = static_dir / "dist"
        dist_prefix = "dist/"

        # Lookup order: (base directory, relative path, immutable caching ok)
        candidates = []
        if path.startswith(dist_prefix):
            # First try to serve from dist directory (for built assets).
            # Flask captures path as "dist/js/app.abc.js", so strip the
            # "dist/" prefix before joining with dist_dir to avoid a
            # double-dist path (static/dist/dist/...).
            candidates.append((dist_dir, path[len(dist_prefix) :], True))
        # Fall back to dist directory for Vite-built assets (fonts, etc.)
        # Vite uses base: '/static/' so CSS references /static/fonts/...
        # but the files live in static/dist/fonts/...
        candidates.append((dist_dir, path, True))
        # Fall back to regular static folder
        candidates.append((static_dir, path, False))

        for base_dir, relative_path, allow_immutable in candidates:
            response = _try_send_static(
                base_dir, relative_path, allow_immutable=allow_immutable
            )
            if response is not None:
                return response

        # abort() raises, so nothing after it is reachable.
        abort(404)
def register_error_handlers(app):
    """Register error handlers with the Flask app.

    Installs handlers for HTTP 404, 500, 401 and 413, for
    ``WebAPIException``, for Flask-WTF's ``CSRFError`` and -- when the
    news package can be imported -- for ``NewsAPIException``.

    For 404, 500 and 413, requests whose path starts with ``/api/`` get a
    JSON body of the form ``{"error": <message>}`` and all other requests
    get the message as plain text. For 401, paths under ``/api/`` or
    ``/settings/api/`` get JSON and everything else is redirected to the
    login page.

    Args:
        app: The Flask application to register the handlers on.
    """

    def _json_or_text(message, status):
        """Return ``message`` as JSON for ``/api/`` paths, else as plain text."""
        if request.path.startswith("/api/"):
            return make_response(jsonify({"error": message}), status)
        return make_response(message, status)

    @app.errorhandler(404)
    def not_found(error):
        return _json_or_text("Not found", 404)

    @app.errorhandler(500)
    def server_error(error):
        return _json_or_text("Server error", 500)

    @app.errorhandler(401)
    def handle_unauthorized(error):
        if request.path.startswith(("/api/", "/settings/api/")):
            return make_response(
                jsonify({"error": "Authentication required"}),
                401,
            )
        # Imported lazily, as in the original handler.
        from .auth.decorators import _safe_redirect_to_login

        return _safe_redirect_to_login()

    @app.errorhandler(413)
    def handle_request_too_large(error):
        return _json_or_text("Request too large", 413)

    from .exceptions import WebAPIException

    @app.errorhandler(WebAPIException)
    def handle_web_api_exception(error):
        """Handle WebAPIException and return JSON."""
        logger.error(
            "Web API error: {} (status {})", error.error_code, error.status_code
        )
        return jsonify(error.to_dict()), error.status_code

    # Handle CSRF validation errors with helpful message
    try:
        from flask_wtf.csrf import CSRFError

        @app.errorhandler(CSRFError)
        def handle_csrf_error(error):
            """Handle CSRF errors with helpful debugging info."""
            # Check if this might be a Secure cookie issue over HTTP
            is_http = not request.is_secure
            is_private = _is_private_ip(request.remote_addr or "")
            is_proxied = request.headers.get("X-Forwarded-For") is not None

            error_msg = str(error.description)

            # Provide detailed help for HTTP + public IP or proxied scenario
            if is_http and (not is_private or is_proxied):
                # Values are passed as loguru format arguments, matching
                # the other log calls in this function.
                logger.warning(
                    "CSRF validation failed - likely due to Secure cookie over HTTP. "
                    "remote_addr={}, proxied={}, host={}",
                    request.remote_addr,
                    is_proxied,
                    request.host,
                )
                error_msg = (
                    "Session cookie error: You're accessing over HTTP from a "
                    "public IP address or through a proxy. "
                    "This is blocked for security reasons.\n\n"
                    "Solutions:\n"
                    "1. Use HTTPS with a reverse proxy (recommended for production)\n"
                    "2. Access from your local network (LAN IPs like 192.168.x.x work over HTTP)\n"
                    "3. Access directly from localhost (http://127.0.0.1:5000)\n"
                    "4. Use SSH tunnel: ssh -L 5000:localhost:5000 user@server, "
                    "then access http://localhost:5000\n\n"
                    "Note: LAN access (192.168.x.x, 10.x.x.x, 172.16-31.x.x) works over HTTP. "
                    "Only public internet access requires HTTPS."
                )

            return make_response(jsonify({"error": error_msg}), 400)
    except ImportError:
        pass

    # Handle News API exceptions globally
    try:
        from ..news.exceptions import NewsAPIException

        @app.errorhandler(NewsAPIException)
        def handle_news_api_exception(error):
            """Handle NewsAPIException and convert to JSON response."""
            logger.error(
                "News API error: {} (status {})",
                error.error_code,
                error.status_code,
            )
            return jsonify(error.to_dict()), error.status_code
    except ImportError:
        # News module not available
        pass
def create_database(app):
    """Do nothing; retained only so existing callers keep working.

    DEPRECATED: Database creation is now handled per-user via
    encrypted_db.py. The ``app`` argument is accepted and ignored.
    """
    return None