Coverage for src / local_deep_research / web / app_factory.py: 84%
365 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-25 01:07 +0000
1# import logging - replaced with loguru
2import ipaddress
3import os
4from pathlib import Path
5from importlib import resources as importlib_resources
7from flask import (
8 Flask,
9 Request,
10 abort,
11 jsonify,
12 make_response,
13 request,
14 send_from_directory,
15)
16from flask_wtf.csrf import CSRFProtect
17from werkzeug.middleware.proxy_fix import ProxyFix
18from loguru import logger
19from local_deep_research.settings.logger import log_settings
21from ..utilities.log_utils import InterceptHandler
22from ..security import SecurityHeaders, get_security_default
23from .utils.rate_limiter import limiter
24from ..security.file_upload_validator import FileUploadValidator
26# Removed DB_PATH import - using per-user databases now
27from .services.socket_service import SocketIOService
30def _is_private_ip(ip_str: str) -> bool:
31 """Check if IP is a private/local network address (RFC 1918 + localhost).
33 This allows LAN access over HTTP without requiring HTTPS, matching the
34 behavior of other self-hosted applications like Jellyfin and Home Assistant.
36 Private ranges: 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16, plus localhost.
37 """
38 try:
39 ip = ipaddress.ip_address(ip_str)
40 return ip.is_private or ip.is_loopback
41 except ValueError:
42 return False
class DiskSpoolingRequest(Request):
    """Flask ``Request`` subclass with a 5 MiB ``max_form_memory_size``.

    Intent (security fix for issue #1176): keep large multipart uploads from
    exhausting memory. With 200 files × 50MB limit, the default behavior
    could consume 10GB+ of memory per request, so data above the threshold
    is meant to go to temporary files on disk rather than stay in memory.

    NOTE(review): this class only overrides one attribute; how the
    framework reacts to payloads above it is defined by Werkzeug, not
    here - confirm against the Werkzeug version in use.
    """

    # 5MB threshold above which upload data should not be held in memory
    max_form_memory_size = 5 * 1024**2
def create_app():
    """
    Create and configure the Flask application.

    In order, this:
      * routes Werkzeug logging through loguru,
      * builds the Flask app with package (or fallback) template/static dirs,
      * stacks the WSGI middlewares (ProxyFix -> SecureCookieMiddleware ->
        ServerHeaderMiddleware, innermost to outermost),
      * loads or generates the per-installation SECRET_KEY,
      * configures sessions, upload limits, CSRF, security headers and
        rate limiting,
      * logs data-storage information,
      * initializes Vite/theme helpers and writes the combined themes.css,
      * creates the Socket.IO service and (optionally) the news scheduler,
      * applies request middleware, registers blueprints and error handlers,
      * starts the research queue processor.

    Returns:
        tuple: (app, socket_service) - The configured Flask app and the
        SocketIOService instance created for it.
    """
    # Set Werkzeug logger to WARNING level to suppress Socket.IO polling logs
    import logging

    werkzeug_logger = logging.getLogger("werkzeug")
    werkzeug_logger.setLevel(logging.WARNING)
    werkzeug_logger.addHandler(InterceptHandler())

    logger.info("Initializing Local Deep Research application...")

    try:
        # Get directories based on package installation
        PACKAGE_DIR = importlib_resources.files("local_deep_research") / "web"
        with importlib_resources.as_file(PACKAGE_DIR) as package_dir:
            STATIC_DIR = (package_dir / "static").as_posix()
            TEMPLATE_DIR = (package_dir / "templates").as_posix()

        # Initialize Flask app with package directories
        # Set static_folder to None to disable Flask's built-in static handling
        # We'll use our custom static route instead to handle dist folder
        app = Flask(__name__, static_folder=None, template_folder=TEMPLATE_DIR)
        # Store static dir for custom handling
        app.config["STATIC_DIR"] = STATIC_DIR
        logger.debug(f"Using package static path: {STATIC_DIR}")
        logger.debug(f"Using package template path: {TEMPLATE_DIR}")
    except Exception:
        # Fallback for development
        logger.exception("Package directories not found, using fallback paths")
        # Set static_folder to None to disable Flask's built-in static handling
        app = Flask(
            __name__,
            static_folder=None,
            template_folder=str(Path("templates").resolve()),
        )
        # Store static dir for custom handling
        app.config["STATIC_DIR"] = str(Path("static").resolve())

    # Use custom Request class that spools large uploads to disk
    # This prevents memory exhaustion from large file uploads (issue #1176)
    app.request_class = DiskSpoolingRequest

    # Add proxy support for deployments behind load balancers/reverse proxies
    # This ensures X-Forwarded-For and X-Forwarded-Proto headers are properly handled
    # Important for rate limiting and security (gets real client IP, not proxy IP)
    app.wsgi_app = ProxyFix(
        app.wsgi_app,
        x_for=1,  # Trust 1 proxy for X-Forwarded-For
        x_proto=1,  # Trust 1 proxy for X-Forwarded-Proto (http/https)
        x_host=0,  # Don't trust X-Forwarded-Host (security)
        x_port=0,  # Don't trust X-Forwarded-Port (security)
        x_prefix=0,  # Don't trust X-Forwarded-Prefix (security)
    )

    # WSGI middleware for dynamic cookie security.
    # It is wrapped AROUND ProxyFix, so on the way in it runs before ProxyFix
    # and sees the raw connection's REMOTE_ADDR / wsgi.url_scheme.
    # Must be WSGI level because Flask session cookies are set after
    # after_request handlers have run.
    class SecureCookieMiddleware:
        """WSGI middleware to add Secure flag to cookies based on request context.

        Security model (see ``_should_add_secure_flag``):
        - LDR_TESTING_MODE set: never add the Secure flag (for CI/development)
        - HTTPS request: always add the Secure flag
        - HTTP from a private/loopback connection address: skip the flag
        - HTTP from a public connection address: add the flag (the cookie
          will then not be sent back over HTTP, by design - use HTTPS)

        X-Forwarded-For is never consulted, so spoofing that header cannot
        influence the decision.
        """

        def __init__(self, wsgi_app, flask_app):
            self.wsgi_app = wsgi_app
            self.flask_app = flask_app

        def __call__(self, environ, start_response):
            # Decide once per request, before the wrapped app runs
            should_add_secure = self._should_add_secure_flag(environ)

            def custom_start_response(status, headers, exc_info=None):
                if should_add_secure:
                    # Rewrite only Set-Cookie headers; pass the rest through
                    headers = [
                        (name, self._ensure_secure(value))
                        if name.lower() == "set-cookie"
                        else (name, value)
                        for name, value in headers
                    ]
                return start_response(status, headers, exc_info)

            return self.wsgi_app(environ, custom_start_response)

        @staticmethod
        def _ensure_secure(cookie):
            """Return ``cookie`` with a Secure attribute, adding one if absent.

            Attributes are compared case-insensitively and ignoring
            surrounding whitespace, so ``;Secure`` / ``; SECURE`` are
            recognized and not duplicated.
            """
            attributes = cookie.split(";")[1:]  # skip the name=value pair
            if any(attr.strip().lower() == "secure" for attr in attributes):
                return cookie
            return cookie + "; Secure"

        def _should_add_secure_flag(self, environ):
            """Determine if Secure flag should be added based on request context.

            Security model:
            - Check the ACTUAL connection IP (REMOTE_ADDR), not X-Forwarded-For header
            - SecureCookieMiddleware is outer wrapper, so we see original REMOTE_ADDR
            - If connection comes from private IP (client or proxy), allow HTTP
            - If connection comes from public IP, require HTTPS

            This is safe because:
            - We never trust X-Forwarded-For header values (can be spoofed)
            - We only check the actual TCP connection source IP
            - Spoofing X-Forwarded-For from public IP doesn't bypass this check
            - Local proxies (nginx on localhost/LAN) have private REMOTE_ADDR
            """
            # Skip if in explicit testing mode
            if self.flask_app.config.get("LDR_TESTING_MODE"):
                return False

            # Check actual connection source IP (before ProxyFix modifies it)
            # This is either:
            # - Direct client IP (if no proxy)
            # - Proxy server IP (if behind proxy)
            # Local proxies (nginx on localhost, Traefik on LAN) have private IPs
            remote_addr = environ.get("REMOTE_ADDR", "")
            is_private = _is_private_ip(remote_addr)

            # Check if HTTPS
            # NOTE(review): this runs before ProxyFix, so X-Forwarded-Proto has
            # presumably not been applied to wsgi.url_scheme yet - confirm that
            # a TLS-terminating proxy on a private IP is meant to get no flag.
            is_https = environ.get("wsgi.url_scheme") == "https"

            # Add Secure flag if:
            # - Using HTTPS (always secure over HTTPS)
            # - OR connection is from public IP (require HTTPS for public access)
            return is_https or not is_private

    # Wrap the app with our cookie security middleware
    app.wsgi_app = SecureCookieMiddleware(app.wsgi_app, app)

    # WSGI middleware to remove Server header
    # This must be the outermost wrapper to catch headers added by Werkzeug
    class ServerHeaderMiddleware:
        """WSGI middleware to remove Server header from all responses.

        Prevents information disclosure about the underlying web server.
        Must be outermost middleware to catch headers added by WSGI layer.
        """

        def __init__(self, wsgi_app):
            self.wsgi_app = wsgi_app

        def __call__(self, environ, start_response):
            def custom_start_response(status, headers, exc_info=None):
                filtered_headers = [
                    (name, value)
                    for name, value in headers
                    if name.lower() != "server"
                ]
                return start_response(status, filtered_headers, exc_info)

            return self.wsgi_app(environ, custom_start_response)

    # Apply ServerHeaderMiddleware as outermost wrapper
    app.wsgi_app = ServerHeaderMiddleware(app.wsgi_app)

    # App configuration
    # Generate or load a unique SECRET_KEY per installation
    import secrets
    from ..config.paths import get_data_directory

    secret_key_file = Path(get_data_directory()) / ".secret_key"
    secret_key = ""
    needs_new_key = True
    if secret_key_file.exists():
        try:
            with open(secret_key_file, "r") as f:
                secret_key = f.read().strip()
        except Exception as e:
            # Unreadable file: fall back to an in-memory key for this
            # process only and leave the file untouched.
            logger.warning(f"Could not read secret key file: {e}")
            secret_key = secrets.token_hex(32)
            needs_new_key = False
        else:
            if secret_key:
                needs_new_key = False
            else:
                # An empty key would leave session signing effectively
                # unprotected, so treat it like a missing file.
                logger.warning(
                    "Secret key file is empty - generating a new SECRET_KEY"
                )

    if needs_new_key:
        # Generate a new key on first run (or to replace an empty file)
        secret_key = secrets.token_hex(32)
        try:
            secret_key_file.parent.mkdir(parents=True, exist_ok=True)
            # Create the file with owner-only permissions from the start so
            # the key is never briefly readable under the default umask.
            fd = os.open(
                secret_key_file,
                os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
                0o600,
            )
            with os.fdopen(fd, "w") as f:
                f.write(secret_key)
            # Also tighten permissions if the file already existed
            secret_key_file.chmod(0o600)  # Secure file permissions
            logger.info("Generated new SECRET_KEY for this installation")
        except Exception as e:
            # Best effort: keep running with the in-memory key
            logger.warning(f"Could not save secret key file: {e}")
    app.config["SECRET_KEY"] = secret_key

    # Session cookie security settings
    # SECURE flag is added dynamically based on request context
    # (see SecureCookieMiddleware above)
    # This allows localhost HTTP to work for development while keeping production secure
    #
    # Check if explicitly in testing mode (for backwards compatibility)
    is_testing = (
        os.getenv("CI")
        or os.getenv("TESTING")
        or os.getenv("PYTEST_CURRENT_TEST")
        or app.debug
    )
    # Set to False - the Secure flag is added dynamically by SecureCookieMiddleware
    # Exception: if TESTING mode is active, we never add Secure flag
    app.config["SESSION_COOKIE_SECURE"] = False
    # Read by SecureCookieMiddleware on every request
    app.config["LDR_TESTING_MODE"] = bool(is_testing)
    app.config["SESSION_COOKIE_HTTPONLY"] = (
        True  # Prevent JavaScript access (XSS mitigation)
    )
    app.config["SESSION_COOKIE_SAMESITE"] = "Lax"  # CSRF protection
    # Set max cookie lifetime for permanent sessions (when session.permanent=True).
    # This applies to "remember me" sessions; non-permanent sessions expire on browser close.
    remember_me_days = get_security_default(
        "security.session_remember_me_days", 30
    )
    app.config["PERMANENT_SESSION_LIFETIME"] = remember_me_days * 24 * 3600
    # PREFERRED_URL_SCHEME affects URL generation (url_for), not request.is_secure
    app.config["PREFERRED_URL_SCHEME"] = "https"

    # File upload security limits - calculated from FileUploadValidator constants
    app.config["MAX_CONTENT_LENGTH"] = (
        FileUploadValidator.MAX_FILES_PER_REQUEST
        * FileUploadValidator.MAX_FILE_SIZE
    )

    # Initialize CSRF protection
    # Explicitly enable CSRF protection (don't rely on implicit Flask-WTF behavior)
    app.config["WTF_CSRF_ENABLED"] = True
    CSRFProtect(app)
    # Exempt Socket.IO from CSRF protection
    # Note: Flask-SocketIO handles CSRF internally, so we don't need to exempt specific views

    # Initialize security headers middleware
    SecurityHeaders(app)

    # Initialize rate limiting for security (brute force protection)
    # Uses imported limiter from utils.rate_limiter module
    # Rate limiting is disabled in CI via enabled callable in rate_limiter.py
    # Also set app config to ensure Flask-Limiter respects our settings
    from ..settings.env_registry import is_rate_limiting_enabled

    app.config["RATELIMIT_ENABLED"] = is_rate_limiting_enabled()
    limiter.init_app(app)

    # Custom error handler for rate limit exceeded (429)
    @app.errorhandler(429)
    def ratelimit_handler(e):
        # Import here to avoid circular imports
        from .utils.rate_limiter import get_client_ip

        # Audit logging for security monitoring
        # Use get_client_ip() to get the real IP behind proxies
        logger.warning(
            f"Rate limit exceeded: endpoint={request.endpoint} "
            f"ip={get_client_ip()} "
            f"user_agent={request.headers.get('User-Agent', 'unknown')}"
        )
        return jsonify(
            error="Too many requests",
            message="Too many attempts. Please try again later.",
        ), 429

    # Note: Dynamic cookie security is handled by SecureCookieMiddleware (WSGI level)
    # This is necessary because Flask's session cookies are set AFTER after_request handlers
    # The middleware wrapping happens above, right after ProxyFix

    # Note: CSRF exemptions for API blueprints are applied after blueprint
    # registration (search for "CSRF exemptions" in this file).

    # Database configuration - Using per-user databases now
    # No shared database configuration needed
    app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False
    app.config["SQLALCHEMY_ECHO"] = False

    # Per-user databases are created automatically via encrypted_db.py

    # Log data location and security information
    from ..database.encrypted_db import db_manager

    data_dir = get_data_directory()
    logger.info("=" * 60)
    logger.info("DATA STORAGE INFORMATION")
    logger.info("=" * 60)
    logger.info(f"Data directory: {data_dir}")
    logger.info(
        "Databases: Per-user encrypted databases in encrypted_databases/"
    )

    # Check if using custom location
    from local_deep_research.settings.manager import SettingsManager

    # Also reused below to configure the news scheduler
    settings_manager = SettingsManager()
    custom_data_dir = settings_manager.get_setting("bootstrap.data_dir")
    if custom_data_dir:
        logger.info(
            f"Using custom data location via LDR_DATA_DIR: {custom_data_dir}"
        )
    else:
        logger.info("Using default platform-specific data location")

    # Display security status based on actual SQLCipher availability
    if db_manager.has_encryption:
        logger.info(
            "SECURITY: Databases are encrypted with SQLCipher. Ensure appropriate file system permissions are set on the data directory."
        )
    else:
        logger.warning(
            "SECURITY NOTICE: SQLCipher is not available - databases are NOT encrypted. "
            "Install SQLCipher for database encryption. Ensure appropriate file system permissions are set on the data directory."
        )

    logger.info(
        "TIP: You can change the data location by setting the LDR_DATA_DIR environment variable."
    )
    logger.info("=" * 60)

    # Initialize Vite helper for asset management
    from .utils.vite_helper import vite

    vite.init_app(app)

    # Initialize Theme helper for auto-detecting themes from CSS
    from .utils.theme_helper import theme_helper

    theme_helper.init_app(app)

    # Generate combined themes.css from individual theme files
    from .themes import theme_registry

    try:
        static_dir = Path(app.config.get("STATIC_DIR", "static"))
        themes_css_path = static_dir / "css" / "themes.css"
        combined_css = theme_registry.get_combined_css()
        themes_css_path.write_text(combined_css, encoding="utf-8")
        logger.debug(
            f"Generated themes.css with {len(theme_registry.themes)} themes"
        )
    except PermissionError:
        # e.g. read-only install location; not fatal
        logger.warning(
            f"Cannot write themes.css to {themes_css_path}. "
            "Theme CSS will need to be pre-generated."
        )
    except Exception:
        logger.exception("Error generating combined themes.css")

    # Register socket service
    socket_service = SocketIOService(app=app)

    # Initialize news subscription scheduler
    try:
        # News tables are now created per-user in their encrypted databases
        logger.info(
            "News tables will be created in per-user encrypted databases"
        )

        # Check if scheduler is enabled BEFORE importing/initializing
        # Use env registry which handles both env vars and settings
        from ..settings.env_registry import get_env_setting

        scheduler_enabled = get_env_setting("news.scheduler.enabled", True)
        logger.info(f"News scheduler enabled: {scheduler_enabled}")

        if scheduler_enabled:
            # Only import and initialize if enabled
            from ..news.subscription_manager.scheduler import (
                get_news_scheduler,
            )

            # Get scheduler instance and initialize with the settings
            # manager created above
            scheduler = get_news_scheduler()
            scheduler.initialize_with_settings(settings_manager)
            scheduler.start()
            app.news_scheduler = scheduler
            logger.info("News scheduler started with activity-based tracking")
        else:
            # Don't initialize scheduler if disabled
            app.news_scheduler = None
            logger.info("News scheduler disabled - not initializing")
    except Exception:
        # A broken scheduler must not prevent the web app from starting
        logger.exception("Failed to initialize news scheduler")
        app.news_scheduler = None

    # Apply middleware
    logger.info("Applying middleware...")
    apply_middleware(app)
    logger.info("Middleware applied successfully")

    # Register blueprints
    logger.info("Registering blueprints...")
    register_blueprints(app)
    logger.info("Blueprints registered successfully")

    # Register error handlers
    logger.info("Registering error handlers...")
    register_error_handlers(app)
    logger.info("Error handlers registered successfully")

    # Start the queue processor v2 (uses encrypted databases)
    # Always start the processor - it will handle per-user queue modes
    logger.info("Starting queue processor v2...")
    from .queue.processor_v2 import queue_processor

    queue_processor.start()
    logger.info("Started research queue processor v2")

    logger.info("App factory completed successfully")

    return app, socket_service
def apply_middleware(app):
    """Apply middleware to the Flask app.

    Registers, in order, the ``before_request`` hooks for stale-session
    cleanup, user-database setup, current-user injection, completed-research
    cleanup, pending queue operations, queue-processor notification and log
    flushing; then a ``teardown_appcontext`` hook that releases the
    per-request database session, and a ``before_request`` hook for
    ``/socket.io`` paths.

    Args:
        app: The Flask application to attach the hooks to.
    """

    # Import auth decorators and middleware
    logger.info("Importing cleanup_middleware...")
    from .auth.cleanup_middleware import cleanup_completed_research

    logger.info("Importing database_middleware...")
    from .auth.database_middleware import ensure_user_database

    logger.info("Importing decorators...")
    from .auth.decorators import inject_current_user

    logger.info("Importing queue_middleware...")
    from .auth.queue_middleware import process_pending_queue_operations

    logger.info("Importing queue_middleware_v2...")
    from .auth.queue_middleware_v2 import notify_queue_processor

    logger.info("Importing session_cleanup...")
    from .auth.session_cleanup import cleanup_stale_sessions

    logger.info("All middleware imports completed")

    # Register authentication middleware
    # First clean up stale sessions
    app.before_request(cleanup_stale_sessions)
    # Then ensure database is open for authenticated users
    app.before_request(ensure_user_database)
    # Then inject current user into g
    app.before_request(inject_current_user)
    # Clean up completed research records
    app.before_request(cleanup_completed_research)
    # Process any pending queue operations for this user (direct mode)
    app.before_request(process_pending_queue_operations)
    # Notify queue processor of user activity (queue mode)
    app.before_request(notify_queue_processor)

    logger.info("All middleware registered")

    # Flush any queued logs from background threads
    logger.info("Importing log_utils...")
    from ..utilities.log_utils import flush_log_queue

    app.before_request(flush_log_queue)
    logger.info("Log flushing middleware registered")

    # Clean up database sessions after each request
    @app.teardown_appcontext
    def cleanup_db_session(exception=None):
        """Clean up database session after each request to avoid cross-thread issues.

        Rolls back when the request ended with an exception, then always
        closes the session. Cleanup errors never propagate; they are only
        logged at debug level.
        """
        from flask import g

        session = g.pop("db_session", None)
        if session is None:
            return

        try:
            if exception:
                session.rollback()
        except Exception as rollback_error:
            logger.debug(
                f"Ignoring db session rollback error during teardown: {rollback_error}"
            )
        finally:
            # Close even if rollback failed so the session is not left open
            try:
                session.close()
            except Exception as close_error:
                logger.debug(
                    f"Ignoring db session close error during teardown: {close_error}"
                )

    # Add a middleware layer to handle abrupt disconnections
    @app.before_request
    def handle_websocket_requests():
        """Pre-process ``/socket.io`` requests; other paths pass through."""
        if request.path.startswith("/socket.io"):
            try:
                if not request.environ.get("werkzeug.socket"):
                    return
            except Exception:
                logger.exception("WebSocket preprocessing error")
                # Return empty response to prevent further processing
                return "", 200

    # Note: CORS headers for API routes are now handled by SecurityHeaders middleware
    # (see src/local_deep_research/security/security_headers.py)
def register_blueprints(app):
    """Import all blueprints, attach them to ``app`` and add app-level routes.

    Besides the blueprints this defines three routes directly on the app:
    ``/`` (research page, or redirect to login), ``/favicon.ico`` and
    ``/static/<path>``. After registration, selected API blueprints are
    exempted from CSRF protection.

    Args:
        app: The Flask application to register everything on.
    """

    # --- imports (order kept; each step is logged) ---
    logger.info("Importing blueprints...")

    # Benchmark blueprint
    from ..benchmarks.web_api.benchmark_routes import benchmark_bp

    logger.info("Importing API blueprint...")
    from .api import api_blueprint  # API v1 blueprint

    logger.info("Importing auth blueprint...")
    from .auth import auth_bp  # auth blueprint

    logger.info("Importing API routes blueprint...")
    from .routes.api_routes import api_bp  # research API blueprint

    logger.info("Importing context overflow API...")
    from .routes.context_overflow_api import (
        context_overflow_bp,
    )  # context overflow API

    logger.info("Importing history routes...")
    from .routes.history_routes import history_bp

    logger.info("Importing metrics routes...")
    from .routes.metrics_routes import metrics_bp

    logger.info("Importing research routes...")
    from .routes.research_routes import research_bp

    logger.info("Importing settings routes...")
    from .routes.settings_routes import settings_bp

    logger.info("All core blueprints imported successfully")

    # --- root route ---
    @app.route("/")
    def index():
        """Root route - redirect to login if not authenticated"""
        from flask import redirect, session, url_for

        from ..database.session_context import get_user_db_session
        from ..utilities.db_utils import get_settings_manager
        from .utils.templates import render_template_with_defaults

        # Unauthenticated visitors go to the login page
        if "username" not in session:
            return redirect(url_for("auth.login"))

        username = session.get("username")

        # (template key, positional arguments for get_setting), in the order
        # the settings are looked up
        setting_specs = (
            ("llm_provider", ("llm.provider", "ollama")),
            ("llm_model", ("llm.model", "")),
            ("llm_openai_endpoint_url", ("llm.openai_endpoint.url", "")),
            ("llm_ollama_url", ("llm.ollama.url",)),
            ("llm_lmstudio_url", ("llm.lmstudio.url",)),
            (
                "llm_local_context_window_size",
                ("llm.local_context_window_size",),
            ),
            ("search_tool", ("search.tool", "")),
            ("search_iterations", ("search.iterations", 3)),
            (
                "search_questions_per_iteration",
                ("search.questions_per_iteration", 2),
            ),
            ("search_strategy", ("search.search_strategy", "source-based")),
        )

        # Stays empty when no database session is available
        settings = {}
        with get_user_db_session(username) as db_session:
            if db_session:
                manager = get_settings_manager(db_session, username)
                settings = {
                    key: manager.get_setting(*args)
                    for key, args in setting_specs
                }

        # Debug logging
        log_settings(settings, "Research page settings loaded")

        return render_template_with_defaults(
            "pages/research.html", settings=settings
        )

    # --- blueprint registration ---
    # Auth goes FIRST so the login page is accessible
    app.register_blueprint(auth_bp)  # url_prefix="/auth" set on blueprint

    app.register_blueprint(research_bp)
    app.register_blueprint(history_bp)  # url_prefix="/history" set on blueprint
    app.register_blueprint(metrics_bp)
    app.register_blueprint(settings_bp)  # url_prefix="/settings" set on blueprint
    app.register_blueprint(api_bp, url_prefix="/research/api")
    app.register_blueprint(benchmark_bp)
    app.register_blueprint(context_overflow_bp, url_prefix="/metrics")

    # News API routes
    from .routes import news_routes

    app.register_blueprint(news_routes.bp)
    logger.info("News API routes registered successfully")

    # Follow-up research routes
    from ..followup_research.routes import followup_bp

    app.register_blueprint(followup_bp)
    logger.info("Follow-up research routes registered successfully")

    # News page blueprint
    from ..news.web import create_news_blueprint

    news_bp = create_news_blueprint()
    app.register_blueprint(news_bp, url_prefix="/news")
    logger.info("News page routes registered successfully")

    # API v1 blueprint
    app.register_blueprint(api_blueprint)  # url_prefix='/api/v1' set on blueprint

    # Research Library, RAG Management and Deletion Management blueprints
    from ..research_library import library_bp, rag_bp, delete_bp

    app.register_blueprint(library_bp)  # url_prefix='/library' set on blueprint
    logger.info("Research Library routes registered successfully")

    app.register_blueprint(rag_bp)  # url_prefix='/library' set on blueprint
    logger.info("RAG Management routes registered successfully")

    app.register_blueprint(delete_bp)  # url_prefix='/library/api' set on blueprint
    logger.info("Deletion Management routes registered successfully")

    # Document Scheduler blueprint
    from ..research_scheduler.routes import scheduler_bp

    app.register_blueprint(scheduler_bp)
    logger.info("Document Scheduler routes registered successfully")

    # CSRF exemptions — Flask-WTF requires Blueprint objects (not strings)
    # to populate _exempt_blueprints. Passing strings only populates
    # _exempt_views, which compares against module-qualified names and
    # silently fails to match Flask endpoint names.
    if hasattr(app, "extensions") and "csrf" in app.extensions:
        csrf = app.extensions["csrf"]
        exempt_names = ("api_v1", "api", "benchmark", "research")
        for blueprint in map(app.blueprints.get, exempt_names):
            if blueprint is not None:
                csrf.exempt(blueprint)

    # --- app-level asset routes (both exempt from rate limiting) ---
    @app.route("/favicon.ico")
    @limiter.exempt
    def favicon():
        """Serve favicon.ico from the configured static directory."""
        return send_from_directory(
            app.config.get("STATIC_DIR", "static"),
            "favicon.ico",
            mimetype="image/x-icon",
        )

    @app.route("/static/<path:path>")
    @limiter.exempt
    def app_serve_static(path):
        """Serve a static file, preferring ``dist/`` (built assets).

        Each candidate root is checked through PathValidator; a rejected
        path (ValueError) or a missing file moves on to the next root, and
        404 is raised when neither root yields the file.
        """
        from ..security.path_validator import PathValidator

        static_root = Path(app.config.get("STATIC_DIR", "static"))

        # dist first, then the regular static folder
        for base_dir in (static_root / "dist", static_root):
            try:
                candidate = PathValidator.validate_safe_path(
                    path,
                    base_dir,
                    allow_absolute=False,
                    required_extensions=None,  # any file type is allowed
                )

                if candidate and candidate.exists():
                    return send_from_directory(str(base_dir), path)
            except ValueError:
                # Path validation failed for this root; try the next one
                continue

        abort(404)
def register_error_handlers(app):
    """Register error handlers with the Flask app.

    Adds handlers for 404 and 500 (JSON for ``/api/`` paths, plain text
    otherwise) and, when their modules can be imported, for ``CSRFError``
    and ``NewsAPIException``.

    Args:
        app: The Flask application to register the handlers on.
    """

    def _wants_json():
        # API clients get JSON bodies; everything else gets plain text
        return request.path.startswith("/api/")

    @app.errorhandler(404)
    def not_found(error):
        body = jsonify({"error": "Not found"}) if _wants_json() else "Not found"
        return make_response(body, 404)

    @app.errorhandler(500)
    def server_error(error):
        body = (
            jsonify({"error": "Server error"})
            if _wants_json()
            else "Server error"
        )
        return make_response(body, 500)

    # CSRF validation errors get a more helpful message
    try:
        from flask_wtf.csrf import CSRFError

        @app.errorhandler(CSRFError)
        def handle_csrf_error(error):
            """Handle CSRF errors with helpful debugging info."""
            # Gather hints that the failure is really a Secure cookie being
            # dropped over plain HTTP
            over_http = not request.is_secure
            from_private = _is_private_ip(request.remote_addr or "")
            via_proxy = request.headers.get("X-Forwarded-For") is not None

            message = str(error.description)

            # Detailed help for HTTP + public IP, or any proxied HTTP request
            if over_http and not (from_private and not via_proxy):
                logger.warning(
                    f"CSRF validation failed - likely due to Secure cookie over HTTP. "
                    f"remote_addr={request.remote_addr}, proxied={via_proxy}, "
                    f"host={request.host}"
                )
                message = (
                    "Session cookie error: You're accessing over HTTP from a "
                    "public IP address or through a proxy. "
                    "This is blocked for security reasons.\n\n"
                    "Solutions:\n"
                    "1. Use HTTPS with a reverse proxy (recommended for production)\n"
                    "2. Access from your local network (LAN IPs like 192.168.x.x work over HTTP)\n"
                    "3. Access directly from localhost (http://127.0.0.1:5000)\n"
                    "4. Use SSH tunnel: ssh -L 5000:localhost:5000 user@server, "
                    "then access http://localhost:5000\n\n"
                    "Note: LAN access (192.168.x.x, 10.x.x.x, 172.16-31.x.x) works over HTTP. "
                    "Only public internet access requires HTTPS."
                )

            return make_response(jsonify({"error": message}), 400)
    except ImportError:
        pass

    # News API exceptions are handled globally
    try:
        from ..news.exceptions import NewsAPIException

        @app.errorhandler(NewsAPIException)
        def handle_news_api_exception(error):
            """Handle NewsAPIException and convert to JSON response."""
            from loguru import logger

            logger.error(
                f"News API error: {error.message} (code: {error.error_code})"
            )
            payload = jsonify(error.to_dict())
            return payload, error.status_code
    except ImportError:
        # News module not available
        pass
def create_database(app):
    """
    DEPRECATED no-op, retained only so existing callers keep working.

    Database creation is now handled per-user via encrypted_db.py; the
    ``app`` argument is accepted and ignored.
    """
    return None