Coverage for src/local_deep_research/web/app_factory.py: 87%
391 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-03 23:15 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-03 23:15 +0000
1# import logging - replaced with loguru
2import os
3from pathlib import Path
4from importlib import resources as importlib_resources
6from flask import (
7 Flask,
8 Request,
9 abort,
10 jsonify,
11 make_response,
12 request,
13 send_from_directory,
14)
15from flask_wtf.csrf import CSRFProtect
16from werkzeug.middleware.proxy_fix import ProxyFix
17from loguru import logger
18from local_deep_research.settings.logger import log_settings
20from ..utilities.log_utils import InterceptHandler
21from ..security import SecurityHeaders, get_security_default
22from ..security.rate_limiter import limiter
23from ..security.file_upload_validator import FileUploadValidator
24from ..security.web_middleware import (
25 SecureCookieMiddleware,
26 ServerHeaderMiddleware,
27)
29# Removed DB_PATH import - using per-user databases now
30from .services.socket_service import SocketIOService
class DiskSpoolingRequest(Request):
    """Request subclass with a 5 MB in-memory limit for multipart form data.

    Uploads above ``max_form_memory_size`` are written to temporary files on
    disk rather than buffered in RAM, so a large multipart request cannot
    exhaust memory.

    Security fix for issue #1176: a request sitting right at the per-file x
    per-request limits could otherwise consume tens of GB of memory at once.
    Spooling to disk keeps memory bounded no matter how high the per-file
    cap (FileUploadValidator.MAX_FILE_SIZE) is configured.
    """

    # Spool-to-disk threshold: 5 MB, expressed in bytes.
    max_form_memory_size = 5 * 1024**2
def _load_or_create_secret_key(secret_key_file):
    """Return the SECRET_KEY for this installation, creating the file if needed.

    The key file is created with O_EXCL so that only one process ever
    generates the persisted key; every other process reads it back.

    Args:
        secret_key_file: Path of the key file. Its parent directory must
            already exist.

    Returns:
        str: The persisted key when it can be created or read. Otherwise a
        freshly generated key that is valid only for this process (the file
        could not be written, could not be read, or was empty).
    """
    import secrets

    new_key = secrets.token_hex(32)
    try:
        fd = os.open(
            str(secret_key_file), os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o600
        )
        try:
            os.write(fd, new_key.encode())
        finally:
            os.close(fd)
    except FileExistsError:
        # Normal path after the first start: reuse the stored key.
        try:
            with open(secret_key_file, "r", encoding="utf-8") as f:
                stored_key = f.read().strip()
        except Exception:
            logger.warning("Could not read secret key file")
            return new_key
        if not stored_key:
            # An empty file (e.g. a crash between create and write) would
            # yield SECRET_KEY == "", which Flask treats as "no key set" and
            # which makes sessions unavailable. Use a per-process key instead.
            logger.warning(
                "Secret key file is empty; using a temporary key for this "
                "process. Delete the file to have a new key generated."
            )
            return new_key
        return stored_key
    except OSError:
        logger.warning("Could not save secret key file")
        return new_key
    else:
        logger.info("Generated new SECRET_KEY for this installation")
        return new_key


def create_app():
    """
    Create and configure the Flask application.

    Sets up logging interception, the WSGI middleware stack, the secret key,
    session/CSRF/rate-limit configuration, theme CSS generation, the news
    scheduler, request middleware, blueprints, error handlers and the queue
    processor.

    Returns:
        tuple: (app, socket_service) - The configured Flask app and the
        SocketIOService instance created for it.
    """
    # Route stdlib loggers through loguru via InterceptHandler.
    # Guard against handler duplication when create_app() is called multiple
    # times (e.g. in tests).
    import logging

    # werkzeug: WARNING suppresses verbose per-request logs.
    # apscheduler: it logs job execution results (success/failure) to its own
    # logger hierarchy. Without an InterceptHandler the WARNING+ messages
    # only reach Python's lastResort handler as unformatted stderr.
    # Level is WARNING (not INFO) because job functions already log their
    # own progress via loguru — APScheduler's INFO messages would be redundant.
    for stdlib_logger_name in ("werkzeug", "apscheduler"):
        stdlib_logger = logging.getLogger(stdlib_logger_name)
        stdlib_logger.setLevel(logging.WARNING)
        if not any(
            isinstance(h, InterceptHandler) for h in stdlib_logger.handlers
        ):
            stdlib_logger.addHandler(InterceptHandler())

    logger.info("Initializing Local Deep Research application...")

    try:
        # Get directories based on package installation
        package_web_dir = (
            importlib_resources.files("local_deep_research") / "web"
        )
        with importlib_resources.as_file(package_web_dir) as package_dir:
            static_path = (package_dir / "static").as_posix()
            template_path = (package_dir / "templates").as_posix()

        # Initialize Flask app with package directories
        # Set static_folder to None to disable Flask's built-in static handling
        # We'll use our custom static route instead to handle dist folder
        app = Flask(__name__, static_folder=None, template_folder=template_path)
        # Store static dir for custom handling
        app.config["STATIC_DIR"] = static_path
        logger.debug(f"Using package static path: {static_path}")
        logger.debug(f"Using package template path: {template_path}")
    except Exception:
        # Fallback for development
        logger.exception("Package directories not found, using fallback paths")
        # Set static_folder to None to disable Flask's built-in static handling
        app = Flask(
            __name__,
            static_folder=None,
            template_folder=str(Path("templates").resolve()),
        )
        # Store static dir for custom handling
        app.config["STATIC_DIR"] = str(Path("static").resolve())

    # Use custom Request class that spools large uploads to disk
    # This prevents memory exhaustion from large file uploads (issue #1176)
    app.request_class = DiskSpoolingRequest

    # Middleware stack (wrapped innermost -> outermost; runs in reverse at
    # request time):
    #   1. SecureCookieMiddleware: adds Secure flag iff wsgi.url_scheme=https.
    #      Wrapped INSIDE ProxyFix so it reads the post-rewrite scheme.
    #   2. ProxyFix: translates X-Forwarded-* into REMOTE_ADDR / wsgi.url_scheme.
    #   3. ServerHeaderMiddleware: strips Server header (outermost).
    app.wsgi_app = SecureCookieMiddleware(app.wsgi_app, app)  # type: ignore[method-assign]
    app.wsgi_app = ProxyFix(  # type: ignore[method-assign]
        app.wsgi_app,
        x_for=1,  # Trust 1 proxy for X-Forwarded-For
        x_proto=1,  # Trust 1 proxy for X-Forwarded-Proto (http/https)
        x_host=0,  # Don't trust X-Forwarded-Host (security)
        x_port=0,  # Don't trust X-Forwarded-Port (security)
        x_prefix=0,  # Don't trust X-Forwarded-Prefix (security)
    )
    app.wsgi_app = ServerHeaderMiddleware(app.wsgi_app)  # type: ignore[method-assign]

    # App configuration
    # Generate or load a unique SECRET_KEY per installation
    from ..config.paths import get_data_directory

    secret_key_file = Path(get_data_directory()) / ".secret_key"
    secret_key_file.parent.mkdir(parents=True, exist_ok=True)
    app.config["SECRET_KEY"] = _load_or_create_secret_key(secret_key_file)

    # Session cookie security settings
    # The Secure flag is added dynamically per request by
    # SecureCookieMiddleware (installed above). This allows localhost HTTP to
    # work for development while keeping production secure.
    #
    # Check if explicitly in testing mode (for backwards compatibility)
    is_testing = (
        os.getenv("CI")
        or os.getenv("TESTING")
        or os.getenv("PYTEST_CURRENT_TEST")
        or app.debug
    )
    # Set to False - the Secure flag is added dynamically (see above).
    # Exception: if TESTING mode is active, we never add Secure flag
    app.config["SESSION_COOKIE_SECURE"] = False
    app.config["LDR_TESTING_MODE"] = bool(is_testing)  # Read at request time
    app.config["SESSION_COOKIE_HTTPONLY"] = (
        True  # Prevent JavaScript access (XSS mitigation)
    )
    app.config["SESSION_COOKIE_SAMESITE"] = "Lax"  # CSRF protection
    # Set max cookie lifetime for permanent sessions (when session.permanent=True).
    # This applies to "remember me" sessions; non-permanent sessions expire on browser close.
    remember_me_days = get_security_default(
        "security.session_remember_me_days", 30
    )
    app.config["PERMANENT_SESSION_LIFETIME"] = remember_me_days * 24 * 3600
    # PREFERRED_URL_SCHEME affects URL generation (url_for), not request.is_secure
    app.config["PREFERRED_URL_SCHEME"] = "https"

    # File upload security limits - calculated from FileUploadValidator constants
    app.config["MAX_CONTENT_LENGTH"] = (
        FileUploadValidator.MAX_FILES_PER_REQUEST
        * FileUploadValidator.MAX_FILE_SIZE
    )

    # Initialize CSRF protection
    # Explicitly enable CSRF protection (don't rely on implicit Flask-WTF behavior)
    app.config["WTF_CSRF_ENABLED"] = True
    CSRFProtect(app)
    # Exempt Socket.IO from CSRF protection
    # Note: Flask-SocketIO handles CSRF internally, so we don't need to exempt specific views

    # Initialize security headers middleware
    SecurityHeaders(app)

    # Initialize rate limiting for security (brute force protection)
    # Uses imported limiter from security.rate_limiter module
    # Rate limiting is disabled in CI via enabled callable in rate_limiter.py
    # Also set app config to ensure Flask-Limiter respects our settings
    from ..settings.env_registry import is_rate_limiting_enabled

    app.config["RATELIMIT_ENABLED"] = is_rate_limiting_enabled()
    app.config["RATELIMIT_STRATEGY"] = "moving-window"
    limiter.init_app(app)

    # Custom error handler for rate limit exceeded (429)
    @app.errorhandler(429)
    def ratelimit_handler(e):
        # Import here to avoid circular imports
        from ..security.rate_limiter import get_client_ip

        # Audit logging for security monitoring
        # Use get_client_ip() to get the real IP behind proxies
        logger.warning(
            f"Rate limit exceeded: endpoint={request.endpoint} "
            f"ip={get_client_ip()} "
            f"user_agent={request.headers.get('User-Agent', 'unknown')}"
        )
        return jsonify(
            error="Too many requests",
            message="Too many attempts. Please try again later.",
        ), 429

    # Note: Dynamic cookie security is handled by SecureCookieMiddleware (WSGI level)
    # This is necessary because Flask's session cookies are set AFTER after_request handlers
    # The middleware wrapping happens above, next to ProxyFix

    # Note: CSRF exemptions for API blueprints are applied after blueprint
    # registration (search for "CSRF exemptions" in this file).

    # Database configuration - Using per-user databases now
    # No shared database configuration needed
    app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False
    app.config["SQLALCHEMY_ECHO"] = False

    # Per-user databases are created automatically via encrypted_db.py

    # Log data location and security information
    from ..database.encrypted_db import db_manager

    data_dir = get_data_directory()
    logger.info("=" * 60)
    logger.info("DATA STORAGE INFORMATION")
    logger.info("=" * 60)
    logger.info(f"Data directory: {data_dir}")
    logger.info(
        "Databases: Per-user encrypted databases in encrypted_databases/"
    )

    # Check if using custom location
    from local_deep_research.settings.manager import SettingsManager

    settings_manager = SettingsManager()
    custom_data_dir = settings_manager.get_setting("bootstrap.data_dir")
    if custom_data_dir:
        logger.info(
            f"Using custom data location via LDR_DATA_DIR: {custom_data_dir}"
        )
    else:
        logger.info("Using default platform-specific data location")

    # Display security status based on actual SQLCipher availability
    if db_manager.has_encryption:
        logger.info(
            "SECURITY: Databases are encrypted with SQLCipher. Ensure appropriate file system permissions are set on the data directory."
        )
    else:
        logger.warning(
            "SECURITY NOTICE: SQLCipher is not available - databases are NOT encrypted. "
            "Install SQLCipher for database encryption. Ensure appropriate file system permissions are set on the data directory."
        )

    logger.info(
        "TIP: You can change the data location by setting the LDR_DATA_DIR environment variable."
    )
    logger.info("=" * 60)

    # Initialize Vite helper for asset management
    from .utils.vite_helper import vite

    vite.init_app(app)

    # Initialize Theme helper for auto-detecting themes from CSS
    from .utils.theme_helper import theme_helper

    theme_helper.init_app(app)

    # Generate combined themes.css from individual theme files
    from .themes import theme_registry

    try:
        static_dir = Path(app.config.get("STATIC_DIR", "static"))
        themes_css_path = static_dir / "css" / "themes.css"
        combined_css = theme_registry.get_combined_css()
        themes_css_path.write_text(combined_css, encoding="utf-8")
        logger.debug(
            f"Generated themes.css with {len(theme_registry.themes)} themes"
        )
    except PermissionError:
        # Best effort: a read-only install location must not block startup.
        logger.warning(
            f"Cannot write themes.css to {themes_css_path}. "
            "Theme CSS will need to be pre-generated."
        )
    except Exception:
        logger.exception("Error generating combined themes.css")

    # Register socket service
    socket_service = SocketIOService(app=app)

    # Initialize news subscription scheduler
    try:
        # News tables are now created per-user in their encrypted databases
        logger.info(
            "News tables will be created in per-user encrypted databases"
        )

        # Check if scheduler is enabled BEFORE importing/initializing
        # Use env registry which handles both env vars and settings
        from ..settings.env_registry import get_env_setting

        scheduler_enabled = get_env_setting("news.scheduler.enabled", True)
        logger.info(f"News scheduler enabled: {scheduler_enabled}")

        if scheduler_enabled:
            # Only import and initialize if enabled
            from ..scheduler.background import (
                get_background_job_scheduler,
            )

            # Get scheduler instance and initialize it with the
            # SettingsManager created above.
            scheduler = get_background_job_scheduler()
            scheduler.initialize_with_settings(settings_manager)
            scheduler.set_app(app)
            scheduler.start()
            app.background_job_scheduler = scheduler  # type: ignore[attr-defined]
            logger.info("News scheduler started with activity-based tracking")
        else:
            # Don't initialize scheduler if disabled
            app.background_job_scheduler = None  # type: ignore[attr-defined]
            logger.info("News scheduler disabled - not initializing")
    except Exception:
        # A scheduler failure must not prevent the web app from starting.
        logger.exception("Failed to initialize news scheduler")
        app.background_job_scheduler = None  # type: ignore[attr-defined]

    # Apply middleware
    logger.info("Applying middleware...")
    apply_middleware(app)
    logger.info("Middleware applied successfully")

    # Register blueprints
    logger.info("Registering blueprints...")
    register_blueprints(app)
    logger.info("Blueprints registered successfully")

    # Register error handlers
    logger.info("Registering error handlers...")
    register_error_handlers(app)
    logger.info("Error handlers registered successfully")

    # Start the queue processor v2 (uses encrypted databases)
    # Always start the processor - it will handle per-user queue modes
    logger.info("Starting queue processor v2...")
    from .queue.processor_v2 import queue_processor

    queue_processor.start()
    logger.info("Started research queue processor v2")

    logger.info("App factory completed successfully")

    return app, socket_service
def apply_middleware(app):
    """Apply middleware to the Flask app.

    Registers, in order: the before_request hooks from the auth package, a
    template context processor exposing ResearchStatus values, an
    app-context teardown that releases database sessions, and a
    before_request hook for ``/socket.io`` paths.

    Args:
        app: The Flask application to register the hooks on.
    """

    # Import auth decorators and middleware
    logger.info("Importing cleanup_middleware...")
    from .auth.cleanup_middleware import cleanup_completed_research

    logger.info("Importing database_middleware...")
    from .auth.database_middleware import ensure_user_database

    logger.info("Importing decorators...")
    from .auth.decorators import inject_current_user

    logger.info("Importing queue_middleware...")
    from .auth.queue_middleware import process_pending_queue_operations

    logger.info("Importing queue_middleware_v2...")
    from .auth.queue_middleware_v2 import notify_queue_processor

    logger.info("Importing session_cleanup...")
    from .auth.session_cleanup import cleanup_stale_sessions

    logger.info("All middleware imports completed")

    # Register authentication middleware
    # First clean up stale sessions
    app.before_request(cleanup_stale_sessions)
    # Then ensure database is open for authenticated users
    app.before_request(ensure_user_database)
    # Then inject current user into g
    app.before_request(inject_current_user)
    # Clean up completed research records
    app.before_request(cleanup_completed_research)
    # Process any pending queue operations for this user (direct mode)
    app.before_request(process_pending_queue_operations)
    # Notify queue processor of user activity (queue mode)
    app.before_request(notify_queue_processor)

    logger.info("All middleware registered")

    # Note: log-queue flushing is handled asynchronously by the
    # background daemon started in web/app.py::main() (see
    # start_log_queue_processor). We deliberately do NOT register
    # flush_log_queue as a before_request handler: doing so makes
    # every request (including GET /auth/login) synchronously wait
    # on _write_log_to_database, which opens a user DB session and
    # blocks if the shared connection pool is wedged. At-exit
    # draining is still handled via atexit in web/app.py.

    # Inject backend constants into Jinja2 templates for frontend JS.
    # This is the Flask-documented pattern for sharing Python enums with JavaScript.
    # Source of truth: src/local_deep_research/constants.py::ResearchStatus
    # Frontend helpers: src/local_deep_research/web/static/js/config/constants.js
    # Template injection: src/local_deep_research/web/templates/base.html
    from ..constants import ResearchStatus

    @app.context_processor
    def inject_frontend_constants():
        """Expose ResearchStatus members and the terminal states to templates."""
        terminal = [
            ResearchStatus.COMPLETED,
            ResearchStatus.SUSPENDED,
            ResearchStatus.FAILED,
            ResearchStatus.ERROR,
            ResearchStatus.CANCELLED,
        ]
        return {
            "research_status_enum": {m.name: m.value for m in ResearchStatus},
            "research_terminal_states": [str(s) for s in terminal],
        }

    # Clean up database sessions after each request
    @app.teardown_appcontext
    def cleanup_db_session(exception=None):
        """Clean up database session after each request to avoid cross-thread issues."""
        from flask import g

        session = g.pop("db_session", None)
        if session is not None:
            # Rollback and close are attempted independently so that a
            # failing rollback does not prevent the close.
            try:
                session.rollback()
            except Exception:
                logger.warning(
                    "Error rolling back request session during cleanup"
                )
            try:
                session.close()
            except Exception:
                logger.warning("Error closing request session during cleanup")

        # Sweep credential entries for dead threads. Multiple trigger
        # points (here, processor_v2, and connection_cleanup scheduler)
        # ensure sweeps happen regardless of traffic patterns.
        try:
            from ..database.thread_local_session import cleanup_dead_threads

            cleanup_dead_threads()
        except Exception:
            # loguru has no `exc_info` parameter; opt(exception=True) is
            # what attaches the active traceback to the record.
            logger.opt(exception=True).debug("Error during dead thread sweep")

        # Clean up any thread-local database session that may have been created
        # via get_metrics_session() fallback in session_context.py (e.g. background
        # threads or error paths where g.db_session was unavailable).
        try:
            from ..database.thread_local_session import cleanup_current_thread

            cleanup_current_thread()
        except Exception:
            logger.opt(exception=True).debug(
                "Error during thread-local session cleanup"
            )

    # Add a middleware layer to handle abrupt disconnections
    @app.before_request
    def handle_websocket_requests():
        """Short-circuit /socket.io requests only if inspecting them fails."""
        if request.path.startswith("/socket.io"):
            try:
                if not request.environ.get("werkzeug.socket"):
                    return None
            except Exception:
                logger.exception("WebSocket preprocessing error")
                # Return empty response to prevent further processing
                return "", 200
        return None

    # Note: CORS headers for API routes are now handled by SecurityHeaders middleware
    # (see src/local_deep_research/security/security_headers.py)
def register_blueprints(app):
    """Attach the root route, every blueprint and the static routes to *app*.

    Also applies the CSRF exemption for the ``api_v1`` blueprint once all
    blueprints have been registered.
    """

    # --- Blueprint imports (each one is announced in the log) -------------
    logger.info("Importing blueprints...")

    # Benchmark blueprint
    from ..benchmarks.web_api.benchmark_routes import benchmark_bp

    logger.info("Importing API blueprint...")
    from .api import api_blueprint  # API v1 blueprint

    logger.info("Importing auth blueprint...")
    from .auth import auth_bp  # auth blueprint

    logger.info("Importing API routes blueprint...")
    from .routes.api_routes import api_bp  # research API blueprint

    logger.info("Importing context overflow API...")
    from .routes.context_overflow_api import (
        context_overflow_bp,
    )  # context overflow API

    logger.info("Importing history routes...")
    from .routes.history_routes import history_bp

    logger.info("Importing metrics routes...")
    from .routes.metrics_routes import metrics_bp

    logger.info("Importing research routes...")
    from .routes.research_routes import research_bp

    logger.info("Importing settings routes...")
    from .routes.settings_routes import settings_bp

    logger.info("All core blueprints imported successfully")

    # --- Root route -------------------------------------------------------
    @app.route("/")
    def index():
        """Root route - redirect to login if not authenticated"""
        from flask import redirect, session, url_for

        from ..constants import get_available_strategies
        from ..database.session_context import get_user_db_session
        from ..utilities.db_utils import get_settings_manager
        from .utils.templates import render_template_with_defaults

        # Anonymous visitors are sent to the login page.
        if "username" not in session:
            return redirect(url_for("auth.login"))

        # Template key -> positional arguments for get_setting(). Entries
        # with a single argument deliberately pass no explicit default.
        setting_specs = (
            ("llm_provider", ("llm.provider", "ollama")),
            ("llm_model", ("llm.model", "")),
            ("llm_openai_endpoint_url", ("llm.openai_endpoint.url", "")),
            ("llm_ollama_url", ("llm.ollama.url",)),
            ("llm_lmstudio_url", ("llm.lmstudio.url",)),
            (
                "llm_local_context_window_size",
                ("llm.local_context_window_size",),
            ),
            ("search_tool", ("search.tool", "")),
            ("search_iterations", ("search.iterations", 3)),
            (
                "search_questions_per_iteration",
                ("search.questions_per_iteration", 2),
            ),
            ("search_strategy", ("search.search_strategy", "source-based")),
        )

        # Read the user's settings through a proper session context.
        username = session.get("username")
        settings = {}
        show_all = False
        with get_user_db_session(username) as db_session:
            if db_session:
                settings_manager = get_settings_manager(db_session, username)
                settings = {
                    template_key: settings_manager.get_setting(*lookup_args)
                    for template_key, lookup_args in setting_specs
                }
                show_all = settings_manager.get_setting(
                    "search.show_all_strategies", False
                )

        # Debug logging
        log_settings(settings, "Research page settings loaded")

        strategies = get_available_strategies(show_all=bool(show_all))
        return render_template_with_defaults(
            "pages/research.html",
            settings=settings,
            strategies=strategies,
        )

    # --- Blueprint registration -------------------------------------------
    # Auth goes FIRST so the login page is reachable.
    app.register_blueprint(auth_bp)  # url_prefix="/auth" set on blueprint

    app.register_blueprint(research_bp)
    app.register_blueprint(history_bp)  # url_prefix="/history" set on blueprint
    app.register_blueprint(metrics_bp)
    app.register_blueprint(settings_bp)  # url_prefix="/settings" set on blueprint
    app.register_blueprint(api_bp, url_prefix="/research/api")
    app.register_blueprint(benchmark_bp)
    app.register_blueprint(context_overflow_bp, url_prefix="/metrics")

    # News API routes
    from .routes import news_routes

    app.register_blueprint(news_routes.bp)
    logger.info("News API routes registered successfully")

    # Chat routes
    from ..chat.routes import chat_bp

    app.register_blueprint(chat_bp)
    logger.info("Chat routes registered successfully")

    # Follow-up research routes
    from ..followup_research.routes import followup_bp

    app.register_blueprint(followup_bp)
    logger.info("Follow-up research routes registered successfully")

    # News page blueprint
    from ..news.web import create_news_blueprint

    news_bp = create_news_blueprint()
    app.register_blueprint(news_bp, url_prefix="/news")
    logger.info("News page routes registered successfully")

    # API v1 blueprint
    app.register_blueprint(api_blueprint)  # url_prefix='/api/v1' set on blueprint

    # Research Library, RAG Management and Deletion Management blueprints
    from ..research_library import library_bp, rag_bp, delete_bp

    app.register_blueprint(library_bp)  # url_prefix='/library' set on blueprint
    logger.info("Research Library routes registered successfully")

    app.register_blueprint(rag_bp)  # url_prefix='/library' set on blueprint
    logger.info("RAG Management routes registered successfully")

    app.register_blueprint(delete_bp)  # url_prefix='/library/api' set on blueprint
    logger.info("Deletion Management routes registered successfully")

    # Semantic Search blueprint
    from ..research_library.search import search_bp

    app.register_blueprint(search_bp)  # url_prefix='/library'
    logger.info("Semantic Search routes registered successfully")

    # Document Scheduler blueprint
    from ..research_scheduler.routes import scheduler_bp

    app.register_blueprint(scheduler_bp)
    logger.info("Document Scheduler routes registered successfully")

    # CSRF exemptions — Flask-WTF requires Blueprint objects (not strings)
    # to populate _exempt_blueprints. Passing strings only populates
    # _exempt_views, which compares against module-qualified names and
    # silently fails to match Flask endpoint names.
    if hasattr(app, "extensions") and "csrf" in app.extensions:
        csrf = app.extensions["csrf"]
        # Only api_v1 is exempt: it's a programmatic REST API used by
        # external clients. The api, benchmark, and research blueprints
        # are browser-facing and the frontend already sends CSRF tokens.
        exempt_blueprint_names = ("api_v1",)
        for bp_name in exempt_blueprint_names:
            bp_obj = app.blueprints.get(bp_name)
            if bp_obj is None:
                continue
            csrf.exempt(bp_obj)

    # --- Favicon (exempt from rate limiting) -------------------------------
    @app.route("/favicon.ico")
    @limiter.exempt
    def favicon():
        return send_from_directory(
            app.config.get("STATIC_DIR", "static"),
            "favicon.ico",
            mimetype="image/x-icon",
        )

    # --- Static files (app-level route, exempt from rate limiting) ---------
    import re

    _HASHED_FILENAME_RE = re.compile(r"\.[A-Za-z0-9_-]{8,}\.")

    @app.route("/static/<path:path>")
    @limiter.exempt
    def app_serve_static(path):
        from ..security.path_validator import PathValidator

        def serve_if_present(base_dir, relative_path, hashed_is_immutable):
            """Return a response for an existing, validated file, else None."""
            try:
                validated_path = PathValidator.validate_safe_path(
                    relative_path,
                    base_dir,
                    allow_absolute=False,
                    required_extensions=None,
                )

                if validated_path and validated_path.exists():
                    response = make_response(
                        send_from_directory(str(base_dir), relative_path)
                    )
                    if hashed_is_immutable and _HASHED_FILENAME_RE.search(
                        relative_path
                    ):
                        # Content-hashed files are safe for immutable caching
                        cache_policy = "public, max-age=31536000, immutable"
                    else:
                        # Non-hashed files must revalidate on each request
                        cache_policy = "public, max-age=0, must-revalidate"
                    response.headers["Cache-Control"] = cache_policy
                    return response
            except ValueError:
                # Path validation failed; the caller tries the next location.
                pass
            return None

        static_dir = Path(app.config.get("STATIC_DIR", "static"))
        dist_dir = static_dir / "dist"

        # 1) Built assets requested as "dist/...". Flask captures path as
        #    "dist/js/app.abc.js", so the "dist/" prefix is stripped before
        #    joining with dist_dir to avoid a double-dist path
        #    (static/dist/dist/...).
        dist_prefix = "dist/"
        if path.startswith(dist_prefix):
            served = serve_if_present(dist_dir, path[len(dist_prefix) :], True)
            if served is not None:
                return served

        # 2) Vite-built assets (fonts, etc.). Vite uses base: '/static/' so
        #    CSS references /static/fonts/... but the files live in
        #    static/dist/fonts/...
        served = serve_if_present(dist_dir, path, True)
        if served is not None:
            return served

        # 3) Regular static folder; always served with must-revalidate.
        served = serve_if_present(static_dir, path, False)
        if served is not None:
            return served

        abort(404)
        return None
def register_error_handlers(app):
    """Install the application's error handlers on *app*.

    404, 500 and 413 answer with JSON on API paths and plain text elsewhere;
    401 answers with JSON on API paths and otherwise redirects to the login
    page. WebAPIException, CSRFError and NewsAPIException always produce
    JSON.
    """
    from .auth.decorators import _is_api_path

    def _json_or_text(message, status):
        """Build a JSON error body for API paths, a plain-text one otherwise."""
        if _is_api_path(request.path):
            return make_response(jsonify({"error": message}), status)
        return make_response(message, status)

    @app.errorhandler(404)
    def not_found(error):
        return _json_or_text("Not found", 404)

    @app.errorhandler(500)
    def server_error(error):
        return _json_or_text("Server error", 500)

    @app.errorhandler(401)
    def handle_unauthorized(error):
        if not _is_api_path(request.path):
            # Browser request: send the user to the login page instead.
            from .auth.decorators import _safe_redirect_to_login

            return _safe_redirect_to_login()
        return make_response(
            jsonify({"error": "Authentication required"}),
            401,
        )

    @app.errorhandler(413)
    def handle_request_too_large(error):
        return _json_or_text("Request too large", 413)

    from .exceptions import WebAPIException

    @app.errorhandler(WebAPIException)
    def handle_web_api_exception(error):
        """Log a WebAPIException and return its dict form as JSON."""
        logger.error(
            "Web API error: {} (status {})", error.error_code, error.status_code
        )
        payload = jsonify(error.to_dict())
        return payload, error.status_code

    # CSRF validation failures are reported as JSON with status 400.
    try:
        from flask_wtf.csrf import CSRFError

        @app.errorhandler(CSRFError)
        def handle_csrf_error(error):
            body = jsonify({"error": str(error.description)})
            return make_response(body, 400)
    except ImportError:
        pass

    # News API exceptions are handled globally when the module can be imported.
    try:
        from ..news.exceptions import NewsAPIException

        @app.errorhandler(NewsAPIException)
        def handle_news_api_exception(error):
            """Log a NewsAPIException and return its dict form as JSON."""
            from loguru import logger

            logger.error(
                "News API error: {} (status {})",
                error.error_code,
                error.status_code,
            )
            payload = jsonify(error.to_dict())
            return payload, error.status_code
    except ImportError:
        # News module not available
        pass
def create_database(app):
    """
    DEPRECATED no-op, retained only so existing callers keep working.

    Database creation is now handled per-user via encrypted_db.py; the
    ``app`` argument is ignored.
    """
    return None