Coverage for src/local_deep_research/chat/routes.py: 82%

1"""

2Flask routes for chat API.

4Provides endpoints for:

5- Chat page rendering

6- Session management (create, list, get, archive, delete)

7- Message management (send, list)

8- Research triggering from chat

9"""

11import unicodedata

12import uuid

13from datetime import datetime, timedelta, UTC

14from typing import Any, Dict, List

15from flask import Blueprint, request, jsonify, session

16from loguru import logger

17from sqlalchemy import update as sa_update

18from sqlalchemy.exc import IntegrityError, SQLAlchemyError

20from .service import (

21 ArchiveBlockedError,

22 AttemptInProgress,

23 AttemptNotFound,

24 ChatService,

25 ChatSessionNotFound,

26 DB_EXCEPTIONS,

27)

28from .context import ChatContextManager

29from ..constants import ResearchStatus

30from ..database.models import (

31 ChatMessage,

32 ChatSession,

33 ChatSessionStatus,

34 ResearchHistory,

35 UserActiveResearch,

36)

37from ..database.session_context import get_user_db_session

38from ..exceptions import DuplicateResearchError, SystemAtCapacityError

39from ..security.decorators import require_json_body

40from ..security.rate_limiter import _get_api_user_key, limiter

41from ..settings.manager import SettingsManager

42from ..web.auth.decorators import login_required

43from ..web.utils.templates import render_template_with_defaults

44from ..web.auth.password_utils import (

45 get_user_password,

46 resolve_user_password,

47)

48from ..web.routes.globals import (

49 cleanup_research,

50 is_research_thread_alive,

51)

52from ..web.services.research_service import (

53 run_research_process,

54 start_research_process,

55)

# Blueprint on which every chat page / API route in this module registers.
chat_bp = Blueprint("chat", __name__)

# Session-status whitelists. Both are derived from ``ChatSessionStatus`` so a
# typo can never silently pass validation. The list filter additionally
# accepts the literal "all" sentinel, which widens the listing to every
# status without bypassing the whitelist.
VALID_UPDATE_STATUSES = {
    ChatSessionStatus.ACTIVE.value,
    ChatSessionStatus.ARCHIVED.value,
}
VALID_LIST_STATUSES = {status.value for status in ChatSessionStatus} | {"all"}

# Input length limits, enforced with ``len()`` on the incoming strings.
MAX_QUERY_LENGTH = 10_000
MAX_TITLE_LENGTH = 500
MAX_MESSAGE_LENGTH = 10_000
# Hard cap on `offset` to prevent server-side DoS: get_session_messages
# fetches `limit + offset` rows from BOTH the chat_messages and
# chat_progress_steps tables, so an unbounded offset means an unbounded SQL
# LIMIT. Cursor-based pagination (`before_created_at`) is the recommended
# path; an offset beyond a few pages is not a normal access pattern.
MAX_OFFSET = 1_000

# Exception tuple caught by the HTTP route handlers and mapped to a 500
# response. It covers database failures plus the attribute/type errors that
# can escape request-shape coercion code.
# NOTE(review): intended to subsume ``chat.service.DB_EXCEPTIONS`` (imported
# above, not defined in this file) -- confirm the two stay in sync.
ROUTE_EXCEPTIONS = (
    ValueError,
    RuntimeError,
    SQLAlchemyError,
    AttributeError,
    TypeError,
)

def _load_settings(username):
    """Return every setting stored for *username*, bypassing the cache.

    The cache is skipped (``bypass_cache=True``) to match the call pattern
    in ``research_routes.start_research``: a setting the user changed in
    the UI moments before sending a chat message must apply to the very
    next research rather than being served stale.
    """
    with get_user_db_session(username) as db_session:
        manager = SettingsManager(db_session=db_session)
        return manager.get_all_settings(bypass_cache=True)

105

106

107def _parse_int_param(

108 value: str | None,

109 default: int,

110 min_val: int = 0,

111 max_val: int | None = None,

112) -> int:

113 """Safely parse an integer parameter with bounds checking."""

114 try:

115 result = int(value) if value is not None else default

116 if result < min_val:

117 return min_val

118 if max_val is not None and result > max_val:

119 return max_val

120 return result

121 except (ValueError, TypeError):

122 return default

123

124

125_INVISIBLE_UNICODE_CATEGORIES = {"Cf", "Zl", "Zp"}

126

127

128def _validate_title(title) -> tuple[str, int] | None:

129 """Return (error_message, http_status) when *title* is invalid, else None.

130

131 A title is invalid when it is not a non-empty string or exceeds

132 ``MAX_TITLE_LENGTH``. Callers that allow ``None`` (e.g. create_session

133 where omitting the title is fine) should short-circuit on ``None``

134 before calling this helper.

135

136 Strips Unicode format / line-separator characters (``Cf``/``Zl``/``Zp``,

137 including zero-width spaces U+200B-U+200D and BOM U+FEFF) before the

138 emptiness check so an "invisible" title like 500 zero-width chars is

139 rejected instead of saving a session that looks blank in the UI.

140 """

141 if not isinstance(title, str): 141 ↛ 142line 141 didn't jump to line 142 because the condition on line 141 was never true

142 return ("Title cannot be empty", 400)

143 visible = "".join(

144 c

145 for c in title

146 if unicodedata.category(c) not in _INVISIBLE_UNICODE_CATEGORIES

147 )

148 if not visible.strip():

149 return ("Title cannot be empty", 400)

150 if len(title) > MAX_TITLE_LENGTH:

151 return (

152 f"Title too long (max {MAX_TITLE_LENGTH} characters)",

153 400,

154 )

155 return None

156

157

def _cleanup_chat_send_rows(
    username, research_id, message_id, session_id, reason: str
) -> None:
    """Roll back the rows a chat send committed when the spawn is rejected.

    Called on both the ``DuplicateResearchError`` (409) and the
    ``SystemAtCapacityError`` (429) paths of ``start_research_process``.
    Deletes the research row, the user message and the per-user tracking
    row, and decrements the session's ``message_count``.

    *reason* only labels the log line. A failure of the cleanup itself is
    not re-raised; it is logged at ERROR level so orphan rows and an
    inflated ``message_count`` are visible to ops.
    """
    try:
        with get_user_db_session(username) as db:
            db.query(ResearchHistory).filter_by(id=research_id).delete()
            db.query(ChatMessage).filter_by(id=message_id).delete()
            # The spawn never started, so no live thread owns the per-user
            # cap tracking row. ``research_id`` is a fresh UUID, so the
            # filter can only match the row inserted for this send.
            db.query(UserActiveResearch).filter_by(
                username=username, research_id=research_id
            ).delete()
            # Undo the message_count increment made with the user message.
            decrement_count = (
                sa_update(ChatSession)
                .where(ChatSession.id == session_id)
                .values(message_count=ChatSession.message_count - 1)
            )
            db.execute(decrement_count)
            db.commit()
    except DB_EXCEPTIONS:
        logger.exception(
            f"Cleanup after {reason} chat-send rejection FAILED "
            f"for research {research_id[:8]}... in chat "
            f"{session_id[:8]}...; orphan rows + inflated "
            f"message_count may persist until next sweep."
        )

191

192

class ChatSpawnError(Exception):
    """Mapped HTTP failure raised by ``_spawn_chat_research``.

    Attributes:
        status_code: HTTP status the route should answer with.
        error: Message placed in the ``error`` field of the JSON body
            (also the exception's own message).
        extra: Optional additional fields merged into the JSON payload,
            e.g. ``active_research_id`` on the 409 path.

    ``send_message`` and ``retry_attempt`` both catch this and turn it
    into the same ``jsonify(...)`` shape, so the spawn path has a single
    source of truth for its error responses.
    """

    def __init__(self, status_code: int, error: str, **extra):
        super().__init__(error)
        self.extra = extra
        self.error = error
        self.status_code = status_code

208

209

def _spawn_chat_research(
    username: str,
    session_id: str,
    content: str,
    settings_snapshot: Dict[str, Any],
    research_context: Dict[str, Any],
    messages: List[Dict[str, Any]],
    service: "ChatService",
    research_mode: str = "quick",
) -> tuple[str, str]:
    """Persist the user turn and its IN_PROGRESS research, then start the worker.

    This is the one spawn path shared by ``send_message`` (new chat turn)
    and ``retry_attempt`` (delete the failed turn, re-submit the same
    content). On success it returns ``(research_id, message_id)``.

    The CALLER is responsible for having already verified that:
      - the session exists and is active (``ChatService.get_session``),
      - the per-session in-progress guard passed (no other live research),
      - the per-user concurrent cap is not exceeded.

    Failures are reported as ``ChatSpawnError`` with these status codes:
      - 400: malformed numeric settings (``iterations`` / ``questions``).
      - 404: the session vanished between the caller's existence check and
        the atomic write performed here.
      - 409: lost the race on the partial unique index
        ``ux_research_history_chat_session_in_progress`` (two concurrent
        sends both passed the per-session guard), or
        ``DuplicateResearchError`` from ``start_research_process`` (a live
        thread already owns the id).
      - 429: ``SystemAtCapacityError`` from ``start_research_process`` (the
        cap filled between the caller's check and the spawn).

    The function never returns ``None``; the caller catches
    ``ChatSpawnError`` once and converts it to ``jsonify`` + status. A
    ``ValueError`` from the atomic write whose message does not contain
    "not found" is re-raised unchanged.
    """
    import threading

    def _snapshot_value(key, default=None):
        # Settings snapshot entries are read as {"value": ...} mappings.
        return settings_snapshot.get(key, {}).get("value", default)

    # Numeric settings are parsed FIRST: a non-numeric string stored in the
    # user's settings must produce a clean 400 before the atomic write
    # below commits the user message + IN_PROGRESS research row.
    try:
        iterations = int(_snapshot_value("search.iterations", 3))
        questions = int(_snapshot_value("search.questions_per_iteration", 1))
    except (ValueError, TypeError):
        raise ChatSpawnError(
            400,
            "Invalid numeric value in search settings "
            "(iterations / questions_per_iteration).",
        )

    research_id = str(uuid.uuid4())

    # ---- Atomic write: user message + research row in ONE transaction ----
    # Any IntegrityError or concurrent delete hitting the research insert
    # rolls the user message back as well, so no orphan can be left behind.
    # The UPDATE...RETURNING inside insert_message_in_db is also the
    # authoritative "session still exists" check: a session deleted since
    # the caller looked surfaces as ValueError("... not found ...") and is
    # mapped to 404 below.
    try:
        with get_user_db_session(username) as db_session:
            message_id = service.insert_message_in_db(
                db_session,
                session_id=session_id,
                role="user",
                content=content,
                message_type="followup" if len(messages) else "query",
            )

            created_at = datetime.now(UTC).isoformat()
            db_session.add(
                ResearchHistory(
                    id=research_id,
                    query=content,
                    mode=research_mode,
                    status=ResearchStatus.IN_PROGRESS.value,
                    created_at=created_at,
                    progress_log=[{"time": created_at, "progress": 0}],
                    research_meta={
                        "submission": {
                            "chat_session_id": session_id,
                            "message_id": message_id,
                            "research_mode": research_mode,
                        },
                    },
                    chat_session_id=session_id,
                )
            )

            # Count this research toward the per-user concurrent cap. It is
            # added in the SAME transaction as the research row so that the
            # IntegrityError rollback below undoes both.
            db_session.add(
                UserActiveResearch(
                    username=username,
                    research_id=research_id,
                    status=ResearchStatus.IN_PROGRESS,
                    thread_id=str(threading.current_thread().ident),
                    settings_snapshot=settings_snapshot,
                )
            )

            try:
                db_session.commit()
            except IntegrityError:
                # Two near-simultaneous POSTs both passed the per-session
                # guard; the partial unique index on (chat_session_id)
                # WHERE status='in_progress' (migration 0010) rejects the
                # loser here. The rollback also undoes the user-message
                # INSERT and the message_count increment.
                db_session.rollback()
                logger.warning(
                    f"Concurrent in-progress research race for chat "
                    f"{session_id[:8]}..."
                )
                raise ChatSpawnError(
                    409,
                    "Research already in progress on this chat session.",
                )
    except ValueError as exc:
        # ``insert_message_in_db`` raises ValueError("not found") when the
        # session row disappeared between the existence check and the
        # UPDATE...RETURNING.
        if "not found" in str(exc).lower():
            raise ChatSpawnError(404, "Session not found")
        raise
    # ---- end atomic write ----

    # Password needed for metrics writes inside the worker thread.
    pw = get_user_password(username)

    model_provider = _snapshot_value("llm.provider", "")
    model = _snapshot_value("llm.model", "")
    search_engine = _snapshot_value("search.tool", "")
    custom_endpoint = _snapshot_value("llm.openai_endpoint.url")
    user_strategy = _snapshot_value("search.search_strategy", "langgraph-agent")

    # Follow-up turns run the contextual follow-up strategy, which wraps
    # the user's preferred strategy as its delegate.
    strategy = user_strategy
    if research_context.get("is_multi_turn"):
        research_context["delegate_strategy"] = user_strategy
        strategy = "enhanced-contextual-followup"

    spawn_kwargs = dict(
        username=username,
        user_password=pw,
        model_provider=model_provider,
        model=model,
        search_engine=search_engine,
        custom_endpoint=custom_endpoint,
        strategy=strategy,
        iterations=iterations,
        questions_per_iteration=questions,
        research_context=research_context,
        chat_session_id=session_id,
        chat_message_id=message_id,
        settings_snapshot=settings_snapshot,
    )

    # Start the worker thread. ``DuplicateResearchError`` and
    # ``SystemAtCapacityError`` derive from ``Exception`` (not
    # RuntimeError), so they are NOT in ROUTE_EXCEPTIONS and would
    # otherwise escape as a generic 500 -- leaving the just-committed user
    # message + research row as orphans. They are caught here, the side
    # effects are undone via ``_cleanup_chat_send_rows``, and the failure
    # is re-raised as ``ChatSpawnError`` for the caller to translate.
    try:
        start_research_process(
            research_id,
            content,
            research_mode,
            run_research_process,
            **spawn_kwargs,
        )
    except DuplicateResearchError:
        logger.warning(
            f"DuplicateResearchError on chat spawn for "
            f"{research_id[:8]}... (chat {session_id[:8]}...)"
        )
        _cleanup_chat_send_rows(
            username, research_id, message_id, session_id, "duplicate"
        )
        raise ChatSpawnError(
            409, "Research already in progress on this chat session."
        )
    except SystemAtCapacityError:
        logger.warning(
            f"SystemAtCapacityError on chat spawn for "
            f"{research_id[:8]}... (chat {session_id[:8]}...)"
        )
        _cleanup_chat_send_rows(
            username, research_id, message_id, session_id, "capacity"
        )
        raise ChatSpawnError(
            429, "Server is at research capacity. Please retry shortly."
        )

    logger.info(
        f"Started chat research {research_id[:8]}... for chat "
        f"{session_id[:8]}..."
    )
    return research_id, message_id

424

425

def _chat_spawn_response(exc: ChatSpawnError):
    """Build the ``(jsonify(...), status)`` pair for a ``ChatSpawnError``.

    ``send_message`` and ``retry_attempt`` report spawn failures with the
    same body shape, so the translation is defined once here. The body
    starts as ``{"success": False, "error": exc.error}``; any entries in
    ``exc.extra`` (e.g. ``active_research_id``) are merged on top.
    """
    body = {"success": False, "error": exc.error, **exc.extra}
    return jsonify(body), exc.status_code

437

438

439# ============================================================================

440# Concurrency-guard helpers shared by send_message + retry_attempt

441# ============================================================================

442

443

def _enforce_chat_session_research_slot(
    cap_db, username: str, session_id: str
) -> tuple[str, int] | None:
    """Decide whether a send/retry must be refused for capacity reasons.

    Returns ``(error_message, http_status)`` when the request has to be
    rejected and ``None`` when it may proceed.

    All queries run on the caller-supplied ``cap_db`` session. This helper
    does NOT run the stale-row reclaim sweep: the caller does that first
    (``reclaim_stale_user_active_research``) on the same session so the
    counts read here are accurate.

    Checks, in order:
      1. Per-session guard (409): at most one in-progress research per
         chat session.
      2. Per-user cap (429): at most ``app.max_concurrent_researches``
         (default 3) in-progress researches per user across ALL sessions,
         mirroring ``research_routes.start_research``.
    """
    # The per-session guard fires for ANY in-progress research on the
    # session -- INCLUDING the research being retried. Retry routes must
    # therefore make sure the target is no longer in progress (typically
    # via delete_attempt's AttemptInProgress semantics) BEFORE calling.
    session_busy = (
        cap_db.query(ResearchHistory)
        .filter_by(
            chat_session_id=session_id,
            status=ResearchStatus.IN_PROGRESS,
        )
        .first()
    )
    if session_busy:
        return (
            "Research already in progress on this chat session. "
            "Stop it before sending a new message.",
            409,
        )

    running = (
        cap_db.query(UserActiveResearch)
        .filter_by(
            username=username,
            status=ResearchStatus.IN_PROGRESS,
        )
        .count()
    )
    settings = SettingsManager(db_session=cap_db)
    allowed = settings.get_setting("app.max_concurrent_researches", 3)
    if running >= allowed:
        return (
            f"Concurrent research limit reached ({running}/"
            f"{allowed}). Wait for an existing research to finish.",
            429,
        )

    return None

506

507

508# ============================================================================

509# Page Routes

510# ============================================================================

511

512

@chat_bp.route("/chat/")
@chat_bp.route("/chat/<session_id>")
@login_required
def chat_page(session_id=None):
    """Serve the chat page.

    Args:
        session_id: ID of an existing session to open; ``None`` when the
            page is requested without one (``/chat/``).
    """
    template_name = "pages/chat.html"
    return render_template_with_defaults(template_name, session_id=session_id)

526

527

528# ============================================================================

529# Session API Routes

530# ============================================================================

531

532

@chat_bp.route("/api/chat/sessions", methods=["POST"])
@login_required
# Keyed per user (the default is per IP). Otherwise users behind a shared
# NAT/proxy share one bucket and can DoS each other for legitimate chat use.
@limiter.limit("20 per minute", key_func=_get_api_user_key)
@require_json_body(
    error_format="success",
    error_message="Request body must be a JSON object",
)
def create_session():
    """
    Create a new chat session for the logged-in user.

    Request body:
        {
            "initial_query": "optional initial question",
            "title": "optional custom title"
        }

    Returns:
        {
            "success": true,
            "session_id": "uuid",
            "session": { session data }
        }

    Responds 400 when ``initial_query`` is not a string or exceeds
    ``MAX_QUERY_LENGTH``, or when a supplied ``title`` fails
    ``_validate_title``; 500 when the session cannot be created or read
    back.
    """

    def _bad_request(message):
        return jsonify({"success": False, "error": message}), 400

    try:
        username = session.get("username")

        # @require_json_body already guaranteed a dict body, and Flask
        # caches the parse, so this is not a second parse.
        payload = request.get_json(silent=True)
        initial_query = payload.get("initial_query")
        title = payload.get("title")

        # Refuse a non-string initial_query up front so len() and later
        # string operations cannot raise TypeError -> 500.
        if not (initial_query is None or isinstance(initial_query, str)):
            return _bad_request("initial_query must be a string")

        if initial_query and len(initial_query) > MAX_QUERY_LENGTH:
            return _bad_request(
                f"Initial query too long (max {MAX_QUERY_LENGTH} characters)"
            )

        # An omitted title is fine; only validate one that was supplied.
        if title is not None:
            title_error = _validate_title(title)
            if title_error is not None:
                message, http_status = title_error
                return jsonify({"success": False, "error": message}), http_status

        settings_snapshot = _load_settings(username)

        service = ChatService(username)
        session_id = service.create_session(
            initial_query=initial_query,
            title=title,
            settings_snapshot=settings_snapshot,
        )

        # Read the freshly created session back for the response.
        try:
            session_data = service.get_session(session_id)
        except ChatSessionNotFound:
            # The session was created in this very request, so "not found"
            # here means a delete race or a storage failure. session_id is
            # deliberately kept out of the log message (flagged as
            # sensitive by check-sensitive-logging); the stack trace
            # carries enough context to diagnose.
            logger.exception("Just-created chat session missing on read-back")
            return jsonify(
                {"success": False, "error": "Failed to load created session"}
            ), 500

        response_body = {
            "success": True,
            "session_id": session_id,
            "session": session_data,
        }
        return jsonify(response_body)

    except ROUTE_EXCEPTIONS:
        logger.exception("Error creating chat session")
        return jsonify(
            {
                "success": False,
                "error": "Failed to create chat session",
            }
        ), 500

633

634

@chat_bp.route(
    "/api/chat/sessions/<session_id>/generate-title", methods=["POST"]
)
@login_required
# Keyed per user, with a lower limit than create_session: every call is a
# real LLM round-trip on a server-paid endpoint, whereas create_session is
# zero-LLM DB work. Without per-user keying, shared-IP users share a bucket.
@limiter.limit("10 per minute", key_func=_get_api_user_key)
@require_json_body(
    error_format="success",
    error_message="Request body must be a JSON object",
)
def generate_session_title(session_id):
    """
    Regenerate the title of a session with the configured LLM.

    The frontend calls this fire-and-forget right after creating a
    session, so the synchronous POST /api/chat/sessions response is not
    held up by an LLM round-trip.

    Request body: {"query": "the initial research query"}

    Returns: {"success": true, "title": "..."} on success,
             {"success": false, "title": null} (HTTP 200) when no title
             was produced, {"success": false, "error": "..."} on failure.
    """
    try:
        username = session.get("username")
        # @require_json_body already guaranteed a dict body.
        payload = request.get_json(silent=True)
        query = payload.get("query")

        if not query:
            return jsonify(
                {"success": False, "error": "query is required"}
            ), 400

        query_is_valid = isinstance(query, str) and len(query) <= MAX_QUERY_LENGTH
        if not query_is_valid:
            return jsonify(
                {
                    "success": False,
                    "error": f"query must be a string up to {MAX_QUERY_LENGTH} chars",
                }
            ), 400

        service = ChatService(username)
        # Existence check only; the returned session data is not needed.
        try:
            service.get_session(session_id)
        except ChatSessionNotFound:
            return jsonify(
                {"success": False, "error": "Session not found"}
            ), 404

        settings_snapshot = _load_settings(username)
        new_title = service.regenerate_title_with_llm(
            session_id, query, settings_snapshot
        )
        if new_title:
            return jsonify({"success": True, "title": new_title})

        # LLM disabled, or the LLM call failed -- the existing fallback
        # title stays in place.
        return jsonify({"success": False, "title": None}), 200

    except ROUTE_EXCEPTIONS:
        logger.exception("Error regenerating chat title")
        return jsonify(
            {"success": False, "error": "Failed to regenerate title"}
        ), 500

702

703

@chat_bp.route("/api/chat/sessions", methods=["GET"])
@login_required
def list_sessions():
    """
    List the current user's chat sessions.

    Query params:
        - status: active, archived, deleted, or all (default: active);
          an unrecognised value falls back to active
        - limit: max sessions to return (default: 20, clamped to 1-100)
        - offset: pagination offset (default: 0, clamped to 0-MAX_OFFSET)

    Returns:
        {
            "success": true,
            "sessions": [ session data list ]
        }
    """
    try:
        username = session.get("username")
        default_status = ChatSessionStatus.ACTIVE.value

        # Anything outside the whitelist silently becomes the default.
        requested_status = request.args.get("status", default_status)
        status = (
            requested_status
            if requested_status in VALID_LIST_STATUSES
            else default_status
        )

        limit = _parse_int_param(
            request.args.get("limit"), 20, min_val=1, max_val=100
        )
        offset = _parse_int_param(
            request.args.get("offset"), 0, min_val=0, max_val=MAX_OFFSET
        )

        sessions = ChatService(username).list_sessions(
            status=status, limit=limit, offset=offset
        )
        return jsonify({"success": True, "sessions": sessions})

    except ROUTE_EXCEPTIONS:
        logger.exception("Error listing chat sessions")
        return jsonify(
            {
                "success": False,
                "error": "Failed to list chat sessions",
            }
        ), 500

754

755

@chat_bp.route("/api/chat/sessions/<session_id>", methods=["GET"])
@login_required
def get_session(session_id):
    """
    Fetch one chat session of the current user.

    Returns:
        {
            "success": true,
            "session": { session data }
        }

    Responds 404 when the service raises ``ChatSessionNotFound`` and 500
    on any error in ``ROUTE_EXCEPTIONS``.
    """
    try:
        service = ChatService(session.get("username"))
        try:
            session_data = service.get_session(session_id)
        except ChatSessionNotFound:
            not_found = {"success": False, "error": "Session not found"}
            return jsonify(not_found), 404

        return jsonify({"success": True, "session": session_data})

    except ROUTE_EXCEPTIONS:
        logger.exception("Error getting chat session")
        return jsonify(
            {
                "success": False,
                "error": "Failed to get chat session",
            }
        ), 500

796

797

@chat_bp.route("/api/chat/sessions/<session_id>", methods=["PATCH"])
@login_required
# Per-user keying, like the other state-changing chat routes. Without a
# per-route limit, rename/archive was bounded only by the global limiter,
# leaving an uneven abuse surface across the session API.
@limiter.limit("30 per minute", key_func=_get_api_user_key)
@require_json_body(
    error_format="success",
    error_message="Request body must be a JSON object",
)
def update_session(session_id):
    """
    Update a chat session (rename, archive, reactivate).

    Request body (at least one field required):
        {
            "title": "new title",     // optional
            "status": "archived"      // optional: active, archived
        }

    Returns:
        {
            "success": true,
            "session": { session data }
        }

    Error responses:
        - 400: neither field present, invalid title, or invalid status.
        - 404: session not found (before the update or on read-back).
        - 409: archiving was blocked (``ArchiveBlockedError``).
        - 500: an update reported failure, or an unexpected error.

    ALL supplied fields are validated before ANY of them is written, so a
    request carrying a valid title and an invalid status is rejected
    without renaming the session.
    """
    try:
        username = session.get("username")
        # @require_json_body has already guaranteed a dict body.
        data = request.get_json(silent=True)

        # Require at least one valid field
        has_title = "title" in data
        has_status = "status" in data
        if not (has_title or has_status):
            return jsonify(
                {
                    "success": False,
                    "error": "Request must include at least one of: title, status",
                }
            ), 400

        service = ChatService(username)

        try:
            service.get_session(session_id)
        except ChatSessionNotFound:
            return jsonify(
                {"success": False, "error": "Session not found"}
            ), 404

        # ---- Validation phase: no writes happen until every field passes.
        title = None
        if has_title:
            title = data["title"]
            err = _validate_title(title)
            if err is not None:
                msg, http_status = err
                return jsonify({"success": False, "error": msg}), http_status

        new_status = None
        if has_status:
            new_status = data["status"]
            # The isinstance guard matters: a JSON list/dict is unhashable,
            # and testing it for membership in a set raises TypeError,
            # which would surface as a 500 instead of this 400.
            if (
                not isinstance(new_status, str)
                or new_status not in VALID_UPDATE_STATUSES
            ):
                return jsonify(
                    {"success": False, "error": "Invalid status value"}
                ), 400

        # ---- Apply phase. ``ops_ok`` turns falsy as soon as one service
        # call reports failure through its return value.
        ops_ok = True

        if has_title:
            ops_ok = service.update_session_title(session_id, title) and ops_ok

        if has_status:
            if new_status == ChatSessionStatus.ACTIVE.value:
                ops_ok = service.reactivate_session(session_id) and ops_ok
            elif new_status == ChatSessionStatus.ARCHIVED.value:
                try:
                    ops_ok = service.archive_session(session_id) and ops_ok
                except ArchiveBlockedError:
                    # Symmetric with send-to-archived (also 409): the
                    # client should stop the research or wait for it to
                    # finish before archiving the session.
                    # Hard-coded message -- never echo str(exc) here so a
                    # future ArchiveBlockedError raise with interpolated
                    # data can't leak to the response (information
                    # exposure through an exception, CWE-209).
                    return jsonify(
                        {
                            "success": False,
                            "error": "Cannot archive: research in_progress. Stop it first.",
                        }
                    ), 409

        try:
            session_data = service.get_session(session_id)
        except ChatSessionNotFound:
            # Session was deleted by a concurrent request between the
            # update above and this read-back. Treat as 404 rather than
            # returning a partial success with null data.
            return jsonify(
                {"success": False, "error": "Session not found"}
            ), 404

        if not ops_ok:
            # The read-back above succeeded, so the session still exists, yet
            # an update reported failure -- a DB write error was swallowed
            # into a False return. Surface it instead of reporting success.
            logger.error(
                f"Chat session update failed at DB layer for "
                f"{session_id[:8]}..."
            )
            return jsonify(
                {"success": False, "error": "Failed to update session"}
            ), 500

        return jsonify(
            {
                "success": True,
                "session": session_data,
            }
        )

    except ROUTE_EXCEPTIONS:
        logger.exception("Error updating chat session")
        return jsonify(
            {
                "success": False,
                "error": "Failed to update chat session",
            }
        ), 500

915

916

@chat_bp.route("/api/chat/sessions/<session_id>", methods=["DELETE"])
@login_required
# Keyed per user, like the other state-changing chat routes. Caps bulk
# delete attempts that the global limiter alone left under-constrained.
@limiter.limit("30 per minute", key_func=_get_api_user_key)
def delete_session(session_id):
    """Permanently delete a chat session.

    Responds ``{"success": true}`` when the service reports the delete as
    done, 404 when it returns a falsy result, and 500 on any error in
    ``ROUTE_EXCEPTIONS``.
    """
    try:
        service = ChatService(session.get("username"))
        if service.delete_session(session_id):
            return jsonify({"success": True})

        return jsonify(
            {
                "success": False,
                "error": "Session not found",
            }
        ), 404

    except ROUTE_EXCEPTIONS:
        logger.exception("Error deleting chat session")
        return jsonify(
            {
                "success": False,
                "error": "Failed to delete chat session",
            }
        ), 500

951

952

953# ============================================================================

954# Message API Routes

955# ============================================================================

956

957

@chat_bp.route("/api/chat/sessions/<session_id>/messages", methods=["GET"])
@login_required
def get_messages(session_id):
    """
    Return one page of messages for a chat session.

    Query params:
        - limit: page size (default 50, clamped to 1..100)
        - offset: offset into the DESC slice (default 0, capped at MAX_OFFSET)
        - before_created_at: ISO timestamp cursor; only entries strictly
          older than it are returned. Pass the oldest ``created_at`` that is
          currently displayed to implement "load older messages".
        - before_id: optional id of the oldest displayed row. Together with
          ``before_created_at`` it forms a composite cursor so rows sharing
          the boundary timestamp are not silently dropped.

    Returns:
        {
            "success": true,
            "messages": [ message data list, ASC by created_at ],
            "has_more": bool,
            "in_progress_research_id": str | null
        }

        A 404 payload is returned when the session does not exist and a
        generic 500 payload when one of ``ROUTE_EXCEPTIONS`` is raised.
    """
    try:
        username = session.get("username")
        query = request.args
        limit = _parse_int_param(query.get("limit"), 50, min_val=1, max_val=100)
        offset = _parse_int_param(
            query.get("offset"), 0, min_val=0, max_val=MAX_OFFSET
        )
        # Empty-string cursors are treated the same as absent ones.
        before_created_at = query.get("before_created_at") or None
        before_id = query.get("before_id") or None

        service = ChatService(username)

        # Existence check only; the returned session data is not needed here.
        try:
            service.get_session(session_id)
        except ChatSessionNotFound:
            missing = {"success": False, "error": "Session not found"}
            return jsonify(missing), 404

        # Ask for one row beyond the page so "has_more" can be answered
        # without issuing a second query.
        rows = service.get_session_messages(
            session_id,
            limit=limit + 1,
            offset=offset,
            before_created_at=before_created_at,
            before_id=before_id,
        )
        has_more = len(rows) > limit
        if has_more:
            # Keep the last ``limit`` rows; the peeked extra row is dropped.
            rows = rows[-limit:]

        # chat.js (loadSession) uses this field to restore the live
        # "thinking" indicator rather than inferring in-flight state from
        # message metadata.
        active_id = service.get_in_progress_research_id(session_id)

        payload = {
            "success": True,
            "messages": rows,
            "has_more": has_more,
            "in_progress_research_id": active_id,
        }
        return jsonify(payload)

    except ROUTE_EXCEPTIONS:
        logger.exception("Error getting chat messages")
        failure = {"success": False, "error": "Failed to get chat messages"}
        return jsonify(failure), 500

1041

1042

@chat_bp.route("/api/chat/sessions/<session_id>/messages", methods=["POST"])
@login_required
# Keyed per user instead of the default per-IP bucket: this endpoint can start
# a full research run, and users behind one IP would otherwise throttle each
# other.
@limiter.limit("10 per minute", key_func=_get_api_user_key)
@require_json_body(
    error_format="success",
    error_message="Request body must be a JSON object",
)
def send_message(session_id):
    """
    Store a user message in a chat session and optionally start research.

    Steps:
        1. Validate ``content`` and ``trigger_research``.
        2. Check that the session exists and is active.
        3. With ``trigger_research`` false: persist the message only.
        4. Otherwise: run the concurrency guards, build the research
           context, then insert the message and spawn the research run.

    Request body:
        {
            "content": "user message",
            "trigger_research": true  // optional, default true, strict bool
        }

    Note: research mode is always "quick" in chat (intentional for v1).

    Returns:
        {
            "success": true,
            "message_id": "uuid",
            "session_id": "uuid",
            "research_id": "uuid or null",
            "research_mode": "quick/none"
        }

        Failures use ``{"success": false, "error": "..."}`` with status
        400 (invalid body), 401 (expired login session), 404 (unknown chat
        session), 409 (inactive session or research already running; the
        latter also carries ``active_research_id``), 429 (per-user research
        cap) or 500 (one of ``ROUTE_EXCEPTIONS``). Spawn failures are
        rendered by ``_chat_spawn_response``.
    """

    def _reject(message, status, **extra):
        # Shared failure envelope for every early exit in this route.
        return jsonify({"success": False, "error": message, **extra}), status

    try:
        username = session.get("username")
        # Per the decorator's contract, @require_json_body has already
        # enforced a JSON object body; this re-reads the parsed payload.
        payload = request.get_json(silent=True)

        if not payload or not payload.get("content"):
            return _reject("Message content is required", 400)

        # Type-check before .strip() so a non-string yields a 400 rather
        # than an AttributeError.
        raw_content = payload["content"]
        if not isinstance(raw_content, str):
            return _reject("content must be a string", 400)

        content = raw_content.strip()
        if not content:
            # Whitespace-only input counts as missing content.
            return _reject("Message content is required", 400)

        if len(content) > MAX_MESSAGE_LENGTH:
            return _reject(
                f"Message too long (max {MAX_MESSAGE_LENGTH} characters)", 400
            )

        # Strict boolean: values such as "no" or 0 are rejected instead of
        # being coerced, so a client trying to suppress research never
        # launches an unwanted (paid) run by accident.
        trigger_research = payload.get("trigger_research", True)
        if not isinstance(trigger_research, bool):
            return _reject("trigger_research must be a boolean", 400)

        service = ChatService(username)

        # Fast-fail existence check. The write path performs its own
        # authoritative check, which covers a delete racing with this send.
        try:
            session_data = service.get_session(session_id)
        except ChatSessionNotFound:
            return _reject("Session not found", 404)

        # Non-active sessions are read-only until reactivated.
        if session_data.get("status") != ChatSessionStatus.ACTIVE.value:
            return _reject(
                "This chat is archived. Reactivate it to continue.", 409
            )

        # Recent history, used for the message type and research context.
        messages = service.get_session_messages(session_id, limit=20)

        if not trigger_research:
            # Research suppressed: the user message is still persisted.
            try:
                with get_user_db_session(username) as db_session:
                    message_id = service.insert_message_in_db(
                        db_session,
                        session_id=session_id,
                        role="user",
                        content=content,
                        message_type=(
                            "followup" if len(messages) else "query"
                        ),
                    )
                    db_session.commit()
            except ValueError as exc:
                # A "not found" ValueError means the session vanished
                # between the check above and the insert.
                if "not found" in str(exc).lower():
                    return _reject("Session not found", 404)
                raise

            return jsonify(
                {
                    "success": True,
                    "message_id": message_id,
                    "session_id": session_id,
                    "research_id": None,
                    "research_mode": "none",
                }
            )

        # ---- Research path (always quick mode in chat, v1 scope) ----
        research_mode = "quick"

        # Confirm the DB password is resolvable before creating rows or
        # spawning a worker: the background thread writes metrics to the
        # user's encrypted database, and without the password those writes
        # are silently dropped (issue #4457). The value is discarded here.
        _pw, session_expired = resolve_user_password(username)
        if session_expired:
            return _reject(
                "Your session has expired. Please log out "
                "and log back in to continue.",
                401,
            )

        # ---- Concurrency guards (per-session + global per-user) ----
        # All guards share one DB session so a stale-row reclaim is visible
        # to the checks that follow it.
        #
        # Only rows older than the grace window are reclaimed: a brand-new
        # IN_PROGRESS row exists briefly before its worker registers, and
        # during that window the thread looks dead even though the spawn is
        # in flight.
        grace_seconds = 30
        grace_cutoff_dt = datetime.now(UTC) - timedelta(seconds=grace_seconds)
        # ResearchHistory.created_at is an ISO-8601 string column, while
        # UserActiveResearch.started_at is a datetime column.
        grace_cutoff_iso = grace_cutoff_dt.isoformat()

        with get_user_db_session(username) as cap_db:
            # 1. Mark as FAILED any IN_PROGRESS research on this chat that
            #    is past the grace cutoff and has no live worker thread.
            stale_rows = (
                cap_db.query(ResearchHistory)
                .filter(
                    ResearchHistory.chat_session_id == session_id,
                    ResearchHistory.status == ResearchStatus.IN_PROGRESS,
                    ResearchHistory.created_at < grace_cutoff_iso,
                )
                .all()
            )
            reclaimed = False
            for stale in stale_rows:
                if is_research_thread_alive(stale.id):
                    continue
                logger.warning(
                    f"Reclaiming stale chat research {stale.id[:8]}... "
                    f"(thread dead) on chat {session_id[:8]}..."
                )
                stale.status = ResearchStatus.FAILED
                cleanup_research(stale.id)
                reclaimed = True
            if reclaimed:
                cap_db.commit()

            # 2. Per-session guard: at most one live research per chat.
            running = (
                cap_db.query(ResearchHistory)
                .filter_by(
                    chat_session_id=session_id,
                    status=ResearchStatus.IN_PROGRESS,
                )
                .first()
            )
            if running:
                return _reject(
                    "Research already in progress on this chat session. Stop it before sending a new message.",
                    409,
                    active_research_id=running.id,
                )

            # 3. Reclaim stale UserActiveResearch rows (same grace window)
            #    so the per-user count below is not inflated by dead threads.
            from ..web.routes.globals import (
                reclaim_stale_user_active_research,
            )

            if reclaim_stale_user_active_research(
                cap_db,
                username,
                grace_cutoff_dt=grace_cutoff_dt,
                logger=logger,
            ):
                cap_db.commit()

            # 4. Global per-user cap, so multiple chat tabs cannot exceed
            #    the configured number of concurrent researches.
            active_count = (
                cap_db.query(UserActiveResearch)
                .filter_by(
                    username=username,
                    status=ResearchStatus.IN_PROGRESS,
                )
                .count()
            )
            max_concurrent = SettingsManager(db_session=cap_db).get_setting(
                "app.max_concurrent_researches", 3
            )
            if active_count >= max_concurrent:
                return _reject(
                    f"Concurrent research limit reached "
                    f"({active_count}/{max_concurrent}). "
                    "Wait for an existing research to finish.",
                    429,
                )
        # ---- end concurrency guards ----

        # Read-only preparation: settings snapshot plus a research context
        # that condenses the prior conversation around the new question.
        settings_snapshot = _load_settings(username)
        context_manager = ChatContextManager(
            session_id,
            messages,
            session_data.get("accumulated_context"),
            settings_snapshot=settings_snapshot,
        )
        research_context = context_manager.build_research_context(
            current_query=content
        )

        # Insert the user message and research row, then spawn the worker.
        # Failures surface as ChatSpawnError and are rendered by
        # _chat_spawn_response, the same path retry_attempt uses.
        try:
            research_id, message_id = _spawn_chat_research(
                username,
                session_id,
                content,
                settings_snapshot,
                research_context,
                messages,
                service,
                research_mode=research_mode,
            )
        except ChatSpawnError as exc:
            return _chat_spawn_response(exc)

        return jsonify(
            {
                "success": True,
                "message_id": message_id,
                "session_id": session_id,
                "research_id": research_id,
                "research_mode": research_mode,
            }
        )

    except ROUTE_EXCEPTIONS:
        logger.exception("Error sending chat message")
        return _reject("Failed to send message", 500)

1386

1387

1388# ============================================================================

1389# Per-attempt API Routes (delete + retry a single chat turn)

1390# ============================================================================

1391

1392

@chat_bp.route(
    "/api/chat/sessions/<session_id>/attempts/<research_id>",
    methods=["DELETE"],
)
@login_required
# Rate-limited per user, matching the other state-changing chat routes, so
# bulk deletes are capped per account instead of relying on the global limiter.
@limiter.limit("30 per minute", key_func=_get_api_user_key)
def delete_attempt(session_id, research_id):
    """Remove one chat attempt (user message + research + response).

    The work is delegated to ``ChatService.delete_attempt``. Its two
    domain errors are translated here:

    - ``AttemptNotFound``   -> 404 ``"Attempt not found"``
    - ``AttemptInProgress`` -> 409 with ``active_research_id`` set to the
      requested ``research_id``; the client must stop the research first
      (or wait for it to fail). The service is expected to raise this only
      while the worker thread is alive, and to reclaim and delete stale
      IN_PROGRESS rows whose thread is dead.

    Returns ``{"success": true}`` on success and a generic 500 payload when
    one of ``ROUTE_EXCEPTIONS`` is raised.
    """
    try:
        service = ChatService(session.get("username"))
        try:
            service.delete_attempt(session_id, research_id)
        except AttemptNotFound:
            missing = {"success": False, "error": "Attempt not found"}
            return jsonify(missing), 404
        except AttemptInProgress:
            busy = {
                "success": False,
                "error": (
                    "Research is in progress. Stop it before deleting "
                    "the attempt."
                ),
                "active_research_id": research_id,
            }
            return jsonify(busy), 409
        else:
            return jsonify({"success": True})

    except ROUTE_EXCEPTIONS:
        logger.exception("Error deleting chat attempt")
        failure = {"success": False, "error": "Failed to delete attempt"}
        return jsonify(failure), 500

1437

1438

@chat_bp.route(
    "/api/chat/sessions/<session_id>/attempts/<research_id>/retry",
    methods=["POST"],
)
@login_required
# Same per-minute cap as send_message: retry spawns a full research run,
# so it shares the spawn budget rather than getting a separate one (else
# alternating send/retry would let a user exceed the intended rate).
@limiter.limit("10 per minute", key_func=_get_api_user_key)
def retry_attempt(session_id, research_id):
    """Retry a chat attempt: delete the old turn, re-submit same content.

    Flow (every check that can fail runs before the destructive delete):

    1. Session must exist (404) and be active (409).
    2. The user's DB password must be resolvable (401 when the login
       session has expired).
    3. Concurrency guards: stale rows are reclaimed, then
       ``_enforce_chat_session_research_slot`` decides whether the session
       or the user is at capacity. On a 409 the response's
       ``active_research_id`` is the research actually IN_PROGRESS on this
       session, which may differ from the retry target.
    4. The original query text is read via
       ``ChatService.get_original_attempt_query`` (404 if the attempt is
       unknown, 400 if the text is empty).
    5. The old attempt is deleted and the same spawn path as
       ``send_message`` is run.

    Returns the SAME shape as ``send_message`` so the client can
    subscribe to the new research_id via its existing post-send flow:

        {
            "success": true,
            "session_id": "uuid",
            "research_id": "uuid",
            "message_id": "uuid",
            "research_mode": "quick"
        }

    Spawn failures are rendered by ``_chat_spawn_response``; a generic 500
    payload is returned when one of ``ROUTE_EXCEPTIONS`` is raised.
    """
    try:
        username = session.get("username")
        service = ChatService(username)

        # Verify session exists + is active BEFORE doing anything
        # destructive. The get_session call also seeds session_data for
        # the context-builder below.
        try:
            session_data = service.get_session(session_id)
        except ChatSessionNotFound:
            return jsonify(
                {"success": False, "error": "Session not found"}
            ), 404

        if session_data.get("status") != ChatSessionStatus.ACTIVE.value:
            return jsonify(
                {
                    "success": False,
                    "error": (
                        "This chat is archived. Reactivate it to continue."
                    ),
                }
            ), 409

        # Resolve DB password BEFORE any destructive op: the spawn path
        # needs it for metrics writes, and re-checking AFTER delete_attempt
        # would leave the attempt gone on a session-expired 401.
        _pw, session_expired = resolve_user_password(username)
        if session_expired:
            return jsonify(
                {
                    "success": False,
                    "error": (
                        "Your session has expired. Please log out and "
                        "log back in to continue."
                    ),
                }
            ), 401

        # Per-session + per-user concurrency guards. Same sweep pattern
        # as send_message: reclaim stale rows older than the grace
        # window first so the count check is accurate, then refuse if
        # this session OR the user is at capacity.
        #
        # If the TARGET research is still IN_PROGRESS, the per-session
        # guard catches it here and returns 409 — the user must Stop it
        # first. (delete_attempt would also catch this via
        # AttemptInProgress, but failing earlier — before any rows are
        # touched — keeps the failure cheap.)
        _STALE_RESEARCH_GRACE_SECONDS = 30
        grace_cutoff_dt = datetime.now(UTC) - timedelta(
            seconds=_STALE_RESEARCH_GRACE_SECONDS
        )
        grace_cutoff_iso = grace_cutoff_dt.isoformat()
        with get_user_db_session(username) as cap_db:
            # Reclaim stale chat-session research rows whose worker is
            # dead AND older than the grace cutoff. Mirrors the sweep in
            # send_message.
            stale_chat = (
                cap_db.query(ResearchHistory)
                .filter(
                    ResearchHistory.chat_session_id == session_id,
                    ResearchHistory.status == ResearchStatus.IN_PROGRESS,
                    ResearchHistory.created_at < grace_cutoff_iso,
                )
                .all()
            )
            reclaimed_chat = False
            for row in stale_chat:
                if not is_research_thread_alive(row.id):
                    logger.warning(
                        f"Reclaiming stale chat research {row.id[:8]}... "
                        f"(thread dead) on chat {session_id[:8]}..."
                    )
                    row.status = ResearchStatus.FAILED
                    cleanup_research(row.id)
                    reclaimed_chat = True
            if reclaimed_chat:
                cap_db.commit()

            # Reclaim stale UserActiveResearch rows globally for this
            # user, so the per-user cap count below isn't inflated.
            from ..web.routes.globals import (
                reclaim_stale_user_active_research,
            )

            if reclaim_stale_user_active_research(
                cap_db,
                username,
                grace_cutoff_dt=grace_cutoff_dt,
                logger=logger,
            ):
                cap_db.commit()

            cap_error = _enforce_chat_session_research_slot(
                cap_db, username, session_id
            )
            if cap_error is not None:
                msg, status = cap_error
                if status == 409:
                    # Report the research that is actually running on this
                    # session. It is not necessarily the retry target: the
                    # target may have already failed while a later attempt
                    # is live. Fall back to the target id when no
                    # IN_PROGRESS row is visible (e.g. it just finished).
                    blocking = (
                        cap_db.query(ResearchHistory)
                        .filter_by(
                            chat_session_id=session_id,
                            status=ResearchStatus.IN_PROGRESS,
                        )
                        .first()
                    )
                    active_research_id = (
                        blocking.id if blocking is not None else research_id
                    )
                    return jsonify(
                        {
                            "success": False,
                            "error": msg,
                            "active_research_id": active_research_id,
                        }
                    ), 409
                return jsonify({"success": False, "error": msg}), status

        # Fetch original user content BEFORE deleting the old attempt.
        # After delete_attempt the row is gone, so this lookup must
        # succeed first. Raises AttemptNotFound → 404.
        try:
            original_content = service.get_original_attempt_query(
                session_id, research_id
            )
        except AttemptNotFound:
            return jsonify(
                {"success": False, "error": "Attempt not found"}
            ), 404

        if not original_content or not original_content.strip():
            # Missing/empty/whitespace-only original — shouldn't happen
            # since send_message rejects these, but guard against a
            # corrupt row before we destroy the only copy.
            return jsonify(
                {
                    "success": False,
                    "error": (
                        "Original attempt has no query content to retry."
                    ),
                }
            ), 400

        # Delete the old attempt. By this point the target is expected not
        # to be IN_PROGRESS (the per-session guard above would have
        # caught it), so this should not raise AttemptInProgress — but
        # catch it anyway in case of a narrow race.
        try:
            service.delete_attempt(session_id, research_id)
        except AttemptInProgress:
            return jsonify(
                {
                    "success": False,
                    "error": (
                        "Research is in progress. Stop it before retrying."
                    ),
                    "active_research_id": research_id,
                }
            ), 409
        except AttemptNotFound:
            # Concurrent retry already deleted the target.
            return jsonify(
                {"success": False, "error": "Attempt not found"}
            ), 404

        # Re-fetch messages list AFTER the delete so the context
        # builder doesn't see the just-removed attempt (otherwise the
        # new research would treat the retried query as a follow-up to
        # itself).
        messages = service.get_session_messages(session_id, limit=20)

        settings_snapshot = _load_settings(username)
        context_manager = ChatContextManager(
            session_id,
            messages,
            session_data.get("accumulated_context"),
            settings_snapshot=settings_snapshot,
        )
        research_context = context_manager.build_research_context(
            current_query=original_content
        )

        # Spawn the new research via the shared helper. Same failure
        # shape as send_message.
        try:
            new_research_id, new_message_id = _spawn_chat_research(
                username,
                session_id,
                original_content,
                settings_snapshot,
                research_context,
                messages,
                service,
                research_mode="quick",
            )
        except ChatSpawnError as exc:
            return _chat_spawn_response(exc)

        return jsonify(
            {
                "success": True,
                "session_id": session_id,
                "research_id": new_research_id,
                "message_id": new_message_id,
                "research_mode": "quick",
            }
        )

    except ROUTE_EXCEPTIONS:
        logger.exception("Error retrying chat attempt")
        return jsonify(
            {"success": False, "error": "Failed to retry attempt"}
        ), 500