Coverage for src/local_deep_research/chat/service.py: 84%

1"""

2Service layer for chat functionality.

4This service handles the business logic for chat sessions and messages,

5including session management, message handling, and context building.

6"""

8from typing import Dict, Any, List, Optional

9from concurrent.futures import (

10 ThreadPoolExecutor,

11 TimeoutError as FuturesTimeoutError,

12)

13from datetime import datetime, UTC

14import uuid

15from loguru import logger

16from sqlalchemy import and_, or_, update

17from sqlalchemy.exc import SQLAlchemyError

19from ..database.models import (

20 ChatMessage,

21 ChatMessageType,

22 ChatProgressStep,

23 ChatRole,

24 ChatSession,

25 ChatSessionStatus,

26 ResearchHistory,

27 UserActiveResearch,

28)

29from ..database.session_context import get_user_db_session

30from ..web.routes.globals import (

31 cleanup_research,

32 is_research_thread_alive,

33 set_termination_flag,

34)

35from ..constants import ResearchStatus

# Exception types the service layer treats as database-operation failures.
# Methods below catch this tuple to log the error and then either re-raise
# or report failure to the caller.
DB_EXCEPTIONS = (ValueError, RuntimeError, SQLAlchemyError)

class ArchiveBlockedError(RuntimeError):
    """Signal that a session cannot be archived while research is running.

    Archiving makes a session read-only. Permitting it while a research is
    still in_progress would orphan that research against a session the user
    believes is frozen. The route layer converts this error into an
    HTTP 409 response.
    """

class ChatSessionNotFound(LookupError):
    """Signal from get_session that no row exists for the given session_id.

    The route layer converts this error into HTTP 404. It is deliberately
    a different type from ChatRepositoryError, so that a transient
    database failure can never be mistaken for "session does not exist".
    """

class ChatRepositoryError(RuntimeError):
    """Signal from get_session that the database query itself failed.

    The route layer converts this error into HTTP 500. Using a type
    separate from ChatSessionNotFound stops infrastructure problems
    (locked DB file, encryption key failure, etc.) from surfacing as
    false 404 responses.
    """

class AttemptNotFound(LookupError):
    """Signal that no ResearchHistory row with the given research_id exists
    within the session identified by ``session_id``.

    The route layer converts this error into HTTP 404. ChatSessionNotFound
    maps to 404 as well; a separate type lets the diagnostic log tell
    "session missing" apart from "research missing".
    """

class AttemptInProgress(RuntimeError):
    """Signal from delete_attempt that the target research is IN_PROGRESS
    and its worker thread is still alive.

    Removing the research while a worker is alive would orphan the thread:
    it would keep spending LLM cycles on a ResearchHistory row that is
    gone. The route layer converts this error into HTTP 409 and returns
    ``research_id`` so the client can offer a Stop + retry flow.

    A stale IN_PROGRESS row whose thread has died does NOT trigger this
    error; the sweep logic in ``delete_attempt`` reclaims it instead (the
    same idea as send_message's stale-row reclaim in chat/routes.py).
    """

# A slow LLM must not stall the request thread during title generation.
# The synchronous llm.invoke() call is run in a worker future under a hard
# wall-clock timeout. This default follows the 30s LLM-timeout convention
# used elsewhere in the codebase and can be overridden through the
# chat.title_llm_timeout_seconds setting.
_DEFAULT_TITLE_LLM_TIMEOUT_SECONDS = 30.0

100

def _serialize_dt(value):
    """Convert a datetime to its ISO-8601 text form; pass ``None`` through."""
    if value is None:
        return None
    return value.isoformat()

104

105

106class ChatService:

107 """Service for managing chat conversations and messages."""

108

def __init__(self, username: str):
    """
    Bind the service to a single user.

    Args:
        username: Name of the user whose database session is opened by
            the service's operations.
    """
    self.username = username

117

@staticmethod
def _atomic_increment(db, counter_col, where_clause):
    """Bump an integer counter column by one in a single statement and
    hand back the value it now holds.

    The statement issued is ``UPDATE <table> SET col = col + 1 WHERE
    <clause> RETURNING col``, so the next sequence number is allocated
    without a read-modify-write race between concurrent writers.

    Args:
        db: Open SQLAlchemy session used to execute the statement.
        counter_col: Mapped column attribute to increment (for example
            ``ChatSession.message_count``). Both the target table and the
            column are derived from it.
        where_clause: Filter selecting the row to update.

    Returns:
        The post-increment value, or ``None`` when ``where_clause`` matched
        no row. Deciding whether a miss is an error is left to the caller.
    """
    owning_model = counter_col.class_
    # The values() mapping is keyed by the column object itself rather
    # than by its string name.
    bumped = {counter_col: counter_col + 1}
    statement = (
        update(owning_model)
        .where(where_clause)
        .values(bumped)
        .returning(counter_col)
    )
    outcome = db.execute(statement)
    return outcome.scalar_one_or_none()

144

def create_session(
    self,
    initial_query: Optional[str] = None,
    title: Optional[str] = None,
    settings_snapshot: Optional[Dict[str, Any]] = None,
) -> str:
    """
    Create and persist a new, active chat session.

    Args:
        initial_query: Optional first query; only used to derive the
            fallback title when ``title`` is not supplied.
        title: Optional explicit title. When falsy, the non-LLM fallback
            title is used instead.
        settings_snapshot: Accepted for interface compatibility; this
            method does not read it.

    Returns:
        The new session's ID (UUID string).

    Raises:
        ValueError, RuntimeError, SQLAlchemyError: logged and re-raised
            when creating the row fails.
    """
    try:
        new_id = str(uuid.uuid4())

        # Only the cheap, non-LLM title is computed here so that session
        # creation never waits on an LLM round-trip. An LLM title is
        # requested separately and asynchronously via
        # POST /api/chat/sessions/<id>/generate-title.
        if title:
            display_title = title
        else:
            display_title = self._fallback_title(initial_query)

        empty_context = {
            "key_entities": [],
            "topics": [],
            "summary": "",
        }

        with get_user_db_session(self.username) as db:
            # created_at is not passed; it is left to the column default.
            db.add(
                ChatSession(
                    id=new_id,
                    title=display_title,
                    status=ChatSessionStatus.ACTIVE.value,
                    accumulated_context=empty_context,
                    message_count=0,
                )
            )
            db.commit()

        logger.info(f"Created chat {new_id[:8]}... for user {self.username}")
        return new_id

    except DB_EXCEPTIONS:
        logger.exception("Error creating chat session")
        raise

196

def regenerate_title_with_llm(
    self,
    session_id: str,
    query: Optional[str],
    settings_snapshot: Optional[Dict[str, Any]],
) -> Optional[str]:
    """
    Replace a session's fallback title with an LLM-generated one.

    Meant to be driven by a dedicated endpoint that the frontend calls
    after session creation, keeping the create request free of the LLM
    round-trip.

    Idempotency: the LLM is only consulted while the stored title is
    empty or still equals the non-LLM fallback for ``query``. If the title
    has already changed (a manual edit, or another generate-title request
    that finished first), nothing is generated and nothing is overwritten.

    Args:
        session_id: ID of the session whose title should be regenerated.
        query: Query text the title is derived from.
        settings_snapshot: Settings forwarded to the title generator.

    Returns:
        The new title when it was generated and stored; ``None`` when the
        query is empty, the session does not exist, the title was already
        customised, generation produced nothing, or the update failed.
    """
    if not query:
        return None

    try:
        session_data = self.get_session(session_id)
    except ChatSessionNotFound:
        return None

    existing_title = (session_data or {}).get("title") or ""
    placeholder = self._fallback_title(query)

    # A non-empty title that differs from the fallback means someone has
    # already moved past it; don't spend an LLM call to clobber that.
    already_customised = bool(existing_title) and existing_title != placeholder
    if already_customised:
        logger.info(
            f"Skipping LLM title gen for {session_id[:8]}...: title already set ('{existing_title[:30]}...')"
        )
        return None

    generated = self._generate_title(query, settings_snapshot)
    if not generated:
        return None

    if self.update_session_title(session_id, generated):
        return generated
    return None

239

def add_message(
    self,
    session_id: str,
    role: str,
    content: str,
    message_type: str,
    research_id: Optional[str] = None,
    allow_archived: bool = False,
) -> str:
    """
    Persist a durable message (query/followup/response) in a chat session.

    Opens a database session for the user, stages the row through
    ``insert_message_in_db`` and commits. Progress-step rows are not
    handled here; they are written via ``add_progress_step``.

    Args:
        session_id: ID of the session receiving the message.
        role: Message role (user or assistant).
        content: Message content, stored inline. Must not be ``None``;
            this method itself performs no emptiness check.
        message_type: Type of message (query, followup, response).
        research_id: Optional ID of the associated research.
        allow_archived: Forwarded to ``insert_message_in_db``. When True
            the write may land on a session that is no longer active.
            Intended ONLY for system writes (final assistant response,
            terminate-partial); the user send-message path must keep the
            default False so that archiving in one tab still blocks a
            concurrent reply from another tab.

    Returns:
        Message ID (UUID string).

    Raises:
        ValueError: if ``content`` is ``None``. This is checked before any
            database session is opened, so a doomed request fails cheaply.
        ValueError, RuntimeError, SQLAlchemyError: logged and re-raised
            when staging or committing the message fails.
    """
    if content is None:
        raise ValueError("content is required for chat messages")

    message_fields = {
        "session_id": session_id,
        "role": role,
        "content": content,
        "message_type": message_type,
        "research_id": research_id,
        "allow_archived": allow_archived,
    }
    try:
        with get_user_db_session(self.username) as db:
            new_message_id = self.insert_message_in_db(db, **message_fields)
            db.commit()
            return new_message_id

    except DB_EXCEPTIONS:
        logger.exception("Error adding message to chat session")
        raise

297

def insert_message_in_db(
    self,
    db,
    session_id: str,
    role: str,
    content: str,
    message_type: str,
    research_id: Optional[str] = None,
    allow_archived: bool = False,
) -> str:
    """
    Stage a durable chat message on an already-open SQLAlchemy session
    WITHOUT committing; commit/rollback stays with the caller.

    Splitting this out lets the route layer commit the user message and
    the research-history row in one transaction, so a failed research
    insert cannot leave an orphaned user message behind.

    Args:
        db: Open SQLAlchemy session owned by the caller.
        session_id: ID of the session receiving the message.
        role: Message role; must be a valid ``ChatRole`` value.
        content: Message content. ``None`` is rejected; an empty string
            is allowed.
        message_type: Must be a valid ``ChatMessageType`` value.
        research_id: Optional ID of the associated research.
        allow_archived: When False (default) the counter update only
            matches a session whose status is active. When True the
            status filter is dropped so a system-written response can
            still be stored after the session was archived mid-research.

    Returns:
        ID (UUID string) of the staged message.

    Raises:
        ValueError: if ``content`` is ``None``, ``role`` or
            ``message_type`` is invalid, or no session row matched.
    """
    if content is None:
        raise ValueError("content is required for chat messages")

    # Validate against the enums up front: an unknown value fails fast
    # with ValueError (which the route layer can map to HTTP 400) rather
    # than surfacing later as a statement error at commit time.
    enum_checks = (
        (ChatRole, role, "role"),
        (ChatMessageType, message_type, "message_type"),
    )
    for enum_cls, raw_value, label in enum_checks:
        try:
            enum_cls(raw_value)
        except ValueError as exc:
            raise ValueError(f"Invalid {label}: {raw_value!r}") from exc

    message_id = str(uuid.uuid4())

    # The sequence number comes from an atomic increment of
    # ChatSession.message_count. Re-checking status='active' in the same
    # statement stops an archive request racing with a user send from
    # landing a message on a now-archived session. System writes opt out
    # of that check: losing a finished answer is judged worse than
    # writing to an archived session.
    id_matches = ChatSession.id == session_id
    if allow_archived:
        row_filter = id_matches
        miss_message = f"Chat session {session_id} not found"
    else:
        row_filter = id_matches & (
            ChatSession.status == ChatSessionStatus.ACTIVE.value
        )
        miss_message = f"Chat session {session_id} not found or not active"

    position = self._atomic_increment(
        db, ChatSession.message_count, row_filter
    )
    if position is None:
        raise ValueError(miss_message)

    # created_at is not passed; it is left to the column default.
    db.add(
        ChatMessage(
            id=message_id,
            session_id=session_id,
            research_id=research_id,
            role=role,
            message_type=message_type,
            content=content,
            sequence_number=position,
        )
    )
    logger.debug(
        f"Staged message {position} for chat {session_id[:8]}... (uncommitted)"
    )
    return message_id

383

def add_progress_step(
    self,
    session_id: str,
    research_id: str,
    content: str,
    phase: Optional[str] = None,
) -> str:
    """
    Record a transient research-progress step for a chat session.

    Steps are stored as ChatProgressStep rows. Their sequence number is
    allocated by atomically incrementing ``ResearchHistory.step_count``
    for the given research, so it is independent of the chat session's
    ``message_count``, which this method never touches.

    Args:
        session_id: ID of the parent chat session.
        research_id: ID of the research producing the step.
        content: Rendered step text (e.g. "Searching for ..."). Must not
            be ``None``.
        phase: Optional phase tag stored with the step.

    Returns:
        Step ID (UUID string).

    Raises:
        ValueError: if ``content`` is ``None`` (raised before any database
            work), or if no research row matches ``research_id`` (logged,
            then re-raised).
        RuntimeError, SQLAlchemyError: logged and re-raised.
    """
    if content is None:
        raise ValueError("content is required for progress steps")

    try:
        step_id = str(uuid.uuid4())

        with get_user_db_session(self.username) as db:
            position = self._atomic_increment(
                db,
                ResearchHistory.step_count,
                ResearchHistory.id == research_id,
            )
            if position is None:
                raise ValueError(  # noqa: TRY301
                    f"Research {research_id} not found"
                )

            db.add(
                ChatProgressStep(
                    id=step_id,
                    research_id=research_id,
                    session_id=session_id,
                    phase=phase,
                    content=content,
                    sequence_number=position,
                )
            )
            db.commit()

        logger.debug(
            f"Added progress step {position} for research {research_id[:8]}... in chat {session_id[:8]}..."
        )
        return step_id

    except DB_EXCEPTIONS:
        logger.exception("Error adding progress step")
        raise

446

def get_session(self, session_id: str) -> Dict[str, Any]:
    """
    Look up one chat session and return it as a plain dictionary.

    Args:
        session_id: ID of the session.

    Returns:
        Dictionary with the keys ``id``, ``title``, ``status``,
        ``message_count``, ``created_at`` (ISO-8601 string or ``None``)
        and ``accumulated_context``.

    Raises:
        ChatSessionNotFound: no row matches ``session_id`` (route layer
            maps this to HTTP 404).
        ChatRepositoryError: the lookup itself failed (route layer maps
            this to HTTP 500). Keeping the two apart prevents transient
            database errors from looking like "not found".
    """
    try:
        with get_user_db_session(self.username) as db:
            row = db.query(ChatSession).filter_by(id=session_id).first()

            if row:
                return {
                    "id": row.id,
                    "title": row.title,
                    "status": row.status,
                    "message_count": row.message_count,
                    "created_at": _serialize_dt(row.created_at),
                    "accumulated_context": row.accumulated_context,
                }

            logger.warning(f"Chat not found: {session_id[:8]}...")
            # Raised inside the try on purpose; the dedicated handler
            # below lets it through untouched as the 404 signal.
            raise ChatSessionNotFound(session_id)  # noqa: TRY301

    except ChatSessionNotFound:
        # The genuine "does not exist" signal: propagate unchanged.
        raise
    except DB_EXCEPTIONS as exc:
        logger.exception("Error getting chat session")
        raise ChatRepositoryError(
            f"DB error reading session {session_id[:8]}..."
        ) from exc

491

def get_session_messages(
    self,
    session_id: str,
    limit: int = 50,
    offset: int = 0,
    before_created_at: Optional[str] = None,
    before_id: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """
    Get messages for a session, merged server-side with progress steps.

    Durable messages are read from ChatMessage and progress steps from
    ChatProgressStep. Both are flattened into dictionaries and merged
    into one stream so the client sees a single list; step rows are
    emitted with ``message_type='step'``, ``role='assistant'`` and an id
    of the form ``step-<id>``.

    Pagination: each table is queried for at most ``limit + offset``
    rows, newest first. The two result sets are merged and sorted in
    Python, the ``[offset:offset + limit]`` slice of the newest-first
    list is taken, and that slice is returned oldest-first.

    Pass ``before_created_at`` to fetch entries older than a given ISO
    timestamp (for example the oldest currently displayed
    ``created_at``) to implement a "load older messages" trigger.
    Without the cursor, ``offset`` selects which newest-first slice is
    returned (offset=0 → newest, offset=limit → next older window, …).

    Args:
        session_id: ID of the session.
        limit: Maximum number of merged entries to return. Negative
            values are treated as 0.
        offset: Number of newest-first entries to skip. Negative values
            are treated as 0.
        before_created_at: Optional ISO-8601 timestamp cursor. A trailing
            ``Z`` is accepted. A timestamp without a UTC offset is
            interpreted as UTC. An unparseable value is logged and
            ignored (no cursor filter is applied).
        before_id: Optional id of the chat message at the cursor
            position. Only used together with ``before_created_at``; it
            turns the message filter into a composite
            ``(created_at, id)`` cursor so that messages sharing the
            cursor timestamp are not dropped at the page boundary.

    Returns:
        List of message and step dictionaries, oldest first.

    Raises:
        ValueError, RuntimeError, SQLAlchemyError: logged and re-raised
            so the caller can report a failure instead of an empty list
            that would look like a session without messages.
    """
    # Negative paging values have no meaningful interpretation: a
    # negative offset would turn the slice below into an index from the
    # END of the merged list and a negative limit would be passed to SQL
    # LIMIT. Clamp both so the window arithmetic stays well-defined.
    limit = max(limit, 0)
    offset = max(offset, 0)

    try:
        with get_user_db_session(self.username) as db:
            msg_q = db.query(ChatMessage).filter_by(session_id=session_id)
            step_q = db.query(ChatProgressStep).filter_by(
                session_id=session_id
            )

            if before_created_at:
                try:
                    cutoff = datetime.fromisoformat(
                        before_created_at.replace("Z", "+00:00")
                    )
                except ValueError:
                    logger.warning(
                        "Invalid before_created_at cursor: "
                        f"{before_created_at!r} — ignoring."
                    )
                else:
                    # A cursor without an offset parses fine but yields
                    # a naive datetime. Presumably the created_at
                    # columns are UTC-aware (the ordering note below
                    # refers to sqlalchemy_utc) and would reject a
                    # naive bind value, failing the whole request —
                    # TODO confirm. Interpret such cursors as UTC.
                    if cutoff.tzinfo is None:
                        cutoff = cutoff.replace(tzinfo=UTC)

                    if before_id:
                        # Composite cursor for messages:
                        #   created_at < cutoff
                        #   OR (created_at = cutoff AND id < before_id)
                        # The comparison on the string id is only a
                        # stable lexicographic tie-break.
                        msg_q = msg_q.filter(
                            or_(
                                ChatMessage.created_at < cutoff,
                                and_(
                                    ChatMessage.created_at == cutoff,
                                    ChatMessage.id < before_id,
                                ),
                            )
                        )
                        # before_id identifies a chat message, not a
                        # progress step, so it cannot tie-break step
                        # rows. Steps AT the cutoff are therefore kept
                        # (<=): a tie can produce a duplicate but never
                        # a silently dropped row. The client is
                        # expected to de-duplicate.
                        step_q = step_q.filter(
                            ChatProgressStep.created_at <= cutoff
                        )
                    else:
                        # Bare timestamp cursor: strict "<" on both
                        # tables, kept for older clients that do not
                        # send before_id.
                        msg_q = msg_q.filter(
                            ChatMessage.created_at < cutoff
                        )
                        step_q = step_q.filter(
                            ChatProgressStep.created_at < cutoff
                        )

            # Either table alone might supply the whole requested
            # window, so each is asked for limit + offset rows. The
            # merged list is then at most twice that size before it is
            # trimmed below, which bounds the SQL work.
            fetch_n = limit + offset

            # sequence_number is a secondary sort key so that rows
            # whose created_at values collide still come back in a
            # deterministic order.
            messages = (
                msg_q.order_by(
                    ChatMessage.created_at.desc(),
                    ChatMessage.sequence_number.desc(),
                )
                .limit(fetch_n)
                .all()
            )
            steps = (
                step_q.order_by(
                    ChatProgressStep.created_at.desc(),
                    ChatProgressStep.sequence_number.desc(),
                )
                .limit(fetch_n)
                .all()
            )

            merged: List[Dict[str, Any]] = [
                {
                    "id": msg.id,
                    "session_id": msg.session_id,
                    "role": msg.role,
                    "message_type": msg.message_type,
                    "content": msg.content,
                    "sequence_number": msg.sequence_number,
                    "research_id": msg.research_id,
                    "created_at": _serialize_dt(msg.created_at),
                }
                for msg in messages
            ]
            merged.extend(
                {
                    "id": f"step-{step.id}",
                    "session_id": step.session_id,
                    "role": "assistant",
                    "message_type": "step",
                    "content": step.content,
                    "phase": step.phase,
                    "sequence_number": step.sequence_number,
                    "research_id": step.research_id,
                    "created_at": _serialize_dt(step.created_at),
                }
                for step in steps
            )

            # Sort newest-first by (created_at, sequence_number, kind),
            # take the requested slice, then flip it so the caller
            # receives oldest→newest. With reverse=True the kind flag
            # (0 = step, 1 = message) places a step before a message in
            # the final ascending output when the first two keys tie.
            # NOTE(review): message and step sequence numbers are
            # allocated from different counters, so comparing them
            # across kinds on a created_at tie looks arbitrary —
            # confirm this ordering is intended.
            merged.sort(
                key=lambda m: (
                    m["created_at"] or "",
                    m.get("sequence_number") or 0,
                    0 if m["message_type"] == "step" else 1,
                ),
                reverse=True,
            )
            window = merged[offset : offset + limit]
            window.reverse()
            return window

    except DB_EXCEPTIONS:
        # Re-raise instead of returning []: an empty list would be
        # indistinguishable from a session that genuinely has no
        # messages and would hide the failure from the client.
        logger.exception("Error getting chat messages")
        raise

675

def get_in_progress_research_id(self, session_id: str) -> Optional[str]:
    """Look up the research currently running for this chat session.

    The GET messages endpoint uses this so the client can restore its
    live "thinking" indicator after a reload without inferring the state
    from message metadata (which is not possible before the first
    progress step has been persisted).

    The lookup filters ResearchHistory on the session id and the
    IN_PROGRESS status and takes the first match. The partial-unique
    index ``ux_research_history_chat_session_in_progress``
    (migration 0010) is what guarantees there is at most one such row.

    Args:
        session_id: ID of the chat session.

    Returns:
        The id of the in-progress research, or ``None`` when no research
        is running for the session.

    Raises:
        ValueError, RuntimeError, SQLAlchemyError: logged and re-raised.
    """
    try:
        with get_user_db_session(self.username) as db:
            criteria = (
                ResearchHistory.chat_session_id == session_id,
                ResearchHistory.status == ResearchStatus.IN_PROGRESS,
            )
            running = db.query(ResearchHistory.id).filter(*criteria).first()
            if running:
                return running[0]
            return None
    except DB_EXCEPTIONS:
        # Swallowing the error and returning None would read as "nothing
        # is running": the send button would stay enabled and the user
        # could double-submit into the unique-index guard. Re-raise so
        # the route can answer with a 500 instead.
        logger.exception(
            "Error fetching in-progress research id for chat session"
        )
        raise

711

def list_sessions(
    self,
    status: str = ChatSessionStatus.ACTIVE.value,
    limit: int = 20,
    offset: int = 0,
) -> List[Dict[str, Any]]:
    """
    Return one page of the user's chat sessions, newest first.

    Args:
        status: Status to filter on (active, archived, deleted), or the
            literal ``"all"`` to disable the status filter.
        limit: Maximum number of sessions to return.
        offset: Number of sessions to skip.

    Returns:
        List of dictionaries with the keys ``id``, ``title``, ``status``,
        ``message_count`` and ``created_at`` (ISO-8601 string or
        ``None``), ordered by creation time descending.

    Raises:
        ValueError, RuntimeError, SQLAlchemyError: logged and re-raised.
    """
    try:
        with get_user_db_session(self.username) as db:
            selection = db.query(ChatSession)
            if status != "all":
                selection = selection.filter_by(status=status)

            newest_first = selection.order_by(ChatSession.created_at.desc())
            page = newest_first.offset(offset).limit(limit).all()

            return [
                {
                    "id": row.id,
                    "title": row.title,
                    "status": row.status,
                    "message_count": row.message_count,
                    "created_at": _serialize_dt(row.created_at),
                }
                for row in page
            ]

    except DB_EXCEPTIONS:
        # Returning [] here would make a real database failure look like
        # a brand-new user with no sessions. Re-raise so the route can
        # answer with a 500 and operators can see the problem.
        logger.exception("Error listing chat sessions")
        raise

761

def update_session_title(self, session_id: str, title: str) -> bool:
    """
    Store a new title on a chat session.

    Args:
        session_id: ID of the session.
        title: New title.

    Returns:
        True when the session exists and the change was committed; False
        when the session does not exist or a database error occurred
        (the error is logged, not raised).
    """
    try:
        with get_user_db_session(self.username) as db:
            target = db.query(ChatSession).filter_by(id=session_id).first()
            if not target:
                return False

            target.title = title
            db.commit()
            return True

    except DB_EXCEPTIONS:
        logger.exception("Error updating chat session title")
        return False

786

def reactivate_session(self, session_id: str) -> bool:
    """
    Set a chat session's status back to active.

    Args:
        session_id: ID of the session to reactivate.

    Returns:
        True when the session exists and the status change was
        committed; False when the session does not exist or a database
        error occurred (the error is logged, not raised).
    """
    try:
        with get_user_db_session(self.username) as db:
            target = db.query(ChatSession).filter_by(id=session_id).first()
            if not target:
                return False

            target.status = ChatSessionStatus.ACTIVE.value
            db.commit()
            logger.info(f"Reactivated chat: {session_id[:8]}...")
            return True

    except DB_EXCEPTIONS:
        logger.exception("Error reactivating chat session")
        return False

811

def archive_session(self, session_id: str) -> bool:
    """
    Mark a chat session as archived.

    Archiving is refused while a research tied to the session is still
    in_progress: archive makes the session read-only, and a running
    research would otherwise keep writing into a session the user
    believes is frozen. The caller (route layer) has to stop the research
    first, or use delete, which terminates in-flight research itself.

    Args:
        session_id: ID of the session to archive.

    Returns:
        True when the session was archived; False when the session does
        not exist or a database error occurred (the error is logged, not
        raised).

    Raises:
        ArchiveBlockedError: the session has an in_progress research.
            The route layer maps this to HTTP 409.
    """
    try:
        with get_user_db_session(self.username) as db:
            chat = db.query(ChatSession).filter_by(id=session_id).first()
            if not chat:
                return False

            running = (
                db.query(ResearchHistory.id)
                .filter(
                    ResearchHistory.chat_session_id == session_id,
                    ResearchHistory.status == ResearchStatus.IN_PROGRESS,
                )
                .first()
            )
            if running is None:
                chat.status = ChatSessionStatus.ARCHIVED.value
                db.commit()
                logger.info(f"Archived chat: {session_id[:8]}...")
                return True

            # Must reach the route layer for the 409 mapping. The
            # dedicated handler below re-raises it before the broader
            # DB_EXCEPTIONS handler (which includes RuntimeError, the
            # base class of ArchiveBlockedError) can swallow it.
            raise ArchiveBlockedError(  # noqa: TRY301
                "Cannot archive: research in_progress. Stop it first."
            )

    except ArchiveBlockedError:
        # Let the route layer turn this into a 409 response.
        raise
    except DB_EXCEPTIONS:
        logger.exception("Error archiving chat session")
        return False

869

def delete_session(self, session_id: str) -> bool:
    """
    Permanently delete a chat session and stop its running research.

    Cascades: ChatMessages deleted (CASCADE),
    ResearchHistory.chat_session_id set NULL.

    Args:
        session_id: ID of the session to delete.

    Returns:
        True when the session existed and was deleted; False when the
        session does not exist or a database error occurred (the error
        is logged, not raised).
    """
    try:
        # Research still in progress for this session must be told to
        # stop; otherwise the foreign key's ON DELETE SET NULL would
        # leave it running with a null chat_session_id, spending LLM
        # cycles on a conversation the user has discarded.
        #
        # Ordering is deliberate: the in-flight ids are read inside the
        # transaction, but the termination flags (in-memory, not part of
        # the transaction) are only set once the delete has committed.
        # Setting them earlier would, if the commit failed, kill the
        # research of a session that still exists.
        with get_user_db_session(self.username) as db:
            chat = db.query(ChatSession).filter_by(id=session_id).first()
            if not chat:
                return False

            running_rows = (
                db.query(ResearchHistory.id)
                .filter(
                    ResearchHistory.chat_session_id == session_id,
                    ResearchHistory.status == ResearchStatus.IN_PROGRESS,
                )
                .all()
            )
            db.delete(chat)
            db.commit()

            for (running_id,) in running_rows:
                set_termination_flag(running_id)

            # The truncated username is logged alongside the session id
            # so that each deletion can be tied to an account (for
            # example after a bulk delete with a stolen token).
            logger.info(
                f"Deleted chat: user={self.username[:8]}... session={session_id[:8]}..."
            )
            return True

    except DB_EXCEPTIONS:
        logger.exception("Error deleting chat session")
        return False

920

def delete_attempt(self, session_id: str, research_id: str) -> bool:
    """Hard-delete a single chat attempt and every row that belongs to it.

    An attempt is made up of:

    * the ``ResearchHistory`` row identified by ``research_id``;
    * every ``ChatMessage`` whose ``research_id`` matches (assistant
      responses, plus legacy user rows that carried the id directly);
    * the user message referenced by
      ``research_meta["submission"]["message_id"]`` — matched by id and
      scoped to ``session_id`` so a bad id cannot reach another session
      (issue #4659);
    * all ``ChatProgressStep`` rows for the research;
    * the ``UserActiveResearch`` row for the research, if any.

    The delete is refused while the research is ``IN_PROGRESS`` and its
    worker thread is alive: the termination flag is set and
    ``AttemptInProgress`` is raised. An ``IN_PROGRESS`` row whose thread
    is no longer alive is deleted like any finished attempt.

    ``ChatSession.message_count`` is decremented by the number of
    messages removed, because the parent session keeps existing.

    Args:
        session_id: ID of the parent chat session (scopes the lookup).
        research_id: ID of the research attempt to delete.

    Returns:
        True once the attempt has been deleted and committed.

    Raises:
        AttemptNotFound: no research with ``research_id`` exists in
            ``session_id``.
        AttemptInProgress: the research is IN_PROGRESS and its worker
            thread is still alive.
        ValueError, RuntimeError, SQLAlchemyError: logged and re-raised
            when the database work fails.
    """

    def _load_scoped(db):
        # Look the research up by id AND parent session, so an id taken
        # from a different session is treated as missing.
        row = (
            db.query(ResearchHistory)
            .filter(
                ResearchHistory.id == research_id,
                ResearchHistory.chat_session_id == session_id,
            )
            .first()
        )
        if row is None:
            raise AttemptNotFound(research_id)
        return row

    def _refuse_if_live(row):
        # Only consult the thread registry for rows still marked
        # IN_PROGRESS. A live worker is asked to stop and the delete is
        # rejected rather than removing a row the worker still owns.
        still_running = row.status == ResearchStatus.IN_PROGRESS
        if still_running and is_research_thread_alive(research_id):
            set_termination_flag(research_id)
            raise AttemptInProgress(research_id)

    try:
        # Phase 1: existence + liveness check in its own session, before
        # any row is touched.
        with get_user_db_session(self.username) as db:
            _refuse_if_live(_load_scoped(db))

        # Phase 2: the actual delete, all inside one transaction.
        with get_user_db_session(self.username) as db:
            # Reload: the row may have changed (or vanished) since
            # phase 1. The liveness re-check is purely defensive.
            research = _load_scoped(db)
            _refuse_if_live(research)

            # Resolve the triggering user message from research_meta.
            submission = (research.research_meta or {}).get(
                "submission"
            ) or {}
            candidate = (
                submission.get("message_id")
                if isinstance(submission, dict)
                else None
            )
            user_message_id = (
                candidate
                if isinstance(candidate, str) and candidate
                else None
            )

            # Messages tagged with the research id, optionally widened
            # to the session-scoped user message found above.
            tagged = ChatMessage.research_id == research_id
            if user_message_id:
                msg_filter = or_(
                    tagged,
                    and_(
                        ChatMessage.id == user_message_id,
                        ChatMessage.session_id == session_id,
                    ),
                )
            else:
                msg_filter = tagged

            # Count first so the message_count decrement matches the
            # rows that are about to be removed.
            removed_messages = (
                db.query(ChatMessage).filter(msg_filter).count()
            )
            db.query(ChatMessage).filter(msg_filter).delete(
                synchronize_session=False
            )

            # Explicit deletes, in this order: progress steps, the
            # active-research row (filtered by research_id only), and
            # the research row itself last.
            for model, key_column in (
                (ChatProgressStep, ChatProgressStep.research_id),
                (UserActiveResearch, UserActiveResearch.research_id),
                (ResearchHistory, ResearchHistory.id),
            ):
                db.query(model).filter(key_column == research_id).delete(
                    synchronize_session=False
                )

            # Skip the UPDATE entirely when nothing was removed.
            if removed_messages > 0:
                db.query(ChatSession).filter(
                    ChatSession.id == session_id
                ).update(
                    {
                        ChatSession.message_count: (
                            ChatSession.message_count - removed_messages
                        )
                    },
                    synchronize_session=False,
                )

            db.commit()

        # Phase 3: in-memory cleanup, only after the commit succeeded.
        cleanup_research(research_id)

        logger.info(
            f"Deleted chat attempt: user={self.username[:8]}... "
            f"session={session_id[:8]}... research={research_id[:8]}... "
            f"({removed_messages} messages)"
        )
        return True

    except (AttemptNotFound, AttemptInProgress):
        # Domain outcomes, not failures: pass through without logging.
        raise
    except DB_EXCEPTIONS:
        logger.exception("Error deleting chat attempt")
        raise

1124

def get_original_attempt_query(
    self, session_id: str, research_id: str
) -> str:
    """Return the text of the user message that started a chat research.

    Resolution order:

    1. ``research_meta["submission"]["message_id"]`` — if present, the
       user-role message with that id inside ``session_id``.
    2. Otherwise (or if step 1 finds no message with content) the
       earliest user-role message in ``session_id`` whose
       ``research_id`` matches — the linkage used by older rows.

    Args:
        session_id: ID of the parent chat session (scopes every lookup).
        research_id: ID of the research attempt.

    Returns:
        The original user message content as a string.

    Raises:
        AttemptNotFound: ``research_id`` does not belong to
            ``session_id``, or no user message with content is
            reachable from it.
        ValueError, RuntimeError, SQLAlchemyError: logged and re-raised
            when the database work fails.
    """
    try:
        with get_user_db_session(self.username) as db:
            # Scope by session first so a research id from another
            # session is reported as missing instead of leaking content.
            research = (
                db.query(ResearchHistory)
                .filter(
                    ResearchHistory.id == research_id,
                    ResearchHistory.chat_session_id == session_id,
                )
                .first()
            )
            if research is None:
                raise AttemptNotFound(research_id)

            # Pull the message id recorded in research_meta, if any.
            submission = (research.research_meta or {}).get(
                "submission"
            ) or {}
            candidate = (
                submission.get("message_id")
                if isinstance(submission, dict)
                else None
            )
            user_message_id = (
                candidate
                if isinstance(candidate, str) and candidate
                else None
            )

            # Conditions shared by both lookups.
            in_session = ChatMessage.session_id == session_id
            from_user = ChatMessage.role == ChatRole.USER.value

            # Fast path: direct hit on the recorded message id.
            if user_message_id:
                linked = (
                    db.query(ChatMessage)
                    .filter(
                        ChatMessage.id == user_message_id,
                        in_session,
                        from_user,
                    )
                    .first()
                )
                if linked is not None and linked.content:
                    return str(linked.content)

            # Fallback: earliest user message tagged with the research.
            legacy = (
                db.query(ChatMessage)
                .filter(
                    ChatMessage.research_id == research_id,
                    in_session,
                    from_user,
                )
                .order_by(ChatMessage.created_at.asc())
                .first()
            )
            if legacy is not None and legacy.content:
                return str(legacy.content)

            raise AttemptNotFound(research_id)

    except AttemptNotFound:
        # Expected outcome for the caller; do not log it as an error.
        raise
    except DB_EXCEPTIONS:
        logger.exception("Error fetching original attempt query")
        raise

1208

def update_accumulated_context(
    self,
    session_id: str,
    new_entities: Optional[List[str]] = None,
    new_topics: Optional[List[str]] = None,
    summary_addition: Optional[str] = None,
) -> bool:
    """Merge new entities, topics and summary text into a session's context.

    Entities and topics are de-duplicated while preserving the order in
    which they were first seen, then capped at 50 and 20 entries
    respectively, keeping the most recently added ones. The summary is
    appended to and trimmed to its last 8000 characters.

    Args:
        session_id: ID of the session to update.
        new_entities: Entities to merge into ``key_entities``.
        new_topics: Topics to merge into ``topics``.
        summary_addition: Text to append to ``summary``.

    Returns:
        True if the session exists and the update was committed; False
        if the session was not found or a database error occurred (the
        error is logged).
    """
    try:
        with get_user_db_session(self.username) as db:
            # with_for_update() is a no-op on SQLite but provides row
            # locking on PostgreSQL/MySQL if ever used.
            session = (
                db.query(ChatSession)
                .filter_by(id=session_id)
                .with_for_update()
                .first()
            )
            if not session:
                return False

            # Work on a NEW dict and reassign it below: a plain JSON
            # column does not detect in-place mutation, so mutating the
            # loaded dict would emit no UPDATE at flush time.
            ctx = dict(session.accumulated_context or {})

            # dict.fromkeys de-duplicates while keeping first-seen
            # order, so the cap below is deterministic. A set would
            # make the retained subset depend on hash order. ``or []``
            # tolerates a stored null.
            if new_entities:
                merged = dict.fromkeys(
                    [*(ctx.get("key_entities") or []), *new_entities]
                )
                ctx["key_entities"] = list(merged)[-50:]

            if new_topics:
                merged = dict.fromkeys(
                    [*(ctx.get("topics") or []), *new_topics]
                )
                ctx["topics"] = list(merged)[-20:]

            # Append to the summary, keeping only the newest text.
            if summary_addition:
                current = ctx.get("summary", "")
                new_summary = (
                    f"{current}\n\n{summary_addition}"
                    if current
                    else summary_addition
                )
                ctx["summary"] = new_summary[-8000:]

            session.accumulated_context = ctx
            db.commit()
            return True

    except DB_EXCEPTIONS:
        logger.exception("Error updating accumulated context")
        return False

1279

1280 def _fallback_title(self, query: Optional[str]) -> str:

1281 """Non-LLM title used at creation time (never blocks on I/O)."""

1282 if not query:

1283 return f"Chat {datetime.now(UTC).strftime('%Y-%m-%d %H:%M')}"

1284 if len(query) > 100:

1285 return query[:97].strip() + "..."

1286 return query.strip()

1287

1288 def _generate_title(

1289 self,

1290 query: Optional[str],

1291 settings_snapshot: Optional[Dict[str, Any]] = None,

1292 ) -> str:

1293 """

1294 Generate a title from the initial query.

1295

1296 When chat.llm_title_generation is enabled and settings_snapshot is

1297 provided, uses an LLM for concise titles. Otherwise returns the

1298 non-LLM fallback title.

1299 """

1300 if not query:

1301 return self._fallback_title(query)

1302

1303 if settings_snapshot:

1304 from ..config.llm_config import get_llm

1305 from ..config.thread_settings import get_setting_from_snapshot

1306

1307 if get_setting_from_snapshot( 1307 ↛ 1374line 1307 didn't jump to line 1374 because the condition on line 1307 was always true

1308 "chat.llm_title_generation",

1309 False,

1310 settings_snapshot=settings_snapshot,

1311 ):

1312 timeout = float(

1313 get_setting_from_snapshot(

1314 "chat.title_llm_timeout_seconds",

1315 _DEFAULT_TITLE_LLM_TIMEOUT_SECONDS,

1316 settings_snapshot=settings_snapshot,

1317 )

1318 )

1319 # Run the blocking invoke in a worker thread so the request

1320 # thread isn't parked past `timeout` by an unresponsive LLM.

1321 # `with ThreadPoolExecutor(...) as pool:` would call

1322 # shutdown(wait=True) on __exit__, defeating the timeout —

1323 # use wait=False + cancel_futures so the timeout actually fires.

1324 pool = ThreadPoolExecutor(

1325 max_workers=1,

1326 thread_name_prefix="chat-title",

1327 )

1328 try:

1329 llm = get_llm(settings_snapshot=settings_snapshot)

1330 prompt = (

1331 "Generate a concise 3-7 word title for this research "

1332 "query. Return ONLY the title, no quotes or "

1333 f"explanation.\n\nQuery: {query[:200]}"

1334 )

1335 future = pool.submit(llm.invoke, prompt)

1336 try:

1337 response = future.result(timeout=timeout)

1338 except FuturesTimeoutError:

1339 logger.warning(

1340 "LLM title generation exceeded {}s timeout; "

1341 "falling back to truncation",

1342 timeout,

1343 )

1344 return self._fallback_title(query)

1345 # Strip CR/LF before storing: the title is later

1346 # interpolated into loguru f-strings (e.g. the

1347 # "title already set" log line above) — an embedded

1348 # newline forges what looks like a second log entry

1349 # in aggregators. Also keeps document.title /

1350 # chatTitle.textContent visually clean.

1351 title = (

1352 str(response.content)

1353 .replace("\n", " ")

1354 .replace("\r", " ")

1355 .strip()

1356 .strip("\"'")[:100]

1357 )

1358 if title: 1358 ↛ 1372line 1358 didn't jump to line 1372 because the condition on line 1358 was always true

1359 return title

1360 except Exception:

1361 # User opted into LLM title generation via the

1362 # `chat.llm_title_generation` setting; a silent

1363 # debug-level swallow would hide provider misconfig,

1364 # auth failures, or response-shape regressions in

1365 # production where stderr level is INFO. Log with

1366 # traceback so operators can diagnose, then fall back

1367 # to truncation for UX continuity.

1368 logger.exception(

1369 "LLM title generation failed, falling back to truncation"

1370 )

1371 finally:

1372 pool.shutdown(wait=False, cancel_futures=True)

1373

1374 return self._fallback_title(query)