Coverage for src/local_deep_research/database/models/chat.py: 95%

59 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-03 23:15 +0000

1""" 

2Chat models for conversations and messages. 

3Domain-driven organization following news.py, research.py patterns. 

4 

5Chat schema: 

6- ChatMessage.content is NOT NULL — every row stores its rendered text inline. 

7- No CHECK constraint (content NOT NULL is sufficient). 

8- Step rows live in ChatProgressStep, NOT in ChatMessage. 

9- ChatSession.status is a typed Enum (ChatSessionStatus). 

10""" 

11 

12import enum 

13 

14from sqlalchemy import ( 

15 JSON, 

16 Column, 

17 Enum, 

18 ForeignKey, 

19 Index, 

20 Integer, 

21 String, 

22 Text, 

23 UniqueConstraint, 

24) 

25from sqlalchemy.orm import relationship 

26from sqlalchemy_utc import UtcDateTime, utcnow 

27 

28from .base import Base 

29 

30 

class ChatRole(str, enum.Enum):
    """Author of a chat message: the human user or the assistant.

    Mixing in ``str`` makes each member compare equal to its plain string
    value (e.g. ``ChatRole.USER == "user"``).
    """

    USER = "user"
    ASSISTANT = "assistant"

36 

37 

class ChatMessageType(str, enum.Enum):
    """Position of a message within the conversation flow.

    There is intentionally no "step" member: transient research-progress
    rows are stored in ChatProgressStep rather than in ChatMessage, so the
    chat_message_type Enum only covers durable conversation turns.
    """

    QUERY = "query"
    FOLLOWUP = "followup"
    RESPONSE = "response"

48 

49 

class ChatSessionStatus(str, enum.Enum):
    """Lifecycle states a chat session can be in.

    Members compare equal to their string values because of the ``str``
    mixin.
    """

    ACTIVE = "active"
    ARCHIVED = "archived"
    DELETED = "deleted"

56 

57 

class ChatSession(Base):
    """
    One chat conversation.

    A session owns an ordered collection of messages and may be associated
    with several research operations over its lifetime.
    """

    __tablename__ = "chat_sessions"

    # Primary key is a UUID string (same convention as ResearchHistory).
    id = Column(String(36), primary_key=True)

    # --- Session metadata ---
    title = Column(String(500))
    # create_constraint=True is deliberately left off. The migration omits
    # the CHECK constraint (project pattern: validity is enforced at the
    # ORM/service layer through ChatSessionStatus(value)). Declaring it here
    # would cause schema drift: fresh installs would carry the CHECK while
    # migrated databases would not.
    status = Column(
        Enum(
            ChatSessionStatus,
            name="chat_session_status",
            # Persist the member values ("active", ...) rather than names.
            values_callable=lambda members: [m.value for m in members],
        ),
        default=ChatSessionStatus.ACTIVE,
        server_default="active",
        nullable=False,
    )

    # Context accumulated over the conversation; feeds the LLM prompt for
    # follow-up turns. ChatService updates it under a row-level lock.
    accumulated_context = Column(JSON)

    created_at = Column(UtcDateTime, default=utcnow(), nullable=False)

    # message_count is load-bearing: add_message() bumps it with an atomic
    # UPDATE...RETURNING to hand out sequence_number values safely. Progress
    # steps never touch this counter; they live in chat_progress_steps and
    # use their own per-research counter (research_history.step_count).
    message_count = Column(
        Integer,
        default=0,
        server_default="0",
        nullable=False,
    )

    # --- Relationships ---
    messages = relationship(
        "ChatMessage",
        order_by="ChatMessage.sequence_number",
        cascade="all, delete-orphan",
        back_populates="session",
    )
    researches = relationship(
        "ResearchHistory",
        # The database FK (research_history.chat_session_id, ON DELETE
        # SET NULL) already performs the cleanup, so the ORM should not
        # issue its own UPDATE ahead of the parent DELETE.
        passive_deletes=True,
        back_populates="chat_session",
    )
    progress_steps = relationship(
        "ChatProgressStep",
        order_by="ChatProgressStep.created_at",
        cascade="all, delete-orphan",
        back_populates="session",
    )

    # --- Indexes ---
    __table_args__ = (
        Index("idx_chat_session_status", "status"),
        Index("idx_chat_session_created", "created_at"),
        # Composite index backing the sidebar list_sessions hot query
        # (WHERE status=? ORDER BY created_at DESC).
        Index("idx_chat_session_status_created", "status", "created_at"),
    )

    def __repr__(self):
        """Return a short debug string with id, title and message count."""
        fields = (
            f"id='{self.id}', title='{self.title}', "
            f"messages={self.message_count}"
        )
        return f"<ChatSession({fields})>"

137 

138 

class ChatMessage(Base):
    """
    A durable message belonging to a chat session.

    The message text is always stored inline in ``content``; ``research_id``
    is only a back-reference and is never used as the content source.
    """

    __tablename__ = "chat_messages"

    # Primary key is a UUID string.
    id = Column(String(36), primary_key=True)

    # --- Foreign keys ---
    # `index=True` is intentionally not used; the single-column indexes are
    # declared by name in __table_args__ so that create_all and the 0010
    # migration end up with identically named indexes.
    session_id = Column(
        String(36),
        ForeignKey("chat_sessions.id", ondelete="CASCADE"),
        nullable=False,
    )

    # Link back to the research run this turn triggered, if any.
    # ON DELETE SET NULL: removing the research keeps the chat row, because
    # the content is stored inline and only the link goes stale. This is
    # the snapshot semantic the schema provides.
    research_id = Column(
        String(36),
        ForeignKey("research_history.id", ondelete="SET NULL"),
        nullable=True,
    )

    # --- Message content ---
    # As with ChatSession.status, create_constraint=True is omitted so the
    # model matches the migration, which deliberately has no CHECK.
    role = Column(
        Enum(
            ChatRole,
            name="chat_role",
            values_callable=lambda members: [m.value for m in members],
        ),
        nullable=False,
    )
    message_type = Column(
        Enum(
            ChatMessageType,
            name="chat_message_type",
            values_callable=lambda members: [m.value for m in members],
        ),
        nullable=False,
    )
    content = Column(Text, nullable=False)

    # Ordering position of the message inside its session.
    sequence_number = Column(Integer, nullable=False)

    created_at = Column(UtcDateTime, default=utcnow(), nullable=False)

    # --- Relationships ---
    session = relationship("ChatSession", back_populates="messages")
    research = relationship("ResearchHistory", back_populates="chat_messages")

    # Indexes are declared by name here (NOT via index=True on the columns)
    # so that the create_all path (fresh installs, test fixtures) and the
    # migration path (0010 CHAT_INDEXES list) yield identically named
    # indexes, avoiding drift between fresh-install and migrated databases.
    # This mirrors the ResearchResource pattern in models/research.py.
    __table_args__ = (
        UniqueConstraint(
            "session_id",
            "sequence_number",
            name="uq_chat_message_session_seq",
        ),
        Index("ix_chat_messages_session_id", "session_id"),
        Index("ix_chat_messages_research_id", "research_id"),
        Index(
            "ix_chat_messages_session_created",
            "session_id",
            "created_at",
        ),
    )

    def __repr__(self):
        """Return a short debug string with session id, sequence and role."""
        fields = (
            f"session='{self.session_id}', "
            f"seq={self.sequence_number}, role='{self.role}'"
        )
        return f"<ChatMessage({fields})>"

228 

229 

class ChatProgressStep(Base):
    """
    A transient research-progress step.

    Steps are streamed live while a research run is in flight and are also
    persisted, so that reloading a session during a running research can
    rehydrate the progress accordion.

    They are kept in a dedicated table so that step rows do not pollute
    conversation sequencing or the chat_sessions.message_count counter.
    """

    __tablename__ = "chat_progress_steps"

    id = Column(String(36), primary_key=True)

    # Owning research run. CASCADE: deleting the research removes all of
    # its steps (they are transient progress; nothing worth preserving).
    # No `index=True`; the index is declared by name in __table_args__.
    research_id = Column(
        String(36),
        ForeignKey("research_history.id", ondelete="CASCADE"),
        nullable=False,
    )

    # Denormalized session FK for fast session-scoped lookups (the resume
    # path on chat reload reads steps by session). CASCADE: deleting the
    # session removes all of its steps.
    # No `index=True`; the index is declared by name in __table_args__.
    session_id = Column(
        String(36),
        ForeignKey("chat_sessions.id", ondelete="CASCADE"),
        nullable=False,
    )

    # Phase taken from research_service._STEP_PHASES (e.g. "search",
    # "observation"). Nullable so that legacy rows migrated out of
    # chat_messages can be carried forward without phase data.
    phase = Column(String(64), nullable=True)

    content = Column(Text, nullable=False)

    # Sequence within one research run. ChatService.add_progress_step
    # allocates it atomically via UPDATE...RETURNING on
    # research_history.step_count.
    sequence_number = Column(Integer, nullable=False)

    created_at = Column(UtcDateTime, default=utcnow(), nullable=False)

    # --- Relationships ---
    research = relationship("ResearchHistory", back_populates="progress_steps")
    session = relationship("ChatSession", back_populates="progress_steps")

    __table_args__ = (
        UniqueConstraint(
            "research_id",
            "sequence_number",
            name="uq_chat_progress_step_research_seq",
        ),
        # Index names mirror the 0010 CHAT_INDEXES list so that create_all
        # and the migration produce identically named indexes.
        Index("ix_chat_progress_steps_research_id", "research_id"),
        Index("ix_chat_progress_steps_session_id", "session_id"),
        Index(
            "ix_chat_progress_steps_session_created",
            "session_id",
            "created_at",
        ),
    )

    def __repr__(self):
        """Return a short debug string with research id, sequence and phase."""
        fields = (
            f"research='{self.research_id}', "
            f"seq={self.sequence_number}, phase='{self.phase}'"
        )
        return f"<ChatProgressStep({fields})>"