Coverage for src/local_deep_research/database/models/research.py: 94%

1"""

2Core research models for tasks, queries, and results.

3"""

5import enum

7from sqlalchemy import (

8 JSON,

9 Column,

10 Enum,

11 Float,

12 ForeignKey,

13 Index,

14 Integer,

15 String,

16 Text,

17)

18from sqlalchemy.orm import relationship

19from sqlalchemy_utc import UtcDateTime, utcnow

21from ...constants import ResearchStatus

22from .base import Base

class ResearchTask(Base):
    """
    A user-created research task.

    Acts as the parent record for the search queries, search results and
    reports declared as relationships below. Each of those relationships
    uses ``cascade="all, delete-orphan"``.
    """

    __tablename__ = "research_tasks"

    id = Column(Integer, primary_key=True)
    title = Column(String(500), nullable=False)
    description = Column(Text)
    # One of: pending, in_progress, completed, failed
    status = Column(String(50), default="pending")
    # Higher number = higher priority
    priority = Column(Integer, default=0)
    # List of tags for categorization
    tags = Column(JSON)
    # Flexible field for additional data
    research_metadata = Column(JSON)

    # --- Timestamps ---
    created_at = Column(UtcDateTime, default=utcnow())
    updated_at = Column(UtcDateTime, default=utcnow(), onupdate=utcnow())
    started_at = Column(UtcDateTime)
    completed_at = Column(UtcDateTime)

    # --- Relationships (children follow the task's lifecycle) ---
    searches = relationship(
        "SearchQuery",
        back_populates="research_task",
        cascade="all, delete-orphan",
    )
    results = relationship(
        "SearchResult",
        back_populates="research_task",
        cascade="all, delete-orphan",
    )
    reports = relationship(
        "Report",
        back_populates="research_task",
        cascade="all, delete-orphan",
    )

    def __repr__(self):
        """Return a short debug representation with title and status."""
        return "<ResearchTask(title='{}', status='{}')>".format(
            self.title, self.status
        )

class SearchQuery(Base):
    """
    Individual search queries executed as part of research tasks.

    Tracks what was searched, with which engine/type/parameters, and the
    execution status and timestamps of the query.
    """

    __tablename__ = "search_queries"

    id = Column(Integer, primary_key=True)
    research_task_id = Column(
        Integer, ForeignKey("research_tasks.id", ondelete="CASCADE")
    )
    query = Column(Text, nullable=False)
    search_engine = Column(String(50))  # google, bing, duckduckgo, etc.
    search_type = Column(String(50))  # web, academic, news, etc.
    parameters = Column(JSON)  # Additional search parameters

    # Status tracking
    status = Column(
        String(50), default="pending"
    )  # pending, executing, completed, failed
    error_message = Column(Text)
    retry_count = Column(Integer, default=0)

    # Timestamps
    created_at = Column(UtcDateTime, default=utcnow())
    executed_at = Column(UtcDateTime)
    completed_at = Column(UtcDateTime)

    # Relationships
    research_task = relationship("ResearchTask", back_populates="searches")
    results = relationship(
        "SearchResult",
        back_populates="search_query",
        cascade="all, delete-orphan",
    )

    # Indexes for performance
    __table_args__ = (
        Index("idx_research_task_status", "research_task_id", "status"),
        Index("idx_search_engine", "search_engine", "status"),
    )

    def __repr__(self):
        """
        Return a short debug representation of the query.

        The query text is cut to its first 50 characters. ``query`` can
        still be ``None`` on an instance that has not been populated yet,
        so it is coalesced to an empty string before slicing; ``repr()``
        must never raise, since it is used in logs and tracebacks.
        """
        preview = (self.query or "")[:50]
        return f"<SearchQuery(query='{preview}...', status='{self.status}')>"

113

114

class SearchResult(Base):
    """
    A single result row produced by a search query.

    Holds the initial listing data (title, url, snippet) as well as the
    content, metadata and fetch status columns declared below.
    """

    __tablename__ = "search_results"

    id = Column(Integer, primary_key=True)
    research_task_id = Column(
        Integer,
        ForeignKey("research_tasks.id", ondelete="CASCADE"),
    )
    search_query_id = Column(
        Integer,
        ForeignKey("search_queries.id", ondelete="CASCADE"),
    )

    # --- Basic result information ---
    title = Column(String(500))
    # Indexed for deduplication
    url = Column(Text, index=True)
    snippet = Column(Text)

    # --- Extended content ---
    # Full content if fetched
    content = Column(Text)
    # html, pdf, text, etc.
    content_type = Column(String(50))
    # For deduplication
    content_hash = Column(String(64))

    # --- Metadata ---
    # Calculated relevance
    relevance_score = Column(Float)
    # Position in search results
    position = Column(Integer)
    domain = Column(String(255), index=True)
    language = Column(String(10))
    published_date = Column(UtcDateTime)
    author = Column(String(255))

    # --- Status tracking ---
    # pending, fetched, failed, skipped
    fetch_status = Column(String(50))
    fetch_error = Column(Text)

    # --- Timestamps ---
    created_at = Column(UtcDateTime, default=utcnow())
    fetched_at = Column(UtcDateTime)

    # --- Relationships ---
    research_task = relationship("ResearchTask", back_populates="results")
    search_query = relationship("SearchQuery", back_populates="results")

    # --- Composite / lookup indexes ---
    __table_args__ = (
        Index("idx_task_relevance", "research_task_id", "relevance_score"),
        Index("idx_content_hash", "content_hash"),
        Index("idx_domain_task", "domain", "research_task_id"),
    )

    def __repr__(self):
        """Return a debug representation with a 50-char title preview."""
        # A falsy title (None or empty) is shown as the 'No title' placeholder.
        if self.title:
            shown_title = self.title[:50]
        else:
            shown_title = "No title"
        return f"<SearchResult(title='{shown_title}...', score={self.relevance_score})>"

170

171

class ResearchMode(enum.Enum):
    """Enumeration of the supported research modes."""

    # Each member's value is the lowercase form of its name.
    QUICK = "quick"
    DETAILED = "detailed"

177

178

class ResearchResource(Base):
    """
    A resource attached to a research session.

    Each row references a ``research_history`` record through
    ``research_id``.
    """

    __tablename__ = "research_resources"

    id = Column(Integer, primary_key=True, autoincrement=True)
    # String key pointing at research_history.id
    research_id = Column(
        String(36),
        ForeignKey("research_history.id", ondelete="CASCADE"),
        nullable=False,
    )
    title = Column(Text)
    url = Column(Text)
    content_preview = Column(Text)
    source_type = Column(Text)
    # The database column is named "metadata"; the Python attribute uses a
    # different name.
    resource_metadata = Column("metadata", JSON)
    # Stored as a string column (format not visible in this module).
    created_at = Column(String, nullable=False)

    # Parent research session
    research = relationship("ResearchHistory", back_populates="resources")

    def __repr__(self):
        """Return a short debug representation with title and url."""
        return "<ResearchResource(title='{}', url='{}')>".format(
            self.title, self.url
        )

202

203

class ResearchHistory(Base):
    """
    Research history table.

    Tracks research sessions and their progress. Owns the
    ``ResearchResource`` rows attached to a session (deleted together with
    it via ``cascade="all, delete-orphan"``).
    """

    __tablename__ = "research_history"

    # UUID as primary key
    id = Column(String(36), primary_key=True)
    # The search query.
    query = Column(Text, nullable=False)
    # The mode of research (e.g., 'quick_summary', 'detailed_report').
    mode = Column(Text, nullable=False)
    # Current status of the research.
    status = Column(Text, nullable=False)
    # The timestamp when the research started.
    created_at = Column(Text, nullable=False)
    # The timestamp when the research was completed.
    completed_at = Column(Text)
    # Duration of the research in seconds.
    duration_seconds = Column(Integer)
    # Path to the generated report.
    report_path = Column(Text)
    # Report content stored in database
    report_content = Column(Text)
    # Additional metadata about the research.
    research_meta = Column(JSON)
    # Latest progress log message.
    progress_log = Column(JSON)
    # Current progress of the research (as a percentage).
    progress = Column(Integer)
    # Title of the research report.
    title = Column(Text)

    # Relationships
    resources = relationship(
        "ResearchResource",
        back_populates="research",
        cascade="all, delete-orphan",
    )

    def __repr__(self):
        """
        Return a short debug representation of the session.

        The query text is cut to its first 50 characters. ``query`` can
        still be ``None`` on an instance that has not been populated yet,
        so it is coalesced to an empty string before slicing; ``repr()``
        must never raise, since it is used in logs and tracebacks.
        """
        preview = (self.query or "")[:50]
        return f"<ResearchHistory(query='{preview}...', status={self.status})>"

248

249

class Research(Base):
    """
    Modern research tracking with better type safety.

    ``status`` and ``mode`` are stored as ``Enum`` columns backed by
    ``ResearchStatus`` and ``ResearchMode`` respectively.
    """

    __tablename__ = "research"

    id = Column(Integer, primary_key=True, index=True)
    query = Column(String, nullable=False)
    status = Column(
        Enum(ResearchStatus), default=ResearchStatus.PENDING, nullable=False
    )
    mode = Column(
        Enum(ResearchMode), default=ResearchMode.QUICK, nullable=False
    )
    created_at = Column(UtcDateTime, server_default=utcnow(), nullable=False)
    updated_at = Column(
        UtcDateTime, server_default=utcnow(), onupdate=utcnow(), nullable=False
    )
    progress = Column(Float, default=0.0, nullable=False)
    start_time = Column(UtcDateTime, nullable=True)
    end_time = Column(UtcDateTime, nullable=True)
    error_message = Column(Text, nullable=True)

    # Relationship (one-to-one: uselist=False)
    strategy = relationship(
        "ResearchStrategy", back_populates="research", uselist=False
    )

    def __repr__(self):
        """
        Return a short debug representation of the research record.

        Column defaults are applied at INSERT time, so on an instance that
        has not been flushed ``status`` and ``query`` may still be ``None``.
        Both are handled here so that ``repr()`` never raises:

        * ``query`` is coalesced to an empty string before being cut to
          50 characters;
        * ``status`` is shown via its ``.value`` when it has one (an enum
          member) and as-is otherwise (``None`` or a raw value).
        """
        preview = (self.query or "")[:50]
        status_value = getattr(self.status, "value", self.status)
        return f"<Research(query='{preview}...', status={status_value})>"

281

282

class ResearchStrategy(Base):
    """
    Records the search strategy used for a research record.

    ``research_id`` is declared unique, so each ``research`` row has at
    most one strategy row.
    """

    __tablename__ = "research_strategies"

    id = Column(Integer, primary_key=True, index=True)
    # Owning research row
    research_id = Column(
        Integer,
        ForeignKey("research.id", ondelete="CASCADE"),
        nullable=False,
        unique=True,
        index=True,
    )
    strategy_name = Column(String(100), nullable=False, index=True)
    created_at = Column(UtcDateTime, server_default=utcnow(), nullable=False)

    # Back-reference to the owning Research
    research = relationship("Research", back_populates="strategy")

    def __repr__(self):
        """Return a short debug representation with research id and strategy."""
        return "<ResearchStrategy(research_id={}, strategy={})>".format(
            self.research_id, self.strategy_name
        )

Coverage for src / local_deep_research / database / models / research.py: 94%

127 statements