Coverage for src / local_deep_research / database / models / research.py: 95%
129 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
1"""
2Core research models for tasks, queries, and results.
3"""
5import enum
7from sqlalchemy import (
8 JSON,
9 Column,
10 Enum,
11 Float,
12 ForeignKey,
13 Index,
14 Integer,
15 String,
16 Text,
17)
18from sqlalchemy.orm import relationship
20from sqlalchemy_utc import UtcDateTime, utcnow
22from ...constants import ResearchStatus
23from .base import Base
class ResearchTask(Base):
    """
    A research task created by a user.

    Top-level record that owns the search queries, search results and
    reports attached to it; all three collections use the
    "all, delete-orphan" cascade.
    """

    __tablename__ = "research_tasks"

    id = Column(Integer, primary_key=True)
    title = Column(String(500), nullable=False)
    description = Column(Text)
    # Lifecycle state: pending, in_progress, completed, failed
    status = Column(String(50), default="pending")
    # Larger number = higher priority
    priority = Column(Integer, default=0)
    # List of tags used for categorization
    tags = Column(JSON)
    # Flexible field for any additional data
    research_metadata = Column(JSON)

    # Timestamps
    created_at = Column(UtcDateTime, default=utcnow())
    updated_at = Column(UtcDateTime, default=utcnow(), onupdate=utcnow())
    started_at = Column(UtcDateTime)
    completed_at = Column(UtcDateTime)

    # Relationships (child rows are cascaded together with the task)
    searches = relationship(
        "SearchQuery",
        back_populates="research_task",
        cascade="all, delete-orphan",
    )
    results = relationship(
        "SearchResult",
        back_populates="research_task",
        cascade="all, delete-orphan",
    )
    reports = relationship(
        "Report",
        back_populates="research_task",
        cascade="all, delete-orphan",
    )

    def __repr__(self):
        """Return a short debugging representation (title and status)."""
        return "<ResearchTask(title='{}', status='{}')>".format(
            self.title, self.status
        )
class SearchQuery(Base):
    """
    Individual search queries executed as part of research tasks.
    Tracks what was searched and when.
    """

    __tablename__ = "search_queries"

    id = Column(Integer, primary_key=True)
    research_task_id = Column(
        Integer, ForeignKey("research_tasks.id", ondelete="CASCADE")
    )
    query = Column(Text, nullable=False)
    search_engine = Column(String(50))  # google, bing, duckduckgo, etc.
    search_type = Column(String(50))  # web, academic, news, etc.
    parameters = Column(JSON)  # Additional search parameters

    # Status tracking
    status = Column(
        String(50), default="pending"
    )  # pending, executing, completed, failed
    error_message = Column(Text)
    retry_count = Column(Integer, default=0)

    # Timestamps
    created_at = Column(UtcDateTime, default=utcnow())
    executed_at = Column(UtcDateTime)
    completed_at = Column(UtcDateTime)

    # Relationships
    research_task = relationship("ResearchTask", back_populates="searches")
    results = relationship(
        "SearchResult",
        back_populates="search_query",
        cascade="all, delete-orphan",
    )

    # Indexes for performance
    __table_args__ = (
        Index("idx_research_task_status", "research_task_id", "status"),
        Index("idx_search_engine", "search_engine", "status"),
    )

    def __repr__(self):
        """
        Return a debugging representation with the first 50 characters
        of the query and the current status.
        """
        # ``query`` can still be None on an instance whose attributes have
        # not been set yet; slicing None would raise TypeError, so fall
        # back to a placeholder. Non-None values render as before.
        preview = self.query[:50] if self.query is not None else "No query"
        return f"<SearchQuery(query='{preview}...', status='{self.status}')>"
class SearchResult(Base):
    """
    A single result returned by a search query.

    Holds the basic result information (title, URL, snippet) together
    with any full content that was fetched for it.
    """

    __tablename__ = "search_results"

    id = Column(Integer, primary_key=True)
    research_task_id = Column(
        Integer,
        ForeignKey("research_tasks.id", ondelete="CASCADE"),
    )
    search_query_id = Column(
        Integer,
        ForeignKey("search_queries.id", ondelete="CASCADE"),
    )

    # Basic result information
    title = Column(String(500))
    # Indexed for deduplication
    url = Column(Text, index=True)
    snippet = Column(Text)

    # Extended content
    # Full content, if it was fetched
    content = Column(Text)
    # html, pdf, text, etc.
    content_type = Column(String(50))
    # Used for deduplication
    content_hash = Column(String(64))

    # Metadata
    # Calculated relevance
    relevance_score = Column(Float)
    # Position within the search results
    position = Column(Integer)
    domain = Column(String(255), index=True)
    language = Column(String(10))
    published_date = Column(UtcDateTime)
    author = Column(String(255))

    # Status tracking
    # pending, fetched, failed, skipped
    fetch_status = Column(String(50))
    fetch_error = Column(Text)

    # Timestamps
    created_at = Column(UtcDateTime, default=utcnow())
    fetched_at = Column(UtcDateTime)

    # Relationships
    research_task = relationship("ResearchTask", back_populates="results")
    search_query = relationship("SearchQuery", back_populates="results")

    # Indexes for performance
    __table_args__ = (
        Index("idx_task_relevance", "research_task_id", "relevance_score"),
        Index("idx_content_hash", "content_hash"),
        Index("idx_domain_task", "domain", "research_task_id"),
    )

    def __repr__(self):
        """
        Return a debugging representation with the first 50 characters
        of the title (or a placeholder) and the relevance score.
        """
        if self.title:
            shown_title = self.title[:50]
        else:
            shown_title = "No title"
        return (
            f"<SearchResult(title='{shown_title}...', "
            f"score={self.relevance_score})>"
        )
class ResearchMode(enum.Enum):
    """The set of research modes a research run can use."""

    # Member values are the lowercase string form of each mode.
    QUICK = "quick"
    DETAILED = "detailed"
class ResearchResource(Base):
    """
    A resource attached to a research history entry.

    Each row references its parent in ``research_history`` and may also
    reference a row in ``documents``.
    """

    __tablename__ = "research_resources"

    id = Column(Integer, primary_key=True, autoincrement=True)
    # Parent research; the foreign key is declared with ON DELETE CASCADE
    research_id = Column(
        String(36),
        ForeignKey("research_history.id", ondelete="CASCADE"),
        nullable=False,
    )
    title = Column(Text)
    url = Column(Text)
    content_preview = Column(Text)
    source_type = Column(Text)
    # Stored in the database column named "metadata"
    resource_metadata = Column("metadata", JSON)
    # Kept as a string column (not a datetime type)
    created_at = Column(String, nullable=False)
    # Optional document link; the foreign key is declared with
    # ON DELETE SET NULL
    document_id = Column(
        String(36),
        ForeignKey("documents.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )

    # Relationships
    research = relationship("ResearchHistory", back_populates="resources")
    document = relationship("Document", foreign_keys=[document_id])

    def __repr__(self):
        """Return a short debugging representation (title and URL)."""
        return "<ResearchResource(title='{}', url='{}')>".format(
            self.title, self.url
        )
class ResearchHistory(Base):
    """
    Research history table.
    Tracks research sessions and their progress.
    """

    __tablename__ = "research_history"

    # UUID as primary key
    id = Column(String(36), primary_key=True)
    # The search query.
    query = Column(Text, nullable=False)
    # The mode of research (e.g., 'quick_summary', 'detailed_report').
    mode = Column(Text, nullable=False)
    # Current status of the research.
    status = Column(Text, nullable=False)
    # The timestamp when the research started (stored as text).
    created_at = Column(Text, nullable=False)
    # The timestamp when the research was completed (stored as text).
    completed_at = Column(Text)
    # Duration of the research in seconds.
    duration_seconds = Column(Integer)
    # Path to the generated report.
    report_path = Column(Text)
    # Report content stored in database
    report_content = Column(Text)
    # Additional metadata about the research.
    research_meta = Column(JSON)
    # Progress log, stored as JSON.
    progress_log = Column(JSON)
    # Current progress of the research (as a percentage).
    progress = Column(Integer)
    # Title of the research report.
    title = Column(Text)

    # Relationships
    resources = relationship(
        "ResearchResource",
        back_populates="research",
        cascade="all, delete-orphan",
    )

    def __repr__(self):
        """
        Return a debugging representation with the first 50 characters
        of the query and the current status.
        """
        # ``query`` can still be None on an instance whose attributes have
        # not been set yet; slicing None would raise TypeError, so fall
        # back to a placeholder. Non-None values render as before.
        preview = self.query[:50] if self.query is not None else "No query"
        return f"<ResearchHistory(query='{preview}...', status={self.status})>"
class Research(Base):
    """
    Modern research tracking with better type safety.

    ``status`` and ``mode`` are stored as enum columns, and each research
    has at most one associated ``ResearchStrategy``.
    """

    __tablename__ = "research"

    id = Column(Integer, primary_key=True, index=True)
    query = Column(String, nullable=False)
    status = Column(
        Enum(ResearchStatus), default=ResearchStatus.PENDING, nullable=False
    )
    mode = Column(
        Enum(ResearchMode), default=ResearchMode.QUICK, nullable=False
    )
    created_at = Column(UtcDateTime, server_default=utcnow(), nullable=False)
    updated_at = Column(
        UtcDateTime, server_default=utcnow(), onupdate=utcnow(), nullable=False
    )
    progress = Column(Float, default=0.0, nullable=False)
    start_time = Column(UtcDateTime, nullable=True)
    end_time = Column(UtcDateTime, nullable=True)
    error_message = Column(Text, nullable=True)

    # Relationship (scalar: uselist=False)
    strategy = relationship(
        "ResearchStrategy", back_populates="research", uselist=False
    )

    def __repr__(self):
        """
        Return a debugging representation with the first 50 characters
        of the query and the status value.
        """
        # Column defaults are applied when the row is flushed, so a fresh
        # instance can have ``query`` and ``status`` set to None. Guard
        # both so repr() never raises while debugging or logging.
        preview = self.query[:50] if self.query is not None else "No query"
        # Enum members expose ``.value``; anything else is shown as-is.
        status = getattr(self.status, "value", self.status)
        return f"<Research(query='{preview}...', status={status})>"
class ResearchStrategy(Base):
    """
    Records the search strategy used for a research.

    ``research_id`` is unique, so a research has at most one strategy row.
    """

    __tablename__ = "research_strategies"

    id = Column(Integer, primary_key=True, index=True)
    # Owning research; the foreign key is declared with ON DELETE CASCADE
    research_id = Column(
        Integer,
        ForeignKey("research.id", ondelete="CASCADE"),
        nullable=False,
        unique=True,
        index=True,
    )
    strategy_name = Column(String(100), nullable=False, index=True)
    created_at = Column(UtcDateTime, server_default=utcnow(), nullable=False)

    # Relationship
    research = relationship("Research", back_populates="strategy")

    def __repr__(self):
        """Return a short debugging representation (research id and name)."""
        return "<ResearchStrategy(research_id={}, strategy={})>".format(
            self.research_id, self.strategy_name
        )