Coverage for src / local_deep_research / database / models / research.py: 94%
127 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-25 01:07 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-25 01:07 +0000
1"""
2Core research models for tasks, queries, and results.
3"""
5import enum
7from sqlalchemy import (
8 JSON,
9 Column,
10 Enum,
11 Float,
12 ForeignKey,
13 Index,
14 Integer,
15 String,
16 Text,
17)
18from sqlalchemy.orm import relationship
19from sqlalchemy_utc import UtcDateTime, utcnow
21from ...constants import ResearchStatus
22from .base import Base
class ResearchTask(Base):
    """
    Top-level research task created by a user.

    A task is the parent of the search queries, search results and reports
    attached to it. Each of those collections is configured with the
    ``all, delete-orphan`` cascade.
    """

    __tablename__ = "research_tasks"

    id = Column(Integer, primary_key=True)
    title = Column(String(500), nullable=False)
    description = Column(Text)
    # One of: pending, in_progress, completed, failed
    status = Column(String(50), default="pending")
    # Larger number = higher priority
    priority = Column(Integer, default=0)
    # List of tags used for categorization
    tags = Column(JSON)
    # Flexible field for any additional data
    research_metadata = Column(JSON)

    # Timestamps
    created_at = Column(UtcDateTime, default=utcnow())
    updated_at = Column(
        UtcDateTime,
        default=utcnow(),
        onupdate=utcnow(),
    )
    started_at = Column(UtcDateTime)
    completed_at = Column(UtcDateTime)

    # Relationships (children of this task)
    searches = relationship(
        "SearchQuery",
        cascade="all, delete-orphan",
        back_populates="research_task",
    )
    results = relationship(
        "SearchResult",
        cascade="all, delete-orphan",
        back_populates="research_task",
    )
    reports = relationship(
        "Report",
        cascade="all, delete-orphan",
        back_populates="research_task",
    )

    def __repr__(self):
        """Return a short debug string showing the title and status."""
        title, status = self.title, self.status
        return f"<ResearchTask(title='{title}', status='{status}')>"
class SearchQuery(Base):
    """
    Individual search query executed as part of a research task.

    Records what was searched, with which engine and parameters, and how the
    execution went (status, error message, retry count, timestamps).
    """

    __tablename__ = "search_queries"

    id = Column(Integer, primary_key=True)
    research_task_id = Column(
        Integer, ForeignKey("research_tasks.id", ondelete="CASCADE")
    )
    query = Column(Text, nullable=False)
    search_engine = Column(String(50))  # google, bing, duckduckgo, etc.
    search_type = Column(String(50))  # web, academic, news, etc.
    parameters = Column(JSON)  # Additional search parameters

    # Status tracking
    status = Column(
        String(50), default="pending"
    )  # pending, executing, completed, failed
    error_message = Column(Text)
    retry_count = Column(Integer, default=0)

    # Timestamps
    created_at = Column(UtcDateTime, default=utcnow())
    executed_at = Column(UtcDateTime)
    completed_at = Column(UtcDateTime)

    # Relationships
    research_task = relationship("ResearchTask", back_populates="searches")
    results = relationship(
        "SearchResult",
        back_populates="search_query",
        cascade="all, delete-orphan",
    )

    # Indexes for performance
    __table_args__ = (
        Index("idx_research_task_status", "research_task_id", "status"),
        Index("idx_search_engine", "search_engine", "status"),
    )

    def __repr__(self):
        """
        Return a short debug string with the first 50 characters of the query.

        ``query`` can still be ``None`` on an instance that has not had it
        assigned yet; slicing it would raise ``TypeError``, so a placeholder
        is shown instead. Output for any string value is unchanged.
        """
        query_preview = (
            self.query[:50] if self.query is not None else "No query"
        )
        return f"<SearchQuery(query='{query_preview}...', status='{self.status}')>"
class SearchResult(Base):
    """
    A single result returned by a search query.

    Holds the initial hit (title, URL, snippet) together with any content
    fetched for it afterwards.
    """

    __tablename__ = "search_results"

    # Composite / secondary indexes for performance
    __table_args__ = (
        Index("idx_task_relevance", "research_task_id", "relevance_score"),
        Index("idx_content_hash", "content_hash"),
        Index("idx_domain_task", "domain", "research_task_id"),
    )

    id = Column(Integer, primary_key=True)
    research_task_id = Column(
        Integer,
        ForeignKey("research_tasks.id", ondelete="CASCADE"),
    )
    search_query_id = Column(
        Integer,
        ForeignKey("search_queries.id", ondelete="CASCADE"),
    )

    # Basic result information
    title = Column(String(500))
    # Indexed so duplicates can be looked up by URL
    url = Column(Text, index=True)
    snippet = Column(Text)

    # Extended content
    # Full content, if it was fetched
    content = Column(Text)
    # html, pdf, text, etc.
    content_type = Column(String(50))
    # Used for deduplication
    content_hash = Column(String(64))

    # Metadata
    # Calculated relevance
    relevance_score = Column(Float)
    # Position within the search results
    position = Column(Integer)
    domain = Column(String(255), index=True)
    language = Column(String(10))
    published_date = Column(UtcDateTime)
    author = Column(String(255))

    # Status tracking
    # pending, fetched, failed, skipped
    fetch_status = Column(String(50))
    fetch_error = Column(Text)

    # Timestamps
    created_at = Column(UtcDateTime, default=utcnow())
    fetched_at = Column(UtcDateTime)

    # Relationships
    research_task = relationship("ResearchTask", back_populates="results")
    search_query = relationship("SearchQuery", back_populates="results")

    def __repr__(self):
        """Return a short debug string with a truncated title and the score."""
        if self.title:
            shown_title = self.title[:50]
        else:
            # Missing or empty title
            shown_title = "No title"
        return f"<SearchResult(title='{shown_title}...', score={self.relevance_score})>"
class ResearchMode(enum.Enum):
    """Enumeration of the available research modes."""

    # Member values are the lowercase string forms of the member names.
    QUICK = "quick"
    DETAILED = "detailed"
class ResearchResource(Base):
    """A resource (title, URL, preview, ...) attached to a research history entry."""

    __tablename__ = "research_resources"

    id = Column(Integer, autoincrement=True, primary_key=True)
    # References research_history.id, a String(36) key
    research_id = Column(
        String(36),
        ForeignKey("research_history.id", ondelete="CASCADE"),
        nullable=False,
    )
    title = Column(Text)
    url = Column(Text)
    content_preview = Column(Text)
    source_type = Column(Text)
    # The Python attribute is ``resource_metadata`` while the database column
    # is named "metadata" (presumably because ``metadata`` is reserved on
    # declarative classes).
    resource_metadata = Column("metadata", JSON)
    # Stored as a string, not a datetime type
    created_at = Column(String, nullable=False)

    # Relationship back to the owning research history entry
    research = relationship("ResearchHistory", back_populates="resources")

    def __repr__(self):
        """Return a short debug string showing the title and URL."""
        title, url = self.title, self.url
        return f"<ResearchResource(title='{title}', url='{url}')>"
class ResearchHistory(Base):
    """
    Research history table.

    Tracks research sessions and their progress. Timestamps on this table
    are stored as text columns.
    """

    __tablename__ = "research_history"

    # UUID as primary key
    id = Column(String(36), primary_key=True)
    # The search query.
    query = Column(Text, nullable=False)
    # The mode of research (e.g., 'quick_summary', 'detailed_report').
    mode = Column(Text, nullable=False)
    # Current status of the research.
    status = Column(Text, nullable=False)
    # The timestamp when the research started.
    created_at = Column(Text, nullable=False)
    # The timestamp when the research was completed.
    completed_at = Column(Text)
    # Duration of the research in seconds.
    duration_seconds = Column(Integer)
    # Path to the generated report.
    report_path = Column(Text)
    # Report content stored in database
    report_content = Column(Text)
    # Additional metadata about the research.
    research_meta = Column(JSON)
    # Latest progress log message.
    progress_log = Column(JSON)
    # Current progress of the research (as a percentage).
    progress = Column(Integer)
    # Title of the research report.
    title = Column(Text)

    # Relationships
    resources = relationship(
        "ResearchResource",
        back_populates="research",
        cascade="all, delete-orphan",
    )

    def __repr__(self):
        """
        Return a short debug string with the first 50 characters of the query.

        ``query`` can still be ``None`` on an instance that has not had it
        assigned yet; slicing it would raise ``TypeError``, so a placeholder
        is shown instead. Output for any string value is unchanged.
        """
        query_preview = (
            self.query[:50] if self.query is not None else "No query"
        )
        return f"<ResearchHistory(query='{query_preview}...', status={self.status})>"
class Research(Base):
    """
    Modern research tracking with better type safety.

    ``status`` and ``mode`` are stored as enum columns (``ResearchStatus`` and
    ``ResearchMode``) rather than free-form text.
    """

    __tablename__ = "research"

    id = Column(Integer, primary_key=True, index=True)
    query = Column(String, nullable=False)
    status = Column(
        Enum(ResearchStatus), default=ResearchStatus.PENDING, nullable=False
    )
    mode = Column(
        Enum(ResearchMode), default=ResearchMode.QUICK, nullable=False
    )
    created_at = Column(UtcDateTime, server_default=utcnow(), nullable=False)
    updated_at = Column(
        UtcDateTime, server_default=utcnow(), onupdate=utcnow(), nullable=False
    )
    progress = Column(Float, default=0.0, nullable=False)
    start_time = Column(UtcDateTime, nullable=True)
    end_time = Column(UtcDateTime, nullable=True)
    error_message = Column(Text, nullable=True)

    # Relationship (one-to-one: uselist=False)
    strategy = relationship(
        "ResearchStrategy", back_populates="research", uselist=False
    )

    def __repr__(self):
        """
        Return a short debug string with a truncated query and the status value.

        The column default for ``status`` is only a column-level setting, so
        an instance that has not been given a status holds ``None`` there,
        and ``query`` may likewise be unset. Both cases are handled so that
        ``repr()`` never raises; output for populated rows is unchanged.
        """
        query_preview = (
            self.query[:50] if self.query is not None else "No query"
        )
        # Enum members expose ``.value``; fall back to the raw object
        # (e.g. ``None``) when the attribute is not an enum member.
        status_value = getattr(self.status, "value", self.status)
        return f"<Research(query='{query_preview}...', status={status_value})>"
class ResearchStrategy(Base):
    """
    Records which search strategy was used for a research entry.

    ``research_id`` is unique, so each research row has at most one strategy.
    """

    __tablename__ = "research_strategies"

    id = Column(Integer, index=True, primary_key=True)
    research_id = Column(
        Integer,
        ForeignKey("research.id", ondelete="CASCADE"),
        index=True,
        unique=True,
        nullable=False,
    )
    strategy_name = Column(String(100), index=True, nullable=False)
    created_at = Column(UtcDateTime, nullable=False, server_default=utcnow())

    # Relationship back to the research entry this strategy belongs to
    research = relationship("Research", back_populates="strategy")

    def __repr__(self):
        """Return a short debug string showing the research id and strategy name."""
        research_id, name = self.research_id, self.strategy_name
        return f"<ResearchStrategy(research_id={research_id}, strategy={name})>"