Coverage for src / local_deep_research / database / models / research.py: 95%
133 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
1"""
2Core research models for tasks, queries, and results.
3"""
5import enum
7from sqlalchemy import (
8 JSON,
9 Column,
10 Enum,
11 Float,
12 ForeignKey,
13 Index,
14 Integer,
15 String,
16 Text,
17)
18from sqlalchemy.orm import relationship
19from sqlalchemy_utc import UtcDateTime, utcnow
21from .base import Base
class ResearchTask(Base):
    """
    Top-level research task created by a user.

    A task owns the search queries, search results and reports attached to
    it; each of those relationships is declared with an
    "all, delete-orphan" cascade.
    """

    __tablename__ = "research_tasks"

    # --- Identity and descriptive fields ---
    id = Column(Integer, primary_key=True)
    title = Column(String(500), nullable=False)
    description = Column(Text)
    # One of: pending, in_progress, completed, failed
    status = Column(String(50), default="pending")
    # Higher number = higher priority
    priority = Column(Integer, default=0)
    # List of tags for categorization
    tags = Column(JSON)
    # Flexible field for additional data
    research_metadata = Column(JSON)

    # --- Timestamps ---
    created_at = Column(UtcDateTime, default=utcnow())
    updated_at = Column(
        UtcDateTime,
        default=utcnow(),
        onupdate=utcnow(),
    )
    started_at = Column(UtcDateTime)
    completed_at = Column(UtcDateTime)

    # --- Relationships ---
    searches = relationship(
        "SearchQuery",
        back_populates="research_task",
        cascade="all, delete-orphan",
    )
    results = relationship(
        "SearchResult",
        back_populates="research_task",
        cascade="all, delete-orphan",
    )
    reports = relationship(
        "Report",
        back_populates="research_task",
        cascade="all, delete-orphan",
    )

    def __repr__(self):
        """Return a short debug representation with title and status."""
        return f"<ResearchTask(title='{self.title}', status='{self.status}')>"
class SearchQuery(Base):
    """
    Individual search query executed as part of a research task.

    Records what was searched, with which engine and parameters, and the
    execution status and timestamps of the query.
    """

    __tablename__ = "search_queries"

    id = Column(Integer, primary_key=True)
    research_task_id = Column(
        Integer, ForeignKey("research_tasks.id", ondelete="CASCADE")
    )
    query = Column(Text, nullable=False)
    search_engine = Column(String(50))  # google, bing, duckduckgo, etc.
    search_type = Column(String(50))  # web, academic, news, etc.
    parameters = Column(JSON)  # Additional search parameters

    # Status tracking
    status = Column(
        String(50), default="pending"
    )  # pending, executing, completed, failed
    error_message = Column(Text)
    retry_count = Column(Integer, default=0)

    # Timestamps
    created_at = Column(UtcDateTime, default=utcnow())
    executed_at = Column(UtcDateTime)
    completed_at = Column(UtcDateTime)

    # Relationships
    research_task = relationship("ResearchTask", back_populates="searches")
    results = relationship(
        "SearchResult",
        back_populates="search_query",
        cascade="all, delete-orphan",
    )

    # Indexes for performance
    __table_args__ = (
        Index("idx_research_task_status", "research_task_id", "status"),
        Index("idx_search_engine", "search_engine", "status"),
    )

    def __repr__(self):
        """
        Return a short debug representation.

        The query text is truncated to 50 characters. An instance whose
        ``query`` has not been set yet is rendered with a placeholder
        instead of raising ``TypeError`` from slicing ``None``.
        """
        # `is not None` (rather than truthiness) keeps the output for an
        # empty-string query identical to the previous implementation.
        query_preview = self.query[:50] if self.query is not None else "No query"
        return f"<SearchQuery(query='{query_preview}...', status='{self.status}')>"
class SearchResult(Base):
    """
    A single result returned by a search query.

    Holds the result as listed by the search engine (title, URL, snippet)
    together with any content that was fetched for it afterwards.
    """

    __tablename__ = "search_results"

    id = Column(Integer, primary_key=True)
    research_task_id = Column(
        Integer,
        ForeignKey("research_tasks.id", ondelete="CASCADE"),
    )
    search_query_id = Column(
        Integer,
        ForeignKey("search_queries.id", ondelete="CASCADE"),
    )

    # --- Basic result information ---
    title = Column(String(500))
    # Indexed for deduplication
    url = Column(Text, index=True)
    snippet = Column(Text)

    # --- Extended content ---
    # Full content if fetched
    content = Column(Text)
    # html, pdf, text, etc.
    content_type = Column(String(50))
    # For deduplication
    content_hash = Column(String(64))

    # --- Metadata ---
    # Calculated relevance
    relevance_score = Column(Float)
    # Position in search results
    position = Column(Integer)
    domain = Column(String(255), index=True)
    language = Column(String(10))
    published_date = Column(UtcDateTime)
    author = Column(String(255))

    # --- Fetch status tracking ---
    # pending, fetched, failed, skipped
    fetch_status = Column(String(50))
    fetch_error = Column(Text)

    # --- Timestamps ---
    created_at = Column(UtcDateTime, default=utcnow())
    fetched_at = Column(UtcDateTime)

    # --- Relationships ---
    research_task = relationship("ResearchTask", back_populates="results")
    search_query = relationship("SearchQuery", back_populates="results")

    # --- Indexes for performance ---
    __table_args__ = (
        Index("idx_task_relevance", "research_task_id", "relevance_score"),
        Index("idx_content_hash", "content_hash"),
        Index("idx_domain_task", "domain", "research_task_id"),
    )

    def __repr__(self):
        """Return a short debug representation with title and score."""
        # A missing or empty title is shown as the "No title" placeholder.
        shown_title = self.title[:50] if self.title else "No title"
        return f"<SearchResult(title='{shown_title}...', score={self.relevance_score})>"
@enum.unique  # values below are distinct, so this only guards against aliases
class ResearchMode(enum.Enum):
    """Available research modes."""

    QUICK = "quick"
    DETAILED = "detailed"
@enum.unique  # values below are distinct, so this only guards against aliases
class ResearchStatus(enum.Enum):
    """Possible states of a research operation."""

    PENDING = "pending"
    IN_PROGRESS = "in_progress"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
    SUSPENDED = "suspended"
class ResearchResource(Base):
    """A resource (title, URL, preview) attached to a research history entry."""

    __tablename__ = "research_resources"

    id = Column(Integer, primary_key=True, autoincrement=True)
    # References research_history.id
    research_id = Column(
        String(36),
        ForeignKey("research_history.id", ondelete="CASCADE"),
        nullable=False,
    )
    title = Column(Text)
    url = Column(Text)
    content_preview = Column(Text)
    source_type = Column(Text)
    # Stored in the database column named "metadata"; the Python attribute
    # uses a different name (presumably because `metadata` is reserved on
    # declarative classes -- confirm against the Base definition).
    resource_metadata = Column("metadata", JSON)
    # Stored as a string, not as a datetime type
    created_at = Column(String, nullable=False)

    # Owning research history entry
    research = relationship("ResearchHistory", back_populates="resources")

    def __repr__(self):
        """Return a short debug representation with title and URL."""
        return f"<ResearchResource(title='{self.title}', url='{self.url}')>"
class ResearchHistory(Base):
    """
    Research history table.

    Tracks research sessions and their progress. Timestamps in this table
    are stored as text columns, not as datetime types.
    """

    __tablename__ = "research_history"

    # UUID as primary key
    id = Column(String(36), primary_key=True)
    # The search query.
    query = Column(Text, nullable=False)
    # The mode of research (e.g., 'quick_summary', 'detailed_report').
    mode = Column(Text, nullable=False)
    # Current status of the research.
    status = Column(Text, nullable=False)
    # The timestamp when the research started.
    created_at = Column(Text, nullable=False)
    # The timestamp when the research was completed.
    completed_at = Column(Text)
    # Duration of the research in seconds.
    duration_seconds = Column(Integer)
    # Path to the generated report.
    report_path = Column(Text)
    # Report content stored in database
    report_content = Column(Text)
    # Additional metadata about the research.
    research_meta = Column(JSON)
    # Latest progress log message.
    progress_log = Column(JSON)
    # Current progress of the research (as a percentage).
    progress = Column(Integer)
    # Title of the research report.
    title = Column(Text)

    # Relationships
    resources = relationship(
        "ResearchResource",
        back_populates="research",
        cascade="all, delete-orphan",
    )

    def __repr__(self):
        """
        Return a short debug representation.

        The query text is truncated to 50 characters. An instance whose
        ``query`` has not been set yet is rendered with a placeholder
        instead of raising ``TypeError`` from slicing ``None``.
        """
        # `is not None` (rather than truthiness) keeps the output for an
        # empty-string query identical to the previous implementation.
        query_preview = self.query[:50] if self.query is not None else "No query"
        return f"<ResearchHistory(query='{query_preview}...', status={self.status})>"
class Research(Base):
    """
    Modern research tracking with better type safety.

    Status and mode are stored as enum columns (``ResearchStatus`` and
    ``ResearchMode``); ``created_at`` and ``updated_at`` use a server-side
    default.
    """

    __tablename__ = "research"

    id = Column(Integer, primary_key=True, index=True)
    query = Column(String, nullable=False)
    status = Column(
        Enum(ResearchStatus), default=ResearchStatus.PENDING, nullable=False
    )
    mode = Column(
        Enum(ResearchMode), default=ResearchMode.QUICK, nullable=False
    )
    created_at = Column(UtcDateTime, server_default=utcnow(), nullable=False)
    updated_at = Column(
        UtcDateTime, server_default=utcnow(), onupdate=utcnow(), nullable=False
    )
    progress = Column(Float, default=0.0, nullable=False)
    start_time = Column(UtcDateTime, nullable=True)
    end_time = Column(UtcDateTime, nullable=True)
    error_message = Column(Text, nullable=True)

    # Relationship
    strategy = relationship(
        "ResearchStrategy", back_populates="research", uselist=False
    )

    def __repr__(self):
        """
        Return a short debug representation.

        Safe to call on an instance that has not been flushed yet: column
        defaults are applied at INSERT time, so ``status`` (and ``query``)
        may still be ``None`` here.
        """
        # `is not None` (rather than truthiness) keeps the output for an
        # empty-string query identical to the previous implementation.
        query_preview = self.query[:50] if self.query is not None else "No query"
        # Falls back to the raw attribute when it has no `.value`
        # (e.g. None before the default is applied).
        status_value = getattr(self.status, "value", self.status)
        return f"<Research(query='{query_preview}...', status={status_value})>"
class ResearchStrategy(Base):
    """
    Records the search strategy used for a research entry.

    ``research_id`` is declared unique, and the relationship on ``Research``
    uses ``uselist=False``.
    """

    __tablename__ = "research_strategies"

    id = Column(Integer, primary_key=True, index=True)
    # References research.id
    research_id = Column(
        Integer,
        ForeignKey("research.id", ondelete="CASCADE"),
        nullable=False,
        unique=True,
        index=True,
    )
    strategy_name = Column(String(100), nullable=False, index=True)
    created_at = Column(UtcDateTime, server_default=utcnow(), nullable=False)

    # Owning research entry
    research = relationship("Research", back_populates="strategy")

    def __repr__(self):
        """Return a short debug representation with research id and strategy."""
        return (
            f"<ResearchStrategy(research_id={self.research_id}, "
            f"strategy={self.strategy_name})>"
        )