Coverage for src / local_deep_research / constants.py: 100%

28 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-14 23:55 +0000

1"""Project-wide constants for Local Deep Research.""" 

2 

3from enum import StrEnum 

4from typing import Dict, List 

5 

6from .__version__ import __version__ 

7 

# Truthful User-Agent that identifies this project, for APIs that prefer or
# require clients to identify themselves (e.g., academic APIs like arXiv,
# PubMed, OpenAlex).
USER_AGENT = (
    f"Local-Deep-Research/{__version__}"
    " (Academic Research Tool;"
    " https://github.com/LearningCircuit/local-deep-research)"
)

14 

# User-Agent that imitates a desktop browser, for sites that may block
# requests identifying themselves as bots. Use sparingly and only when
# necessary.
BROWSER_USER_AGENT = " ".join(
    (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "AppleWebKit/537.36 (KHTML, like Gecko)",
        "Chrome/120.0.0.0",
        "Safari/537.36",
    )
)

22 

23 

24# --- Research status values --- 

25# Frontend helpers: src/local_deep_research/web/static/js/config/constants.js 

26# Injected via: src/local_deep_research/web/app_factory.py (inject_frontend_constants) 

27# Template: src/local_deep_research/web/templates/base.html 

28# If you add/remove/rename a status here, the frontend picks it up automatically. 

29class ResearchStatus(StrEnum): 

30 """Status values for research records. 

31 

32 Uses StrEnum so values compare equal to plain strings, 

33 e.g. ``ResearchStatus.COMPLETED == "completed"`` is True. 

34 

35 Lifecycle:: 

36 

37 [*] ─┬─► QUEUED ─┬─► IN_PROGRESS ─┬─► COMPLETED 

38 │ │ ├─► FAILED 

39 │ └─► SUSPENDED └─► SUSPENDED 

40 │ (concurrency limit) (terminated while queued) 

41 

42 └─► IN_PROGRESS (slots available, skips queue) 

43 

44 Notes: 

45 - PENDING is declared as a model default but no creation path 

46 actually sets it. All routes use QUEUED or IN_PROGRESS. 

47 - ERROR is checked as a terminal state but never set by current 

48 code. It predates FAILED and exists for backward compatibility 

49 with older database records. 

50 - CANCELLED is not used by the research workflow. It is used by 

51 the benchmark subsystem (BenchmarkStatus, BenchmarkTaskStatus). 

52 """ 

53 

54 # --- Active lifecycle states --- 

55 PENDING = "pending" # Model default; never set by any creation path 

56 QUEUED = "queued" # Waiting for a worker slot 

57 IN_PROGRESS = "in_progress" # Worker actively executing 

58 

59 # --- Terminal states --- 

60 COMPLETED = "completed" # Finished successfully 

61 SUSPENDED = "suspended" # User terminated the research 

62 FAILED = "failed" # Unrecoverable error during execution 

63 

64 # --- Legacy / compatibility --- 

65 ERROR = "error" # Never set; predates FAILED 

66 CANCELLED = "cancelled" # Unused by research; for benchmarks 

67 

68 

# --- Research library file_path sentinel values ---
# Each sentinel is defined once by name; FILE_PATH_SENTINELS groups all of
# them in a tuple.
FILE_PATH_METADATA_ONLY = "metadata_only"
FILE_PATH_TEXT_ONLY = "text_only_not_stored"
FILE_PATH_BLOB_DELETED = "blob_deleted"
FILE_PATH_SENTINELS = (
    FILE_PATH_METADATA_ONLY,
    FILE_PATH_TEXT_ONLY,
    FILE_PATH_BLOB_DELETED,
)

78 

# --- Snippet / truncation lengths ---
# Short and long length limits for snippets and truncated text.
SNIPPET_LENGTH_SHORT = 250
SNIPPET_LENGTH_LONG = 500

82 

# --- Research history collection ---
# Name and description text for the research-history collection. The
# description is assembled sentence by sentence, joined with single spaces.
RESEARCH_HISTORY_COLLECTION_NAME = "History"
RESEARCH_HISTORY_COLLECTION_DESCRIPTION = " ".join(
    (
        "Your research history indexed for AI-powered semantic search.",
        "Indexing converts past research reports and their sources into"
        " searchable content, enabling natural-language queries across all"
        " your previous research.",
        "Used by the History page search when in AI or Hybrid mode.",
    )
)

92 

# --- Available search strategies (UI-facing) ---
# Single source of truth for the strategies shown in all UI dropdowns.
# create_strategy() in search_system_factory.py handles additional names
# (aliases, internal strategies) — this list is purely for the UI.
#
# Written as a (name, label, description) table; each row is expanded into a
# dictionary with exactly those three keys, in that order.
AVAILABLE_STRATEGIES: List[Dict[str, str]] = [
    {"name": name, "label": label, "description": description}
    for name, label, description in (
        (
            "source-based",
            "Source-Based (Best for small <16,000 context window)",
            "Comprehensive research with inline citations. Focuses on finding and extracting information from authoritative sources.",
        ),
        (
            "focused-iteration",
            "Focused Iteration - Quick (Minimal text output)",
            "Fast & precise Q&A with iterative search. Good for complex queries requiring specific answers.",
        ),
        (
            "focused-iteration-standard",
            "Focused Iteration - Comprehensive (Needs >16,000 context window)",
            "Detailed long-form output with citations. Uses standard citation handler for comprehensive answers.",
        ),
        (
            "mcp",
            "MCP ReAct (Agentic research - LLM decides tools)",
            "Agentic research using ReAct pattern. LLM decides what tools to call, analyzes results, and iterates.",
        ),
        (
            "langgraph-agent",
            "LangGraph Agent (Autonomous agentic research)",
            "Agentic research where the LLM autonomously decides what to search, which engines to use, and when to synthesize. Supports all search engines as tools.",
        ),
    )
]

124 

125 

# Complete strategy list: the entries of AVAILABLE_STRATEGIES (the same
# dictionary objects, in the same order) followed by the additional
# strategies below. get_available_strategies(show_all=True) reads this list.
#
# Written as a (name, label, description) table; each row is expanded into a
# dictionary with exactly those three keys, in that order.
ALL_STRATEGIES: List[Dict[str, str]] = AVAILABLE_STRATEGIES + [
    {"name": name, "label": label, "description": description}
    for name, label, description in (
        (
            "iterative-refinement",
            "Iterative Refinement (Progressive refinement)",
            "LLM-guided progressive refinement. Iteratively refines results using evaluation and follow-up queries.",
        ),
        (
            "topic-organization",
            "Topic Organization (Clusters by topic)",
            "Clusters sources into topics with lead texts. Organizes research by themes for structured output.",
        ),
        (
            "news_aggregation",
            "News Aggregation (Current events)",
            "Specialized for news aggregation and current events.",
        ),
        (
            "rapid",
            "Rapid (Quick single-pass search)",
            "Quick single-pass search for fast results. Good for simple queries.",
        ),
        (
            "iterative",
            "Iterative (Loop-based reasoning)",
            "Loop-based reasoning with persistent knowledge accumulation and confidence tracking.",
        ),
        (
            "parallel",
            "Parallel (Multiple queries simultaneously)",
            "Runs multiple search queries in parallel for comprehensive coverage.",
        ),
        (
            "recursive",
            "Recursive (Query decomposition)",
            "Recursive decomposition of complex queries into simpler sub-queries.",
        ),
        (
            "adaptive",
            "Adaptive (Step-by-step reasoning)",
            "Adaptive step-by-step reasoning that adjusts strategy based on results.",
        ),
        (
            "smart",
            "Smart (Auto sub-query generation)",
            "Smart decomposition with automatic sub-query generation.",
        ),
        (
            "standard",
            "Standard (Basic iterative search)",
            "Basic iterative search strategy for general use.",
        ),
        (
            "iterdrag",
            "IterDRAG (Iterative retrieval and generation)",
            "IterDRAG strategy for iterative document retrieval and generation.",
        ),
        (
            "iterative-reasoning",
            "Iterative Reasoning (Depth-based exploration)",
            "Iterative reasoning with depth-based exploration.",
        ),
        (
            "browsecomp",
            "BrowseComp (Confidence-based iteration)",
            "BrowseComp optimized strategy with confidence-based iteration.",
        ),
        (
            "evidence",
            "Evidence (Verification with candidate discovery)",
            "Enhanced evidence-based verification with candidate discovery and pattern learning.",
        ),
        (
            "constrained",
            "Constrained (Progressive narrowing)",
            "Progressive constraint-based search that narrows candidates step by step.",
        ),
        (
            "parallel-constrained",
            "Parallel Constrained (Combined constraint execution)",
            "Parallel constraint-based search with combined constraint execution.",
        ),
        (
            "early-stop-constrained",
            "Early Stop Constrained (With early stopping at 99%)",
            "Parallel constraint search with immediate evaluation and early stopping.",
        ),
        (
            "smart-query",
            "Smart Query (LLM query generation)",
            "Smart query generation strategy using LLM-generated queries.",
        ),
        (
            "dual-confidence",
            "Dual Confidence (Positive/negative/uncertainty scoring)",
            "Dual confidence scoring with positive/negative/uncertainty.",
        ),
        (
            "dual-confidence-with-rejection",
            "Dual Confidence + Rejection (Early rejection)",
            "Dual confidence with early rejection of poor candidates.",
        ),
        (
            "concurrent-dual-confidence",
            "Concurrent Dual Confidence (Concurrent search & evaluation)",
            "Concurrent search and evaluation with progressive constraint relaxation.",
        ),
        (
            "constraint-parallel",
            "Constraint Parallel (Parallel constraint checking)",
            "Parallel constraint checking with entity seeding and direct property search.",
        ),
        (
            "modular",
            "Modular (Modular architecture with constraint checking)",
            "Modular architecture using constraint checking and candidate exploration modules.",
        ),
        (
            "modular-parallel",
            "Modular Parallel (Modular with parallel exploration)",
            "Modular strategy with parallel exploration.",
        ),
        (
            "browsecomp-entity",
            "BrowseComp Entity (Entity-focused with knowledge graph)",
            "Entity-focused search for BrowseComp questions with knowledge graph building.",
        ),
    )
]

254 

255 

def get_available_strategies(show_all: bool = False) -> List[Dict[str, str]]:
    """Get the list of available research strategies.

    The result is an independent copy: the list and every strategy
    dictionary in it are new objects, so callers may reorder, extend, or
    edit the result without altering the module-level constants.

    Args:
        show_all: If True, return all strategies including
            advanced/experimental ones (``ALL_STRATEGIES``); otherwise
            return only ``AVAILABLE_STRATEGIES``.

    Returns:
        List of dictionaries with 'name', 'label', and 'description' keys.
    """
    strategies = ALL_STRATEGIES if show_all else AVAILABLE_STRATEGIES
    # list.copy() would be shallow: the returned list would still hold the
    # module's own dictionaries, and a caller editing one entry would change
    # the constant for every later caller. Copy each entry as well; the
    # values are plain strings, so one level of copying is enough.
    return [dict(strategy) for strategy in strategies]