
1""" 

2API functions for optimization tasks in Local Deep Research. 

3 

4This module provides a simplified interface for parameter optimization 

5without having to directly work with the optimizer classes. 

6""" 

7 

8from pathlib import Path 

9from typing import Any, Callable, Dict, List, Optional, Tuple 

10 

11# No metrics imports needed here, they're used in the OptunaOptimizer 

12from .optuna_optimizer import OptunaOptimizer 

13 

14 

def optimize_parameters(
    query: str,
    param_space: Optional[Dict[str, Any]] = None,
    output_dir: str = str(Path("data") / "optimization_results"),
    model_name: Optional[str] = None,
    provider: Optional[str] = None,
    search_tool: Optional[str] = None,
    temperature: float = 0.7,
    n_trials: int = 30,
    timeout: Optional[int] = None,
    n_jobs: int = 1,
    study_name: Optional[str] = None,
    optimization_metrics: Optional[List[str]] = None,
    metric_weights: Optional[Dict[str, float]] = None,
    progress_callback: Optional[Callable[[int, int, Dict], None]] = None,
    benchmark_weights: Optional[Dict[str, float]] = None,
) -> Tuple[Dict[str, Any], float]:
    """
    Optimize parameters for Local Deep Research.

    Args:
        query: The research query to use for all experiments
        param_space: Dictionary defining parameter search spaces (optional)
        output_dir: Directory to save optimization results
        model_name: Name of the LLM model to use
        provider: LLM provider
        search_tool: Search engine to use
        temperature: LLM temperature
        n_trials: Number of parameter combinations to try
        timeout: Maximum seconds to run optimization (None for no limit)
        n_jobs: Number of parallel jobs for optimization
        study_name: Name of the Optuna study
        optimization_metrics: List of metrics to optimize
            (default: ["quality", "speed"])
        metric_weights: Dictionary of weights for each metric
        progress_callback: Optional callback for progress updates
        benchmark_weights: Dictionary mapping benchmark types to weights
            (e.g., {"simpleqa": 0.6, "browsecomp": 0.4})
            If None, only SimpleQA is used with weight 1.0

    Returns:
        Tuple of (best_parameters, best_score)
    """
    # Create optimizer
    optimizer = OptunaOptimizer(
        base_query=query,
        output_dir=output_dir,
        model_name=model_name,
        provider=provider,
        search_tool=search_tool,
        temperature=temperature,
        n_trials=n_trials,
        timeout=timeout,
        n_jobs=n_jobs,
        study_name=study_name,
        optimization_metrics=optimization_metrics,
        metric_weights=metric_weights,
        progress_callback=progress_callback,
        benchmark_weights=benchmark_weights,
    )

    # Run optimization
    return optimizer.optimize(param_space)
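
# A minimal usage sketch (illustrative only; the query string and values below
# are hypothetical, and the run's cost scales with n_trials):
#
#     best_params, best_score = optimize_parameters(
#         query="Impacts of microplastics on marine ecosystems",
#         n_trials=10,
#         optimization_metrics=["quality", "speed"],
#         metric_weights={"quality": 0.7, "speed": 0.3},
#     )
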

def optimize_for_speed(
    query: str,
    n_trials: int = 20,
    output_dir: str = str(Path("data") / "optimization_results"),
    model_name: Optional[str] = None,
    provider: Optional[str] = None,
    search_tool: Optional[str] = None,
    progress_callback: Optional[Callable[[int, int, Dict], None]] = None,
    benchmark_weights: Optional[Dict[str, float]] = None,
) -> Tuple[Dict[str, Any], float]:
    """
    Optimize parameters with a focus on speed.

    Args:
        query: The research query to use for all experiments
        n_trials: Number of parameter combinations to try
        output_dir: Directory to save optimization results
        model_name: Name of the LLM model to use
        provider: LLM provider
        search_tool: Search engine to use
        progress_callback: Optional callback for progress updates
        benchmark_weights: Dictionary mapping benchmark types to weights
            (e.g., {"simpleqa": 0.6, "browsecomp": 0.4})
            If None, only SimpleQA is used with weight 1.0

    Returns:
        Tuple of (best_parameters, best_score)
    """
    # Focus on speed with a reduced parameter space
    param_space = {
        "iterations": {
            "type": "int",
            "low": 1,
            "high": 3,
            "step": 1,
        },
        "questions_per_iteration": {
            "type": "int",
            "low": 1,
            "high": 3,
            "step": 1,
        },
        "search_strategy": {
            "type": "categorical",
            "choices": ["rapid", "parallel", "source_based"],
        },
    }

    # Speed-focused weights
    metric_weights = {"speed": 0.8, "quality": 0.2, "resource": 0.0}

    return optimize_parameters(
        query=query,
        param_space=param_space,
        output_dir=output_dir,
        model_name=model_name,
        provider=provider,
        search_tool=search_tool,
        n_trials=n_trials,
        metric_weights=metric_weights,
        optimization_metrics=["speed", "quality"],
        progress_callback=progress_callback,
        benchmark_weights=benchmark_weights,
    )
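
# Hypothetical usage sketch: a quick tuning pass that trades answer quality
# for runtime, searching only the reduced space defined above.
#
#     fast_params, fast_score = optimize_for_speed(
#         query="History of the transistor",
#         n_trials=5,
#     )
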

def optimize_for_quality(
    query: str,
    n_trials: int = 30,
    output_dir: str = str(Path("data") / "optimization_results"),
    model_name: Optional[str] = None,
    provider: Optional[str] = None,
    search_tool: Optional[str] = None,
    progress_callback: Optional[Callable[[int, int, Dict], None]] = None,
    benchmark_weights: Optional[Dict[str, float]] = None,
) -> Tuple[Dict[str, Any], float]:
    """
    Optimize parameters with a focus on result quality.

    Args:
        query: The research query to use for all experiments
        n_trials: Number of parameter combinations to try
        output_dir: Directory to save optimization results
        model_name: Name of the LLM model to use
        provider: LLM provider
        search_tool: Search engine to use
        progress_callback: Optional callback for progress updates
        benchmark_weights: Dictionary mapping benchmark types to weights
            (e.g., {"simpleqa": 0.6, "browsecomp": 0.4})
            If None, only SimpleQA is used with weight 1.0

    Returns:
        Tuple of (best_parameters, best_score)
    """
    # Quality-focused weights
    metric_weights = {"quality": 0.9, "speed": 0.1, "resource": 0.0}

    return optimize_parameters(
        query=query,
        output_dir=output_dir,
        model_name=model_name,
        provider=provider,
        search_tool=search_tool,
        n_trials=n_trials,
        metric_weights=metric_weights,
        optimization_metrics=["quality", "speed"],
        progress_callback=progress_callback,
        benchmark_weights=benchmark_weights,
    )
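
# Hypothetical usage sketch: quality-weighted tuning. No param_space is passed
# here, so the call presumably falls back to the optimizer's default search
# space (see OptunaOptimizer for the actual fallback behavior).
#
#     best_params, best_score = optimize_for_quality(
#         query="Mechanisms of CRISPR off-target effects",
#     )
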

def optimize_for_efficiency(
    query: str,
    n_trials: int = 25,
    output_dir: str = str(Path("data") / "optimization_results"),
    model_name: Optional[str] = None,
    provider: Optional[str] = None,
    search_tool: Optional[str] = None,
    progress_callback: Optional[Callable[[int, int, Dict], None]] = None,
    benchmark_weights: Optional[Dict[str, float]] = None,
) -> Tuple[Dict[str, Any], float]:
    """
    Optimize parameters with a focus on resource efficiency.

    Args:
        query: The research query to use for all experiments
        n_trials: Number of parameter combinations to try
        output_dir: Directory to save optimization results
        model_name: Name of the LLM model to use
        provider: LLM provider
        search_tool: Search engine to use
        progress_callback: Optional callback for progress updates
        benchmark_weights: Dictionary mapping benchmark types to weights
            (e.g., {"simpleqa": 0.6, "browsecomp": 0.4})
            If None, only SimpleQA is used with weight 1.0

    Returns:
        Tuple of (best_parameters, best_score)
    """
    # Balance of quality, speed, and resource usage
    metric_weights = {"quality": 0.4, "speed": 0.3, "resource": 0.3}

    return optimize_parameters(
        query=query,
        output_dir=output_dir,
        model_name=model_name,
        provider=provider,
        search_tool=search_tool,
        n_trials=n_trials,
        metric_weights=metric_weights,
        optimization_metrics=["quality", "speed", "resource"],
        progress_callback=progress_callback,
        benchmark_weights=benchmark_weights,
    )
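
# Illustrative note, not confirmed by this module: if the optimizer combines
# normalized metrics as a weighted sum, a trial's composite score here would be
#
#     score = 0.4 * quality + 0.3 * speed + 0.3 * resource
#
# Hypothetical call:
#
#     params, score = optimize_for_efficiency(
#         query="Grid-scale battery storage trends",
#     )
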

def get_default_param_space() -> Dict[str, Any]:
    """
    Get the default parameter search space for optimization.

    Returns:
        Dictionary defining the default parameter search spaces
    """
    return {
        "iterations": {
            "type": "int",
            "low": 1,
            "high": 5,
            "step": 1,
        },
        "questions_per_iteration": {
            "type": "int",
            "low": 1,
            "high": 5,
            "step": 1,
        },
        "search_strategy": {
            "type": "categorical",
            "choices": [
                "iterdrag",
                "standard",
                "rapid",
                "parallel",
                "source_based",
            ],
        },
        "max_results": {
            "type": "int",
            "low": 10,
            "high": 100,
            "step": 10,
        },
        "max_filtered_results": {
            "type": "int",
            "low": 5,
            "high": 50,
            "step": 5,
        },
    }

277 }