Coverage for src / local_deep_research / web / database / benchmark_schema.py: 100%

30 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-14 23:55 +0000

1"""Simple benchmark table definitions for schema creation.""" 

2 

3import enum 

4 

5from sqlalchemy import ( 

6 JSON, 

7 Boolean, 

8 Column, 

9 DateTime, 

10 Enum, 

11 Float, 

12 ForeignKey, 

13 Index, 

14 Integer, 

15 String, 

16 Text, 

17 UniqueConstraint, 

18) 

19from sqlalchemy.sql import func 

20 

21 

class BenchmarkStatus(enum.Enum):
    """Status of a benchmark run.

    Persisted in the ``status`` column of the ``benchmark_runs`` table
    (see ``benchmark_runs_table`` below); new runs default to ``PENDING``.
    """

    PENDING = "pending"
    IN_PROGRESS = "in_progress"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
    PAUSED = "paused"

31 

32 

class DatasetType(enum.Enum):
    """Supported dataset types.

    Used for the ``dataset_type`` column of ``benchmark_results`` and the
    ``current_dataset`` column of ``benchmark_progress``.
    """

    SIMPLEQA = "simpleqa"
    BROWSECOMP = "browsecomp"
    CUSTOM = "custom"

39 

40 

# Simple table definitions for creation.
# Each definition is a plain dict consumed by create_benchmark_tables_simple():
#   "table_name"  -> physical table name (str)
#   "columns"     -> list of sqlalchemy Column objects
#   "indexes"     -> optional list of Index objects
#   "constraints" -> optional list of table-level constraints
benchmark_runs_table = {
    "table_name": "benchmark_runs",
    "columns": [
        Column("id", Integer, primary_key=True, index=True),
        # Optional human-readable label for the run.
        Column("run_name", String(255), nullable=True),
        # Presumably a short hash of the run configuration — confirm against
        # the code that writes it.
        Column("config_hash", String(16), nullable=False, index=True),
        Column("query_hash_list", JSON, nullable=False),
        Column("search_config", JSON, nullable=False),
        Column("evaluation_config", JSON, nullable=False),
        Column("datasets_config", JSON, nullable=False),
        Column(
            "status",
            Enum(BenchmarkStatus),
            default=BenchmarkStatus.PENDING,
            nullable=False,
        ),
        # Timestamps are set by the database (func.now()); updated_at is
        # refreshed on every UPDATE via onupdate.
        Column(
            "created_at", DateTime, server_default=func.now(), nullable=False
        ),
        Column(
            "updated_at",
            DateTime,
            server_default=func.now(),
            onupdate=func.now(),
            nullable=False,
        ),
        Column("start_time", DateTime, nullable=True),
        Column("end_time", DateTime, nullable=True),
        # Progress counters.
        Column("total_examples", Integer, default=0, nullable=False),
        Column("completed_examples", Integer, default=0, nullable=False),
        Column("failed_examples", Integer, default=0, nullable=False),
        # Aggregate results; nullable until the run produces them.
        Column("overall_accuracy", Float, nullable=True),
        Column("processing_rate", Float, nullable=True),
        Column("error_message", Text, nullable=True),
    ],
    "indexes": [
        Index("idx_benchmark_runs_config_hash", "config_hash"),
        Index("idx_benchmark_runs_status_created", "status", "created_at"),
    ],
}

82 

# Per-example results: one row per (run, query) pair.
benchmark_results_table = {
    "table_name": "benchmark_results",
    "columns": [
        Column("id", Integer, primary_key=True, index=True),
        # Parent run; rows are removed automatically with the run
        # (ON DELETE CASCADE).
        Column(
            "benchmark_run_id",
            Integer,
            ForeignKey("benchmark_runs.id", ondelete="CASCADE"),
            nullable=False,
            index=True,
        ),
        Column("example_id", String(255), nullable=False),
        Column("query_hash", String(32), nullable=False, index=True),
        Column("dataset_type", Enum(DatasetType), nullable=False),
        # Input and ground truth.
        Column("question", Text, nullable=False),
        Column("correct_answer", Text, nullable=False),
        # Research output; nullable until the example has been processed.
        Column("response", Text, nullable=True),
        Column("extracted_answer", Text, nullable=True),
        Column("confidence", String(10), nullable=True),
        Column("processing_time", Float, nullable=True),
        Column("sources", JSON, nullable=True),
        # Grading outcome; nullable until evaluation has run.
        Column("is_correct", Boolean, nullable=True),
        Column("graded_confidence", String(10), nullable=True),
        Column("grader_response", Text, nullable=True),
        Column(
            "created_at", DateTime, server_default=func.now(), nullable=False
        ),
        Column("completed_at", DateTime, nullable=True),
        # Separate error fields for the research and evaluation phases.
        Column("research_error", Text, nullable=True),
        Column("evaluation_error", Text, nullable=True),
        Column("task_index", Integer, nullable=True),
        Column("result_metadata", JSON, nullable=True),
    ],
    "indexes": [
        Index(
            "idx_benchmark_results_run_dataset",
            "benchmark_run_id",
            "dataset_type",
        ),
        Index("idx_benchmark_results_query_hash", "query_hash"),
        Index("idx_benchmark_results_completed", "completed_at"),
    ],
    "constraints": [
        # A given query may appear at most once per run.
        UniqueConstraint(
            "benchmark_run_id", "query_hash", name="uix_run_query"
        ),
    ],
}

131 

# Saved benchmark configurations, identified by config_hash.
benchmark_configs_table = {
    "table_name": "benchmark_configs",
    "columns": [
        Column("id", Integer, primary_key=True, index=True),
        Column("name", String(255), nullable=False),
        Column("description", Text, nullable=True),
        Column("config_hash", String(16), nullable=False, index=True),
        Column("search_config", JSON, nullable=False),
        Column("evaluation_config", JSON, nullable=False),
        Column("datasets_config", JSON, nullable=False),
        Column(
            "created_at", DateTime, server_default=func.now(), nullable=False
        ),
        Column(
            "updated_at",
            DateTime,
            server_default=func.now(),
            onupdate=func.now(),
            nullable=False,
        ),
        Column("is_default", Boolean, default=False, nullable=False),
        Column("is_public", Boolean, default=True, nullable=False),
        # Usage / result statistics for this configuration.
        Column("usage_count", Integer, default=0, nullable=False),
        Column("last_used", DateTime, nullable=True),
        Column("best_accuracy", Float, nullable=True),
        Column("avg_processing_rate", Float, nullable=True),
    ],
    "indexes": [
        Index("idx_benchmark_configs_name", "name"),
        Index("idx_benchmark_configs_hash", "config_hash"),
        Index("idx_benchmark_configs_default", "is_default"),
    ],
}

165 

# Point-in-time progress snapshots for a run (one row per update).
benchmark_progress_table = {
    "table_name": "benchmark_progress",
    "columns": [
        Column("id", Integer, primary_key=True, index=True),
        # Parent run; snapshots are removed with the run (ON DELETE CASCADE).
        Column(
            "benchmark_run_id",
            Integer,
            ForeignKey("benchmark_runs.id", ondelete="CASCADE"),
            nullable=False,
            index=True,
        ),
        Column(
            "timestamp", DateTime, server_default=func.now(), nullable=False
        ),
        Column("completed_examples", Integer, nullable=False),
        Column("total_examples", Integer, nullable=False),
        Column("overall_accuracy", Float, nullable=True),
        # Per-dataset accuracy breakdown stored as JSON.
        Column("dataset_accuracies", JSON, nullable=True),
        Column("processing_rate", Float, nullable=True),
        Column("estimated_completion", DateTime, nullable=True),
        Column("current_dataset", Enum(DatasetType), nullable=True),
        Column("current_example_id", String(255), nullable=True),
        # Resource usage at snapshot time — units not recorded here;
        # presumably MB / percent, confirm against the writer.
        Column("memory_usage", Float, nullable=True),
        Column("cpu_usage", Float, nullable=True),
    ],
    "indexes": [
        Index(
            "idx_benchmark_progress_run_time", "benchmark_run_id", "timestamp"
        ),
    ],
}

197 

198 

def create_benchmark_tables_simple(engine):
    """Create benchmark tables using simple table definitions.

    Builds SQLAlchemy ``Table`` objects from the module-level definition
    dicts (columns, indexes, and table-level constraints) and emits
    ``CREATE TABLE`` / ``CREATE INDEX`` statements against *engine* for
    any objects that do not already exist.

    Args:
        engine: SQLAlchemy ``Engine`` (or other connectable) on which to
            create the schema.
    """
    from typing import Any as _Any

    from sqlalchemy import MetaData, Table

    metadata = MetaData()

    # Create tables
    tables_to_create: list[dict[str, _Any]] = [
        benchmark_runs_table,  # type: ignore[list-item]
        benchmark_results_table,  # type: ignore[list-item]
        benchmark_configs_table,  # type: ignore[list-item]
        benchmark_progress_table,  # type: ignore[list-item]
    ]

    # NOTE(review): the Column/Index objects in the definition dicts are
    # module-level singletons and a Column may belong to only one Table,
    # so this function is effectively single-shot per process — confirm
    # callers never invoke it against two different MetaData instances.
    for table_def in tables_to_create:
        # Pass indexes and constraints positionally into Table() so they
        # are properly associated with the table. (The previous approach
        # of assigning `index.table = table` after construction only set
        # an attribute; it never registered the index with the Table, so
        # metadata.create_all() silently skipped every CREATE INDEX.)
        Table(
            table_def["table_name"],
            metadata,
            *table_def["columns"],
            *table_def.get("indexes", []),
            *table_def.get("constraints", []),
            extend_existing=True,
        )

    # Create all tables (and their indexes), skipping any that exist.
    metadata.create_all(engine, checkfirst=True)