Coverage for src / local_deep_research / web / database / benchmark_schema.py: 100%
30 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
1"""Simple benchmark table definitions for schema creation."""
3import enum
5from sqlalchemy import (
6 JSON,
7 Boolean,
8 Column,
9 DateTime,
10 Enum,
11 Float,
12 ForeignKey,
13 Index,
14 Integer,
15 String,
16 Text,
17 UniqueConstraint,
18)
19from sqlalchemy.sql import func
class BenchmarkStatus(enum.Enum):
    """Status of a benchmark run.

    Persisted in the ``status`` column of ``benchmark_runs`` via
    SQLAlchemy's ``Enum`` type; the lowercase string values are what is
    stored in the database.
    """

    PENDING = "pending"
    IN_PROGRESS = "in_progress"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
    PAUSED = "paused"
class DatasetType(enum.Enum):
    """Supported dataset types.

    Used by the ``dataset_type`` column of ``benchmark_results`` and the
    ``current_dataset`` column of ``benchmark_progress``.
    """

    SIMPLEQA = "simpleqa"
    BROWSECOMP = "browsecomp"
    CUSTOM = "custom"
# Simple table definitions for creation.
# Each dict is consumed by create_benchmark_tables_simple():
#   "table_name"  -> physical table name
#   "columns"     -> ordered list of Column objects (order = DDL order)
#   "indexes"     -> optional list of Index objects (string column names)
#   "constraints" -> optional list of table-level constraints
benchmark_runs_table = {
    "table_name": "benchmark_runs",
    "columns": [
        Column("id", Integer, primary_key=True, index=True),
        Column("run_name", String(255), nullable=True),
        # Short hash of the run configuration (same width as
        # benchmark_configs.config_hash).
        Column("config_hash", String(16), nullable=False, index=True),
        Column("query_hash_list", JSON, nullable=False),
        Column("search_config", JSON, nullable=False),
        Column("evaluation_config", JSON, nullable=False),
        Column("datasets_config", JSON, nullable=False),
        Column(
            "status",
            Enum(BenchmarkStatus),
            default=BenchmarkStatus.PENDING,
            nullable=False,
        ),
        # created_at is set by the database; updated_at is additionally
        # refreshed on every UPDATE via onupdate=func.now().
        Column(
            "created_at", DateTime, server_default=func.now(), nullable=False
        ),
        Column(
            "updated_at",
            DateTime,
            server_default=func.now(),
            onupdate=func.now(),
            nullable=False,
        ),
        Column("start_time", DateTime, nullable=True),
        Column("end_time", DateTime, nullable=True),
        # Progress counters; the nullable metrics below are presumably
        # filled in as the run proceeds — confirm against writer code.
        Column("total_examples", Integer, default=0, nullable=False),
        Column("completed_examples", Integer, default=0, nullable=False),
        Column("failed_examples", Integer, default=0, nullable=False),
        Column("overall_accuracy", Float, nullable=True),
        Column("processing_rate", Float, nullable=True),
        Column("error_message", Text, nullable=True),
    ],
    "indexes": [
        Index("idx_benchmark_runs_config_hash", "config_hash"),
        Index("idx_benchmark_runs_status_created", "status", "created_at"),
    ],
}
# Per-example results for a benchmark run. Rows are deleted automatically
# when the parent benchmark_runs row is removed (ondelete="CASCADE"), and
# the unique constraint prevents the same query being recorded twice for
# one run.
benchmark_results_table = {
    "table_name": "benchmark_results",
    "columns": [
        Column("id", Integer, primary_key=True, index=True),
        Column(
            "benchmark_run_id",
            Integer,
            ForeignKey("benchmark_runs.id", ondelete="CASCADE"),
            nullable=False,
            index=True,
        ),
        Column("example_id", String(255), nullable=False),
        Column("query_hash", String(32), nullable=False, index=True),
        Column("dataset_type", Enum(DatasetType), nullable=False),
        Column("question", Text, nullable=False),
        Column("correct_answer", Text, nullable=False),
        # Nullable fields below are produced by the research/evaluation
        # stages and may be absent for unfinished or failed examples.
        Column("response", Text, nullable=True),
        Column("extracted_answer", Text, nullable=True),
        Column("confidence", String(10), nullable=True),
        Column("processing_time", Float, nullable=True),
        Column("sources", JSON, nullable=True),
        Column("is_correct", Boolean, nullable=True),
        Column("graded_confidence", String(10), nullable=True),
        Column("grader_response", Text, nullable=True),
        Column(
            "created_at", DateTime, server_default=func.now(), nullable=False
        ),
        Column("completed_at", DateTime, nullable=True),
        Column("research_error", Text, nullable=True),
        Column("evaluation_error", Text, nullable=True),
        Column("task_index", Integer, nullable=True),
        Column("result_metadata", JSON, nullable=True),
    ],
    "indexes": [
        Index(
            "idx_benchmark_results_run_dataset",
            "benchmark_run_id",
            "dataset_type",
        ),
        Index("idx_benchmark_results_query_hash", "query_hash"),
        Index("idx_benchmark_results_completed", "completed_at"),
    ],
    "constraints": [
        UniqueConstraint(
            "benchmark_run_id", "query_hash", name="uix_run_query"
        ),
    ],
}
# Saved benchmark configurations, keyed by the same config_hash that
# benchmark_runs records; usage/accuracy columns appear to track how a
# config has performed across runs — confirm against writer code.
benchmark_configs_table = {
    "table_name": "benchmark_configs",
    "columns": [
        Column("id", Integer, primary_key=True, index=True),
        Column("name", String(255), nullable=False),
        Column("description", Text, nullable=True),
        Column("config_hash", String(16), nullable=False, index=True),
        Column("search_config", JSON, nullable=False),
        Column("evaluation_config", JSON, nullable=False),
        Column("datasets_config", JSON, nullable=False),
        # created_at is set by the database; updated_at is additionally
        # refreshed on every UPDATE via onupdate=func.now().
        Column(
            "created_at", DateTime, server_default=func.now(), nullable=False
        ),
        Column(
            "updated_at",
            DateTime,
            server_default=func.now(),
            onupdate=func.now(),
            nullable=False,
        ),
        Column("is_default", Boolean, default=False, nullable=False),
        Column("is_public", Boolean, default=True, nullable=False),
        Column("usage_count", Integer, default=0, nullable=False),
        Column("last_used", DateTime, nullable=True),
        Column("best_accuracy", Float, nullable=True),
        Column("avg_processing_rate", Float, nullable=True),
    ],
    "indexes": [
        Index("idx_benchmark_configs_name", "name"),
        Index("idx_benchmark_configs_hash", "config_hash"),
        Index("idx_benchmark_configs_default", "is_default"),
    ],
}
# Timestamped progress snapshots for a run (one row per sample); rows are
# removed with the parent run via ondelete="CASCADE". The composite index
# supports fetching a run's snapshots in time order.
benchmark_progress_table = {
    "table_name": "benchmark_progress",
    "columns": [
        Column("id", Integer, primary_key=True, index=True),
        Column(
            "benchmark_run_id",
            Integer,
            ForeignKey("benchmark_runs.id", ondelete="CASCADE"),
            nullable=False,
            index=True,
        ),
        Column(
            "timestamp", DateTime, server_default=func.now(), nullable=False
        ),
        Column("completed_examples", Integer, nullable=False),
        Column("total_examples", Integer, nullable=False),
        Column("overall_accuracy", Float, nullable=True),
        Column("dataset_accuracies", JSON, nullable=True),
        Column("processing_rate", Float, nullable=True),
        Column("estimated_completion", DateTime, nullable=True),
        Column("current_dataset", Enum(DatasetType), nullable=True),
        Column("current_example_id", String(255), nullable=True),
        # Resource usage at snapshot time; units not specified here —
        # TODO confirm (bytes vs MB, fraction vs percent) against writer.
        Column("memory_usage", Float, nullable=True),
        Column("cpu_usage", Float, nullable=True),
    ],
    "indexes": [
        Index(
            "idx_benchmark_progress_run_time", "benchmark_run_id", "timestamp"
        ),
    ],
}
def create_benchmark_tables_simple(engine):
    """Create benchmark tables using simple table definitions.

    Builds a ``Table`` for each module-level definition dict and emits the
    DDL via ``metadata.create_all`` with ``checkfirst=True`` (existing
    tables are left untouched).

    Args:
        engine: SQLAlchemy engine (or connection) to emit DDL against.
    """
    from typing import Any as _Any

    from sqlalchemy import MetaData, Table

    metadata = MetaData()

    # Create tables
    tables_to_create: list[dict[str, _Any]] = [
        benchmark_runs_table,  # type: ignore[list-item]
        benchmark_results_table,  # type: ignore[list-item]
        benchmark_configs_table,  # type: ignore[list-item]
        benchmark_progress_table,  # type: ignore[list-item]
    ]

    for table_def in tables_to_create:
        # Pass Index and constraint objects to the Table constructor so
        # SQLAlchemy registers them on the table and resolves their string
        # column names. (The previous ``index.table = table`` assignment
        # never added the index to ``table.indexes``, so create_all()
        # silently skipped every explicit index.)
        Table(
            table_def["table_name"],
            metadata,
            *table_def["columns"],
            *table_def.get("indexes", []),
            *table_def.get("constraints", []),
            extend_existing=True,
        )

    # Create all tables (plus their indexes and constraints) if missing.
    metadata.create_all(engine, checkfirst=True)