Coverage for src / local_deep_research / web / database / benchmark_schema.py: 100%
30 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
1"""Simple benchmark table definitions for schema creation."""
3import enum
5from sqlalchemy import (
6 JSON,
7 Boolean,
8 Column,
9 DateTime,
10 Enum,
11 Float,
12 ForeignKey,
13 Index,
14 Integer,
15 String,
16 Text,
17 UniqueConstraint,
18)
19from sqlalchemy.sql import func
class BenchmarkStatus(enum.Enum):
    """Status of a benchmark run.

    Persisted in the ``status`` column of ``benchmark_runs`` via
    SQLAlchemy's ``Enum`` type; the lowercase string values are what is
    stored in the database.
    """

    PENDING = "pending"
    IN_PROGRESS = "in_progress"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
    PAUSED = "paused"
class DatasetType(enum.Enum):
    """Supported dataset types.

    Used by the ``dataset_type`` column of ``benchmark_results`` and the
    ``current_dataset`` column of ``benchmark_progress``.
    """

    SIMPLEQA = "simpleqa"
    BROWSECOMP = "browsecomp"
    CUSTOM = "custom"
# Simple table definitions for creation.
# Each dict is consumed by create_benchmark_tables_simple():
#   "table_name"  -> physical table name
#   "columns"     -> ordered list of Column objects (order = DDL order)
#   "indexes"     -> optional list of Index objects (string column names)
#   "constraints" -> optional list of table-level constraints
benchmark_runs_table = {
    "table_name": "benchmark_runs",
    "columns": [
        Column("id", Integer, primary_key=True, index=True),
        Column("run_name", String(255), nullable=True),
        # Short hash of the run configuration (same width as
        # benchmark_configs.config_hash).
        Column("config_hash", String(16), nullable=False, index=True),
        Column("query_hash_list", JSON, nullable=False),
        Column("search_config", JSON, nullable=False),
        Column("evaluation_config", JSON, nullable=False),
        Column("datasets_config", JSON, nullable=False),
        Column(
            "status",
            Enum(BenchmarkStatus),
            default=BenchmarkStatus.PENDING,
            nullable=False,
        ),
        # created_at is set by the database; updated_at is additionally
        # refreshed on every UPDATE via onupdate=func.now().
        Column(
            "created_at", DateTime, server_default=func.now(), nullable=False
        ),
        Column(
            "updated_at",
            DateTime,
            server_default=func.now(),
            onupdate=func.now(),
            nullable=False,
        ),
        Column("start_time", DateTime, nullable=True),
        Column("end_time", DateTime, nullable=True),
        # Progress counters; the nullable metrics below are presumably
        # filled in as the run proceeds — confirm against writer code.
        Column("total_examples", Integer, default=0, nullable=False),
        Column("completed_examples", Integer, default=0, nullable=False),
        Column("failed_examples", Integer, default=0, nullable=False),
        Column("overall_accuracy", Float, nullable=True),
        Column("processing_rate", Float, nullable=True),
        Column("error_message", Text, nullable=True),
    ],
    "indexes": [
        Index("idx_benchmark_runs_config_hash", "config_hash"),
        Index("idx_benchmark_runs_status_created", "status", "created_at"),
    ],
}
# Per-example results for a benchmark run. Rows are deleted automatically
# when the parent benchmark_runs row is removed (ondelete="CASCADE"), and
# the unique constraint prevents the same query being recorded twice for
# one run.
benchmark_results_table = {
    "table_name": "benchmark_results",
    "columns": [
        Column("id", Integer, primary_key=True, index=True),
        Column(
            "benchmark_run_id",
            Integer,
            ForeignKey("benchmark_runs.id", ondelete="CASCADE"),
            nullable=False,
            index=True,
        ),
        Column("example_id", String(255), nullable=False),
        Column("query_hash", String(32), nullable=False, index=True),
        Column("dataset_type", Enum(DatasetType), nullable=False),
        Column("question", Text, nullable=False),
        Column("correct_answer", Text, nullable=False),
        # Nullable fields below are produced by the research/evaluation
        # stages and may be absent for unfinished or failed examples.
        Column("response", Text, nullable=True),
        Column("extracted_answer", Text, nullable=True),
        Column("confidence", String(10), nullable=True),
        Column("processing_time", Float, nullable=True),
        Column("sources", JSON, nullable=True),
        Column("is_correct", Boolean, nullable=True),
        Column("graded_confidence", String(10), nullable=True),
        Column("grader_response", Text, nullable=True),
        Column(
            "created_at", DateTime, server_default=func.now(), nullable=False
        ),
        Column("completed_at", DateTime, nullable=True),
        Column("research_error", Text, nullable=True),
        Column("evaluation_error", Text, nullable=True),
        Column("task_index", Integer, nullable=True),
        Column("result_metadata", JSON, nullable=True),
    ],
    "indexes": [
        Index(
            "idx_benchmark_results_run_dataset",
            "benchmark_run_id",
            "dataset_type",
        ),
        Index("idx_benchmark_results_query_hash", "query_hash"),
        Index("idx_benchmark_results_completed", "completed_at"),
    ],
    "constraints": [
        UniqueConstraint(
            "benchmark_run_id", "query_hash", name="uix_run_query"
        ),
    ],
}
# Saved benchmark configurations, keyed by the same config_hash that
# benchmark_runs records; usage/accuracy columns appear to track how a
# config has performed across runs — confirm against writer code.
benchmark_configs_table = {
    "table_name": "benchmark_configs",
    "columns": [
        Column("id", Integer, primary_key=True, index=True),
        Column("name", String(255), nullable=False),
        Column("description", Text, nullable=True),
        Column("config_hash", String(16), nullable=False, index=True),
        Column("search_config", JSON, nullable=False),
        Column("evaluation_config", JSON, nullable=False),
        Column("datasets_config", JSON, nullable=False),
        # created_at is set by the database; updated_at is additionally
        # refreshed on every UPDATE via onupdate=func.now().
        Column(
            "created_at", DateTime, server_default=func.now(), nullable=False
        ),
        Column(
            "updated_at",
            DateTime,
            server_default=func.now(),
            onupdate=func.now(),
            nullable=False,
        ),
        Column("is_default", Boolean, default=False, nullable=False),
        Column("is_public", Boolean, default=True, nullable=False),
        Column("usage_count", Integer, default=0, nullable=False),
        Column("last_used", DateTime, nullable=True),
        Column("best_accuracy", Float, nullable=True),
        Column("avg_processing_rate", Float, nullable=True),
    ],
    "indexes": [
        Index("idx_benchmark_configs_name", "name"),
        Index("idx_benchmark_configs_hash", "config_hash"),
        Index("idx_benchmark_configs_default", "is_default"),
    ],
}
# Timestamped progress snapshots for a run (one row per sample); rows are
# removed with the parent run via ondelete="CASCADE". The composite index
# supports fetching a run's snapshots in time order.
benchmark_progress_table = {
    "table_name": "benchmark_progress",
    "columns": [
        Column("id", Integer, primary_key=True, index=True),
        Column(
            "benchmark_run_id",
            Integer,
            ForeignKey("benchmark_runs.id", ondelete="CASCADE"),
            nullable=False,
            index=True,
        ),
        Column(
            "timestamp", DateTime, server_default=func.now(), nullable=False
        ),
        Column("completed_examples", Integer, nullable=False),
        Column("total_examples", Integer, nullable=False),
        Column("overall_accuracy", Float, nullable=True),
        Column("dataset_accuracies", JSON, nullable=True),
        Column("processing_rate", Float, nullable=True),
        Column("estimated_completion", DateTime, nullable=True),
        Column("current_dataset", Enum(DatasetType), nullable=True),
        Column("current_example_id", String(255), nullable=True),
        # Resource usage at snapshot time; units not specified here —
        # TODO confirm (bytes vs MB, fraction vs percent) against writer.
        Column("memory_usage", Float, nullable=True),
        Column("cpu_usage", Float, nullable=True),
    ],
    "indexes": [
        Index(
            "idx_benchmark_progress_run_time", "benchmark_run_id", "timestamp"
        ),
    ],
}
def create_benchmark_tables_simple(engine):
    """Create benchmark tables using simple table definitions.

    Builds a ``Table`` for each module-level definition dict and emits the
    DDL via ``metadata.create_all`` with ``checkfirst=True`` (existing
    tables are left untouched).

    Args:
        engine: SQLAlchemy engine (or connection) to emit DDL against.
    """
    from typing import Any as _Any

    from sqlalchemy import MetaData, Table

    metadata = MetaData()

    # Create tables
    tables_to_create: list[dict[str, _Any]] = [
        benchmark_runs_table,  # type: ignore[list-item]
        benchmark_results_table,  # type: ignore[list-item]
        benchmark_configs_table,  # type: ignore[list-item]
        benchmark_progress_table,  # type: ignore[list-item]
    ]

    for table_def in tables_to_create:
        # Pass Index and constraint objects to the Table constructor so
        # SQLAlchemy registers them on the table and resolves their string
        # column names. (The previous ``index.table = table`` assignment
        # never added the index to ``table.indexes``, so create_all()
        # silently skipped every explicit index.)
        Table(
            table_def["table_name"],
            metadata,
            *table_def["columns"],
            *table_def.get("indexes", []),
            *table_def.get("constraints", []),
            extend_existing=True,
        )

    # Create all tables (plus their indexes and constraints) if missing.
    metadata.create_all(engine, checkfirst=True)