about | summary | refs | log | tree | commit | diff
path: root/benchmark
diff options
context:
space:
mode:
author: SwiftyOS <craigswift13@gmail.com> 2023-11-09 10:00:50 +0100
committer: SwiftyOS <craigswift13@gmail.com> 2023-11-09 10:00:50 +0100
commit: fa357dd13928baa4d1e30054bc75edc5d68b08f1 (patch)
tree: 7ca3f1d8e4871648d7b52fdac75c36de082ee9bc /benchmark
parent: Disable hybrid mode in AutoGPT by default (diff)
download: Auto-GPT-fa357dd13928baa4d1e30054bc75edc5d68b08f1.tar.gz
Auto-GPT-fa357dd13928baa4d1e30054bc75edc5d68b08f1.tar.bz2
Auto-GPT-fa357dd13928baa4d1e30054bc75edc5d68b08f1.zip
fix: Fixing Benchmarking
- Importing missing metadata field in Test class in report_types.py
- Adding GAIA categories 1, 2, and 3 in data_types.py
Diffstat (limited to 'benchmark')
-rw-r--r-- benchmark/agbenchmark/reports/processing/report_types.py 3
-rw-r--r-- benchmark/agbenchmark/utils/data_types.py 3
2 files changed, 6 insertions, 0 deletions
diff --git a/benchmark/agbenchmark/reports/processing/report_types.py b/benchmark/agbenchmark/reports/processing/report_types.py
index e2fb1bc62..d2fc8dea3 100644
--- a/benchmark/agbenchmark/reports/processing/report_types.py
+++ b/benchmark/agbenchmark/reports/processing/report_types.py
@@ -51,6 +51,8 @@ class Test(BaseModelBenchmark):
category: List[str]
task: str
reached_cutoff: bool
+ metadata: Any
+
class ReportBase(BaseModelBenchmark):
@@ -68,6 +70,7 @@ class Report(ReportBase):
tests: Dict[str, Test]
+
class ReportV2(Test, ReportBase):
test_name: str
run_id: str | None
diff --git a/benchmark/agbenchmark/utils/data_types.py b/benchmark/agbenchmark/utils/data_types.py
index 74b509329..955b1d6a8 100644
--- a/benchmark/agbenchmark/utils/data_types.py
+++ b/benchmark/agbenchmark/utils/data_types.py
@@ -174,6 +174,9 @@ class Category(str, Enum):
GENERALIST = "general"
CODING = "coding"
SCRAPE_SYNTHESIZE = "scrape_synthesize"
+ GAIA_1 = "GAIA_1"
+ GAIA_2 = "GAIA_2"
+ GAIA_3 = "GAIA_3"
class ChallengeData(BaseModel):