diff options
Diffstat (limited to 'benchmark/agbenchmark/challenges/deprecated/2_specific')
-rw-r--r-- | benchmark/agbenchmark/challenges/deprecated/2_specific/artifacts_out/random_file.txt | 1 | ||||
-rw-r--r-- | benchmark/agbenchmark/challenges/deprecated/2_specific/data.json | 31 |
2 files changed, 32 insertions, 0 deletions
diff --git a/benchmark/agbenchmark/challenges/deprecated/2_specific/artifacts_out/random_file.txt b/benchmark/agbenchmark/challenges/deprecated/2_specific/artifacts_out/random_file.txt
new file mode 100644
index 000000000..8a0eae046
--- /dev/null
+++ b/benchmark/agbenchmark/challenges/deprecated/2_specific/artifacts_out/random_file.txt
@@ -0,0 +1 @@
+81,462 Millions
diff --git a/benchmark/agbenchmark/challenges/deprecated/2_specific/data.json b/benchmark/agbenchmark/challenges/deprecated/2_specific/data.json
new file mode 100644
index 000000000..b650d458d
--- /dev/null
+++ b/benchmark/agbenchmark/challenges/deprecated/2_specific/data.json
@@ -0,0 +1,31 @@
+{
+  "category": [
+    "scrape_synthesize"
+  ],
+  "cutoff": 60,
+  "dependencies": [
+    "TestRevenueRetrieval_1.0"
+  ],
+  "eval_id": "b79898bb-263a-4184-8e4d-0aa52838bfdb",
+  "ground": {
+    "answer": "It was $81.462 billion in 2022.",
+    "eval": {
+      "type": "file"
+    },
+    "files": [
+      ".txt"
+    ],
+    "should_contain": [
+      "81",
+      "462"
+    ],
+    "should_not_contain": []
+  },
+  "info": {
+    "description": "This one checks the accuracy of the information over r2",
+    "difficulty": "novice",
+    "side_effects": []
+  },
+  "name": "RevenueRetrieval1.1",
+  "task": "Write Tesla's revenue in 2022, rounded to the nearest million dollars, into a .txt file."
+}