diff options
Diffstat (limited to 'benchmark/reports/mini-agi/6.3_TestRevenueRetrieval_1.1.json')
-rw-r--r-- | benchmark/reports/mini-agi/6.3_TestRevenueRetrieval_1.1.json | 30 |
1 files changed, 30 insertions, 0 deletions
diff --git a/benchmark/reports/mini-agi/6.3_TestRevenueRetrieval_1.1.json b/benchmark/reports/mini-agi/6.3_TestRevenueRetrieval_1.1.json new file mode 100644 index 000000000..383774347 --- /dev/null +++ b/benchmark/reports/mini-agi/6.3_TestRevenueRetrieval_1.1.json @@ -0,0 +1,30 @@ +{ + "command": "agbenchmark start --test TestRetrieval2.1", + "completion_time": "2023-07-17-17:54", + "metrics": { + "run_time": "27.49 seconds", + "highest_difficulty": "No successful tests" + }, + "tests": { + "TestRetrieval2.1": { + "data_path": "agbenchmark/challenges/retrieval/r2.1_specific", + "is_regression": false, + "answer": "It was $81.462 billion in 2022.", + "description": "This one checks the accuracy of the information over r2", + "metrics": { + "difficulty": "novice", + "success": false, + "fail_reason": "assert 1 in [0.0]", + "success_%": 0.0, + "run_time": "27.266 seconds" + }, + "reached_cutoff": false + } + }, + "config": { + "workspace": "${os.path.join(Path.home(), 'miniagi')}" + }, + "additional": { + "model": "gpt-3.5-turbo" + } +} |