diff options
Diffstat (limited to 'benchmark/reports/mini-agi/15_TestRevenueRetrieval.json')
-rw-r--r-- | benchmark/reports/mini-agi/15_TestRevenueRetrieval.json | 61 |
1 files changed, 61 insertions, 0 deletions
diff --git a/benchmark/reports/mini-agi/15_TestRevenueRetrieval.json b/benchmark/reports/mini-agi/15_TestRevenueRetrieval.json new file mode 100644 index 000000000..d0895f331 --- /dev/null +++ b/benchmark/reports/mini-agi/15_TestRevenueRetrieval.json @@ -0,0 +1,61 @@ +{ + "command": "agbenchmark start --suite TestRevenueRetrieval", + "completion_time": "2023-07-24-13:34", + "metrics": { + "run_time": "62.03 seconds", + "highest_difficulty": "novice: 3" + }, + "tests": { + "TestRevenueRetrieval": { + "data_path": "agbenchmark/challenges/retrieval/r2_search_suite_1", + "task": "Write tesla's exact revenue in 2022 into a .txt file. Use the US notation, with a precision rounded to the nearest million dollars (for instance, $31,578 billion).", + "category": [ + "retrieval" + ], + "metrics": { + "percentage": 33.33, + "highest_difficulty": "novice", + "run_time": "61.579 seconds" + }, + "tests": { + "TestRevenueRetrieval_1.0": { + "data_path": "C:/Users/silen/Desktop/Untitled Folder/Projects/Auto-GPT/Auto-GPT-Benchmarks/agbenchmark/challenges/retrieval/r2_search_suite_1/1_tesla_revenue/data.json", + "is_regression": true, + "answer": "It was $81.462 billion in 2022.", + "description": "A no guardrails search for info", + "metrics": { + "difficulty": "novice", + "success": true, + "success_%": 100.0 + } + }, + "TestRevenueRetrieval_1.1": { + "data_path": "C:/Users/silen/Desktop/Untitled Folder/Projects/Auto-GPT/Auto-GPT-Benchmarks/agbenchmark/challenges/retrieval/r2_search_suite_1/2_specific/data.json", + "is_regression": false, + "answer": "It was $81.462 billion in 2022.", + "description": "This one checks the accuracy of the information over r2", + "metrics": { + "difficulty": "novice", + "success": false, + "success_%": 0.0 + } + }, + "TestRevenueRetrieval_1.2": { + "data_path": "C:/Users/silen/Desktop/Untitled Folder/Projects/Auto-GPT/Auto-GPT-Benchmarks/agbenchmark/challenges/retrieval/r2_search_suite_1/3_formatting/data.json", + "is_regression": false, + "answer": "It was $81.462 billion in 2022. In millions the answer is 81,462.", + "description": "Advanced version of the r2.1 challenge that also asks for specific formatting.", + "metrics": { + "difficulty": "intermediate", + "success": false, + "success_%": 0.0 + } + } + }, + "reached_cutoff": true + } + }, + "config": { + "workspace": "${os.path.join(Path.home(), 'miniagi')}" + } +}
\ No newline at end of file |