aboutsummaryrefslogtreecommitdiff
path: root/benchmark/reports/mini-agi/15_TestRevenueRetrieval.json
diff options
context:
space:
mode:
Diffstat (limited to 'benchmark/reports/mini-agi/15_TestRevenueRetrieval.json')
-rw-r--r--benchmark/reports/mini-agi/15_TestRevenueRetrieval.json61
1 files changed, 61 insertions, 0 deletions
diff --git a/benchmark/reports/mini-agi/15_TestRevenueRetrieval.json b/benchmark/reports/mini-agi/15_TestRevenueRetrieval.json
new file mode 100644
index 000000000..d0895f331
--- /dev/null
+++ b/benchmark/reports/mini-agi/15_TestRevenueRetrieval.json
@@ -0,0 +1,61 @@
+{
+ "command": "agbenchmark start --suite TestRevenueRetrieval",
+ "completion_time": "2023-07-24-13:34",
+ "metrics": {
+ "run_time": "62.03 seconds",
+ "highest_difficulty": "novice: 3"
+ },
+ "tests": {
+ "TestRevenueRetrieval": {
+ "data_path": "agbenchmark/challenges/retrieval/r2_search_suite_1",
+ "task": "Write tesla's exact revenue in 2022 into a .txt file. Use the US notation, with a precision rounded to the nearest million dollars (for instance, $31,578 billion).",
+ "category": [
+ "retrieval"
+ ],
+ "metrics": {
+ "percentage": 33.33,
+ "highest_difficulty": "novice",
+ "run_time": "61.579 seconds"
+ },
+ "tests": {
+ "TestRevenueRetrieval_1.0": {
+ "data_path": "C:/Users/silen/Desktop/Untitled Folder/Projects/Auto-GPT/Auto-GPT-Benchmarks/agbenchmark/challenges/retrieval/r2_search_suite_1/1_tesla_revenue/data.json",
+ "is_regression": true,
+ "answer": "It was $81.462 billion in 2022.",
+ "description": "A no guardrails search for info",
+ "metrics": {
+ "difficulty": "novice",
+ "success": true,
+ "success_%": 100.0
+ }
+ },
+ "TestRevenueRetrieval_1.1": {
+ "data_path": "C:/Users/silen/Desktop/Untitled Folder/Projects/Auto-GPT/Auto-GPT-Benchmarks/agbenchmark/challenges/retrieval/r2_search_suite_1/2_specific/data.json",
+ "is_regression": false,
+ "answer": "It was $81.462 billion in 2022.",
+ "description": "This one checks the accuracy of the information over r2",
+ "metrics": {
+ "difficulty": "novice",
+ "success": false,
+ "success_%": 0.0
+ }
+ },
+ "TestRevenueRetrieval_1.2": {
+ "data_path": "C:/Users/silen/Desktop/Untitled Folder/Projects/Auto-GPT/Auto-GPT-Benchmarks/agbenchmark/challenges/retrieval/r2_search_suite_1/3_formatting/data.json",
+ "is_regression": false,
+ "answer": "It was $81.462 billion in 2022. In millions the answer is 81,462.",
+ "description": "Advanced version of the r2.1 challenge that also asks for specific formatting.",
+ "metrics": {
+ "difficulty": "intermediate",
+ "success": false,
+ "success_%": 0.0
+ }
+ }
+ },
+ "reached_cutoff": true
+ }
+ },
+ "config": {
+ "workspace": "${os.path.join(Path.home(), 'miniagi')}"
+ }
+} \ No newline at end of file