Merge branch 'master' into security/analysis-workflows-sandbox

author: Reinier van der Leer <pwuts@agpt.co> 2024-01-29 18:29:24 +0100
committer: GitHub <noreply@github.com> 2024-01-29 18:29:24 +0100
commit: 575be818ca1f7c644e2adf94c584772547141f55 (patch)
tree: abb052ae787b1107c9dff8fb4d86ee84842d515d /benchmark/reports/mini-agi/5_TestRevenueRetrieval_1.0.json
parent: Update ossf-scorecard.yml (diff)
parent: fix(agent/json_utils): Make `extract_dict_from_response` more robust (diff)
download: Auto-GPT-security/analysis-workflows-sandbox.tar.gz
Auto-GPT-security/analysis-workflows-sandbox.tar.bz2
Auto-GPT-security/analysis-workflows-sandbox.zip
1 files changed, 29 insertions, 0 deletions
diff --git a/benchmark/reports/mini-agi/5_TestRevenueRetrieval_1.0.json b/benchmark/reports/mini-agi/5_TestRevenueRetrieval_1.0.json
new file mode 100644
index 000000000..28d091d28
--- /dev/null
+++ b/benchmark/reports/mini-agi/5_TestRevenueRetrieval_1.0.json
@@ -0,0 +1,29 @@
+{
+  "command": "agbenchmark start --test TestRetrieval2",
+  "completion_time": "2023-07-17-13:54",
+  "metrics": {
+    "run_time": "36 seconds",
+    "highest_difficulty": "TestRetrieval2: 3"
+  },
+  "tests": {
+    "TestRetrieval2": {
+      "data_path": "agbenchmark/challenges/retrieval/r2_tesla_revenue",
+      "is_regression": false,
+      "reached_cutoff": false,
+      "answer": "It was $81.462 billion in 2022.",
+      "description": "A no guardrails search for info",
+      "metrics": {
+        "difficulty": "novice",
+        "success": true,
+        "success_%": 50.0,
+        "run_time": "35.59 seconds"
+      }
+    }
+  },
+  "config": {
+    "workspace": "${os.path.join(Path.home(), 'miniagi')}"
+  },
+  "additional": {
+    "model": "gpt-4"
+  }
+}
author	Reinier van der Leer <pwuts@agpt.co>	2024-01-29 18:29:24 +0100
committer	GitHub <noreply@github.com>	2024-01-29 18:29:24 +0100
commit	575be818ca1f7c644e2adf94c584772547141f55 (patch)
tree	abb052ae787b1107c9dff8fb4d86ee84842d515d /benchmark/reports/mini-agi/5_TestRevenueRetrieval_1.0.json
parent	Update ossf-scorecard.yml (diff)
parent	fix(agent/json_utils): Make `extract_dict_from_response` more robust (diff)
download	Auto-GPT-security/analysis-workflows-sandbox.tar.gz Auto-GPT-security/analysis-workflows-sandbox.tar.bz2 Auto-GPT-security/analysis-workflows-sandbox.zip