diff options
author | Reinier van der Leer <pwuts@agpt.co> | 2024-01-29 18:29:24 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-29 18:29:24 +0100 |
commit | 575be818ca1f7c644e2adf94c584772547141f55 (patch) | |
tree | abb052ae787b1107c9dff8fb4d86ee84842d515d /benchmark/reports/mini-agi/5_TestRevenueRetrieval_1.0.json | |
parent | Update ossf-scorecard.yml (diff) | |
parent | fix(agent/json_utils): Make `extract_dict_from_response` more robust (diff) | |
download | Auto-GPT-security/analysis-workflows-sandbox.tar.gz Auto-GPT-security/analysis-workflows-sandbox.tar.bz2 Auto-GPT-security/analysis-workflows-sandbox.zip |
Merge branch 'master' into security/analysis-workflows-sandbox
Diffstat (limited to 'benchmark/reports/mini-agi/5_TestRevenueRetrieval_1.0.json')
-rw-r--r-- | benchmark/reports/mini-agi/5_TestRevenueRetrieval_1.0.json | 29 |
1 files changed, 29 insertions, 0 deletions
diff --git a/benchmark/reports/mini-agi/5_TestRevenueRetrieval_1.0.json b/benchmark/reports/mini-agi/5_TestRevenueRetrieval_1.0.json
new file mode 100644
index 000000000..28d091d28
--- /dev/null
+++ b/benchmark/reports/mini-agi/5_TestRevenueRetrieval_1.0.json
@@ -0,0 +1,29 @@
+{
+    "command": "agbenchmark start --test TestRetrieval2",
+    "completion_time": "2023-07-17-13:54",
+    "metrics": {
+        "run_time": "36 seconds",
+        "highest_difficulty": "TestRetrieval2: 3"
+    },
+    "tests": {
+        "TestRetrieval2": {
+            "data_path": "agbenchmark/challenges/retrieval/r2_tesla_revenue",
+            "is_regression": false,
+            "reached_cutoff": false,
+            "answer": "It was $81.462 billion in 2022.",
+            "description": "A no guardrails search for info",
+            "metrics": {
+                "difficulty": "novice",
+                "success": true,
+                "success_%": 50.0,
+                "run_time": "35.59 seconds"
+            }
+        }
+    },
+    "config": {
+        "workspace": "${os.path.join(Path.home(), 'miniagi')}"
+    },
+    "additional": {
+        "model": "gpt-4"
+    }
+}