diff options
Diffstat (limited to 'benchmark/agbenchmark_config/reports/20230913T234542_full_run/report.json')
-rw-r--r-- | benchmark/agbenchmark_config/reports/20230913T234542_full_run/report.json | 38 |
1 files changed, 38 insertions, 0 deletions
diff --git a/benchmark/agbenchmark_config/reports/20230913T234542_full_run/report.json b/benchmark/agbenchmark_config/reports/20230913T234542_full_run/report.json new file mode 100644 index 000000000..06a3cc963 --- /dev/null +++ b/benchmark/agbenchmark_config/reports/20230913T234542_full_run/report.json @@ -0,0 +1,38 @@ +{ + "command": "agbenchmark start --test=TestWriteFile --mock", + "benchmark_git_commit_sha": "---", + "agent_git_commit_sha": "---", + "completion_time": "2023-09-13T23:45:42+00:00", + "benchmark_start_time": "2023-09-13T23:45:42+00:00", + "metrics": { + "run_time": "0.67 seconds", + "highest_difficulty": "No successful tests", + "total_cost": null + }, + "tests": { + "TestWriteFile": { + "data_path": "/Users/merwanehamadi/code/Auto-GPT/benchmark/agbenchmark/challenges/abilities/write_file/data.json", + "is_regression": false, + "category": [ + "interface" + ], + "task": "Write the word 'Washington' to a .txt file", + "answer": "The word 'Washington', printed to a .txt file named anything", + "description": "Tests the agents ability to write to a file", + "metrics": { + "difficulty": "interface", + "success": false, + "attempted": true, + "fail_reason": "cannot import name 'copy_artifacts_into_workspace' from 'agbenchmark.agent_interface' (/Users/merwanehamadi/code/Auto-GPT/benchmark/agbenchmark/agent_interface.py)", + "success_%": 0.0, + "cost": null, + "run_time": "0.189 seconds" + }, + "reached_cutoff": false + } + }, + "config": { + "agent_benchmark_config_path": "/Users/merwanehamadi/code/Auto-GPT/benchmark/agbenchmark_config/config.json", + "host": "http://localhost:8000" + } +}
\ No newline at end of file |