diff options
Diffstat (limited to 'benchmark/reports/Auto-GPT-Turbo/success_rate.json')
-rw-r--r-- | benchmark/reports/Auto-GPT-Turbo/success_rate.json | 721 |
1 files changed, 721 insertions, 0 deletions
diff --git a/benchmark/reports/Auto-GPT-Turbo/success_rate.json b/benchmark/reports/Auto-GPT-Turbo/success_rate.json new file mode 100644 index 000000000..82e9425cc --- /dev/null +++ b/benchmark/reports/Auto-GPT-Turbo/success_rate.json @@ -0,0 +1,721 @@ +{ + "TestAdaptLink": [ + false, + false, + false, + false, + true, + false, + false, + false, + true, + false, + false, + false, + false + ], + "TestAdaptSimpleTypoWithGuidance": [ + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false + ], + "TestAdaptTeslaRevenue": [ + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false + ], + "TestAgentProtocol_CreateAgentTask": [ + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false + ], + "TestAgentProtocol_ExecuteAgentTaskStep": [ + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false + ], + "TestAgentProtocol_GetAgentTask": [ + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false + ], + "TestAgentProtocol_ListAgentTaskSteps": [ + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false + ], + "TestAgentProtocol_ListAgentTasksIds": [ + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false + ], + "TestBasicMemory": [ + true, + true, + false, + true, + true, + false, + false, + false, + true, + true, + true, + false, + true + ], + "TestBasicRetrieval": [ + false, + false, + false, + false, + true, + false, + false, + false, + true, + true, + true, + false, + false, + false, + false, + false, + false, + false, + false + ], + "TestDebugMultipleTypo": [ + false, + false, + false, + false, + false, + false, + false, + false, + true, + false, + true, + false, + false + ], + "TestDebugSimpleTypoWithGuidance": [ + true, + true, + true, + true, + true, + true, + false, + false, + true, + true, + true, + true, + true, + false, + true, + true, + true, + true, + true + ], + "TestDebugSimpleTypoWithoutGuidance": [ + false, + false, + false, + false, + false, + false, + false, + false, + true, + true, + true, + false, + false + ], + "TestFunctionCodeGeneration": [ + false, + false, + false, + false, + false, + false, + false, + false, + false, + true, + true, + false, + false + ], + "TestGetEthereumGasPrice": [ + false, + false, + false + ], + "TestPasswordGenerator_Easy": [ + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false + ], + "TestPlanCreation": [ + true, + true, + true, + true, + true, + true, + false, + false, + true, + true, + true, + true, + true + ], + "TestProductAdvisor_GamingMonitor": [ + false, + false, + false, + false, + false, + false, + false, + false, + false, + false + ], + "TestReadFile": [ + true, + true, + true, + true, + true, + true, + false, + false, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true + ], + "TestRememberGoal_Advanced": [ + false, + false, + false, + false, + false, + false, + false, + false, + true, + true, + false, + false, + true + ], + "TestRememberGoal_Hard": [ + false, + false, + false, + false, + false, + false, + false, + false, + false, + true, + false, + true, + true, + false, + true, + true, + false, + false, + true + ], + "TestRememberGoal_Medium": [ + false, + false, + false, + false, + false, + false, + false, + false, + true, + true, + false, + false, + true + ], + "TestRememberGoal_Simple": [ + false, + false, + false, + false, + false, + false, + false, + false, + true, + true, + true, + true, + true, + true, + true, + true, + false, + false, + true + ], + "TestRememberMultipleIds": [ + false, + true, + false, + true, + false, + false, + false, + false, + true, + true, + false, + false, + true + ], + "TestRememberMultipleIdsWithNoise": [ + false, + true, + false, + true, + false, + false, + false, + false, + true, + false, + false, + false, + true + ], + "TestRememberMultiplePhrasesWithNoise": [ + false, + true, + false, + false, + false, + false, + false, + false, + true, + false, + false, + false, + false + ], + "TestRetrieval3": [ + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false + ], + "TestReturnCode_Modify": [ + false, + false, + false, + false, + false, + false, + false, + false, + false, + true, + true, + false, + false + ], + "TestReturnCode_Simple": [ + false, + false, + false, + false, + false, + false, + false, + false, + false, + true, + true, + true, + false + ], + "TestReturnCode_Tests": [ + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false + ], + "TestReturnCode_Write": [ + false, + false, + false, + false, + false, + false, + false, + false, + false, + true, + true, + false, + false + ], + "TestRevenueRetrieval_1.0": [ + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false + ], + "TestRevenueRetrieval_1.1": [ + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false + ], + "TestRevenueRetrieval_1.2": [ + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false + ], + "TestSearch": [ + true, + true, + true, + true, + true, + true, + false, + false, + true, + true, + true, + false, + false, + false, + false, + false, + false, + false, + false + ], + "TestThreeSum": [ + false, + false, + false, + false, + false, + false, + false, + false, + false, + true, + true, + true, + true, + true, + true, + true, + true, + false, + false + ], + "TestWebApp_ListAnimals": [ + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false + ], + "TestWrite5FilesWithArray": [ + false, + false, + true, + true, + true, + true, + false, + false, + true, + true, + true, + true, + true + ], + "TestWrite6Files": [ + false, + false, + true, + true, + true, + true, + false, + false, + true, + true, + true, + true, + true + ], + "TestWriteFile": [ + true, + true, + true, + true, + true, + true, + false, + false, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true + ], + "TestWritingCLI_FileOrganizer": [ + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false + ], + "TestUrlShortener": [ + false + ] +}
\ No newline at end of file |