diff options
Diffstat (limited to 'benchmark/agbenchmark/challenges/deprecated/memory/m3_noise/data.json')
-rw-r--r-- | benchmark/agbenchmark/challenges/deprecated/memory/m3_noise/data.json | 33 |
1 files changed, 33 insertions, 0 deletions
diff --git a/benchmark/agbenchmark/challenges/deprecated/memory/m3_noise/data.json b/benchmark/agbenchmark/challenges/deprecated/memory/m3_noise/data.json new file mode 100644 index 000000000..850a9c5c5 --- /dev/null +++ b/benchmark/agbenchmark/challenges/deprecated/memory/m3_noise/data.json @@ -0,0 +1,33 @@ +{ + "category": [ + "memory" + ], + "cutoff": 120, + "dependencies": [ + "TestRememberMultipleIds" + ], + "eval_id": "480eaff6-e287-4741-9fe0-a4634e0ad491", + "ground": { + "answer": "3145\n3791\n9317\n9471", + "eval": { + "type": "file" + }, + "files": [ + "result.txt" + ], + "should_contain": [ + "3145", + "3791", + "9317", + "9471" + ], + "should_not_contain": [] + }, + "info": { + "description": "Tests the agent's ability to remember information between actions. Multiple ids are presented in different files, and the last file instructs the agent to write these ids into another file. Some noise is also added to the files to test the agent's ability to filter out noise.", + "difficulty": "intermediate", + "side_effects": [] + }, + "name": "RememberMultipleIdsWithNoise", + "task": "Follow the instructions in the instructions_1.txt file" +} |