aboutsummaryrefslogtreecommitdiff
path: root/benchmark/agbenchmark/challenges/deprecated/memory/m4_phrases/data.json
diff options
context:
space:
mode:
Diffstat (limited to 'benchmark/agbenchmark/challenges/deprecated/memory/m4_phrases/data.json')
-rw-r--r--benchmark/agbenchmark/challenges/deprecated/memory/m4_phrases/data.json33
1 files changed, 33 insertions, 0 deletions
diff --git a/benchmark/agbenchmark/challenges/deprecated/memory/m4_phrases/data.json b/benchmark/agbenchmark/challenges/deprecated/memory/m4_phrases/data.json
new file mode 100644
index 000000000..0f62d5c02
--- /dev/null
+++ b/benchmark/agbenchmark/challenges/deprecated/memory/m4_phrases/data.json
@@ -0,0 +1,33 @@
+{
+ "category": [
+ "memory"
+ ],
+ "cutoff": 120,
+ "dependencies": [
+ "TestRememberMultipleIdsWithNoise"
+ ],
+ "eval_id": "0c8cb5e5-7f7a-4475-977b-68ac2673d77a",
+ "ground": {
+ "answer": "The purple elephant danced on a rainbow while eating a taco\nThe sneaky toaster stole my socks and ran away to Hawaii\nMy pet rock sings better than Beyonc\u00e9 on Tuesdays\nThe giant hamster rode a unicycle through the crowded mall",
+ "eval": {
+ "type": "file"
+ },
+ "files": [
+ "result.txt"
+ ],
+ "should_contain": [
+ "The purple elephant danced on a rainbow while eating a taco",
+ "The sneaky toaster stole my socks and ran away to Hawaii",
+ "My pet rock sings better than Beyonc\u00e9 on Tuesdays",
+ "The giant hamster rode a unicycle through the crowded mall"
+ ],
+ "should_not_contain": []
+ },
+ "info": {
+ "description": "Tests the agent's ability to remember information between actions. Multiple phrases are presented in different files, and the last file instructs the agent to write these phrases into another file. Some noise is also added to the files to test the agent's ability to filter out noise.",
+ "difficulty": "advanced",
+ "side_effects": []
+ },
+ "name": "RememberMultiplePhrasesWithNoise",
+ "task": "Follow the instructions in the instructions_1.txt file"
+}