diff options
Diffstat (limited to 'benchmark/agbenchmark/challenges/deprecated/content_gen/2_plan/data.json')
-rw-r--r-- | benchmark/agbenchmark/challenges/deprecated/content_gen/2_plan/data.json | 30 |
1 files changed, 30 insertions, 0 deletions
diff --git a/benchmark/agbenchmark/challenges/deprecated/content_gen/2_plan/data.json b/benchmark/agbenchmark/challenges/deprecated/content_gen/2_plan/data.json new file mode 100644 index 000000000..94ecc1e6e --- /dev/null +++ b/benchmark/agbenchmark/challenges/deprecated/content_gen/2_plan/data.json @@ -0,0 +1,30 @@ +{ + "category": [ + "content_gen" + ], + "cutoff": 120, + "dependencies": [ + "TestWriteFile" + ], + "eval_id": "6ff65567-eb1e-4c7d-8b7f-dfc91dc95ed1", + "ground": { + "answer": "1. Search for flights to Toronto.\n2. Select the cheapest flight.\n3. Book the flight.\n4. Pay for the flight.\n5. Confirm the flight.", + "eval": { + "scoring": "scale", + "template": "reference", + "type": "llm" + }, + "files": [ + ".txt" + ], + "should_contain": [], + "should_not_contain": [] + }, + "info": { + "description": "ability to create a plan for a given task and write it to a .txt file.", + "difficulty": "basic", + "side_effects": [] + }, + "name": "PlanCreation", + "task": "As an agent powered by a large language model, create a plan for yourself based on the task of booking a flight to Toronto. Output this plan into a .txt file." +} |