diff options
Diffstat (limited to 'benchmark/agbenchmark/challenges/deprecated/adapatability/a1_debug/data.json')
-rw-r--r-- | benchmark/agbenchmark/challenges/deprecated/adapatability/a1_debug/data.json | 32 |
1 file changed, 32 insertions, 0 deletions
diff --git a/benchmark/agbenchmark/challenges/deprecated/adapatability/a1_debug/data.json b/benchmark/agbenchmark/challenges/deprecated/adapatability/a1_debug/data.json new file mode 100644 index 000000000..8328ca926 --- /dev/null +++ b/benchmark/agbenchmark/challenges/deprecated/adapatability/a1_debug/data.json @@ -0,0 +1,32 @@ +{ + "category": [ + "adaptability" + ], + "cutoff": 75, + "dependencies": [ + "TestDebugSimpleTypoWithGuidance" + ], + "eval_id": "38671c68-89ea-4c51-92a5-1bc35a033c49", + "ground": { + "answer": "Same as the original DebugWithGuidance test - [0, 1] [2, 5] [0, 3]", + "eval": { + "type": "python" + }, + "files": [ + "test.py" + ], + "should_contain": [ + "[0, 1]", + "[2, 5]", + "[0, 3]" + ], + "should_not_contain": [] + }, + "info": { + "description": "A version of the debug challenge where the wrong files to test and change are given. The LLM must figure out the challenges.", + "difficulty": "intermediate", + "side_effects": [] + }, + "name": "AdaptSimpleTypoWithGuidance", + "task": "1- Run sample_code.py.\n2- Read test.py.\n3- Modify test.py.\nRepeat step 1, 2 and 3 until sample_code.py runs without errors.\n" +} |