aboutsummaryrefslogtreecommitdiff
path: root/benchmark/agbenchmark/challenges/deprecated/d2.1_guided
diff options
context:
space:
mode:
Diffstat (limited to 'benchmark/agbenchmark/challenges/deprecated/d2.1_guided')
-rw-r--r-- benchmark/agbenchmark/challenges/deprecated/d2.1_guided/artifacts_in/__init__.py 0
-rw-r--r-- benchmark/agbenchmark/challenges/deprecated/d2.1_guided/artifacts_in/sample_code.py 13
-rw-r--r-- benchmark/agbenchmark/challenges/deprecated/d2.1_guided/artifacts_in/test.py 32
-rw-r--r-- benchmark/agbenchmark/challenges/deprecated/d2.1_guided/artifacts_out/__init__.py 0
-rw-r--r-- benchmark/agbenchmark/challenges/deprecated/d2.1_guided/artifacts_out/sample_code.py 12
-rw-r--r-- benchmark/agbenchmark/challenges/deprecated/d2.1_guided/artifacts_out/test.py 32
-rw-r--r-- benchmark/agbenchmark/challenges/deprecated/d2.1_guided/data.json 33
7 files changed, 122 insertions, 0 deletions
diff --git a/benchmark/agbenchmark/challenges/deprecated/d2.1_guided/artifacts_in/__init__.py b/benchmark/agbenchmark/challenges/deprecated/d2.1_guided/artifacts_in/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/benchmark/agbenchmark/challenges/deprecated/d2.1_guided/artifacts_in/__init__.py
diff --git a/benchmark/agbenchmark/challenges/deprecated/d2.1_guided/artifacts_in/sample_code.py b/benchmark/agbenchmark/challenges/deprecated/d2.1_guided/artifacts_in/sample_code.py
new file mode 100644
index 000000000..df8120bfa
--- /dev/null
+++ b/benchmark/agbenchmark/challenges/deprecated/d2.1_guided/artifacts_in/sample_code.py
@@ -0,0 +1,13 @@
+# mypy: ignore-errors
+from typing import List, Optional
+
+
+def two_sum(nums: List, target: int) -> Optional[List[int]]:
+    seen = {}  # value -> index of each number visited so far
+    for i, num in enumerate(nums):
+        typo
+        complement = target - num  # value that must already be in `seen` to reach target
+        if complement in seen:
+            return [seen[complement], i]  # earlier index first, current index second
+        seen[num] = i
+    return None  # loop finished without finding a pair
diff --git a/benchmark/agbenchmark/challenges/deprecated/d2.1_guided/artifacts_in/test.py b/benchmark/agbenchmark/challenges/deprecated/d2.1_guided/artifacts_in/test.py
new file mode 100644
index 000000000..c273ee793
--- /dev/null
+++ b/benchmark/agbenchmark/challenges/deprecated/d2.1_guided/artifacts_in/test.py
@@ -0,0 +1,32 @@
+# mypy: ignore-errors
+from typing import List
+
+from sample_code import two_sum
+
+
+def test_two_sum(nums: List, target: int, expected_result: List[int]) -> None:
+    result = two_sum(nums, target)
+    print(result)  # echoed to stdout; presumably matched against data.json "should_contain" -- confirm
+    assert (
+        result == expected_result
+    ), f"AssertionError: Expected the output to be {expected_result}"
+
+
+if __name__ == "__main__":
+    # test the trivial case with the first two numbers
+    nums = [2, 7, 11, 15]
+    target = 9
+    expected_result = [0, 1]
+    test_two_sum(nums, target, expected_result)
+
+    # test for ability to use zero and the same number twice
+    nums = [2, 7, 0, 15, 12, 0]
+    target = 0
+    expected_result = [2, 5]
+    test_two_sum(nums, target, expected_result)
+
+    # test for first and last index usage and negative numbers
+    nums = [-6, 7, 11, 4]
+    target = -2
+    expected_result = [0, 3]
+    test_two_sum(nums, target, expected_result)
diff --git a/benchmark/agbenchmark/challenges/deprecated/d2.1_guided/artifacts_out/__init__.py b/benchmark/agbenchmark/challenges/deprecated/d2.1_guided/artifacts_out/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/benchmark/agbenchmark/challenges/deprecated/d2.1_guided/artifacts_out/__init__.py
diff --git a/benchmark/agbenchmark/challenges/deprecated/d2.1_guided/artifacts_out/sample_code.py b/benchmark/agbenchmark/challenges/deprecated/d2.1_guided/artifacts_out/sample_code.py
new file mode 100644
index 000000000..de3d8c62c
--- /dev/null
+++ b/benchmark/agbenchmark/challenges/deprecated/d2.1_guided/artifacts_out/sample_code.py
@@ -0,0 +1,12 @@
+# mypy: ignore-errors
+from typing import List, Optional
+
+
+def two_sum(nums: List, target: int) -> Optional[List[int]]:
+    seen = {}  # value -> index of each number visited so far
+    for i, num in enumerate(nums):
+        complement = target - num  # value that must already be in `seen` to reach target
+        if complement in seen:
+            return [seen[complement], i]  # earlier index first, current index second
+        seen[num] = i  # recorded after the lookup, so an element never pairs with itself
+    return None  # no two entries add up to target
diff --git a/benchmark/agbenchmark/challenges/deprecated/d2.1_guided/artifacts_out/test.py b/benchmark/agbenchmark/challenges/deprecated/d2.1_guided/artifacts_out/test.py
new file mode 100644
index 000000000..c273ee793
--- /dev/null
+++ b/benchmark/agbenchmark/challenges/deprecated/d2.1_guided/artifacts_out/test.py
@@ -0,0 +1,32 @@
+# mypy: ignore-errors
+from typing import List
+
+from sample_code import two_sum
+
+
+def test_two_sum(nums: List, target: int, expected_result: List[int]) -> None:
+    result = two_sum(nums, target)
+    print(result)  # echoed to stdout; presumably matched against data.json "should_contain" -- confirm
+    assert (
+        result == expected_result
+    ), f"AssertionError: Expected the output to be {expected_result}"
+
+
+if __name__ == "__main__":
+    # test the trivial case with the first two numbers
+    nums = [2, 7, 11, 15]
+    target = 9
+    expected_result = [0, 1]
+    test_two_sum(nums, target, expected_result)
+
+    # test for ability to use zero and the same number twice
+    nums = [2, 7, 0, 15, 12, 0]
+    target = 0
+    expected_result = [2, 5]
+    test_two_sum(nums, target, expected_result)
+
+    # test for first and last index usage and negative numbers
+    nums = [-6, 7, 11, 4]
+    target = -2
+    expected_result = [0, 3]
+    test_two_sum(nums, target, expected_result)
diff --git a/benchmark/agbenchmark/challenges/deprecated/d2.1_guided/data.json b/benchmark/agbenchmark/challenges/deprecated/d2.1_guided/data.json
new file mode 100644
index 000000000..0ef6f3dd3
--- /dev/null
+++ b/benchmark/agbenchmark/challenges/deprecated/d2.1_guided/data.json
@@ -0,0 +1,33 @@
+{
+ "category": [
+ "code",
+ "iterate"
+ ],
+ "cutoff": 75,
+ "dependencies": [
+ "TestReadFile"
+ ],
+ "eval_id": "e3794d05-7617-4b95-9e96-d5f84309f66f",
+ "ground": {
+ "answer": "[0, 1] [2, 5] [0, 3]",
+ "eval": {
+ "type": "python"
+ },
+ "files": [
+ "test.py"
+ ],
+ "should_contain": [
+ "[0, 1]",
+ "[2, 5]",
+ "[0, 3]"
+ ],
+ "should_not_contain": []
+ },
+ "info": {
+ "description": "Tests ability for the agent to debug python code with a simple typo in it.",
+ "difficulty": "novice",
+ "side_effects": []
+ },
+ "name": "DebugSimpleTypoWithGuidance",
+ "task": "1- Run test.py.\n2- Read sample_code.py.\n3- Modify sample_code.py.\nRepeat step 1, 2 and 3 until test.py runs without errors.\n"
+}