about | summary | refs | log | tree | commit | diff
path: root/benchmark
diff options
context:
space:
mode:
author Reinier van der Leer <pwuts@agpt.co> 2023-11-21 10:58:54 +0100
committer GitHub <noreply@github.com> 2023-11-21 10:58:54 +0100
commit b106a61352d7a9f85bbaa693fed7fc114c5277f7 (patch)
tree d6f0b0858aa9573077aa61230931af992847e459 /benchmark
parent Allow AutoGPT to access local web hosts (#5318) (diff)
download Auto-GPT-b106a61352d7a9f85bbaa693fed7fc114c5277f7.tar.gz
Auto-GPT-b106a61352d7a9f85bbaa693fed7fc114c5277f7.tar.bz2
Auto-GPT-b106a61352d7a9f85bbaa693fed7fc114c5277f7.zip
Clean up & fix GitHub workflows (#6313)
* ci: Mitigate security issues in autogpt-ci.yml
  - Remove unnecessary pull_request_target paths and related variables and config
  - Set permissions for contents to read only
* ci: Simplify steps in autogpt-ci.yml workflow using GitHub CLI
  - Simplify step in 'autogpt-ci.yml' by using GitHub CLI instead of API for adding label and comment functionality
  - Replace curl command with 'gh issue edit' to add "behaviour change" label to the pull request
  - Replace gh api command with 'gh issue comment' to leave a comment about the changed behavior of AutoGPT in the pull request
* ci: Fix issues in workflows
  - Move environment variable definition to top level in benchmark-ci.yml (because the other job also needs it)
  - Removed invalid 'branches: [hackathon]' restriction in hackathon.yml workflow
  - Removed redundant 'ref' and 'repository' fields in the 'checkout' step of both workflows.
* ci: Delete legacy benchmarks.yml workflow
* ci: Add triggers for CI workflows
  - Add triggers to run CI workflows when they are edited.
  - Update the paths for the CI workflows in the trigger configuration.
* fix: Fix benchmark lint error
  - Removed unnecessary blank lines in report_types.py
  - Fixed string quotes in challenge.py to maintain consistency
* fix: Update task description in password generator data.json
  - Update task description in `data.json` file for the password generator challenge to clarify the input requirements and error handling.
  - This change is made in an attempt to make the Benchmark CI pass.
* fix: Fix PasswordGenerator challenge in CI
  - Fix the behavior of the reference password_generator.py to align with the task description
  - Use default password length 8 instead of a random length in the generate_password function
  - Retrieve the password length from the command line arguments if "--length" is provided, else set it to 8
Diffstat (limited to 'benchmark')
-rw-r--r-- benchmark/agbenchmark/challenges/deprecated/code/1_password_generator/data.json 2
-rw-r--r-- benchmark/agbenchmark/challenges/verticals/code/2_password_generator/artifacts_out/password_generator.py 8
-rw-r--r-- benchmark/agbenchmark/challenges/verticals/code/2_password_generator/data.json 2
-rw-r--r-- benchmark/agbenchmark/reports/processing/report_types.py 2
-rw-r--r-- benchmark/agbenchmark/utils/challenge.py 4
5 files changed, 10 insertions, 8 deletions
diff --git a/benchmark/agbenchmark/challenges/deprecated/code/1_password_generator/data.json b/benchmark/agbenchmark/challenges/deprecated/code/1_password_generator/data.json
index 01dd0afcb..0f30ea872 100644
--- a/benchmark/agbenchmark/challenges/deprecated/code/1_password_generator/data.json
+++ b/benchmark/agbenchmark/challenges/deprecated/code/1_password_generator/data.json
@@ -24,5 +24,5 @@
"side_effects": []
},
"name": "PasswordGenerator",
- "task": "Create a random password generator. The password should have between 8 and 16 characters and should contain letters, numbers and symbols. The password should be printed to the console. The entry point will be a python file that can be run this way: python password_generator.py [--len x] where x is the length of the password. If no length is specified, the password should be 8 characters long. The password_generator can also be imported as a module and called as password = password_generator.generate_password(len=x). Any invalid input should raise a ValueError."
+ "task": "Create a random password generator. The password should have between 8 and 16 characters and should contain letters, numbers and symbols. The password should be printed to the console. The entry point will be a python file that can be run this way: python password_generator.py [--len x] where x is the length of the password. The password_generator can also be imported as a module and called as password = password_generator.generate_password(len=x). If no length is specified, the password should be 8 characters long. Any invalid input should raise a ValueError."
}
diff --git a/benchmark/agbenchmark/challenges/verticals/code/2_password_generator/artifacts_out/password_generator.py b/benchmark/agbenchmark/challenges/verticals/code/2_password_generator/artifacts_out/password_generator.py
index 514ec43a4..5797ebcb2 100644
--- a/benchmark/agbenchmark/challenges/verticals/code/2_password_generator/artifacts_out/password_generator.py
+++ b/benchmark/agbenchmark/challenges/verticals/code/2_password_generator/artifacts_out/password_generator.py
@@ -1,8 +1,9 @@
import random
import string
+import sys
-def generate_password(length: int) -> str:
+def generate_password(length: int = 8) -> str:
if length < 8 or length > 16:
raise ValueError("Password length must be between 8 and 16 characters.")
@@ -19,5 +20,8 @@ def generate_password(length: int) -> str:
if __name__ == "__main__":
- password_length = random.randint(8, 16)
+ password_length = (
+ int(sys.argv[sys.argv.index("--length") + 1])
+ if "--length" in sys.argv else 8
+ )
print(generate_password(password_length))
diff --git a/benchmark/agbenchmark/challenges/verticals/code/2_password_generator/data.json b/benchmark/agbenchmark/challenges/verticals/code/2_password_generator/data.json
index 8b2e1d91d..14bbe4539 100644
--- a/benchmark/agbenchmark/challenges/verticals/code/2_password_generator/data.json
+++ b/benchmark/agbenchmark/challenges/verticals/code/2_password_generator/data.json
@@ -24,5 +24,5 @@
"side_effects": []
},
"name": "PasswordGenerator",
- "task": "Create a random password generator. The password should have between 8 and 16 characters and should contain at least one letter, number and symbol. The password should be printed to the console. The entry point will be a python file that can be run this way: python password_generator.py [--len x] where x is the length of the password. If no length is specified, the password should be 8 characters long. The password_generator can also be imported as a module and called as password = password_generator.generate_password(len=x). Any invalid input should raise a ValueError."
+ "task": "Create a random password generator. The password should have between 8 and 16 characters and should contain at least one letter, number and symbol. The password should be printed to the console. The entry point will be a python file that can be run this way: python password_generator.py [--length x] where x is the length of the password. If no length is specified, the password should be 8 characters long. The password_generator can also be imported as a module and called as password = password_generator.generate_password(length=x). Any invalid input should raise a ValueError."
}
diff --git a/benchmark/agbenchmark/reports/processing/report_types.py b/benchmark/agbenchmark/reports/processing/report_types.py
index d2fc8dea3..3ba9e6c6b 100644
--- a/benchmark/agbenchmark/reports/processing/report_types.py
+++ b/benchmark/agbenchmark/reports/processing/report_types.py
@@ -54,7 +54,6 @@ class Test(BaseModelBenchmark):
metadata: Any
-
class ReportBase(BaseModelBenchmark):
command: str
completion_time: str | None
@@ -70,7 +69,6 @@ class Report(ReportBase):
tests: Dict[str, Test]
-
class ReportV2(Test, ReportBase):
test_name: str
run_id: str | None
diff --git a/benchmark/agbenchmark/utils/challenge.py b/benchmark/agbenchmark/utils/challenge.py
index 20353f685..a32ab6cf7 100644
--- a/benchmark/agbenchmark/utils/challenge.py
+++ b/benchmark/agbenchmark/utils/challenge.py
@@ -123,7 +123,7 @@ class Challenge(ABC):
print("\033[1;34mScoring content:\033[0m", content)
if ground.should_contain:
for should_contain_word in ground.should_contain:
- if not getattr(ground, 'case_sensitive', True):
+ if not getattr(ground, "case_sensitive", True):
should_contain_word = should_contain_word.lower()
content = content.lower()
print_content = (
@@ -137,7 +137,7 @@ class Challenge(ABC):
if ground.should_not_contain:
for should_not_contain_word in ground.should_not_contain:
- if not getattr(ground, 'case_sensitive', True):
+ if not getattr(ground, "case_sensitive", True):
should_not_contain_word = should_not_contain_word.lower()
content = content.lower()
print_content = f"\033[1;34mWord that should not exist\033[0m - {should_not_contain_word}:"