diff options
author | Reinier van der Leer <pwuts@agpt.co> | 2023-11-21 10:58:54 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-11-21 10:58:54 +0100 |
commit | b106a61352d7a9f85bbaa693fed7fc114c5277f7 (patch) | |
tree | d6f0b0858aa9573077aa61230931af992847e459 /benchmark | |
parent | Allow AutoGPT to access local web hosts (#5318) (diff) | |
download | Auto-GPT-b106a61352d7a9f85bbaa693fed7fc114c5277f7.tar.gz Auto-GPT-b106a61352d7a9f85bbaa693fed7fc114c5277f7.tar.bz2 Auto-GPT-b106a61352d7a9f85bbaa693fed7fc114c5277f7.zip |
Clean up & fix GitHub workflows (#6313)
* ci: Mitigate security issues in autogpt-ci.yml
- Remove unnecessary pull_request_target paths and related variables and config
- Set permissions for contents to read only
* ci: Simplify steps in autogpt-ci.yml workflow using GitHub CLI
- Simplify the label and comment steps in 'autogpt-ci.yml' by using the GitHub CLI instead of the API
- Replace curl command with 'gh issue edit' to add "behaviour change" label to the pull request
- Replace gh api command with 'gh issue comment' to leave a comment about the changed behavior of AutoGPT in the pull request
* ci: Fix issues in workflows
- Move environment variable definition to top level in benchmark-ci.yml (because the other job also needs it)
- Remove invalid 'branches: [hackathon]' restriction in hackathon.yml workflow
- Remove redundant 'ref' and 'repository' fields in the 'checkout' step of both workflows.
* ci: Delete legacy benchmarks.yml workflow
* ci: Add triggers for CI workflows
- Add triggers to run CI workflows when they are edited.
- Update the paths for the CI workflows in the trigger configuration.
* fix: Fix benchmark lint error
- Remove unnecessary blank lines in report_types.py
- Switch string quotes in challenge.py to double quotes for consistency
* fix: Update task description in password generator data.json
- Update task description in `data.json` file for the password generator challenge to clarify the input requirements and error handling.
- This change is made in an attempt to make the Benchmark CI pass.
* fix: Fix PasswordGenerator challenge in CI
- Fix the behavior of the reference password_generator.py to align with the task description
- Use default password length 8 instead of a random length in the generate_password function
- Retrieve the password length from the command line arguments if "--length" is provided, else set it to 8
Diffstat (limited to 'benchmark')
5 files changed, 10 insertions, 8 deletions
diff --git a/benchmark/agbenchmark/challenges/deprecated/code/1_password_generator/data.json b/benchmark/agbenchmark/challenges/deprecated/code/1_password_generator/data.json index 01dd0afcb..0f30ea872 100644 --- a/benchmark/agbenchmark/challenges/deprecated/code/1_password_generator/data.json +++ b/benchmark/agbenchmark/challenges/deprecated/code/1_password_generator/data.json @@ -24,5 +24,5 @@ "side_effects": [] }, "name": "PasswordGenerator", - "task": "Create a random password generator. The password should have between 8 and 16 characters and should contain letters, numbers and symbols. The password should be printed to the console. The entry point will be a python file that can be run this way: python password_generator.py [--len x] where x is the length of the password. If no length is specified, the password should be 8 characters long. The password_generator can also be imported as a module and called as password = password_generator.generate_password(len=x). Any invalid input should raise a ValueError." + "task": "Create a random password generator. The password should have between 8 and 16 characters and should contain letters, numbers and symbols. The password should be printed to the console. The entry point will be a python file that can be run this way: python password_generator.py [--len x] where x is the length of the password. The password_generator can also be imported as a module and called as password = password_generator.generate_password(len=x). If no length is specified, the password should be 8 characters long. Any invalid input should raise a ValueError." 
} diff --git a/benchmark/agbenchmark/challenges/verticals/code/2_password_generator/artifacts_out/password_generator.py b/benchmark/agbenchmark/challenges/verticals/code/2_password_generator/artifacts_out/password_generator.py index 514ec43a4..5797ebcb2 100644 --- a/benchmark/agbenchmark/challenges/verticals/code/2_password_generator/artifacts_out/password_generator.py +++ b/benchmark/agbenchmark/challenges/verticals/code/2_password_generator/artifacts_out/password_generator.py @@ -1,8 +1,9 @@ import random import string +import sys -def generate_password(length: int) -> str: +def generate_password(length: int = 8) -> str: if length < 8 or length > 16: raise ValueError("Password length must be between 8 and 16 characters.") @@ -19,5 +20,8 @@ def generate_password(length: int) -> str: if __name__ == "__main__": - password_length = random.randint(8, 16) + password_length = ( + int(sys.argv[sys.argv.index("--length") + 1]) + if "--length" in sys.argv else 8 + ) print(generate_password(password_length)) diff --git a/benchmark/agbenchmark/challenges/verticals/code/2_password_generator/data.json b/benchmark/agbenchmark/challenges/verticals/code/2_password_generator/data.json index 8b2e1d91d..14bbe4539 100644 --- a/benchmark/agbenchmark/challenges/verticals/code/2_password_generator/data.json +++ b/benchmark/agbenchmark/challenges/verticals/code/2_password_generator/data.json @@ -24,5 +24,5 @@ "side_effects": [] }, "name": "PasswordGenerator", - "task": "Create a random password generator. The password should have between 8 and 16 characters and should contain at least one letter, number and symbol. The password should be printed to the console. The entry point will be a python file that can be run this way: python password_generator.py [--len x] where x is the length of the password. If no length is specified, the password should be 8 characters long. 
The password_generator can also be imported as a module and called as password = password_generator.generate_password(len=x). Any invalid input should raise a ValueError." + "task": "Create a random password generator. The password should have between 8 and 16 characters and should contain at least one letter, number and symbol. The password should be printed to the console. The entry point will be a python file that can be run this way: python password_generator.py [--length x] where x is the length of the password. If no length is specified, the password should be 8 characters long. The password_generator can also be imported as a module and called as password = password_generator.generate_password(length=x). Any invalid input should raise a ValueError." } diff --git a/benchmark/agbenchmark/reports/processing/report_types.py b/benchmark/agbenchmark/reports/processing/report_types.py index d2fc8dea3..3ba9e6c6b 100644 --- a/benchmark/agbenchmark/reports/processing/report_types.py +++ b/benchmark/agbenchmark/reports/processing/report_types.py @@ -54,7 +54,6 @@ class Test(BaseModelBenchmark): metadata: Any - class ReportBase(BaseModelBenchmark): command: str completion_time: str | None @@ -70,7 +69,6 @@ class Report(ReportBase): tests: Dict[str, Test] - class ReportV2(Test, ReportBase): test_name: str run_id: str | None diff --git a/benchmark/agbenchmark/utils/challenge.py b/benchmark/agbenchmark/utils/challenge.py index 20353f685..a32ab6cf7 100644 --- a/benchmark/agbenchmark/utils/challenge.py +++ b/benchmark/agbenchmark/utils/challenge.py @@ -123,7 +123,7 @@ class Challenge(ABC): print("\033[1;34mScoring content:\033[0m", content) if ground.should_contain: for should_contain_word in ground.should_contain: - if not getattr(ground, 'case_sensitive', True): + if not getattr(ground, "case_sensitive", True): should_contain_word = should_contain_word.lower() content = content.lower() print_content = ( @@ -137,7 +137,7 @@ class Challenge(ABC): if 
ground.should_not_contain: for should_not_contain_word in ground.should_not_contain: - if not getattr(ground, 'case_sensitive', True): + if not getattr(ground, "case_sensitive", True): should_not_contain_word = should_not_contain_word.lower() content = content.lower() print_content = f"\033[1;34mWord that should not exist\033[0m - {should_not_contain_word}:" |