diff options
Diffstat (limited to 'benchmark/agbenchmark/utils/utils.py')
-rw-r--r-- | benchmark/agbenchmark/utils/utils.py | 83 |
1 files changed, 34 insertions, 49 deletions
diff --git a/benchmark/agbenchmark/utils/utils.py b/benchmark/agbenchmark/utils/utils.py index 2fc51d212..eaa713730 100644 --- a/benchmark/agbenchmark/utils/utils.py +++ b/benchmark/agbenchmark/utils/utils.py @@ -1,18 +1,23 @@ # radio charts, logs, helper functions for tests, anything else relevant. import json +import logging import os import re from pathlib import Path -from typing import Any, List, Optional +from typing import Any, Optional from dotenv import load_dotenv -load_dotenv() +from agbenchmark.reports.processing.report_types import Test from agbenchmark.utils.data_types import DIFFICULTY_MAP, DifficultyLevel +load_dotenv() + AGENT_NAME = os.getenv("AGENT_NAME") REPORT_LOCATION = os.getenv("REPORT_LOCATION", None) +logger = logging.getLogger(__name__) + def replace_backslash(value: Any) -> Any: if isinstance(value, str): @@ -27,17 +32,6 @@ def replace_backslash(value: Any) -> Any: return value -def calculate_success_percentage(results: list[bool]) -> float: - # Take the last 10 results or all if less than 10 - last_results = results[-10:] if len(results) > 10 else results - success_count = last_results.count(True) - total_count = len(last_results) - if total_count == 0: - return 0 - success_percentage = (success_count / total_count) * 100 # as a percentage - return round(success_percentage, 2) - - def get_test_path(json_file: str | Path) -> str: if isinstance(json_file, str): json_file = Path(json_file) @@ -59,41 +53,41 @@ def get_test_path(json_file: str | Path) -> str: def get_highest_success_difficulty( - data: dict, just_string: Optional[bool] = None + data: dict[str, Test], just_string: Optional[bool] = None ) -> str: highest_difficulty = None highest_difficulty_level = 0 for test_name, test_data in data.items(): try: - if test_data.get("tests", None): - highest_difficulty_str = test_data["metrics"]["highest_difficulty"] + if any(r.success for r in test_data.results): + difficulty_str = test_data.difficulty + if not difficulty_str: + continue + try: - highest_difficulty = DifficultyLevel[highest_difficulty_str] - highest_difficulty_level = DIFFICULTY_MAP[highest_difficulty] + difficulty_enum = DifficultyLevel[difficulty_str.lower()] + difficulty_level = DIFFICULTY_MAP[difficulty_enum] + + if difficulty_level > highest_difficulty_level: + highest_difficulty = difficulty_enum + highest_difficulty_level = difficulty_level except KeyError: - print( - f"Unexpected difficulty level '{highest_difficulty_str}' in test '{test_name}'" + logger.warning( + f"Unexpected difficulty level '{difficulty_str}' " + f"in test '{test_name}'" ) continue - else: - if test_data["metrics"]["success"]: - difficulty_str = test_data["metrics"]["difficulty"] - - try: - difficulty_enum = DifficultyLevel[difficulty_str.lower()] - difficulty_level = DIFFICULTY_MAP[difficulty_enum] - - if difficulty_level > highest_difficulty_level: - highest_difficulty = difficulty_enum - highest_difficulty_level = difficulty_level - except KeyError: - print( - f"Unexpected difficulty level '{difficulty_str}' in test '{test_name}'" - ) - continue - except Exception: - print(f"Make sure you selected the right test, no reports were generated.") + except Exception as e: + logger.warning( + "An unexpected error [1] occurred while analyzing report [2]." + "Please notify a maintainer.\n" + f"Report data [1]: {data}\n" + f"Error [2]: {e}" + ) + logger.warning( + "Make sure you selected the right test, no reports were generated." + ) break if highest_difficulty is not None: @@ -116,22 +110,13 @@ def get_highest_success_difficulty( # remote_url = remote_url[:-4] # git_commit_sha = f"{remote_url}/tree/{repo.head.commit.hexsha}" -# # print(f"GIT_COMMIT_SHA: {git_commit_sha}") +# # logger.debug(f"GIT_COMMIT_SHA: {git_commit_sha}") # return git_commit_sha # except Exception: -# # print(f"{directory} is not a git repository!") +# # logger.error(f"{directory} is not a git repository!") # return None -def agent_eligibible_for_optional_categories( - optional_challenge_categories: List, agent_categories: List -) -> bool: - for element in optional_challenge_categories: - if element not in agent_categories: - return False - return True - - def write_pretty_json(data, json_file): sorted_data = deep_sort(data) json_graph = json.dumps(sorted_data, indent=4) |