about | summary | refs | log | tree | commit | diff
path: root/benchmark/agbenchmark/utils/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'benchmark/agbenchmark/utils/utils.py')
-rw-r--r-- benchmark/agbenchmark/utils/utils.py 83
1 files changed, 34 insertions, 49 deletions
diff --git a/benchmark/agbenchmark/utils/utils.py b/benchmark/agbenchmark/utils/utils.py
index 2fc51d212..eaa713730 100644
--- a/benchmark/agbenchmark/utils/utils.py
+++ b/benchmark/agbenchmark/utils/utils.py
@@ -1,18 +1,23 @@
# radio charts, logs, helper functions for tests, anything else relevant.
import json
+import logging
import os
import re
from pathlib import Path
-from typing import Any, List, Optional
+from typing import Any, Optional
from dotenv import load_dotenv
-load_dotenv()
+from agbenchmark.reports.processing.report_types import Test
from agbenchmark.utils.data_types import DIFFICULTY_MAP, DifficultyLevel
+load_dotenv()
+
AGENT_NAME = os.getenv("AGENT_NAME")
REPORT_LOCATION = os.getenv("REPORT_LOCATION", None)
+logger = logging.getLogger(__name__)
+
def replace_backslash(value: Any) -> Any:
if isinstance(value, str):
@@ -27,17 +32,6 @@ def replace_backslash(value: Any) -> Any:
return value
-def calculate_success_percentage(results: list[bool]) -> float:
- # Take the last 10 results or all if less than 10
- last_results = results[-10:] if len(results) > 10 else results
- success_count = last_results.count(True)
- total_count = len(last_results)
- if total_count == 0:
- return 0
- success_percentage = (success_count / total_count) * 100 # as a percentage
- return round(success_percentage, 2)
-
-
def get_test_path(json_file: str | Path) -> str:
if isinstance(json_file, str):
json_file = Path(json_file)
@@ -59,41 +53,41 @@ def get_test_path(json_file: str | Path) -> str:
def get_highest_success_difficulty(
- data: dict, just_string: Optional[bool] = None
+ data: dict[str, Test], just_string: Optional[bool] = None
) -> str:
highest_difficulty = None
highest_difficulty_level = 0
for test_name, test_data in data.items():
try:
- if test_data.get("tests", None):
- highest_difficulty_str = test_data["metrics"]["highest_difficulty"]
+ if any(r.success for r in test_data.results):
+ difficulty_str = test_data.difficulty
+ if not difficulty_str:
+ continue
+
try:
- highest_difficulty = DifficultyLevel[highest_difficulty_str]
- highest_difficulty_level = DIFFICULTY_MAP[highest_difficulty]
+ difficulty_enum = DifficultyLevel[difficulty_str.lower()]
+ difficulty_level = DIFFICULTY_MAP[difficulty_enum]
+
+ if difficulty_level > highest_difficulty_level:
+ highest_difficulty = difficulty_enum
+ highest_difficulty_level = difficulty_level
except KeyError:
- print(
- f"Unexpected difficulty level '{highest_difficulty_str}' in test '{test_name}'"
+ logger.warning(
+ f"Unexpected difficulty level '{difficulty_str}' "
+ f"in test '{test_name}'"
)
continue
- else:
- if test_data["metrics"]["success"]:
- difficulty_str = test_data["metrics"]["difficulty"]
-
- try:
- difficulty_enum = DifficultyLevel[difficulty_str.lower()]
- difficulty_level = DIFFICULTY_MAP[difficulty_enum]
-
- if difficulty_level > highest_difficulty_level:
- highest_difficulty = difficulty_enum
- highest_difficulty_level = difficulty_level
- except KeyError:
- print(
- f"Unexpected difficulty level '{difficulty_str}' in test '{test_name}'"
- )
- continue
- except Exception:
- print(f"Make sure you selected the right test, no reports were generated.")
+ except Exception as e:
+ logger.warning(
+ "An unexpected error [1] occurred while analyzing report [2]. "
+ "Please notify a maintainer.\n"
+ f"Report data [2]: {data}\n"
+ f"Error [1]: {e}"
+ )
+ logger.warning(
+ "Make sure you selected the right test, no reports were generated."
+ )
break
if highest_difficulty is not None:
@@ -116,22 +110,13 @@ def get_highest_success_difficulty(
# remote_url = remote_url[:-4]
# git_commit_sha = f"{remote_url}/tree/{repo.head.commit.hexsha}"
-# # print(f"GIT_COMMIT_SHA: {git_commit_sha}")
+# # logger.debug(f"GIT_COMMIT_SHA: {git_commit_sha}")
# return git_commit_sha
# except Exception:
-# # print(f"{directory} is not a git repository!")
+# # logger.error(f"{directory} is not a git repository!")
# return None
-def agent_eligibible_for_optional_categories(
- optional_challenge_categories: List, agent_categories: List
-) -> bool:
- for element in optional_challenge_categories:
- if element not in agent_categories:
- return False
- return True
-
-
def write_pretty_json(data, json_file):
sorted_data = deep_sort(data)
json_graph = json.dumps(sorted_data, indent=4)