1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
|
# radio charts, logs, helper functions for tests, anything else relevant.
import json
import os
import re
from pathlib import Path
from typing import Any, List, Optional
from dotenv import load_dotenv
load_dotenv()
from agbenchmark.utils.data_types import DIFFICULTY_MAP, DifficultyLevel
AGENT_NAME = os.getenv("AGENT_NAME")
REPORT_LOCATION = os.getenv("REPORT_LOCATION", None)
def replace_backslash(value: Any) -> Any:
if isinstance(value, str):
return re.sub(
r"\\+", "/", value
) # replace one or more backslashes with a forward slash
elif isinstance(value, list):
return [replace_backslash(i) for i in value]
elif isinstance(value, dict):
return {k: replace_backslash(v) for k, v in value.items()}
else:
return value
def calculate_success_percentage(results: list[bool]) -> float:
# Take the last 10 results or all if less than 10
last_results = results[-10:] if len(results) > 10 else results
success_count = last_results.count(True)
total_count = len(last_results)
if total_count == 0:
return 0
success_percentage = (success_count / total_count) * 100 # as a percentage
return round(success_percentage, 2)
def get_test_path(json_file: str | Path) -> str:
if isinstance(json_file, str):
json_file = Path(json_file)
# Find the index of "agbenchmark" in the path parts
try:
agbenchmark_index = json_file.parts.index("benchmark")
except ValueError:
raise ValueError("Invalid challenge location.")
# Create the path from "agbenchmark" onwards
challenge_location = Path(*json_file.parts[agbenchmark_index:])
formatted_location = replace_backslash(str(challenge_location))
if isinstance(formatted_location, str):
return formatted_location
else:
return str(challenge_location)
def get_highest_success_difficulty(
data: dict, just_string: Optional[bool] = None
) -> str:
highest_difficulty = None
highest_difficulty_level = 0
for test_name, test_data in data.items():
try:
if test_data.get("tests", None):
highest_difficulty_str = test_data["metrics"]["highest_difficulty"]
try:
highest_difficulty = DifficultyLevel[highest_difficulty_str]
highest_difficulty_level = DIFFICULTY_MAP[highest_difficulty]
except KeyError:
print(
f"Unexpected difficulty level '{highest_difficulty_str}' in test '{test_name}'"
)
continue
else:
if test_data["metrics"]["success"]:
difficulty_str = test_data["metrics"]["difficulty"]
try:
difficulty_enum = DifficultyLevel[difficulty_str.lower()]
difficulty_level = DIFFICULTY_MAP[difficulty_enum]
if difficulty_level > highest_difficulty_level:
highest_difficulty = difficulty_enum
highest_difficulty_level = difficulty_level
except KeyError:
print(
f"Unexpected difficulty level '{difficulty_str}' in test '{test_name}'"
)
continue
except Exception:
print(f"Make sure you selected the right test, no reports were generated.")
break
if highest_difficulty is not None:
highest_difficulty_str = highest_difficulty.name # convert enum to string
else:
highest_difficulty_str = ""
if highest_difficulty_level and not just_string:
return f"{highest_difficulty_str}: {highest_difficulty_level}"
elif highest_difficulty_str:
return highest_difficulty_str
return "No successful tests"
# def get_git_commit_sha(directory: Path) -> Optional[str]:
# try:
# repo = git.Repo(directory)
# remote_url = repo.remotes.origin.url
# if remote_url.endswith(".git"):
# remote_url = remote_url[:-4]
# git_commit_sha = f"{remote_url}/tree/{repo.head.commit.hexsha}"
# # print(f"GIT_COMMIT_SHA: {git_commit_sha}")
# return git_commit_sha
# except Exception:
# # print(f"{directory} is not a git repository!")
# return None
def agent_eligibible_for_optional_categories(
optional_challenge_categories: List, agent_categories: List
) -> bool:
for element in optional_challenge_categories:
if element not in agent_categories:
return False
return True
def write_pretty_json(data, json_file):
sorted_data = deep_sort(data)
json_graph = json.dumps(sorted_data, indent=4)
with open(json_file, "w") as f:
f.write(json_graph)
f.write("\n")
def deep_sort(obj):
"""
Recursively sort the keys in JSON object
"""
if isinstance(obj, dict):
return {k: deep_sort(v) for k, v in sorted(obj.items())}
if isinstance(obj, list):
return [deep_sort(elem) for elem in obj]
return obj
|