aboutsummaryrefslogtreecommitdiff
path: root/benchmark/agbenchmark/reports/processing/process_report.py
diff options
context:
space:
mode:
Diffstat (limited to 'benchmark/agbenchmark/reports/processing/process_report.py')
-rw-r--r-- benchmark/agbenchmark/reports/processing/process_report.py | 30
1 files changed, 15 insertions, 15 deletions
diff --git a/benchmark/agbenchmark/reports/processing/process_report.py b/benchmark/agbenchmark/reports/processing/process_report.py
index b390ba2f9..3bb94f9e3 100644
--- a/benchmark/agbenchmark/reports/processing/process_report.py
+++ b/benchmark/agbenchmark/reports/processing/process_report.py
@@ -1,4 +1,5 @@
import json
+import logging
import os
from pathlib import Path
from typing import Any
@@ -9,6 +10,8 @@ from agbenchmark.reports.processing.get_files import (
from agbenchmark.reports.processing.report_types import Report, Test
from agbenchmark.utils.data_types import STRING_DIFFICULTY_MAP
+logger = logging.getLogger(__name__)
+
def get_reports_data(report_path: str) -> dict[str, Any]:
latest_files = get_latest_report_from_agent_directories(report_path)
@@ -31,26 +34,23 @@ def get_reports_data(report_path: str) -> dict[str, Any]:
return reports_data
-def get_agent_category(report: Report) -> dict[str, Any]:
+def get_highest_achieved_difficulty_per_category(report: Report) -> dict[str, Any]:
categories: dict[str, Any] = {}
- def get_highest_category_difficulty(data: Test) -> None:
- for category in data.category:
- if (
- category == "interface"
- or category == "iterate"
- or category == "product_advisor"
- ):
+ for _, test_data in report.tests.items():
+ for category in test_data.category:
+ if category in ("interface", "iterate", "product_advisor"):
continue
categories.setdefault(category, 0)
- if data.metrics.success:
- num_dif = STRING_DIFFICULTY_MAP[data.metrics.difficulty]
+ if (
+ test_data.results
+ and all(r.success for r in test_data.results)
+ and test_data.difficulty
+ ):
+ num_dif = STRING_DIFFICULTY_MAP[test_data.difficulty]
if num_dif > categories[category]:
categories[category] = num_dif
- for _, test_data in report.tests.items():
- get_highest_category_difficulty(test_data)
-
return categories
@@ -58,9 +58,9 @@ def all_agent_categories(reports_data: dict[str, Any]) -> dict[str, Any]:
all_categories: dict[str, Any] = {}
for name, report in reports_data.items():
- categories = get_agent_category(report)
+ categories = get_highest_achieved_difficulty_per_category(report)
if categories: # only add to all_categories if categories is not empty
- print(f"Adding {name}: {categories}")
+ logger.debug(f"Adding {name}: {categories}")
all_categories[name] = categories
return all_categories