1 files changed, 15 insertions, 15 deletions
diff --git a/benchmark/agbenchmark/reports/processing/process_report.py b/benchmark/agbenchmark/reports/processing/process_report.py
index b390ba2f9..3bb94f9e3 100644
--- a/benchmark/agbenchmark/reports/processing/process_report.py
+++ b/benchmark/agbenchmark/reports/processing/process_report.py
@@ -1,4 +1,5 @@
 import json
+import logging
 import os
 from pathlib import Path
 from typing import Any
@@ -9,6 +10,8 @@ from agbenchmark.reports.processing.get_files import (
 from agbenchmark.reports.processing.report_types import Report, Test
 from agbenchmark.utils.data_types import STRING_DIFFICULTY_MAP
 
+logger = logging.getLogger(__name__)
+
 
 def get_reports_data(report_path: str) -> dict[str, Any]:
     latest_files = get_latest_report_from_agent_directories(report_path)
@@ -31,26 +34,23 @@ def get_reports_data(report_path: str) -> dict[str, Any]:
     return reports_data
 
 
-def get_agent_category(report: Report) -> dict[str, Any]:
+def get_highest_achieved_difficulty_per_category(report: Report) -> dict[str, Any]:
     categories: dict[str, Any] = {}
 
-    def get_highest_category_difficulty(data: Test) -> None:
-        for category in data.category:
-            if (
-                category == "interface"
-                or category == "iterate"
-                or category == "product_advisor"
-            ):
+    for test_data in report.tests.values():
+        for category in test_data.category:
+            if category in ("interface", "iterate", "product_advisor"):
                 continue
             categories.setdefault(category, 0)
-            if data.metrics.success:
-                num_dif = STRING_DIFFICULTY_MAP[data.metrics.difficulty]
+            if (
+                test_data.results  # skip tests with no results
+                and all(r.success for r in test_data.results)  # every result passed
+                and test_data.difficulty  # skip tests whose difficulty is unset/falsy
+            ):
+                num_dif = STRING_DIFFICULTY_MAP[test_data.difficulty]
                 if num_dif > categories[category]:
                     categories[category] = num_dif
 
-    for _, test_data in report.tests.items():
-        get_highest_category_difficulty(test_data)
-
     return categories
 
 
@@ -58,9 +58,9 @@ def all_agent_categories(reports_data: dict[str, Any]) -> dict[str, Any]:
     all_categories: dict[str, Any] = {}
 
     for name, report in reports_data.items():
-        categories = get_agent_category(report)
+        categories = get_highest_achieved_difficulty_per_category(report)
         if categories:  # only add to all_categories if categories is not empty
-            print(f"Adding {name}: {categories}")
+            logger.debug("Adding %s: %s", name, categories)  # lazy formatting
             all_categories[name] = categories
 
     return all_categories