diff options
Diffstat (limited to 'benchmark/agbenchmark/reports/processing/process_report.py')
-rw-r--r-- | benchmark/agbenchmark/reports/processing/process_report.py | 30 |
1 files changed, 15 insertions, 15 deletions
diff --git a/benchmark/agbenchmark/reports/processing/process_report.py b/benchmark/agbenchmark/reports/processing/process_report.py index b390ba2f9..3bb94f9e3 100644 --- a/benchmark/agbenchmark/reports/processing/process_report.py +++ b/benchmark/agbenchmark/reports/processing/process_report.py @@ -1,4 +1,5 @@ import json +import logging import os from pathlib import Path from typing import Any @@ -9,6 +10,8 @@ from agbenchmark.reports.processing.get_files import ( from agbenchmark.reports.processing.report_types import Report, Test from agbenchmark.utils.data_types import STRING_DIFFICULTY_MAP +logger = logging.getLogger(__name__) + def get_reports_data(report_path: str) -> dict[str, Any]: latest_files = get_latest_report_from_agent_directories(report_path) @@ -31,26 +34,23 @@ def get_reports_data(report_path: str) -> dict[str, Any]: return reports_data -def get_agent_category(report: Report) -> dict[str, Any]: +def get_highest_achieved_difficulty_per_category(report: Report) -> dict[str, Any]: categories: dict[str, Any] = {} - def get_highest_category_difficulty(data: Test) -> None: - for category in data.category: - if ( - category == "interface" - or category == "iterate" - or category == "product_advisor" - ): + for _, test_data in report.tests.items(): + for category in test_data.category: + if category in ("interface", "iterate", "product_advisor"): continue categories.setdefault(category, 0) - if data.metrics.success: - num_dif = STRING_DIFFICULTY_MAP[data.metrics.difficulty] + if ( + test_data.results + and all(r.success for r in test_data.results) + and test_data.difficulty + ): + num_dif = STRING_DIFFICULTY_MAP[test_data.difficulty] if num_dif > categories[category]: categories[category] = num_dif - for _, test_data in report.tests.items(): - get_highest_category_difficulty(test_data) - return categories @@ -58,9 +58,9 @@ def all_agent_categories(reports_data: dict[str, Any]) -> dict[str, Any]: all_categories: dict[str, Any] = {} for name, report in reports_data.items(): - categories = get_agent_category(report) + categories = get_highest_achieved_difficulty_per_category(report) if categories: # only add to all_categories if categories is not empty - print(f"Adding {name}: {categories}") + logger.debug(f"Adding {name}: {categories}") all_categories[name] = categories return all_categories |