From 05b018a837a67233c57e336942afd9ad374ac2d8 Mon Sep 17 00:00:00 2001 From: Reinier van der Leer Date: Fri, 19 Jan 2024 19:52:09 +0100 Subject: fix(benchmark/report): Fix and clean up logic in `update_challenges_already_beaten` - `update_challenges_already_beaten` incorrectly marked a challenge as beaten if it was present in the report file but set to `false` --- benchmark/agbenchmark/reports/reports.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/benchmark/agbenchmark/reports/reports.py b/benchmark/agbenchmark/reports/reports.py index 1e6186f2f..7b03233e4 100644 --- a/benchmark/agbenchmark/reports/reports.py +++ b/benchmark/agbenchmark/reports/reports.py @@ -96,7 +96,6 @@ def finalize_test_report( mock = os.getenv("IS_MOCK") # Check if --mock is in sys.argv - logger.debug(f"Finalizing report with CallInfo: {vars(call)}") if call.excinfo is None: info_details.metrics.success = True else: @@ -135,17 +134,15 @@ def update_challenges_already_beaten( current_run_successful = info_details.metrics.success try: with open(challenges_already_beaten_file, "r") as f: - challenge_data = json.load(f) + challenges_beaten_before = json.load(f) except FileNotFoundError: - challenge_data = {} - challenge_beaten_in_the_past = challenge_data.get(test_name) + challenges_beaten_before = {} - challenge_data[test_name] = True - if challenge_beaten_in_the_past is None and not current_run_successful: - challenge_data[test_name] = False + has_ever_been_beaten = challenges_beaten_before.get(test_name) + challenges_beaten_before[test_name] = has_ever_been_beaten or current_run_successful with open(challenges_already_beaten_file, "w") as f: - json.dump(challenge_data, f, indent=4) + json.dump(challenges_beaten_before, f, indent=4) def session_finish(agbenchmark_config: AgentBenchmarkConfig) -> None: -- cgit v1.2.3