aboutsummaryrefslogtreecommitdiff
path: root/benchmark/reports/mini-agi/5_TestRevenueRetrieval_1.0.json
blob: 28d091d28f24a80011bcaab8a3e7a689c7ac3739 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
{
  "command": "agbenchmark start --test TestRetrieval2",
  "completion_time": "2023-07-17-13:54",
  "metrics": {
    "run_time": "36 seconds",
    "highest_difficulty": "TestRetrieval2: 3"
  },
  "tests": {
    "TestRetrieval2": {
      "data_path": "agbenchmark/challenges/retrieval/r2_tesla_revenue",
      "is_regression": false,
      "reached_cutoff": false,
      "answer": "It was $81.462 billion in 2022.",
      "description": "A no guardrails search for info",
      "metrics": {
        "difficulty": "novice",
        "success": true,
        "success_%": 50.0,
        "run_time": "35.59 seconds"
      }
    }
  },
  "config": {
    "workspace": "${os.path.join(Path.home(), 'miniagi')}"
  },
  "additional": {
    "model": "gpt-4"
  }
}