blob: 9090d1942c30f645e2e3ee1bcf8e7f9205aeb162 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
|
from enum import Enum
from typing import Literal
from pydantic import BaseModel
class DifficultyLevel(Enum):
interface = "interface"
basic = "basic"
novice = "novice"
intermediate = "intermediate"
advanced = "advanced"
expert = "expert"
human = "human"
# map from enum to difficulty level (numeric)
DIFFICULTY_MAP = {
DifficultyLevel.interface: 1,
DifficultyLevel.basic: 2,
DifficultyLevel.novice: 3,
DifficultyLevel.intermediate: 4,
DifficultyLevel.advanced: 5,
DifficultyLevel.expert: 6,
DifficultyLevel.human: 7,
}
STRING_DIFFICULTY_MAP = {e.value: DIFFICULTY_MAP[e] for e in DifficultyLevel}
class Category(str, Enum):
DATA = "data"
GENERALIST = "general"
CODING = "coding"
SCRAPE_SYNTHESIZE = "scrape_synthesize"
VALIDATION = "validation"
WEB = "web"
GAIA_1 = "GAIA_1"
GAIA_2 = "GAIA_2"
GAIA_3 = "GAIA_3"
class EvalResult(BaseModel):
result: str
result_source: Literal["step_output"] | str
score: float
passed: bool
|