Skip to content

EvalConfig

DataConfig

Bases: ConfigBaseModel

Data config

Source code in utu/config/eval_config.py
10
11
12
13
14
15
16
17
18
19
20
class DataConfig(ConfigBaseModel):
    """Data config

    Describes a dataset: where it comes from, whether it holds one benchmark
    or several, and which fields of each record carry the question and the
    ground truth.

    All four fields are declared without a default value.
    """

    # Either the name of a built-in dataset or a path to a custom one; this
    # class does not itself distinguish between the two.
    dataset: str
    """Built-in dataset name or custom dataset path"""
    # Restricted by the Literal annotation to exactly "single" or "mixed".
    # NOTE(review): the field name shadows the builtin `type` within the class
    # body; renaming it would change the public config interface, so it stays.
    type: Literal["single", "mixed"]
    """Whether the dataset contains only single benchmark data or multiple benchmarks"""
    # Name of the dataset field that holds the question text.
    question_field: str
    """Question field name in the dataset"""
    # Name of the dataset field that holds the ground-truth answer.
    gt_field: str
    """Ground truth field name in the dataset"""

dataset instance-attribute

dataset: str

Built-in dataset name or custom dataset path

type instance-attribute

type: Literal['single', 'mixed']

Whether the dataset contains only single benchmark data or multiple benchmarks

question_field instance-attribute

question_field: str

Question field name in the dataset

gt_field instance-attribute

gt_field: str

Ground truth field name in the dataset

EvalConfig

Bases: ConfigBaseModel

Evaluation config

Source code in utu/config/eval_config.py
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
class EvalConfig(ConfigBaseModel):
    """Evaluation config

    Groups the settings into the three sections marked below: data, rollout
    and judgement.

    `concurrency` and `judge_concurrency` are declared without defaults; every
    other field has one. `data`, `agent` and `eval_method` default to `None`,
    and their annotations say so explicitly so that the declared type matches
    the value the attribute can actually hold.
    """

    exp_id: str = "default"
    """Experiment ID"""

    # data
    # The default is computed once, when this class body is executed: it is the
    # value of the DB_URL environment variable at that moment, or the SQLite
    # URL below when the variable is unset.
    # NOTE(review): "tesxt.db" looks like a typo for "test.db". It is left
    # unchanged because renaming it would point existing setups at a different
    # database file; confirm before changing.
    db_url: str = os.getenv("DB_URL", "sqlite:///tesxt.db")
    """Database URL"""
    # `None` is the default, so the annotation must admit it (previously it was
    # plain `DataConfig`, which contradicted the default).
    data: DataConfig | None = None
    """Data config"""

    # rollout
    agent: AgentConfig | None = None
    """Agent config for rollout"""
    concurrency: int
    """Rollout parallelism"""

    # judgement
    # default_factory builds a fresh ModelConfigs per instance rather than
    # sharing one default object between instances.
    judge_model: ModelConfigs = Field(default_factory=ModelConfigs)
    """Judge model config"""
    judge_concurrency: int
    """Judgement parallelism"""
    # `None` is the default, so the annotation must admit it (previously it was
    # plain `str`).
    eval_method: str | None = None
    """Evaluation method"""

exp_id class-attribute instance-attribute

exp_id: str = 'default'

Experiment ID

db_url class-attribute instance-attribute

db_url: str = getenv('DB_URL', 'sqlite:///tesxt.db')

Database URL

data class-attribute instance-attribute

data: DataConfig = None

Data config

agent class-attribute instance-attribute

agent: AgentConfig | None = None

Agent config for rollout

concurrency instance-attribute

concurrency: int

Rollout parallelism

judge_model class-attribute instance-attribute

judge_model: ModelConfigs = Field(
    default_factory=ModelConfigs
)

Judge model config

judge_concurrency instance-attribute

judge_concurrency: int

Judgement parallelism

eval_method class-attribute instance-attribute

eval_method: str = None

Evaluation method