Test Case Models¶

evaldeck.test_case.EvalCase ¶

Bases: BaseModel

A test case for evaluating an agent.

Test cases define conversation turns to send to the agent and the expected behavior/output to validate against for each turn.

Example

Single turn:

    turns:
      - user: "Book a flight to NYC"
        expected:
          tools_called: [search_flights, book_flight]

Multi-turn:

    turns:
      - user: "I want to book a flight"
      - user: "NYC to LA, March 15"
        expected:
          tools_called: [search_flights]
      - user: "Book the cheapest one"
        expected:
          tools_called: [book_flight]

is_multi_turn `property` ¶

is_multi_turn

Check if this is a multi-turn conversation.

expected `property` ¶

expected

Get the expected behavior from the first turn (for backward compatibility with graders).

graders `property` ¶

graders

Get the graders from the first turn (for backward compatibility).

input `property` ¶

input

Get the input from the first turn (for backward compatibility).

from_yaml `classmethod` ¶

from_yaml(path)

Load a test case from a YAML file.

Source code in src/evaldeck/test_case.py

@classmethod
def from_yaml(cls, path: str | Path) -> EvalCase:
    """Load a test case from a YAML file.

    Args:
        path: Location of the YAML file to read.

    Returns:
        The test case built by ``cls._from_dict`` from the parsed document.
    """
    # Decode as UTF-8 explicitly: without ``encoding``, open() falls back to
    # the platform's locale encoding, which can mis-decode non-ASCII content.
    with open(path, encoding="utf-8") as f:
        data = yaml.safe_load(f)
    return cls._from_dict(data)

from_yaml_string `classmethod` ¶

from_yaml_string(content)

Load a test case from a YAML string.

Source code in src/evaldeck/test_case.py

@classmethod
def from_yaml_string(cls, content: str) -> EvalCase:
    """Build a test case from YAML text held in memory.

    Args:
        content: The YAML document as a string.

    Returns:
        The test case produced by ``cls._from_dict`` from the parsed document.
    """
    return cls._from_dict(yaml.safe_load(content))

to_yaml ¶

to_yaml()

Convert test case to YAML string.

Source code in src/evaldeck/test_case.py

def to_yaml(self) -> str:
    """Serialize this test case to a YAML string.

    Fields whose value is ``None`` are left out of the dump, and the YAML is
    emitted with ``default_flow_style=False``.
    """
    payload = self.model_dump(exclude_none=True)
    rendered: str = yaml.dump(payload, default_flow_style=False)
    return rendered

evaldeck.test_case.ExpectedBehavior ¶

Bases: BaseModel

Expected behavior for an agent test case.

evaldeck.test_case.EvalSuite ¶

Bases: BaseModel

A collection of test cases.

from_directory `classmethod` ¶

from_directory(path, name=None)

Load all test cases from a directory.

Source code in src/evaldeck/test_case.py

@classmethod
def from_directory(cls, path: str | Path, name: str | None = None) -> EvalSuite:
    """Load all test cases from a directory.

    Files directly inside ``path`` matching ``*.yaml`` are loaded first (in
    sorted order), followed by those matching ``*.yml`` (in sorted order).
    Files whose name starts with ``_`` are skipped.

    Args:
        path: Directory to scan for test case files.
        name: Name for the suite; the directory's own name is used when this
            is ``None`` or empty.

    Returns:
        A suite containing every test case that was loaded.

    Raises:
        ValueError: If ``path`` is not a directory, or if any file fails to
            load (the original exception is chained as the cause).
    """
    path = Path(path)
    if not path.is_dir():
        raise ValueError(f"Path is not a directory: {path}")

    test_cases = []
    # One loop handles both extensions; iterating the patterns in this order
    # keeps all ``.yaml`` cases ahead of the ``.yml`` ones.
    for pattern in ("*.yaml", "*.yml"):
        for file in sorted(path.glob(pattern)):
            if file.name.startswith("_"):
                continue
            try:
                test_cases.append(EvalCase.from_yaml(file))
            except Exception as e:
                # Re-raise with the offending file named so the failure is traceable.
                raise ValueError(f"Failed to load {file}: {e}") from e

    return cls(
        name=name or path.name,
        test_cases=test_cases,
    )

filter_by_tags ¶

filter_by_tags(tags)

Return a new suite with only test cases matching the given tags.

Source code in src/evaldeck/test_case.py

def filter_by_tags(self, tags: list[str]) -> EvalSuite:
    """Build a new suite restricted to test cases carrying any of ``tags``.

    A test case is kept when at least one entry of ``tags`` appears in its
    own ``tags``. The suite's name, description, defaults and tags are
    passed through to the new suite unchanged.

    Args:
        tags: Tags to match against each test case.

    Returns:
        A new ``EvalSuite`` holding only the matching test cases.
    """
    matching = []
    for case in self.test_cases:
        for wanted in tags:
            if wanted in case.tags:
                matching.append(case)
                # One hit is enough; stop so the case is added only once.
                break
    return EvalSuite(
        name=self.name,
        description=self.description,
        test_cases=matching,
        defaults=self.defaults,
        tags=self.tags,
    )

evaldeck.test_case.GraderConfig ¶

Bases: BaseModel

Configuration for a grader.

Test Case Models¶

evaldeck.test_case.EvalCase ¶

is_multi_turn property ¶

expected property ¶

graders property ¶

input property ¶

from_yaml classmethod ¶

from_yaml_string classmethod ¶

to_yaml ¶

evaldeck.test_case.ExpectedBehavior ¶

evaldeck.test_case.EvalSuite ¶

from_directory classmethod ¶

filter_by_tags ¶

evaldeck.test_case.GraderConfig ¶

is_multi_turn `property` ¶

expected `property` ¶

graders `property` ¶

input `property` ¶

from_yaml `classmethod` ¶

from_yaml_string `classmethod` ¶

from_directory `classmethod` ¶