Trace¶

evaldeck.trace.Trace ¶

Bases: BaseModel

Complete execution trace of an agent.

A trace captures everything that happened during an agent's execution, from the initial input to the final output, including all intermediate steps (LLM calls, tool calls, reasoning).

tool_calls `property` ¶

tool_calls

Get all tool call steps.

llm_calls `property` ¶

llm_calls

Get all LLM call steps.

tools_called `property` ¶

tools_called

Get the list of names of the tools that were called.

total_tokens `property` ¶

total_tokens

Get total tokens used across all LLM calls.

step_count `property` ¶

step_count

Get total number of steps.

model_post_init ¶

model_post_init(__context)

Generate ID if not provided.

Source code in src/evaldeck/trace.py

def model_post_init(self, __context: Any) -> None:
    """Assign a short random identifier when the instance has none.

    An ``id`` that is already truthy is kept as-is; otherwise it is set to
    the first eight hexadecimal characters of a freshly generated UUID4.

    Args:
        __context: Not used by this hook.
    """
    if self.id:
        return

    # Imported inside the function, only when an ID has to be generated.
    from uuid import uuid4

    self.id = uuid4().hex[:8]

add_step ¶

add_step(step)

Add a step to the trace.

Source code in src/evaldeck/trace.py

def add_step(self, step: Step) -> None:
    """Append ``step`` to the end of this trace's ``steps`` list.

    Args:
        step: The step to record.
    """
    recorded = self.steps
    recorded.append(step)

complete ¶

complete(output, status=SUCCESS)

Mark the trace as complete.

Source code in src/evaldeck/trace.py

def complete(self, output: str, status: TraceStatus = TraceStatus.SUCCESS) -> None:
    """Mark the trace as complete.

    Stores the final output and status, stamps ``completed_at`` with the
    current time and, when ``started_at`` is set, computes ``duration_ms``
    as the elapsed time between the two timestamps.

    Args:
        output: Final output of the agent run.
        status: Final status of the trace.
    """
    self.output = output
    self.status = status
    if self.started_at:
        # Take "now" in the same timezone flavour as started_at. Subtracting
        # a naive datetime from an aware one raises TypeError; for a naive
        # started_at, tzinfo is None and this is exactly datetime.now().
        self.completed_at = datetime.now(tz=self.started_at.tzinfo)
        elapsed = self.completed_at - self.started_at
        self.duration_ms = elapsed.total_seconds() * 1000
    else:
        # No start time recorded: only the completion timestamp can be set.
        self.completed_at = datetime.now()

to_dict ¶

to_dict()

Convert trace to dictionary.

Source code in src/evaldeck/trace.py

def to_dict(self) -> dict[str, Any]:
    """Serialize this trace into a plain dictionary.

    Returns:
        The result of ``model_dump`` called in ``"json"`` mode.
    """
    payload = self.model_dump(mode="json")
    return payload

from_dict `classmethod` ¶

from_dict(data)

Create trace from dictionary.

Source code in src/evaldeck/trace.py

@classmethod
def from_dict(cls, data: dict[str, Any]) -> Trace:
    """Build a trace from a dictionary.

    Args:
        data: Mapping handed to ``model_validate``.

    Returns:
        Whatever ``cls.model_validate`` produces for ``data``.
    """
    trace = cls.model_validate(data)
    return trace

evaldeck.trace.Step ¶

Bases: BaseModel

A single step in an agent's execution trace.

Steps can represent LLM calls, tool calls, reasoning steps, or human input.

model_post_init ¶

model_post_init(__context)

Generate ID if not provided.

Source code in src/evaldeck/trace.py

def model_post_init(self, __context: Any) -> None:
    """Fill in ``id`` with a short random value if it is empty.

    When ``id`` is falsy it is replaced by the first eight hexadecimal
    characters of a new UUID4; a truthy ``id`` is left untouched.

    Args:
        __context: Not used by this hook.
    """
    if self.id:
        return

    # Imported inside the function, only when an ID has to be generated.
    from uuid import uuid4

    self.id = uuid4().hex[:8]

llm_call `classmethod` ¶

llm_call(model, input, output, tokens=None, **kwargs)

Create an LLM call step.

Source code in src/evaldeck/trace.py

@classmethod
def llm_call(
    cls,
    model: str,
    input: str,
    output: str,
    tokens: TokenUsage | None = None,
    **kwargs: Any,
) -> Step:
    """Build a step of type ``StepType.LLM_CALL``.

    Args:
        model: Value stored in the step's ``model`` field.
        input: Value stored in the step's ``input`` field.
        output: Value stored in the step's ``output`` field.
        tokens: Optional token usage for the call.
        **kwargs: Extra keyword arguments forwarded to the constructor.

    Returns:
        The newly constructed step.
    """
    fields = {
        "type": StepType.LLM_CALL,
        "model": model,
        "input": input,
        "output": output,
        "tokens": tokens,
    }
    # A key repeated in kwargs still raises TypeError, as with explicit keywords.
    return cls(**fields, **kwargs)

tool_call `classmethod` ¶

tool_call(tool_name, tool_args=None, tool_result=None, **kwargs)

Create a tool call step.

Source code in src/evaldeck/trace.py

@classmethod
def tool_call(
    cls,
    tool_name: str,
    tool_args: dict[str, Any] | None = None,
    tool_result: Any = None,
    **kwargs: Any,
) -> Step:
    """Build a step of type ``StepType.TOOL_CALL``.

    Args:
        tool_name: Name of the tool that was called.
        tool_args: Arguments given to the tool; a falsy value (including
            ``None``) is replaced by a new empty dict.
        tool_result: Result returned by the tool, if any.
        **kwargs: Extra keyword arguments forwarded to the constructor.

    Returns:
        The newly constructed step.
    """
    args = tool_args if tool_args else {}
    step = cls(
        type=StepType.TOOL_CALL,
        tool_name=tool_name,
        tool_args=args,
        tool_result=tool_result,
        **kwargs,
    )
    return step

reasoning `classmethod` ¶

reasoning(text, **kwargs)

Create a reasoning step.

Source code in src/evaldeck/trace.py

@classmethod
def reasoning(cls, text: str, **kwargs: Any) -> Step:
    """Build a step of type ``StepType.REASONING``.

    Args:
        text: Stored in the step's ``reasoning_text`` field.
        **kwargs: Extra keyword arguments forwarded to the constructor.

    Returns:
        The newly constructed step.
    """
    step = cls(type=StepType.REASONING, reasoning_text=text, **kwargs)
    return step

evaldeck.trace.TokenUsage ¶

Bases: BaseModel

Token usage for an LLM call.

cost_estimate `property` ¶

cost_estimate

Estimate the cost based on token usage. Returns None if the model is unknown.

evaldeck.trace.StepType ¶

Bases: str, Enum

Type of step in an agent trace.

evaldeck.trace.StepStatus ¶

Bases: str, Enum

Status of a step execution.

evaldeck.trace.TraceStatus ¶

Bases: str, Enum

Status of the overall trace execution.

Trace¶

evaldeck.trace.Trace ¶

tool_calls property ¶

llm_calls property ¶

tools_called property ¶

total_tokens property ¶

step_count property ¶

model_post_init ¶

add_step ¶

complete ¶

to_dict ¶

from_dict classmethod ¶

evaldeck.trace.Step ¶

model_post_init ¶

llm_call classmethod ¶

tool_call classmethod ¶

reasoning classmethod ¶

evaldeck.trace.TokenUsage ¶

cost_estimate property ¶

evaldeck.trace.StepType ¶

evaldeck.trace.StepStatus ¶

evaldeck.trace.TraceStatus ¶

tool_calls `property` ¶

llm_calls `property` ¶

tools_called `property` ¶

total_tokens `property` ¶

step_count `property` ¶

from_dict `classmethod` ¶

llm_call `classmethod` ¶

tool_call `classmethod` ¶

reasoning `classmethod` ¶

cost_estimate `property` ¶