Transformers Local Client

Source: examples/clients/transformers_local_client.py

Introduction

Transformers pipelines are often the first local baseline for experimentation; HELM stresses the value of consistent evaluation scaffolding; and the AI-assisted design education literature motivates reproducible local setups for pedagogy. This example demonstrates the Transformers local client path with deterministic trace output.

Technical Implementation

  1. Configure Tracer with JSONL + console output so each run emits machine-readable traces and lifecycle logs.

  2. Build the runtime surface (public APIs only) and execute TransformersLocalLLMClient.generate(...) with a fixed request_id.

  3. Construct LLMRequest inputs and call generate through the selected client implementation.

  4. Print a compact JSON payload including trace_info for deterministic tests and docs examples.

    flowchart LR
    A["Input prompt or scenario"] --> B["main(): runtime wiring"]
    B --> C["TransformersLocalLLMClient.generate(...)"]
    C --> D["LLMRequest/LLMResponse contracts wrap provider behavior"]
    C --> E["Tracer JSONL + console events"]
    D --> F["ExecutionResult/payload"]
    E --> F
    F --> G["Printed JSON output"]
 1from __future__ import annotations
 2
 3import json
 4from pathlib import Path
 5
 6from design_research_agents import Tracer, TransformersLocalLLMClient
 7from design_research_agents.llm import LLMMessage, LLMRequest
 8
 9
def _build_payload() -> dict[str, object]:
    """Run the local Transformers client and summarize the call as a dict.

    Entering the client as a context manager ensures any loaded model
    resources are released when the example finishes.
    """
    with TransformersLocalLLMClient(
        name="transformers-local-dev",
        model_id="Qwen/Qwen2.5-1.5B-Instruct",
        default_model="Qwen/Qwen2.5-1.5B-Instruct",
        device="auto",
        dtype="auto",
        quantization="none",
        trust_remote_code=False,
        revision="main",
        max_retries=2,
        model_patterns=("Qwen/*", "qwen2.5-*"),
    ) as local_client:
        info = local_client.describe()
        question = "Provide one sentence on why deterministic local runs aid design reproducibility."
        # Build the request first, then invoke generate — the contract types
        # (LLMRequest/LLMMessage) wrap the provider-specific behavior.
        request = LLMRequest(
            messages=(
                LLMMessage(role="system", content="You are a concise engineering design assistant."),
                LLMMessage(role="user", content=question),
            ),
            model=local_client.default_model(),
            temperature=0.0,
            max_tokens=120,
        )
        reply = local_client.generate(request)
        return {
            "client_class": info["client_class"],
            "default_model": info["default_model"],
            "llm_call": {
                "prompt": question,
                "response_text": reply.text,
                "response_model": reply.model,
                "response_provider": reply.provider,
                "response_has_text": bool(reply.text.strip()),
            },
            "backend": info["backend"],
            "capabilities": info["capabilities"],
            "server": info["server"],
        }
53
54
def main() -> None:
    """Execute a traced Transformers client call and print the JSON payload.

    Wires a Tracer (JSONL + console), runs ``_build_payload`` under it with a
    fixed request id, augments the result with example/trace metadata, and
    prints a deterministic JSON document.
    """
    # Fixed request id keeps traces and docs output deterministic across runs.
    request_id = "example-clients-transformers-local-call-001"
    tracer = Tracer(
        enabled=True,
        trace_dir=Path("artifacts/examples/traces"),
        enable_jsonl=True,
        enable_console=True,
    )
    payload = tracer.run_callable(
        agent_name="ExamplesTransformersClientCall",
        request_id=request_id,
        input_payload={"scenario": "transformers-local-client-call"},
        function=_build_payload,
    )
    # `assert` is stripped under `python -O`; validate the runtime contract
    # explicitly so the failure mode is the same in optimized runs.
    if not isinstance(payload, dict):
        raise TypeError(f"run_callable returned {type(payload).__name__}, expected dict")
    payload["example"] = "clients/transformers_local_client.py"
    payload["trace"] = tracer.trace_info(request_id)
    # ensure_ascii + sort_keys keep the printed example output stable across runs.
    print(json.dumps(payload, ensure_ascii=True, indent=2, sort_keys=True))


if __name__ == "__main__":
    main()

Expected Results

Run Command

PYTHONPATH=src python3 examples/clients/transformers_local_client.py

Example output captured with DRA_EXAMPLE_LLM_MODE=deterministic (timestamps, durations, and trace filenames vary by run):

{
  "backend": {
    "base_url": null,
    "default_model": "Qwen/Qwen2.5-1.5B-Instruct",
    "device": "auto",
    "dtype": "auto",
    "kind": "transformers_local",
    "max_retries": 2,
    "model_id": "Qwen/Qwen2.5-1.5B-Instruct",
    "model_patterns": [
      "Qwen/*",
      "qwen2.5-*"
    ],
    "name": "transformers-local-dev",
    "quantization": "none"
  },
  "capabilities": {
    "json_mode": "prompt+validate",
    "max_context_tokens": null,
    "streaming": false,
    "tool_calling": "best_effort",
    "vision": false
  },
  "client_class": "TransformersLocalLLMClient",
  "default_model": "Qwen/Qwen2.5-1.5B-Instruct",
  "example": "clients/transformers_local_client.py",
  "llm_call": {
    "prompt": "Provide one sentence on why deterministic local runs aid design reproducibility.",
    "response_has_text": true,
    "response_model": "Qwen/Qwen2.5-1.5B-Instruct",
    "response_provider": "example-test-monkeypatch",
    "response_text": "Deterministic local runs make design comparisons repeatable across experiments."
  },
  "server": null,
  "trace": {
    "request_id": "example-clients-transformers-local-call-001",
    "trace_dir": "artifacts/examples/traces",
    "trace_path": "artifacts/examples/traces/run_20260222T162206Z_example-clients-transformers-local-call-001.jsonl"
  }
}

References