Transformers Local Client
Source: examples/clients/transformers_local_client.py
Introduction
Transformers pipelines are often the first local baseline for experimentation. HELM stresses the value of consistent evaluation scaffolding, and the AI-assisted design education literature motivates reproducible local setups for pedagogy. This example demonstrates the Transformers local client path with deterministic trace output.
Technical Implementation
- Configure `Tracer` with JSONL + console output so each run emits machine-readable traces and lifecycle logs.
- Build the runtime surface (public APIs only) and execute `TransformersLocalLLMClient.generate(...)` with a fixed `request_id`.
- Construct `LLMRequest` inputs and call `generate` through the selected client implementation.
- Print a compact JSON payload including `trace_info` for deterministic tests and docs examples.
flowchart LR
A["Input prompt or scenario"] --> B["main(): runtime wiring"]
B --> C["TransformersLocalLLMClient.generate(...)"]
C --> D["LLMRequest/LLMResponse contracts wrap provider behavior"]
C --> E["Tracer JSONL + console events"]
D --> F["ExecutionResult/payload"]
E --> F
F --> G["Printed JSON output"]
1from __future__ import annotations
2
3import json
4from pathlib import Path
5
6from design_research_agents import Tracer, TransformersLocalLLMClient
7from design_research_agents.llm import LLMMessage, LLMRequest
8
9
def _build_payload() -> dict[str, object]:
    """Call the local Transformers client and return a JSON-serializable summary.

    Uses only public runtime APIs. The ``with`` statement releases any loaded
    model resources automatically once the example finishes.
    """
    with TransformersLocalLLMClient(
        name="transformers-local-dev",
        model_id="Qwen/Qwen2.5-1.5B-Instruct",
        default_model="Qwen/Qwen2.5-1.5B-Instruct",
        device="auto",
        dtype="auto",
        quantization="none",
        trust_remote_code=False,
        revision="main",
        max_retries=2,
        model_patterns=("Qwen/*", "qwen2.5-*"),
    ) as client:
        info = client.describe()
        question = "Provide one sentence on why deterministic local runs aid design reproducibility."
        # Build the request first, then generate — keeps the call site compact.
        request = LLMRequest(
            messages=(
                LLMMessage(role="system", content="You are a concise engineering design assistant."),
                LLMMessage(role="user", content=question),
            ),
            model=client.default_model(),
            temperature=0.0,
            max_tokens=120,
        )
        reply = client.generate(request)
        call_summary = {
            "prompt": question,
            "response_text": reply.text,
            "response_model": reply.model,
            "response_provider": reply.provider,
            "response_has_text": bool(reply.text.strip()),
        }
        return {
            "client_class": info["client_class"],
            "default_model": info["default_model"],
            "llm_call": call_summary,
            "backend": info["backend"],
            "capabilities": info["capabilities"],
            "server": info["server"],
        }
53
54
def main() -> None:
    """Run the traced Transformers client call and print its JSON payload.

    Wires up a ``Tracer`` (JSONL + console), executes ``_build_payload`` under
    tracing, attaches example/trace metadata, and prints the result as
    deterministic, sorted JSON suitable for docs snapshots.
    """
    # Fixed request id keeps traces and docs output deterministic across runs.
    request_id = "example-clients-transformers-local-call-001"
    tracer = Tracer(
        enabled=True,
        trace_dir=Path("artifacts/examples/traces"),
        enable_jsonl=True,
        enable_console=True,
    )
    payload = tracer.run_callable(
        agent_name="ExamplesTransformersClientCall",
        request_id=request_id,
        input_payload={"scenario": "transformers-local-client-call"},
        function=_build_payload,
    )
    # `assert` is stripped under `python -O`; validate explicitly so a
    # misbehaving tracer surfaces as a clear error in every run mode.
    if not isinstance(payload, dict):
        raise TypeError(f"run_callable returned {type(payload).__name__}, expected dict")
    payload["example"] = "clients/transformers_local_client.py"
    payload["trace"] = tracer.trace_info(request_id)
    # ensure_ascii + sort_keys keep the printed example byte-stable for docs.
    print(json.dumps(payload, ensure_ascii=True, indent=2, sort_keys=True))
76
77
# Allow running this example directly as a script.
if __name__ == "__main__":
    main()
Expected Results
Run Command
PYTHONPATH=src python3 examples/clients/transformers_local_client.py
Example output captured with DRA_EXAMPLE_LLM_MODE=deterministic
(timestamps, durations, and trace filenames vary by run):
{
"backend": {
"base_url": null,
"default_model": "Qwen/Qwen2.5-1.5B-Instruct",
"device": "auto",
"dtype": "auto",
"kind": "transformers_local",
"max_retries": 2,
"model_id": "Qwen/Qwen2.5-1.5B-Instruct",
"model_patterns": [
"Qwen/*",
"qwen2.5-*"
],
"name": "transformers-local-dev",
"quantization": "none"
},
"capabilities": {
"json_mode": "prompt+validate",
"max_context_tokens": null,
"streaming": false,
"tool_calling": "best_effort",
"vision": false
},
"client_class": "TransformersLocalLLMClient",
"default_model": "Qwen/Qwen2.5-1.5B-Instruct",
"example": "clients/transformers_local_client.py",
"llm_call": {
"prompt": "Provide one sentence on why deterministic local runs aid design reproducibility.",
"response_has_text": true,
"response_model": "Qwen/Qwen2.5-1.5B-Instruct",
"response_provider": "example-test-monkeypatch",
"response_text": "Deterministic local runs make design comparisons repeatable across experiments."
},
"server": null,
"trace": {
"request_id": "example-clients-transformers-local-call-001",
"trace_dir": "artifacts/examples/traces",
"trace_path": "artifacts/examples/traces/run_20260222T162206Z_example-clients-transformers-local-call-001.jsonl"
}
}