Ollama Local Client#
Source: examples/clients/ollama_local_client.py
Introduction#
Ollama operationalizes local model serving, the OpenAI Responses API provides a common contract surface, and HELM underlines why comparable execution conditions matter in benchmarking. This example verifies the Ollama client integration path under the project tracing/runtime conventions.
Technical Implementation#
Configure `Tracer` with JSONL + console output so each run emits machine-readable traces and lifecycle logs.
Build the runtime surface (public APIs only) and execute `OllamaLLMClient.generate(...)` with a fixed `request_id`.
Construct `LLMRequest` inputs and call `generate` through the selected client implementation.
Print a compact JSON payload including `trace_info` for deterministic tests and docs examples.
flowchart LR
A["Input prompt or scenario"] --> B["main(): runtime wiring"]
B --> C["OllamaLLMClient.generate(...)"]
C --> D["LLMRequest/LLMResponse contracts wrap provider behavior"]
C --> E["Tracer JSONL + console events"]
D --> F["ExecutionResult/payload"]
E --> F
F --> G["Printed JSON output"]
1from __future__ import annotations
2
3import json
4from pathlib import Path
5
6import design_research_agents as drag
7
8
def _build_payload() -> dict[str, object]:
    """Run one generation through the managed Ollama client and summarize it.

    Returns a JSON-serializable dict combining the client's ``describe()``
    output with the prompt/response details of a single ``generate`` call.
    Uses public runtime APIs only.
    """
    # The context manager guarantees the managed local server is shut down
    # when the example finishes, even if the call raises.
    with drag.OllamaLLMClient(
        name="ollama-local-dev",
        default_model="qwen2.5:1.5b-instruct",
        host="127.0.0.1",
        port=11434,
        manage_server=True,
        ollama_executable="ollama",
        auto_pull_model=False,
        startup_timeout_seconds=60.0,
        poll_interval_seconds=0.25,
        request_timeout_seconds=60.0,
        max_retries=2,
        model_patterns=("qwen2.5:*", "llama3:*"),
    ) as client:
        info = client.describe()
        prompt = "Give one sentence on when to use local model pull automation."
        request = drag.LLMRequest(
            messages=(
                drag.LLMMessage(role="system", content="You are a concise engineering design assistant."),
                drag.LLMMessage(role="user", content=prompt),
            ),
            model=client.default_model(),
            temperature=0.0,
            max_tokens=120,
        )
        response = client.generate(request)
    # response_has_text flags empty/whitespace-only output explicitly so the
    # captured docs example stays meaningful even for degenerate responses.
    return {
        "client_class": info["client_class"],
        "default_model": info["default_model"],
        "llm_call": {
            "prompt": prompt,
            "response_text": response.text,
            "response_model": response.model,
            "response_provider": response.provider,
            "response_has_text": bool(response.text.strip()),
        },
        "backend": info["backend"],
        "capabilities": info["capabilities"],
        "server": info["server"],
    }
54
55
def main() -> None:
    """Run the traced Ollama client call and print its JSON payload."""
    # Fixed request id keeps traces and docs output deterministic across runs.
    request_id = "example-clients-ollama-local-call-001"
    # JSONL + console output: machine-readable traces plus lifecycle logs.
    tracer = drag.Tracer(
        enabled=True,
        trace_dir=Path("artifacts/examples/traces"),
        enable_jsonl=True,
        enable_console=True,
    )
    payload = tracer.run_callable(
        agent_name="ExamplesOllamaClientCall",
        request_id=request_id,
        input_payload={"scenario": "ollama-local-client-call"},
        function=_build_payload,
    )
    # Explicit check instead of `assert`: asserts are stripped under -O,
    # which would let a non-dict payload slip through to the item writes below.
    if not isinstance(payload, dict):
        raise TypeError(f"expected dict payload, got {type(payload).__name__}")
    payload["example"] = "clients/ollama_local_client.py"
    payload["trace"] = tracer.trace_info(request_id)
    # sort_keys + ensure_ascii keep the printed example output stable across runs.
    print(json.dumps(payload, ensure_ascii=True, indent=2, sort_keys=True))
77
78
if __name__ == "__main__":
    main()
Expected Results#
Run Command
PYTHONPATH=src python3 examples/clients/ollama_local_client.py
Example output captured with DRA_EXAMPLE_LLM_MODE=deterministic
(timestamps, durations, and trace filenames vary by run):
{
"backend": {
"base_url": "http://127.0.0.1:11434",
"default_model": "qwen2.5:1.5b-instruct",
"host": "127.0.0.1",
"kind": "ollama",
"max_retries": 2,
"model_patterns": [
"qwen2.5:*",
"llama3:*"
],
"name": "ollama-local-dev",
"port": 11434
},
"capabilities": {
"json_mode": "prompt+validate",
"max_context_tokens": null,
"streaming": false,
"tool_calling": "best_effort",
"vision": false
},
"client_class": "OllamaLLMClient",
"default_model": "qwen2.5:1.5b-instruct",
"example": "clients/ollama_local_client.py",
"llm_call": {
"prompt": "Give one sentence on when to use local model pull automation.",
"response_has_text": true,
"response_model": "qwen2.5:1.5b-instruct",
"response_provider": "example-test-monkeypatch",
"response_text": "Use automated local pulls when startup reliability matters more than cold-start time."
},
"server": {
"host": "127.0.0.1",
"kind": "ollama",
"managed": true,
"port": 11434
},
"trace": {
"request_id": "example-clients-ollama-local-call-001",
"trace_dir": "artifacts/examples/traces",
"trace_path": "artifacts/examples/traces/run_20260222T162206Z_example-clients-ollama-local-call-001.jsonl"
}
}