MLX Local Client
Source: examples/clients/mlx_local_client.py
Introduction
MLX-LM provides an Apple-silicon-native local inference stack, HELM motivates reproducible evaluation baselines, and AI-assisted design synthesis work connects these runtimes to educational design workflows. This example exercises the MLX local client path with trace artifacts suitable for repeatable comparisons.
Technical Implementation
Configure `Tracer` with JSONL + console output so each run emits machine-readable traces and lifecycle logs. Build the runtime surface (public APIs only) and execute `MLXLocalLLMClient.generate(...)` with a fixed `request_id`. Construct `LLMRequest` inputs and call `generate` through the selected client implementation. Print a compact JSON payload including `trace_info` for deterministic tests and docs examples.
flowchart LR
A["Input prompt or scenario"] --> B["main(): runtime wiring"]
B --> C["MLXLocalLLMClient.generate(...)"]
C --> D["LLMRequest/LLMResponse contracts wrap provider behavior"]
C --> E["Tracer JSONL + console events"]
D --> F["ExecutionResult/payload"]
E --> F
F --> G["Printed JSON output"]
1from __future__ import annotations
2
3import json
4from pathlib import Path
5
6from design_research_agents import MLXLocalLLMClient, Tracer
7from design_research_agents.llm import LLMMessage, LLMRequest
8
9
def _build_payload() -> dict[str, object]:
    """Call the local MLX client and return a JSON-serializable summary.

    Uses only public runtime APIs. The context manager guarantees any
    loaded model resources are released when the example finishes.
    """
    with MLXLocalLLMClient(
        name="mlx-local-dev",
        model_id="mlx-community/Qwen2.5-1.5B-Instruct-4bit",
        default_model="mlx-community/Qwen2.5-1.5B-Instruct-4bit",
        quantization="4bit",
        max_retries=2,
        model_patterns=("mlx-community/*", "qwen2.5-*"),
    ) as client:
        info = client.describe()
        question = "Give one concise guideline for maintainable design telemetry schemas."
        # Build the request up front, then hand it to the client in one call.
        request = LLMRequest(
            messages=(
                LLMMessage(role="system", content="You are a concise engineering design assistant."),
                LLMMessage(role="user", content=question),
            ),
            model=client.default_model(),
            temperature=0.0,
            max_tokens=120,
        )
        reply = client.generate(request)
        # Flatten the interesting bits of the response for the printed payload.
        call_summary = {
            "prompt": question,
            "response_text": reply.text,
            "response_model": reply.model,
            "response_provider": reply.provider,
            "response_has_text": bool(reply.text.strip()),
        }
        return {
            "client_class": info["client_class"],
            "default_model": info["default_model"],
            "llm_call": call_summary,
            "backend": info["backend"],
            "capabilities": info["capabilities"],
            "server": info["server"],
        }
49
50
def main() -> None:
    """Run a traced MLX client call and print the resulting JSON payload.

    Wires up a Tracer with JSONL + console sinks, executes
    ``_build_payload`` under tracing, and prints a deterministic,
    sorted JSON document suitable for docs comparisons.
    """
    # Fixed request id keeps traces and docs output deterministic across runs.
    request_id = "example-clients-mlx-local-call-001"
    tracer = Tracer(
        enabled=True,
        trace_dir=Path("artifacts/examples/traces"),
        enable_jsonl=True,
        enable_console=True,
    )
    payload = tracer.run_callable(
        agent_name="ExamplesMlxClientCall",
        request_id=request_id,
        input_payload={"scenario": "mlx-local-client-call"},
        function=_build_payload,
    )
    # Validate explicitly instead of `assert`: assertions are stripped when
    # Python runs with -O, which would silently skip this check.
    if not isinstance(payload, dict):
        raise TypeError(f"expected dict payload, got {type(payload).__name__}")
    payload["example"] = "clients/mlx_local_client.py"
    payload["trace"] = tracer.trace_info(request_id)
    # ASCII-only, sorted keys => stable, copy-pasteable output for docs.
    print(json.dumps(payload, ensure_ascii=True, indent=2, sort_keys=True))
72
73
# Standard script guard: run the example only when executed directly,
# not when imported (e.g. by tests or docs tooling).
if __name__ == "__main__":
    main()
Expected Results
Run Command
PYTHONPATH=src python3 examples/clients/mlx_local_client.py
Example output captured with DRA_EXAMPLE_LLM_MODE=deterministic
(timestamps, durations, and trace filenames vary by run):
{
"backend": {
"base_url": null,
"default_model": "mlx-community/Qwen2.5-1.5B-Instruct-4bit",
"kind": "mlx_local",
"max_retries": 2,
"model_id": "mlx-community/Qwen2.5-1.5B-Instruct-4bit",
"model_patterns": [
"mlx-community/*",
"qwen2.5-*"
],
"name": "mlx-local-dev",
"quantization": "4bit"
},
"capabilities": {
"json_mode": "prompt+validate",
"max_context_tokens": null,
"streaming": false,
"tool_calling": "best_effort",
"vision": false
},
"client_class": "MLXLocalLLMClient",
"default_model": "mlx-community/Qwen2.5-1.5B-Instruct-4bit",
"example": "clients/mlx_local_client.py",
"llm_call": {
"prompt": "Give one concise guideline for maintainable design telemetry schemas.",
"response_has_text": true,
"response_model": "mlx-community/Qwen2.5-1.5B-Instruct-4bit",
"response_provider": "example-test-monkeypatch",
"response_text": "Keep schema fields stable, documented, and versioned for comparability."
},
"server": null,
"trace": {
"request_id": "example-clients-mlx-local-call-001",
"trace_dir": "artifacts/examples/traces",
"trace_path": "artifacts/examples/traces/run_20260222T162206Z_example-clients-mlx-local-call-001.jsonl"
}
}