Groq Service Client
Source: examples/clients/groq_service_client.py
Introduction
Groq hosted inference can provide low-latency responses for agent loops that still need standard chat-completion semantics such as streaming and tool-call metadata. This example runs the Groq service client through the same framework contracts used by other providers, with trace artifacts suitable for regression checks.
Technical Implementation
1. Configure Tracer with JSONL + console output for repeatable diagnostics.
2. Build one request using public APIs and execute GroqServiceLLMClient.generate(...).
3. Serialize the key response contract fields and backend metadata into one JSON payload.
4. Emit the payload with fixed request id metadata for deterministic documentation tests.
flowchart LR
A["Prompt input"] --> B["main(): tracing setup"]
B --> C["GroqServiceLLMClient.generate(...)"]
C --> D["LLMRequest and LLMResponse contracts"]
C --> E["Tracer lifecycle events"]
D --> F["Output payload"]
E --> F
F --> G["Printed JSON result"]
1from __future__ import annotations
2
3import json
4from pathlib import Path
5
6from design_research_agents import GroqServiceLLMClient, Tracer
7from design_research_agents.llm import LLMMessage, LLMRequest
8
9
def _build_payload() -> dict[str, object]:
    """Run one Groq chat request and collect the fields this example reports.

    Returns:
        A dict with the client description fields (``client_class``,
        ``default_model``, ``backend``, ``capabilities``, ``server``) plus an
        ``llm_call`` entry summarizing the prompt and the response.
    """
    # Hosted Groq client, configured only through its public constructor.
    groq_client = GroqServiceLLMClient(
        name="groq-prod",
        default_model="llama-3.1-8b-instant",
        api_key_env="GROQ_API_KEY",
        api_key="example-key-for-config-demo",
        base_url="https://api.groq.com",
        max_retries=3,
        model_patterns=("llama-3.1-8b-instant", "llama-3.1-*"),
    )
    # The description is captured before the request is issued.
    info = groq_client.describe()

    user_prompt = "Provide one sentence on when teams should trade latency for review depth."
    conversation = (
        LLMMessage(role="system", content="You are a concise engineering design assistant."),
        LLMMessage(role="user", content=user_prompt),
    )
    request = LLMRequest(
        messages=conversation,
        model=groq_client.default_model(),
        temperature=0.0,
        max_tokens=120,
    )
    reply = groq_client.generate(request)

    # Summarize the single call: what was asked and what came back.
    call_summary = {
        "prompt": user_prompt,
        "response_text": reply.text,
        "response_model": reply.model,
        "response_provider": reply.provider,
        "response_has_text": bool(reply.text.strip()),
    }

    result: dict[str, object] = {
        "client_class": info["client_class"],
        "default_model": info["default_model"],
        "llm_call": call_summary,
    }
    # Remaining description sections are copied through unchanged.
    for section in ("backend", "capabilities", "server"):
        result[section] = info[section]
    return result
49
50
def main() -> None:
    """Execute the Groq client example under a tracer and print its JSON payload."""
    # A constant request id keeps trace records and documented output stable between runs.
    run_id = "example-clients-groq-service-call-001"
    trace_sink = Tracer(
        enabled=True,
        trace_dir=Path("artifacts/examples/traces"),
        enable_jsonl=True,
        enable_console=True,
    )
    result = trace_sink.run_callable(
        agent_name="ExamplesGroqServiceClientCall",
        request_id=run_id,
        input_payload={"scenario": "groq-service-client-call"},
        function=_build_payload,
    )
    # Narrow the tracer's return value before adding keys to it.
    assert isinstance(result, dict)
    result["example"] = "clients/groq_service_client.py"
    result["trace"] = trace_sink.trace_info(run_id)

    # Emit the final payload as sorted, indented, ASCII-only JSON.
    rendered = json.dumps(result, ensure_ascii=True, indent=2, sort_keys=True)
    print(rendered)


if __name__ == "__main__":
    main()
Expected Results
Run Command
PYTHONPATH=src python3 examples/clients/groq_service_client.py
Example output captured with DRA_EXAMPLE_LLM_MODE=deterministic
(timestamps, durations, and trace filenames vary by run):
{
"backend": {
"api_key_env": "GROQ_API_KEY",
"base_url": "https://api.groq.com",
"default_model": "llama-3.1-8b-instant",
"kind": "groq_service",
"max_retries": 3,
"model_patterns": [
"llama-3.1-8b-instant",
"llama-3.1-*"
],
"name": "groq-prod"
},
"capabilities": {
"json_mode": "native",
"max_context_tokens": null,
"streaming": true,
"tool_calling": "native",
"vision": false
},
"client_class": "GroqServiceLLMClient",
"default_model": "llama-3.1-8b-instant",
"example": "clients/groq_service_client.py",
"llm_call": {
"prompt": "Provide one sentence on when teams should trade latency for review depth.",
"response_has_text": true,
"response_model": "llama-3.1-8b-instant",
"response_provider": "example-test-monkeypatch",
"response_text": "Prefer deeper review when architectural choices are expensive to reverse."
},
"server": null,
"trace": {
"request_id": "example-clients-groq-service-call-001",
"trace_dir": "artifacts/examples/traces",
"trace_path": "artifacts/examples/traces/run_20260222T162206Z_example-clients-groq-service-call-001.jsonl"
}
}