Gemini Service Client
Source: examples/clients/gemini_service_client.py
Introduction
Gemini-hosted inference is useful when teams want multi-model experimentation through one provider SDK, while keeping request payloads under the framework’s provider-neutral LLM contracts. This example exercises the Gemini service client path with trace capture and deterministic output support for CI.
Technical Implementation
1. Configure Tracer with JSONL + console sinks so each run emits machine-readable traces.
2. Build runtime inputs through public package APIs and invoke GeminiServiceLLMClient.generate(...).
3. Construct LLMRequest payload fields and execute one representative remote-style call.
4. Print a compact JSON payload that includes trace metadata for docs and deterministic tests.
flowchart LR
A["Prompt input"] --> B["main(): tracing setup"]
B --> C["GeminiServiceLLMClient.generate(...)"]
C --> D["LLMRequest and LLMResponse contracts"]
C --> E["Tracer JSONL + console events"]
D --> F["Output payload"]
E --> F
F --> G["Printed JSON result"]
1from __future__ import annotations
2
3import json
4from pathlib import Path
5
6from design_research_agents import GeminiServiceLLMClient, Tracer
7from design_research_agents.llm import LLMMessage, LLMRequest
8
9
def _build_payload() -> dict[str, object]:
    """Describe a hosted Gemini client and record one representative call.

    Returns:
        A mapping with the client's self-description fields
        (``client_class``, ``default_model``, ``backend``, ``capabilities``,
        ``server``) plus an ``llm_call`` entry summarising the prompt that
        was sent and the response that came back.
    """
    # The client is configured purely through the package's public API.
    gemini = GeminiServiceLLMClient(
        name="gemini-prod",
        default_model="gemini-2.5-flash",
        api_key_env="GOOGLE_API_KEY",
        api_key="example-key-for-config-demo",
        max_retries=3,
        model_patterns=("gemini-2.5-flash", "gemini-2.5-*"),
    )
    summary = gemini.describe()

    # Assemble the request step by step: messages first, then the envelope.
    question = "In one sentence, when should engineers run an explicit design pre-mortem?"
    conversation = (
        LLMMessage(role="system", content="You are a concise engineering design assistant."),
        LLMMessage(role="user", content=question),
    )
    request = LLMRequest(
        messages=conversation,
        model=gemini.default_model(),
        temperature=0.0,
        max_tokens=120,
    )
    reply = gemini.generate(request)

    call_summary = {
        "prompt": question,
        "response_text": reply.text,
        "response_model": reply.model,
        "response_provider": reply.provider,
        # Flags replies that are empty or whitespace-only.
        "response_has_text": bool(reply.text.strip()),
    }

    payload: dict[str, object] = {
        "client_class": summary["client_class"],
        "default_model": summary["default_model"],
        "llm_call": call_summary,
    }
    # Remaining description fields are copied through unchanged, in order.
    payload.update(
        (field, summary[field]) for field in ("backend", "capabilities", "server")
    )
    return payload
48
49
def main() -> None:
    """Run the Gemini client example under tracing and print the result as JSON."""
    # A constant request id keeps trace names and documented output reproducible.
    run_id = "example-clients-gemini-service-call-001"
    trace_root = Path("artifacts/examples/traces")
    run_tracer = Tracer(
        enabled=True,
        trace_dir=trace_root,
        enable_jsonl=True,
        enable_console=True,
    )

    result = run_tracer.run_callable(
        agent_name="ExamplesGeminiServiceClientCall",
        request_id=run_id,
        input_payload={"scenario": "gemini-service-client-call"},
        function=_build_payload,
    )
    assert isinstance(result, dict)

    # Attach the example identifier and trace metadata before printing.
    result.update(
        {
            "example": "clients/gemini_service_client.py",
            "trace": run_tracer.trace_info(run_id),
        }
    )

    # Sorted keys and ASCII-only output keep the printed JSON stable.
    rendered = json.dumps(result, ensure_ascii=True, indent=2, sort_keys=True)
    print(rendered)
71
72
# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Expected Results
Run Command
PYTHONPATH=src python3 examples/clients/gemini_service_client.py
Example output captured with DRA_EXAMPLE_LLM_MODE=deterministic
(timestamps, durations, and trace filenames vary by run):
{
"backend": {
"api_key_env": "GOOGLE_API_KEY",
"default_model": "gemini-2.5-flash",
"kind": "gemini_service",
"max_retries": 3,
"model_patterns": [
"gemini-2.5-flash",
"gemini-2.5-*"
],
"name": "gemini-prod"
},
"capabilities": {
"json_mode": "native",
"max_context_tokens": null,
"streaming": true,
"tool_calling": "none",
"vision": false
},
"client_class": "GeminiServiceLLMClient",
"default_model": "gemini-2.5-flash",
"example": "clients/gemini_service_client.py",
"llm_call": {
"prompt": "In one sentence, when should engineers run an explicit design pre-mortem?",
"response_has_text": true,
"response_model": "gemini-2.5-flash",
"response_provider": "example-test-monkeypatch",
"response_text": "Run a design pre-mortem before committing architecture changes with high uncertainty or safety risk."
},
"server": null,
"trace": {
"request_id": "example-clients-gemini-service-call-001",
"trace_dir": "artifacts/examples/traces",
"trace_path": "artifacts/examples/traces/run_20260222T162206Z_example-clients-gemini-service-call-001.jsonl"
}
}