Gemini Service Client

Source: examples/clients/gemini_service_client.py

Introduction

Gemini hosted inference is useful when teams want multi-model experimentation through one provider SDK, while keeping request payloads under the framework’s provider-neutral LLM contracts. This example exercises the Gemini service client path with trace capture and deterministic output support for CI.

Technical Implementation

  1. Configure Tracer with JSONL + console sinks so each run emits machine-readable traces.

  2. Build runtime inputs through public package APIs and invoke GeminiServiceLLMClient.generate(...).

  3. Construct LLMRequest payload fields and execute one representative remote-style call.

  4. Print a compact JSON payload that includes trace metadata for docs and deterministic tests.

flowchart LR
    A["Prompt input"] --> B["main(): tracing setup"]
    B --> C["GeminiServiceLLMClient.generate(...)"]
    C --> D["LLMRequest and LLMResponse contracts"]
    C --> E["Tracer JSONL + console events"]
    D --> F["Output payload"]
    E --> F
    F --> G["Printed JSON result"]
    
 1from __future__ import annotations
 2
 3import json
 4from pathlib import Path
 5
 6from design_research_agents import GeminiServiceLLMClient, Tracer
 7from design_research_agents.llm import LLMMessage, LLMRequest
 8
 9
def _build_payload() -> dict[str, object]:
    """Run one representative hosted-Gemini request and return a docs payload.

    Constructs the client purely through public runtime APIs, issues a single
    ``generate`` call, and packages the client description plus the call result
    into a JSON-serializable dict.
    """
    gemini = GeminiServiceLLMClient(
        name="gemini-prod",
        default_model="gemini-2.5-flash",
        api_key_env="GOOGLE_API_KEY",
        api_key="example-key-for-config-demo",
        max_retries=3,
        model_patterns=("gemini-2.5-flash", "gemini-2.5-*"),
    )
    info = gemini.describe()

    question = "In one sentence, when should engineers run an explicit design pre-mortem?"
    # Build the provider-neutral request contract first, then execute it.
    request = LLMRequest(
        messages=(
            LLMMessage(role="system", content="You are a concise engineering design assistant."),
            LLMMessage(role="user", content=question),
        ),
        model=gemini.default_model(),
        temperature=0.0,
        max_tokens=120,
    )
    reply = gemini.generate(request)

    return {
        "client_class": info["client_class"],
        "default_model": info["default_model"],
        "llm_call": {
            "prompt": question,
            "response_text": reply.text,
            "response_model": reply.model,
            "response_provider": reply.provider,
            "response_has_text": bool(reply.text.strip()),
        },
        "backend": info["backend"],
        "capabilities": info["capabilities"],
        "server": info["server"],
    }
48
49
def main() -> None:
    """Run one traced Gemini service client call and print the result as JSON.

    Sets up a Tracer with JSONL and console sinks, executes the example payload
    builder under tracing, attaches trace metadata, and prints the combined
    payload in a deterministic format suitable for docs and CI comparison.
    """
    # Fixed request id keeps traces and docs output deterministic across runs.
    request_id = "example-clients-gemini-service-call-001"
    tracer = Tracer(
        enabled=True,
        trace_dir=Path("artifacts/examples/traces"),
        enable_jsonl=True,
        enable_console=True,
    )
    payload = tracer.run_callable(
        agent_name="ExamplesGeminiServiceClientCall",
        request_id=request_id,
        input_payload={"scenario": "gemini-service-client-call"},
        function=_build_payload,
    )
    # Explicit check instead of `assert`: assertions are stripped under
    # `python -O`, and a non-dict payload would otherwise fail below with a
    # confusing item-assignment error.
    if not isinstance(payload, dict):
        raise TypeError(f"expected dict payload, got {type(payload).__name__}")
    payload["example"] = "clients/gemini_service_client.py"
    payload["trace"] = tracer.trace_info(request_id)
    # ensure_ascii + sort_keys keep the printed JSON byte-stable for docs/CI.
    print(json.dumps(payload, ensure_ascii=True, indent=2, sort_keys=True))
71
72
# Script entry point: run the traced example when executed directly.
if __name__ == "__main__":
    main()

Expected Results

Run Command

PYTHONPATH=src python3 examples/clients/gemini_service_client.py

Example output captured with DRA_EXAMPLE_LLM_MODE=deterministic (timestamps, durations, and trace filenames vary by run):

{
  "backend": {
    "api_key_env": "GOOGLE_API_KEY",
    "default_model": "gemini-2.5-flash",
    "kind": "gemini_service",
    "max_retries": 3,
    "model_patterns": [
      "gemini-2.5-flash",
      "gemini-2.5-*"
    ],
    "name": "gemini-prod"
  },
  "capabilities": {
    "json_mode": "native",
    "max_context_tokens": null,
    "streaming": true,
    "tool_calling": "none",
    "vision": false
  },
  "client_class": "GeminiServiceLLMClient",
  "default_model": "gemini-2.5-flash",
  "example": "clients/gemini_service_client.py",
  "llm_call": {
    "prompt": "In one sentence, when should engineers run an explicit design pre-mortem?",
    "response_has_text": true,
    "response_model": "gemini-2.5-flash",
    "response_provider": "example-test-monkeypatch",
    "response_text": "Run a design pre-mortem before committing architecture changes with high uncertainty or safety risk."
  },
  "server": null,
  "trace": {
    "request_id": "example-clients-gemini-service-call-001",
    "trace_dir": "artifacts/examples/traces",
    "trace_path": "artifacts/examples/traces/run_20260222T162206Z_example-clients-gemini-service-call-001.jsonl"
  }
}

References