OpenAI Compatible HTTP Client
Source: examples/clients/openai_compatible_http_client.py
Introduction
OpenAI-compatible HTTP surfaces are valuable because they let one orchestration stack target multiple providers; vLLM and SGLang both expose this style of interface, while the OpenAI Responses API defines the baseline semantics. This example demonstrates that compatibility layer in the framework client runtime.
Technical Implementation
- Configure a `Tracer` with JSONL + console output so each run emits machine-readable traces and lifecycle logs.
- Build the runtime surface (public APIs only) and execute `OpenAICompatibleHTTPLLMClient.generate(...)` with a fixed `request_id`.
- Construct `LLMRequest` inputs and call `generate` through the selected client implementation.
- Print a compact JSON payload including `trace_info` for deterministic tests and docs examples.
flowchart LR
A["Input prompt or scenario"] --> B["main(): runtime wiring"]
B --> C["OpenAICompatibleHTTPLLMClient.generate(...)"]
C --> D["LLMRequest/LLMResponse contracts wrap provider behavior"]
C --> E["Tracer JSONL + console events"]
D --> F["ExecutionResult/payload"]
E --> F
F --> G["Printed JSON output"]
1from __future__ import annotations
2
3import json
4from pathlib import Path
5
6from design_research_agents import OpenAICompatibleHTTPLLMClient, Tracer
7from design_research_agents.llm import LLMMessage, LLMRequest
8
9
def _build_payload() -> dict[str, object]:
    """Call the OpenAI-compatible client once and summarize the result.

    Only public runtime APIs are used; the context manager guarantees the
    configured HTTP client is closed when the example finishes.
    """
    client = OpenAICompatibleHTTPLLMClient(
        name="local-openai-compat",
        base_url="http://127.0.0.1:8011/v1",
        default_model="qwen2.5-1.5b-q4",
        api_key_env="OPENAI_API_KEY",
        api_key="example-key-for-config-demo",
        max_retries=3,
        model_patterns=("qwen2.5-*", "qwen2-*"),
    )
    with client:
        info = client.describe()
        question = "Provide one sentence on balancing latency and quality in design review assistants."
        # System + user turns form the complete conversation for one call.
        chat = (
            LLMMessage(role="system", content="You are a concise engineering design assistant."),
            LLMMessage(role="user", content=question),
        )
        reply = client.generate(
            LLMRequest(
                messages=chat,
                model=client.default_model(),
                temperature=0.0,
                max_tokens=120,
            )
        )
    # Summarize the call; the response objects are already captured, so the
    # client can be closed before the payload is assembled.
    call_summary = {
        "prompt": question,
        "response_text": reply.text,
        "response_model": reply.model,
        "response_provider": reply.provider,
        "response_has_text": bool(reply.text.strip()),
    }
    return {
        "client_class": info["client_class"],
        "default_model": info["default_model"],
        "llm_call": call_summary,
        "backend": info["backend"],
        "capabilities": info["capabilities"],
        "server": info["server"],
    }
50
51
def main() -> None:
    """Run one traced OpenAI-compatible client call and print its payload.

    Wires a JSONL + console ``Tracer`` around ``_build_payload``, augments the
    returned payload with example/trace metadata, and prints it as sorted,
    ASCII-safe JSON so docs output is stable across runs.
    """
    # Fixed request id keeps traces and docs output deterministic across runs.
    request_id = "example-clients-openai-compatible-call-001"
    tracer = Tracer(
        enabled=True,
        trace_dir=Path("artifacts/examples/traces"),
        enable_jsonl=True,
        enable_console=True,
    )
    payload = tracer.run_callable(
        agent_name="ExamplesOpenAICompatClientCall",
        request_id=request_id,
        input_payload={"scenario": "openai-compatible-client-call"},
        function=_build_payload,
    )
    # `assert` statements are stripped under `python -O`; validate explicitly
    # so the example fails loudly even in optimized runs.
    if not isinstance(payload, dict):
        raise TypeError(f"expected dict payload, got {type(payload).__name__}")
    payload["example"] = "clients/openai_compatible_http_client.py"
    payload["trace"] = tracer.trace_info(request_id)
    # Sorted keys + ASCII escapes keep the printed JSON byte-stable for docs.
    print(json.dumps(payload, ensure_ascii=True, indent=2, sort_keys=True))
73
74
# Standard script entry guard: run the example only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
Expected Results
Run Command
PYTHONPATH=src python3 examples/clients/openai_compatible_http_client.py
Example output captured with DRA_EXAMPLE_LLM_MODE=deterministic
(timestamps, durations, and trace filenames vary by run):
{
"backend": {
"api_key_env": "OPENAI_API_KEY",
"base_url": "http://127.0.0.1:8011/v1",
"default_model": "qwen2.5-1.5b-q4",
"kind": "openai_compatible_http",
"max_retries": 3,
"model_patterns": [
"qwen2.5-*",
"qwen2-*"
],
"name": "local-openai-compat"
},
"capabilities": {
"json_mode": "prompt+validate",
"max_context_tokens": null,
"streaming": false,
"tool_calling": "best_effort",
"vision": false
},
"client_class": "OpenAICompatibleHTTPLLMClient",
"default_model": "qwen2.5-1.5b-q4",
"example": "clients/openai_compatible_http_client.py",
"llm_call": {
"prompt": "Provide one sentence on balancing latency and quality in design review assistants.",
"response_has_text": true,
"response_model": "qwen2.5-1.5b-q4",
"response_provider": "example-test-monkeypatch",
"response_text": "Use fast drafts for iteration, then escalate critical decisions to higher-quality models."
},
"server": null,
"trace": {
"request_id": "example-clients-openai-compatible-call-001",
"trace_dir": "artifacts/examples/traces",
"trace_path": "artifacts/examples/traces/run_20260222T162206Z_example-clients-openai-compatible-call-001.jsonl"
}
}