refactor(openai): drop SDK dependency and switch to native HTTP client

This commit is contained in:
zhayujie
2026-04-27 20:21:54 +08:00
parent 5c65196e44
commit 8752f0cc60
8 changed files with 920 additions and 330 deletions

View File

@@ -3,8 +3,15 @@
import time import time
import json import json
import openai from models.openai.openai_compat import (
from models.openai.openai_compat import error as openai_error, RateLimitError, Timeout, APIError, APIConnectionError error as openai_error,
RateLimitError,
Timeout,
APIError,
APIConnectionError,
wrap_http_error,
)
from models.openai.openai_http_client import OpenAIHTTPClient, OpenAIHTTPError
import requests import requests
from common import const from common import const
from models.bot import Bot from models.bot import Bot
@@ -23,18 +30,19 @@ from models.baidu.baidu_wenxin_session import BaiduWenxinSession
class ChatGPTBot(Bot, OpenAIImage, OpenAICompatibleBot): class ChatGPTBot(Bot, OpenAIImage, OpenAICompatibleBot):
def __init__(self): def __init__(self):
super().__init__() super().__init__()
# set the default api_key / api_base based on bot_type # Resolve api key / base from config (no global SDK state anymore).
if conf().get("bot_type") == "custom": if conf().get("bot_type") == "custom":
openai.api_key = conf().get("custom_api_key", "") self._api_key = conf().get("custom_api_key", "")
if conf().get("custom_api_base"): self._api_base = conf().get("custom_api_base") or None
openai.api_base = conf().get("custom_api_base")
else: else:
openai.api_key = conf().get("open_ai_api_key") self._api_key = conf().get("open_ai_api_key")
if conf().get("open_ai_api_base"): self._api_base = conf().get("open_ai_api_base") or None
openai.api_base = conf().get("open_ai_api_base") self._proxy = conf().get("proxy") or None
proxy = conf().get("proxy") self._http_client = OpenAIHTTPClient(
if proxy: api_key=self._api_key,
openai.proxy = proxy api_base=self._api_base,
proxy=self._proxy,
)
if conf().get("rate_limit_chatgpt"): if conf().get("rate_limit_chatgpt"):
self.tb4chatgpt = TokenBucket(conf().get("rate_limit_chatgpt", 20)) self.tb4chatgpt = TokenBucket(conf().get("rate_limit_chatgpt", 20))
conf_model = conf().get("model") or "gpt-3.5-turbo" conf_model = conf().get("model") or "gpt-3.5-turbo"
@@ -71,6 +79,10 @@ class ChatGPTBot(Bot, OpenAIImage, OpenAICompatibleBot):
'default_frequency_penalty': conf().get("frequency_penalty", 0.0), 'default_frequency_penalty': conf().get("frequency_penalty", 0.0),
'default_presence_penalty': conf().get("presence_penalty", 0.0), 'default_presence_penalty': conf().get("presence_penalty", 0.0),
} }
def _get_http_client(self) -> OpenAIHTTPClient:
"""Override the default HTTP client to reuse our pre-configured one."""
return self._http_client
def reply(self, query, context=None): def reply(self, query, context=None):
# acquire reply content # acquire reply content
@@ -195,20 +207,16 @@ class ChatGPTBot(Bot, OpenAIImage, OpenAICompatibleBot):
logger.info(f"[CHATGPT] Calling vision API with model: {model}") logger.info(f"[CHATGPT] Calling vision API with model: {model}")
# Call OpenAI API # Call OpenAI-compatible API via HTTP
kwargs = { response = self._http_client.chat_completions(
"model": model, api_key=api_key or None,
"messages": messages, api_base=api_base or None,
"max_tokens": 1000 model=model,
} messages=messages,
if api_key: max_tokens=1000,
kwargs["api_key"] = api_key )
if api_base:
kwargs["api_base"] = api_base content = response["choices"][0]["message"]["content"]
response = openai.ChatCompletion.create(**kwargs)
content = response.choices[0]["message"]["content"]
logger.info(f"[CHATGPT] Vision API response: {content[:100]}...") logger.info(f"[CHATGPT] Vision API response: {content[:100]}...")
# Clean up temp file # Clean up temp file
@@ -237,57 +245,100 @@ class ChatGPTBot(Bot, OpenAIImage, OpenAICompatibleBot):
try: try:
if conf().get("rate_limit_chatgpt") and not self.tb4chatgpt.get_token(): if conf().get("rate_limit_chatgpt") and not self.tb4chatgpt.get_token():
raise RateLimitError("RateLimitError: rate limit exceeded") raise RateLimitError("RateLimitError: rate limit exceeded")
# if api_key == None, the default openai.api_key will be used # If api_key is None, the per-instance default key will be used.
if args is None: if args is None:
args = self.args args = self.args
response = openai.ChatCompletion.create(api_key=api_key, messages=session.messages, **args) # Translate old SDK kwargs to HTTP client params:
# logger.debug("[CHATGPT] response={}".format(response)) # - request_timeout / timeout -> per-call timeout
logger.info("[ChatGPT] reply={}, total_tokens={}".format(response.choices[0]['message']['content'], response["usage"]["total_tokens"])) call_args = dict(args)
timeout = call_args.pop("request_timeout", None) or call_args.pop("timeout", None)
response = self._http_client.chat_completions(
api_key=api_key or None,
timeout=timeout,
messages=session.messages,
**call_args,
)
logger.info("[ChatGPT] reply={}, total_tokens={}".format(
response["choices"][0]["message"]["content"],
response["usage"]["total_tokens"]
))
return { return {
"total_tokens": response["usage"]["total_tokens"], "total_tokens": response["usage"]["total_tokens"],
"completion_tokens": response["usage"]["completion_tokens"], "completion_tokens": response["usage"]["completion_tokens"],
"content": response.choices[0]["message"]["content"], "content": response["choices"][0]["message"]["content"],
} }
except OpenAIHTTPError as http_err:
return self._handle_reply_error(
wrap_http_error(http_err), session, api_key, args, retry_count
)
except Exception as e: except Exception as e:
need_retry = retry_count < 2 return self._handle_reply_error(e, session, api_key, args, retry_count)
result = {"completion_tokens": 0, "content": "我现在有点累了,等会再来吧"}
if isinstance(e, RateLimitError):
logger.warn("[CHATGPT] RateLimitError: {}".format(e))
result["content"] = "提问太快啦,请休息一下再问我吧"
if need_retry:
time.sleep(20)
elif isinstance(e, Timeout):
logger.warn("[CHATGPT] Timeout: {}".format(e))
result["content"] = "我没有收到你的消息"
if need_retry:
time.sleep(5)
elif isinstance(e, APIError):
logger.warn("[CHATGPT] Bad Gateway: {}".format(e))
result["content"] = "请再问我一次"
if need_retry:
time.sleep(10)
elif isinstance(e, APIConnectionError):
logger.warn("[CHATGPT] APIConnectionError: {}".format(e))
result["content"] = "我连接不到你的网络"
if need_retry:
time.sleep(5)
else:
logger.exception("[CHATGPT] Exception: {}".format(e))
need_retry = False
self.sessions.clear_session(session.session_id)
def _handle_reply_error(self, e, session, api_key, args, retry_count):
"""Map exception to user-facing reply with retry/backoff (mirrors SDK behavior)."""
need_retry = retry_count < 2
result = {"completion_tokens": 0, "content": "我现在有点累了,等会再来吧"}
if isinstance(e, RateLimitError):
logger.warn("[CHATGPT] RateLimitError: {}".format(e))
result["content"] = "提问太快啦,请休息一下再问我吧"
if need_retry: if need_retry:
logger.warn("[CHATGPT] 第{}次重试".format(retry_count + 1)) time.sleep(20)
return self.reply_text(session, api_key, args, retry_count + 1) elif isinstance(e, Timeout):
else: logger.warn("[CHATGPT] Timeout: {}".format(e))
return result result["content"] = "我没有收到你的消息"
if need_retry:
time.sleep(5)
elif isinstance(e, APIConnectionError):
logger.warn("[CHATGPT] APIConnectionError: {}".format(e))
result["content"] = "我连接不到你的网络"
if need_retry:
time.sleep(5)
elif isinstance(e, APIError):
logger.warn("[CHATGPT] Bad Gateway: {}".format(e))
result["content"] = "请再问我一次"
if need_retry:
time.sleep(10)
else:
logger.exception("[CHATGPT] Exception: {}".format(e))
need_retry = False
self.sessions.clear_session(session.session_id)
if need_retry:
logger.warn("[CHATGPT] 第{}次重试".format(retry_count + 1))
return self.reply_text(session, api_key, args, retry_count + 1)
return result
class AzureChatGPTBot(ChatGPTBot): class AzureChatGPTBot(ChatGPTBot):
"""Azure OpenAI variant.
Azure's HTTP shape differs from public OpenAI:
URL : {endpoint}/openai/deployments/{deployment}/chat/completions
Auth : api-key header (not Bearer)
Query : ?api-version={version}
We model that with a dedicated HTTP client and override _get_http_client
so the OpenAICompatibleBot streaming/tool path uses it transparently.
"""
def __init__(self): def __init__(self):
super().__init__() super().__init__()
openai.api_type = "azure" self._azure_api_version = conf().get("azure_api_version", "2023-06-01-preview")
openai.api_version = conf().get("azure_api_version", "2023-06-01-preview") self._azure_deployment_id = conf().get("azure_deployment_id")
self.args["deployment_id"] = conf().get("azure_deployment_id") # Drop legacy SDK kwarg; Azure deployment is encoded in the URL now.
self.args.pop("deployment_id", None)
endpoint = (self._api_base or "").rstrip("/")
deployment = self._azure_deployment_id or ""
# Build a base that already includes /openai/deployments/{deployment}.
# /chat/completions will be appended by the client.
azure_base = (
f"{endpoint}/openai/deployments/{deployment}" if endpoint and deployment else endpoint
)
self._http_client = _AzureChatHTTPClient(
api_key=self._api_key,
api_base=azure_base,
api_version=self._azure_api_version,
proxy=self._proxy,
)
def create_img(self, query, retry_count=0, api_key=None): def create_img(self, query, retry_count=0, api_key=None):
text_to_image_model = conf().get("text_to_image") text_to_image_model = conf().get("text_to_image")
@@ -357,3 +408,35 @@ class AzureChatGPTBot(ChatGPTBot):
return False, "图片生成失败" return False, "图片生成失败"
else: else:
return False, "图片生成失败未配置text_to_image参数" return False, "图片生成失败未配置text_to_image参数"
class _AzureChatHTTPClient(OpenAIHTTPClient):
    """HTTP client variant for Azure-style chat-completion requests.

    Differences from the parent client, as implemented here:

    * authentication is sent in an ``api-key`` header rather than an
      ``Authorization: Bearer`` header;
    * every ``chat_completions`` call carries an ``api-version`` query
      parameter, defaulting to the version given at construction time.

    The ``api_base`` passed in is forwarded to the parent unchanged; the
    caller is expected to hand over a deployment-scoped base URL.
    """

    def __init__(self, api_key, api_base, api_version, proxy=None, timeout=None):
        """Store the Azure API version and delegate the rest to the parent."""
        super().__init__(
            api_key=api_key,
            api_base=api_base,
            proxy=proxy,
            timeout=timeout,
        )
        self._api_version = api_version

    def _build_headers(self, api_key, extra_headers):
        """Return request headers using the ``api-key`` authentication scheme.

        A per-call ``api_key`` wins over the instance default; ``None`` means
        "use the instance key". Instance-level extra headers are applied
        first, then per-call ``extra_headers``, so the latter take precedence.
        """
        effective_key = self.api_key if api_key is None else api_key
        result = {"Content-Type": "application/json"}
        if effective_key:
            result["api-key"] = effective_key
        # Later sources override earlier ones; falsy sources are skipped.
        for overrides in (self.extra_headers, extra_headers):
            if overrides:
                result.update(overrides)
        return result

    def chat_completions(self, **kwargs):
        """Call the parent ``chat_completions`` with ``api-version`` ensured.

        The caller's ``extra_query`` mapping is copied, never mutated. An
        ``api-version`` entry already supplied by the caller is kept as is;
        otherwise the version configured on this client is added.
        """
        query = dict(kwargs.get("extra_query") or {})
        if "api-version" not in query:
            query["api-version"] = self._api_version
        kwargs["extra_query"] = query
        return super().chat_completions(**kwargs)

View File

@@ -2,8 +2,14 @@
import time import time
import openai from models.openai.openai_compat import (
from models.openai.openai_compat import RateLimitError, Timeout, APIConnectionError RateLimitError,
Timeout,
APIConnectionError,
APIError,
wrap_http_error,
)
from models.openai.openai_http_client import OpenAIHTTPClient, OpenAIHTTPError
from models.bot import Bot from models.bot import Bot
from models.openai_compatible_bot import OpenAICompatibleBot from models.openai_compatible_bot import OpenAICompatibleBot
@@ -22,12 +28,14 @@ user_session = dict()
class OpenAIBot(Bot, OpenAIImage, OpenAICompatibleBot): class OpenAIBot(Bot, OpenAIImage, OpenAICompatibleBot):
def __init__(self): def __init__(self):
super().__init__() super().__init__()
openai.api_key = conf().get("open_ai_api_key") self._api_key = conf().get("open_ai_api_key")
if conf().get("open_ai_api_base"): self._api_base = conf().get("open_ai_api_base") or None
openai.api_base = conf().get("open_ai_api_base") self._proxy = conf().get("proxy") or None
proxy = conf().get("proxy") self._http_client = OpenAIHTTPClient(
if proxy: api_key=self._api_key,
openai.proxy = proxy api_base=self._api_base,
proxy=self._proxy,
)
self.sessions = SessionManager(OpenAISession, model=conf().get("model") or "text-davinci-003") self.sessions = SessionManager(OpenAISession, model=conf().get("model") or "text-davinci-003")
self.args = { self.args = {
@@ -54,6 +62,10 @@ class OpenAIBot(Bot, OpenAIImage, OpenAICompatibleBot):
'default_presence_penalty': conf().get("presence_penalty", 0.0), 'default_presence_penalty': conf().get("presence_penalty", 0.0),
} }
def _get_http_client(self) -> OpenAIHTTPClient:
"""Reuse the per-instance HTTP client for the streaming/tool path."""
return self._http_client
def reply(self, query, context=None): def reply(self, query, context=None):
# acquire reply content # acquire reply content
if context and context.type: if context and context.type:
@@ -96,8 +108,14 @@ class OpenAIBot(Bot, OpenAIImage, OpenAICompatibleBot):
def reply_text(self, session: OpenAISession, retry_count=0): def reply_text(self, session: OpenAISession, retry_count=0):
try: try:
response = openai.Completion.create(prompt=str(session), **self.args) call_args = dict(self.args)
res_content = response.choices[0]["text"].strip().replace("<|endoftext|>", "") timeout = call_args.pop("request_timeout", None) or call_args.pop("timeout", None)
response = self._http_client.completions(
timeout=timeout,
prompt=str(session),
**call_args,
)
res_content = response["choices"][0]["text"].strip().replace("<|endoftext|>", "")
total_tokens = response["usage"]["total_tokens"] total_tokens = response["usage"]["total_tokens"]
completion_tokens = response["usage"]["completion_tokens"] completion_tokens = response["usage"]["completion_tokens"]
logger.info("[OPEN_AI] reply={}".format(res_content)) logger.info("[OPEN_AI] reply={}".format(res_content))
@@ -106,125 +124,41 @@ class OpenAIBot(Bot, OpenAIImage, OpenAICompatibleBot):
"completion_tokens": completion_tokens, "completion_tokens": completion_tokens,
"content": res_content, "content": res_content,
} }
except OpenAIHTTPError as http_err:
return self._handle_legacy_error(wrap_http_error(http_err), session, retry_count)
except Exception as e: except Exception as e:
need_retry = retry_count < 2 return self._handle_legacy_error(e, session, retry_count)
result = {"completion_tokens": 0, "content": "我现在有点累了,等会再来吧"}
if isinstance(e, RateLimitError):
logger.warn("[OPEN_AI] RateLimitError: {}".format(e))
result["content"] = "提问太快啦,请休息一下再问我吧"
if need_retry:
time.sleep(20)
elif isinstance(e, Timeout):
logger.warn("[OPEN_AI] Timeout: {}".format(e))
result["content"] = "我没有收到你的消息"
if need_retry:
time.sleep(5)
elif isinstance(e, APIConnectionError):
logger.warn("[OPEN_AI] APIConnectionError: {}".format(e))
need_retry = False
result["content"] = "我连接不到你的网络"
else:
logger.warn("[OPEN_AI] Exception: {}".format(e))
need_retry = False
self.sessions.clear_session(session.session_id)
def _handle_legacy_error(self, e, session, retry_count):
"""Map exception -> reply for the legacy /completions endpoint."""
need_retry = retry_count < 2
result = {"completion_tokens": 0, "content": "我现在有点累了,等会再来吧"}
if isinstance(e, RateLimitError):
logger.warn("[OPEN_AI] RateLimitError: {}".format(e))
result["content"] = "提问太快啦,请休息一下再问我吧"
if need_retry: if need_retry:
logger.warn("[OPEN_AI] 第{}次重试".format(retry_count + 1)) time.sleep(20)
return self.reply_text(session, retry_count + 1) elif isinstance(e, Timeout):
else: logger.warn("[OPEN_AI] Timeout: {}".format(e))
return result result["content"] = "我没有收到你的消息"
if need_retry:
time.sleep(5)
elif isinstance(e, APIConnectionError):
logger.warn("[OPEN_AI] APIConnectionError: {}".format(e))
need_retry = False
result["content"] = "我连接不到你的网络"
else:
logger.warn("[OPEN_AI] Exception: {}".format(e))
need_retry = False
self.sessions.clear_session(session.session_id)
def call_with_tools(self, messages, tools=None, stream=False, **kwargs): if need_retry:
""" logger.warn("[OPEN_AI] 第{}次重试".format(retry_count + 1))
Call OpenAI API with tool support for agent integration return self.reply_text(session, retry_count + 1)
Note: This bot uses the old Completion API which doesn't support tools. return result
For tool support, use ChatGPTBot instead.
# NOTE: Tool-call routing is delegated to OpenAICompatibleBot.call_with_tools,
This method converts to ChatCompletion API when tools are provided. # which calls /chat/completions via our shared HTTP client. The previous
# bespoke implementation here bypassed Claude->OpenAI message/tool conversion
Args: # and was effectively broken for agent flows; we now inherit the correct
messages: List of messages # implementation from the base class.
tools: List of tool definitions (OpenAI format)
stream: Whether to use streaming
**kwargs: Additional parameters
Returns:
Formatted response in OpenAI format or generator for streaming
"""
try:
# The old Completion API doesn't support tools
# We need to use ChatCompletion API instead
logger.info("[OPEN_AI] Using ChatCompletion API for tool support")
# Build request parameters for ChatCompletion
request_params = {
"model": kwargs.get("model", conf().get("model") or "gpt-4.1"),
"messages": messages,
"temperature": kwargs.get("temperature", conf().get("temperature", 0.9)),
"top_p": kwargs.get("top_p", 1),
"frequency_penalty": kwargs.get("frequency_penalty", conf().get("frequency_penalty", 0.0)),
"presence_penalty": kwargs.get("presence_penalty", conf().get("presence_penalty", 0.0)),
"stream": stream
}
# Add max_tokens if specified
if kwargs.get("max_tokens"):
request_params["max_tokens"] = kwargs["max_tokens"]
# Add tools if provided
if tools:
request_params["tools"] = tools
request_params["tool_choice"] = kwargs.get("tool_choice", "auto")
# Make API call using ChatCompletion
if stream:
return self._handle_stream_response(request_params)
else:
return self._handle_sync_response(request_params)
except Exception as e:
logger.error(f"[OPEN_AI] call_with_tools error: {e}")
if stream:
def error_generator():
yield {
"error": True,
"message": str(e),
"status_code": 500
}
return error_generator()
else:
return {
"error": True,
"message": str(e),
"status_code": 500
}
def _handle_sync_response(self, request_params):
"""Handle synchronous OpenAI ChatCompletion API response"""
try:
response = openai.ChatCompletion.create(**request_params)
logger.info(f"[OPEN_AI] call_with_tools reply, model={response.get('model')}, "
f"total_tokens={response.get('usage', {}).get('total_tokens', 0)}")
return response
except Exception as e:
logger.error(f"[OPEN_AI] sync response error: {e}")
raise
def _handle_stream_response(self, request_params):
"""Handle streaming OpenAI ChatCompletion API response"""
try:
stream = openai.ChatCompletion.create(**request_params)
for chunk in stream:
yield chunk
except Exception as e:
logger.error(f"[OPEN_AI] stream response error: {e}")
yield {
"error": True,
"message": str(e),
"status_code": 500
}

View File

@@ -1,17 +1,25 @@
import time import time
import openai
from models.openai.openai_compat import RateLimitError
from common.log import logger from common.log import logger
from common.token_bucket import TokenBucket from common.token_bucket import TokenBucket
from config import conf from config import conf
from models.openai.openai_compat import RateLimitError, wrap_http_error
from models.openai.openai_http_client import OpenAIHTTPClient, OpenAIHTTPError
# OPENAI提供的画图接口 # OpenAI image generation API wrapper
class OpenAIImage(object): class OpenAIImage(object):
def __init__(self): def __init__(self):
openai.api_key = conf().get("open_ai_api_key") # Lazy default client; subclasses (ChatGPTBot/OpenAIBot) typically
# construct their own _http_client and override _get_image_client().
self._image_api_key = conf().get("open_ai_api_key")
self._image_api_base = conf().get("open_ai_api_base") or None
self._image_proxy = conf().get("proxy") or None
self._image_client = OpenAIHTTPClient(
api_key=self._image_api_key,
api_base=self._image_api_base,
proxy=self._image_proxy,
)
if conf().get("rate_limit_dalle"): if conf().get("rate_limit_dalle"):
self.tb4dalle = TokenBucket(conf().get("rate_limit_dalle", 50)) self.tb4dalle = TokenBucket(conf().get("rate_limit_dalle", 50))
@@ -20,24 +28,35 @@ class OpenAIImage(object):
if conf().get("rate_limit_dalle") and not self.tb4dalle.get_token(): if conf().get("rate_limit_dalle") and not self.tb4dalle.get_token():
return False, "请求太快了,请休息一下再问我吧" return False, "请求太快了,请休息一下再问我吧"
logger.info("[OPEN_AI] image_query={}".format(query)) logger.info("[OPEN_AI] image_query={}".format(query))
response = openai.Image.create( response = self._image_client.images_generate(
api_key=api_key, api_key=api_key or None,
prompt=query, # 图片描述 api_base=api_base or None,
n=1, # 每次生成图片的数量 prompt=query, # image description
n=1,
model=conf().get("text_to_image") or "dall-e-2", model=conf().get("text_to_image") or "dall-e-2",
# size=conf().get("image_create_size", "256x256"), # 图片大小,可选有 256x256, 512x512, 1024x1024 # size=conf().get("image_create_size", "256x256"),
) )
image_url = response["data"][0]["url"] image_url = response["data"][0]["url"]
logger.info("[OPEN_AI] image_url={}".format(image_url)) logger.info("[OPEN_AI] image_url={}".format(image_url))
return True, image_url return True, image_url
except OpenAIHTTPError as http_err:
mapped = wrap_http_error(http_err)
if isinstance(mapped, RateLimitError):
logger.warn(mapped)
if retry_count < 1:
time.sleep(5)
logger.warn("[OPEN_AI] ImgCreate RateLimit exceed, 第{}次重试".format(retry_count + 1))
return self.create_img(query, retry_count + 1)
return False, "画图出现问题,请休息一下再问我吧"
logger.exception(mapped)
return False, "画图出现问题,请休息一下再问我吧"
except RateLimitError as e: except RateLimitError as e:
logger.warn(e) logger.warn(e)
if retry_count < 1: if retry_count < 1:
time.sleep(5) time.sleep(5)
logger.warn("[OPEN_AI] ImgCreate RateLimit exceed, 第{}次重试".format(retry_count + 1)) logger.warn("[OPEN_AI] ImgCreate RateLimit exceed, 第{}次重试".format(retry_count + 1))
return self.create_img(query, retry_count + 1) return self.create_img(query, retry_count + 1)
else: return False, "画图出现问题,请休息一下再问我吧"
return False, "画图出现问题,请休息一下再问我吧"
except Exception as e: except Exception as e:
logger.exception(e) logger.exception(e)
return False, "画图出现问题,请休息一下再问我吧" return False, "画图出现问题,请休息一下再问我吧"

View File

@@ -1,102 +1,163 @@
""" """
OpenAI compatibility layer for different versions. OpenAI-compatible exception layer.
This module provides a compatibility layer between OpenAI library versions: This module used to bridge between openai SDK 0.x and 1.x exception types.
- OpenAI < 1.0 (old API with openai.error module) Since we no longer depend on the `openai` SDK at all (we call HTTP directly
- OpenAI >= 1.0 (new API with direct exception imports) via :mod:`models.openai.openai_http_client`), this file now provides:
1. Pure Python exception classes that match the *names* the rest of the
codebase already imports (RateLimitError / Timeout / APIError /
APIConnectionError / AuthenticationError / InvalidRequestError ...).
2. A :func:`map_http_error` helper that converts an
:class:`OpenAIHTTPError` (or any HTTP status code + message) into the
appropriate exception subclass, so existing ``except RateLimitError``
``except Timeout`` etc. blocks keep working unchanged.
This keeps the behavior of all existing bots (rate-limit backoff, timeout
retry, auth-error fast-fail) identical to the openai-SDK-based version, while
removing the hard dependency on the `openai` package.
""" """
try: from typing import Optional
# Try new OpenAI >= 1.0 API
from openai import (
OpenAIError, # --------------------------------------------------------------------------- #
RateLimitError, # Exception hierarchy (mirrors openai SDK names so call sites don't change)
APIError, # --------------------------------------------------------------------------- #
APIConnectionError,
AuthenticationError, class OpenAIError(Exception):
APITimeoutError, """Base exception for all OpenAI-compatible API errors."""
BadRequestError,
) def __init__(self, message: str = "", status_code: Optional[int] = None,
body=None):
# Create a mock error module for backward compatibility super().__init__(message)
class ErrorModule: self.message = message
OpenAIError = OpenAIError self.status_code = status_code
RateLimitError = RateLimitError self.body = body
APIError = APIError
APIConnectionError = APIConnectionError
AuthenticationError = AuthenticationError class APIError(OpenAIError):
Timeout = APITimeoutError # Renamed in new version """Generic API error (5xx and unclassified errors)."""
InvalidRequestError = BadRequestError # Renamed in new version
error = ErrorModule() class APIConnectionError(OpenAIError):
"""Network / connection failure (DNS, refused, reset...)."""
# Also export with new names
Timeout = APITimeoutError
InvalidRequestError = BadRequestError class Timeout(OpenAIError):
"""Request timeout. Aliased as APITimeoutError for new-SDK style imports."""
except ImportError:
# Fall back to old OpenAI < 1.0 API
try: class AuthenticationError(OpenAIError):
import openai.error as error """401 Unauthorized."""
# Export individual exceptions for direct import
OpenAIError = error.OpenAIError class PermissionDeniedError(OpenAIError):
RateLimitError = error.RateLimitError """403 Forbidden."""
APIError = error.APIError
APIConnectionError = error.APIConnectionError
AuthenticationError = error.AuthenticationError class NotFoundError(OpenAIError):
InvalidRequestError = error.InvalidRequestError """404 Not Found."""
Timeout = error.Timeout
BadRequestError = error.InvalidRequestError # Alias
APITimeoutError = error.Timeout # Alias class InvalidRequestError(OpenAIError):
except (ImportError, AttributeError): """400 Bad Request. Aliased as BadRequestError."""
# Neither version works, create dummy classes
class OpenAIError(Exception):
pass class RateLimitError(OpenAIError):
"""429 Too Many Requests."""
class RateLimitError(OpenAIError):
pass
# Aliases used by some new-SDK-style code paths in the project.
class APIError(OpenAIError): APITimeoutError = Timeout
pass BadRequestError = InvalidRequestError
class APIConnectionError(OpenAIError):
pass # --------------------------------------------------------------------------- #
# Backward-compat ``error`` module-style accessor
class AuthenticationError(OpenAIError): # --------------------------------------------------------------------------- #
pass # Some legacy code in the codebase (and possibly user plugins) does
# from models.openai.openai_compat import error
class InvalidRequestError(OpenAIError): # except error.RateLimitError: ...
pass # Keep that path working by exposing an attribute namespace.
class _ErrorModule:
class Timeout(OpenAIError): OpenAIError = OpenAIError
pass APIError = APIError
APIConnectionError = APIConnectionError
BadRequestError = InvalidRequestError Timeout = Timeout
APITimeoutError = Timeout AuthenticationError = AuthenticationError
PermissionDeniedError = PermissionDeniedError
# Create error module NotFoundError = NotFoundError
class ErrorModule: InvalidRequestError = InvalidRequestError
OpenAIError = OpenAIError RateLimitError = RateLimitError
RateLimitError = RateLimitError
APIError = APIError
APIConnectionError = APIConnectionError error = _ErrorModule()
AuthenticationError = AuthenticationError
InvalidRequestError = InvalidRequestError
Timeout = Timeout # --------------------------------------------------------------------------- #
# HTTP -> exception mapping
error = ErrorModule() # --------------------------------------------------------------------------- #
def map_http_error(status_code: Optional[int], message: str = "",
                   body=None) -> OpenAIError:
    """Convert an HTTP status (+ optional message/body) to the right subclass.

    Used by HTTP-based bot wrappers so that downstream ``except RateLimitError``
    blocks can keep matching on the compat exception types.

    Args:
        status_code: HTTP status of the failed response. ``None`` or ``0``
            is treated as a connection-level failure (no HTTP response).
        message: Human-readable error text. Non-string values are converted
            with ``str()`` so a structured error payload cannot make the
            mapping itself raise.
        body: Raw/parsed response body, attached to the returned exception.

    Returns:
        An exception *instance* (not raised) of the matching subclass:
        ``Timeout`` / ``APIConnectionError`` when there is no status,
        ``Timeout`` for 408, ``AuthenticationError`` for 401,
        ``PermissionDeniedError`` for 403, ``NotFoundError`` for 404,
        ``RateLimitError`` for 429, ``InvalidRequestError`` for any other
        4xx, and ``APIError`` for everything else (5xx and unclassified).
    """
    sc = status_code or 0
    msg = message or ""
    if not isinstance(msg, str):
        # Callers may forward whatever the server put in its error envelope.
        msg = str(msg)

    # Connection-level (no status / non-HTTP failure)
    if sc == 0:
        msg_lower = msg.lower()
        if "timeout" in msg_lower or "timed out" in msg_lower:
            return Timeout(msg, sc, body)
        return APIConnectionError(msg, sc, body)

    if sc == 408:
        return Timeout(msg, sc, body)
    if sc == 401:
        return AuthenticationError(msg, sc, body)
    if sc == 403:
        return PermissionDeniedError(msg, sc, body)
    if sc == 404:
        return NotFoundError(msg, sc, body)
    if sc == 429:
        return RateLimitError(msg, sc, body)
    if 400 <= sc < 500:
        return InvalidRequestError(msg, sc, body)
    # 5xx and any status not classified above share the generic error type.
    return APIError(msg, sc, body)
def wrap_http_error(http_err) -> OpenAIError:
    """Translate an HTTP-client error object into a compat exception.

    Works with any object; ``status_code``, ``message`` and ``body`` are read
    with ``getattr`` and default to ``None`` / ``""`` / ``None`` when absent.
    If the message is missing or empty, ``str(http_err)`` is used instead.
    The actual classification is delegated to :func:`map_http_error`.
    """
    status = getattr(http_err, "status_code", None)
    text = getattr(http_err, "message", "")
    if not text:
        text = str(http_err)
    payload = getattr(http_err, "body", None)
    return map_http_error(status, text, payload)
# Export all for easy import
__all__ = [ __all__ = [
'error', "error",
'OpenAIError', "OpenAIError",
'RateLimitError', "APIError",
'APIError', "APIConnectionError",
'APIConnectionError', "Timeout",
'AuthenticationError', "APITimeoutError",
'InvalidRequestError', "AuthenticationError",
'Timeout', "PermissionDeniedError",
'BadRequestError', "NotFoundError",
'APITimeoutError', "InvalidRequestError",
"BadRequestError",
"RateLimitError",
"map_http_error",
"wrap_http_error",
] ]

View File

@@ -0,0 +1,456 @@
# encoding:utf-8
"""
Lightweight HTTP client for OpenAI-compatible APIs.
This client is a drop-in replacement for the parts of the `openai` SDK that this
project actually uses (chat completions, completions, image generation), so we
can drop the hard dependency on `openai==0.27.x`.
Design goals:
- Pure `requests` based (no httpx / pydantic / openai SDK dependency).
- Returns plain `dict` responses with the same shape OpenAI's HTTP API returns,
so existing code that does `response["choices"][0]["message"]["content"]` /
`response["usage"]["total_tokens"]` keeps working.
- Streaming yields plain `dict` chunks (parsed SSE `data:` JSON), matching the
shape that `agent/protocol/agent_stream.py` consumes:
chunk["choices"][0]["delta"]["content" | "tool_calls" | "reasoning_content"]
chunk["choices"][0]["finish_reason"]
Plus dict-style error chunks: {"error": True, "message": ..., "status_code": ...}
- Compatible with arbitrary OpenAI-compatible endpoints (LinkAI, Azure-style
proxies, DeepSeek, Moonshot, etc.) by allowing per-call api_key / api_base
override and trusting whatever path/payload shape the caller passes.
"""
import json
from typing import Any, Dict, Generator, Optional
import requests
from common.log import logger
DEFAULT_API_BASE = "https://api.openai.com/v1"
DEFAULT_TIMEOUT = 600 # seconds; matches old openai SDK default
class OpenAIHTTPError(Exception):
    """Raised for non-2xx responses. Carries status code + parsed body.

    Attributes set on the instance:
        status_code: the HTTP status passed by the caller.
        body: the response body exactly as passed by the caller.
        message: a human-readable description, always a ``str``.
    """

    def __init__(self, status_code: int, body: Any, message: str = ""):
        """Build the error, deriving ``message`` from ``body`` when not given.

        Args:
            status_code: HTTP status code of the failed response.
            body: Parsed JSON body (``dict``) or any other representation.
            message: Optional explicit message; when empty, it is taken from
                an OpenAI-style ``{"error": {"message": ...}}`` or
                ``{"error": "..."}`` envelope, falling back to the first
                500 characters of ``str(body)``.
        """
        self.status_code = status_code
        self.body = body
        # Try to extract human-readable message from OpenAI-style error envelope
        if not message and isinstance(body, dict):
            err = body.get("error") or {}
            if isinstance(err, dict):
                message = err.get("message") or ""
            elif isinstance(err, str):
                message = err
        if not message:
            message = str(body)[:500]
        if not isinstance(message, str):
            # The envelope may carry a structured (non-string) message;
            # normalize it so consumers can rely on string operations.
            message = str(message)
        self.message = message
        super().__init__(f"HTTP {status_code}: {message}")
class OpenAIHTTPClient:
    """Minimal HTTP client for OpenAI-compatible endpoints.

    Per-instance defaults (api_key / api_base / proxy / timeout /
    extra_headers) are fixed at construction time and can be overridden on
    every call. ``extra_headers`` is the hook for Azure-style ``api-key``
    headers or custom routing headers.

    Error reporting differs by mode:
      - Non-streaming calls raise :class:`OpenAIHTTPError`.
      - Streaming calls never raise for HTTP/transport failures; they yield a
        single error chunk of the form
        ``{"error": {"message", "code", "type"}, "message", "status_code"}``
        and then stop.
    """

    # Blank-line sequences that terminate one SSE event. No two of them can
    # match at the same offset, so "earliest match wins" is unambiguous.
    _SSE_TERMINATORS = (b"\r\n\r\n", b"\n\n", b"\r\r")

    def __init__(
        self,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        proxy: Optional[str] = None,
        timeout: Optional[float] = None,
        extra_headers: Optional[Dict[str, str]] = None,
    ):
        """Store per-instance defaults.

        Args:
            api_key: Bearer token; when falsy no ``Authorization`` header is sent.
            api_base: Base URL; falls back to ``DEFAULT_API_BASE``. Trailing
                slashes are stripped.
            proxy: Proxy URL applied to both http and https traffic.
            timeout: Request timeout in seconds; falls back to ``DEFAULT_TIMEOUT``.
            extra_headers: Headers added to every request (copied defensively).
        """
        self.api_key = api_key
        self.api_base = (api_base or DEFAULT_API_BASE).rstrip("/")
        self.timeout = timeout if timeout is not None else DEFAULT_TIMEOUT
        self.proxies = {"http": proxy, "https": proxy} if proxy else None
        self.extra_headers = dict(extra_headers) if extra_headers else {}

    # ------------------------------------------------------------------ #
    # Public API surface (mirrors what the old openai SDK provided)
    # ------------------------------------------------------------------ #
    def chat_completions(
        self,
        *,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        timeout: Optional[float] = None,
        proxy: Optional[str] = None,
        extra_headers: Optional[Dict[str, str]] = None,
        extra_query: Optional[Dict[str, str]] = None,
        path: str = "/chat/completions",
        stream: bool = False,
        **payload,
    ):
        """POST a chat-completion request.

        All keyword arguments not listed in the signature become the JSON
        payload. ``path`` lets callers target non-standard routes.

        Returns:
            ``stream=False``: the parsed JSON response (``dict``).
            ``stream=True``: a generator of parsed SSE chunks (``dict``). On
            failure the generator yields one error chunk
            (``{"error": {...}, "message": ..., "status_code": ...}``) and stops.

        Raises:
            OpenAIHTTPError: only in non-streaming mode.
        """
        payload["stream"] = stream
        return self._request(
            path=path,
            payload=payload,
            api_key=api_key,
            api_base=api_base,
            timeout=timeout,
            proxy=proxy,
            extra_headers=extra_headers,
            extra_query=extra_query,
            stream=stream,
        )

    def completions(
        self,
        *,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        timeout: Optional[float] = None,
        proxy: Optional[str] = None,
        extra_headers: Optional[Dict[str, str]] = None,
        extra_query: Optional[Dict[str, str]] = None,
        **payload,
    ) -> Dict[str, Any]:
        """POST /completions (legacy text completion). Non-streaming only.

        Any ``stream`` key in the payload is discarded. ``proxy`` /
        ``extra_headers`` / ``extra_query`` are per-call overrides and are
        never sent as part of the JSON payload.

        Raises:
            OpenAIHTTPError: on transport failure or a >= 400 response.
        """
        payload.pop("stream", None)
        return self._request(
            path="/completions",
            payload=payload,
            api_key=api_key,
            api_base=api_base,
            timeout=timeout,
            proxy=proxy,
            extra_headers=extra_headers,
            extra_query=extra_query,
            stream=False,
        )

    def images_generate(
        self,
        *,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        timeout: Optional[float] = None,
        proxy: Optional[str] = None,
        extra_headers: Optional[Dict[str, str]] = None,
        extra_query: Optional[Dict[str, str]] = None,
        **payload,
    ) -> Dict[str, Any]:
        """POST /images/generations.

        ``proxy`` / ``extra_headers`` / ``extra_query`` are per-call overrides
        and are never sent as part of the JSON payload.

        Raises:
            OpenAIHTTPError: on transport failure or a >= 400 response.
        """
        return self._request(
            path="/images/generations",
            payload=payload,
            api_key=api_key,
            api_base=api_base,
            timeout=timeout,
            proxy=proxy,
            extra_headers=extra_headers,
            extra_query=extra_query,
            stream=False,
        )

    # ------------------------------------------------------------------ #
    # Internal helpers
    # ------------------------------------------------------------------ #
    def _build_headers(
        self,
        api_key: Optional[str],
        extra_headers: Optional[Dict[str, str]],
    ) -> Dict[str, str]:
        """Assemble request headers.

        ``api_key=None`` means "use the instance key"; an empty string
        explicitly disables the ``Authorization`` header. Per-call
        ``extra_headers`` win over instance-level ones.
        """
        key = api_key if api_key is not None else self.api_key
        headers = {"Content-Type": "application/json"}
        if key:
            headers["Authorization"] = f"Bearer {key}"
        if self.extra_headers:
            headers.update(self.extra_headers)
        if extra_headers:
            headers.update(extra_headers)
        return headers

    def _request(
        self,
        *,
        path: str,
        payload: Dict[str, Any],
        api_key: Optional[str],
        api_base: Optional[str],
        timeout: Optional[float],
        stream: bool,
        proxy: Optional[str] = None,
        extra_headers: Optional[Dict[str, str]] = None,
        extra_query: Optional[Dict[str, str]] = None,
    ):
        """Resolve per-call overrides and dispatch the POST.

        Returns the parsed JSON body, or (when ``stream`` is true) the
        generator produced by :meth:`_stream_chat`.

        Raises:
            OpenAIHTTPError: non-streaming only. Status 408 for timeouts,
                0 for other transport failures, otherwise the HTTP status.
        """
        base = api_base.rstrip("/") if api_base else self.api_base
        url = f"{base}{path}" if path.startswith("/") else f"{base}/{path}"
        headers = self._build_headers(api_key, extra_headers)
        req_timeout = timeout if timeout is not None else self.timeout
        proxies = {"http": proxy, "https": proxy} if proxy else self.proxies
        # Drop None-valued keys; some providers reject explicit nulls.
        clean_payload = {k: v for k, v in payload.items() if v is not None}

        if stream:
            # Return a generator. Errors during stream are yielded as a single
            # error chunk so callers (agent_stream) can map them to their
            # existing error-handling path without try/except around the loop.
            return self._stream_chat(
                url=url,
                headers=headers,
                payload=clean_payload,
                proxies=proxies,
                timeout=req_timeout,
                params=extra_query,
            )

        try:
            resp = requests.post(
                url,
                headers=headers,
                json=clean_payload,
                timeout=req_timeout,
                proxies=proxies,
                params=extra_query,
            )
        # Timeout must be tested first: ConnectTimeout is also a ConnectionError.
        except requests.exceptions.Timeout as e:
            raise OpenAIHTTPError(408, {}, f"Request timed out: {e}") from e
        except requests.exceptions.ConnectionError as e:
            raise OpenAIHTTPError(0, {}, f"Connection error: {e}") from e
        except requests.exceptions.RequestException as e:
            raise OpenAIHTTPError(0, {}, f"Request failed: {e}") from e
        return self._parse_response(resp)

    @staticmethod
    def _parse_response(resp: "requests.Response") -> Dict[str, Any]:
        """Return the JSON body, or raise ``OpenAIHTTPError`` for status >= 400.

        A body that is not valid JSON is wrapped as ``{"raw": <text>}``.
        """
        try:
            data = resp.json()
        except ValueError:
            data = {"raw": resp.text}
        if resp.status_code >= 400:
            raise OpenAIHTTPError(resp.status_code, data)
        return data

    def _stream_chat(
        self,
        *,
        url: str,
        headers: Dict[str, str],
        payload: Dict[str, Any],
        proxies: Optional[Dict[str, str]],
        timeout: float,
        params: Optional[Dict[str, str]] = None,
    ) -> Generator[Dict[str, Any], None, None]:
        """Stream an SSE response and yield parsed JSON chunks.

        Yields:
            - Normal chunks: the JSON object carried by each ``data:`` event.
            - On failure, one error chunk
              ``{"error": {"message", "code", "type"}, "message", "status_code"}``
              after which the generator terminates. ``status_code`` is the
              HTTP status, 408 for a request timeout, or 0 for other
              transport failures.

        The response is always closed when the generator finishes, including
        the HTTP-error path and early termination on ``[DONE]``.
        """
        try:
            resp = requests.post(
                url,
                headers=headers,
                json=payload,
                timeout=timeout,
                proxies=proxies,
                stream=True,
                params=params,
            )
        except requests.exceptions.Timeout as e:
            yield self._make_error_chunk(408, f"Request timed out: {e}")
            return
        except requests.exceptions.ConnectionError as e:
            yield self._make_error_chunk(0, f"Connection error: {e}")
            return
        except requests.exceptions.RequestException as e:
            yield self._make_error_chunk(0, f"Request failed: {e}")
            return

        # Everything that touches the open response lives inside one
        # try/finally so the connection is released on every exit path and
        # network errors while reading the body become error chunks.
        try:
            if resp.status_code >= 400:
                yield self._error_chunk_from_response(resp)
                return

            # IMPORTANT: do NOT use `iter_lines(decode_unicode=True)`.
            # `requests` decodes per network chunk, which mangles UTF-8
            # codepoints that straddle a chunk boundary. `_iter_sse_events`
            # buffers raw bytes until a full event is available and only
            # then decodes.
            for sse_event in self._iter_sse_events(resp):
                # `sse_event` is the joined `data:` payload as a str.
                if sse_event == "[DONE]":
                    return
                if not sse_event:
                    continue
                try:
                    chunk = json.loads(sse_event)
                except ValueError:
                    logger.debug(
                        f"[OpenAIHTTP] skip malformed SSE chunk: {sse_event[:200]}"
                    )
                    continue
                yield chunk
        except requests.exceptions.ChunkedEncodingError as e:
            yield self._make_error_chunk(0, f"Stream interrupted: {e}")
        except requests.exceptions.RequestException as e:
            yield self._make_error_chunk(0, f"Stream error: {e}")
        finally:
            # Best-effort cleanup: a failing close must not mask the outcome.
            try:
                resp.close()
            except Exception as e:
                logger.debug(f"[OpenAIHTTP] ignoring error while closing stream: {e}")

    @staticmethod
    def _error_chunk_from_response(resp: "requests.Response") -> Dict[str, Any]:
        """Build the error chunk for a streaming response with status >= 400.

        Extracts message / code / type from an OpenAI-style ``{"error": ...}``
        envelope when present; otherwise uses a truncated dump of the body.
        """
        try:
            body = resp.json()
        except ValueError:
            body = {"raw": resp.text[:1000]}

        err_msg = ""
        err_code = ""
        err_type = ""
        if isinstance(body, dict):
            err = body.get("error") or {}
            if isinstance(err, dict):
                err_msg = err.get("message") or ""
                err_code = err.get("code") or ""
                err_type = err.get("type") or ""
            elif isinstance(err, str):
                err_msg = err
        if not err_msg:
            err_msg = str(body)[:500]
        return OpenAIHTTPClient._make_error_chunk(
            resp.status_code, err_msg, err_code, err_type
        )

    @staticmethod
    def _iter_sse_events(resp: "requests.Response") -> Generator[str, None, None]:
        """Decode an SSE byte stream into joined ``data:`` payloads.

        Implements the subset of the SSE spec that OpenAI-compatible
        endpoints use:
          - Events are separated by blank lines (\\r\\r, \\n\\n, or \\r\\n\\r\\n).
          - Within an event, multiple ``data:`` lines are joined with "\\n".
          - ``event:``, ``id:``, ``retry:`` and comment lines (``:``) are
            tolerated but not yielded.
          - Bytes are buffered until a complete event boundary is seen so
            UTF-8 codepoints split across network chunks decode correctly.

        Yields each event's joined ``data`` string. The terminal sentinel
        ``[DONE]`` is yielded as a literal string so the caller can stop.
        """
        terminators = OpenAIHTTPClient._SSE_TERMINATORS
        parse_event = OpenAIHTTPClient._parse_sse_event

        buf = b""
        for raw in resp.iter_content(chunk_size=None, decode_unicode=False):
            if not raw:
                continue
            buf += raw
            # Drain every complete event currently sitting in the buffer.
            while True:
                end_pos, term_len = -1, 0
                for terminator in terminators:
                    pos = buf.find(terminator)
                    if pos != -1 and (end_pos == -1 or pos < end_pos):
                        end_pos, term_len = pos, len(terminator)
                if end_pos == -1:
                    break
                event_bytes = buf[:end_pos]
                buf = buf[end_pos + term_len:]
                data = parse_event(event_bytes)
                if data is not None:
                    yield data

        # Flush any trailing bytes the server forgot to terminate (some
        # providers omit the final blank line).
        if buf.strip():
            data = parse_event(buf)
            if data is not None:
                yield data

    @staticmethod
    def _parse_sse_event(event_bytes: bytes) -> Optional[str]:
        """Extract the joined ``data:`` payload from one raw SSE event.

        Returns ``None`` when the event carries no ``data`` field at all
        (comments, ``event:``-only blocks, ...).

        Lines are split on the *bytes* level on purpose: ``bytes.splitlines``
        breaks only at CR / LF / CRLF, which is exactly the SSE line
        terminator set. ``str.splitlines`` would additionally break on
        U+2028, U+2029, U+0085 and friends, which may legally appear raw
        inside a JSON string and would truncate the payload. Splitting bytes
        is safe because 0x0A / 0x0D never occur inside a multibyte UTF-8
        sequence.
        """
        data_lines = []
        for line in event_bytes.splitlines():
            if not line or line.startswith(b":"):
                continue
            field, _, value = line.partition(b":")
            # Per SSE spec, a single optional space after the colon is part
            # of the framing, not the value.
            if value.startswith(b" "):
                value = value[1:]
            if field == b"data":
                data_lines.append(value)
            # Other fields (event/id/retry) are intentionally ignored.
        if not data_lines:
            return None
        # errors="replace" is a safety net for malformed upstream bytes; for
        # valid UTF-8 it is identical to a strict decode.
        return b"\n".join(data_lines).decode("utf-8", errors="replace")

    @staticmethod
    def _make_error_chunk(
        status_code: int,
        message: str,
        err_code: Any = "",
        err_type: Any = "",
    ) -> Dict[str, Any]:
        """Build the error chunk yielded by streaming calls.

        The nested ``error`` object follows the OpenAI envelope; the
        top-level ``message`` / ``status_code`` keys are kept for consumers
        that read the flat shape.
        """
        return {
            "error": {"message": message, "code": err_code, "type": err_type},
            "message": message,
            "status_code": status_code,
        }
def get_default_client(
    api_key: Optional[str] = None,
    api_base: Optional[str] = None,
    proxy: Optional[str] = None,
    timeout: Optional[float] = None,
) -> OpenAIHTTPClient:
    """Convenience factory for a one-shot :class:`OpenAIHTTPClient`.

    Intended for call sites that just need a client and do not want to keep
    one around. A new instance is built on every call; nothing is cached or
    shared. All arguments are forwarded unchanged to the constructor.
    """
    client_options = {
        "api_key": api_key,
        "api_base": api_base,
        "proxy": proxy,
        "timeout": timeout,
    }
    return OpenAIHTTPClient(**client_options)

View File

@@ -8,11 +8,11 @@ This includes: OpenAI, LinkAI, Azure OpenAI, and many third-party providers.
""" """
import json import json
import openai
import requests import requests
from typing import Optional from typing import Optional
from common.log import logger from common.log import logger
from agent.protocol.message_utils import drop_orphaned_tool_results_openai from agent.protocol.message_utils import drop_orphaned_tool_results_openai
from models.openai.openai_http_client import OpenAIHTTPClient, OpenAIHTTPError
class OpenAICompatibleBot: class OpenAICompatibleBot:
@@ -135,49 +135,87 @@ class OpenAICompatibleBot:
"status_code": 500 "status_code": 500
} }
def _get_http_client(self) -> OpenAIHTTPClient:
"""Build an HTTP client honoring the global proxy config.
Subclasses can override this for custom auth headers (e.g. Azure's
``api-key`` header) by returning a pre-configured client.
"""
from config import conf
proxy = conf().get("proxy") or None
return OpenAIHTTPClient(proxy=proxy)
def _handle_sync_response(self, request_params, api_key, api_base): def _handle_sync_response(self, request_params, api_key, api_base):
"""Handle synchronous OpenAI API response""" """Handle synchronous chat-completion via HTTP."""
params = dict(request_params)
params.pop("stream", None)
# Translate legacy SDK timeout kwarg to our HTTP client kwarg.
timeout = params.pop("request_timeout", None) or params.pop("timeout", None)
try: try:
# Build kwargs with explicit API configuration client = self._get_http_client()
kwargs = dict(request_params) return client.chat_completions(
if api_key: api_key=api_key,
kwargs["api_key"] = api_key api_base=api_base,
if api_base: timeout=timeout,
kwargs["api_base"] = api_base stream=False,
**params,
response = openai.ChatCompletion.create(**kwargs) )
return response except OpenAIHTTPError as e:
logger.error(
f"[{self.__class__.__name__}] sync response error: "
f"HTTP {e.status_code}: {e.message}"
)
return {
"error": True,
"message": e.message,
"status_code": e.status_code or 500,
}
except Exception as e: except Exception as e:
logger.error(f"[{self.__class__.__name__}] sync response error: {e}") logger.error(f"[{self.__class__.__name__}] sync response error: {e}")
return { return {
"error": True, "error": True,
"message": str(e), "message": str(e),
"status_code": 500 "status_code": 500,
} }
def _handle_stream_response(self, request_params, api_key, api_base): def _handle_stream_response(self, request_params, api_key, api_base):
"""Handle streaming OpenAI API response""" """Handle streaming chat-completion via HTTP (SSE).
Yields dict chunks in OpenAI's standard streaming shape:
{"choices": [{"delta": {...}, "finish_reason": ...}], ...}
On error, yields a single ``{"error": ..., "status_code": ...}`` chunk
— the same contract :mod:`agent.protocol.agent_stream` already handles.
"""
params = dict(request_params)
params.pop("stream", None)
timeout = params.pop("request_timeout", None) or params.pop("timeout", None)
try: try:
# Build kwargs with explicit API configuration client = self._get_http_client()
kwargs = dict(request_params) stream = client.chat_completions(
if api_key: api_key=api_key,
kwargs["api_key"] = api_key api_base=api_base,
if api_base: timeout=timeout,
kwargs["api_base"] = api_base stream=True,
**params,
stream = openai.ChatCompletion.create(**kwargs) )
# Stream chunks to caller
for chunk in stream: for chunk in stream:
yield chunk yield chunk
except OpenAIHTTPError as e:
logger.error(
f"[{self.__class__.__name__}] stream response error: "
f"HTTP {e.status_code}: {e.message}"
)
yield {
"error": True,
"message": e.message,
"status_code": e.status_code or 500,
}
except Exception as e: except Exception as e:
logger.error(f"[{self.__class__.__name__}] stream response error: {e}") logger.error(f"[{self.__class__.__name__}] stream response error: {e}")
yield { yield {
"error": True, "error": True,
"message": str(e), "message": str(e),
"status_code": 500 "status_code": 500,
} }
def _convert_tools_to_openai_format(self, tools): def _convert_tools_to_openai_format(self, tools):

View File

@@ -1,4 +1,3 @@
openai==0.27.8
aiohttp>=3.8.6,<3.10 aiohttp>=3.8.6,<3.10
requests>=2.28.2 requests>=2.28.2
chardet>=5.1.0 chardet>=5.1.0

View File

@@ -3,8 +3,6 @@ google voice service
""" """
import json import json
import openai
from bridge.reply import Reply, ReplyType from bridge.reply import Reply, ReplyType
from common.log import logger from common.log import logger
from config import conf from config import conf
@@ -15,7 +13,9 @@ import datetime, random
class OpenaiVoice(Voice): class OpenaiVoice(Voice):
def __init__(self): def __init__(self):
openai.api_key = conf().get("open_ai_api_key") # No-op: this implementation calls OpenAI HTTP endpoints directly via
# `requests`, so it does not need a global SDK to be configured.
pass
def voiceToText(self, voice_file): def voiceToText(self, voice_file):
logger.debug("[Openai] voice file name={}".format(voice_file)) logger.debug("[Openai] voice file name={}".format(voice_file))