mirror of
https://github.com/zhayujie/chatgpt-on-wechat.git
synced 2026-06-02 00:57:41 +08:00
fix(vision): respect tool.vision.model and add automatic fallback #2792
This commit is contained in:
@@ -2,12 +2,18 @@
|
||||
Vision tool - Analyze images using Vision API.
|
||||
Supports local files (auto base64-encoded) and HTTP URLs.
|
||||
|
||||
Provider priority (default):
|
||||
1. Main model via bot.call_vision — zero extra cost
|
||||
2. Other models whose API key is configured — auto-discovered
|
||||
3. OpenAI / LinkAI raw HTTP — reliable fallback
|
||||
When use_linkai=true, LinkAI is promoted to #1.
|
||||
When tool.vision.model is set, that model is used exclusively first.
|
||||
Provider resolution:
|
||||
- tool.vision.model (if set) means "prefer this model first; fall back to
|
||||
other configured providers if it fails". The model name is mapped to its
|
||||
native provider (e.g. doubao-* → Doubao, kimi-* → Moonshot, gpt-* →
|
||||
OpenAI/LinkAI). That provider is tried first, then the standard auto
|
||||
chain runs as fallback (with the preferred provider de-duplicated).
|
||||
- Auto chain priority:
|
||||
1. Main model via bot.call_vision — only when the main bot is known
|
||||
to actually support vision (not just expose a call_vision method).
|
||||
2. Other models whose API key is configured.
|
||||
3. OpenAI / LinkAI raw HTTP.
|
||||
When use_linkai=true, LinkAI is promoted to #1.
|
||||
"""
|
||||
|
||||
import base64
|
||||
@@ -52,6 +58,24 @@ _DISCOVERABLE_MODELS = [
|
||||
("minimax_api_key", const.MiniMax, const.MINIMAX_M2_7, "MiniMax"),
|
||||
]
|
||||
|
||||
# Model name prefix → discoverable provider display_name.
|
||||
# Used to auto-route tool.vision.model to its native provider.
|
||||
# Matched case-insensitively; longest prefix wins.
|
||||
_MODEL_PREFIX_TO_PROVIDER = [
|
||||
("doubao-", "Doubao"),
|
||||
("kimi-", "Moonshot"),
|
||||
("moonshot-", "Moonshot"),
|
||||
("qwen", "DashScope"), # qwen-*, qwen3-*, qwen3.6-*, etc.
|
||||
("claude-", "Claude"),
|
||||
("gemini-", "Gemini"),
|
||||
("glm-", "ZhipuAI"),
|
||||
("minimax-", "MiniMax"),
|
||||
("abab", "MiniMax"),
|
||||
]
|
||||
|
||||
# Model prefixes that natively belong to OpenAI / LinkAI (raw HTTP providers).
|
||||
_OPENAI_MODEL_PREFIXES = ("gpt-", "o1-", "o3-", "o4-", "chatgpt-")
|
||||
|
||||
|
||||
@dataclass
|
||||
class VisionProvider:
|
||||
@@ -126,6 +150,9 @@ class Vision(BaseTool):
|
||||
except Exception as e:
|
||||
return ToolResult.fail(f"Error: {e}")
|
||||
|
||||
# Default model is only used as a last-resort placeholder for providers
|
||||
# whose VisionProvider.model_override is None (e.g. raw OpenAI provider
|
||||
# when the user did not configure tool.vision.model).
|
||||
return self._call_with_fallback(providers, DEFAULT_MODEL, question, image_content)
|
||||
|
||||
def _call_with_fallback(self, providers: List[VisionProvider], model: str,
|
||||
@@ -162,29 +189,55 @@ class Vision(BaseTool):
|
||||
|
||||
def _resolve_providers(self) -> List[VisionProvider]:
|
||||
"""
|
||||
Build an ordered list of available providers.
|
||||
Build an ordered list of providers to try.
|
||||
|
||||
Priority:
|
||||
- use_linkai=true → [LinkAI, MainModel, OtherModels…, OpenAI]
|
||||
- default → [MainModel, OtherModels…, OpenAI, LinkAI]
|
||||
Semantics of `tool.vision.model`:
|
||||
"Prefer this model first; fall back to other configured providers
|
||||
if it fails."
|
||||
|
||||
"OtherModels" are auto-discovered from configured API keys.
|
||||
The main model's bot_type is excluded from OtherModels to avoid
|
||||
duplicating the MainModel provider.
|
||||
Order:
|
||||
1. The provider that natively serves `tool.vision.model` (if any
|
||||
and its API key is configured) — using the user-specified model
|
||||
name verbatim.
|
||||
2. Auto-discovery chain as fallback:
|
||||
- use_linkai=true → [LinkAI, MainModel?, OtherModels…, OpenAI]
|
||||
- default → [MainModel?, OtherModels…, OpenAI, LinkAI]
|
||||
MainModel is only included when the main bot is known to support
|
||||
vision (see _main_bot_supports_vision).
|
||||
|
||||
Providers that share the same display name as the preferred provider
|
||||
are de-duplicated to avoid retrying the same endpoint twice.
|
||||
"""
|
||||
use_linkai = conf().get("use_linkai", False) and conf().get("linkai_api_key")
|
||||
user_model = self._resolve_user_vision_model()
|
||||
providers: List[VisionProvider] = []
|
||||
|
||||
# Step 1: preferred provider derived from tool.vision.model
|
||||
if user_model:
|
||||
preferred = self._route_by_model_name(user_model)
|
||||
if preferred:
|
||||
providers.extend(preferred)
|
||||
|
||||
# Step 2: auto-discovery chain as fallback
|
||||
existing = {p.name for p in providers}
|
||||
fallback: List[VisionProvider] = []
|
||||
use_linkai = conf().get("use_linkai", False) and conf().get("linkai_api_key")
|
||||
|
||||
if use_linkai:
|
||||
self._append_provider(providers, self._build_linkai_provider)
|
||||
self._append_provider(providers, self._build_main_model_provider)
|
||||
self._append_other_model_providers(providers)
|
||||
self._append_provider(providers, self._build_openai_provider)
|
||||
self._append_provider(fallback, lambda: self._build_linkai_provider(user_model))
|
||||
self._append_provider(fallback, self._build_main_model_provider)
|
||||
self._append_other_model_providers(fallback, preferred_model=user_model)
|
||||
self._append_provider(fallback, lambda: self._build_openai_provider(user_model))
|
||||
else:
|
||||
self._append_provider(providers, self._build_main_model_provider)
|
||||
self._append_other_model_providers(providers)
|
||||
self._append_provider(providers, self._build_openai_provider)
|
||||
self._append_provider(providers, self._build_linkai_provider)
|
||||
self._append_provider(fallback, self._build_main_model_provider)
|
||||
self._append_other_model_providers(fallback, preferred_model=user_model)
|
||||
self._append_provider(fallback, lambda: self._build_openai_provider(user_model))
|
||||
self._append_provider(fallback, lambda: self._build_linkai_provider(user_model))
|
||||
|
||||
for p in fallback:
|
||||
if p.name in existing:
|
||||
continue
|
||||
providers.append(p)
|
||||
existing.add(p.name)
|
||||
|
||||
return providers
|
||||
|
||||
@@ -194,18 +247,115 @@ class Vision(BaseTool):
|
||||
if p:
|
||||
providers.append(p)
|
||||
|
||||
def _append_other_model_providers(self, providers: List[VisionProvider]) -> None:
|
||||
@staticmethod
|
||||
def _resolve_user_vision_model() -> Optional[str]:
|
||||
"""Read tool.vision.model from config; return None if unset/blank."""
|
||||
tool_conf = conf().get("tool", {})
|
||||
if not isinstance(tool_conf, dict):
|
||||
return None
|
||||
vision_conf = tool_conf.get("vision", {})
|
||||
if not isinstance(vision_conf, dict):
|
||||
return None
|
||||
m = vision_conf.get("model")
|
||||
if isinstance(m, str) and m.strip():
|
||||
return m.strip()
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _infer_provider_from_model(model_name: str) -> Optional[str]:
|
||||
"""
|
||||
Infer the provider display name from a model name's prefix.
|
||||
Returns None when no rule matches (or for OpenAI-family names, which
|
||||
are handled separately by the caller).
|
||||
"""
|
||||
if not model_name:
|
||||
return None
|
||||
lower = model_name.lower()
|
||||
# Sort by prefix length desc so e.g. "moonshot-" wins over hypothetical "moo-"
|
||||
for prefix, display_name in sorted(_MODEL_PREFIX_TO_PROVIDER, key=lambda x: -len(x[0])):
|
||||
if lower.startswith(prefix.lower()):
|
||||
return display_name
|
||||
return None
|
||||
|
||||
def _route_by_model_name(self, user_model: str) -> Optional[List[VisionProvider]]:
|
||||
"""
|
||||
Try to build a provider list using the user-specified model name.
|
||||
Returns:
|
||||
- [provider] : matched and the provider's key is configured
|
||||
- [] : matched but key missing → tell caller to surface this
|
||||
as a hard error rather than silently falling back
|
||||
- None : no rule matches → caller should fall through to auto
|
||||
"""
|
||||
lower = user_model.lower()
|
||||
|
||||
# OpenAI / LinkAI family
|
||||
if lower.startswith(_OPENAI_MODEL_PREFIXES):
|
||||
providers: List[VisionProvider] = []
|
||||
# Prefer LinkAI when explicitly enabled, else OpenAI first
|
||||
use_linkai = conf().get("use_linkai", False) and conf().get("linkai_api_key")
|
||||
if use_linkai:
|
||||
self._append_provider(providers, lambda: self._build_linkai_provider(user_model))
|
||||
self._append_provider(providers, lambda: self._build_openai_provider(user_model))
|
||||
else:
|
||||
self._append_provider(providers, lambda: self._build_openai_provider(user_model))
|
||||
self._append_provider(providers, lambda: self._build_linkai_provider(user_model))
|
||||
if providers:
|
||||
return providers
|
||||
logger.warning(f"[Vision] tool.vision.model='{user_model}' looks like an OpenAI "
|
||||
f"model but neither OPENAI_API_KEY nor LINKAI_API_KEY is configured.")
|
||||
return None # fall through to auto
|
||||
|
||||
# Discoverable native providers (Doubao, Moonshot, etc.)
|
||||
target_display = self._infer_provider_from_model(user_model)
|
||||
if not target_display:
|
||||
return None # unknown prefix → auto
|
||||
|
||||
for config_key, bot_type, _default_model, display_name in _DISCOVERABLE_MODELS:
|
||||
if display_name != target_display:
|
||||
continue
|
||||
api_key = conf().get(config_key, "")
|
||||
if not api_key or not api_key.strip():
|
||||
logger.warning(f"[Vision] tool.vision.model='{user_model}' routes to "
|
||||
f"'{display_name}' but '{config_key}' is not configured. "
|
||||
f"Falling back to auto-discovery.")
|
||||
return None # fall through to auto
|
||||
try:
|
||||
from models.bot_factory import create_bot
|
||||
bot = create_bot(bot_type)
|
||||
if not hasattr(bot, 'call_vision'):
|
||||
logger.warning(f"[Vision] '{display_name}' bot does not implement call_vision.")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.warning(f"[Vision] Failed to create '{display_name}' bot: {e}")
|
||||
return None
|
||||
|
||||
return [VisionProvider(
|
||||
name=display_name,
|
||||
api_key="",
|
||||
api_base="",
|
||||
model_override=user_model,
|
||||
use_bot=True,
|
||||
fallback_bot=bot,
|
||||
)]
|
||||
|
||||
return None
|
||||
|
||||
def _append_other_model_providers(self, providers: List[VisionProvider],
|
||||
preferred_model: Optional[str] = None) -> None:
|
||||
"""
|
||||
Auto-discover other models whose API key is configured.
|
||||
Skip the main model's own bot_type (already covered by MainModel provider).
|
||||
Skip bot_types that already have a provider in the list (e.g. OpenAI).
|
||||
|
||||
If preferred_model matches a provider's family (e.g. "doubao-*" matches
|
||||
Doubao), use it instead of that provider's hard-coded default model.
|
||||
"""
|
||||
# Determine main model's bot_type so we can skip it
|
||||
main_bot_type = None
|
||||
if self.model and hasattr(self.model, '_resolve_bot_type'):
|
||||
main_bot_type = self.model._resolve_bot_type(conf().get("model", ""))
|
||||
|
||||
existing_names = {p.name for p in providers}
|
||||
preferred_provider = self._infer_provider_from_model(preferred_model) if preferred_model else None
|
||||
|
||||
for config_key, bot_type, default_model, display_name in _DISCOVERABLE_MODELS:
|
||||
if display_name in existing_names:
|
||||
@@ -216,7 +366,6 @@ class Vision(BaseTool):
|
||||
if not api_key or not api_key.strip():
|
||||
continue
|
||||
|
||||
# Create a bot instance and check if it supports call_vision
|
||||
try:
|
||||
from models.bot_factory import create_bot
|
||||
bot = create_bot(bot_type)
|
||||
@@ -225,62 +374,95 @@ class Vision(BaseTool):
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
model_for_provider = (preferred_model
|
||||
if preferred_provider == display_name and preferred_model
|
||||
else default_model)
|
||||
|
||||
providers.append(VisionProvider(
|
||||
name=display_name,
|
||||
api_key="",
|
||||
api_base="",
|
||||
model_override=default_model,
|
||||
model_override=model_for_provider,
|
||||
use_bot=True,
|
||||
fallback_bot=bot,
|
||||
))
|
||||
|
||||
def _resolve_vision_model(self) -> Optional[str]:
|
||||
def _main_bot_supports_vision(self, bot) -> bool:
|
||||
"""
|
||||
Determine which model to use for vision.
|
||||
Whether the main bot is known to natively support vision.
|
||||
|
||||
1. User explicit config: tool.vision.model in config.json
|
||||
2. Fallback to the main configured model name
|
||||
Having a `call_vision` method is necessary but not sufficient — some
|
||||
bots (e.g. DeepSeek) implement the method against an endpoint that
|
||||
does not actually serve vision models, which causes silent failures
|
||||
when a vendor-foreign model name (e.g. doubao-*) is forwarded.
|
||||
|
||||
We trust call_vision only when:
|
||||
- The bot exposes a truthy `supports_vision` attribute, OR
|
||||
- The configured main model name has a known multimodal prefix
|
||||
handled by this bot's own vendor (claude-/gemini-/glm-/qwen-/
|
||||
kimi-/doubao-/MiniMax-/abab*/gpt-*).
|
||||
"""
|
||||
tool_conf = conf().get("tool", {})
|
||||
user_vision_model = tool_conf.get("vision", {}).get("model") if isinstance(tool_conf, dict) else None
|
||||
if user_vision_model:
|
||||
return user_vision_model
|
||||
model_name = conf().get("model", "")
|
||||
return model_name or None
|
||||
if bot is None:
|
||||
return False
|
||||
if getattr(bot, "supports_vision", False):
|
||||
return True
|
||||
main_model = (conf().get("model") or "").lower()
|
||||
if not main_model:
|
||||
return False
|
||||
if main_model.startswith(_OPENAI_MODEL_PREFIXES):
|
||||
return True
|
||||
return self._infer_provider_from_model(main_model) is not None
|
||||
|
||||
def _build_main_model_provider(self) -> Optional[VisionProvider]:
|
||||
"""
|
||||
Use the vendor's own model for vision via bot.call_vision.
|
||||
Only available when the bot class has call_vision.
|
||||
Gated by _main_bot_supports_vision so non-vision bots (DeepSeek, etc.)
|
||||
do not get routed vendor-foreign model names.
|
||||
"""
|
||||
if not (self.model and hasattr(self.model, 'bot')):
|
||||
return None
|
||||
try:
|
||||
bot = self.model.bot
|
||||
if not hasattr(bot, 'call_vision'):
|
||||
return None
|
||||
except Exception:
|
||||
return None
|
||||
if not hasattr(bot, 'call_vision'):
|
||||
return None
|
||||
if not self._main_bot_supports_vision(bot):
|
||||
return None
|
||||
|
||||
vision_model = self._resolve_vision_model()
|
||||
# Use the configured main model name; do NOT inject tool.vision.model
|
||||
# here, because by the time we reach this branch the tool.vision.model
|
||||
# routing has already been attempted (and either matched the main bot
|
||||
# or failed to find a provider).
|
||||
main_model_name = conf().get("model") or None
|
||||
|
||||
return VisionProvider(
|
||||
name=_MAIN_MODEL_PROVIDER_NAME,
|
||||
api_key="",
|
||||
api_base="",
|
||||
model_override=vision_model,
|
||||
model_override=main_model_name,
|
||||
use_bot=True,
|
||||
)
|
||||
|
||||
def _build_openai_provider(self) -> Optional[VisionProvider]:
|
||||
def _build_openai_provider(self, preferred_model: Optional[str] = None) -> Optional[VisionProvider]:
|
||||
api_key = conf().get("open_ai_api_key") or os.environ.get("OPENAI_API_KEY")
|
||||
if not api_key:
|
||||
return None
|
||||
api_base = (conf().get("open_ai_api_base") or os.environ.get("OPENAI_API_BASE", "")).rstrip("/") \
|
||||
or "https://api.openai.com/v1"
|
||||
return VisionProvider(name="OpenAI", api_key=api_key, api_base=self._ensure_v1(api_base))
|
||||
# Only honor preferred_model when it looks like an OpenAI-family name;
|
||||
# otherwise the OpenAI endpoint would 400 on a vendor-specific name.
|
||||
model_override = preferred_model if (
|
||||
preferred_model and preferred_model.lower().startswith(_OPENAI_MODEL_PREFIXES)
|
||||
) else None
|
||||
return VisionProvider(
|
||||
name="OpenAI",
|
||||
api_key=api_key,
|
||||
api_base=self._ensure_v1(api_base),
|
||||
model_override=model_override,
|
||||
)
|
||||
|
||||
def _build_linkai_provider(self) -> Optional[VisionProvider]:
|
||||
def _build_linkai_provider(self, preferred_model: Optional[str] = None) -> Optional[VisionProvider]:
|
||||
api_key = conf().get("linkai_api_key") or os.environ.get("LINKAI_API_KEY")
|
||||
if not api_key:
|
||||
return None
|
||||
@@ -290,8 +472,15 @@ class Vision(BaseTool):
|
||||
extra = get_cloud_headers(api_key)
|
||||
extra.pop("Authorization", None)
|
||||
extra.pop("Content-Type", None)
|
||||
return VisionProvider(name="LinkAI", api_key=api_key, api_base=self._ensure_v1(api_base),
|
||||
extra_headers=extra)
|
||||
# LinkAI is a multi-vendor proxy and accepts most model names, so we
|
||||
# honor any user-configured model name here.
|
||||
return VisionProvider(
|
||||
name="LinkAI",
|
||||
api_key=api_key,
|
||||
api_base=self._ensure_v1(api_base),
|
||||
extra_headers=extra,
|
||||
model_override=preferred_model,
|
||||
)
|
||||
|
||||
def _call_via_bot(self, model: str, question: str, image_content: dict,
|
||||
provider: Optional[VisionProvider] = None) -> ToolResult:
|
||||
|
||||
@@ -47,6 +47,8 @@ Vision 工具采用多级自动选择 + 自动兜底策略,无需手动配置
|
||||
}
|
||||
```
|
||||
|
||||
指定的模型会被**优先使用**,工具会根据模型名自动路由到对应的 provider;若调用失败,会自动 fallback 到其他已配置的 provider。
|
||||
|
||||
大多数情况下无需配置,主模型支持多模态或配置任意一个支持视觉的 API Key 即可自动工作。
|
||||
|
||||
## 参数
|
||||
|
||||
Reference in New Issue
Block a user