mirror of
https://github.com/zhayujie/chatgpt-on-wechat.git
synced 2026-06-02 00:57:41 +08:00
feat(qianfan): scope vision support to multimodal models
This commit is contained in:
@@ -609,7 +609,7 @@ API Key 创建:在 [控制台](https://aistudio.google.com/app/apikey?hl=zh-cn
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
- `model`: 默认推荐填写 `ernie-5.0`,也可填写 `ernie-4.5-turbo-128k`、`ernie-4.5-turbo-32k`、`ernie-x1-turbo-32k`;Vision 工具可使用 `ernie-4.5-turbo-vl`
|
- `model`: 默认推荐填写 `ernie-5.0`(多模态,可直接识图),也可填写 `ernie-x1.1`、`ernie-4.5-turbo-128k`、`ernie-4.5-turbo-32k`;当主模型为纯文本 ERNIE 时,Vision 工具会自动 fallback 到 `ernie-4.5-turbo-vl`
|
||||||
- `qianfan_api_key`: 百度千帆 API Key,通常以 `bce-v3/` 开头,可在百度智能云控制台创建
|
- `qianfan_api_key`: 百度千帆 API Key,通常以 `bce-v3/` 开头,可在百度智能云控制台创建
|
||||||
- `qianfan_api_base`: 可选,默认为 `https://qianfan.baidubce.com/v2`
|
- `qianfan_api_base`: 可选,默认为 `https://qianfan.baidubce.com/v2`
|
||||||
|
|
||||||
|
|||||||
@@ -53,8 +53,8 @@ _DISCOVERABLE_MODELS = [
|
|||||||
("ark_api_key", const.DOUBAO, const.DOUBAO_SEED_2_PRO, "Doubao"),
|
("ark_api_key", const.DOUBAO, const.DOUBAO_SEED_2_PRO, "Doubao"),
|
||||||
("dashscope_api_key", const.QWEN_DASHSCOPE, const.QWEN36_PLUS, "DashScope"),
|
("dashscope_api_key", const.QWEN_DASHSCOPE, const.QWEN36_PLUS, "DashScope"),
|
||||||
("claude_api_key", const.CLAUDEAPI, const.CLAUDE_4_6_SONNET, "Claude"),
|
("claude_api_key", const.CLAUDEAPI, const.CLAUDE_4_6_SONNET, "Claude"),
|
||||||
("qianfan_api_key", const.QIANFAN, const.ERNIE_45_TURBO_VL, "Qianfan"),
|
|
||||||
("gemini_api_key", const.GEMINI, const.GEMINI_31_FLASH_LITE_PRE, "Gemini"),
|
("gemini_api_key", const.GEMINI, const.GEMINI_31_FLASH_LITE_PRE, "Gemini"),
|
||||||
|
("qianfan_api_key", const.QIANFAN, const.ERNIE_45_TURBO_VL, "Qianfan"),
|
||||||
("zhipu_ai_api_key", const.ZHIPU_AI, const.GLM_4_7, "ZhipuAI"),
|
("zhipu_ai_api_key", const.ZHIPU_AI, const.GLM_4_7, "ZhipuAI"),
|
||||||
("minimax_api_key", const.MiniMax, const.MINIMAX_M2_7, "MiniMax"),
|
("minimax_api_key", const.MiniMax, const.MINIMAX_M2_7, "MiniMax"),
|
||||||
]
|
]
|
||||||
@@ -346,15 +346,21 @@ class Vision(BaseTool):
|
|||||||
preferred_model: Optional[str] = None) -> None:
|
preferred_model: Optional[str] = None) -> None:
|
||||||
"""
|
"""
|
||||||
Auto-discover other models whose API key is configured.
|
Auto-discover other models whose API key is configured.
|
||||||
Skip the main model's own bot_type (already covered by MainModel provider).
|
Skip the main model's own bot_type (already covered by MainModel
|
||||||
Skip bot_types that already have a provider in the list (e.g. OpenAI).
|
provider), unless the main model itself does not support vision —
|
||||||
|
in that case we still want the vendor's dedicated vision model
|
||||||
|
as a fallback. Also skip bot_types that already appear in the
|
||||||
|
provider list.
|
||||||
|
|
||||||
If preferred_model matches a provider's family (e.g. "doubao-*" matches
|
If preferred_model matches a provider's family, use it instead
|
||||||
Doubao), use it instead of that provider's hard-coded default model.
|
of that provider's hard-coded default model.
|
||||||
"""
|
"""
|
||||||
main_bot_type = None
|
main_bot_type = None
|
||||||
|
main_bot_supports_vision = False
|
||||||
if self.model and hasattr(self.model, '_resolve_bot_type'):
|
if self.model and hasattr(self.model, '_resolve_bot_type'):
|
||||||
main_bot_type = self.model._resolve_bot_type(conf().get("model", ""))
|
main_bot_type = self.model._resolve_bot_type(conf().get("model", ""))
|
||||||
|
main_bot = getattr(self.model, "bot", None)
|
||||||
|
main_bot_supports_vision = self._main_bot_supports_vision(main_bot)
|
||||||
|
|
||||||
existing_names = {p.name for p in providers}
|
existing_names = {p.name for p in providers}
|
||||||
preferred_provider = self._infer_provider_from_model(preferred_model) if preferred_model else None
|
preferred_provider = self._infer_provider_from_model(preferred_model) if preferred_model else None
|
||||||
@@ -362,7 +368,11 @@ class Vision(BaseTool):
|
|||||||
for config_key, bot_type, default_model, display_name in _DISCOVERABLE_MODELS:
|
for config_key, bot_type, default_model, display_name in _DISCOVERABLE_MODELS:
|
||||||
if display_name in existing_names:
|
if display_name in existing_names:
|
||||||
continue
|
continue
|
||||||
if bot_type == main_bot_type:
|
# Same bot_type as the main model is normally handled by the
|
||||||
|
# MainModel provider; only skip it here if the main model
|
||||||
|
# actually supports vision. Otherwise fall through and add
|
||||||
|
# the vendor's dedicated vision model as a fallback.
|
||||||
|
if bot_type == main_bot_type and main_bot_supports_vision:
|
||||||
continue
|
continue
|
||||||
api_key = conf().get(config_key, "")
|
api_key = conf().get(config_key, "")
|
||||||
if not api_key or not api_key.strip():
|
if not api_key or not api_key.strip():
|
||||||
@@ -380,34 +390,44 @@ class Vision(BaseTool):
|
|||||||
if preferred_provider == display_name and preferred_model
|
if preferred_provider == display_name and preferred_model
|
||||||
else default_model)
|
else default_model)
|
||||||
|
|
||||||
providers.append(VisionProvider(
|
provider = VisionProvider(
|
||||||
name=display_name,
|
name=display_name,
|
||||||
api_key="",
|
api_key="",
|
||||||
api_base="",
|
api_base="",
|
||||||
model_override=model_for_provider,
|
model_override=model_for_provider,
|
||||||
use_bot=True,
|
use_bot=True,
|
||||||
fallback_bot=bot,
|
fallback_bot=bot,
|
||||||
))
|
)
|
||||||
|
|
||||||
|
# Same vendor as the main bot is the most natural fallback when
|
||||||
|
# the main model itself does not support vision — promote it to
|
||||||
|
# the front of the list instead of relying on declaration order.
|
||||||
|
if bot_type == main_bot_type:
|
||||||
|
providers.insert(0, provider)
|
||||||
|
else:
|
||||||
|
providers.append(provider)
|
||||||
|
|
||||||
def _main_bot_supports_vision(self, bot) -> bool:
|
def _main_bot_supports_vision(self, bot) -> bool:
|
||||||
"""
|
"""
|
||||||
Whether the main bot is known to natively support vision.
|
Whether the main bot is known to natively support vision.
|
||||||
|
|
||||||
Having a `call_vision` method is necessary but not sufficient — some
|
Having a `call_vision` method is necessary but not sufficient —
|
||||||
bots (e.g. DeepSeek) implement the method against an endpoint that
|
some bots implement the method against an endpoint that does not
|
||||||
does not actually serve vision models, which causes silent failures
|
actually serve vision models, which causes silent failures when a
|
||||||
when a vendor-foreign model name (e.g. doubao-*) is forwarded.
|
vendor-foreign model name is forwarded.
|
||||||
|
|
||||||
We trust call_vision only when:
|
Resolution order:
|
||||||
- The bot exposes a truthy `supports_vision` attribute, OR
|
1. If the bot explicitly declares `supports_vision`, trust it.
|
||||||
- The configured main model name has a known multimodal prefix
|
This lets bots opt in or out based on their own runtime
|
||||||
handled by this bot's own vendor (claude-/gemini-/glm-/qwen-/
|
configuration (e.g. the currently selected model).
|
||||||
kimi-/doubao-/MiniMax-/abab*/gpt-*).
|
2. Otherwise, fall back to a model-name prefix heuristic: trust
|
||||||
|
call_vision when the main model looks like an OpenAI family
|
||||||
|
model or matches a known multimodal vendor prefix.
|
||||||
"""
|
"""
|
||||||
if bot is None:
|
if bot is None:
|
||||||
return False
|
return False
|
||||||
if getattr(bot, "supports_vision", False):
|
if hasattr(bot, "supports_vision"):
|
||||||
return True
|
return bool(getattr(bot, "supports_vision"))
|
||||||
main_model = (conf().get("model") or "").lower()
|
main_model = (conf().get("model") or "").lower()
|
||||||
if not main_model:
|
if not main_model:
|
||||||
return False
|
return False
|
||||||
|
|||||||
@@ -780,7 +780,7 @@ class ConfigHandler:
|
|||||||
const.QWEN36_PLUS, const.QWEN35_PLUS, const.QWEN3_MAX,
|
const.QWEN36_PLUS, const.QWEN35_PLUS, const.QWEN3_MAX,
|
||||||
const.DOUBAO_SEED_2_PRO, const.DOUBAO_SEED_2_CODE,
|
const.DOUBAO_SEED_2_PRO, const.DOUBAO_SEED_2_CODE,
|
||||||
const.KIMI_K2_6, const.KIMI_K2_5, const.KIMI_K2,
|
const.KIMI_K2_6, const.KIMI_K2_5, const.KIMI_K2,
|
||||||
const.ERNIE_5, const.ERNIE_45_TURBO_128K, const.ERNIE_45_TURBO_32K, const.ERNIE_X1_TURBO_32K,
|
const.ERNIE_5, const.ERNIE_X1_1, const.ERNIE_45_TURBO_128K, const.ERNIE_45_TURBO_32K,
|
||||||
]
|
]
|
||||||
|
|
||||||
# Generic placeholder hints surfaced in the web console. We deliberately
|
# Generic placeholder hints surfaced in the web console. We deliberately
|
||||||
@@ -873,7 +873,7 @@ class ConfigHandler:
|
|||||||
"api_base_key": "qianfan_api_base",
|
"api_base_key": "qianfan_api_base",
|
||||||
"api_base_default": "https://qianfan.baidubce.com/v2",
|
"api_base_default": "https://qianfan.baidubce.com/v2",
|
||||||
"api_base_placeholder": _PLACEHOLDER_QIANFAN,
|
"api_base_placeholder": _PLACEHOLDER_QIANFAN,
|
||||||
"models": [const.ERNIE_5, const.ERNIE_45_TURBO_128K, const.ERNIE_45_TURBO_32K, const.ERNIE_X1_TURBO_32K],
|
"models": [const.ERNIE_5, const.ERNIE_X1_1, const.ERNIE_45_TURBO_128K, const.ERNIE_45_TURBO_32K],
|
||||||
}),
|
}),
|
||||||
("modelscope", {
|
("modelscope", {
|
||||||
"label": "ModelScope",
|
"label": "ModelScope",
|
||||||
|
|||||||
@@ -88,9 +88,9 @@ DEEPSEEK_V4_PRO = "deepseek-v4-pro" # DeepSeek V4 Pro - 复杂任务更强 (思
|
|||||||
|
|
||||||
# Baidu Qianfan / ERNIE
|
# Baidu Qianfan / ERNIE
|
||||||
ERNIE_5 = "ernie-5.0" # ERNIE 5.0 - default recommendation
|
ERNIE_5 = "ernie-5.0" # ERNIE 5.0 - default recommendation
|
||||||
|
ERNIE_X1_1 = "ernie-x1.1" # ERNIE X1.1 - reasoning-focused, multimodal
|
||||||
ERNIE_45_TURBO_128K = "ernie-4.5-turbo-128k"
|
ERNIE_45_TURBO_128K = "ernie-4.5-turbo-128k"
|
||||||
ERNIE_45_TURBO_32K = "ernie-4.5-turbo-32k"
|
ERNIE_45_TURBO_32K = "ernie-4.5-turbo-32k"
|
||||||
ERNIE_X1_TURBO_32K = "ernie-x1-turbo-32k"
|
|
||||||
ERNIE_4_TURBO_8K = "ERNIE-4.0-Turbo-8K"
|
ERNIE_4_TURBO_8K = "ERNIE-4.0-Turbo-8K"
|
||||||
ERNIE_45_TURBO_VL = "ernie-4.5-turbo-vl"
|
ERNIE_45_TURBO_VL = "ernie-4.5-turbo-vl"
|
||||||
ERNIE_45_TURBO_VL_32K = "ernie-4.5-turbo-vl-32k"
|
ERNIE_45_TURBO_VL_32K = "ernie-4.5-turbo-vl-32k"
|
||||||
@@ -170,7 +170,7 @@ MODEL_LIST = [
|
|||||||
DEEPSEEK_V4_FLASH, DEEPSEEK_V4_PRO, DEEPSEEK_CHAT, DEEPSEEK_REASONER,
|
DEEPSEEK_V4_FLASH, DEEPSEEK_V4_PRO, DEEPSEEK_CHAT, DEEPSEEK_REASONER,
|
||||||
|
|
||||||
# Baidu Qianfan / ERNIE
|
# Baidu Qianfan / ERNIE
|
||||||
QIANFAN, ERNIE_5, ERNIE_45_TURBO_128K, ERNIE_45_TURBO_32K, ERNIE_X1_TURBO_32K, ERNIE_4_TURBO_8K,
|
QIANFAN, ERNIE_5, ERNIE_X1_1, ERNIE_45_TURBO_128K, ERNIE_45_TURBO_32K, ERNIE_4_TURBO_8K,
|
||||||
ERNIE_45_TURBO_VL, ERNIE_45_TURBO_VL_32K,
|
ERNIE_45_TURBO_VL, ERNIE_45_TURBO_VL_32K,
|
||||||
|
|
||||||
# MiniMax
|
# MiniMax
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ Option 1: Native integration (recommended):
|
|||||||
|
|
||||||
| Parameter | Description |
|
| Parameter | Description |
|
||||||
| --- | --- |
|
| --- | --- |
|
||||||
| `model` | Default recommendation: `ernie-5.0`; also supports `ernie-4.5-turbo-128k`, `ernie-4.5-turbo-32k`, `ernie-x1-turbo-32k` |
|
| `model` | Default recommendation: `ernie-5.0`; also supports `ernie-x1.1`, `ernie-4.5-turbo-128k`, `ernie-4.5-turbo-32k` |
|
||||||
| `qianfan_api_key` | Qianfan API key, usually starting with `bce-v3/` |
|
| `qianfan_api_key` | Qianfan API key, usually starting with `bce-v3/` |
|
||||||
| `qianfan_api_base` | Optional, defaults to `https://qianfan.baidubce.com/v2` |
|
| `qianfan_api_base` | Optional, defaults to `https://qianfan.baidubce.com/v2` |
|
||||||
|
|
||||||
@@ -24,13 +24,18 @@ Option 1: Native integration (recommended):
|
|||||||
| Model | Use Case |
|
| Model | Use Case |
|
||||||
| --- | --- |
|
| --- | --- |
|
||||||
| `ernie-5.0` | Default recommendation; latest ERNIE flagship with the strongest overall capability |
|
| `ernie-5.0` | Default recommendation; latest ERNIE flagship with the strongest overall capability |
|
||||||
|
| `ernie-x1.1` | Deep-thinking reasoning model with lower hallucination and stronger instruction following / tool calling |
|
||||||
| `ernie-4.5-turbo-128k` | Long-context and general chat |
|
| `ernie-4.5-turbo-128k` | Long-context and general chat |
|
||||||
| `ernie-4.5-turbo-32k` | General chat with a balanced context window and cost |
|
| `ernie-4.5-turbo-32k` | General chat with a balanced context window and cost |
|
||||||
| `ernie-x1-turbo-32k` | Tasks that need stronger reasoning |
|
|
||||||
|
|
||||||
## Vision tool
|
## Vision tool
|
||||||
|
|
||||||
After `qianfan_api_key` is configured, Agent mode can auto-discover Qianfan for the Vision tool. The recommended Qianfan vision model is `ernie-4.5-turbo-vl`:
|
Once `qianfan_api_key` is configured, Agent mode can auto-discover Qianfan for the Vision tool:
|
||||||
|
|
||||||
|
- When the main model itself is multimodal (e.g. `ernie-5.0`, `ernie-x1.1`, `ernie-4.5-turbo-vl`), images are handled directly by the main model with no extra setup.
|
||||||
|
- When the main model is text-only (e.g. `ernie-4.5-turbo-128k`), the Vision tool automatically falls back to `ernie-4.5-turbo-vl`.
|
||||||
|
|
||||||
|
To force a specific Vision model, set it explicitly in `config.json`:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -30,7 +30,7 @@ The voice and streaming building blocks come from a community contribution #2791
|
|||||||
|
|
||||||
- **DeepSeek V4 series**: Added `deepseek-v4-pro` / `deepseek-v4-flash`, with `deepseek-v4-flash` set as the new default
|
- **DeepSeek V4 series**: Added `deepseek-v4-pro` / `deepseek-v4-flash`, with `deepseek-v4-flash` set as the new default
|
||||||
- **Unified thinking-mode toggle**: DeepSeek V4, Qwen3 and other thinking-capable models now share the same `enable_thinking` switch
|
- **Unified thinking-mode toggle**: DeepSeek V4, Qwen3 and other thinking-capable models now share the same `enable_thinking` switch
|
||||||
- **Baidu Qianfan / ERNIE first-class integration**: New `qianfan` provider supporting `ernie-5.0` (default recommendation), `ernie-4.5-turbo-128k`, `ernie-4.5-turbo-32k`, `ernie-x1-turbo-32k`. Dedicated `qianfan_api_key` / `qianfan_api_base` settings keep OpenAI config clean; legacy `wenxin` / `wenxin-4` paths are fully preserved. #2790 Thanks [@jimmyzhuu](https://github.com/jimmyzhuu)
|
- **Baidu Qianfan / ERNIE first-class integration**: New `qianfan` provider supporting `ernie-5.0` (default recommendation), `ernie-x1.1`, `ernie-4.5-turbo-128k`, `ernie-4.5-turbo-32k`. Dedicated `qianfan_api_key` / `qianfan_api_base` settings keep OpenAI config clean; legacy `wenxin` / `wenxin-4` paths are fully preserved. #2790 Thanks [@jimmyzhuu](https://github.com/jimmyzhuu)
|
||||||
|
|
||||||
Documentation: [Baidu Qianfan / ERNIE](https://docs.cowagent.ai/en/models/qianfan)
|
Documentation: [Baidu Qianfan / ERNIE](https://docs.cowagent.ai/en/models/qianfan)
|
||||||
|
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ If the current provider fails, the tool automatically tries the next one until i
|
|||||||
| Vendor | Vision Model | Notes |
|
| Vendor | Vision Model | Notes |
|
||||||
| --- | --- | --- |
|
| --- | --- | --- |
|
||||||
| OpenAI / Compatible | Main model | All OpenAI-compatible multimodal models |
|
| OpenAI / Compatible | Main model | All OpenAI-compatible multimodal models |
|
||||||
| Baidu Qianfan | ernie-4.5-turbo-vl | Auto-discovered when `qianfan_api_key` is configured; can also be selected via `tool.vision.model` |
|
| Baidu Qianfan | Main model | Multimodal main models (e.g. `ernie-5.0`) handle images directly; for text-only main models the Vision tool falls back to `ernie-4.5-turbo-vl` |
|
||||||
| Qwen (DashScope) | Main model | Via MultiModalConversation API |
|
| Qwen (DashScope) | Main model | Via MultiModalConversation API |
|
||||||
| Claude | Main model | Anthropic native image format |
|
| Claude | Main model | Anthropic native image format |
|
||||||
| Gemini | Main model | inlineData format |
|
| Gemini | Main model | inlineData format |
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ description: Baidu Qianfan ERNIE モデル設定
|
|||||||
|
|
||||||
| パラメータ | 説明 |
|
| パラメータ | 説明 |
|
||||||
| --- | --- |
|
| --- | --- |
|
||||||
| `model` | デフォルトの推奨は `ernie-5.0`。`ernie-4.5-turbo-128k`、`ernie-4.5-turbo-32k`、`ernie-x1-turbo-32k` も利用できます |
|
| `model` | デフォルトの推奨は `ernie-5.0`。`ernie-x1.1`、`ernie-4.5-turbo-128k`、`ernie-4.5-turbo-32k` も利用できます |
|
||||||
| `qianfan_api_key` | Qianfan API Key。通常は `bce-v3/` で始まります |
|
| `qianfan_api_key` | Qianfan API Key。通常は `bce-v3/` で始まります |
|
||||||
| `qianfan_api_base` | 任意。デフォルトは `https://qianfan.baidubce.com/v2` |
|
| `qianfan_api_base` | 任意。デフォルトは `https://qianfan.baidubce.com/v2` |
|
||||||
|
|
||||||
@@ -24,13 +24,18 @@ description: Baidu Qianfan ERNIE モデル設定
|
|||||||
| モデル | 用途 |
|
| モデル | 用途 |
|
||||||
| --- | --- |
|
| --- | --- |
|
||||||
| `ernie-5.0` | デフォルト推奨。文心の最新フラッグシップモデルで、総合性能が最も強い |
|
| `ernie-5.0` | デフォルト推奨。文心の最新フラッグシップモデルで、総合性能が最も強い |
|
||||||
|
| `ernie-x1.1` | 深層推論モデル。ハルシネーションが少なく、指示追従とツール呼び出しが強化 |
|
||||||
| `ernie-4.5-turbo-128k` | 長いコンテキストと一般的なチャット向け |
|
| `ernie-4.5-turbo-128k` | 長いコンテキストと一般的なチャット向け |
|
||||||
| `ernie-4.5-turbo-32k` | コンテキスト長とコストのバランスが良い一般チャット向け |
|
| `ernie-4.5-turbo-32k` | コンテキスト長とコストのバランスが良い一般チャット向け |
|
||||||
| `ernie-x1-turbo-32k` | より強い推論が必要なタスク向け |
|
|
||||||
|
|
||||||
## Vision ツール
|
## Vision ツール
|
||||||
|
|
||||||
`qianfan_api_key` を設定すると、Agent モードの Vision ツールは Qianfan を自動検出できます。推奨する Qianfan の視覚モデルは `ernie-4.5-turbo-vl` です:
|
`qianfan_api_key` を設定すると、Agent モードの Vision ツールは Qianfan を自動検出します:
|
||||||
|
|
||||||
|
- メインモデルがマルチモーダル(`ernie-5.0`、`ernie-x1.1`、`ernie-4.5-turbo-vl` など)の場合は、追加設定なしでメインモデルがそのまま画像を処理します。
|
||||||
|
- メインモデルがテキスト専用(`ernie-4.5-turbo-128k` など)の場合は、Vision ツールが自動的に `ernie-4.5-turbo-vl` にフォールバックします。
|
||||||
|
|
||||||
|
特定の Vision モデルを強制したい場合は、`config.json` で明示的に指定できます:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -30,7 +30,7 @@ description: CowAgent 2.0.8 - 飛書チャネル全面アップグレード(
|
|||||||
|
|
||||||
- **DeepSeek V4 シリーズ**:`deepseek-v4-pro` / `deepseek-v4-flash` を追加、デフォルトモデルを `deepseek-v4-flash` に切り替え
|
- **DeepSeek V4 シリーズ**:`deepseek-v4-pro` / `deepseek-v4-flash` を追加、デフォルトモデルを `deepseek-v4-flash` に切り替え
|
||||||
- **思考モデルスイッチの統一**:DeepSeek V4、Qwen3 など思考対応モデルの切り替え動作を `enable_thinking` に統一
|
- **思考モデルスイッチの統一**:DeepSeek V4、Qwen3 など思考対応モデルの切り替え動作を `enable_thinking` に統一
|
||||||
- **百度千帆 / ERNIE のファーストクラス対応**:新たな `qianfan` プロバイダーを追加。`ernie-5.0`(デフォルト推奨)、`ernie-4.5-turbo-128k`、`ernie-4.5-turbo-32k`、`ernie-x1-turbo-32k` をサポート。`qianfan_api_key` / `qianfan_api_base` の独立設定により OpenAI 設定を汚染せず、旧来の `wenxin` / `wenxin-4` パスも完全互換 #2790 Thanks [@jimmyzhuu](https://github.com/jimmyzhuu)
|
- **百度千帆 / ERNIE のファーストクラス対応**:新たな `qianfan` プロバイダーを追加。`ernie-5.0`(デフォルト推奨)、`ernie-x1.1`、`ernie-4.5-turbo-128k`、`ernie-4.5-turbo-32k` をサポート。`qianfan_api_key` / `qianfan_api_base` の独立設定により OpenAI 設定を汚染せず、旧来の `wenxin` / `wenxin-4` パスも完全互換 #2790 Thanks [@jimmyzhuu](https://github.com/jimmyzhuu)
|
||||||
|
|
||||||
ドキュメント:[百度千帆 / ERNIE](https://docs.cowagent.ai/ja/models/qianfan)
|
ドキュメント:[百度千帆 / ERNIE](https://docs.cowagent.ai/ja/models/qianfan)
|
||||||
|
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ Vision ツールは多段階の自動選択+自動フォールバック戦略
|
|||||||
| ベンダー | ビジョンモデル | 説明 |
|
| ベンダー | ビジョンモデル | 説明 |
|
||||||
| --- | --- | --- |
|
| --- | --- | --- |
|
||||||
| OpenAI / 互換プロトコル | メインモデル | すべての OpenAI 互換マルチモーダルモデルに対応 |
|
| OpenAI / 互換プロトコル | メインモデル | すべての OpenAI 互換マルチモーダルモデルに対応 |
|
||||||
| Baidu Qianfan | ernie-4.5-turbo-vl | `qianfan_api_key` を設定すると自動検出され、`tool.vision.model` でも指定できます |
|
| Baidu Qianfan | メインモデル | マルチモーダルのメインモデル(`ernie-5.0` など)は画像を直接処理。テキスト専用のメインモデルの場合は `ernie-4.5-turbo-vl` に自動フォールバック |
|
||||||
| 通義千問 (DashScope) | メインモデル | MultiModalConversation API 経由 |
|
| 通義千問 (DashScope) | メインモデル | MultiModalConversation API 経由 |
|
||||||
| Claude | メインモデル | Anthropic ネイティブ画像形式 |
|
| Claude | メインモデル | Anthropic ネイティブ画像形式 |
|
||||||
| Gemini | メインモデル | inlineData 形式 |
|
| Gemini | メインモデル | inlineData 形式 |
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ description: 百度千帆 ERNIE 模型配置
|
|||||||
|
|
||||||
| 参数 | 说明 |
|
| 参数 | 说明 |
|
||||||
| --- | --- |
|
| --- | --- |
|
||||||
| `model` | 默认推荐使用 `ernie-5.0`;也可使用 `ernie-4.5-turbo-128k`、`ernie-4.5-turbo-32k`、`ernie-x1-turbo-32k` |
|
| `model` | 默认推荐使用 `ernie-5.0`;也可使用 `ernie-x1.1`、`ernie-4.5-turbo-128k`、`ernie-4.5-turbo-32k` |
|
||||||
| `qianfan_api_key` | 千帆 API Key,格式通常以 `bce-v3/` 开头 |
|
| `qianfan_api_key` | 千帆 API Key,格式通常以 `bce-v3/` 开头 |
|
||||||
| `qianfan_api_base` | 可选,默认为 `https://qianfan.baidubce.com/v2` |
|
| `qianfan_api_base` | 可选,默认为 `https://qianfan.baidubce.com/v2` |
|
||||||
|
|
||||||
@@ -24,13 +24,18 @@ description: 百度千帆 ERNIE 模型配置
|
|||||||
| 模型 | 适用场景 |
|
| 模型 | 适用场景 |
|
||||||
| --- | --- |
|
| --- | --- |
|
||||||
| `ernie-5.0` | 默认推荐,文心新一代旗舰模型,综合能力最强 |
|
| `ernie-5.0` | 默认推荐,文心新一代旗舰模型,综合能力最强 |
|
||||||
|
| `ernie-x1.1` | 深度思考推理模型,幻觉更低、指令遵循与工具调用更强 |
|
||||||
| `ernie-4.5-turbo-128k` | 长上下文和通用对话 |
|
| `ernie-4.5-turbo-128k` | 长上下文和通用对话 |
|
||||||
| `ernie-4.5-turbo-32k` | 通用对话,成本和上下文更均衡 |
|
| `ernie-4.5-turbo-32k` | 通用对话,成本和上下文更均衡 |
|
||||||
| `ernie-x1-turbo-32k` | 需要更强推理能力的任务 |
|
|
||||||
|
|
||||||
## Vision 工具
|
## Vision 工具
|
||||||
|
|
||||||
配置 `qianfan_api_key` 后,Agent 的 Vision 工具可以自动使用千帆视觉模型。默认推荐使用 `ernie-4.5-turbo-vl`:
|
配置 `qianfan_api_key` 后,Agent 的 Vision 工具可以自动使用千帆视觉模型:
|
||||||
|
|
||||||
|
- 当主模型本身是多模态时(如 `ernie-5.0`、`ernie-x1.1`、`ernie-4.5-turbo-vl`),直接由主模型识别图像,无需额外配置
|
||||||
|
- 当主模型是纯文本时(如 `ernie-4.5-turbo-128k`),Vision 工具会自动 fallback 到 `ernie-4.5-turbo-vl`
|
||||||
|
|
||||||
|
如需手动指定 Vision 模型,可在 `config.json` 中显式配置:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -30,7 +30,7 @@ description: CowAgent 2.0.8 - 飞书渠道全面升级(语音、流式打字
|
|||||||
|
|
||||||
- **DeepSeek V4 系列**:新增 `deepseek-v4-pro` / `deepseek-v4-flash`,并将默认模型切换为 `deepseek-v4-flash`
|
- **DeepSeek V4 系列**:新增 `deepseek-v4-pro` / `deepseek-v4-flash`,并将默认模型切换为 `deepseek-v4-flash`
|
||||||
- **思考模型开关统一**:DeepSeek V4、Qwen3 等思考模型的开关行为对齐到 `enable_thinking`
|
- **思考模型开关统一**:DeepSeek V4、Qwen3 等思考模型的开关行为对齐到 `enable_thinking`
|
||||||
- **百度千帆模型接入**:新增百度千帆厂商,支持 `ernie-5.0`、`ernie-4.5-turbo-128k` 等模型, 相关文档查看 [百度千帆](https://docs.cowagent.ai/models/qianfan)。#2790 Thanks @jimmyzhuu
|
- **百度千帆模型接入**:新增百度千帆厂商,支持 `ernie-5.0`、`ernie-4.5-turbo-128k` 等模型,并支持图像识别工具,相关文档查看 [百度千帆](https://docs.cowagent.ai/models/qianfan)。#2790 Thanks @jimmyzhuu
|
||||||
- **新增有道翻译**:`translate` 模块新增有道翻译支持 #2797 Thanks @Zmjjeff7
|
- **新增有道翻译**:`translate` 模块新增有道翻译支持 #2797 Thanks @Zmjjeff7
|
||||||
|
|
||||||
## 🛠 OpenAI 客户端重构
|
## 🛠 OpenAI 客户端重构
|
||||||
|
|||||||
@@ -19,12 +19,12 @@ Vision 工具采用多级自动选择 + 自动兜底策略,无需手动配置
|
|||||||
| 厂商 | 视觉模型 | 说明 |
|
| 厂商 | 视觉模型 | 说明 |
|
||||||
| --- | --- | --- |
|
| --- | --- | --- |
|
||||||
| OpenAI / 兼容协议 | 使用主模型 | 支持所有 OpenAI 协议兼容的多模态模型 |
|
| OpenAI / 兼容协议 | 使用主模型 | 支持所有 OpenAI 协议兼容的多模态模型 |
|
||||||
| 百度千帆 (Qianfan) | ernie-4.5-turbo-vl | 配置 `qianfan_api_key` 后自动发现,也可通过 `tool.vision.model` 指定 |
|
|
||||||
| 通义千问 (DashScope) | 使用主模型 | 例如 qwen3.6-plus 等 |
|
| 通义千问 (DashScope) | 使用主模型 | 例如 qwen3.6-plus 等 |
|
||||||
| Claude | 使用主模型 | Anthropic 原生图像格式 |
|
| Claude | 使用主模型 | Anthropic 原生图像格式 |
|
||||||
| Gemini | 使用主模型 | inlineData 格式 |
|
| Gemini | 使用主模型 | inlineData 格式 |
|
||||||
| 豆包 (Doubao) | 使用主模型 | doubao-seed-2-0 系列原生支持 |
|
| 豆包 (Doubao) | 使用主模型 | doubao-seed-2-0 系列原生支持 |
|
||||||
| Kimi (Moonshot) | 使用主模型 | kimi-k2.6、kimi-k2.5 原生支持 |
|
| Kimi (Moonshot) | 使用主模型 | kimi-k2.6、kimi-k2.5 原生支持 |
|
||||||
|
| 百度千帆 (Qianfan) | 使用主模型 | 默认使用多模态主模型 (如 ernie-5.0),主模型不支持时兜底使用 ernie-4.5-turbo-vl |
|
||||||
| 智谱 AI | glm-5v-turbo | 固定使用视觉专用模型 |
|
| 智谱 AI | glm-5v-turbo | 固定使用视觉专用模型 |
|
||||||
| MiniMax | MiniMax-Text-01 | 固定使用视觉专用模型 |
|
| MiniMax | MiniMax-Text-01 | 固定使用视觉专用模型 |
|
||||||
|
|
||||||
@@ -42,7 +42,7 @@ Vision 工具采用多级自动选择 + 自动兜底策略,无需手动配置
|
|||||||
{
|
{
|
||||||
"tool": {
|
"tool": {
|
||||||
"vision": {
|
"vision": {
|
||||||
"model": "ernie-4.5-turbo-vl"
|
"model": "gpt-4.1"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,9 +17,21 @@ DEFAULT_API_BASE = "https://qianfan.baidubce.com/v2"
|
|||||||
DEFAULT_MODEL = const.ERNIE_5
|
DEFAULT_MODEL = const.ERNIE_5
|
||||||
DEFAULT_VISION_MODEL = const.ERNIE_45_TURBO_VL
|
DEFAULT_VISION_MODEL = const.ERNIE_45_TURBO_VL
|
||||||
|
|
||||||
|
# Qianfan models that natively understand images. Other ERNIE variants
|
||||||
|
# are text-only and must not receive image payloads.
|
||||||
|
_VISION_CAPABLE_MODELS = {
|
||||||
|
const.ERNIE_5,
|
||||||
|
const.ERNIE_X1_1,
|
||||||
|
const.ERNIE_45_TURBO_VL,
|
||||||
|
const.ERNIE_45_TURBO_VL_32K,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class QianfanBot(Bot, OpenAICompatibleBot):
|
class QianfanBot(Bot, OpenAICompatibleBot):
|
||||||
supports_vision = True
|
@property
|
||||||
|
def supports_vision(self) -> bool:
|
||||||
|
"""Whether the configured main model is multimodal."""
|
||||||
|
return (conf().get("model") or "").lower() in _VISION_CAPABLE_MODELS
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ class TestQianfanConstantsAndRouting(unittest.TestCase):
|
|||||||
|
|
||||||
self.assertEqual(const.ERNIE_45_TURBO_128K, "ernie-4.5-turbo-128k")
|
self.assertEqual(const.ERNIE_45_TURBO_128K, "ernie-4.5-turbo-128k")
|
||||||
self.assertEqual(const.ERNIE_45_TURBO_32K, "ernie-4.5-turbo-32k")
|
self.assertEqual(const.ERNIE_45_TURBO_32K, "ernie-4.5-turbo-32k")
|
||||||
self.assertEqual(const.ERNIE_X1_TURBO_32K, "ernie-x1-turbo-32k")
|
self.assertEqual(const.ERNIE_X1_1, "ernie-x1.1")
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
const.ERNIE_45_TURBO_VL,
|
const.ERNIE_45_TURBO_VL,
|
||||||
"ernie-4.5-turbo-vl",
|
"ernie-4.5-turbo-vl",
|
||||||
@@ -30,7 +30,7 @@ class TestQianfanConstantsAndRouting(unittest.TestCase):
|
|||||||
self.assertIn(const.QIANFAN, const.MODEL_LIST)
|
self.assertIn(const.QIANFAN, const.MODEL_LIST)
|
||||||
self.assertIn(const.ERNIE_45_TURBO_128K, const.MODEL_LIST)
|
self.assertIn(const.ERNIE_45_TURBO_128K, const.MODEL_LIST)
|
||||||
self.assertIn(const.ERNIE_45_TURBO_32K, const.MODEL_LIST)
|
self.assertIn(const.ERNIE_45_TURBO_32K, const.MODEL_LIST)
|
||||||
self.assertIn(const.ERNIE_X1_TURBO_32K, const.MODEL_LIST)
|
self.assertIn(const.ERNIE_X1_1, const.MODEL_LIST)
|
||||||
self.assertIn(const.ERNIE_45_TURBO_VL, const.MODEL_LIST)
|
self.assertIn(const.ERNIE_45_TURBO_VL, const.MODEL_LIST)
|
||||||
self.assertIn(const.ERNIE_45_TURBO_VL_32K, const.MODEL_LIST)
|
self.assertIn(const.ERNIE_45_TURBO_VL_32K, const.MODEL_LIST)
|
||||||
|
|
||||||
@@ -223,15 +223,31 @@ class TestQianfanBot(unittest.TestCase):
|
|||||||
self.assertEqual(result["content"], "请求失败:bad gateway text")
|
self.assertEqual(result["content"], "请求失败:bad gateway text")
|
||||||
post.assert_called_once()
|
post.assert_called_once()
|
||||||
|
|
||||||
def test_qianfan_bot_supports_vision(self):
|
def test_qianfan_bot_supports_vision_for_multimodal_models(self):
|
||||||
fake_conf = self._fake_conf()
|
for model in ("ernie-5.0", "ernie-x1.1", "ernie-4.5-turbo-vl", "ernie-4.5-turbo-vl-32k"):
|
||||||
with patch("models.qianfan.qianfan_bot.conf", return_value=fake_conf):
|
fake_conf = self._fake_conf({"model": model})
|
||||||
with patch("models.qianfan.qianfan_bot.SessionManager"):
|
with patch("models.qianfan.qianfan_bot.conf", return_value=fake_conf):
|
||||||
from models.qianfan.qianfan_bot import QianfanBot
|
with patch("models.qianfan.qianfan_bot.SessionManager"):
|
||||||
|
from models.qianfan.qianfan_bot import QianfanBot
|
||||||
|
|
||||||
bot = QianfanBot()
|
bot = QianfanBot()
|
||||||
|
self.assertTrue(
|
||||||
|
bot.supports_vision,
|
||||||
|
msg=f"{model} should be marked as multimodal",
|
||||||
|
)
|
||||||
|
|
||||||
self.assertTrue(bot.supports_vision)
|
def test_qianfan_bot_does_not_advertise_vision_for_text_only_models(self):
|
||||||
|
for model in ("ernie-4.5-turbo-128k", "ernie-4.5-turbo-32k"):
|
||||||
|
fake_conf = self._fake_conf({"model": model})
|
||||||
|
with patch("models.qianfan.qianfan_bot.conf", return_value=fake_conf):
|
||||||
|
with patch("models.qianfan.qianfan_bot.SessionManager"):
|
||||||
|
from models.qianfan.qianfan_bot import QianfanBot
|
||||||
|
|
||||||
|
bot = QianfanBot()
|
||||||
|
self.assertFalse(
|
||||||
|
bot.supports_vision,
|
||||||
|
msg=f"{model} should not be marked as multimodal",
|
||||||
|
)
|
||||||
|
|
||||||
def test_call_vision_posts_openai_compatible_multimodal_payload(self):
|
def test_call_vision_posts_openai_compatible_multimodal_payload(self):
|
||||||
fake_conf = self._fake_conf()
|
fake_conf = self._fake_conf()
|
||||||
@@ -435,6 +451,105 @@ class TestQianfanVisionTool(unittest.TestCase):
|
|||||||
self.assertEqual(providers[0].name, "MainModel")
|
self.assertEqual(providers[0].name, "MainModel")
|
||||||
self.assertEqual(providers[0].model_override, "ernie-4.5-turbo-vl-32k")
|
self.assertEqual(providers[0].model_override, "ernie-4.5-turbo-vl-32k")
|
||||||
|
|
||||||
|
def test_vision_main_model_uses_ernie_5_directly(self):
|
||||||
|
"""ERNIE 5.0 is omni-modal → main-model path forwards image to it."""
|
||||||
|
fake_conf = self._fake_conf({"model": "ernie-5.0"})
|
||||||
|
from common import const
|
||||||
|
|
||||||
|
fake_model = MagicMock()
|
||||||
|
fake_model._resolve_bot_type.return_value = const.QIANFAN
|
||||||
|
fake_model.bot = MagicMock()
|
||||||
|
fake_model.bot.supports_vision = True
|
||||||
|
fake_model.bot.call_vision = MagicMock()
|
||||||
|
|
||||||
|
with patch("agent.tools.vision.vision.conf", return_value=fake_conf):
|
||||||
|
from agent.tools.vision.vision import Vision
|
||||||
|
|
||||||
|
tool = Vision()
|
||||||
|
tool.model = fake_model
|
||||||
|
providers = tool._resolve_providers()
|
||||||
|
|
||||||
|
self.assertEqual(providers[0].name, "MainModel")
|
||||||
|
self.assertEqual(providers[0].model_override, "ernie-5.0")
|
||||||
|
|
||||||
|
def test_vision_falls_back_to_qianfan_vl_when_main_model_is_text_only_ernie(self):
|
||||||
|
"""Text-only ERNIE (e.g. ernie-4.5-turbo-128k) must NOT receive image
|
||||||
|
payloads — Vision should skip MainModel and pick up the Qianfan
|
||||||
|
provider from _DISCOVERABLE_MODELS instead."""
|
||||||
|
fake_conf = self._fake_conf({
|
||||||
|
"model": "ernie-4.5-turbo-128k",
|
||||||
|
"qianfan_api_key": "test-qianfan-key",
|
||||||
|
})
|
||||||
|
from common import const
|
||||||
|
|
||||||
|
# Main bot reports supports_vision=False because the configured
|
||||||
|
# model is text-only.
|
||||||
|
fake_main_bot = MagicMock()
|
||||||
|
fake_main_bot.supports_vision = False
|
||||||
|
fake_main_bot.call_vision = MagicMock()
|
||||||
|
|
||||||
|
fake_model = MagicMock()
|
||||||
|
fake_model._resolve_bot_type.return_value = const.QIANFAN
|
||||||
|
fake_model.bot = fake_main_bot
|
||||||
|
|
||||||
|
# The discoverable Qianfan provider creates a new bot via factory.
|
||||||
|
fake_factory_bot = MagicMock()
|
||||||
|
fake_factory_bot.call_vision = MagicMock()
|
||||||
|
|
||||||
|
with patch("agent.tools.vision.vision.conf", return_value=fake_conf):
|
||||||
|
with patch("models.bot_factory.create_bot", return_value=fake_factory_bot):
|
||||||
|
from agent.tools.vision.vision import Vision
|
||||||
|
|
||||||
|
tool = Vision()
|
||||||
|
tool.model = fake_model
|
||||||
|
providers = tool._resolve_providers()
|
||||||
|
|
||||||
|
# MainModel must be absent; Qianfan fallback provider must be the
|
||||||
|
# first choice and pinned to the dedicated vision model.
|
||||||
|
names = [p.name for p in providers]
|
||||||
|
self.assertNotIn("MainModel", names)
|
||||||
|
self.assertEqual(names[0], "Qianfan")
|
||||||
|
self.assertEqual(providers[0].model_override, const.ERNIE_45_TURBO_VL)
|
||||||
|
|
||||||
|
def test_vision_prefers_same_vendor_fallback_over_other_configured_keys(self):
|
||||||
|
"""When the main bot is text-only ERNIE and several vision-capable
|
||||||
|
keys are configured, the same-vendor (Qianfan) fallback wins over
|
||||||
|
unrelated providers regardless of declaration order."""
|
||||||
|
fake_conf = self._fake_conf({
|
||||||
|
"model": "ernie-4.5-turbo-128k",
|
||||||
|
"qianfan_api_key": "test-qianfan-key",
|
||||||
|
"ark_api_key": "test-ark-key",
|
||||||
|
"claude_api_key": "test-claude-key",
|
||||||
|
"minimax_api_key": "test-minimax-key",
|
||||||
|
})
|
||||||
|
from common import const
|
||||||
|
|
||||||
|
fake_main_bot = MagicMock()
|
||||||
|
fake_main_bot.supports_vision = False
|
||||||
|
fake_main_bot.call_vision = MagicMock()
|
||||||
|
|
||||||
|
fake_model = MagicMock()
|
||||||
|
fake_model._resolve_bot_type.return_value = const.QIANFAN
|
||||||
|
fake_model.bot = fake_main_bot
|
||||||
|
|
||||||
|
fake_factory_bot = MagicMock()
|
||||||
|
fake_factory_bot.call_vision = MagicMock()
|
||||||
|
|
||||||
|
with patch("agent.tools.vision.vision.conf", return_value=fake_conf):
|
||||||
|
with patch("models.bot_factory.create_bot", return_value=fake_factory_bot):
|
||||||
|
from agent.tools.vision.vision import Vision
|
||||||
|
|
||||||
|
tool = Vision()
|
||||||
|
tool.model = fake_model
|
||||||
|
providers = tool._resolve_providers()
|
||||||
|
|
||||||
|
names = [p.name for p in providers]
|
||||||
|
self.assertEqual(names[0], "Qianfan")
|
||||||
|
self.assertEqual(providers[0].model_override, const.ERNIE_45_TURBO_VL)
|
||||||
|
# Other configured providers should still appear in the chain.
|
||||||
|
for expected in ("Doubao", "Claude", "MiniMax"):
|
||||||
|
self.assertIn(expected, names)
|
||||||
|
|
||||||
|
|
||||||
class TestQianfanDocs(unittest.TestCase):
|
class TestQianfanDocs(unittest.TestCase):
|
||||||
def _read(self, relative_path):
|
def _read(self, relative_path):
|
||||||
|
|||||||
Reference in New Issue
Block a user