From f9d099be1b788d82dd6ef2dfa809e07ed5f5a096 Mon Sep 17 00:00:00 2001 From: jimmyzhuu Date: Wed, 6 May 2026 13:23:04 +0800 Subject: [PATCH 1/5] feat: add qianfan vision model constants --- common/const.py | 3 +++ tests/test_qianfan_provider.py | 10 ++++++++++ 2 files changed, 13 insertions(+) diff --git a/common/const.py b/common/const.py index 9742af9c..eae1bf92 100644 --- a/common/const.py +++ b/common/const.py @@ -92,6 +92,8 @@ ERNIE_45_TURBO_128K = "ernie-4.5-turbo-128k" ERNIE_45_TURBO_32K = "ernie-4.5-turbo-32k" ERNIE_X1_TURBO_32K = "ernie-x1-turbo-32k" ERNIE_4_TURBO_8K = "ERNIE-4.0-Turbo-8K" +ERNIE_45_TURBO_VL_PREVIEW = "ernie-4.5-turbo-vl-preview" +ERNIE_45_VL_28B_A3B = "ernie-4.5-vl-28b-a3b" # Qwen (通义千问 - 阿里云 DashScope) QWEN_TURBO = "qwen-turbo" @@ -169,6 +171,7 @@ MODEL_LIST = [ # Baidu Qianfan / ERNIE QIANFAN, ERNIE_5, ERNIE_45_TURBO_128K, ERNIE_45_TURBO_32K, ERNIE_X1_TURBO_32K, ERNIE_4_TURBO_8K, + ERNIE_45_TURBO_VL_PREVIEW, ERNIE_45_VL_28B_A3B, # MiniMax MiniMax, MINIMAX_M2_7, MINIMAX_M2_7_HIGHSPEED, MINIMAX_M2_5, MINIMAX_M2_1, MINIMAX_M2_1_LIGHTNING, MINIMAX_M2, MINIMAX_ABAB6_5, diff --git a/tests/test_qianfan_provider.py b/tests/test_qianfan_provider.py index 7f638e9e..51e01ff9 100644 --- a/tests/test_qianfan_provider.py +++ b/tests/test_qianfan_provider.py @@ -19,10 +19,20 @@ class TestQianfanConstantsAndRouting(unittest.TestCase): self.assertEqual(const.ERNIE_45_TURBO_128K, "ernie-4.5-turbo-128k") self.assertEqual(const.ERNIE_45_TURBO_32K, "ernie-4.5-turbo-32k") self.assertEqual(const.ERNIE_X1_TURBO_32K, "ernie-x1-turbo-32k") + self.assertEqual( + const.ERNIE_45_TURBO_VL_PREVIEW, + "ernie-4.5-turbo-vl-preview", + ) + self.assertEqual( + const.ERNIE_45_VL_28B_A3B, + "ernie-4.5-vl-28b-a3b", + ) self.assertIn(const.QIANFAN, const.MODEL_LIST) self.assertIn(const.ERNIE_45_TURBO_128K, const.MODEL_LIST) self.assertIn(const.ERNIE_45_TURBO_32K, const.MODEL_LIST) self.assertIn(const.ERNIE_X1_TURBO_32K, const.MODEL_LIST) + self.assertIn(const.ERNIE_45_TURBO_VL_PREVIEW, const.MODEL_LIST) + self.assertIn(const.ERNIE_45_VL_28B_A3B, const.MODEL_LIST) def test_qianfan_config_keys_are_available(self): import config From 3b12ef2e66b8c3cb4ad1d0bddc63cb69a4c7167c Mon Sep 17 00:00:00 2001 From: jimmyzhuu Date: Wed, 6 May 2026 13:24:41 +0800 Subject: [PATCH 2/5] feat: add qianfan vision calls --- models/qianfan/qianfan_bot.py | 51 ++++++++++++++++ tests/test_qianfan_provider.py | 107 +++++++++++++++++++++++++++++++++ 2 files changed, 158 insertions(+) diff --git a/models/qianfan/qianfan_bot.py b/models/qianfan/qianfan_bot.py index 1626479e..9e3321fb 100644 --- a/models/qianfan/qianfan_bot.py +++ b/models/qianfan/qianfan_bot.py @@ -15,9 +15,12 @@ from .qianfan_session import QianfanSession DEFAULT_API_BASE = "https://qianfan.baidubce.com/v2" DEFAULT_MODEL = const.ERNIE_5 +DEFAULT_VISION_MODEL = const.ERNIE_45_TURBO_VL_PREVIEW class QianfanBot(Bot, OpenAICompatibleBot): + supports_vision = True + def __init__(self): super().__init__() model = self._resolve_model() @@ -136,6 +139,54 @@ class QianfanBot(Bot, OpenAICompatibleBot): return self.reply_text(session, args, retry_count + 1) return {"completion_tokens": 0, "content": "我现在有点累了,等会再来吧"} + def call_vision(self, image_url: str, question: str, + model: str = None, max_tokens: int = 1000) -> dict: + vision_model = model or DEFAULT_VISION_MODEL + payload = { + "model": vision_model, + "messages": [ + { + "role": "user", + "content": [ + {"type": "text", "text": question}, + {"type": "image_url", "image_url": {"url": image_url}}, + ], + } + ], + "max_tokens": max_tokens, + } + + try: + response = requests.post( + "{}/chat/completions".format(self.api_base), + headers=self._build_headers(), + json=payload, + timeout=conf().get("request_timeout", 180), + ) + if response.status_code != 200: + err = self._error_result(response, None) + return { + "error": True, + "message": err.get("content", "Qianfan vision request failed"), + } + + data = response.json() + choices = data.get("choices", []) + content = choices[0].get("message", {}).get("content", "") if choices else "" + usage = data.get("usage", {}) or {} + return { + "content": content, + "model": data.get("model", vision_model), + "usage": { + "prompt_tokens": usage.get("prompt_tokens", 0), + "completion_tokens": usage.get("completion_tokens", 0), + "total_tokens": usage.get("total_tokens", 0), + }, + } + except Exception as e: + logger.exception(e) + return {"error": True, "message": str(e)} + def _error_result(self, response, session, args=None, retry_count=0): try: body = response.json() diff --git a/tests/test_qianfan_provider.py b/tests/test_qianfan_provider.py index 51e01ff9..d97211c0 100644 --- a/tests/test_qianfan_provider.py +++ b/tests/test_qianfan_provider.py @@ -223,6 +223,113 @@ class TestQianfanBot(unittest.TestCase): self.assertEqual(result["content"], "请求失败:bad gateway text") post.assert_called_once() + def test_qianfan_bot_supports_vision(self): + fake_conf = self._fake_conf() + with patch("models.qianfan.qianfan_bot.conf", return_value=fake_conf): + with patch("models.qianfan.qianfan_bot.SessionManager"): + from models.qianfan.qianfan_bot import QianfanBot + + bot = QianfanBot() + + self.assertTrue(bot.supports_vision) + + def test_call_vision_posts_openai_compatible_multimodal_payload(self): + fake_conf = self._fake_conf() + fake_response = MagicMock() + fake_response.status_code = 200 + fake_response.json.return_value = { + "id": "chatcmpl-test", + "model": "ernie-4.5-turbo-vl-preview", + "choices": [{"message": {"content": "图中有一个红色方块。"}}], + "usage": { + "prompt_tokens": 10, + "completion_tokens": 8, + "total_tokens": 18, + }, + } + + with patch("models.qianfan.qianfan_bot.conf", return_value=fake_conf): + with patch("models.qianfan.qianfan_bot.SessionManager"): + from models.qianfan.qianfan_bot import QianfanBot + + bot = QianfanBot() + with patch("models.qianfan.qianfan_bot.requests.post", return_value=fake_response) as post: + result = bot.call_vision( + image_url="data:image/png;base64,AAAA", + question="这张图里有什么?", + ) + + self.assertEqual(result["content"], "图中有一个红色方块。") + self.assertEqual(result["model"], "ernie-4.5-turbo-vl-preview") + self.assertEqual(result["usage"]["total_tokens"], 18) + post.assert_called_once() + url = post.call_args.args[0] + kwargs = post.call_args.kwargs + self.assertEqual(url, "https://qianfan.baidubce.com/v2/chat/completions") + self.assertEqual(kwargs["headers"]["Authorization"], "Bearer test-qianfan-key") + self.assertEqual(kwargs["json"]["model"], "ernie-4.5-turbo-vl-preview") + self.assertEqual(kwargs["json"]["max_tokens"], 1000) + self.assertEqual(kwargs["json"]["messages"], [ + { + "role": "user", + "content": [ + {"type": "text", "text": "这张图里有什么?"}, + { + "type": "image_url", + "image_url": {"url": "data:image/png;base64,AAAA"}, + }, + ], + } + ]) + + def test_call_vision_allows_explicit_model_override(self): + fake_conf = self._fake_conf() + fake_response = MagicMock() + fake_response.status_code = 200 + fake_response.json.return_value = { + "model": "ernie-4.5-vl-28b-a3b", + "choices": [{"message": {"content": "有文字。"}}], + "usage": {}, + } + + with patch("models.qianfan.qianfan_bot.conf", return_value=fake_conf): + with patch("models.qianfan.qianfan_bot.SessionManager"): + from models.qianfan.qianfan_bot import QianfanBot + + bot = QianfanBot() + with patch("models.qianfan.qianfan_bot.requests.post", return_value=fake_response) as post: + result = bot.call_vision( + image_url="data:image/jpeg;base64,BBBB", + question="识别文字", + model="ernie-4.5-vl-28b-a3b", + max_tokens=256, + ) + + self.assertEqual(result["model"], "ernie-4.5-vl-28b-a3b") + self.assertEqual(post.call_args.kwargs["json"]["model"], "ernie-4.5-vl-28b-a3b") + self.assertEqual(post.call_args.kwargs["json"]["max_tokens"], 256) + + def test_call_vision_returns_error_dict_for_api_error(self): + fake_conf = self._fake_conf() + fake_response = MagicMock() + fake_response.status_code = 400 + fake_response.json.return_value = {"error": {"message": "bad image"}} + fake_response.text = '{"error":{"message":"bad image"}}' + + with patch("models.qianfan.qianfan_bot.conf", return_value=fake_conf): + with patch("models.qianfan.qianfan_bot.SessionManager"): + from models.qianfan.qianfan_bot import QianfanBot + + bot = QianfanBot() + with patch("models.qianfan.qianfan_bot.requests.post", return_value=fake_response): + result = bot.call_vision( + image_url="data:image/png;base64,AAAA", + question="这张图里有什么?", + ) + + self.assertTrue(result["error"]) + self.assertEqual(result["message"], "请求失败:bad image") + class TestQianfanSurfaces(unittest.TestCase): def _read(self, relative_path): From fccb7ff9ed547e76116d512dbfa84c95c949b9a2 Mon Sep 17 00:00:00 2001 From: jimmyzhuu Date: Wed, 6 May 2026 13:25:59 +0800 Subject: [PATCH 3/5] feat: route qianfan vision provider --- agent/tools/vision/vision.py | 4 +- tests/test_qianfan_provider.py | 76 ++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 1 deletion(-) diff --git a/agent/tools/vision/vision.py b/agent/tools/vision/vision.py index ce2477a7..0c8e48ba 100644 --- a/agent/tools/vision/vision.py +++ b/agent/tools/vision/vision.py @@ -53,6 +53,7 @@ _DISCOVERABLE_MODELS = [ ("ark_api_key", const.DOUBAO, const.DOUBAO_SEED_2_PRO, "Doubao"), ("dashscope_api_key", const.QWEN_DASHSCOPE, const.QWEN36_PLUS, "DashScope"), ("claude_api_key", const.CLAUDEAPI, const.CLAUDE_4_6_SONNET, "Claude"), + ("qianfan_api_key", const.QIANFAN, const.ERNIE_45_TURBO_VL_PREVIEW, "Qianfan"), ("gemini_api_key", const.GEMINI, const.GEMINI_31_FLASH_LITE_PRE, "Gemini"), ("zhipu_ai_api_key", const.ZHIPU_AI, const.GLM_4_7, "ZhipuAI"), ("minimax_api_key", const.MiniMax, const.MINIMAX_M2_7, "MiniMax"), @@ -67,6 +68,7 @@ _MODEL_PREFIX_TO_PROVIDER = [ ("moonshot-", "Moonshot"), ("qwen", "DashScope"), # qwen-*, qwen3-*, qwen3.6-*, etc. ("claude-", "Claude"), + ("ernie-", "Qianfan"), ("gemini-", "Gemini"), ("glm-", "ZhipuAI"), ("minimax-", "MiniMax"), @@ -140,7 +142,7 @@ class Vision(BaseTool): "Error: No model available for Vision.\n" "The main model does not support vision and no other API keys are configured.\n" "Options:\n" - " 1. Switch to a multimodal model (e.g. qwen3.6-plus, claude-sonnet-4-6, gemini-2.0-flash)\n" + " 1. Switch to a multimodal model (e.g. ernie-4.5-turbo-vl-preview, qwen3.6-plus, claude-sonnet-4-6, gemini-2.0-flash)\n" " 2. Configure OPENAI_API_KEY: env_config(action=\"set\", key=\"OPENAI_API_KEY\", value=\"your-key\")\n" " 3. Configure LINKAI_API_KEY: env_config(action=\"set\", key=\"LINKAI_API_KEY\", value=\"your-key\")" ) diff --git a/tests/test_qianfan_provider.py b/tests/test_qianfan_provider.py index d97211c0..2e51224a 100644 --- a/tests/test_qianfan_provider.py +++ b/tests/test_qianfan_provider.py @@ -360,6 +360,82 @@ class TestQianfanSurfaces(unittest.TestCase): self.assertIn("const.QIANFAN", godcmd_source) +class TestQianfanVisionTool(unittest.TestCase): + def _fake_conf(self, values=None): + data = { + "model": "deepseek-v4-flash", + "qianfan_api_key": "", + "qianfan_api_base": "https://qianfan.baidubce.com/v2", + "open_ai_api_key": "", + "linkai_api_key": "", + "use_linkai": False, + "tool": {}, + } + if values: + data.update(values) + fake_conf = MagicMock() + fake_conf.get.side_effect = lambda key, default=None: data.get(key, default) + return fake_conf + + def test_vision_auto_discovers_qianfan_when_key_configured(self): + fake_conf = self._fake_conf({"qianfan_api_key": "test-qianfan-key"}) + fake_bot = MagicMock() + fake_bot.call_vision = MagicMock() + + with patch("agent.tools.vision.vision.conf", return_value=fake_conf): + with patch("models.bot_factory.create_bot", return_value=fake_bot) as create_bot: + from agent.tools.vision.vision import Vision + from common import const + + tool = Vision() + tool.model = None + providers = tool._resolve_providers() + + self.assertEqual(providers[0].name, "Qianfan") + self.assertEqual(providers[0].model_override, const.ERNIE_45_TURBO_VL_PREVIEW) + self.assertTrue(providers[0].use_bot) + create_bot.assert_called_with(const.QIANFAN) + + def test_vision_routes_ernie_model_override_to_qianfan(self): + fake_conf = self._fake_conf({ + "qianfan_api_key": "test-qianfan-key", + "tool": {"vision": {"model": "ernie-4.5-vl-28b-a3b"}}, + }) + fake_bot = MagicMock() + fake_bot.call_vision = MagicMock() + + with patch("agent.tools.vision.vision.conf", return_value=fake_conf): + with patch("models.bot_factory.create_bot", return_value=fake_bot): + from agent.tools.vision.vision import Vision + + tool = Vision() + tool.model = None + providers = tool._resolve_providers() + + self.assertEqual(providers[0].name, "Qianfan") + self.assertEqual(providers[0].model_override, "ernie-4.5-vl-28b-a3b") + + def test_vision_main_model_uses_qianfan_when_configured_model_is_ernie(self): + fake_conf = self._fake_conf({"model": "ernie-4.5-vl-28b-a3b"}) + from common import const + + fake_model = MagicMock() + fake_model._resolve_bot_type.return_value = const.QIANFAN + fake_model.bot = MagicMock() + fake_model.bot.supports_vision = True + fake_model.bot.call_vision = MagicMock() + + with patch("agent.tools.vision.vision.conf", return_value=fake_conf): + from agent.tools.vision.vision import Vision + + tool = Vision() + tool.model = fake_model + providers = tool._resolve_providers() + + self.assertEqual(providers[0].name, "MainModel") + self.assertEqual(providers[0].model_override, "ernie-4.5-vl-28b-a3b") + + class TestQianfanDocs(unittest.TestCase): def _read(self, relative_path): root = os.path.join(os.path.dirname(__file__), "..") From 76e6b7b4716294cb2be200e76c829d333dd3c44e Mon Sep 17 00:00:00 2001 From: jimmyzhuu Date: Wed, 6 May 2026 13:28:46 +0800 Subject: [PATCH 4/5] docs: document qianfan vision support --- README.md | 5 +++-- docs/en/models/qianfan.mdx | 14 ++++++++++++++ docs/en/tools/vision.mdx | 3 ++- docs/ja/models/qianfan.mdx | 14 ++++++++++++++ docs/ja/tools/vision.mdx | 3 ++- docs/models/qianfan.mdx | 14 ++++++++++++++ docs/tools/vision.mdx | 3 ++- tests/test_qianfan_provider.py | 12 ++++++++++++ 8 files changed, 63 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index e6b479ba..110f1a1a 100644 --- a/README.md +++ b/README.md @@ -604,11 +604,12 @@ API Key 创建:在 [控制台](https://aistudio.google.com/app/apikey?hl=zh-cn ```json { "model": "ernie-5.0", - "qianfan_api_key": "" + "qianfan_api_key": "", + "qianfan_api_base": "https://qianfan.baidubce.com/v2" } ``` - - `model`: 默认推荐填写 `ernie-5.0`,也可填写 `ernie-4.5-turbo-128k`、`ernie-4.5-turbo-32k`、`ernie-x1-turbo-32k` + - `model`: 默认推荐填写 `ernie-5.0`,也可填写 `ernie-4.5-turbo-128k`、`ernie-4.5-turbo-32k`、`ernie-x1-turbo-32k`;Vision 工具可使用 `ernie-4.5-turbo-vl-preview` - `qianfan_api_key`: 百度千帆 API Key,通常以 `bce-v3/` 开头,可在百度智能云控制台创建 - `qianfan_api_base`: 可选,默认为 `https://qianfan.baidubce.com/v2` diff --git a/docs/en/models/qianfan.mdx b/docs/en/models/qianfan.mdx index 10a4e862..1e87a26a 100644 --- a/docs/en/models/qianfan.mdx +++ b/docs/en/models/qianfan.mdx @@ -28,6 +28,20 @@ Option 1: Native integration (recommended): | `ernie-4.5-turbo-32k` | General chat with a balanced context window and cost | | `ernie-x1-turbo-32k` | Tasks that need stronger reasoning | +## Vision tool + +After `qianfan_api_key` is configured, Agent mode can auto-discover Qianfan for the Vision tool. The recommended Qianfan vision model is `ernie-4.5-turbo-vl-preview`: + +```json +{ + "tool": { + "vision": { + "model": "ernie-4.5-turbo-vl-preview" + } + } +} +``` + Option 2: OpenAI-compatible configuration: ```json diff --git a/docs/en/tools/vision.mdx b/docs/en/tools/vision.mdx index 01e36db2..942e1d7e 100644 --- a/docs/en/tools/vision.mdx +++ b/docs/en/tools/vision.mdx @@ -23,6 +23,7 @@ If the current provider fails, the tool automatically tries the next one until i | Vendor | Vision Model | Notes | | --- | --- | --- | | OpenAI / Compatible | Main model | All OpenAI-compatible multimodal models | +| Baidu Qianfan | ernie-4.5-turbo-vl-preview | Auto-discovered when `qianfan_api_key` is configured; can also be selected via `tool.vision.model` | | Qwen (DashScope) | Main model | Via MultiModalConversation API | | Claude | Main model | Anthropic native image format | | Gemini | Main model | inlineData format | @@ -52,7 +53,7 @@ To specify a particular model for the vision tool, add to `config.json`: { "tool": { "vision": { - "model": "gpt-4o" + "model": "ernie-4.5-turbo-vl-preview" } } } diff --git a/docs/ja/models/qianfan.mdx b/docs/ja/models/qianfan.mdx index 5fe11622..cd69d0f7 100644 --- a/docs/ja/models/qianfan.mdx +++ b/docs/ja/models/qianfan.mdx @@ -28,6 +28,20 @@ description: Baidu Qianfan ERNIE モデル設定 | `ernie-4.5-turbo-32k` | コンテキスト長とコストのバランスが良い一般チャット向け | | `ernie-x1-turbo-32k` | より強い推論が必要なタスク向け | +## Vision ツール + +`qianfan_api_key` を設定すると、Agent モードの Vision ツールは Qianfan を自動検出できます。推奨する Qianfan の視覚モデルは `ernie-4.5-turbo-vl-preview` です: + +```json +{ + "tool": { + "vision": { + "model": "ernie-4.5-turbo-vl-preview" + } + } +} +``` + 方法 2: OpenAI 互換接続: ```json diff --git a/docs/ja/tools/vision.mdx b/docs/ja/tools/vision.mdx index 95e28a22..037cc582 100644 --- a/docs/ja/tools/vision.mdx +++ b/docs/ja/tools/vision.mdx @@ -23,6 +23,7 @@ Vision ツールは多段階の自動選択+自動フォールバック戦略 | ベンダー | ビジョンモデル | 説明 | | --- | --- | --- | | OpenAI / 互換プロトコル | メインモデル | すべての OpenAI 互換マルチモーダルモデルに対応 | +| Baidu Qianfan | ernie-4.5-turbo-vl-preview | `qianfan_api_key` を設定すると自動検出され、`tool.vision.model` でも指定できます | | 通義千問 (DashScope) | メインモデル | MultiModalConversation API 経由 | | Claude | メインモデル | Anthropic ネイティブ画像形式 | | Gemini | メインモデル | inlineData 形式 | @@ -52,7 +53,7 @@ Vision ツールで使用するモデルを指定するには、`config.json` { "tool": { "vision": { - "model": "gpt-4o" + "model": "ernie-4.5-turbo-vl-preview" } } } diff --git a/docs/models/qianfan.mdx b/docs/models/qianfan.mdx index 4d71593f..c3ac6132 100644 --- a/docs/models/qianfan.mdx +++ b/docs/models/qianfan.mdx @@ -28,6 +28,20 @@ description: 百度千帆 ERNIE 模型配置 | `ernie-4.5-turbo-32k` | 通用对话,成本和上下文更均衡 | | `ernie-x1-turbo-32k` | 需要更强推理能力的任务 | +## Vision 工具 + +配置 `qianfan_api_key` 后,Agent 的 Vision 工具可以自动使用千帆视觉模型。默认推荐使用 `ernie-4.5-turbo-vl-preview`: + +```json +{ + "tool": { + "vision": { + "model": "ernie-4.5-turbo-vl-preview" + } + } +} +``` + 方式二:OpenAI 兼容方式接入: ```json diff --git a/docs/tools/vision.mdx b/docs/tools/vision.mdx index 5ef55674..398fc579 100644 --- a/docs/tools/vision.mdx +++ b/docs/tools/vision.mdx @@ -19,6 +19,7 @@ Vision 工具采用多级自动选择 + 自动兜底策略,无需手动配置 | 厂商 | 视觉模型 | 说明 | | --- | --- | --- | | OpenAI / 兼容协议 | 使用主模型 | 支持所有 OpenAI 协议兼容的多模态模型 | +| 百度千帆 (Qianfan) | ernie-4.5-turbo-vl-preview | 配置 `qianfan_api_key` 后自动发现,也可通过 `tool.vision.model` 指定 | | 通义千问 (DashScope) | 使用主模型 | 例如 qwen3.6-plus 等 | | Claude | 使用主模型 | Anthropic 原生图像格式 | | Gemini | 使用主模型 | inlineData 格式 | @@ -41,7 +42,7 @@ Vision 工具采用多级自动选择 + 自动兜底策略,无需手动配置 { "tool": { "vision": { - "model": "gpt-4o" + "model": "ernie-4.5-turbo-vl-preview" } } } diff --git a/tests/test_qianfan_provider.py b/tests/test_qianfan_provider.py index 2e51224a..8b996d11 100644 --- a/tests/test_qianfan_provider.py +++ b/tests/test_qianfan_provider.py @@ -452,6 +452,7 @@ class TestQianfanDocs(unittest.TestCase): self.assertIn("qianfan_api_key", text) self.assertIn("https://qianfan.baidubce.com/v2", text) self.assertIn("ernie-4.5-turbo-128k", text) + self.assertIn("ernie-4.5-turbo-vl-preview", text) def test_model_indexes_link_qianfan(self): for path in ( @@ -469,6 +470,17 @@ class TestQianfanDocs(unittest.TestCase): self.assertIn('"qianfan_api_key": ""', text) self.assertIn('"qianfan_api_base": "https://qianfan.baidubce.com/v2"', text) + def test_vision_docs_document_qianfan_provider(self): + expected = { + "docs/tools/vision.mdx": "百度千帆", + "docs/en/tools/vision.mdx": "Baidu Qianfan", + "docs/ja/tools/vision.mdx": "Baidu Qianfan", + } + for path, label in expected.items(): + text = self._read(path) + self.assertIn(label, text) + self.assertIn("ernie-4.5-turbo-vl-preview", text) + if __name__ == "__main__": unittest.main() From fb7962c7f285999f6a0f3309d778873505f337e7 Mon Sep 17 00:00:00 2001 From: jimmyzhuu Date: Wed, 6 May 2026 13:34:39 +0800 Subject: [PATCH 5/5] fix: use available qianfan vision model --- README.md | 2 +- agent/tools/vision/vision.py | 4 ++-- common/const.py | 6 ++--- docs/en/models/qianfan.mdx | 4 ++-- docs/en/tools/vision.mdx | 4 ++-- docs/ja/models/qianfan.mdx | 4 ++-- docs/ja/tools/vision.mdx | 4 ++-- docs/models/qianfan.mdx | 4 ++-- docs/tools/vision.mdx | 4 ++-- models/qianfan/qianfan_bot.py | 2 +- tests/test_qianfan_provider.py | 40 +++++++++++++++++----------------- 11 files changed, 39 insertions(+), 39 deletions(-) diff --git a/README.md b/README.md index 110f1a1a..995488f5 100644 --- a/README.md +++ b/README.md @@ -609,7 +609,7 @@ API Key 创建:在 [控制台](https://aistudio.google.com/app/apikey?hl=zh-cn } ``` - - `model`: 默认推荐填写 `ernie-5.0`,也可填写 `ernie-4.5-turbo-128k`、`ernie-4.5-turbo-32k`、`ernie-x1-turbo-32k`;Vision 工具可使用 `ernie-4.5-turbo-vl-preview` + - `model`: 默认推荐填写 `ernie-5.0`,也可填写 `ernie-4.5-turbo-128k`、`ernie-4.5-turbo-32k`、`ernie-x1-turbo-32k`;Vision 工具可使用 `ernie-4.5-turbo-vl` - `qianfan_api_key`: 百度千帆 API Key,通常以 `bce-v3/` 开头,可在百度智能云控制台创建 - `qianfan_api_base`: 可选,默认为 `https://qianfan.baidubce.com/v2` diff --git a/agent/tools/vision/vision.py b/agent/tools/vision/vision.py index 0c8e48ba..5d8d9280 100644 --- a/agent/tools/vision/vision.py +++ b/agent/tools/vision/vision.py @@ -53,7 +53,7 @@ _DISCOVERABLE_MODELS = [ ("ark_api_key", const.DOUBAO, const.DOUBAO_SEED_2_PRO, "Doubao"), ("dashscope_api_key", const.QWEN_DASHSCOPE, const.QWEN36_PLUS, "DashScope"), ("claude_api_key", const.CLAUDEAPI, const.CLAUDE_4_6_SONNET, "Claude"), - ("qianfan_api_key", const.QIANFAN, const.ERNIE_45_TURBO_VL_PREVIEW, "Qianfan"), + ("qianfan_api_key", const.QIANFAN, const.ERNIE_45_TURBO_VL, "Qianfan"), ("gemini_api_key", const.GEMINI, const.GEMINI_31_FLASH_LITE_PRE, "Gemini"), ("zhipu_ai_api_key", const.ZHIPU_AI, const.GLM_4_7, "ZhipuAI"), ("minimax_api_key", const.MiniMax, const.MINIMAX_M2_7, "MiniMax"), @@ -142,7 +142,7 @@ class Vision(BaseTool): "Error: No model available for Vision.\n" "The main model does not support vision and no other API keys are configured.\n" "Options:\n" - " 1. Switch to a multimodal model (e.g. ernie-4.5-turbo-vl-preview, qwen3.6-plus, claude-sonnet-4-6, gemini-2.0-flash)\n" + " 1. Switch to a multimodal model (e.g. ernie-4.5-turbo-vl, qwen3.6-plus, claude-sonnet-4-6, gemini-2.0-flash)\n" " 2. Configure OPENAI_API_KEY: env_config(action=\"set\", key=\"OPENAI_API_KEY\", value=\"your-key\")\n" " 3. Configure LINKAI_API_KEY: env_config(action=\"set\", key=\"LINKAI_API_KEY\", value=\"your-key\")" ) diff --git a/common/const.py b/common/const.py index eae1bf92..18e0b082 100644 --- a/common/const.py +++ b/common/const.py @@ -92,8 +92,8 @@ ERNIE_45_TURBO_128K = "ernie-4.5-turbo-128k" ERNIE_45_TURBO_32K = "ernie-4.5-turbo-32k" ERNIE_X1_TURBO_32K = "ernie-x1-turbo-32k" ERNIE_4_TURBO_8K = "ERNIE-4.0-Turbo-8K" -ERNIE_45_TURBO_VL_PREVIEW = "ernie-4.5-turbo-vl-preview" -ERNIE_45_VL_28B_A3B = "ernie-4.5-vl-28b-a3b" +ERNIE_45_TURBO_VL = "ernie-4.5-turbo-vl" +ERNIE_45_TURBO_VL_32K = "ernie-4.5-turbo-vl-32k" # Qwen (通义千问 - 阿里云 DashScope) QWEN_TURBO = "qwen-turbo" @@ -171,7 +171,7 @@ MODEL_LIST = [ # Baidu Qianfan / ERNIE QIANFAN, ERNIE_5, ERNIE_45_TURBO_128K, ERNIE_45_TURBO_32K, ERNIE_X1_TURBO_32K, ERNIE_4_TURBO_8K, - ERNIE_45_TURBO_VL_PREVIEW, ERNIE_45_VL_28B_A3B, + ERNIE_45_TURBO_VL, ERNIE_45_TURBO_VL_32K, # MiniMax MiniMax, MINIMAX_M2_7, MINIMAX_M2_7_HIGHSPEED, MINIMAX_M2_5, MINIMAX_M2_1, MINIMAX_M2_1_LIGHTNING, MINIMAX_M2, MINIMAX_ABAB6_5, diff --git a/docs/en/models/qianfan.mdx b/docs/en/models/qianfan.mdx index 1e87a26a..129ae2da 100644 --- a/docs/en/models/qianfan.mdx +++ b/docs/en/models/qianfan.mdx @@ -30,13 +30,13 @@ Option 1: Native integration (recommended): ## Vision tool -After `qianfan_api_key` is configured, Agent mode can auto-discover Qianfan for the Vision tool. The recommended Qianfan vision model is `ernie-4.5-turbo-vl-preview`: +After `qianfan_api_key` is configured, Agent mode can auto-discover Qianfan for the Vision tool. The recommended Qianfan vision model is `ernie-4.5-turbo-vl`: ```json { "tool": { "vision": { - "model": "ernie-4.5-turbo-vl-preview" + "model": "ernie-4.5-turbo-vl" } } } diff --git a/docs/en/tools/vision.mdx b/docs/en/tools/vision.mdx index 942e1d7e..2c544880 100644 --- a/docs/en/tools/vision.mdx +++ b/docs/en/tools/vision.mdx @@ -23,7 +23,7 @@ If the current provider fails, the tool automatically tries the next one until i | Vendor | Vision Model | Notes | | --- | --- | --- | | OpenAI / Compatible | Main model | All OpenAI-compatible multimodal models | -| Baidu Qianfan | ernie-4.5-turbo-vl-preview | Auto-discovered when `qianfan_api_key` is configured; can also be selected via `tool.vision.model` | +| Baidu Qianfan | ernie-4.5-turbo-vl | Auto-discovered when `qianfan_api_key` is configured; can also be selected via `tool.vision.model` | | Qwen (DashScope) | Main model | Via MultiModalConversation API | | Claude | Main model | Anthropic native image format | | Gemini | Main model | inlineData format | @@ -53,7 +53,7 @@ To specify a particular model for the vision tool, add to `config.json`: { "tool": { "vision": { - "model": "ernie-4.5-turbo-vl-preview" + "model": "ernie-4.5-turbo-vl" } } } diff --git a/docs/ja/models/qianfan.mdx b/docs/ja/models/qianfan.mdx index cd69d0f7..b44dc152 100644 --- a/docs/ja/models/qianfan.mdx +++ b/docs/ja/models/qianfan.mdx @@ -30,13 +30,13 @@ description: Baidu Qianfan ERNIE モデル設定 ## Vision ツール -`qianfan_api_key` を設定すると、Agent モードの Vision ツールは Qianfan を自動検出できます。推奨する Qianfan の視覚モデルは `ernie-4.5-turbo-vl-preview` です: +`qianfan_api_key` を設定すると、Agent モードの Vision ツールは Qianfan を自動検出できます。推奨する Qianfan の視覚モデルは `ernie-4.5-turbo-vl` です: ```json { "tool": { "vision": { - "model": "ernie-4.5-turbo-vl-preview" + "model": "ernie-4.5-turbo-vl" } } } diff --git a/docs/ja/tools/vision.mdx b/docs/ja/tools/vision.mdx index 037cc582..1cea5308 100644 --- a/docs/ja/tools/vision.mdx +++ b/docs/ja/tools/vision.mdx @@ -23,7 +23,7 @@ Vision ツールは多段階の自動選択+自動フォールバック戦略 | ベンダー | ビジョンモデル | 説明 | | --- | --- | --- | | OpenAI / 互換プロトコル | メインモデル | すべての OpenAI 互換マルチモーダルモデルに対応 | -| Baidu Qianfan | ernie-4.5-turbo-vl-preview | `qianfan_api_key` を設定すると自動検出され、`tool.vision.model` でも指定できます | +| Baidu Qianfan | ernie-4.5-turbo-vl | `qianfan_api_key` を設定すると自動検出され、`tool.vision.model` でも指定できます | | 通義千問 (DashScope) | メインモデル | MultiModalConversation API 経由 | | Claude | メインモデル | Anthropic ネイティブ画像形式 | | Gemini | メインモデル | inlineData 形式 | @@ -53,7 +53,7 @@ Vision ツールで使用するモデルを指定するには、`config.json` { "tool": { "vision": { - "model": "ernie-4.5-turbo-vl-preview" + "model": "ernie-4.5-turbo-vl" } } } diff --git a/docs/models/qianfan.mdx b/docs/models/qianfan.mdx index c3ac6132..bea65dcb 100644 --- a/docs/models/qianfan.mdx +++ b/docs/models/qianfan.mdx @@ -30,13 +30,13 @@ description: 百度千帆 ERNIE 模型配置 ## Vision 工具 -配置 `qianfan_api_key` 后,Agent 的 Vision 工具可以自动使用千帆视觉模型。默认推荐使用 `ernie-4.5-turbo-vl-preview`: +配置 `qianfan_api_key` 后,Agent 的 Vision 工具可以自动使用千帆视觉模型。默认推荐使用 `ernie-4.5-turbo-vl`: ```json { "tool": { "vision": { - "model": "ernie-4.5-turbo-vl-preview" + "model": "ernie-4.5-turbo-vl" } } } diff --git a/docs/tools/vision.mdx b/docs/tools/vision.mdx index 398fc579..a5293ca1 100644 --- a/docs/tools/vision.mdx +++ b/docs/tools/vision.mdx @@ -19,7 +19,7 @@ Vision 工具采用多级自动选择 + 自动兜底策略,无需手动配置 | 厂商 | 视觉模型 | 说明 | | --- | --- | --- | | OpenAI / 兼容协议 | 使用主模型 | 支持所有 OpenAI 协议兼容的多模态模型 | -| 百度千帆 (Qianfan) | ernie-4.5-turbo-vl-preview | 配置 `qianfan_api_key` 后自动发现,也可通过 `tool.vision.model` 指定 | +| 百度千帆 (Qianfan) | ernie-4.5-turbo-vl | 配置 `qianfan_api_key` 后自动发现,也可通过 `tool.vision.model` 指定 | | 通义千问 (DashScope) | 使用主模型 | 例如 qwen3.6-plus 等 | | Claude | 使用主模型 | Anthropic 原生图像格式 | | Gemini | 使用主模型 | inlineData 格式 | @@ -42,7 +42,7 @@ Vision 工具采用多级自动选择 + 自动兜底策略,无需手动配置 { "tool": { "vision": { - "model": "ernie-4.5-turbo-vl-preview" + "model": "ernie-4.5-turbo-vl" } } } diff --git a/models/qianfan/qianfan_bot.py b/models/qianfan/qianfan_bot.py index 9e3321fb..bc5cc285 100644 --- a/models/qianfan/qianfan_bot.py +++ b/models/qianfan/qianfan_bot.py @@ -15,7 +15,7 @@ from .qianfan_session import QianfanSession DEFAULT_API_BASE = "https://qianfan.baidubce.com/v2" DEFAULT_MODEL = const.ERNIE_5 -DEFAULT_VISION_MODEL = const.ERNIE_45_TURBO_VL_PREVIEW +DEFAULT_VISION_MODEL = const.ERNIE_45_TURBO_VL class QianfanBot(Bot, OpenAICompatibleBot): diff --git a/tests/test_qianfan_provider.py b/tests/test_qianfan_provider.py index 8b996d11..211d3acd 100644 --- a/tests/test_qianfan_provider.py +++ b/tests/test_qianfan_provider.py @@ -20,19 +20,19 @@ class TestQianfanConstantsAndRouting(unittest.TestCase): self.assertEqual(const.ERNIE_45_TURBO_32K, "ernie-4.5-turbo-32k") self.assertEqual(const.ERNIE_X1_TURBO_32K, "ernie-x1-turbo-32k") self.assertEqual( - const.ERNIE_45_TURBO_VL_PREVIEW, - "ernie-4.5-turbo-vl-preview", + const.ERNIE_45_TURBO_VL, + "ernie-4.5-turbo-vl", ) self.assertEqual( - const.ERNIE_45_VL_28B_A3B, - "ernie-4.5-vl-28b-a3b", + const.ERNIE_45_TURBO_VL_32K, + "ernie-4.5-turbo-vl-32k", ) self.assertIn(const.QIANFAN, const.MODEL_LIST) self.assertIn(const.ERNIE_45_TURBO_128K, const.MODEL_LIST) self.assertIn(const.ERNIE_45_TURBO_32K, const.MODEL_LIST) self.assertIn(const.ERNIE_X1_TURBO_32K, const.MODEL_LIST) - self.assertIn(const.ERNIE_45_TURBO_VL_PREVIEW, const.MODEL_LIST) - self.assertIn(const.ERNIE_45_VL_28B_A3B, const.MODEL_LIST) + self.assertIn(const.ERNIE_45_TURBO_VL, const.MODEL_LIST) + self.assertIn(const.ERNIE_45_TURBO_VL_32K, const.MODEL_LIST) def test_qianfan_config_keys_are_available(self): import config @@ -239,7 +239,7 @@ class TestQianfanBot(unittest.TestCase): fake_response.status_code = 200 fake_response.json.return_value = { "id": "chatcmpl-test", - "model": "ernie-4.5-turbo-vl-preview", + "model": "ernie-4.5-turbo-vl", "choices": [{"message": {"content": "图中有一个红色方块。"}}], "usage": { "prompt_tokens": 10, @@ -260,14 +260,14 @@ class TestQianfanBot(unittest.TestCase): ) self.assertEqual(result["content"], "图中有一个红色方块。") - self.assertEqual(result["model"], "ernie-4.5-turbo-vl-preview") + self.assertEqual(result["model"], "ernie-4.5-turbo-vl") self.assertEqual(result["usage"]["total_tokens"], 18) post.assert_called_once() url = post.call_args.args[0] kwargs = post.call_args.kwargs self.assertEqual(url, "https://qianfan.baidubce.com/v2/chat/completions") self.assertEqual(kwargs["headers"]["Authorization"], "Bearer test-qianfan-key") - self.assertEqual(kwargs["json"]["model"], "ernie-4.5-turbo-vl-preview") + self.assertEqual(kwargs["json"]["model"], "ernie-4.5-turbo-vl") self.assertEqual(kwargs["json"]["max_tokens"], 1000) self.assertEqual(kwargs["json"]["messages"], [ { @@ -287,7 +287,7 @@ class TestQianfanBot(unittest.TestCase): fake_response = MagicMock() fake_response.status_code = 200 fake_response.json.return_value = { - "model": "ernie-4.5-vl-28b-a3b", + "model": "ernie-4.5-turbo-vl-32k", "choices": [{"message": {"content": "有文字。"}}], "usage": {}, } @@ -301,12 +301,12 @@ class TestQianfanBot(unittest.TestCase): result = bot.call_vision( image_url="data:image/jpeg;base64,BBBB", question="识别文字", - model="ernie-4.5-vl-28b-a3b", + model="ernie-4.5-turbo-vl-32k", max_tokens=256, ) - self.assertEqual(result["model"], "ernie-4.5-vl-28b-a3b") - self.assertEqual(post.call_args.kwargs["json"]["model"], "ernie-4.5-vl-28b-a3b") + self.assertEqual(result["model"], "ernie-4.5-turbo-vl-32k") + self.assertEqual(post.call_args.kwargs["json"]["model"], "ernie-4.5-turbo-vl-32k") self.assertEqual(post.call_args.kwargs["json"]["max_tokens"], 256) def test_call_vision_returns_error_dict_for_api_error(self): @@ -392,14 +392,14 @@ class TestQianfanVisionTool(unittest.TestCase): providers = tool._resolve_providers() self.assertEqual(providers[0].name, "Qianfan") - self.assertEqual(providers[0].model_override, const.ERNIE_45_TURBO_VL_PREVIEW) + self.assertEqual(providers[0].model_override, const.ERNIE_45_TURBO_VL) self.assertTrue(providers[0].use_bot) create_bot.assert_called_with(const.QIANFAN) def test_vision_routes_ernie_model_override_to_qianfan(self): fake_conf = self._fake_conf({ "qianfan_api_key": "test-qianfan-key", - "tool": {"vision": {"model": "ernie-4.5-vl-28b-a3b"}}, + "tool": {"vision": {"model": "ernie-4.5-turbo-vl-32k"}}, }) fake_bot = MagicMock() fake_bot.call_vision = MagicMock() @@ -413,10 +413,10 @@ class TestQianfanVisionTool(unittest.TestCase): providers = tool._resolve_providers() self.assertEqual(providers[0].name, "Qianfan") - self.assertEqual(providers[0].model_override, "ernie-4.5-vl-28b-a3b") + self.assertEqual(providers[0].model_override, "ernie-4.5-turbo-vl-32k") def test_vision_main_model_uses_qianfan_when_configured_model_is_ernie(self): - fake_conf = self._fake_conf({"model": "ernie-4.5-vl-28b-a3b"}) + fake_conf = self._fake_conf({"model": "ernie-4.5-turbo-vl-32k"}) from common import const fake_model = MagicMock() @@ -433,7 +433,7 @@ class TestQianfanVisionTool(unittest.TestCase): providers = tool._resolve_providers() self.assertEqual(providers[0].name, "MainModel") - self.assertEqual(providers[0].model_override, "ernie-4.5-vl-28b-a3b") + self.assertEqual(providers[0].model_override, "ernie-4.5-turbo-vl-32k") class TestQianfanDocs(unittest.TestCase): @@ -452,7 +452,7 @@ class TestQianfanDocs(unittest.TestCase): self.assertIn("qianfan_api_key", text) self.assertIn("https://qianfan.baidubce.com/v2", text) self.assertIn("ernie-4.5-turbo-128k", text) - self.assertIn("ernie-4.5-turbo-vl-preview", text) + self.assertIn("ernie-4.5-turbo-vl", text) def test_model_indexes_link_qianfan(self): for path in ( @@ -479,7 +479,7 @@ class TestQianfanDocs(unittest.TestCase): for path, label in expected.items(): text = self._read(path) self.assertIn(label, text) - self.assertIn("ernie-4.5-turbo-vl-preview", text) + self.assertIn("ernie-4.5-turbo-vl", text) if __name__ == "__main__":