feat(models): support ASR model selection in web console

2026-07-18 20:17:09 +08:00 · 2026-06-02 15:05:35 +08:00
parent a97eeb1fd9
commit e861d98007
7 changed files with 79 additions and 9 deletions
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -1,6 +1,6 @@
 <!--
 Thanks for your contribution! Please write this PR in English.
-【中文开发者】请使用英文填写，感谢 ❤️
+推荐使用英文填写，感谢 ❤️
 -->

 ## What does this PR do?
@@ -16,6 +16,7 @@ Thanks for your contribution! Please write this PR in English.

 ## Checklist

+- [ ] I have read the [Contributing Guide](https://github.com/zhayujie/CowAgent/blob/master/CONTRIBUTING.md)
 - [ ] I tested this change locally
 - [ ] Code comments and docs are in English
 - [ ] Linked related issue (if any): closes #
--- a/README.md
+++ b/README.md
@@ -238,7 +238,7 @@ For enterprise inquiries: sales@simple-future.tech or [scan the QR code](https:/

 ## 🛠️ Development & Contributing

-Contributions are welcome — add a new channel by following the [Telegram channel reference](https://github.com/zhayujie/CowAgent/blob/master/channel/telegram/telegram_channel.py), or contribute new skills to [Skill Hub](https://skills.cowagent.ai/submit).
+All kinds of contributions are welcome — new features, bug fixes, performance improvements, docs, or sharing your own skills on the [Skill Hub](https://skills.cowagent.ai/submit). See [CONTRIBUTING.md](/CONTRIBUTING.md) to get started, then open an Issue to discuss or send a PR directly.

 ⭐ Star the project to show your support, and Watch → Custom → Releases to get notified of new versions. PRs and Issues are always welcome.

--- a/channel/web/static/js/console.js
+++ b/channel/web/static/js/console.js
@@ -4025,7 +4025,7 @@ const MODELS_CAPABILITY_DEFS = [
      iconChip: 'bg-blue-50 dark:bg-blue-900/30',        iconGlyph: 'text-blue-500' },
    { id: 'image',     icon: 'fa-image',            editable: true,  needsModel: true,  titleKey: 'models_capability_image',     descKey: 'models_capability_image_desc',
      iconChip: 'bg-blue-50 dark:bg-blue-900/30',        iconGlyph: 'text-blue-500' },
-    { id: 'asr',       icon: 'fa-microphone',       editable: true,  needsModel: false, titleKey: 'models_capability_asr',       descKey: 'models_capability_asr_desc',
+    { id: 'asr',       icon: 'fa-microphone',       editable: true,  needsModel: true,  titleKey: 'models_capability_asr',       descKey: 'models_capability_asr_desc',
      iconChip: 'bg-amber-50 dark:bg-amber-900/30',      iconGlyph: 'text-amber-500' },
    { id: 'tts',       icon: 'fa-volume-high',      editable: true,  needsModel: true,  titleKey: 'models_capability_tts',       descKey: 'models_capability_tts_desc',
      iconChip: 'bg-amber-50 dark:bg-amber-900/30',      iconGlyph: 'text-amber-500' },
--- a/channel/web/web_channel.py
+++ b/channel/web/web_channel.py
@@ -1720,6 +1720,28 @@ class ModelsHandler:
        ],
    }

+    # ASR engine catalog per provider. The first entry of each list is the
+    # runtime default (mirrors DEFAULT_ASR_MODEL in voice/*). Users can still
+    # pick "custom" in the UI to send any other model id.
+    _ASR_PROVIDER_MODELS = {
+        "openai": [
+            {"value": "gpt-4o-mini-transcribe", "hint": "默认 · 速度快"},
+            {"value": "gpt-4o-transcribe",      "hint": "更高准确率"},
+            {"value": "whisper-1",              "hint": "经典 Whisper"},
+        ],
+        "dashscope": [
+            {"value": "qwen3-asr-flash", "hint": "覆盖普通话、方言与主流外语"},
+        ],
+        "zhipu": [
+            {"value": "glm-asr-2512", "hint": "智谱语音识别"},
+        ],
+        # LinkAI gateway pins whisper-1 for ASR and ignores any other id,
+        # so expose only that to avoid misleading the user.
+        "linkai": [
+            {"value": "whisper-1", "hint": "网关固定使用"},
+        ],
+    }
+
    # Per-provider voice timbres. Entries can be a bare code string
    # (label = code) or {value, hint?} when a friendly secondary label
    # helps recognition. We keep `value` as the raw API code so power
@@ -2240,8 +2262,9 @@ class ModelsHandler:
            "editable": True,
            "current_provider": explicit,
            "suggested_provider": suggested,
-            "current_model": "",
+            "current_model": (local_config.get("voice_to_text_model") or "") if explicit else "",
            "providers": cls._ASR_PROVIDERS,
+            "provider_models": cls._ASR_PROVIDER_MODELS,
        }

    @classmethod
@@ -2778,8 +2801,13 @@ class ModelsHandler:
        file_cfg = self._read_file_config()
        local_config["voice_to_text"] = provider_id
        file_cfg["voice_to_text"] = provider_id
-        local_config["voice_to_text_model"] = model
-        file_cfg["voice_to_text_model"] = model
+        # Only overwrite the model when one is supplied. An empty model means
+        # "keep whatever is configured", so switching provider from the console
+        # never wipes a hand-set voice_to_text_model; if none is stored, runtime
+        # uses `or DEFAULT_ASR_MODEL`. NOTE(review): a kept id may not fit the new provider.
+        if model:
+            local_config["voice_to_text_model"] = model
+            file_cfg["voice_to_text_model"] = model
        self._write_file_config(file_cfg)
        logger.info(
            f"[ModelsHandler] asr updated: provider={provider_id!r} "
@@ -2788,7 +2816,8 @@ class ModelsHandler:
        self._refresh_voice_routing()
        return json.dumps({
            "status": "success",
-            "provider": provider_id, "model": model,
+            "provider": provider_id,
+            "model": local_config.get("voice_to_text_model", ""),
        })

    def _set_tts(self, provider_id: str, model: str, voice: str = "") -> str:
--- a/docs/ja/README.md
+++ b/docs/ja/README.md
@@ -238,7 +238,7 @@ GitHub で [Issue を報告](https://github.com/zhayujie/CowAgent/issues) する

 ## 🛠️ 開発とコントリビューション

-新しいチャネルの追加を歓迎します — [Telegram チャネル](https://github.com/zhayujie/CowAgent/blob/master/channel/telegram/telegram_channel.py) を参考にカスタムチャネルを実装できます。新しい Skill のコントリビューションも [Skill Hub](https://skills.cowagent.ai/submit) で受け付けています。
+あらゆる形のコントリビューションを歓迎します — 新機能、バグ修正、パフォーマンス改善、ドキュメント、あるいは [Skill Hub](https://skills.cowagent.ai/submit) への Skill の共有など。まずは [CONTRIBUTING.md](/CONTRIBUTING.md) をご覧いただき、Issue で相談するか、直接 PR を送ってください。

 ⭐ Star でプロジェクトを応援し、Watch → Custom → Releases で新バージョンの通知を受け取れます。PR や Issue の提出も歓迎します。

--- a/docs/zh/README.md
+++ b/docs/zh/README.md
@@ -250,7 +250,7 @@ CowAgent 支持国内外主流厂商的大语言模型。**文本对话、图像

 ## 🛠️ 开发与贡献

-欢迎接入更多应用通道，参考 [飞书通道实现](https://github.com/zhayujie/CowAgent/blob/master/channel/feishu/feishu_channel.py) 新增自定义通道；同时欢迎贡献新技能，向 [Skill Hub](https://skills.cowagent.ai/submit) 提交。
+欢迎各种形式的贡献：新功能、Bug 修复、性能优化、文档完善，或向 [Skill Hub](https://skills.cowagent.ai/submit) 分享你的技能。请先阅读 [CONTRIBUTING.md](/CONTRIBUTING.md) 了解如何开始，然后提交 Issue 讨论或直接发起 PR。

 欢迎 ⭐ Star 支持项目，并通过 Watch → Custom → Releases 订阅新版本通知。也欢迎提交 PR、Issue 进行反馈。

--- a/tests/test_models_handler.py
+++ b/tests/test_models_handler.py
@@ -54,6 +54,46 @@ class TestModelsHandler(unittest.TestCase):
        write_file.assert_called_once_with(file_config)
        refresh_voice.assert_called_once()

+    def test_set_asr_empty_model_keeps_existing(self):
+        # Switching provider with an empty model must not wipe a user's
+        # hand-configured voice_to_text_model.
+        from channel.web.web_channel import ModelsHandler
+
+        local_config = {"voice_to_text_model": "qwen3-asr-flash"}
+        file_config = {"voice_to_text_model": "qwen3-asr-flash"}
+        handler = ModelsHandler()
+
+        with patch("channel.web.web_channel.conf", return_value=local_config):
+            with patch.object(ModelsHandler, "_read_file_config", return_value=file_config):
+                with patch.object(ModelsHandler, "_write_file_config"):
+                    with patch.object(ModelsHandler, "_refresh_voice_routing"):
+                        result = json.loads(handler._handle_set_capability({
+                            "capability": "asr",
+                            "provider_id": "zhipu",
+                            "model": "",
+                        }))
+
+        self.assertEqual(result["status"], "success")
+        self.assertEqual(local_config["voice_to_text"], "zhipu")
+        # Existing model preserved, not overwritten with "".
+        self.assertEqual(local_config["voice_to_text_model"], "qwen3-asr-flash")
+        self.assertEqual(file_config["voice_to_text_model"], "qwen3-asr-flash")
+        self.assertEqual(result["model"], "qwen3-asr-flash")
+
+    def test_asr_capability_exposes_provider_models(self):
+        from channel.web.web_channel import ModelsHandler
+
+        cap = ModelsHandler._asr_capability({
+            "voice_to_text": "dashscope",
+            "voice_to_text_model": "qwen3-asr-flash",
+        })
+
+        self.assertTrue(cap["editable"])
+        self.assertEqual(cap["current_provider"], "dashscope")
+        self.assertEqual(cap["current_model"], "qwen3-asr-flash")
+        self.assertIn("provider_models", cap)
+        self.assertIn("dashscope", cap["provider_models"])
+

 if __name__ == "__main__":
    unittest.main()