mirror of
https://github.com/zhayujie/chatgpt-on-wechat.git
synced 2026-06-03 10:47:08 +08:00
feat(models): support ASR model selection in web console
This commit is contained in:
3
.github/PULL_REQUEST_TEMPLATE.md
vendored
3
.github/PULL_REQUEST_TEMPLATE.md
vendored
@@ -1,6 +1,6 @@
|
||||
<!--
|
||||
Thanks for your contribution! Please write this PR in English.
|
||||
【中文开发者】请使用英文填写,感谢 ❤️
|
||||
推荐使用英文填写,感谢 ❤️
|
||||
-->
|
||||
|
||||
## What does this PR do?
|
||||
@@ -16,6 +16,7 @@ Thanks for your contribution! Please write this PR in English.
|
||||
|
||||
## Checklist
|
||||
|
||||
- [ ] I have read the [Contributing Guide](https://github.com/zhayujie/CowAgent/blob/master/CONTRIBUTING.md)
|
||||
- [ ] I tested this change locally
|
||||
- [ ] Code comments and docs are in English
|
||||
- [ ] Linked related issue (if any): closes #
|
||||
|
||||
@@ -238,7 +238,7 @@ For enterprise inquiries: sales@simple-future.tech or [scan the QR code](https:/
|
||||
|
||||
## 🛠️ Development & Contributing
|
||||
|
||||
Contributions are welcome — add a new channel by following the [Telegram channel reference](https://github.com/zhayujie/CowAgent/blob/master/channel/telegram/telegram_channel.py), or contribute new skills to [Skill Hub](https://skills.cowagent.ai/submit).
|
||||
All kinds of contributions are welcome — new features, bug fixes, performance improvements, docs, or sharing your own skills on the [Skill Hub](https://skills.cowagent.ai/submit). See [CONTRIBUTING.md](/CONTRIBUTING.md) to get started, then open an Issue to discuss or send a PR directly.
|
||||
|
||||
⭐ Star the project to show your support, and Watch → Custom → Releases to get notified of new versions. PRs and Issues are always welcome.
|
||||
|
||||
|
||||
@@ -4025,7 +4025,7 @@ const MODELS_CAPABILITY_DEFS = [
|
||||
iconChip: 'bg-blue-50 dark:bg-blue-900/30', iconGlyph: 'text-blue-500' },
|
||||
{ id: 'image', icon: 'fa-image', editable: true, needsModel: true, titleKey: 'models_capability_image', descKey: 'models_capability_image_desc',
|
||||
iconChip: 'bg-blue-50 dark:bg-blue-900/30', iconGlyph: 'text-blue-500' },
|
||||
{ id: 'asr', icon: 'fa-microphone', editable: true, needsModel: false, titleKey: 'models_capability_asr', descKey: 'models_capability_asr_desc',
|
||||
{ id: 'asr', icon: 'fa-microphone', editable: true, needsModel: true, titleKey: 'models_capability_asr', descKey: 'models_capability_asr_desc',
|
||||
iconChip: 'bg-amber-50 dark:bg-amber-900/30', iconGlyph: 'text-amber-500' },
|
||||
{ id: 'tts', icon: 'fa-volume-high', editable: true, needsModel: true, titleKey: 'models_capability_tts', descKey: 'models_capability_tts_desc',
|
||||
iconChip: 'bg-amber-50 dark:bg-amber-900/30', iconGlyph: 'text-amber-500' },
|
||||
|
||||
@@ -1720,6 +1720,28 @@ class ModelsHandler:
|
||||
],
|
||||
}
|
||||
|
||||
# Catalog of selectable ASR engines, keyed by provider id. Within each
# provider's list the first entry is the runtime default (it mirrors
# DEFAULT_ASR_MODEL in voice/*). The UI still offers a "custom" choice,
# so any model id that is not listed here can be sent as well.
_ASR_PROVIDER_MODELS = {
    "openai": [
        {"value": "gpt-4o-mini-transcribe", "hint": "默认 · 速度快"},
        {"value": "gpt-4o-transcribe", "hint": "更高准确率"},
        {"value": "whisper-1", "hint": "经典 Whisper"},
    ],
    "dashscope": [
        {"value": "qwen3-asr-flash", "hint": "覆盖普通话、方言与主流外语"},
    ],
    "zhipu": [
        {"value": "glm-asr-2512", "hint": "智谱语音识别"},
    ],
    # The LinkAI gateway pins whisper-1 for ASR and ignores every other id;
    # listing only that one keeps the UI from misleading the user.
    "linkai": [
        {"value": "whisper-1", "hint": "网关固定使用"},
    ],
}
|
||||
|
||||
# Per-provider voice timbres. Entries can be a bare code string
|
||||
# (label = code) or {value, hint?} when a friendly secondary label
|
||||
# helps recognition. We keep `value` as the raw API code so power
|
||||
@@ -2240,8 +2262,9 @@ class ModelsHandler:
|
||||
"editable": True,
|
||||
"current_provider": explicit,
|
||||
"suggested_provider": suggested,
|
||||
"current_model": "",
|
||||
"current_model": (local_config.get("voice_to_text_model") or "") if explicit else "",
|
||||
"providers": cls._ASR_PROVIDERS,
|
||||
"provider_models": cls._ASR_PROVIDER_MODELS,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
@@ -2778,8 +2801,13 @@ class ModelsHandler:
|
||||
file_cfg = self._read_file_config()
|
||||
local_config["voice_to_text"] = provider_id
|
||||
file_cfg["voice_to_text"] = provider_id
|
||||
local_config["voice_to_text_model"] = model
|
||||
file_cfg["voice_to_text_model"] = model
|
||||
# Only overwrite the model when one is supplied. An empty model means
|
||||
# "keep whatever is configured" so switching provider from the console
|
||||
# never wipes a user's hand-set voice_to_text_model (runtime falls back
|
||||
# to the engine default via `or DEFAULT_ASR_MODEL` regardless).
|
||||
if model:
|
||||
local_config["voice_to_text_model"] = model
|
||||
file_cfg["voice_to_text_model"] = model
|
||||
self._write_file_config(file_cfg)
|
||||
logger.info(
|
||||
f"[ModelsHandler] asr updated: provider={provider_id!r} "
|
||||
@@ -2788,7 +2816,8 @@ class ModelsHandler:
|
||||
self._refresh_voice_routing()
|
||||
return json.dumps({
|
||||
"status": "success",
|
||||
"provider": provider_id, "model": model,
|
||||
"provider": provider_id,
|
||||
"model": local_config.get("voice_to_text_model", ""),
|
||||
})
|
||||
|
||||
def _set_tts(self, provider_id: str, model: str, voice: str = "") -> str:
|
||||
|
||||
@@ -238,7 +238,7 @@ GitHub で [Issue を報告](https://github.com/zhayujie/CowAgent/issues) する
|
||||
|
||||
## 🛠️ 開発とコントリビューション
|
||||
|
||||
新しいチャネルの追加を歓迎します — [Telegram チャネル](https://github.com/zhayujie/CowAgent/blob/master/channel/telegram/telegram_channel.py) を参考にカスタムチャネルを実装できます。新しい Skill のコントリビューションも [Skill Hub](https://skills.cowagent.ai/submit) で受け付けています。
|
||||
あらゆる形のコントリビューションを歓迎します —— 新機能、バグ修正、パフォーマンス改善、ドキュメント、あるいは [Skill Hub](https://skills.cowagent.ai/submit) への Skill の共有など。まずは [CONTRIBUTING.md](/CONTRIBUTING.md) をご覧いただき、Issue で相談するか、直接 PR を送ってください。
|
||||
|
||||
⭐ Star でプロジェクトを応援し、Watch → Custom → Releases で新バージョンの通知を受け取れます。PR や Issue の提出も歓迎します。
|
||||
|
||||
|
||||
@@ -250,7 +250,7 @@ CowAgent 支持国内外主流厂商的大语言模型。**文本对话、图像
|
||||
|
||||
## 🛠️ 开发与贡献
|
||||
|
||||
欢迎接入更多应用通道,参考 [飞书通道实现](https://github.com/zhayujie/CowAgent/blob/master/channel/feishu/feishu_channel.py) 新增自定义通道;同时欢迎贡献新技能,向 [Skill Hub](https://skills.cowagent.ai/submit) 提交。
|
||||
欢迎各种形式的贡献:新功能、Bug 修复、性能优化、文档完善,或向 [Skill Hub](https://skills.cowagent.ai/submit) 分享你的技能。请先阅读 [CONTRIBUTING.md](/CONTRIBUTING.md) 了解如何开始,然后提交 Issue 讨论或直接发起 PR。
|
||||
|
||||
欢迎 ⭐ Star 支持项目,并通过 Watch → Custom → Releases 订阅新版本通知。也欢迎提交 PR、Issue 进行反馈。
|
||||
|
||||
|
||||
@@ -54,6 +54,46 @@ class TestModelsHandler(unittest.TestCase):
|
||||
write_file.assert_called_once_with(file_config)
|
||||
refresh_voice.assert_called_once()
|
||||
|
||||
def test_set_asr_empty_model_keeps_existing(self):
    """Switching ASR provider with an empty model keeps the stored model.

    A user's hand-configured ``voice_to_text_model`` must survive a
    provider change whose request carries ``"model": ""``.
    """
    from channel.web.web_channel import ModelsHandler

    existing_model = "qwen3-asr-flash"
    local_config = {"voice_to_text_model": existing_model}
    file_config = {"voice_to_text_model": existing_model}
    handler = ModelsHandler()
    payload = {
        "capability": "asr",
        "provider_id": "zhipu",
        "model": "",
    }

    # Patch config access, file-config read/write and the voice routing
    # refresh; the handler then works on the two dicts defined above.
    with patch("channel.web.web_channel.conf", return_value=local_config), \
            patch.object(ModelsHandler, "_read_file_config", return_value=file_config), \
            patch.object(ModelsHandler, "_write_file_config"), \
            patch.object(ModelsHandler, "_refresh_voice_routing"):
        result = json.loads(handler._handle_set_capability(payload))

    self.assertEqual(result["status"], "success")
    self.assertEqual(local_config["voice_to_text"], "zhipu")
    # The previous model is still in place rather than replaced by "".
    self.assertEqual(local_config["voice_to_text_model"], "qwen3-asr-flash")
    self.assertEqual(file_config["voice_to_text_model"], "qwen3-asr-flash")
    self.assertEqual(result["model"], "qwen3-asr-flash")
|
||||
|
||||
def test_asr_capability_exposes_provider_models(self):
    """The ASR capability payload carries provider, model and model catalog."""
    from channel.web.web_channel import ModelsHandler

    configured = {
        "voice_to_text": "dashscope",
        "voice_to_text_model": "qwen3-asr-flash",
    }
    capability = ModelsHandler._asr_capability(configured)

    self.assertTrue(capability["editable"])
    self.assertEqual(capability["current_provider"], "dashscope")
    self.assertEqual(capability["current_model"], "qwen3-asr-flash")
    # The per-provider model catalog must be part of the payload.
    self.assertIn("provider_models", capability)
    self.assertIn("dashscope", capability["provider_models"])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
Reference in New Issue
Block a user