From 2b90f377e6c872bf454e89617321cfbe5d346f81 Mon Sep 17 00:00:00 2001 From: zhayujie Date: Wed, 20 May 2026 22:36:37 +0800 Subject: [PATCH] feat(voice): add dashscope & zhipu ASR, in-page mic input --- bridge/bridge.py | 44 +++- channel/web/chat.html | 27 ++- channel/web/static/js/console.js | 317 ++++++++++++++++++++++++++++- channel/web/web_channel.py | 187 +++++++++++++++-- voice/dashscope/__init__.py | 0 voice/dashscope/dashscope_voice.py | 135 ++++++++++++ voice/factory.py | 8 + voice/zhipuai/__init__.py | 0 voice/zhipuai/zhipuai_voice.py | 102 ++++++++++ 9 files changed, 786 insertions(+), 34 deletions(-) create mode 100644 voice/dashscope/__init__.py create mode 100644 voice/dashscope/dashscope_voice.py create mode 100644 voice/zhipuai/__init__.py create mode 100644 voice/zhipuai/zhipuai_voice.py diff --git a/bridge/bridge.py b/bridge/bridge.py index 753e394a..c0cb62e4 100644 --- a/bridge/bridge.py +++ b/bridge/bridge.py @@ -14,7 +14,9 @@ class Bridge(object): def __init__(self): self.btype = { "chat": const.OPENAI, - "voice_to_text": conf().get("voice_to_text", "openai"), + # Empty `voice_to_text` (the default in new configs) triggers + # the auto-pick below — see _auto_pick_voice_to_text for order. + "voice_to_text": conf().get("voice_to_text") or self._auto_pick_voice_to_text(), "text_to_voice": conf().get("text_to_voice", "google"), "translate": conf().get("translate", "baidu"), } @@ -84,6 +86,46 @@ class Bridge(object): self.chat_bots = {} self._agent_bridge = None + def refresh_voice(self): + """Re-read voice_to_text / text_to_voice from config and drop the + cached voice bots so the next call picks up the new provider. + Used by the web console after the user edits voice settings. + Does NOT touch the agent_bridge / agent state. + """ + new_v2t = conf().get("voice_to_text") or self._auto_pick_voice_to_text() + new_t2v = conf().get("text_to_voice", "google") + if conf().get("use_linkai") and conf().get("linkai_api_key"): + if not conf().get("voice_to_text") or conf().get("voice_to_text") in ["openai"]: + new_v2t = const.LINKAI + if not conf().get("text_to_voice") or conf().get("text_to_voice") in ["openai", const.TTS_1, const.TTS_1_HD]: + new_t2v = const.LINKAI + self.btype["voice_to_text"] = new_v2t + self.btype["text_to_voice"] = new_t2v + self.bots.pop("voice_to_text", None) + self.bots.pop("text_to_voice", None) + logger.info(f"[Bridge] voice refreshed: voice_to_text={new_v2t}, text_to_voice={new_t2v}") + + @staticmethod + def _auto_pick_voice_to_text() -> str: + """Pick an ASR provider by configured api keys when voice_to_text is + unset. Order matches the web console: openai → dashscope → zhipu → + linkai. Falls back to 'openai' when nothing is configured so the + original "missing key" error is preserved. + """ + def has(k: str) -> bool: + v = (conf().get(k) or "").strip() + return v != "" and v not in ("YOUR API KEY", "YOUR_API_KEY") + + for key, provider in ( + ("open_ai_api_key", "openai"), + ("dashscope_api_key", "dashscope"), + ("zhipu_ai_api_key", "zhipu"), + ("linkai_api_key", "linkai"), + ): + if has(key): + return provider + return "openai" + # 模型对应的接口 def get_bot(self, typename): if self.bots.get(typename) is None: diff --git a/channel/web/chat.html b/channel/web/chat.html index 31705d66..ba68e0f4 100644 --- a/channel/web/chat.html +++ b/channel/web/chat.html @@ -422,15 +422,24 @@ - +
+ + +