feat(voice): rework TTS/ASR stack and unify tool/skill config schema

2026-07-17 11:07:11 +08:00 · 2026-05-21 16:00:54 +08:00
parent 2b90f377e6
commit b8333e351c
31 changed files with 1551 additions and 335 deletions
--- a/voice/linkai/linkai_voice.py
+++ b/voice/linkai/linkai_voice.py
@@ -1,16 +1,18 @@
-"""
-google voice service
-"""
+"""LinkAI voice: Whisper ASR + multi-vendor TTS (OpenAI / Doubao / Baidu)
+proxied via https://docs.link-ai.tech/platform/api/voice-speech."""
+import datetime
+import os
 import random
+
 import requests
-from voice import audio_convert
+
 from bridge.reply import Reply, ReplyType
+from common import const
 from common.log import logger
 from config import conf
+from voice import audio_convert
 from voice.voice import Voice
-from common import const
-import os
-import datetime
+

 class LinkAIVoice(Voice):
    def __init__(self):
@@ -21,63 +23,67 @@ class LinkAIVoice(Voice):
        try:
            url = conf().get("linkai_api_base", "https://api.link-ai.tech") + "/v1/audio/transcriptions"
            headers = {"Authorization": "Bearer " + conf().get("linkai_api_key")}
-            model = None
-            if not conf().get("text_to_voice") or conf().get("voice_to_text") == "openai":
-                model = const.WHISPER_1
+            # Pin whisper-1: gateway ignores any other ASR model id.
+            model = const.WHISPER_1
            if voice_file.endswith(".amr"):
                try:
                    mp3_file = os.path.splitext(voice_file)[0] + ".mp3"
                    audio_convert.any_to_mp3(voice_file, mp3_file)
                    voice_file = mp3_file
                except Exception as e:
-                    logger.warn(f"[LinkVoice] amr file transfer failed, directly send amr voice file: {format(e)}")
-            file = open(voice_file, "rb")
-            file_body = {
-                "file": file
-            }
-            data = {
-                "model": model
-            }
-            res = requests.post(url, files=file_body, headers=headers, data=data, timeout=(5, 60))
-            if res.status_code == 200:
-                text = res.json().get("text")
-            else:
-                res_json = res.json()
-                logger.error(f"[LinkVoice] voiceToText error, status_code={res.status_code}, msg={res_json.get('message')}")
+                    logger.warning(f"[LinkVoice] amr file transfer failed, directly send amr voice file: {e}")
+            with open(voice_file, "rb") as file:
+                res = requests.post(
+                    url,
+                    files={"file": file},
+                    headers=headers,
+                    data={"model": model},
+                    timeout=(5, 60),
+                )
+            if res.status_code != 200:
+                msg = ""
+                try:
+                    msg = res.json().get("message", "")
+                except Exception:
+                    pass
+                logger.error(f"[LinkVoice] voiceToText error, status_code={res.status_code}, msg={msg}")
                return None
-            reply = Reply(ReplyType.TEXT, text)
+            text = res.json().get("text")
            logger.info(f"[LinkVoice] voiceToText success, text={text}, file name={voice_file}")
+            return Reply(ReplyType.TEXT, text)
        except Exception as e:
            logger.error(e)
            return None
-        return reply

    def textToVoice(self, text):
        try:
            url = conf().get("linkai_api_base", "https://api.link-ai.tech") + "/v1/audio/speech"
            headers = {"Authorization": "Bearer " + conf().get("linkai_api_key")}
-            model = const.TTS_1
-            if not conf().get("text_to_voice") or conf().get("text_to_voice") in ["openai", const.TTS_1, const.TTS_1_HD]:
-                model = conf().get("text_to_voice_model") or const.TTS_1
+            # Gateway routes by `model` (tts-1 / doubao / baidu) + `voice` from
+            # that engine's catalog. `app_code` is optional workspace override.
            data = {
-                "model": model,
                "input": text,
                "voice": conf().get("tts_voice_id"),
-                "app_code": conf().get("linkai_app_code")
+                "app_code": conf().get("linkai_app_code"),
            }
+            model = conf().get("text_to_voice_model")
+            if model:
+                data["model"] = model
            res = requests.post(url, headers=headers, json=data, timeout=(5, 120))
-            if res.status_code == 200:
-                tmp_file_name = "tmp/" + datetime.datetime.now().strftime('%Y%m%d%H%M%S') + str(random.randint(0, 1000)) + ".mp3"
-                with open(tmp_file_name, 'wb') as f:
-                    f.write(res.content)
-                reply = Reply(ReplyType.VOICE, tmp_file_name)
-                logger.info(f"[LinkVoice] textToVoice success, input={text}, model={model}, voice_id={data.get('voice')}")
-                return reply
-            else:
-                res_json = res.json()
-                logger.error(f"[LinkVoice] textToVoice error, status_code={res.status_code}, msg={res_json.get('message')}")
+            if res.status_code != 200:
+                msg = ""
+                try:
+                    msg = res.json().get("message", "")
+                except Exception:
+                    pass
+                logger.error(f"[LinkVoice] textToVoice error, status_code={res.status_code}, msg={msg}")
                return None
+            tmp_file_name = "tmp/" + datetime.datetime.now().strftime('%Y%m%d%H%M%S') + str(random.randint(0, 1000)) + ".mp3"
+            os.makedirs(os.path.dirname(tmp_file_name), exist_ok=True)
+            with open(tmp_file_name, 'wb') as f:
+                f.write(res.content)
+            logger.info(f"[LinkVoice] textToVoice success, input={text}, voice_id={data.get('voice')}")
+            return Reply(ReplyType.VOICE, tmp_file_name)
        except Exception as e:
            logger.error(e)
-            # reply = Reply(ReplyType.ERROR, "遇到了一点小问题，请稍后再问我吧")
            return None