feat(voice): rework TTS/ASR stack and unify tool/skill config schema

This commit is contained in:
zhayujie
2026-05-21 16:00:54 +08:00
parent 2b90f377e6
commit b8333e351c
31 changed files with 1551 additions and 335 deletions

View File

@@ -1,16 +1,18 @@
"""
google voice service
"""
"""LinkAI voice: Whisper ASR + multi-vendor TTS (OpenAI / Doubao / Baidu)
proxied through the LinkAI gateway (API reference: https://docs.link-ai.tech/platform/api/voice-speech)."""
import datetime
import os
import random
import requests
from voice import audio_convert
from bridge.reply import Reply, ReplyType
from common import const
from common.log import logger
from config import conf
from voice import audio_convert
from voice.voice import Voice
from common import const
import os
import datetime
class LinkAIVoice(Voice):
def __init__(self):
@@ -21,63 +23,67 @@ class LinkAIVoice(Voice):
try:
url = conf().get("linkai_api_base", "https://api.link-ai.tech") + "/v1/audio/transcriptions"
headers = {"Authorization": "Bearer " + conf().get("linkai_api_key")}
model = None
if not conf().get("text_to_voice") or conf().get("voice_to_text") == "openai":
model = const.WHISPER_1
# Pin whisper-1: gateway ignores any other ASR model id.
model = const.WHISPER_1
if voice_file.endswith(".amr"):
try:
mp3_file = os.path.splitext(voice_file)[0] + ".mp3"
audio_convert.any_to_mp3(voice_file, mp3_file)
voice_file = mp3_file
except Exception as e:
logger.warn(f"[LinkVoice] amr file transfer failed, directly send amr voice file: {format(e)}")
file = open(voice_file, "rb")
file_body = {
"file": file
}
data = {
"model": model
}
res = requests.post(url, files=file_body, headers=headers, data=data, timeout=(5, 60))
if res.status_code == 200:
text = res.json().get("text")
else:
res_json = res.json()
logger.error(f"[LinkVoice] voiceToText error, status_code={res.status_code}, msg={res_json.get('message')}")
logger.warning(f"[LinkVoice] amr file transfer failed, directly send amr voice file: {e}")
with open(voice_file, "rb") as file:
res = requests.post(
url,
files={"file": file},
headers=headers,
data={"model": model},
timeout=(5, 60),
)
if res.status_code != 200:
msg = ""
try:
msg = res.json().get("message", "")
except Exception:
pass
logger.error(f"[LinkVoice] voiceToText error, status_code={res.status_code}, msg={msg}")
return None
reply = Reply(ReplyType.TEXT, text)
text = res.json().get("text")
logger.info(f"[LinkVoice] voiceToText success, text={text}, file name={voice_file}")
return Reply(ReplyType.TEXT, text)
except Exception as e:
logger.error(e)
return None
return reply
def textToVoice(self, text):
try:
url = conf().get("linkai_api_base", "https://api.link-ai.tech") + "/v1/audio/speech"
headers = {"Authorization": "Bearer " + conf().get("linkai_api_key")}
model = const.TTS_1
if not conf().get("text_to_voice") or conf().get("text_to_voice") in ["openai", const.TTS_1, const.TTS_1_HD]:
model = conf().get("text_to_voice_model") or const.TTS_1
# The gateway routes each request by `model` (tts-1 / doubao / baidu) plus a
# `voice` from that engine's catalog. `app_code` is an optional workspace override.
data = {
"model": model,
"input": text,
"voice": conf().get("tts_voice_id"),
"app_code": conf().get("linkai_app_code")
"app_code": conf().get("linkai_app_code"),
}
model = conf().get("text_to_voice_model")
if model:
data["model"] = model
res = requests.post(url, headers=headers, json=data, timeout=(5, 120))
if res.status_code == 200:
tmp_file_name = "tmp/" + datetime.datetime.now().strftime('%Y%m%d%H%M%S') + str(random.randint(0, 1000)) + ".mp3"
with open(tmp_file_name, 'wb') as f:
f.write(res.content)
reply = Reply(ReplyType.VOICE, tmp_file_name)
logger.info(f"[LinkVoice] textToVoice success, input={text}, model={model}, voice_id={data.get('voice')}")
return reply
else:
res_json = res.json()
logger.error(f"[LinkVoice] textToVoice error, status_code={res.status_code}, msg={res_json.get('message')}")
if res.status_code != 200:
msg = ""
try:
msg = res.json().get("message", "")
except Exception:
pass
logger.error(f"[LinkVoice] textToVoice error, status_code={res.status_code}, msg={msg}")
return None
tmp_file_name = "tmp/" + datetime.datetime.now().strftime('%Y%m%d%H%M%S') + str(random.randint(0, 1000)) + ".mp3"
os.makedirs(os.path.dirname(tmp_file_name), exist_ok=True)
with open(tmp_file_name, 'wb') as f:
f.write(res.content)
logger.info(f"[LinkVoice] textToVoice success, input={text}, voice_id={data.get('voice')}")
return Reply(ReplyType.VOICE, tmp_file_name)
except Exception as e:
logger.error(e)
# reply = Reply(ReplyType.ERROR, "遇到了一点小问题,请稍后再问我吧")
return None