mirror of
https://github.com/zhayujie/chatgpt-on-wechat.git
synced 2026-06-02 00:57:41 +08:00
feat(models): support xiaomi mimo
This commit is contained in:
@@ -66,4 +66,8 @@ def create_voice(voice_type):
|
||||
from voice.zhipuai.zhipuai_voice import ZhipuAIVoice
|
||||
|
||||
return ZhipuAIVoice()
|
||||
elif voice_type == "mimo":
|
||||
from voice.mimo.mimo_voice import MimoVoice
|
||||
|
||||
return MimoVoice()
|
||||
raise RuntimeError
|
||||
|
||||
0
voice/mimo/__init__.py
Normal file
0
voice/mimo/__init__.py
Normal file
109
voice/mimo/mimo_voice.py
Normal file
109
voice/mimo/mimo_voice.py
Normal file
@@ -0,0 +1,109 @@
|
||||
# encoding:utf-8
|
||||
"""
|
||||
小米 MiMo TTS - 基于 mimo-v2.5-tts 模型的语音合成。
|
||||
|
||||
通过 /chat/completions 接口实现:assistant 消息内容为待合成文本,
|
||||
audio 字段指定预置音色(如 冰糖/茉莉/苏打/Mia/Chloe 等),返回 base64
|
||||
编码的音频字节。
|
||||
|
||||
文档:https://platform.xiaomimimo.com/docs/zh-CN/usage-guide/speech-synthesis-v2.5
|
||||
注意:MiMo 不提供 ASR 端点,因此 voiceToText 不实现。
|
||||
"""
|
||||
import base64
|
||||
import datetime
|
||||
import os
|
||||
import random
|
||||
|
||||
import requests
|
||||
|
||||
from bridge.reply import Reply, ReplyType
|
||||
from common.log import logger
|
||||
from config import conf
|
||||
from voice.voice import Voice
|
||||
|
||||
# Base URL of the MiMo API; used when "mimo_api_base" is not set in the config.
DEFAULT_API_BASE = "https://api.xiaomimimo.com/v1"
|
||||
# TTS model name; used when "text_to_voice_model" is not set in the config.
DEFAULT_TTS_MODEL = "mimo-v2.5-tts"
|
||||
DEFAULT_TTS_VOICE = "冰糖" # Default voice: the de facto default on the China cluster
|
||||
# Passed as `timeout=` to requests.post; requests interprets a 2-tuple as
# (connect timeout, read timeout) in seconds.
REQUEST_TIMEOUT = (5, 120)
|
||||
|
||||
|
||||
class MimoVoice(Voice):
    """Xiaomi MiMo text-to-speech provider.

    Synthesis goes through the ``/chat/completions`` endpoint: the text to
    speak is sent as an ``assistant`` message and the ``audio`` field selects
    the output format and the preset voice. The response carries the audio
    as a base64 string, which is decoded and written to a WAV file under
    ``tmp/``.

    Speech recognition is not implemented; ``voiceToText`` always returns an
    error reply.
    """

    # Single user-facing message for every synthesis failure; details go to the log.
    _SYNTH_FAILED_MSG = "语音合成失败,请稍后再试"

    def __init__(self):
        pass

    def voiceToText(self, voice_file: str) -> Reply:
        """Return an error reply: this provider does not implement speech recognition.

        Args:
            voice_file: Path of the audio file to transcribe (unused).

        Returns:
            A ``Reply`` of type ``ReplyType.ERROR``.
        """
        # MiMo has no standalone ASR endpoint; another provider
        # (e.g. openai/zhipu/dashscope) should be configured instead.
        logger.warning("[MimoVoice] voiceToText is not supported by MiMo API")
        return Reply(ReplyType.ERROR, "MiMo 暂不支持语音识别,请配置其他 voice_to_text provider")

    def textToVoice(self, text: str) -> Reply:
        """Synthesize ``text`` to a WAV file via the MiMo chat-completions API.

        Reads ``mimo_api_key`` (required), ``mimo_api_base``,
        ``text_to_voice_model`` and ``tts_voice_id`` from the config, falling
        back to the module-level defaults for the last three.

        Args:
            text: The text to be spoken.

        Returns:
            ``Reply(ReplyType.VOICE, <path>)`` with the path of the written
            WAV file on success, otherwise ``Reply(ReplyType.ERROR, <message>)``.
            No exception escapes; unexpected errors are logged and turned into
            an error reply.
        """
        try:
            api_key = conf().get("mimo_api_key", "")
            if not api_key:
                logger.error("[MimoVoice] mimo_api_key is not configured")
                return Reply(ReplyType.ERROR, "未配置 MiMo API key")

            # Nothing to synthesize: skip the network round-trip entirely.
            if not text or not text.strip():
                logger.warning("[MimoVoice] textToVoice called with empty text")
                return Reply(ReplyType.ERROR, self._SYNTH_FAILED_MSG)

            api_base = (conf().get("mimo_api_base") or DEFAULT_API_BASE).rstrip("/")
            model = conf().get("text_to_voice_model") or DEFAULT_TTS_MODEL
            voice_id = conf().get("tts_voice_id") or DEFAULT_TTS_VOICE

            # The text to synthesize must be placed in the assistant message;
            # a user message may optionally be used as a style instruction.
            payload = {
                "model": model,
                "messages": [
                    {"role": "assistant", "content": text},
                ],
                "audio": {
                    "format": "wav",
                    "voice": voice_id,
                },
            }
            headers = {
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            }
            url = f"{api_base}/chat/completions"
            response = requests.post(url, headers=headers, json=payload, timeout=REQUEST_TIMEOUT)

            if response.status_code != 200:
                logger.error(
                    f"[MimoVoice] textToVoice failed: status={response.status_code} "
                    f"body={response.text[:500]} model={model} voice={voice_id}"
                )
                return Reply(ReplyType.ERROR, self._SYNTH_FAILED_MSG)

            data = response.json()

            api_error = self._api_error_message(data)
            if api_error is not None:
                logger.error(f"[MimoVoice] textToVoice api error: {api_error}")
                return Reply(ReplyType.ERROR, self._SYNTH_FAILED_MSG)

            audio_b64 = self._extract_audio_b64(data)
            if not audio_b64:
                # Truncate: the body may be large, and the HTTP-error branch
                # above caps its dump at the same length.
                logger.error(f"[MimoVoice] textToVoice empty audio in response: {str(data)[:500]}")
                return Reply(ReplyType.ERROR, self._SYNTH_FAILED_MSG)

            try:
                audio_bytes = base64.b64decode(audio_b64)
            except (ValueError, TypeError) as e:
                # binascii.Error (bad padding/characters) is a ValueError;
                # TypeError covers a non-string payload.
                logger.error(f"[MimoVoice] base64 decode failed: {e}")
                return Reply(ReplyType.ERROR, self._SYNTH_FAILED_MSG)

            # Microsecond timestamp plus a random suffix, so that syntheses
            # finishing within the same second do not overwrite each other.
            file_name = (
                "tmp/" + datetime.datetime.now().strftime("%Y%m%d%H%M%S%f")
                + str(random.randint(0, 1000)) + ".wav"
            )
            os.makedirs(os.path.dirname(file_name), exist_ok=True)
            with open(file_name, "wb") as f:
                f.write(audio_bytes)
            logger.info(
                f"[MimoVoice] textToVoice model={model} voice={voice_id} "
                f"file={file_name} bytes={len(audio_bytes)}"
            )
            return Reply(ReplyType.VOICE, file_name)
        except Exception as e:
            # Boundary handler: network errors, invalid JSON and file-system
            # failures all end up here and become a generic error reply.
            logger.exception(f"[MimoVoice] textToVoice exception: {e}")
            return Reply(ReplyType.ERROR, self._SYNTH_FAILED_MSG)

    @staticmethod
    def _api_error_message(data):
        """Return the error text carried by a response body, or None if there is none.

        A missing, null or otherwise empty ``error`` field is not an error.
        """
        if not isinstance(data, dict):
            return None
        err = data.get("error")
        if not err:
            return None
        if isinstance(err, dict):
            return str(err.get("message") or err)
        return str(err)

    @staticmethod
    def _extract_audio_b64(data):
        """Return ``choices[0].message.audio.data``, or None if any level is absent or malformed."""
        if not isinstance(data, dict):
            return None
        choices = data.get("choices")
        if not isinstance(choices, list) or not choices:
            return None
        first_choice = choices[0]
        message = first_choice.get("message") if isinstance(first_choice, dict) else None
        audio_obj = message.get("audio") if isinstance(message, dict) else None
        return audio_obj.get("data") if isinstance(audio_obj, dict) else None
|
||||
Reference in New Issue
Block a user