mirror of
https://github.com/zhayujie/chatgpt-on-wechat.git
synced 2026-06-02 00:57:41 +08:00
feat(models): support xiaomi mimo
This commit is contained in:
@@ -104,6 +104,7 @@ CowAgent supports all mainstream LLM providers. **Chat, vision, image generation
|
|||||||
| [Kimi](https://docs.cowagent.ai/en/models/kimi) | kimi-k2.6 | ✅ | ✅ | | | | |
|
| [Kimi](https://docs.cowagent.ai/en/models/kimi) | kimi-k2.6 | ✅ | ✅ | | | | |
|
||||||
| [MiniMax](https://docs.cowagent.ai/en/models/minimax) | MiniMax-M2.7 | ✅ | ✅ | ✅ | | ✅ | |
|
| [MiniMax](https://docs.cowagent.ai/en/models/minimax) | MiniMax-M2.7 | ✅ | ✅ | ✅ | | ✅ | |
|
||||||
| [ERNIE](https://docs.cowagent.ai/en/models/qianfan) | ernie-5.1 | ✅ | ✅ | | | | |
|
| [ERNIE](https://docs.cowagent.ai/en/models/qianfan) | ernie-5.1 | ✅ | ✅ | | | | |
|
||||||
|
| [MiMo](https://docs.cowagent.ai/en/models/mimo) | mimo-v2.5-pro / v2.5 | ✅ | ✅ | | | ✅ | |
|
||||||
| [LinkAI](https://docs.cowagent.ai/en/models/linkai) | One key for 100+ models | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
| [LinkAI](https://docs.cowagent.ai/en/models/linkai) | One key for 100+ models | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||||
| [Custom](https://docs.cowagent.ai/en/models/custom) | Local models / third-party proxy | ✅ | | | | | |
|
| [Custom](https://docs.cowagent.ai/en/models/custom) | Local models / third-party proxy | ✅ | | | | | |
|
||||||
|
|
||||||
|
|||||||
@@ -57,6 +57,7 @@ _DISCOVERABLE_MODELS = [
|
|||||||
("qianfan_api_key", const.QIANFAN, const.ERNIE_45_TURBO_VL, "Qianfan"),
|
("qianfan_api_key", const.QIANFAN, const.ERNIE_45_TURBO_VL, "Qianfan"),
|
||||||
("zhipu_ai_api_key", const.ZHIPU_AI, const.GLM_4_7, "ZhipuAI"),
|
("zhipu_ai_api_key", const.ZHIPU_AI, const.GLM_4_7, "ZhipuAI"),
|
||||||
("minimax_api_key", const.MiniMax, const.MINIMAX_M2_7, "MiniMax"),
|
("minimax_api_key", const.MiniMax, const.MINIMAX_M2_7, "MiniMax"),
|
||||||
|
("mimo_api_key", const.MIMO, const.MIMO_V2_5_PRO, "MiMo"),
|
||||||
]
|
]
|
||||||
|
|
||||||
# Model name prefix → discoverable provider display_name.
|
# Model name prefix → discoverable provider display_name.
|
||||||
@@ -73,6 +74,7 @@ _MODEL_PREFIX_TO_PROVIDER = [
|
|||||||
("glm-", "ZhipuAI"),
|
("glm-", "ZhipuAI"),
|
||||||
("minimax-", "MiniMax"),
|
("minimax-", "MiniMax"),
|
||||||
("abab", "MiniMax"),
|
("abab", "MiniMax"),
|
||||||
|
("mimo-", "MiMo"),
|
||||||
]
|
]
|
||||||
|
|
||||||
# Model prefixes that natively belong to OpenAI / LinkAI (raw HTTP providers).
|
# Model prefixes that natively belong to OpenAI / LinkAI (raw HTTP providers).
|
||||||
@@ -92,6 +94,7 @@ _PROVIDER_ID_TO_DISPLAY = {
|
|||||||
"qianfan": "Qianfan",
|
"qianfan": "Qianfan",
|
||||||
"zhipu": "ZhipuAI",
|
"zhipu": "ZhipuAI",
|
||||||
"minimax": "MiniMax",
|
"minimax": "MiniMax",
|
||||||
|
"mimo": "MiMo",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -63,6 +63,10 @@ class Bridge(object):
|
|||||||
if model_type and model_type.startswith("deepseek"):
|
if model_type and model_type.startswith("deepseek"):
|
||||||
self.btype["chat"] = const.DEEPSEEK
|
self.btype["chat"] = const.DEEPSEEK
|
||||||
|
|
||||||
|
# 小米 MiMo 系列模型,全部以 mimo- 开头
|
||||||
|
if model_type and model_type.startswith("mimo-"):
|
||||||
|
self.btype["chat"] = const.MIMO
|
||||||
|
|
||||||
if model_type and isinstance(model_type, str):
|
if model_type and isinstance(model_type, str):
|
||||||
lowered_model_type = model_type.lower()
|
lowered_model_type = model_type.lower()
|
||||||
if lowered_model_type == const.QIANFAN or lowered_model_type.startswith("ernie"):
|
if lowered_model_type == const.QIANFAN or lowered_model_type.startswith("ernie"):
|
||||||
|
|||||||
@@ -1387,6 +1387,7 @@ class ConfigHandler:
|
|||||||
const.DOUBAO_SEED_2_PRO, const.DOUBAO_SEED_2_CODE,
|
const.DOUBAO_SEED_2_PRO, const.DOUBAO_SEED_2_CODE,
|
||||||
const.KIMI_K2_6, const.KIMI_K2_5, const.KIMI_K2,
|
const.KIMI_K2_6, const.KIMI_K2_5, const.KIMI_K2,
|
||||||
const.ERNIE_5_1, const.ERNIE_5, const.ERNIE_X1_1, const.ERNIE_45_TURBO_128K, const.ERNIE_45_TURBO_32K,
|
const.ERNIE_5_1, const.ERNIE_5, const.ERNIE_X1_1, const.ERNIE_45_TURBO_128K, const.ERNIE_45_TURBO_32K,
|
||||||
|
const.MIMO_V2_5_PRO, const.MIMO_V2_5,
|
||||||
]
|
]
|
||||||
|
|
||||||
# Generic placeholder hints surfaced in the web console. We deliberately
|
# Generic placeholder hints surfaced in the web console. We deliberately
|
||||||
@@ -1481,6 +1482,14 @@ class ConfigHandler:
|
|||||||
"api_base_placeholder": _PLACEHOLDER_QIANFAN,
|
"api_base_placeholder": _PLACEHOLDER_QIANFAN,
|
||||||
"models": [const.ERNIE_5_1, const.ERNIE_5, const.ERNIE_X1_1, const.ERNIE_45_TURBO_128K, const.ERNIE_45_TURBO_32K],
|
"models": [const.ERNIE_5_1, const.ERNIE_5, const.ERNIE_X1_1, const.ERNIE_45_TURBO_128K, const.ERNIE_45_TURBO_32K],
|
||||||
}),
|
}),
|
||||||
|
("mimo", {
|
||||||
|
"label": {"zh": "小米 MiMo", "en": "MiMo"},
|
||||||
|
"api_key_field": "mimo_api_key",
|
||||||
|
"api_base_key": "mimo_api_base",
|
||||||
|
"api_base_default": "https://api.xiaomimimo.com/v1",
|
||||||
|
"api_base_placeholder": _PLACEHOLDER_V1,
|
||||||
|
"models": [const.MIMO_V2_5_PRO, const.MIMO_V2_5],
|
||||||
|
}),
|
||||||
("linkai", {
|
("linkai", {
|
||||||
"label": "LinkAI",
|
"label": "LinkAI",
|
||||||
"api_key_field": "linkai_api_key",
|
"api_key_field": "linkai_api_key",
|
||||||
@@ -1502,10 +1511,10 @@ class ConfigHandler:
|
|||||||
EDITABLE_KEYS = {
|
EDITABLE_KEYS = {
|
||||||
"model", "bot_type", "use_linkai",
|
"model", "bot_type", "use_linkai",
|
||||||
"open_ai_api_base", "deepseek_api_base", "qianfan_api_base", "claude_api_base", "gemini_api_base",
|
"open_ai_api_base", "deepseek_api_base", "qianfan_api_base", "claude_api_base", "gemini_api_base",
|
||||||
"zhipu_ai_api_base", "moonshot_base_url", "ark_base_url", "custom_api_base",
|
"zhipu_ai_api_base", "moonshot_base_url", "ark_base_url", "custom_api_base", "mimo_api_base",
|
||||||
"open_ai_api_key", "deepseek_api_key", "qianfan_api_key", "claude_api_key", "gemini_api_key",
|
"open_ai_api_key", "deepseek_api_key", "qianfan_api_key", "claude_api_key", "gemini_api_key",
|
||||||
"zhipu_ai_api_key", "dashscope_api_key", "moonshot_api_key",
|
"zhipu_ai_api_key", "dashscope_api_key", "moonshot_api_key",
|
||||||
"ark_api_key", "minimax_api_key", "linkai_api_key", "custom_api_key",
|
"ark_api_key", "minimax_api_key", "linkai_api_key", "custom_api_key", "mimo_api_key",
|
||||||
"agent_max_context_tokens", "agent_max_context_turns", "agent_max_steps",
|
"agent_max_context_tokens", "agent_max_context_turns", "agent_max_steps",
|
||||||
"enable_thinking", "web_password",
|
"enable_thinking", "web_password",
|
||||||
}
|
}
|
||||||
@@ -1646,7 +1655,7 @@ class ModelsHandler:
|
|||||||
# Capability -> provider ids drawn from ConfigHandler.PROVIDER_MODELS.
|
# Capability -> provider ids drawn from ConfigHandler.PROVIDER_MODELS.
|
||||||
_ASR_PROVIDERS = ["openai", "dashscope", "zhipu", "linkai"]
|
_ASR_PROVIDERS = ["openai", "dashscope", "zhipu", "linkai"]
|
||||||
# Web-console white-list. Other vendors stay usable via direct config.
|
# Web-console white-list. Other vendors stay usable via direct config.
|
||||||
_TTS_PROVIDERS = ["openai", "minimax", "dashscope", "linkai"]
|
_TTS_PROVIDERS = ["openai", "minimax", "dashscope", "mimo", "linkai"]
|
||||||
|
|
||||||
# TTS engine catalog (speech models, not voice timbres). Entries are
|
# TTS engine catalog (speech models, not voice timbres). Entries are
|
||||||
# either a bare code or {value, hint?} when a friendly label helps.
|
# either a bare code or {value, hint?} when a friendly label helps.
|
||||||
@@ -1661,6 +1670,10 @@ class ModelsHandler:
|
|||||||
"dashscope": [
|
"dashscope": [
|
||||||
{"value": "qwen3-tts-flash", "hint": "覆盖普通话、方言与主流外语"},
|
{"value": "qwen3-tts-flash", "hint": "覆盖普通话、方言与主流外语"},
|
||||||
],
|
],
|
||||||
|
# 小米 MiMo TTS 系列,通过 chat completions 接口合成
|
||||||
|
"mimo": [
|
||||||
|
{"value": "mimo-v2.5-tts", "hint": "预置音色 · 支持唱歌模式"},
|
||||||
|
],
|
||||||
# Aggregating gateway: a single endpoint multiplexes several
|
# Aggregating gateway: a single endpoint multiplexes several
|
||||||
# underlying TTS engines, selected via the `model` field.
|
# underlying TTS engines, selected via the `model` field.
|
||||||
# Each engine exposes its own voice catalog (see _TTS_PROVIDER_VOICES).
|
# Each engine exposes its own voice catalog (see _TTS_PROVIDER_VOICES).
|
||||||
@@ -1780,6 +1793,18 @@ class ModelsHandler:
|
|||||||
{"value": "Marcus", "hint": "陕西话 · 秦川"},
|
{"value": "Marcus", "hint": "陕西话 · 秦川"},
|
||||||
{"value": "Roy", "hint": "闽南语 · 阿杰"},
|
{"value": "Roy", "hint": "闽南语 · 阿杰"},
|
||||||
],
|
],
|
||||||
|
# 小米 MiMo 预置音色列表(mimo-v2.5-tts),文档:
|
||||||
|
# https://platform.xiaomimimo.com/docs/zh-CN/usage-guide/speech-synthesis-v2.5
|
||||||
|
"mimo": [
|
||||||
|
{"value": "冰糖", "hint": "中文 · 女声 · 冰糖"},
|
||||||
|
{"value": "茉莉", "hint": "中文 · 女声 · 茉莉"},
|
||||||
|
{"value": "苏打", "hint": "中文 · 男声 · 苏打"},
|
||||||
|
{"value": "白桦", "hint": "中文 · 男声 · 白桦"},
|
||||||
|
{"value": "Mia", "hint": "英文 · 女声 · Mia"},
|
||||||
|
{"value": "Chloe", "hint": "英文 · 女声 · Chloe"},
|
||||||
|
{"value": "Milo", "hint": "英文 · 男声 · Milo"},
|
||||||
|
{"value": "Dean", "hint": "英文 · 男声 · Dean"},
|
||||||
|
],
|
||||||
# Aggregating gateway: voices are scoped per engine model. The
|
# Aggregating gateway: voices are scoped per engine model. The
|
||||||
# frontend picks the correct list based on the selected model so
|
# frontend picks the correct list based on the selected model so
|
||||||
# users don't see incompatible timbres for the active engine.
|
# users don't see incompatible timbres for the active engine.
|
||||||
@@ -1916,6 +1941,8 @@ class ModelsHandler:
|
|||||||
# (see models/minimax/minimax_bot.py::call_vision); the M2.x chat
|
# (see models/minimax/minimax_bot.py::call_vision); the M2.x chat
|
||||||
# family is text-only.
|
# family is text-only.
|
||||||
"minimax": [const.MINIMAX_TEXT_01],
|
"minimax": [const.MINIMAX_TEXT_01],
|
||||||
|
# MiMo 原生全模态模型:v2.5-pro / v2.5 支持图像/音频/视频输入
|
||||||
|
"mimo": [const.MIMO_V2_5_PRO, const.MIMO_V2_5],
|
||||||
# LinkAI proxies the underlying vendor; surface a curated set of
|
# LinkAI proxies the underlying vendor; surface a curated set of
|
||||||
# multimodal models. Order: gpt-4.1-mini → gpt-5.4-mini as the
|
# multimodal models. Order: gpt-4.1-mini → gpt-5.4-mini as the
|
||||||
# cross-vendor baselines, then each vendor's recommended default.
|
# cross-vendor baselines, then each vendor's recommended default.
|
||||||
@@ -2045,6 +2072,7 @@ class ModelsHandler:
|
|||||||
("qianfan", "qianfan_api_key", const.ERNIE_45_TURBO_VL),
|
("qianfan", "qianfan_api_key", const.ERNIE_45_TURBO_VL),
|
||||||
("zhipu", "zhipu_ai_api_key", const.GLM_5V_TURBO),
|
("zhipu", "zhipu_ai_api_key", const.GLM_5V_TURBO),
|
||||||
("minimax", "minimax_api_key", const.MINIMAX_TEXT_01),
|
("minimax", "minimax_api_key", const.MINIMAX_TEXT_01),
|
||||||
|
("mimo", "mimo_api_key", const.MIMO_V2_5_PRO),
|
||||||
]
|
]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ ZHIPU_AI = "zhipu"
|
|||||||
MOONSHOT = "moonshot"
|
MOONSHOT = "moonshot"
|
||||||
MiniMax = "minimax"
|
MiniMax = "minimax"
|
||||||
DEEPSEEK = "deepseek"
|
DEEPSEEK = "deepseek"
|
||||||
|
MIMO = "mimo" # 小米 MiMo 大模型
|
||||||
CUSTOM = "custom" # custom OpenAI-compatible API, bot_type won't auto-switch on model change
|
CUSTOM = "custom" # custom OpenAI-compatible API, bot_type won't auto-switch on model change
|
||||||
MODELSCOPE = "modelscope"
|
MODELSCOPE = "modelscope"
|
||||||
|
|
||||||
@@ -140,6 +141,13 @@ KIMI_K2 = "kimi-k2"
|
|||||||
KIMI_K2_5 = "kimi-k2.5"
|
KIMI_K2_5 = "kimi-k2.5"
|
||||||
KIMI_K2_6 = "kimi-k2.6" # Kimi K2.6 - Agent recommended model (default)
|
KIMI_K2_6 = "kimi-k2.6" # Kimi K2.6 - Agent recommended model (default)
|
||||||
|
|
||||||
|
# 小米 MiMo
|
||||||
|
MIMO_V2_5_PRO = "mimo-v2.5-pro" # MiMo V2.5 Pro - 旗舰,长上下文(默认推荐)
|
||||||
|
MIMO_V2_5 = "mimo-v2.5" # MiMo V2.5 - 多模态(文/图/音/视频)
|
||||||
|
MIMO_V2_PRO = "mimo-v2-pro" # MiMo V2 Pro
|
||||||
|
MIMO_V2_OMNI = "mimo-v2-omni" # MiMo V2 Omni - 多模态
|
||||||
|
MIMO_V2_FLASH = "mimo-v2-flash" # MiMo V2 Flash - 极速版
|
||||||
|
|
||||||
# Doubao (Volcengine Ark)
|
# Doubao (Volcengine Ark)
|
||||||
DOUBAO = "doubao"
|
DOUBAO = "doubao"
|
||||||
DOUBAO_SEED_2_CODE = "doubao-seed-2-0-code-preview-260215"
|
DOUBAO_SEED_2_CODE = "doubao-seed-2-0-code-preview-260215"
|
||||||
@@ -182,6 +190,9 @@ MODEL_LIST = [
|
|||||||
# MiniMax
|
# MiniMax
|
||||||
MiniMax, MINIMAX_M2_7, MINIMAX_M2_7_HIGHSPEED, MINIMAX_M2_5, MINIMAX_M2_1, MINIMAX_M2_1_LIGHTNING, MINIMAX_M2, MINIMAX_ABAB6_5,
|
MiniMax, MINIMAX_M2_7, MINIMAX_M2_7_HIGHSPEED, MINIMAX_M2_5, MINIMAX_M2_1, MINIMAX_M2_1_LIGHTNING, MINIMAX_M2, MINIMAX_ABAB6_5,
|
||||||
|
|
||||||
|
# 小米 MiMo
|
||||||
|
MIMO, MIMO_V2_5_PRO, MIMO_V2_5, MIMO_V2_PRO, MIMO_V2_OMNI, MIMO_V2_FLASH,
|
||||||
|
|
||||||
# Claude
|
# Claude
|
||||||
CLAUDE3, CLAUDE_4_6_SONNET, CLAUDE_4_7_OPUS, CLAUDE_4_6_OPUS, CLAUDE_4_OPUS, CLAUDE_4_5_SONNET, CLAUDE_4_SONNET, CLAUDE_3_OPUS, CLAUDE_3_OPUS_0229,
|
CLAUDE3, CLAUDE_4_6_SONNET, CLAUDE_4_7_OPUS, CLAUDE_4_6_OPUS, CLAUDE_4_OPUS, CLAUDE_4_5_SONNET, CLAUDE_4_SONNET, CLAUDE_3_OPUS, CLAUDE_3_OPUS_0229,
|
||||||
CLAUDE_35_SONNET, CLAUDE_35_SONNET_1022, CLAUDE_35_SONNET_0620, CLAUDE_3_SONNET, CLAUDE_3_HAIKU,
|
CLAUDE_35_SONNET, CLAUDE_35_SONNET_1022, CLAUDE_35_SONNET_0620, CLAUDE_3_SONNET, CLAUDE_3_HAIKU,
|
||||||
|
|||||||
@@ -209,6 +209,9 @@ available_setting = {
|
|||||||
"Minimax_base_url": "",
|
"Minimax_base_url": "",
|
||||||
"deepseek_api_key": "",
|
"deepseek_api_key": "",
|
||||||
"deepseek_api_base": "https://api.deepseek.com/v1",
|
"deepseek_api_base": "https://api.deepseek.com/v1",
|
||||||
|
# 小米 MiMo 大模型
|
||||||
|
"mimo_api_key": "",
|
||||||
|
"mimo_api_base": "https://api.xiaomimimo.com/v1",
|
||||||
"web_host": "", # Web console bind address; empty means auto
|
"web_host": "", # Web console bind address; empty means auto
|
||||||
"web_port": 9899,
|
"web_port": 9899,
|
||||||
"web_password": "", # Web console password; empty means no authentication required
|
"web_password": "", # Web console password; empty means no authentication required
|
||||||
@@ -401,6 +404,8 @@ def load_config():
|
|||||||
"minimax_api_base": "MINIMAX_API_BASE",
|
"minimax_api_base": "MINIMAX_API_BASE",
|
||||||
"deepseek_api_key": "DEEPSEEK_API_KEY",
|
"deepseek_api_key": "DEEPSEEK_API_KEY",
|
||||||
"deepseek_api_base": "DEEPSEEK_API_BASE",
|
"deepseek_api_base": "DEEPSEEK_API_BASE",
|
||||||
|
"mimo_api_key": "MIMO_API_KEY",
|
||||||
|
"mimo_api_base": "MIMO_API_BASE",
|
||||||
"qianfan_api_key": "QIANFAN_API_KEY",
|
"qianfan_api_key": "QIANFAN_API_KEY",
|
||||||
"qianfan_api_base": "QIANFAN_API_BASE",
|
"qianfan_api_base": "QIANFAN_API_BASE",
|
||||||
"zhipu_ai_api_key": "ZHIPU_AI_API_KEY",
|
"zhipu_ai_api_key": "ZHIPU_AI_API_KEY",
|
||||||
|
|||||||
30
docs/README.md
Normal file
30
docs/README.md
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
# Documentation
|
||||||
|
|
||||||
|
This directory contains the Mintlify documentation site for the project.
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
- Node.js v20.17.0 or higher (LTS recommended)
|
||||||
|
|
||||||
|
## Install the CLI (one-time, global)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npm i -g mint
|
||||||
|
```
|
||||||
|
|
||||||
|
## Run the docs locally
|
||||||
|
|
||||||
|
From this `docs/` directory:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
mint dev
|
||||||
|
```
|
||||||
|
|
||||||
|
Then open http://localhost:3000 (or the port Mint reports if 3000 is in use).
|
||||||
|
|
||||||
|
> The first run downloads the Mint preview framework (~90 MB) into `~/.mintlify/`.
|
||||||
|
> Subsequent runs start instantly from the local cache.
|
||||||
|
|
||||||
|
## More
|
||||||
|
|
||||||
|
- Mintlify docs: https://www.mintlify.com/docs
|
||||||
@@ -88,6 +88,7 @@
|
|||||||
"models/doubao",
|
"models/doubao",
|
||||||
"models/kimi",
|
"models/kimi",
|
||||||
"models/qianfan",
|
"models/qianfan",
|
||||||
|
"models/mimo",
|
||||||
"models/linkai",
|
"models/linkai",
|
||||||
"models/coding-plan",
|
"models/coding-plan",
|
||||||
"models/custom"
|
"models/custom"
|
||||||
@@ -290,6 +291,7 @@
|
|||||||
"en/models/doubao",
|
"en/models/doubao",
|
||||||
"en/models/kimi",
|
"en/models/kimi",
|
||||||
"en/models/qianfan",
|
"en/models/qianfan",
|
||||||
|
"en/models/mimo",
|
||||||
"en/models/linkai",
|
"en/models/linkai",
|
||||||
"en/models/coding-plan",
|
"en/models/coding-plan",
|
||||||
"en/models/custom"
|
"en/models/custom"
|
||||||
@@ -492,6 +494,7 @@
|
|||||||
"ja/models/doubao",
|
"ja/models/doubao",
|
||||||
"ja/models/kimi",
|
"ja/models/kimi",
|
||||||
"ja/models/qianfan",
|
"ja/models/qianfan",
|
||||||
|
"ja/models/mimo",
|
||||||
"ja/models/linkai",
|
"ja/models/linkai",
|
||||||
"ja/models/coding-plan",
|
"ja/models/coding-plan",
|
||||||
"ja/models/custom"
|
"ja/models/custom"
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ A snapshot of each vendor's capabilities. "Text" refers to the main chat model;
|
|||||||
| [Doubao](/en/models/doubao) | doubao-seed-2.0 series | ✅ | ✅ | ✅ | | | ✅ |
|
| [Doubao](/en/models/doubao) | doubao-seed-2.0 series | ✅ | ✅ | ✅ | | | ✅ |
|
||||||
| [Kimi](/en/models/kimi) | kimi-k2.6 | ✅ | ✅ | | | | |
|
| [Kimi](/en/models/kimi) | kimi-k2.6 | ✅ | ✅ | | | | |
|
||||||
| [ERNIE](/en/models/qianfan) | ernie-5.1 | ✅ | ✅ | | | | |
|
| [ERNIE](/en/models/qianfan) | ernie-5.1 | ✅ | ✅ | | | | |
|
||||||
|
| [MiMo](/en/models/mimo) | mimo-v2.5-pro / v2.5 | ✅ | ✅ | | | ✅ | |
|
||||||
| [LinkAI](/en/models/linkai) | 100+ models from multiple vendors | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
| [LinkAI](/en/models/linkai) | 100+ models from multiple vendors | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||||
| [Custom](/en/models/custom) | Local models / third-party proxies | ✅ | | | | | |
|
| [Custom](/en/models/custom) | Local models / third-party proxies | ✅ | | | | | |
|
||||||
|
|
||||||
|
|||||||
136
docs/en/models/mimo.mdx
Normal file
136
docs/en/models/mimo.mdx
Normal file
@@ -0,0 +1,136 @@
|
|||||||
|
---
|
||||||
|
title: MiMo
|
||||||
|
description: Xiaomi MiMo model configuration (Text Chat + Image Understanding + Text-to-Speech)
|
||||||
|
---
|
||||||
|
|
||||||
|
Xiaomi MiMo is a native omni-modal large model. A single `mimo_api_key` enables text chat, image understanding, and text-to-speech all at once.
|
||||||
|
|
||||||
|
<Tip>
|
||||||
|
All capabilities below can be configured in one place via the "Model Management" page in the Web Console — no need to manually edit the configuration file.
|
||||||
|
</Tip>
|
||||||
|
|
||||||
|
## Text Chat
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"model": "mimo-v2.5-pro",
|
||||||
|
"mimo_api_key": "YOUR_API_KEY",
|
||||||
|
"mimo_api_base": "https://api.xiaomimimo.com/v1"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
| Parameter | Description |
|
||||||
|
| --- | --- |
|
||||||
|
| `model` | Default recommendation: `mimo-v2.5-pro`; `mimo-v2.5` is also supported |
|
||||||
|
| `mimo_api_key` | Create one in the [MiMo Open Platform](https://platform.xiaomimimo.com/console/api-keys) |
|
||||||
|
| `mimo_api_base` | Optional, defaults to `https://api.xiaomimimo.com/v1` |
|
||||||
|
|
||||||
|
### Model Selection
|
||||||
|
|
||||||
|
| Model | Use Case |
|
||||||
|
| --- | --- |
|
||||||
|
| `mimo-v2.5-pro` | Flagship: native omni-modal + Agent capability, up to 1M tokens context |
|
||||||
|
| `mimo-v2.5` | General-purpose, native omni-modal (text / image / video / audio) |
|
||||||
|
|
||||||
|
## Thinking Mode
|
||||||
|
|
||||||
|
The MiMo V2.5 series enables "thinking mode" by default: the model emits `reasoning_content` (chain-of-thought) before the final answer, improving performance on complex tasks.
|
||||||
|
|
||||||
|
Use the global `enable_thinking` flag to toggle visibility (also switchable from the Web Console settings):
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"enable_thinking": true
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Image Understanding
|
||||||
|
|
||||||
|
Once `mimo_api_key` is configured, the Agent's Vision tool can automatically use MiMo's vision models:
|
||||||
|
|
||||||
|
- When the main model itself is multimodal (`mimo-v2.5-pro` / `mimo-v2.5`), images are handled directly by the main model with no extra setup.
|
||||||
|
- When the main model belongs to another vendor, the Vision tool automatically falls back to `mimo-v2.5-pro`, following the provider fallback order.
|
||||||
|
|
||||||
|
To force a specific Vision model, set it explicitly in the configuration:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"tools": {
|
||||||
|
"vision": {
|
||||||
|
"provider": "mimo",
|
||||||
|
"model": "mimo-v2.5-pro"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Text-to-Speech (TTS)
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"text_to_voice": "mimo",
|
||||||
|
"text_to_voice_model": "mimo-v2.5-tts",
|
||||||
|
"tts_voice_id": "冰糖"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
| Parameter | Description |
|
||||||
|
| --- | --- |
|
||||||
|
| `text_to_voice_model` | Currently only `mimo-v2.5-tts` (preset voices + singing mode) |
|
||||||
|
| `tts_voice_id` | Preset voice name (Chinese voice IDs use the Chinese name directly) |
|
||||||
|
|
||||||
|
### Preset Voices
|
||||||
|
|
||||||
|
| Voice ID | Description |
|
||||||
|
| --- | --- |
|
||||||
|
| `Mia` | English · Female |
|
||||||
|
| `Chloe` | English · Female |
|
||||||
|
| `Milo` | English · Male |
|
||||||
|
| `Dean` | English · Male |
|
||||||
|
| `冰糖` | Chinese · Female (default) |
|
||||||
|
| `茉莉` | Chinese · Female |
|
||||||
|
| `苏打` | Chinese · Male |
|
||||||
|
| `白桦` | Chinese · Male |
|
||||||
|
|
||||||
|
|
||||||
|
You can also pick a voice visually from the Web Console under "Model Management → Text-to-Speech".
|
||||||
|
|
||||||
|
### Style Control
|
||||||
|
|
||||||
|
MiMo TTS supports embedding **audio tags** in the synthesis text to control emotion, tone, dialect, persona, and even singing. Tags must appear in the **text that will be synthesized to speech (i.e. the Agent's reply)**, with the overall style tag placed at the very beginning:
|
||||||
|
|
||||||
|
```
|
||||||
|
(style)content-to-synthesize
|
||||||
|
```
|
||||||
|
|
||||||
|
Half-width `()`, full-width `()`, and square `[]` brackets are all accepted. Both Chinese and English style descriptors work — pick whichever language expresses the timbre most precisely. Common examples:
|
||||||
|
|
||||||
|
| Category | Example tags |
|
||||||
|
| --- | --- |
|
||||||
|
| Basic emotions | `happy` `sad` `angry` `fear` `surprised` `excited` `aggrieved` `calm` `indifferent` |
|
||||||
|
| Compound emotions | `wistful` `relieved` `helpless` `guilty` `at ease` `uneasy` `touched` |
|
||||||
|
| Overall tone | `gentle` `aloof` `lively` `serious` `languid` `playful` `deep` `sharp` `cutting` |
|
||||||
|
| Voice character | `magnetic` `mellow` `bright` `ethereal` `childlike` `aged` `sweet` `husky` |
|
||||||
|
| Persona | `squeaky` `mature lady` `young boy` `uncle` `Taiwanese accent` |
|
||||||
|
| Dialect | `Northeastern` `Sichuan` `Henan` `Cantonese` |
|
||||||
|
| Role-play | `Sun Wukong` `Lin Daiyu` |
|
||||||
|
| Singing | `sing` / `singing` |
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
|
||||||
|
- `(magnetic)The night is deep, and the city is still breathing.`
|
||||||
|
- `(gentle)Take a breath. You've got this.`
|
||||||
|
- `(serious)This is the final warning before the system reboots.`
|
||||||
|
- `(singing)Oh, when the saints go marching in…`
|
||||||
|
|
||||||
|
You can also insert fine-grained audio tags at any position in the text to control breathing, laughter, pauses, etc. For example:
|
||||||
|
|
||||||
|
```
|
||||||
|
(nervous, deep breath) Phew… stay calm, stay calm. (faster pace) I've rehearsed this intro fifty times, it'll be fine.
|
||||||
|
```
|
||||||
|
|
||||||
|
See the [MiMo speech synthesis documentation](https://platform.xiaomimimo.com/docs/zh-CN/usage-guide/speech-synthesis-v2.5) for the full tag list.
|
||||||
|
|
||||||
|
<Tip>
|
||||||
|
When CowAgent calls TTS, the Agent's reply text (including any `(...)` tags) is forwarded directly to MiMo for synthesis. Tell the model in its persona / system prompt to "prefix replies with a `(style)` tag to control the tone", and IM channels (WeChat / Feishu / DingTalk / WeCom) will play voice replies with the corresponding emotion, dialect, or even singing.
|
||||||
|
</Tip>
|
||||||
@@ -104,6 +104,7 @@ CowAgent は主要な LLM プロバイダーすべてに対応しています。
|
|||||||
| [Kimi](https://docs.cowagent.ai/ja/models/kimi) | kimi-k2.6 | ✅ | ✅ | | | | |
|
| [Kimi](https://docs.cowagent.ai/ja/models/kimi) | kimi-k2.6 | ✅ | ✅ | | | | |
|
||||||
| [MiniMax](https://docs.cowagent.ai/ja/models/minimax) | MiniMax-M2.7 | ✅ | ✅ | ✅ | | ✅ | |
|
| [MiniMax](https://docs.cowagent.ai/ja/models/minimax) | MiniMax-M2.7 | ✅ | ✅ | ✅ | | ✅ | |
|
||||||
| [ERNIE](https://docs.cowagent.ai/ja/models/qianfan) | ernie-5.1 | ✅ | ✅ | | | | |
|
| [ERNIE](https://docs.cowagent.ai/ja/models/qianfan) | ernie-5.1 | ✅ | ✅ | | | | |
|
||||||
|
| [MiMo](https://docs.cowagent.ai/ja/models/mimo) | mimo-v2.5-pro / v2.5 | ✅ | ✅ | | | ✅ | |
|
||||||
| [LinkAI](https://docs.cowagent.ai/ja/models/linkai) | 1 つの Key で 100+ モデルに接続 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
| [LinkAI](https://docs.cowagent.ai/ja/models/linkai) | 1 つの Key で 100+ モデルに接続 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||||
| [カスタム](https://docs.cowagent.ai/ja/models/custom) | ローカルモデル / サードパーティプロキシ | ✅ | | | | | |
|
| [カスタム](https://docs.cowagent.ai/ja/models/custom) | ローカルモデル / サードパーティプロキシ | ✅ | | | | | |
|
||||||
|
|
||||||
|
|||||||
135
docs/ja/models/mimo.mdx
Normal file
135
docs/ja/models/mimo.mdx
Normal file
@@ -0,0 +1,135 @@
|
|||||||
|
---
|
||||||
|
title: Xiaomi MiMo
|
||||||
|
description: Xiaomi MiMo モデル設定(テキスト対話 + 画像理解 + 音声合成)
|
||||||
|
---
|
||||||
|
|
||||||
|
Xiaomi MiMo はネイティブ全モーダル大規模言語モデルです。1 つの `mimo_api_key` でテキスト対話、画像理解、音声合成を同時に有効化できます。
|
||||||
|
|
||||||
|
<Tip>
|
||||||
|
Web コンソールの「モデル管理」ページから、以下のすべての機能をワンストップで設定でき、設定ファイルを手動で編集する必要はありません。
|
||||||
|
</Tip>
|
||||||
|
|
||||||
|
## テキスト対話
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"model": "mimo-v2.5-pro",
|
||||||
|
"mimo_api_key": "YOUR_API_KEY",
|
||||||
|
"mimo_api_base": "https://api.xiaomimimo.com/v1"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
| パラメータ | 説明 |
|
||||||
|
| --- | --- |
|
||||||
|
| `model` | 推奨は `mimo-v2.5-pro`。`mimo-v2.5` も使用可能 |
|
||||||
|
| `mimo_api_key` | [MiMo Open Platform](https://platform.xiaomimimo.com/console/api-keys) で作成 |
|
||||||
|
| `mimo_api_base` | 任意。デフォルトは `https://api.xiaomimimo.com/v1` |
|
||||||
|
|
||||||
|
### モデル選択
|
||||||
|
|
||||||
|
| モデル | ユースケース |
|
||||||
|
| --- | --- |
|
||||||
|
| `mimo-v2.5-pro` | フラッグシップ。ネイティブ全モーダル + Agent 能力、最大 100 万トークンのコンテキスト |
|
||||||
|
| `mimo-v2.5` | 汎用版。ネイティブ全モーダル(テキスト / 画像 / 動画 / 音声) |
|
||||||
|
|
||||||
|
## 思考モード
|
||||||
|
|
||||||
|
MiMo V2.5 シリーズはデフォルトで「思考モード」が有効です。最終回答の前に `reasoning_content`(思考過程)を出力することで、複雑なタスクのパフォーマンスを高めます。
|
||||||
|
|
||||||
|
表示の有無はグローバル設定 `enable_thinking` で切り替え可能です(Web コンソールの設定ページからも変更できます):
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"enable_thinking": true
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## 画像理解
|
||||||
|
|
||||||
|
`mimo_api_key` を設定すると、Agent の Vision ツールは自動的に MiMo のビジョンモデルを利用します:
|
||||||
|
|
||||||
|
- メインモデル自体がマルチモーダル(`mimo-v2.5-pro` / `mimo-v2.5`)の場合は、画像はメインモデルが直接処理し、追加設定は不要です。
|
||||||
|
- メインモデルが他社製の場合、Vision ツールは順序に従い `mimo-v2.5-pro` にフォールバックします。
|
||||||
|
|
||||||
|
特定の Vision モデルを強制したい場合は、設定ファイルで明示的に指定してください:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"tools": {
|
||||||
|
"vision": {
|
||||||
|
"provider": "mimo",
|
||||||
|
"model": "mimo-v2.5-pro"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## 音声合成
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"text_to_voice": "mimo",
|
||||||
|
"text_to_voice_model": "mimo-v2.5-tts",
|
||||||
|
"tts_voice_id": "冰糖"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
| パラメータ | 説明 |
|
||||||
|
| --- | --- |
|
||||||
|
| `text_to_voice_model` | 現在は `mimo-v2.5-tts` のみ対応(プリセット音色 + 歌唱モード) |
|
||||||
|
| `tts_voice_id` | プリセット音色名(中国語の音色は中国語名がそのまま ID) |
|
||||||
|
|
||||||
|
### プリセット音色
|
||||||
|
|
||||||
|
| 音色 ID | 説明 |
|
||||||
|
| --- | --- |
|
||||||
|
| `冰糖` | 中国語 · 女声(デフォルト) |
|
||||||
|
| `茉莉` | 中国語 · 女声 |
|
||||||
|
| `苏打` | 中国語 · 男声 |
|
||||||
|
| `白桦` | 中国語 · 男声 |
|
||||||
|
| `Mia` | 英語 · 女声 |
|
||||||
|
| `Chloe` | 英語 · 女声 |
|
||||||
|
| `Milo` | 英語 · 男声 |
|
||||||
|
| `Dean` | 英語 · 男声 |
|
||||||
|
|
||||||
|
Web コンソールの「モデル管理 → 音声合成」のドロップダウンから視覚的に選択することもできます。
|
||||||
|
|
||||||
|
### スタイル制御
|
||||||
|
|
||||||
|
MiMo TTS は合成テキスト内に **音声タグ** を埋め込むことで、感情、語調、方言、キャラクター、さらには歌唱まで制御できます。タグは **最終的に音声合成されるテキスト(つまり Agent の返信内容)** に含める必要があり、全体スタイルのタグは先頭に置きます:
|
||||||
|
|
||||||
|
```
|
||||||
|
(スタイル)合成するテキスト
|
||||||
|
```
|
||||||
|
|
||||||
|
半角 `()`、全角 `()`、`[]` の 3 種類の括弧に対応しています。スタイル記述は中国語・英語のどちらでも OK で、最も的確に表現できる言語を選んでください。代表的なスタイル例:
|
||||||
|
|
||||||
|
| 種類 | サンプルタグ |
|
||||||
|
| --- | --- |
|
||||||
|
| 基本感情 | `happy` `sad` `angry` `fear` `surprised` `excited` `aggrieved` `calm` `indifferent` |
|
||||||
|
| 複合感情 | `wistful` `relieved` `helpless` `guilty` `at ease` `uneasy` `touched` |
|
||||||
|
| 全体トーン | `gentle` `aloof` `lively` `serious` `languid` `playful` `deep` `sharp` `cutting` |
|
||||||
|
| 声質 | `magnetic` `mellow` `bright` `ethereal` `childlike` `aged` `sweet` `husky` |
|
||||||
|
| キャラクター調 | `squeaky` `mature lady` `young boy` `uncle` `Taiwanese accent` |
|
||||||
|
| 方言 | `Northeastern` `Sichuan` `Henan` `Cantonese` |
|
||||||
|
| ロールプレイ | `Sun Wukong` `Lin Daiyu` |
|
||||||
|
| 歌唱 | `sing` / `singing` |
|
||||||
|
|
||||||
|
例:
|
||||||
|
|
||||||
|
- `(magnetic)夜が深まり、街はまだ呼吸している。`
|
||||||
|
- `(gentle)深呼吸して。きっと大丈夫。`
|
||||||
|
- `(serious)これがシステム再起動前の最後の警告です。`
|
||||||
|
- `(singing)Twinkle, twinkle, little star, how I wonder what you are…`
|
||||||
|
|
||||||
|
テキストの任意の位置に細かい音声タグを挿入して、呼吸、笑い声、間などを制御することもできます。例:
|
||||||
|
|
||||||
|
```
|
||||||
|
(nervous, deep breath) ふぅ……落ち着いて、落ち着いて。(faster pace) 自己紹介は五十回練習したから大丈夫。
|
||||||
|
```
|
||||||
|
|
||||||
|
タグの完全な一覧は [MiMo 音声合成ドキュメント](https://platform.xiaomimimo.com/docs/zh-CN/usage-guide/speech-synthesis-v2.5) を参照してください。
|
||||||
|
|
||||||
|
<Tip>
|
||||||
|
CowAgent は TTS 呼び出し時、Agent の返信原文(`(...)` タグを含む)をそのまま MiMo に送信します。ペルソナ / システムプロンプトで「返信の冒頭に `(スタイル)` タグを付けて口調を指定する」よう指示すれば、IM チャネル(WeChat / Feishu / DingTalk / WeCom)の音声返信に感情・方言・歌唱などの効果を付与できます。
|
||||||
|
</Tip>
|
||||||
@@ -22,6 +22,7 @@ CowAgent 支持国内外主流厂商的大语言模型,模型接口实现在
|
|||||||
| [豆包 Doubao](/models/doubao) | doubao-seed-2.0 系列 | ✅ | ✅ | ✅ | | | ✅ |
|
| [豆包 Doubao](/models/doubao) | doubao-seed-2.0 系列 | ✅ | ✅ | ✅ | | | ✅ |
|
||||||
| [Kimi](/models/kimi) | kimi-k2.6 | ✅ | ✅ | | | | |
|
| [Kimi](/models/kimi) | kimi-k2.6 | ✅ | ✅ | | | | |
|
||||||
| [百度千帆](/models/qianfan) | ernie-5.1 | ✅ | ✅ | | | | |
|
| [百度千帆](/models/qianfan) | ernie-5.1 | ✅ | ✅ | | | | |
|
||||||
|
| [小米 MiMo](/models/mimo) | mimo-v2.5-pro / v2.5 | ✅ | ✅ | | | ✅ | |
|
||||||
| [LinkAI](/models/linkai) | 多厂商 100+ 模型统一接入 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
| [LinkAI](/models/linkai) | 多厂商 100+ 模型统一接入 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||||
| [自定义](/models/custom) |本地模型 / 三方代理 | ✅ | | | | | |
|
| [自定义](/models/custom) |本地模型 / 三方代理 | ✅ | | | | | |
|
||||||
|
|
||||||
|
|||||||
135
docs/models/mimo.mdx
Normal file
135
docs/models/mimo.mdx
Normal file
@@ -0,0 +1,135 @@
|
|||||||
|
---
|
||||||
|
title: 小米 MiMo
|
||||||
|
description: 小米 MiMo 模型配置(文本对话 + 图像理解 + 语音合成)
|
||||||
|
---
|
||||||
|
|
||||||
|
小米 MiMo 是原生全模态大模型,单 `mimo_api_key` 即可同时启用文本对话、图像理解与语音合成。
|
||||||
|
|
||||||
|
<Tip>
|
||||||
|
通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。
|
||||||
|
</Tip>
|
||||||
|
|
||||||
|
## 文本对话
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"model": "mimo-v2.5-pro",
|
||||||
|
"mimo_api_key": "YOUR_API_KEY",
|
||||||
|
"mimo_api_base": "https://api.xiaomimimo.com/v1"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
| 参数 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| `model` | 默认推荐 `mimo-v2.5-pro`,也可使用 `mimo-v2.5` |
|
||||||
|
| `mimo_api_key` | 在 [MiMo 开放平台](https://platform.xiaomimimo.com/console/api-keys) 创建 |
|
||||||
|
| `mimo_api_base` | 可选,默认为 `https://api.xiaomimimo.com/v1` |
|
||||||
|
|
||||||
|
### 模型选择
|
||||||
|
|
||||||
|
| 模型 | 适用场景 |
|
||||||
|
| --- | --- |
|
||||||
|
| `mimo-v2.5-pro` | 旗舰,原生全模态 + Agent 能力,最高 100 万 tokens 上下文 |
|
||||||
|
| `mimo-v2.5` | 综合版,原生全模态(文本 / 图像 / 视频 / 音频) |
|
||||||
|
|
||||||
|
## 思考模式
|
||||||
|
|
||||||
|
MiMo V2.5 系列默认开启「思考模式」:模型在输出最终回答前会先输出 `reasoning_content`(思维链),提升复杂任务表现。
|
||||||
|
|
||||||
|
通过全局配置 `enable_thinking` 控制是否展示(也可在 Web 控制台 - 配置页面切换):
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"enable_thinking": true
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## 图像理解
|
||||||
|
|
||||||
|
配置 `mimo_api_key` 后,Agent 的 Vision 工具可以自动使用 MiMo 视觉模型:
|
||||||
|
|
||||||
|
- 当主模型本身是多模态时(`mimo-v2.5-pro` / `mimo-v2.5`),直接由主模型识别图像,无需额外配置
|
||||||
|
- 当主模型是其他厂商时,Vision 工具会根据顺序自动 fallback 到 `mimo-v2.5-pro`
|
||||||
|
|
||||||
|
如需手动指定 Vision 模型,可在配置文件中显式配置:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"tools": {
|
||||||
|
"vision": {
|
||||||
|
"provider": "mimo",
|
||||||
|
"model": "mimo-v2.5-pro"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## 语音合成
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"text_to_voice": "mimo",
|
||||||
|
"text_to_voice_model": "mimo-v2.5-tts",
|
||||||
|
"tts_voice_id": "冰糖"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
| 参数 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| `text_to_voice_model` | 当前仅支持 `mimo-v2.5-tts`(预置音色 + 唱歌模式) |
|
||||||
|
| `tts_voice_id` | 预置音色名(中文音色直接使用中文名作为 ID) |
|
||||||
|
|
||||||
|
### 预置音色
|
||||||
|
|
||||||
|
| 音色 ID | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| `冰糖` | 中文 · 女声(默认) |
|
||||||
|
| `茉莉` | 中文 · 女声 |
|
||||||
|
| `苏打` | 中文 · 男声 |
|
||||||
|
| `白桦` | 中文 · 男声 |
|
||||||
|
| `Mia` | 英文 · 女声 |
|
||||||
|
| `Chloe` | 英文 · 女声 |
|
||||||
|
| `Milo` | 英文 · 男声 |
|
||||||
|
| `Dean` | 英文 · 男声 |
|
||||||
|
|
||||||
|
也可在 Web 控制台的「模型管理 → 语音合成」下拉框中可视化选择。
|
||||||
|
|
||||||
|
### 风格控制
|
||||||
|
|
||||||
|
MiMo TTS 支持在合成文本中嵌入 **音频标签** 来控制情绪、语调、方言、角色甚至唱歌。标签需出现在 **最终被合成为语音的文本(即 Agent 回复内容)** 中,整体风格标签写在开头:
|
||||||
|
|
||||||
|
```
|
||||||
|
(风格)待合成内容
|
||||||
|
```
|
||||||
|
|
||||||
|
支持半角 `()`、全角 `()` 或 `[]` 三种括号。常见风格示例:
|
||||||
|
|
||||||
|
| 类型 | 示例标签 |
|
||||||
|
| --- | --- |
|
||||||
|
| 基础情绪 | `开心` `悲伤` `愤怒` `恐惧` `惊讶` `兴奋` `委屈` `平静` `冷漠` |
|
||||||
|
| 复合情绪 | `怅然` `欣慰` `无奈` `愧疚` `释然` `忐忑` `动情` |
|
||||||
|
| 整体语调 | `温柔` `高冷` `活泼` `严肃` `慵懒` `俏皮` `深沉` `干练` `凌厉` |
|
||||||
|
| 音色定位 | `磁性` `醇厚` `清亮` `空灵` `稚嫩` `苍老` `甜美` `沙哑` |
|
||||||
|
| 人设腔调 | `夹子音` `御姐音` `正太音` `大叔音` `台湾腔` |
|
||||||
|
| 方言 | `东北话` `四川话` `河南话` `粤语` |
|
||||||
|
| 角色扮演 | `孙悟空` `林黛玉` |
|
||||||
|
| 唱歌 | `唱歌`(等价于 `sing` / `singing`) |
|
||||||
|
|
||||||
|
示例:
|
||||||
|
|
||||||
|
- (磁性)夜已经深了,城市还在呼吸。
|
||||||
|
- (东北话)哎呀妈呀,这天儿也忒冷了吧!
|
||||||
|
- (粤语)呢个真係好正啊!
|
||||||
|
- (唱歌)原谅我这一生不羁放纵爱自由…
|
||||||
|
|
||||||
|
也可以在文本任意位置插入细粒度音频标签来控制呼吸、笑声、停顿等,例如:
|
||||||
|
|
||||||
|
```
|
||||||
|
(紧张,深呼吸)呼……冷静,冷静。(语速加快)自我介绍我背了五十遍了,应该没问题。
|
||||||
|
```
|
||||||
|
|
||||||
|
完整标签列表参见 [MiMo 语音合成文档](https://platform.xiaomimimo.com/docs/zh-CN/usage-guide/speech-synthesis-v2.5)。
|
||||||
|
|
||||||
|
<Tip>
|
||||||
|
CowAgent 在调用 TTS 时会将 Agent 的回复原文(含 `(...)` 标签)直接送入 MiMo 合成。你可以在人设 / 系统提示词里要求模型「在回复开头用 `(风格)` 标签控制语气」,即可让 IM 渠道(微信 / 飞书 / 钉钉 / 企微)的语音回复带上情绪、方言、唱歌等效果。
|
||||||
|
</Tip>
|
||||||
@@ -104,6 +104,7 @@ CowAgent 支持国内外主流厂商的大语言模型。**文本对话、图像
|
|||||||
| [豆包 Doubao](https://docs.cowagent.ai/models/doubao) | doubao-seed-2.0 系列 | ✅ | ✅ | ✅ | | | ✅ |
|
| [豆包 Doubao](https://docs.cowagent.ai/models/doubao) | doubao-seed-2.0 系列 | ✅ | ✅ | ✅ | | | ✅ |
|
||||||
| [Kimi](https://docs.cowagent.ai/models/kimi) | kimi-k2.6 | ✅ | ✅ | | | | |
|
| [Kimi](https://docs.cowagent.ai/models/kimi) | kimi-k2.6 | ✅ | ✅ | | | | |
|
||||||
| [百度ERNIE](https://docs.cowagent.ai/models/qianfan) | ernie-5.1 | ✅ | ✅ | | | | |
|
| [百度ERNIE](https://docs.cowagent.ai/models/qianfan) | ernie-5.1 | ✅ | ✅ | | | | |
|
||||||
|
| [小米 MiMo](https://docs.cowagent.ai/models/mimo) | mimo-v2.5-pro / v2.5 | ✅ | ✅ | | | ✅ | |
|
||||||
| [LinkAI](https://docs.cowagent.ai/models/linkai) | 一个 Key 接入 100+ 模型 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
| [LinkAI](https://docs.cowagent.ai/models/linkai) | 一个 Key 接入 100+ 模型 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||||
| [自定义](https://docs.cowagent.ai/models/custom) | 本地模型 / 三方代理 | ✅ | | | | | |
|
| [自定义](https://docs.cowagent.ai/models/custom) | 本地模型 / 三方代理 | ✅ | | | | | |
|
||||||
|
|
||||||
|
|||||||
@@ -25,6 +25,10 @@ def create_bot(bot_type):
|
|||||||
from models.qianfan.qianfan_bot import QianfanBot
|
from models.qianfan.qianfan_bot import QianfanBot
|
||||||
return QianfanBot()
|
return QianfanBot()
|
||||||
|
|
||||||
|
elif bot_type == const.MIMO:
|
||||||
|
from models.mimo.mimo_bot import MimoBot
|
||||||
|
return MimoBot()
|
||||||
|
|
||||||
elif bot_type in (const.OPENAI, const.CHATGPT, const.CUSTOM): # OpenAI-compatible API
|
elif bot_type in (const.OPENAI, const.CHATGPT, const.CUSTOM): # OpenAI-compatible API
|
||||||
from models.chatgpt.chat_gpt_bot import ChatGPTBot
|
from models.chatgpt.chat_gpt_bot import ChatGPTBot
|
||||||
return ChatGPTBot()
|
return ChatGPTBot()
|
||||||
|
|||||||
0
models/mimo/__init__.py
Normal file
0
models/mimo/__init__.py
Normal file
668
models/mimo/mimo_bot.py
Normal file
668
models/mimo/mimo_bot.py
Normal file
@@ -0,0 +1,668 @@
|
|||||||
|
# encoding:utf-8
|
||||||
|
|
||||||
|
"""
|
||||||
|
小米 MiMo Bot —— OpenAI 兼容协议,使用独立 API key / base 配置。
|
||||||
|
|
||||||
|
支持模型:
|
||||||
|
- mimo-v2.5-pro (旗舰,长上下文,默认开启思考)
|
||||||
|
- mimo-v2.5 (多模态:文/图/音/视频,默认开启思考)
|
||||||
|
- mimo-v2-pro (V2 Pro,默认开启思考)
|
||||||
|
- mimo-v2-omni (V2 多模态,默认开启思考)
|
||||||
|
- mimo-v2-flash (V2 极速版,默认关闭思考)
|
||||||
|
|
||||||
|
思考模式说明:
|
||||||
|
- 开关参数:``{"thinking": {"type": "enabled" | "disabled"}}``
|
||||||
|
- mimo-v2.5-pro / mimo-v2.5 在思考模式下 ``temperature`` 会被强制为 1.0,
|
||||||
|
本地直接剥离 ``temperature`` / ``top_p`` 等参数避免歧义。
|
||||||
|
- 多轮工具调用过程中,若历史包含 tool_calls,所有后续 assistant 消息必须回传
|
||||||
|
``reasoning_content``,否则 API 返回 400 错误。
|
||||||
|
- 文档:https://platform.xiaomimimo.com/docs/zh-CN/usage-guide/passing-back-reasoning_content
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from bridge.context import ContextType
|
||||||
|
from bridge.reply import Reply, ReplyType
|
||||||
|
from common import const
|
||||||
|
from common.log import logger
|
||||||
|
from config import conf, load_config
|
||||||
|
from models.bot import Bot
|
||||||
|
from models.openai_compatible_bot import OpenAICompatibleBot
|
||||||
|
from models.session_manager import SessionManager
|
||||||
|
from .mimo_session import MimoSession
|
||||||
|
|
||||||
|
DEFAULT_API_BASE = "https://api.xiaomimimo.com/v1"
|
||||||
|
DEFAULT_MODEL = const.MIMO_V2_5_PRO
|
||||||
|
|
||||||
|
# 支持多模态输入(图/音/视频)的模型
|
||||||
|
MULTIMODAL_MODELS = {const.MIMO_V2_5_PRO, const.MIMO_V2_5, const.MIMO_V2_OMNI}
|
||||||
|
|
||||||
|
|
||||||
|
class MimoBot(Bot, OpenAICompatibleBot):
|
||||||
|
def __init__(self):
    """Create the session store and the default request arguments.

    The model name is read from the global ``model`` setting and falls back
    to ``DEFAULT_MODEL`` when that setting is missing or empty.
    """
    super().__init__()
    self.sessions = SessionManager(MimoSession, model=conf().get("model") or DEFAULT_MODEL)

    # Defaults used by the simple (non-agent) chat path.
    chat_args = {"model": conf().get("model") or DEFAULT_MODEL}
    chat_args["temperature"] = conf().get("temperature", 1.0)
    chat_args["top_p"] = conf().get("top_p", 0.95)
    self.args = chat_args
|
||||||
|
|
||||||
|
# ---------- config helpers ----------
|
||||||
|
|
||||||
|
@property
def api_key(self):
    """MiMo API key, read from the ``mimo_api_key`` setting on every access."""
    settings = conf()
    return settings.get("mimo_api_key")
|
||||||
|
|
||||||
|
@property
def api_base(self):
    """Base URL of the MiMo API with trailing ``/`` characters removed.

    Uses the ``mimo_api_base`` setting when it holds a non-empty value and
    ``DEFAULT_API_BASE`` otherwise.
    """
    configured = conf().get("mimo_api_base")
    base_url = configured if configured else DEFAULT_API_BASE
    return base_url.rstrip("/")
|
||||||
|
|
||||||
|
def get_api_config(self):
    """Return the connection settings used by the OpenAI-compatible tool-calling path.

    Returns:
        dict with the keys ``api_key``, ``api_base``, ``model``,
        ``default_temperature`` and ``default_top_p``.
    """
    settings = conf()
    return {
        "api_key": self.api_key,
        "api_base": self.api_base,
        # ``or`` instead of a ``.get`` default: a ``model`` setting that is
        # present but empty/null must also fall back, as __init__ does.
        "model": settings.get("model") or DEFAULT_MODEL,
        "default_temperature": settings.get("temperature", 1.0),
        "default_top_p": settings.get("top_p", 0.95),
    }
|
||||||
|
|
||||||
|
@property
def supports_vision(self) -> bool:
    """True when the configured main model is listed in ``MULTIMODAL_MODELS``.

    When true, the vision tool is allowed to go through the main bot.
    The configured name is lower-cased before the lookup.
    """
    configured = conf().get("model")
    normalized = (configured if configured else "").lower()
    return normalized in MULTIMODAL_MODELS
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _model_supports_thinking(model_name: str) -> bool:
|
||||||
|
"""全部 mimo 系列模型都支持 thinking 开关。"""
|
||||||
|
if not model_name:
|
||||||
|
return False
|
||||||
|
return model_name.lower().startswith("mimo-")
|
||||||
|
|
||||||
|
@staticmethod
def _thinking_default_enabled(model_name: str) -> bool:
    """Return the default thinking-mode state for *model_name*.

    Off for ``const.MIMO_V2_FLASH`` (compared case-insensitively) and for an
    empty or missing name; on for every other name.
    """
    if model_name:
        return model_name.lower() != const.MIMO_V2_FLASH
    return False
|
||||||
|
|
||||||
|
def _build_headers(self) -> dict:
|
||||||
|
return {
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
"Authorization": f"Bearer {self.api_key}",
|
||||||
|
}
|
||||||
|
|
||||||
|
# ---------- simple chat (non-agent mode) ----------
|
||||||
|
|
||||||
|
def reply(self, query, context=None):
    """Answer a plain-text query in simple (non-agent) chat mode.

    Maintenance commands (clear this session, clear all sessions, reload the
    configuration) are answered directly. Any other text is added to the
    session and sent to the model through ``reply_text``.

    Args:
        query: the user's text.
        context: message context; ``context.type`` and ``context["session_id"]``
            are read. NOTE(review): despite the ``None`` default, the context
            is dereferenced unconditionally.

    Returns:
        A ``Reply`` of type TEXT on success, INFO for maintenance commands,
        or ERROR for failures and non-text contexts.
    """
    if context.type != ContextType.TEXT:
        return Reply(ReplyType.ERROR, "Bot不支持处理{}类型的消息".format(context.type))

    logger.info("[MIMO] query={}".format(query))
    session_id = context["session_id"]

    # Maintenance commands short-circuit the model call.
    if query in conf().get("clear_memory_commands", ["#清除记忆"]):
        self.sessions.clear_session(session_id)
        return Reply(ReplyType.INFO, "记忆已清除")
    if query == "#清除所有":
        self.sessions.clear_all_session()
        return Reply(ReplyType.INFO, "所有人记忆已清除")
    if query == "#更新配置":
        load_config()
        return Reply(ReplyType.INFO, "配置已更新")

    session = self.sessions.session_query(query, session_id)
    logger.debug("[MIMO] session query={}".format(session.messages))

    result = self.reply_text(session, args=self.args.copy())
    answer = result["content"]
    used_tokens = result["completion_tokens"]
    logger.debug(
        "[MIMO] new_query={}, session_id={}, reply_cont={}, completion_tokens={}".format(
            session.messages, session_id, answer, used_tokens,
        )
    )

    # Zero completion tokens with text means reply_text produced an error message.
    if used_tokens == 0 and len(answer) > 0:
        return Reply(ReplyType.ERROR, answer)
    if used_tokens > 0:
        self.sessions.session_reply(answer, session_id, result["total_tokens"])
        return Reply(ReplyType.TEXT, answer)

    failure = Reply(ReplyType.ERROR, answer)
    logger.debug("[MIMO] reply {} used 0 tokens.".format(result))
    return failure
|
||||||
|
|
||||||
|
def reply_text(self, session, args=None, retry_count: int = 0) -> dict:
    """Send the session's messages to ``/chat/completions`` and return the answer.

    Args:
        session: object whose ``messages`` list is sent as the conversation.
        args: request arguments; ``self.args`` is used when empty or ``None``.
        retry_count: number of attempts already made.

    Returns:
        On HTTP 200, a dict with ``total_tokens``, ``completion_tokens`` and
        ``content``. Otherwise a dict with ``completion_tokens`` set to 0 and
        a user-facing message in ``content``. HTTP 5xx and 429 responses are
        retried after a 3 second pause and exceptions are retried
        immediately, in both cases while ``retry_count`` is below 2.
    """
    try:
        headers = self._build_headers()
        payload = dict(args) if args else dict(self.args)
        payload["messages"] = session.messages

        model_name = str(payload.get("model", ""))
        # Models that think by default do not take custom sampling parameters
        # (per the original note for mimo-v2.5-pro / mimo-v2.5), so these are
        # dropped for every model whose thinking mode is on by default.
        thinking_on = (
            self._model_supports_thinking(model_name)
            and self._thinking_default_enabled(model_name)
        )
        if thinking_on:
            for param in ("temperature", "top_p", "presence_penalty", "frequency_penalty"):
                payload.pop(param, None)

        res = requests.post(
            f"{self.api_base}/chat/completions",
            headers=headers,
            json=payload,
            timeout=180,
        )
        if res.status_code == 200:
            data = res.json()
            usage = data["usage"]
            return {
                "total_tokens": usage["total_tokens"],
                "completion_tokens": usage["completion_tokens"],
                "content": data["choices"][0]["message"]["content"],
            }

        # Error path: use the structured error when the body is JSON, else raw text.
        try:
            error = res.json().get("error", {})
        except Exception:
            error = {"message": res.text[:300]}
        logger.error(
            f"[MIMO] chat failed, status_code={res.status_code}, "
            f"msg={error.get('message')}, type={error.get('type')}"
        )

        result = {"completion_tokens": 0, "content": "提问太快啦,请休息一下再问我吧"}
        attempts_left = retry_count < 2
        should_retry = False
        if res.status_code >= 500:
            should_retry = attempts_left
        elif res.status_code == 401:
            result["content"] = "授权失败,请检查API Key是否正确"
        elif res.status_code == 429:
            result["content"] = "请求过于频繁,请稍后再试"
            should_retry = attempts_left

        if not should_retry:
            return result
        time.sleep(3)
        return self.reply_text(session, args, retry_count + 1)
    except Exception as e:
        logger.exception(e)
        if retry_count >= 2:
            return {"completion_tokens": 0, "content": "我现在有点累了,等会再来吧"}
        return self.reply_text(session, args, retry_count + 1)
|
||||||
|
|
||||||
|
# ==================== Agent mode support ====================
|
||||||
|
|
||||||
|
def call_with_tools(self, messages, tools=None, stream: bool = False, **kwargs):
    """
    Call the MiMo chat-completions API with optional tool definitions (agent mode).

    Steps performed:
    - convert Claude-style content blocks to OpenAI-style messages
      (``reasoning_content`` is passed back in full)
    - insert or replace the system prompt given as ``system``
    - forward the thinking-mode switch
    - dispatch to the streaming (SSE) or the non-streaming handler

    Args:
        messages: conversation history, converted with
            ``_convert_messages_to_openai_format``.
        tools: optional tool definitions, converted with
            ``_convert_tools_to_openai_format``.
        stream: use the streaming handler when true.
        **kwargs: recognised keys are ``system``, ``model``, ``max_tokens``,
            ``tool_choice``, ``thinking``, ``temperature`` and ``top_p``;
            anything else is ignored.

    Returns:
        A generator of response dicts. A failure while preparing the request
        is reported as a single
        ``{"error": True, "message": ..., "status_code": 500}`` item rather
        than raised.
    """
    try:
        converted_messages = self._convert_messages_to_openai_format(messages)

        system_prompt = kwargs.pop("system", None)
        if system_prompt:
            if not converted_messages or converted_messages[0].get("role") != "system":
                converted_messages.insert(0, {"role": "system", "content": system_prompt})
            else:
                converted_messages[0] = {"role": "system", "content": system_prompt}

        converted_tools = None
        if tools:
            converted_tools = self._convert_tools_to_openai_format(tools)

        model = kwargs.pop("model", None) or self.args["model"]
        max_tokens = kwargs.pop("max_tokens", None)

        request_body = {
            "model": model,
            "messages": converted_messages,
            "stream": stream,
        }
        if max_tokens is not None:
            # MiMo names this field max_completion_tokens; per the original
            # note it covers visible output plus reasoning tokens.
            request_body["max_completion_tokens"] = max_tokens

        if converted_tools:
            request_body["tools"] = converted_tools
            request_body["tool_choice"] = kwargs.pop("tool_choice", "auto")

        # Thinking mode: follow the model's default unless the caller overrides it.
        thinking_param = kwargs.pop("thinking", None)
        thinking_active = False

        if self._model_supports_thinking(model):
            if thinking_param is None:
                default_on = self._thinking_default_enabled(model)
                thinking_param = {"type": "enabled" if default_on else "disabled"}
            request_body["thinking"] = thinking_param
            thinking_active = thinking_param.get("type") == "enabled"

        if thinking_active:
            # Sampling parameters are not honoured in thinking mode, so they
            # are discarded; request_body never contained them.
            for k in ("temperature", "top_p", "presence_penalty", "frequency_penalty"):
                kwargs.pop(k, None)
        else:
            temperature = kwargs.pop("temperature", None)
            if temperature is not None:
                request_body["temperature"] = temperature
            top_p = kwargs.pop("top_p", None)
            if top_p is not None:
                request_body["top_p"] = top_p

        logger.debug(
            f"[MIMO] API call: model={model}, "
            f"tools={len(converted_tools) if converted_tools else 0}, "
            f"stream={stream}, thinking={thinking_active}"
        )

        if stream:
            return self._handle_stream_response(request_body)
        else:
            return self._handle_sync_response(request_body)

    except Exception as e:
        logger.error(f"[MIMO] call_with_tools error: {e}")
        import traceback
        logger.error(traceback.format_exc())

        # Capture the text now: Python unbinds ``e`` when the except block
        # exits, which happens before the lazily executed generator body
        # runs, so referencing ``e`` in there would raise NameError.
        error_message = str(e)

        def error_generator():
            yield {"error": True, "message": error_message, "status_code": 500}
        return error_generator()
|
||||||
|
|
||||||
|
# -------------------- streaming --------------------
|
||||||
|
|
||||||
|
def _handle_stream_response(self, request_body: dict):
    """Stream a chat completion and re-emit each SSE chunk as OpenAI-style delta dicts.

    Yields, in arrival order, separate items for ``reasoning_content``,
    ``content`` and every ``tool_calls`` fragment, followed by one final item
    with an empty delta that carries the last ``finish_reason`` seen.

    HTTP errors, in-stream ``error`` chunks, timeouts and unexpected
    exceptions are yielded as
    ``{"error": True, "message": ..., "status_code": ...}``.

    Args:
        request_body: JSON body for ``/chat/completions``.
    """
    response = None
    try:
        headers = self._build_headers()
        url = f"{self.api_base}/chat/completions"
        response = requests.post(url, headers=headers, json=request_body, stream=True, timeout=180)

        if response.status_code != 200:
            error_msg = response.text
            logger.error(f"[MIMO] API error: status={response.status_code}, msg={error_msg}")
            yield {"error": True, "message": error_msg, "status_code": response.status_code}
            return

        finish_reason = None

        for line in response.iter_lines():
            if not line:
                continue

            line = line.decode("utf-8")
            # Accept the SSE data prefix with or without the space after the colon.
            if line.startswith("data: "):
                data_str = line[6:]
            elif line.startswith("data:"):
                data_str = line[5:]
            else:
                continue
            if data_str.strip() == "[DONE]":
                break

            try:
                chunk = json.loads(data_str)
            except json.JSONDecodeError as e:
                logger.warning(f"[MIMO] JSON decode error: {e}, data: {data_str[:200]}")
                continue

            if chunk.get("error"):
                error_data = chunk["error"]
                error_msg = error_data.get("message", "Unknown error") if isinstance(error_data, dict) else str(error_data)
                logger.error(f"[MIMO] stream error: {error_msg}")
                yield {"error": True, "message": error_msg, "status_code": 500}
                return

            if not chunk.get("choices"):
                continue
            choice = chunk["choices"][0]
            delta = choice.get("delta", {})

            if choice.get("finish_reason"):
                finish_reason = choice["finish_reason"]

            # Reasoning text (thinking mode) is forwarded as its own delta.
            if delta.get("reasoning_content"):
                yield {
                    "choices": [{
                        "index": 0,
                        "delta": {
                            "role": "assistant",
                            "reasoning_content": delta["reasoning_content"],
                        },
                        "finish_reason": None,
                    }]
                }

            if delta.get("content"):
                yield {
                    "choices": [{
                        "index": 0,
                        "delta": {
                            "role": "assistant",
                            "content": delta["content"],
                        },
                    }]
                }

            # Tool-call fragments are passed through unchanged; nothing in
            # this method reassembles them.
            if "tool_calls" in delta and delta["tool_calls"]:
                for tool_call_chunk in delta["tool_calls"]:
                    yield {
                        "choices": [{
                            "index": 0,
                            "delta": {"tool_calls": [tool_call_chunk]},
                        }]
                    }

        yield {
            "choices": [{
                "index": 0,
                "delta": {},
                "finish_reason": finish_reason,
            }]
        }

    except requests.exceptions.Timeout:
        logger.error("[MIMO] Request timeout")
        yield {"error": True, "message": "Request timeout", "status_code": 500}
    except Exception as e:
        logger.error(f"[MIMO] stream response error: {e}")
        import traceback
        logger.error(traceback.format_exc())
        yield {"error": True, "message": str(e), "status_code": 500}
    finally:
        # A streamed response keeps its connection checked out until it is
        # fully consumed or closed. Closing here covers the [DONE] break, the
        # early returns, and a consumer that stops iterating.
        if response is not None:
            response.close()
|
||||||
|
|
||||||
|
# -------------------- sync --------------------
|
||||||
|
|
||||||
|
def _handle_sync_response(self, request_body: dict):
    """Run a non-streaming chat completion and yield one Claude-style message dict.

    The result is yielded rather than returned so callers can consume it the
    same way as the streaming path. The yielded dict has ``role``,
    ``content`` (a list of ``thinking`` / ``text`` / ``tool_use`` blocks) and
    ``stop_reason``. Failures are yielded as
    ``{"error": True, "message": ..., "status_code": ...}``.

    Args:
        request_body: JSON body for ``/chat/completions``; its ``stream`` key
            is removed in place before sending.
    """
    try:
        headers = self._build_headers()
        request_body.pop("stream", None)
        endpoint = f"{self.api_base}/chat/completions"
        response = requests.post(endpoint, headers=headers, json=request_body, timeout=180)

        if response.status_code != 200:
            error_msg = response.text
            logger.error(f"[MIMO] API error: status={response.status_code}, msg={error_msg}")
            yield {"error": True, "message": error_msg, "status_code": response.status_code}
            return

        first_choice = response.json()["choices"][0]
        message = first_choice["message"]
        finish_reason = first_choice["finish_reason"]

        blocks = []

        # Reasoning is wrapped as a thinking block so the agent layer can
        # persist it and pass it back on later tool-call turns.
        reasoning = message.get("reasoning_content")
        if reasoning:
            blocks.append({"type": "thinking", "thinking": message["reasoning_content"]})

        if message.get("content"):
            blocks.append({"type": "text", "text": message["content"]})

        for tool_call in message.get("tool_calls") or ():
            # Unparseable or missing arguments degrade to an empty input.
            try:
                tool_input = json.loads(tool_call["function"]["arguments"])
            except (json.JSONDecodeError, TypeError):
                tool_input = {}
            blocks.append({
                "type": "tool_use",
                "id": tool_call["id"],
                "name": tool_call["function"]["name"],
                "input": tool_input,
            })

        # Map OpenAI finish reasons to Claude stop reasons; others pass through.
        if finish_reason == "tool_calls":
            stop_reason = "tool_use"
        elif finish_reason == "stop":
            stop_reason = "end_turn"
        else:
            stop_reason = finish_reason

        yield {"role": "assistant", "content": blocks, "stop_reason": stop_reason}

    except requests.exceptions.Timeout:
        logger.error("[MIMO] Request timeout")
        yield {"error": True, "message": "Request timeout", "status_code": 500}
    except Exception as e:
        logger.error(f"[MIMO] sync response error: {e}")
        import traceback
        logger.error(traceback.format_exc())
        yield {"error": True, "message": str(e), "status_code": 500}
|
||||||
|
|
||||||
|
# -------------------- format conversion --------------------
|
||||||
|
|
||||||
|
def _convert_messages_to_openai_format(self, messages):
|
||||||
|
"""
|
||||||
|
将 Claude 格式(content blocks)转为 OpenAI 格式。
|
||||||
|
|
||||||
|
关键约束:MiMo 思考模式下,一旦历史包含 tool_calls 的 assistant 轮次,
|
||||||
|
所有后续 assistant 消息(含工具调用轮)必须回传 reasoning_content,
|
||||||
|
否则 API 返回 400。本地无 trace 时用空字符串回填,MiMo 接受字段存在
|
||||||
|
即可。
|
||||||
|
"""
|
||||||
|
if not messages:
|
||||||
|
return []
|
||||||
|
|
||||||
|
has_tool_call_history = False
|
||||||
|
for msg in messages:
|
||||||
|
if msg.get("role") != "assistant":
|
||||||
|
continue
|
||||||
|
if msg.get("tool_calls"):
|
||||||
|
has_tool_call_history = True
|
||||||
|
break
|
||||||
|
content = msg.get("content")
|
||||||
|
if isinstance(content, list) and any(
|
||||||
|
isinstance(b, dict) and b.get("type") == "tool_use" for b in content
|
||||||
|
):
|
||||||
|
has_tool_call_history = True
|
||||||
|
break
|
||||||
|
|
||||||
|
converted = []
|
||||||
|
|
||||||
|
for msg in messages:
|
||||||
|
role = msg.get("role")
|
||||||
|
content = msg.get("content")
|
||||||
|
|
||||||
|
if not isinstance(content, list):
|
||||||
|
if (
|
||||||
|
role == "assistant"
|
||||||
|
and isinstance(msg, dict)
|
||||||
|
and has_tool_call_history
|
||||||
|
and "reasoning_content" not in msg
|
||||||
|
):
|
||||||
|
patched = dict(msg)
|
||||||
|
patched["reasoning_content"] = ""
|
||||||
|
converted.append(patched)
|
||||||
|
else:
|
||||||
|
converted.append(msg)
|
||||||
|
continue
|
||||||
|
|
||||||
|
if role == "user":
|
||||||
|
has_tool_result = any(
|
||||||
|
isinstance(b, dict) and b.get("type") == "tool_result" for b in content
|
||||||
|
)
|
||||||
|
if has_tool_result:
|
||||||
|
text_parts = []
|
||||||
|
tool_results = []
|
||||||
|
|
||||||
|
for block in content:
|
||||||
|
if not isinstance(block, dict):
|
||||||
|
continue
|
||||||
|
if block.get("type") == "text":
|
||||||
|
text_parts.append(block.get("text", ""))
|
||||||
|
elif block.get("type") == "tool_result":
|
||||||
|
tool_call_id = block.get("tool_use_id") or ""
|
||||||
|
result_content = block.get("content", "")
|
||||||
|
if not isinstance(result_content, str):
|
||||||
|
result_content = json.dumps(result_content, ensure_ascii=False)
|
||||||
|
tool_results.append({
|
||||||
|
"role": "tool",
|
||||||
|
"tool_call_id": tool_call_id,
|
||||||
|
"content": result_content,
|
||||||
|
})
|
||||||
|
|
||||||
|
converted.extend(tool_results)
|
||||||
|
|
||||||
|
if text_parts:
|
||||||
|
converted.append({"role": "user", "content": "\n".join(text_parts)})
|
||||||
|
else:
|
||||||
|
# 多模态原样保留(image_url / input_audio / video_url 等 block)
|
||||||
|
converted.append(msg)
|
||||||
|
|
||||||
|
elif role == "assistant":
|
||||||
|
openai_msg = {"role": "assistant"}
|
||||||
|
text_parts = []
|
||||||
|
tool_calls = []
|
||||||
|
reasoning_parts = []
|
||||||
|
|
||||||
|
for block in content:
|
||||||
|
if not isinstance(block, dict):
|
||||||
|
continue
|
||||||
|
btype = block.get("type")
|
||||||
|
if btype == "text":
|
||||||
|
text_parts.append(block.get("text", ""))
|
||||||
|
elif btype == "tool_use":
|
||||||
|
tool_calls.append({
|
||||||
|
"id": block.get("id"),
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": block.get("name"),
|
||||||
|
"arguments": json.dumps(block.get("input", {})),
|
||||||
|
},
|
||||||
|
})
|
||||||
|
elif btype == "thinking":
|
||||||
|
reasoning_parts.append(block.get("thinking", ""))
|
||||||
|
|
||||||
|
if text_parts:
|
||||||
|
openai_msg["content"] = "\n".join(text_parts)
|
||||||
|
elif not tool_calls:
|
||||||
|
openai_msg["content"] = ""
|
||||||
|
|
||||||
|
if tool_calls:
|
||||||
|
openai_msg["tool_calls"] = tool_calls
|
||||||
|
if not text_parts:
|
||||||
|
openai_msg["content"] = None
|
||||||
|
|
||||||
|
if reasoning_parts:
|
||||||
|
openai_msg["reasoning_content"] = "\n".join(reasoning_parts)
|
||||||
|
elif has_tool_call_history:
|
||||||
|
openai_msg["reasoning_content"] = ""
|
||||||
|
|
||||||
|
converted.append(openai_msg)
|
||||||
|
else:
|
||||||
|
converted.append(msg)
|
||||||
|
|
||||||
|
return converted
|
||||||
|
|
||||||
|
def _convert_tools_to_openai_format(self, tools):
|
||||||
|
"""工具定义 Claude 格式 → OpenAI 格式。"""
|
||||||
|
if not tools:
|
||||||
|
return None
|
||||||
|
|
||||||
|
converted = []
|
||||||
|
for tool in tools:
|
||||||
|
if "type" in tool and tool["type"] == "function":
|
||||||
|
converted.append(tool)
|
||||||
|
else:
|
||||||
|
converted.append({
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": tool.get("name"),
|
||||||
|
"description": tool.get("description"),
|
||||||
|
"parameters": tool.get("input_schema", {}),
|
||||||
|
},
|
||||||
|
})
|
||||||
|
return converted
|
||||||
|
|
||||||
|
# -------------------- vision --------------------
|
||||||
|
|
||||||
|
def call_vision(self, image_url: str, question: str,
                model: Optional[str] = None,
                max_tokens: int = 1000) -> dict:
    """Ask a MiMo model a question about one image via ``/chat/completions``.

    Args:
        image_url: value sent as the ``image_url.url`` field.
        question: text prompt sent alongside the image.
        model: model to use. When omitted, the main model from ``self.args``
            is used if it is in ``MULTIMODAL_MODELS``; otherwise
            ``const.MIMO_V2_5_PRO`` is used.
        max_tokens: sent as ``max_completion_tokens``.

    Returns:
        On success, a dict with ``model``, ``content`` and ``usage``
        (``prompt_tokens`` / ``completion_tokens`` / ``total_tokens``).
        On any failure, ``{"error": True, "message": ...}``.
    """
    try:
        vision_model = model
        if not vision_model:
            cur = self.args.get("model") or DEFAULT_MODEL
            # Compare case-insensitively, as supports_vision does, so both
            # agree on whether the main model can handle images.
            vision_model = cur if str(cur).lower() in MULTIMODAL_MODELS else const.MIMO_V2_5_PRO

        payload = {
            "model": vision_model,
            "max_completion_tokens": max_tokens,
            "messages": [{
                "role": "user",
                "content": [
                    {"type": "text", "text": question},
                    {"type": "image_url", "image_url": {"url": image_url}},
                ],
            }],
        }
        headers = self._build_headers()
        resp = requests.post(
            f"{self.api_base}/chat/completions",
            headers=headers, json=payload, timeout=60,
        )
        if resp.status_code != 200:
            return {"error": True, "message": f"HTTP {resp.status_code}: {resp.text[:300]}"}
        data = resp.json()
        if "error" in data:
            err = data["error"]
            # The error payload may be a plain string rather than an object;
            # the streaming handler tolerates both, and so does this.
            message = err.get("message", str(err)) if isinstance(err, dict) else str(err)
            return {"error": True, "message": message}
        choice = data.get("choices", [{}])[0].get("message", {})
        # Per the original note, some models put the answer in
        # reasoning_content instead of content for multimodal requests.
        content = choice.get("content") or choice.get("reasoning_content") or ""
        usage = data.get("usage", {})
        return {
            "model": vision_model,
            "content": content,
            "usage": {
                "prompt_tokens": usage.get("prompt_tokens", 0),
                "completion_tokens": usage.get("completion_tokens", 0),
                "total_tokens": usage.get("total_tokens", 0),
            },
        }
    except Exception as e:
        logger.error(f"[MIMO] call_vision error: {e}")
        return {"error": True, "message": str(e)}
|
||||||
57
models/mimo/mimo_session.py
Normal file
57
models/mimo/mimo_session.py
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
from common.log import logger
|
||||||
|
from models.session_manager import Session
|
||||||
|
|
||||||
|
|
||||||
|
class MimoSession(Session):
    """Chat session for MiMo models that trims history using a character-count estimate."""

    def __init__(self, session_id, system_prompt=None, model="mimo-v2.5-pro"):
        """Initialise the base session, remember *model*, and reset the message list."""
        super().__init__(session_id, system_prompt)
        self.model = model
        self.reset()

    def discard_exceeding(self, max_tokens, cur_tokens=None):
        """Drop the oldest messages until the session fits within *max_tokens*.

        Messages are removed from index 1, so the entry at index 0
        (presumably the system prompt; confirm against ``Session.reset``) is
        always kept.

        Args:
            max_tokens: upper bound for the estimated size of the session.
            cur_tokens: fallback size used only if ``calc_tokens`` raises.

        Returns:
            The estimated size after trimming.

        Raises:
            Exception: whatever ``calc_tokens`` raised, when no *cur_tokens*
                fallback was supplied.
        """
        precise = True
        try:
            cur_tokens = self.calc_tokens()
        except Exception as e:
            precise = False
            if cur_tokens is None:
                raise e
            logger.debug("Exception when counting tokens precisely for query: {}".format(e))
        while cur_tokens > max_tokens:
            if len(self.messages) > 2:
                self.messages.pop(1)
            elif len(self.messages) == 2 and self.messages[1]["role"] == "assistant":
                self.messages.pop(1)
                if precise:
                    cur_tokens = self.calc_tokens()
                else:
                    # NOTE(review): rough estimate kept as-is; it subtracts
                    # the limit, not the size of the removed message.
                    cur_tokens = cur_tokens - max_tokens
                break
            elif len(self.messages) == 2 and self.messages[1]["role"] == "user":
                # A single user message over the limit cannot be trimmed further.
                logger.warning("user message exceed max_tokens. total_tokens={}".format(cur_tokens))
                break
            else:
                logger.debug("max_tokens={}, total_tokens={}, len(messages)={}".format(
                    max_tokens, cur_tokens, len(self.messages)))
                break
            if precise:
                cur_tokens = self.calc_tokens()
            else:
                cur_tokens = cur_tokens - max_tokens
        return cur_tokens

    def calc_tokens(self):
        """Return the estimated size of the current messages."""
        return num_tokens_from_messages(self.messages, self.model)
|
||||||
|
|
||||||
|
|
||||||
|
def num_tokens_from_messages(messages, model):
    """Estimate the token count of ``messages`` by counting characters.

    String contents contribute their length. List contents contribute the
    length of the ``"text"`` value of every dict block; blocks without text
    (for example image blocks) and non-dict blocks contribute nothing.

    Args:
        messages: Iterable of message dicts with an optional ``"content"`` key.
        model: Model name; accepted for interface compatibility, not used.

    Returns:
        The summed character count as an int.
    """
    tokens = 0
    for msg in messages:
        content = msg.get("content", "")
        if isinstance(content, str):
            tokens += len(content)
        elif isinstance(content, list):
            for block in content:
                if isinstance(block, dict):
                    # ``or ""`` also covers an explicit ``"text": None``, which
                    # the ``.get`` default alone would pass on to ``len``.
                    tokens += len(block.get("text") or "")
    return tokens
|
||||||
@@ -66,4 +66,8 @@ def create_voice(voice_type):
|
|||||||
from voice.zhipuai.zhipuai_voice import ZhipuAIVoice
|
from voice.zhipuai.zhipuai_voice import ZhipuAIVoice
|
||||||
|
|
||||||
return ZhipuAIVoice()
|
return ZhipuAIVoice()
|
||||||
|
elif voice_type == "mimo":
|
||||||
|
from voice.mimo.mimo_voice import MimoVoice
|
||||||
|
|
||||||
|
return MimoVoice()
|
||||||
raise RuntimeError
|
raise RuntimeError
|
||||||
|
|||||||
0
voice/mimo/__init__.py
Normal file
0
voice/mimo/__init__.py
Normal file
109
voice/mimo/mimo_voice.py
Normal file
109
voice/mimo/mimo_voice.py
Normal file
@@ -0,0 +1,109 @@
|
|||||||
|
# encoding:utf-8
|
||||||
|
"""
|
||||||
|
小米 MiMo TTS - 基于 mimo-v2.5-tts 模型的语音合成。
|
||||||
|
|
||||||
|
通过 /chat/completions 接口实现:assistant 消息内容为待合成文本,
|
||||||
|
audio 字段指定预置音色(如 冰糖/茉莉/苏打/Mia/Chloe 等),返回 base64
|
||||||
|
编码的音频字节。
|
||||||
|
|
||||||
|
文档:https://platform.xiaomimimo.com/docs/zh-CN/usage-guide/speech-synthesis-v2.5
|
||||||
|
注意:MiMo 不提供 ASR 端点,因此 voiceToText 不实现。
|
||||||
|
"""
|
||||||
|
import base64
|
||||||
|
import datetime
|
||||||
|
import os
|
||||||
|
import random
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from bridge.reply import Reply, ReplyType
|
||||||
|
from common.log import logger
|
||||||
|
from config import conf
|
||||||
|
from voice.voice import Voice
|
||||||
|
|
||||||
|
# Base URL used when "mimo_api_base" is not configured.
DEFAULT_API_BASE = "https://api.xiaomimimo.com/v1"
# TTS model used when "text_to_voice_model" is not configured.
DEFAULT_TTS_MODEL = "mimo-v2.5-tts"
DEFAULT_TTS_VOICE = "冰糖"  # Default voice: the de facto default on the China cluster
# Passed as the ``timeout`` argument of ``requests.post``; requests treats a
# tuple as (connect, read) timeouts in seconds.
REQUEST_TIMEOUT = (5, 120)
|
||||||
|
|
||||||
|
|
||||||
|
class MimoVoice(Voice):
    """Voice provider that synthesizes speech through the MiMo chat-completions API."""

    def __init__(self):
        pass

    def voiceToText(self, voice_file: str):
        """Report that speech recognition is unavailable and return an error reply."""
        # MiMo has no standalone ASR endpoint; another provider should be used.
        logger.warning("[MimoVoice] voiceToText is not supported by MiMo API")
        return Reply(ReplyType.ERROR, "MiMo 暂不支持语音识别,请配置其他 voice_to_text provider")

    def textToVoice(self, text: str):
        """Synthesize ``text`` to a WAV file under ``tmp/``.

        Returns:
            A ``ReplyType.VOICE`` reply holding the file path on success,
            otherwise a ``ReplyType.ERROR`` reply with a user-facing message.
        """
        failure_text = "语音合成失败,请稍后再试"
        try:
            api_key = conf().get("mimo_api_key", "")
            if not api_key:
                logger.error("[MimoVoice] mimo_api_key is not configured")
                return Reply(ReplyType.ERROR, "未配置 MiMo API key")

            configured_base = conf().get("mimo_api_base") or DEFAULT_API_BASE
            api_base = configured_base.rstrip("/")
            model = conf().get("text_to_voice_model") or DEFAULT_TTS_MODEL
            voice_id = conf().get("tts_voice_id") or DEFAULT_TTS_VOICE

            # The text to synthesize goes into an assistant message; an
            # optional user message could carry style instructions.
            request_body = {
                "model": model,
                "messages": [{"role": "assistant", "content": text}],
                "audio": {"format": "wav", "voice": voice_id},
            }
            request_headers = {
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            }
            resp = requests.post(
                f"{api_base}/chat/completions",
                headers=request_headers,
                json=request_body,
                timeout=REQUEST_TIMEOUT,
            )

            if resp.status_code != 200:
                logger.error(
                    f"[MimoVoice] textToVoice failed: status={resp.status_code} "
                    f"body={resp.text[:500]} model={model} voice={voice_id}"
                )
                return Reply(ReplyType.ERROR, failure_text)

            data = resp.json()
            if "error" in data:
                err = data["error"]
                if isinstance(err, dict):
                    msg = err.get("message", str(err))
                else:
                    msg = str(err)
                logger.error(f"[MimoVoice] textToVoice api error: {msg}")
                return Reply(ReplyType.ERROR, failure_text)

            # Walk choices[0].message.audio.data, tolerating missing levels.
            choices = data.get("choices") or [{}]
            first_message = choices[0].get("message", {}) or {}
            audio_section = first_message.get("audio") or {}
            encoded_audio = audio_section.get("data")
            if not encoded_audio:
                logger.error(f"[MimoVoice] textToVoice empty audio in response: {data}")
                return Reply(ReplyType.ERROR, failure_text)

            try:
                audio_bytes = base64.b64decode(encoded_audio)
            except Exception as e:
                logger.error(f"[MimoVoice] base64 decode failed: {e}")
                return Reply(ReplyType.ERROR, failure_text)

            # File name: second-resolution timestamp plus a random suffix.
            stamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
            suffix = random.randint(0, 1000)
            file_name = f"tmp/{stamp}{suffix}.wav"
            os.makedirs(os.path.dirname(file_name), exist_ok=True)
            with open(file_name, "wb") as out:
                out.write(audio_bytes)
            logger.info(
                f"[MimoVoice] textToVoice model={model} voice={voice_id} "
                f"file={file_name} bytes={len(audio_bytes)}"
            )
            return Reply(ReplyType.VOICE, file_name)
        except Exception as e:
            logger.exception(f"[MimoVoice] textToVoice exception: {e}")
            return Reply(ReplyType.ERROR, failure_text)
|
||||||
Reference in New Issue
Block a user