feat(voice): add dashscope & zhipu ASR, in-page mic input

This commit is contained in:
zhayujie
2026-05-20 22:36:37 +08:00
parent fff7326209
commit 2b90f377e6
9 changed files with 786 additions and 34 deletions

View File

@@ -422,15 +422,24 @@
</button>
</div>
<div id="slash-menu" class="slash-menu hidden"></div>
<textarea id="chat-input"
class="flex-1 min-w-0 px-4 py-[10px] rounded-xl border border-slate-200 dark:border-slate-600
bg-slate-50 dark:bg-white/5 text-slate-800 dark:text-slate-100
placeholder:text-slate-400 dark:placeholder:text-slate-500
focus:outline-none focus:ring-0 focus:border-primary-600
text-sm leading-relaxed"
rows="1"
data-i18n-placeholder="input_placeholder"
placeholder="输入消息,或输入 / 使用指令"></textarea>
<div class="flex-1 min-w-0 relative flex items-center">
<textarea id="chat-input"
class="w-full pl-4 pr-11 py-[10px] rounded-xl border border-slate-200 dark:border-slate-600
bg-slate-50 dark:bg-white/5 text-slate-800 dark:text-slate-100
placeholder:text-slate-400 dark:placeholder:text-slate-500
focus:outline-none focus:ring-0 focus:border-primary-600
text-sm leading-relaxed"
rows="1"
data-i18n-placeholder="input_placeholder"
placeholder="输入消息,或输入 / 使用指令"></textarea>
<button id="mic-btn" type="button"
class="absolute right-2 top-1/2 -translate-y-1/2 w-8 h-8 flex items-center justify-center rounded-lg
text-slate-400 hover:text-primary-500 hover:bg-primary-50 dark:hover:bg-primary-900/20
cursor-pointer transition-colors duration-150"
data-i18n-title="mic_idle_title" title="点击录音 / 再按一次结束">
<i class="fas fa-microphone text-sm"></i>
</button>
</div>
<button id="send-btn"
class="flex-shrink-0 w-10 h-10 flex items-center justify-center rounded-lg
bg-primary-400 text-white hover:bg-primary-500

View File

@@ -59,6 +59,7 @@ const I18N = {
models_embedding_saved_title: '向量模型已更新',
models_embedding_saved_msg: '请在聊天框输入 /memory rebuild-index 重建索引。',
models_embedding_saved_ok: '去执行',
models_pick_provider: '待选择',
models_clear_confirm_title: '清除厂商凭据',
models_clear_confirm_msg: '确认清除该厂商的 API Key 与 Base URL 吗?相关能力将不再可用。',
cancel: '取消',
@@ -153,6 +154,12 @@ const I18N = {
tip_clear_context: '清除上下文',
tip_attach: '添加附件',
attach_menu_file: '上传文件',
mic_idle_title: '点击录音 / 再按一次结束',
mic_recording_title: '录音中,再次点击结束',
mic_busy_title: '识别中…',
mic_permission_denied: '无法访问麦克风,请检查浏览器权限',
mic_too_short: '录音太短,请重试',
mic_error: '语音识别失败',
attach_menu_folder: '上传文件夹',
confirm_yes: '确认',
confirm_cancel: '取消',
@@ -207,6 +214,7 @@ const I18N = {
models_embedding_saved_title: 'Embedding model updated',
models_embedding_saved_msg: 'Send /memory rebuild-index in the chat to rebuild the index.',
models_embedding_saved_ok: 'Go',
models_pick_provider: 'Pick a provider',
models_clear_confirm_title: 'Clear vendor credentials',
models_clear_confirm_msg: 'Remove this vendor\'s API Key and Base URL? Capabilities relying on it will stop working.',
cancel: 'Cancel',
@@ -301,6 +309,12 @@ const I18N = {
tip_clear_context: 'Clear Context',
tip_attach: 'Add Attachment',
attach_menu_file: 'Upload File',
mic_idle_title: 'Click to record, click again to stop',
mic_recording_title: 'Recording, click to stop',
mic_busy_title: 'Transcribing…',
mic_permission_denied: 'Cannot access microphone — check browser permissions',
mic_too_short: 'Recording too short, please retry',
mic_error: 'Speech recognition failed',
attach_menu_folder: 'Upload Folder',
confirm_yes: 'Confirm',
confirm_cancel: 'Cancel',
@@ -707,6 +721,191 @@ if (!supportsDirectoryUpload && attachFolderOption) {
attachFolderOption.classList.add('hidden');
}
// ---------------- Mic button: in-page voice input via the configured ASR provider ----------------
/**
 * Wire up the #mic-btn toggle.
 *
 * First click asks for microphone access and starts a MediaRecorder capture;
 * the next click stops it. The finished recording is POSTed as multipart
 * form data to /api/voice/asr and, on success, handed to
 * sendVoiceMessage(text, audioUrl). Failures are shown in a short-lived
 * tooltip above the button.
 *
 * The button is hidden when getUserMedia or MediaRecorder is unavailable.
 */
(function setupMicButton() {
    const micBtn = document.getElementById('mic-btn');
    if (!micBtn) return;
    if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia ||
        typeof window.MediaRecorder === 'undefined') {
        micBtn.style.display = 'none';
        return;
    }

    let mediaRecorder = null;
    let stream = null;
    let chunks = [];
    // True from the moment the recorder starts until the upload (or the
    // "too short" rejection) finishes; setBusy() deliberately leaves it set
    // so clicks during transcription fall into the harmless stop() path.
    let recording = false;
    // True while start() is still awaiting getUserMedia / recorder setup.
    // Without it a second click during the permission prompt would open a
    // second stream and recorder, and the first pair would never be released.
    let starting = false;
    let recordStartedAt = 0;

    // Every state setter clears the colour classes of the other states so
    // they never pile up on the button (setBusy adds text-primary-500).
    const setIdle = () => {
        recording = false;
        micBtn.classList.remove('text-red-500', 'animate-pulse', 'text-primary-500');
        micBtn.classList.add('text-slate-400');
        micBtn.querySelector('i').className = 'fas fa-microphone text-sm';
        micBtn.title = t('mic_idle_title');
    };

    const setRecording = () => {
        recording = true;
        micBtn.classList.remove('text-slate-400', 'text-primary-500');
        micBtn.classList.add('text-red-500', 'animate-pulse');
        micBtn.querySelector('i').className = 'fas fa-stop text-sm';
        micBtn.title = t('mic_recording_title');
    };

    const setBusy = () => {
        micBtn.classList.remove('text-red-500', 'animate-pulse', 'text-slate-400');
        micBtn.classList.add('text-primary-500');
        micBtn.querySelector('i').className = 'fas fa-spinner fa-spin text-sm';
        micBtn.title = t('mic_busy_title');
    };

    // Return the first container/codec this browser can record, or '' to let
    // MediaRecorder pick its own default.
    const pickMimeType = () => {
        const candidates = [
            'audio/webm;codecs=opus',
            'audio/webm',
            'audio/ogg;codecs=opus',
            'audio/mp4',
        ];
        for (const m of candidates) {
            if (window.MediaRecorder.isTypeSupported && MediaRecorder.isTypeSupported(m)) {
                return m;
            }
        }
        return '';
    };

    // Release the microphone. The callback parameter is named `track` so it
    // does not shadow the i18n helper t().
    const stopStream = () => {
        if (stream) {
            stream.getTracks().forEach(track => track.stop());
            stream = null;
        }
    };

    let tipHideTimer = null;
    let tipRemoveTimer = null;
    // Show `msg` in a bubble above the mic for ~2s, then fade it out and
    // remove it. The bubble is appended to the button's parent element.
    const flashError = (msg) => {
        console.warn('[mic]', msg);
        const wrapper = micBtn.parentElement;
        if (!wrapper) return;
        // Cancel BOTH pending timers: if only the fade timer were cleared, a
        // removal already scheduled by the previous message could delete the
        // bubble right after the new text is shown.
        if (tipHideTimer) clearTimeout(tipHideTimer);
        if (tipRemoveTimer) clearTimeout(tipRemoveTimer);
        tipHideTimer = null;
        tipRemoveTimer = null;

        let tip = wrapper.querySelector('.mic-tip');
        if (!tip) {
            tip = document.createElement('div');
            tip.className = 'mic-tip absolute right-1 bottom-full mb-2 px-2 py-1 rounded-md '
                + 'text-xs text-white bg-slate-800/90 dark:bg-slate-700/90 shadow-md '
                + 'pointer-events-none whitespace-nowrap z-10';
            wrapper.appendChild(tip);
        }
        tip.textContent = msg;
        tip.style.opacity = '1';
        tipHideTimer = setTimeout(() => {
            tipHideTimer = null;
            tip.style.transition = 'opacity 200ms';
            tip.style.opacity = '0';
            tipRemoveTimer = setTimeout(() => {
                tipRemoveTimer = null;
                tip.remove();
            }, 250);
        }, 2000);
    };

    // POST the recording to the ASR endpoint and dispatch the result.
    // Always returns the button to the idle state when done.
    const upload = async (blob, ext) => {
        setBusy();
        const fd = new FormData();
        fd.append('file', blob, `recording.${ext}`);
        try {
            const resp = await fetch('/api/voice/asr', { method: 'POST', body: fd });
            const data = await resp.json();
            if (data.status === 'success' && data.text) {
                // Voice-message UX: drop the recording into the conversation
                // as a playable bubble with the caption underneath, then
                // dispatch the recognised text through the regular send path.
                sendVoiceMessage(data.text, data.audio_url);
            } else {
                flashError(data.message || t('mic_error'));
            }
        } catch (e) {
            flashError(t('mic_error') + ': ' + e.message);
        } finally {
            setIdle();
        }
    };

    // Acquire the microphone and begin recording. No-op when a start is
    // already in flight or a recording/upload is still active.
    const start = async () => {
        if (starting || recording) return;
        starting = true;
        try {
            try {
                stream = await navigator.mediaDevices.getUserMedia({ audio: true });
            } catch (e) {
                flashError(t('mic_permission_denied'));
                return;
            }
            chunks = [];
            const mimeType = pickMimeType();
            let recorder;
            try {
                recorder = mimeType
                    ? new MediaRecorder(stream, { mimeType })
                    : new MediaRecorder(stream);
            } catch (e) {
                stopStream();
                flashError(t('mic_error') + ': ' + e.message);
                return;
            }
            mediaRecorder = recorder;
            recorder.ondataavailable = (ev) => {
                if (ev.data && ev.data.size > 0) chunks.push(ev.data);
            };
            recorder.onstop = () => {
                stopStream();
                const recordedType = recorder.mimeType || 'audio/webm';
                const blob = new Blob(chunks, { type: recordedType });
                // Map mime -> extension so the server picks the right file suffix.
                const mt = recordedType.split(';')[0];
                const extMap = {
                    'audio/webm': 'webm', 'audio/ogg': 'ogg',
                    'audio/mp4': 'm4a', 'audio/mpeg': 'mp3',
                };
                const ext = extMap[mt] || 'webm';
                // 256 bytes ~ container header only, no actual audio. Anything
                // below that we treat as "tapped by mistake".
                if (blob.size < 256) {
                    setIdle();
                    flashError(t('mic_too_short'));
                    return;
                }
                upload(blob, ext);
            };
            try {
                // timeslice=250ms: force the recorder to flush a chunk every 250ms.
                // Without it some browsers wait for stop() before producing any data,
                // which loses the audio on very short taps.
                recorder.start(250);
            } catch (e) {
                // start() can throw (e.g. unsupported configuration); release
                // the microphone instead of leaving it open.
                stopStream();
                flashError(t('mic_error') + ': ' + e.message);
                return;
            }
            recordStartedAt = Date.now();
            setRecording();
        } finally {
            starting = false;
        }
    };

    const stop = () => {
        if (mediaRecorder && mediaRecorder.state !== 'inactive') {
            mediaRecorder.stop();
        }
    };

    // Stop the recorder, but never sooner than 350ms after it started.
    const stopWithMinDuration = () => {
        const elapsed = Date.now() - recordStartedAt;
        const minMs = 350;
        if (elapsed < minMs) {
            // Give the recorder a moment to capture at least one chunk
            // before we tell it to stop.
            setTimeout(() => stop(), minMs - elapsed);
        } else {
            stop();
        }
    };

    micBtn.addEventListener('click', () => {
        if (recording) {
            stopWithMinDuration();
        } else {
            start();
        }
    });

    setIdle();
})();
// Smart auto-scroll: pause when user scrolls up, resume when near bottom
let _autoScrollEnabled = true;
const _SCROLL_THRESHOLD = 80; // px from bottom to re-enable auto-scroll
@@ -1250,6 +1449,87 @@ document.querySelectorAll('.example-card').forEach(card => {
});
});
/**
 * Voice-message variant of sendMessage().
 *
 * Renders a playable audio bubble with the ASR caption, then POSTs the
 * recognised text to /message and follows the same SSE/loading flow the
 * response handling below sets up (startSSE for streamed replies,
 * loadingContainers otherwise).
 *
 * @param {string} text      Recognised text; blank input is ignored.
 * @param {string} audioUrl  URL of the recording shown in the bubble.
 */
function sendVoiceMessage(text, audioUrl) {
    text = (text || '').trim();
    if (!text) return;

    // Record the utterance in the input history and reset history browsing.
    inputHistory.push(text);
    historyIdx = -1;
    historySavedDraft = '';

    // The welcome screen only exists before the first message of a session.
    const welcomeEl = document.getElementById('welcome-screen');
    const isFirstMessage = !!welcomeEl;
    if (welcomeEl) welcomeEl.remove();
    const titleInfo = isFirstMessage ? { sid: sessionId, userMsg: text } : null;

    const timestamp = new Date();
    addUserVoiceMessage(audioUrl, text, timestamp);
    const loadingEl = addLoadingIndicator();

    const body = {
        session_id: sessionId,
        message: text,
        stream: true,
        timestamp: timestamp.toISOString(),
    };

    const MAX_RETRIES = 2;
    const RETRY_DELAY_MS = 1000;

    // Replace the loading indicator with the generic send-error message.
    const showSendError = () => {
        loadingEl.remove();
        addBotMessage(t('error_send'), new Date());
    };

    // One POST attempt; any thrown error is retried with a linearly growing
    // delay until MAX_RETRIES is exhausted.
    const postWithRetry = async (attempt) => {
        try {
            const resp = await fetch('/message', {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify(body)
            });
            const data = await resp.json();
            if (data.status !== 'success') {
                showSendError();
                return;
            }
            if (data.stream) {
                startSSE(data.request_id, loadingEl, timestamp, titleInfo);
            } else {
                loadingContainers[data.request_id] = loadingEl;
            }
        } catch (err) {
            if (attempt < MAX_RETRIES) {
                setTimeout(() => postWithRetry(attempt + 1), RETRY_DELAY_MS * (attempt + 1));
                return;
            }
            showSendError();
        }
    };

    postWithRetry(0);
}
/**
 * Append a right-aligned voice-message bubble to the conversation:
 * a playable <audio> element on top, the ASR caption beneath it (when
 * given), and the formatted timestamp below the bubble. Re-enables
 * auto-scroll and scrolls the chat to the bottom.
 *
 * @param {string} audioUrl   URL of the recording to play.
 * @param {string} caption    Recognised text; omitted from the bubble when empty.
 * @param {Date}   timestamp  Passed to formatTime() for display.
 */
function addUserVoiceMessage(audioUrl, caption, timestamp) {
    const el = document.createElement('div');
    el.className = 'flex justify-end px-4 sm:px-6 py-3';
    // The audio element is emitted WITHOUT a src here: the URL comes from a
    // server response, so it is attached through the DOM below rather than
    // spliced into the markup, where a quote character could break out of
    // the attribute. The caption is HTML-escaped for the same reason.
    el.innerHTML = `
        <div class="max-w-[75%] sm:max-w-[60%]">
            <div class="bg-slate-100 dark:bg-white/10 text-slate-700 dark:text-slate-200 rounded-2xl px-3 py-2 msg-content user-bubble">
                <audio controls preload="metadata"
                       class="block w-[260px] max-w-full h-9"></audio>
                ${caption ? `<div class="text-xs mt-1.5 leading-snug text-slate-500 dark:text-slate-400 whitespace-pre-wrap break-words">${escapeHtml(caption)}</div>` : ''}
            </div>
            <div class="text-xs text-slate-400 dark:text-slate-500 mt-1.5 text-right">${formatTime(timestamp)}</div>
        </div>
    `;
    const audioEl = el.querySelector('audio');
    if (audioEl && audioUrl) {
        audioEl.setAttribute('src', audioUrl);
    }
    messagesDiv.appendChild(el);
    _autoScrollEnabled = true;
    scrollChatToBottom(true);
}
function sendMessage() {
const text = chatInput.value.trim();
if (!text && pendingAttachments.length === 0) return;
@@ -2573,7 +2853,12 @@ let cfgProviderValue = '';
let cfgModelValue = '';
// --- Custom dropdown helper ---
function initDropdown(el, options, selectedValue, onChange) {
function initDropdown(el, options, selectedValue, onChange, opts) {
// opts.placeholder: when set AND selectedValue is empty, render that text
// in a dim style instead of auto-selecting options[0]. Useful for
// "pick or empty" capabilities (asr / embedding) where we want the
// user to make an explicit choice.
opts = opts || {};
const textEl = el.querySelector('.cfg-dropdown-text');
const menuEl = el.querySelector('.cfg-dropdown-menu');
const selEl = el.querySelector('.cfg-dropdown-selected');
@@ -2615,8 +2900,20 @@ function initDropdown(el, options, selectedValue, onChange) {
menuEl.appendChild(item);
});
const sel = options.find(o => o.value === el._ddValue);
textEl.textContent = sel ? sel.label : (options[0] ? options[0].label : '--');
if (!sel && options[0]) el._ddValue = options[0].value;
if (sel) {
textEl.textContent = sel.label;
textEl.classList.remove('text-slate-400', 'dark:text-slate-500');
} else if (opts.placeholder && !el._ddValue) {
// No selection yet — show the placeholder in muted style.
// Do NOT write a fallback value, so the dropdown stays
// "unsaved" until the user explicitly picks.
textEl.textContent = opts.placeholder;
textEl.classList.add('text-slate-400', 'dark:text-slate-500');
} else {
textEl.textContent = options[0] ? options[0].label : '--';
textEl.classList.remove('text-slate-400', 'dark:text-slate-500');
if (options[0]) el._ddValue = options[0].value;
}
}
render();
@@ -3566,21 +3863,27 @@ function renderCapabilityBody(def, cap, body) {
// For auto-capable capabilities, an "auto" strategy means the user has
// not pinned a vendor; we honor that by selecting the empty-string
// sentinel rather than the resolved fallback provider name.
// `suggested_provider` is a UI-only preselect for embedding when nothing
// is pinned yet — purely cosmetic, not persisted until the user saves.
// `suggested_provider` is a UI-only preselect (used by embedding & ASR)
// when the user has not pinned a vendor yet — purely cosmetic, not
// persisted until the user clicks Save.
// For "pick or empty" capabilities (no current, no suggestion), we leave
// the dropdown unselected and show a muted placeholder so the user is
// nudged to pick explicitly.
const noSelectionAndNoHint = !cap.current_provider && !cap.suggested_provider;
const initialProviderValue = pendingProvider
? pendingProvider
: ((cap.strategy === 'auto' && capabilitySupportsAuto(def.id))
? ''
: (cap.current_provider
|| cap.suggested_provider
|| (ddOpts[0] && ddOpts[0].value)
|| (noSelectionAndNoHint ? '' : (ddOpts[0] && ddOpts[0].value))
|| ''));
initDropdown(
provDd,
ddOpts,
initialProviderValue,
(value) => onCapabilityProviderChange(def, value, body)
(value) => onCapabilityProviderChange(def, value, body),
noSelectionAndNoHint ? { placeholder: t('models_pick_provider') } : null
);
decorateCapabilityProviderDropdown(def, provDd, providerOpts);

View File

@@ -1,10 +1,11 @@
import datetime
import hashlib
import hmac
import time
import json
import logging
import mimetypes
import os
import random
import threading
import time
import uuid
@@ -340,6 +341,10 @@ class WebChannel(ChatChannel):
# Use a single-element list as a mutable counter accessible from closure.
reasoning_chars_sent = [0]
reasoning_capped_notified = [False]
# Captures the first error message emitted by agent_stream so the
# subsequent agent_end handler can skip its "empty final_response"
# fallback (which would otherwise overwrite the real error).
streamed_error: List[str] = []
def on_event(event: dict):
if request_id not in self.sse_queues:
@@ -398,6 +403,25 @@ class WebChannel(ChatChannel):
if tool_calls:
q.put({"type": "message_end", "has_tool_calls": True})
elif event_type == "error":
# Agent raised an exception (LLM 401/timeout/etc). Surface the
# real message instead of letting the empty-response fallback
# below hide it as "(模型未返回任何内容)".
err_msg = data.get("error") or "unknown error"
logger.warning(
f"[WebChannel] agent_stream emitted error for "
f"request {request_id}: {err_msg}"
)
# Remember it so the agent_end handler below knows not to
# rewrite the message into a generic empty-response notice.
streamed_error.append(err_msg)
q.put({
"type": "done",
"content": f"{err_msg}",
"request_id": request_id,
"timestamp": time.time(),
})
elif event_type == "agent_end":
# Safety net: if the agent finishes with an empty final_response,
# chat_channel skips _send_reply (because reply.content is empty),
@@ -406,16 +430,21 @@ class WebChannel(ChatChannel):
# here so the frontend always gets closure.
final_response = data.get("final_response", "")
if not final_response or not str(final_response).strip():
logger.warning(
f"[WebChannel] agent_end with empty final_response for "
f"request {request_id}, sending fallback done"
)
q.put({
"type": "done",
"content": "(模型未返回任何内容,请重试或换一种方式描述你的需求)",
"request_id": request_id,
"timestamp": time.time(),
})
if streamed_error:
# Error was already surfaced via the `error` event
# handler above; nothing more to do here.
pass
else:
logger.warning(
f"[WebChannel] agent_end with empty final_response for "
f"request {request_id}, sending fallback done"
)
q.put({
"type": "done",
"content": "(模型未返回任何内容,请重试或换一种方式描述你的需求)",
"request_id": request_id,
"timestamp": time.time(),
})
elif event_type == "file_to_send":
file_path = data.get("path", "")
@@ -432,6 +461,39 @@ class WebChannel(ChatChannel):
return on_event
@staticmethod
def _cleanup_stale_voice_recordings(max_age_seconds: int = 3600) -> None:
    """Remove old web-mic recordings from the upload directory.

    Only regular files whose name starts with ``voice_input_`` and whose
    modification time is more than ``max_age_seconds`` in the past are
    deleted. The sweep is best-effort: a file that cannot be inspected or
    removed is skipped, and any other failure is logged as a warning
    instead of being raised.

    Args:
        max_age_seconds: Minimum age, in seconds, before a recording is
            considered stale.
    """
    try:
        upload_dir = _get_upload_dir()
        if not os.path.isdir(upload_dir):
            return

        started_at = time.time()
        removed = 0
        recordings = (
            entry for entry in os.listdir(upload_dir)
            if entry.startswith("voice_input_")
        )
        for entry in recordings:
            path = os.path.join(upload_dir, entry)
            try:
                is_stale = (
                    os.path.isfile(path)
                    and started_at - os.path.getmtime(path) > max_age_seconds
                )
                if is_stale:
                    os.remove(path)
                    removed += 1
            except OSError:
                # File vanished or is not accessible; leave it for a later sweep.
                continue

        if removed:
            logger.info(f"[WebChannel] cleaned up {removed} stale voice recording(s) from {upload_dir}")
    except Exception as e:
        logger.warning(f"[WebChannel] voice cleanup failed: {e}")
def upload_file(self):
"""Handle file or directory upload via multipart/form-data."""
try:
@@ -703,6 +765,8 @@ class WebChannel(ChatChannel):
port = conf().get("web_port", 9899)
is_public_bind = host in ("0.0.0.0", "::")
self._cleanup_stale_voice_recordings()
# 打印可用渠道类型提示
logger.info(
"[WebChannel] 全部可用通道如下,可修改 config.json 配置文件中的 channel_type 字段进行切换,多个通道用逗号分隔:")
@@ -746,6 +810,7 @@ class WebChannel(ChatChannel):
'/upload', 'UploadHandler',
'/uploads/(.*)', 'UploadsHandler',
'/api/file', 'FileServeHandler',
'/api/voice/asr', 'VoiceAsrHandler',
'/poll', 'PollHandler',
'/stream', 'StreamHandler',
'/chat', 'ChatHandler',
@@ -870,6 +935,68 @@ class UploadHandler:
return WebChannel().upload_file()
class VoiceAsrHandler:
    """Transcribe a short recording sent by the web console mic button.

    The uploaded audio is stored in the upload directory under a
    ``voice_input_`` name so the browser can replay it via ``/uploads/...``,
    then passed to ``Bridge().fetch_voice_to_text``.

    Response body (JSON):
        * success: ``{"status": "success", "text": ..., "audio_url": ...}``
        * failure: ``{"status": "error", "message": ...}``; ``audio_url`` is
          included when the file had already been saved.
    """

    # Suffixes accepted from the client-supplied filename; anything else is
    # stored as ``.webm``.
    _ALLOWED_EXTS = (".webm", ".ogg", ".opus", ".mp4", ".m4a", ".mp3", ".wav")

    def POST(self):
        """Save the uploaded audio, run ASR on it and return a JSON string."""
        _require_auth()
        web.header('Content-Type', 'application/json; charset=utf-8')
        try:
            params = _raw_web_input()
            file_obj = params.get("file")
            if file_obj is None:
                return json.dumps({"status": "error", "message": "no audio file"})

            # Only the extension of the client filename is used, and only
            # when it is on the allow-list; the stored name is generated here.
            filename = getattr(file_obj, "filename", "") or "recording.webm"
            ext = os.path.splitext(filename)[1].lower() or ".webm"
            if ext not in self._ALLOWED_EXTS:
                ext = ".webm"

            payload = file_obj.file.read() if hasattr(file_obj, "file") else file_obj.value
            if not payload:
                # Nothing to transcribe: do not leave a zero-byte file behind
                # or spend a provider call on it.
                return json.dumps({"status": "error", "message": "empty audio file"})

            upload_dir = _get_upload_dir()
            os.makedirs(upload_dir, exist_ok=True)
            ts = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
            # A uuid fragment instead of a 0-9999 random number: two uploads
            # within the same second must not overwrite each other's file.
            saved_name = f"voice_input_{ts}_{uuid.uuid4().hex[:8]}{ext}"
            saved_path = os.path.join(upload_dir, saved_name)
            with open(saved_path, "wb") as f:
                f.write(payload)

            audio_url = f"/uploads/{saved_name}"

            from bridge.bridge import Bridge
            reply = Bridge().fetch_voice_to_text(saved_path)
            if reply is None:
                return json.dumps({
                    "status": "error",
                    "message": "ASR returned no reply",
                    "audio_url": audio_url,
                })

            from bridge.reply import ReplyType
            if reply.type == ReplyType.TEXT:
                return json.dumps({
                    "status": "success",
                    "text": reply.content or "",
                    "audio_url": audio_url,
                })
            # Any non-text reply is reported as a failure, using its content
            # as the message when present.
            return json.dumps({
                "status": "error",
                "message": reply.content or "ASR failed",
                "audio_url": audio_url,
            })
        except Exception as e:
            logger.exception(f"[VoiceAsrHandler] failed: {e}")
            return json.dumps({"status": "error", "message": str(e)})
class UploadsHandler:
def GET(self, file_name):
_require_auth()
@@ -1232,7 +1359,7 @@ class ModelsHandler:
# Capability -> editable flag, current-value resolver, and supported provider
# ids drawn from ConfigHandler.PROVIDER_MODELS where applicable.
_ASR_PROVIDERS = ["openai", "linkai", "baidu", "ali", "xunfei", "azure", "google"]
_ASR_PROVIDERS = ["openai", "dashscope", "zhipu", "linkai"]
_TTS_PROVIDERS = ["openai", "linkai", "minimax", "baidu", "ali", "xunfei", "azure", "google", "elevenlabs", "edge", "pytts"]
_EMBEDDING_PROVIDERS = ["openai", "dashscope", "doubao", "zhipu", "linkai"]
@@ -1502,10 +1629,23 @@ class ModelsHandler:
@classmethod
def _asr_capability(cls, local_config: dict) -> dict:
    """Build the ASR (voice-to-text) capability descriptor.

    "Pick or empty": when ``voice_to_text`` is unset, no current selection
    is reported. ``suggested_provider`` is then the first entry of
    ``cls._ASR_PROVIDERS`` whose API-key field in ``local_config`` passes
    ``cls._is_real_key``; it is only returned here as a hint and nothing in
    this method persists it. Once ``voice_to_text`` is set, it is reported
    as ``current_provider`` and no suggestion is computed.

    Args:
        local_config: Configuration mapping to read ``voice_to_text`` and
            the provider API-key fields from.

    Returns:
        Dict with ``editable``, ``current_provider``, ``suggested_provider``,
        ``current_model`` and ``providers``.
    """
    explicit = (local_config.get("voice_to_text") or "").strip().lower()
    suggested = ""
    if not explicit:
        for pid in cls._ASR_PROVIDERS:
            meta = ConfigHandler.PROVIDER_MODELS.get(pid) or {}
            key_field = meta.get("api_key_field")
            if key_field and cls._is_real_key(local_config.get(key_field, "")):
                suggested = pid
                break
    # "current_provider" appears exactly once: the earlier duplicate entry
    # (an "openai"-defaulted value) was always overwritten by this one.
    return {
        "editable": True,
        "current_provider": explicit,
        "suggested_provider": suggested,
        "current_model": "",
        "providers": cls._ASR_PROVIDERS,
    }
@@ -1897,6 +2037,10 @@ class ModelsHandler:
file_cfg[key] = value
self._write_file_config(file_cfg)
logger.info(f"[ModelsHandler] {key} set: {value!r}")
# Bridge caches voice_to_text routing + bot instance; refresh it
# so the change takes effect on the next voice request.
if key in ("voice_to_text", "text_to_voice"):
self._refresh_voice_routing()
return json.dumps({"status": "success", key: value})
def _set_tts(self, provider_id: str, model: str) -> str:
@@ -1910,8 +2054,17 @@ class ModelsHandler:
file_cfg["text_to_voice_model"] = model
self._write_file_config(file_cfg)
logger.info(f"[ModelsHandler] tts updated: provider={provider_id!r} model={model!r}")
self._refresh_voice_routing()
return json.dumps({"status": "success", "provider": provider_id, "model": model})
@staticmethod
def _refresh_voice_routing() -> None:
    """Ask the Bridge to refresh its voice state.

    Best-effort: an import or refresh failure is logged as a warning and
    never propagated to the caller.
    """
    try:
        # Imported at call time, inside the try, so an import error is
        # handled the same way as a refresh error.
        from bridge.bridge import Bridge

        voice_bridge = Bridge()
        voice_bridge.refresh_voice()
    except Exception as e:
        logger.warning(f"[ModelsHandler] Bridge voice refresh failed: {e}")
def _set_embedding(self, provider_id: str, model: str) -> str:
# provider_id="" + model="" means "switch back to legacy auto mode".
local_config = conf()
@@ -1926,9 +2079,9 @@ class ModelsHandler:
file_cfg["embedding_model"] = ""
self._write_file_config(file_cfg)
logger.info(f"[ModelsHandler] embedding updated: provider={provider_id!r} model={model!r}")
# The agent's MemoryManager picks the new provider on next process
# restart; the index dim may now mismatch so a rebuild is needed.
# The frontend surfaces this via a confirm + post-save dialog.
# The next /memory rebuild-index command hot-swaps the provider onto
# the running MemoryManager (see plugins/cow_cli). The dim may have
# changed, so the frontend prompts the user to rebuild.
return json.dumps({"status": "success", "provider": provider_id, "model": model})
@staticmethod