diff --git a/agent/memory/conversation_store.py b/agent/memory/conversation_store.py
index c5d215bf..48148f61 100644
--- a/agent/memory/conversation_store.py
+++ b/agent/memory/conversation_store.py
@@ -44,6 +44,7 @@ CREATE TABLE IF NOT EXISTS messages (
     role         TEXT    NOT NULL,
     content      TEXT    NOT NULL,
     created_at   INTEGER NOT NULL,
+    extras       TEXT    NOT NULL DEFAULT '',
     UNIQUE (session_id, seq)
 );
 
@@ -67,6 +68,12 @@ _MIGRATION_ADD_CONTEXT_START_SEQ = """
 ALTER TABLE sessions ADD COLUMN context_start_seq INTEGER NOT NULL DEFAULT 0;
 """
 
+# Generic JSON sidecar for per-message attachments (TTS audio URL, future use).
+# Always optional — readers must tolerate missing column / empty / invalid JSON.
+_MIGRATION_ADD_MSG_EXTRAS = """
+ALTER TABLE messages ADD COLUMN extras TEXT NOT NULL DEFAULT '';
+"""
+
 DEFAULT_MAX_AGE_DAYS: int = 30
 
 
@@ -169,20 +176,26 @@ def _group_into_display_turns(
     cur_rest: List[tuple] = []
     started = False
 
-    for role, raw_content, created_at in rows:
+    for role, raw_content, created_at, raw_extras in rows:
         try:
             content = json.loads(raw_content)
         except Exception:
             content = raw_content
+        try:
+            extras = json.loads(raw_extras) if raw_extras else {}
+            if not isinstance(extras, dict):
+                extras = {}
+        except Exception:
+            extras = {}
 
         if role == "user" and _is_visible_user_message(content):
             if started:
                 groups.append((cur_user, cur_rest))
-            cur_user = (content, created_at)
+            cur_user = (content, created_at, extras)
             cur_rest = []
             started = True
         else:
-            cur_rest.append((role, content, created_at))
+            cur_rest.append((role, content, created_at, extras))
 
     if started:
         groups.append((cur_user, cur_rest))
@@ -195,7 +208,7 @@ def _group_into_display_turns(
     for user_row, rest in groups:
         # User turn
         if user_row:
-            content, created_at = user_row
+            content, created_at, _u_extras = user_row
             text = _extract_display_text(content)
             if text:
                 turns.append({"role": "user", "content": text, "created_at": created_at})
@@ -206,8 +219,11 @@ def _group_into_display_turns(
         tool_results: Dict[str, str] = {}
         final_text = ""
         final_ts: Optional[int] = None
+        merged_extras: Dict[str, Any] = {}
 
-        for role, content, created_at in rest:
+        for role, content, created_at, extras in rest:
+            if role == "assistant" and isinstance(extras, dict):
+                merged_extras.update(extras)
             if role == "user":
                 tool_results.update(_extract_tool_results(content))
             elif role == "assistant":
@@ -256,6 +272,8 @@ def _group_into_display_turns(
                 "steps": steps,
                 "created_at": final_ts or (user_row[1] if user_row else 0),
             }
+            if merged_extras:
+                turn["extras"] = merged_extras
             turns.append(turn)
 
     return turns
@@ -411,13 +429,15 @@ class ConversationStore:
                         content = json.dumps(
                             msg.get("content", ""), ensure_ascii=False
                         )
+                        extras_obj = msg.get("extras") or {}
+                        extras = json.dumps(extras_obj, ensure_ascii=False) if extras_obj else ""
                         conn.execute(
                             """
                             INSERT OR IGNORE INTO messages
-                                (session_id, seq, role, content, created_at)
-                            VALUES (?, ?, ?, ?, ?)
+                                (session_id, seq, role, content, created_at, extras)
+                            VALUES (?, ?, ?, ?, ?, ?)
                             """,
-                            (session_id, next_seq, role, content, now),
+                            (session_id, next_seq, role, content, now, extras),
                         )
                         next_seq += 1
 
@@ -651,6 +671,55 @@ class ConversationStore:
             logger.info(f"[ConversationStore] Pruned {deleted} expired sessions")
         return deleted
 
+    def attach_extras_to_last_assistant(
+        self,
+        session_id: str,
+        extras: Dict[str, Any],
+    ) -> Optional[int]:
+        """
+        Merge ``extras`` into the latest assistant message of a session.
+
+        Used by post-processing (e.g. TTS) that needs to annotate an already
+        persisted bot reply with attachments such as audio URLs.
+
+        Returns the message seq that was updated, or ``None`` if no assistant
+        message exists or the update could not be applied.
+        """
+        if not extras:
+            return None
+        with self._lock:
+            conn = self._connect()
+            try:
+                row = conn.execute(
+                    """
+                    SELECT seq, extras FROM messages
+                    WHERE session_id = ? AND role = 'assistant'
+                    ORDER BY seq DESC LIMIT 1
+                    """,
+                    (session_id,),
+                ).fetchone()
+                if not row:
+                    return None
+                seq, raw = row
+                try:
+                    cur = json.loads(raw) if raw else {}
+                    if not isinstance(cur, dict):
+                        cur = {}
+                except Exception:
+                    cur = {}
+                cur.update(extras)
+                conn.execute(
+                    "UPDATE messages SET extras = ? WHERE session_id = ? AND seq = ?",
+                    (json.dumps(cur, ensure_ascii=False), session_id, seq),
+                )
+                conn.commit()
+                return seq
+            except Exception as e:
+                logger.warning(f"[ConversationStore] attach_extras failed: {e}")
+                return None
+            finally:
+                conn.close()
+
     def load_history_page(
         self,
         session_id: str,
@@ -698,15 +767,31 @@ class ConversationStore:
                 ).fetchone()
                 ctx_start = ctx_row[0] if ctx_row else 0
 
-                rows = conn.execute(
-                    """
-                    SELECT seq, role, content, created_at
-                    FROM messages
-                    WHERE session_id = ?
-                    ORDER BY seq ASC
-                    """,
-                    (session_id,),
-                ).fetchall()
+                # extras column is added by migration; tolerate older DBs that
+                # lack it by re-querying without it and using "" as the extras.
+                try:
+                    rows = conn.execute(
+                        """
+                        SELECT seq, role, content, created_at, extras
+                        FROM messages
+                        WHERE session_id = ?
+                        ORDER BY seq ASC
+                        """,
+                        (session_id,),
+                    ).fetchall()
+                except sqlite3.OperationalError:
+                    rows = [
+                        (seq, role, content, created_at, "")
+                        for (seq, role, content, created_at) in conn.execute(
+                            """
+                            SELECT seq, role, content, created_at
+                            FROM messages
+                            WHERE session_id = ?
+                            ORDER BY seq ASC
+                            """,
+                            (session_id,),
+                        ).fetchall()
+                    ]
             finally:
                 conn.close()
 
@@ -719,13 +804,16 @@ class ConversationStore:
             include_thinking = False
 
         # Strip seq for display grouping, but record max seq per visible user group
-        plain_rows = [(role, content, created_at) for _seq, role, content, created_at in rows]
+        plain_rows = [
+            (role, content, created_at, extras_raw)
+            for _seq, role, content, created_at, extras_raw in rows
+        ]
         visible = _group_into_display_turns(plain_rows, include_thinking=include_thinking)
 
         # Build a mapping: find the seq of each visible user message to annotate context boundary.
         # Walk through rows to find visible user message seqs in order.
         visible_user_seqs: List[int] = []
-        for seq, role, raw_content, _ts in rows:
+        for seq, role, raw_content, _ts, _extras in rows:
             if role != "user":
                 continue
             try:
@@ -911,6 +999,18 @@ class ConversationStore:
             except Exception as e:
                 logger.warning(f"[ConversationStore] Migration (context_start_seq) failed: {e}")
 
+        msg_cols = {
+            row[1]
+            for row in conn.execute("PRAGMA table_info(messages)").fetchall()
+        }
+        if "extras" not in msg_cols:
+            try:
+                conn.execute(_MIGRATION_ADD_MSG_EXTRAS)
+                conn.commit()
+                logger.info("[ConversationStore] Migrated: added messages.extras column")
+            except Exception as e:
+                logger.warning(f"[ConversationStore] Migration (extras) failed: {e}")
+
     def _connect(self) -> sqlite3.Connection:
         conn = sqlite3.connect(str(self._db_path), timeout=10)
         conn.execute("PRAGMA journal_mode=WAL")
diff --git a/agent/protocol/agent_stream.py b/agent/protocol/agent_stream.py
index 75b4f4ff..ef4f975b 100644
--- a/agent/protocol/agent_stream.py
+++ b/agent/protocol/agent_stream.py
@@ -603,15 +603,24 @@ class AgentStreamExecutor:
         except Exception as e:
             logger.debug(f"[Agent] MCP sync skipped: {e}")
 
-        # Prepare tool definitions (OpenAI/Claude format)
+        # Prepare tool definitions. Prefer get_json_schema() when it yields
+        # real properties (lets tools augment schema at runtime), otherwise
+        # fall back to the static `tool.params` (MCP tools rely on this).
         tools_schema = None
         if self.tools:
             tools_schema = []
             for tool in self.tools.values():
+                input_schema = tool.params
+                try:
+                    dynamic = (tool.get_json_schema() or {}).get("parameters") or {}
+                    if dynamic.get("properties"):
+                        input_schema = dynamic
+                except Exception:
+                    pass
                 tools_schema.append({
                     "name": tool.name,
                     "description": tool.description,
-                    "input_schema": tool.params  # Claude uses input_schema
+                    "input_schema": input_schema,
                 })
 
         # Create request
diff --git a/agent/tools/vision/vision.py b/agent/tools/vision/vision.py
index a1c3265f..d8d7b7a3 100644
--- a/agent/tools/vision/vision.py
+++ b/agent/tools/vision/vision.py
@@ -3,7 +3,7 @@ Vision tool - Analyze images using Vision API.
 Supports local files (auto base64-encoded) and HTTP URLs.
 
 Provider resolution:
-  - tool.vision.model (if set) means "prefer this model first; fall back to
+  - tools.vision.model (if set) means "prefer this model first; fall back to
     other configured providers if it fails". The model name is mapped to its
     native provider (e.g. doubao-* → Doubao, kimi-* → Moonshot, gpt-* →
     OpenAI/LinkAI). That provider is tried first, then the standard auto
@@ -30,7 +30,7 @@ from common import const
 from common.log import logger
 from config import conf
 
-DEFAULT_MODEL = const.GPT_41_MINI
+DEFAULT_MODEL = const.GPT_55
 DEFAULT_TIMEOUT = 60
 MAX_TOKENS = 1000
 COMPRESS_THRESHOLD = 1_048_576  # 1 MB
@@ -53,14 +53,14 @@ _DISCOVERABLE_MODELS = [
     ("ark_api_key", const.DOUBAO, const.DOUBAO_SEED_2_PRO, "Doubao"),
     ("dashscope_api_key", const.QWEN_DASHSCOPE, const.QWEN36_PLUS, "DashScope"),
     ("claude_api_key", const.CLAUDEAPI, const.CLAUDE_4_6_SONNET, "Claude"),
-    ("gemini_api_key", const.GEMINI, const.GEMINI_31_FLASH_LITE_PRE, "Gemini"),
+    ("gemini_api_key", const.GEMINI, const.GEMINI_35_FLASH, "Gemini"),
     ("qianfan_api_key", const.QIANFAN, const.ERNIE_45_TURBO_VL, "Qianfan"),
     ("zhipu_ai_api_key", const.ZHIPU_AI, const.GLM_4_7, "ZhipuAI"),
     ("minimax_api_key", const.MiniMax, const.MINIMAX_M2_7, "MiniMax"),
 ]
 
 # Model name prefix → discoverable provider display_name.
-# Used to auto-route tool.vision.model to its native provider.
+# Used to auto-route tools.vision.model to its native provider.
 # Matched case-insensitively; longest prefix wins.
 _MODEL_PREFIX_TO_PROVIDER = [
     ("doubao-", "Doubao"),
@@ -154,7 +154,7 @@ class Vision(BaseTool):
 
         # Default model is only used as a last-resort placeholder for providers
         # whose VisionProvider.model_override is None (e.g. raw OpenAI provider
-        # when the user did not configure tool.vision.model).
+        # when the user did not configure tools.vision.model).
         return self._call_with_fallback(providers, DEFAULT_MODEL, question, image_content)
 
     def _call_with_fallback(self, providers: List[VisionProvider], model: str,
@@ -193,12 +193,12 @@ class Vision(BaseTool):
         """
         Build an ordered list of providers to try.
 
-        Semantics of `tool.vision.model`:
+        Semantics of `tools.vision.model`:
           "Prefer this model first; fall back to other configured providers
            if it fails."
 
         Order:
-          1. The provider that natively serves `tool.vision.model` (if any
+          1. The provider that natively serves `tools.vision.model` (if any
              and its API key is configured) — using the user-specified model
              name verbatim.
           2. Auto-discovery chain as fallback:
@@ -213,7 +213,7 @@ class Vision(BaseTool):
         user_model = self._resolve_user_vision_model()
         providers: List[VisionProvider] = []
 
-        # Step 1: preferred provider derived from tool.vision.model
+        # Step 1: preferred provider derived from tools.vision.model
         if user_model:
             preferred = self._route_by_model_name(user_model)
             if preferred:
@@ -251,11 +251,11 @@ class Vision(BaseTool):
 
     @staticmethod
     def _resolve_user_vision_model() -> Optional[str]:
-        """Read tool.vision.model from config; return None if unset/blank."""
-        tool_conf = conf().get("tool", {})
-        if not isinstance(tool_conf, dict):
+        """Read tools.vision.model (singular ``tool`` kept as runtime fallback)."""
+        tools_conf = conf().get("tools") or conf().get("tool") or {}
+        if not isinstance(tools_conf, dict):
             return None
-        vision_conf = tool_conf.get("vision", {})
+        vision_conf = tools_conf.get("vision", {})
         if not isinstance(vision_conf, dict):
             return None
         m = vision_conf.get("model")
@@ -303,7 +303,7 @@ class Vision(BaseTool):
                 self._append_provider(providers, lambda: self._build_linkai_provider(user_model))
             if providers:
                 return providers
-            logger.warning(f"[Vision] tool.vision.model='{user_model}' looks like an OpenAI "
+            logger.warning(f"[Vision] tools.vision.model='{user_model}' looks like an OpenAI "
                            f"model but neither OPENAI_API_KEY nor LINKAI_API_KEY is configured.")
             return None  # fall through to auto
 
@@ -317,7 +317,7 @@ class Vision(BaseTool):
                 continue
             api_key = conf().get(config_key, "")
             if not api_key or not api_key.strip():
-                logger.warning(f"[Vision] tool.vision.model='{user_model}' routes to "
+                logger.warning(f"[Vision] tools.vision.model='{user_model}' routes to "
                                f"'{display_name}' but '{config_key}' is not configured. "
                                f"Falling back to auto-discovery.")
                 return None  # fall through to auto
@@ -452,8 +452,8 @@ class Vision(BaseTool):
         if not self._main_bot_supports_vision(bot):
             return None
 
-        # Use the configured main model name; do NOT inject tool.vision.model
-        # here, because by the time we reach this branch the tool.vision.model
+        # Use the configured main model name; do NOT inject tools.vision.model
+        # here, because by the time we reach this branch the tools.vision.model
         # routing has already been attempted (and either matched the main bot
         # or failed to find a provider).
         main_model_name = conf().get("model") or None
diff --git a/agent/tools/web_search/web_search.py b/agent/tools/web_search/web_search.py
index 4c6d1e45..ca56567d 100644
--- a/agent/tools/web_search/web_search.py
+++ b/agent/tools/web_search/web_search.py
@@ -1,13 +1,27 @@
-"""
-Web Search tool - Search the web using Bocha or LinkAI search API.
-Supports two backends with unified response format:
-  1. Bocha Search (primary, requires BOCHA_API_KEY)
-  2. LinkAI Search (fallback, requires LINKAI_API_KEY)
+"""Web Search tool. Supports four backends with a unified response format:
+  - bocha   (https://open.bochaai.com)
+  - zhipu   (https://docs.bigmodel.cn/cn/guide/tools/web-search)
+  - qianfan (https://cloud.baidu.com/doc/qianfan/s/2mh4su4uy)
+  - linkai  (https://link-ai.tech, fallback)
+
+Provider selection
+  - strategy 'auto' (default): pick the first configured provider in the
+    canonical order [bocha, qianfan, zhipu, linkai]. When the caller passes
+    an explicit `provider` it overrides the pick; an invalid/unconfigured
+    one falls back to the auto order (a warning is logged, no error).
+  - strategy 'fixed': use the configured provider; if its credential is
+    missing at call time, silently fall back to auto order (no card hint).
+
+Credentials
+  - bocha   : tools.web_search.bocha_api_key  ->  env BOCHA_API_KEY
+  - zhipu   : conf.zhipu_ai_api_key            ->  env ZHIPUAI_API_KEY
+  - qianfan : conf.qianfan_api_key             ->  env QIANFAN_API_KEY
+  - linkai  : conf.linkai_api_key              ->  env LINKAI_API_KEY
 """
 
-import os
 import json
-from typing import Dict, Any, Optional
+import os
+from typing import Any, Dict, List, Optional
 
 import requests
 
@@ -16,12 +30,63 @@ from common.log import logger
 from config import conf
 
 
-# Default timeout for API requests (seconds)
 DEFAULT_TIMEOUT = 30
 
+# Canonical fallback order. Empirically ordered by Chinese real-time
+# quality + relevance: bocha (best overall), qianfan (best for hot news),
+# zhipu (strong on long-form articles), linkai (cloud aggregator, last
+# resort).
+PROVIDER_ORDER = ("bocha", "qianfan", "zhipu", "linkai")
+
+PROVIDER_LABELS = {
+    "bocha":   "Bocha",
+    "zhipu":   "Zhipu",
+    "qianfan": "Baidu Qianfan",
+    "linkai":  "LinkAI",
+}
+
+
+def _tools_web_search_conf() -> dict:
+    """Return the tools.web_search config block (dict-like)."""
+    tools_cfg = conf().get("tools") or {}
+    if not isinstance(tools_cfg, dict):
+        return {}
+    block = tools_cfg.get("web_search") or {}
+    return block if isinstance(block, dict) else {}
+
+
+def _get_api_key(provider: str) -> str:
+    """Resolve API key for a provider, with conf -> env fallback."""
+    if provider == "bocha":
+        key = (_tools_web_search_conf().get("bocha_api_key") or "").strip()
+        return key or os.environ.get("BOCHA_API_KEY", "").strip()
+    if provider == "zhipu":
+        key = (conf().get("zhipu_ai_api_key") or "").strip()
+        return key or os.environ.get("ZHIPUAI_API_KEY", "").strip()
+    if provider == "qianfan":
+        key = (conf().get("qianfan_api_key") or "").strip()
+        return key or os.environ.get("QIANFAN_API_KEY", "").strip()
+    if provider == "linkai":
+        key = (conf().get("linkai_api_key") or "").strip()
+        return key or os.environ.get("LINKAI_API_KEY", "").strip()
+    return ""
+
+
+def configured_providers() -> List[str]:
+    """Return configured providers in canonical order."""
+    return [p for p in PROVIDER_ORDER if _get_api_key(p)]
+
+
+def _configured_strategy() -> str:
+    return (_tools_web_search_conf().get("strategy") or "auto").strip().lower()
+
+
+def _configured_provider() -> str:
+    return (_tools_web_search_conf().get("provider") or "").strip().lower()
+
 
 class WebSearch(BaseTool):
-    """Tool for searching the web using Bocha or LinkAI search API"""
+    """Tool for searching the web across multiple providers."""
 
     name: str = "web_search"
     description: str = "Search the web for real-time information. Returns titles, URLs, and snippets."
@@ -55,264 +120,368 @@ class WebSearch(BaseTool):
 
     def __init__(self, config: dict = None):
         self.config = config or {}
-        self._backend = None  # Will be resolved on first execute
 
     @staticmethod
     def is_available() -> bool:
-        """Check if web search is available (at least one API key is configured)"""
-        return bool(os.environ.get("BOCHA_API_KEY") or os.environ.get("LINKAI_API_KEY"))
+        """Tool is offered to the agent when at least one provider has a key."""
+        return bool(configured_providers())
 
-    def _resolve_backend(self) -> Optional[str]:
-        """
-        Determine which search backend to use.
-        Priority: Bocha > LinkAI
+    @classmethod
+    def get_json_schema(cls) -> dict:
+        """Augment the static schema with a `provider` field — only when the
+        user has ≥2 providers configured AND strategy is 'auto'. Otherwise
+        the backend picks silently and exposing the field would only waste
+        the agent's tokens."""
+        schema = {
+            "name": cls.name,
+            "description": cls.description,
+            "parameters": json.loads(json.dumps(cls.params)),  # deep copy
+        }
+        if _configured_strategy() != "auto":
+            return schema
+        available = configured_providers()
+        if len(available) < 2:
+            return schema
 
-        :return: 'bocha', 'linkai', or None
+        schema["parameters"]["properties"]["provider"] = {
+            "type": "string",
+            "enum": available,
+            "description": "Optional. Specifies the search backend. You may switch between providers when the user wants results from a particular source or from multiple sources.",
+        }
+        return schema
+
+    # ------------------------------------------------------------------
+    # Provider resolution
+    # ------------------------------------------------------------------
+
+    def _resolve_provider(self, requested: Optional[str]) -> Optional[str]:
+        """Pick a provider for this call.
+
+        Priority: caller-supplied (if configured) > fixed strategy (if
+        configured) > first configured in PROVIDER_ORDER. Falls back with
+        a logged warning (no error) when the desired one has no key.
         """
-        if os.environ.get("BOCHA_API_KEY"):
-            return "bocha"
-        if os.environ.get("LINKAI_API_KEY"):
-            return "linkai"
-        return None
+        available = configured_providers()
+        if not available:
+            return None
+
+        if requested:
+            req = requested.strip().lower()
+            if req in available:
+                return req
+            logger.warning(f"[WebSearch] requested provider '{requested}' unavailable, falling back")
+
+        if _configured_strategy() == "fixed":
+            pinned = _configured_provider()
+            if pinned in available:
+                return pinned
+            if pinned:
+                logger.warning(f"[WebSearch] pinned provider '{pinned}' unavailable, falling back to auto")
+
+        return available[0]
+
+    @staticmethod
+    def _resolution_reason(requested: Optional[str], chosen: str) -> str:
+        """Human-readable explanation for why `chosen` won the resolver."""
+        if requested and requested.strip().lower() == chosen:
+            return "caller-requested"
+        strategy = _configured_strategy()
+        if strategy == "fixed" and _configured_provider() == chosen:
+            return "fixed-strategy"
+        return "auto-fallback"
+
+    # ------------------------------------------------------------------
+    # Entry point
+    # ------------------------------------------------------------------
 
     def execute(self, args: Dict[str, Any]) -> ToolResult:
-        """
-        Execute web search
-
-        :param args: Search parameters (query, count, freshness, summary)
-        :return: Search results
-        """
-        query = args.get("query", "").strip()
+        query = (args.get("query") or "").strip()
         if not query:
             return ToolResult.fail("Error: 'query' parameter is required")
 
         count = args.get("count", 10)
         freshness = args.get("freshness", "noLimit")
         summary = args.get("summary", False)
-
-        # Validate count
         if not isinstance(count, int) or count < 1 or count > 50:
             count = 10
 
-        # Resolve backend
-        backend = self._resolve_backend()
-        if not backend:
+        requested = args.get("provider")
+        provider = self._resolve_provider(requested)
+        if not provider:
             return ToolResult.fail(
-                "Error: No search API key configured. "
-                "Please set BOCHA_API_KEY or LINKAI_API_KEY using env_config tool.\n"
-                "  - Bocha Search: https://open.bocha.cn\n"
-                "  - LinkAI Search: https://link-ai.tech"
+                "Error: No search provider configured. "
+                "Configure one of BOCHA_API_KEY / zhipu_ai_api_key / qianfan_api_key / linkai_api_key."
             )
 
+        # Always log the routing decision so multi-provider deployments can
+        # tell at a glance which backend served any given query.
+        available = configured_providers()
+        reason = self._resolution_reason(requested, provider)
+        q_preview = query if len(query) <= 60 else (query[:57] + "...")
+        logger.info(
+            f"[WebSearch] provider={provider} reason={reason} "
+            f"available={list(available)} query={q_preview!r} count={count} freshness={freshness}"
+        )
+
         try:
-            if backend == "bocha":
+            if provider == "bocha":
                 return self._search_bocha(query, count, freshness, summary)
-            else:
+            if provider == "zhipu":
+                return self._search_zhipu(query, count, freshness)
+            if provider == "qianfan":
+                return self._search_qianfan(query, count, freshness)
+            if provider == "linkai":
                 return self._search_linkai(query, count, freshness)
+            return ToolResult.fail(f"Error: Unknown provider '{provider}'")
         except requests.Timeout:
             return ToolResult.fail(f"Error: Search request timed out after {DEFAULT_TIMEOUT}s")
         except requests.ConnectionError:
             return ToolResult.fail("Error: Failed to connect to search API")
         except Exception as e:
-            logger.error(f"[WebSearch] Unexpected error: {e}", exc_info=True)
+            logger.error(f"[WebSearch] Unexpected error ({provider}): {e}", exc_info=True)
             return ToolResult.fail(f"Error: Search failed - {str(e)}")
 
+    # ------------------------------------------------------------------
+    # Bocha
+    # ------------------------------------------------------------------
+
     def _search_bocha(self, query: str, count: int, freshness: str, summary: bool) -> ToolResult:
-        """
-        Search using Bocha API
-
-        :param query: Search query
-        :param count: Number of results
-        :param freshness: Time range filter
-        :param summary: Whether to include summary
-        :return: Formatted search results
-        """
-        api_key = os.environ.get("BOCHA_API_KEY", "")
-        url = "https://api.bocha.cn/v1/web-search"
-
+        api_key = _get_api_key("bocha")
+        url = "https://api.bochaai.com/v1/web-search"
         headers = {
             "Authorization": f"Bearer {api_key}",
             "Content-Type": "application/json",
-            "Accept": "application/json"
+            "Accept": "application/json",
         }
+        payload = {"query": query, "count": count, "freshness": freshness, "summary": summary}
 
-        payload = {
-            "query": query,
-            "count": count,
-            "freshness": freshness,
-            "summary": summary
-        }
+        logger.debug(f"[WebSearch] bocha: query='{query}', count={count}")
+        resp = requests.post(url, headers=headers, json=payload, timeout=DEFAULT_TIMEOUT)
 
-        logger.debug(f"[WebSearch] Bocha search: query='{query}', count={count}")
+        if resp.status_code == 401:
+            return ToolResult.fail("Error: Invalid bocha API key.")
+        if resp.status_code == 403:
+            return ToolResult.fail("Error: bocha API — insufficient balance. Top up at https://open.bochaai.com")
+        if resp.status_code == 429:
+            return ToolResult.fail("Error: bocha API rate limit reached.")
+        if resp.status_code != 200:
+            return ToolResult.fail(f"Error: bocha API returned HTTP {resp.status_code}")
 
-        response = requests.post(url, headers=headers, json=payload, timeout=DEFAULT_TIMEOUT)
-
-        if response.status_code == 401:
-            return ToolResult.fail("Error: Invalid BOCHA_API_KEY. Please check your API key.")
-        if response.status_code == 403:
-            return ToolResult.fail("Error: Bocha API - insufficient balance. Please top up at https://open.bocha.cn")
-        if response.status_code == 429:
-            return ToolResult.fail("Error: Bocha API rate limit reached. Please try again later.")
-        if response.status_code != 200:
-            return ToolResult.fail(f"Error: Bocha API returned HTTP {response.status_code}")
-
-        data = response.json()
-
-        # Check API-level error code
+        data = resp.json()
         api_code = data.get("code")
         if api_code is not None and api_code != 200:
             msg = data.get("msg") or "Unknown error"
-            return ToolResult.fail(f"Error: Bocha API error (code={api_code}): {msg}")
-
-        # Extract and format results
-        return self._format_bocha_results(data, query)
-
-    def _format_bocha_results(self, data: dict, query: str) -> ToolResult:
-        """
-        Format Bocha API response into unified result structure
-
-        :param data: Raw API response
-        :param query: Original query
-        :return: Formatted ToolResult
-        """
-        search_data = data.get("data", {})
-        web_pages = search_data.get("webPages", {})
-        pages = web_pages.get("value", [])
-
-        if not pages:
-            return ToolResult.success({
-                "query": query,
-                "backend": "bocha",
-                "total": 0,
-                "results": [],
-                "message": "No results found"
-            })
+            return ToolResult.fail(f"Error: bocha API error (code={api_code}): {msg}")
 
+        pages = (data.get("data") or {}).get("webPages", {}).get("value", []) or []
         results = []
-        for page in pages:
-            result = {
-                "title": page.get("name", ""),
-                "url": page.get("url", ""),
-                "snippet": page.get("snippet", ""),
-                "siteName": page.get("siteName", ""),
-                "datePublished": page.get("datePublished") or page.get("dateLastCrawled", ""),
+        for p in pages:
+            item = {
+                "title": p.get("name", ""),
+                "url": p.get("url", ""),
+                "snippet": p.get("snippet", ""),
+                "siteName": p.get("siteName", ""),
+                "datePublished": p.get("datePublished") or p.get("dateLastCrawled", ""),
             }
-            # Include summary only if present
-            if page.get("summary"):
-                result["summary"] = page["summary"]
-            results.append(result)
-
-        total = web_pages.get("totalEstimatedMatches", len(results))
-
+            if p.get("summary"):
+                item["summary"] = p["summary"]
+            results.append(item)
+        total = (data.get("data") or {}).get("webPages", {}).get("totalEstimatedMatches", len(results))
         return ToolResult.success({
-            "query": query,
-            "backend": "bocha",
-            "total": total,
-            "count": len(results),
-            "results": results
+            "query": query, "backend": "bocha",
+            "total": total, "count": len(results), "results": results,
         })
 
-    def _search_linkai(self, query: str, count: int, freshness: str) -> ToolResult:
-        """
-        Search using LinkAI plugin API
+    # ------------------------------------------------------------------
+    # Zhipu
+    # ------------------------------------------------------------------
 
-        :param query: Search query
-        :param count: Number of results
-        :param freshness: Time range filter
-        :return: Formatted search results
-        """
-        api_key = os.environ.get("LINKAI_API_KEY", "")
-        api_base = conf().get("linkai_api_base", "https://api.link-ai.tech")
-        url = f"{api_base.rstrip('/')}/v1/plugin/execute"
+    def _search_zhipu(self, query: str, count: int, freshness: str) -> ToolResult:
+        """Search via the Zhipu `web_search` endpoint and normalise hits to the shared result shape.
+        :param query: Search query; only the first 70 characters are sent to the API
+        :param count: Requested number of results, clamped to 1..50
+        :param freshness: Recency token; values outside the known set fall back to "noLimit"
+        :return: ToolResult.success with query/backend/total/count/results, or ToolResult.fail
+        """
+        api_key = _get_api_key("zhipu")
+        api_base = (conf().get("zhipu_ai_api_base") or "https://open.bigmodel.cn/api/paas/v4").rstrip("/")
+        headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
+
+        # Zhipu Web Search expects `search_query` <= 70 chars; truncate
+        # gracefully so a long agent-supplied query doesn't get rejected.
+        trimmed_query = (query or "")[:70]
+        engine = (_tools_web_search_conf().get("zhipu_search_engine") or "search_pro").strip().lower()
+        if engine not in ("search_std", "search_pro", "search_pro_sogou", "search_pro_quark"):
+            engine = "search_pro"
+        recency_tokens = ("oneDay", "oneWeek", "oneMonth", "oneYear", "noLimit")
+        payload: Dict[str, Any] = {
+            "search_engine": engine,
+            "search_query": trimmed_query,
+            "search_intent": False,
+            "count": max(1, min(int(count or 10), 50)),
+            "search_recency_filter": freshness if freshness in recency_tokens else "noLimit",
+        }
+        content_size = (_tools_web_search_conf().get("zhipu_content_size") or "").strip().lower()
+        if content_size in ("medium", "high"):
+            payload["content_size"] = content_size
+
+        logger.debug(f"[WebSearch] zhipu: query='{trimmed_query}', count={payload['count']}, engine={engine}")
+        resp = requests.post(f"{api_base}/web_search", headers=headers, json=payload, timeout=DEFAULT_TIMEOUT)
+        if resp.status_code == 401:
+            return ToolResult.fail("Error: Invalid Zhipu API key.")
+        if resp.status_code != 200:
+            return ToolResult.fail(f"Error: Zhipu API returned HTTP {resp.status_code}: {resp.text[:200]}")
+
+        data = resp.json()
+        if not isinstance(data, dict):
+            # A non-object JSON body has no `.get`; report it instead of raising AttributeError below.
+            return ToolResult.fail("Error: Zhipu returned an unexpected response format.")
+        # Business-level errors (1701/1702/1703 etc.) come back as
+        # {"error": {"code","message"}} even on HTTP 200.
+        if data.get("error"):
+            err = data["error"] or {}
+            return ToolResult.fail(f"Error: Zhipu returned {err.get('code')}: {err.get('message','')}")
+
+        items = data.get("search_result") or (data.get("data") or {}).get("search_result") or []
+        results = [{
+            "title": it.get("title", ""),
+            "url": it.get("link") or it.get("url", ""),
+            "snippet": it.get("content") or it.get("snippet", ""),
+            "siteName": it.get("media") or it.get("siteName", ""),
+            "datePublished": it.get("publish_date") or it.get("datePublished", ""),
+        } for it in items if isinstance(it, dict)]
+        return ToolResult.success({
+            "query": query, "backend": "zhipu",
+            "total": len(results), "count": len(results), "results": results,
+        })
+
+    # ------------------------------------------------------------------
+    # Qianfan (Baidu)
+    # ------------------------------------------------------------------
+
+    def _search_qianfan(self, query: str, count: int, freshness: str) -> ToolResult:
+        """Search via Baidu Qianfan AI Search and normalise references to the shared result shape.
+        :param query: Search query, sent as a single user message
+        :param count: Requested number of results, clamped to 1..50 and passed as `top_k`
+        :param freshness: Recency token, translated to a `page_time` range when recognised
+        :return: ToolResult.success with query/backend/total/count/results, or ToolResult.fail
+        """
+        api_key = _get_api_key("qianfan")
+        api_base = (conf().get("qianfan_api_base") or "https://qianfan.baidubce.com/v2").rstrip("/")
+        headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json", "X-Appbuilder-From": "cow"}
+        count = max(1, min(int(count or 10), 50))
+        payload: Dict[str, Any] = {
+            "messages": [{"role": "user", "content": query}],
+            "search_source": "baidu_search_v2",
+            "resource_type_filter": [{"type": "web", "top_k": count}],
+        }
+
+        # Baidu AI Search expects freshness as a date-range filter, not a
+        # named recency token. Translate our shared vocabulary into the
+        # underlying page_time range expected by the API.
+        search_filter = self._qianfan_build_freshness_filter(freshness)
+        if search_filter:
+            payload["search_filter"] = search_filter
+
+        logger.debug(f"[WebSearch] qianfan: query='{query}', count={count}, freshness={freshness!r}")
+        resp = requests.post(f"{api_base}/ai_search/web_search", headers=headers, json=payload, timeout=DEFAULT_TIMEOUT)
+        if resp.status_code == 401:
+            return ToolResult.fail("Error: Invalid Qianfan API key.")
+        if resp.status_code != 200:
+            return ToolResult.fail(f"Error: Qianfan API returned HTTP {resp.status_code}: {resp.text[:200]}")
+
+        data = resp.json()
+        if not isinstance(data, dict):
+            # A non-object JSON body has no `.get`; report it instead of raising AttributeError below.
+            return ToolResult.fail("Error: Qianfan returned an unexpected response format.")
+        # Even on HTTP 200 Baidu surfaces business errors as {"code","message"}.
+        if data.get("code"):
+            return ToolResult.fail(f"Error: Qianfan returned {data.get('code')}: {data.get('message','')}")
+
+        refs = data.get("references") or []
+        results = [{
+            "title": d.get("title", ""),
+            "url": d.get("url", ""),
+            "snippet": (d.get("content") or "")[:200],
+            "siteName": d.get("web_anchor") or d.get("website") or "",
+            "datePublished": d.get("date", ""),
+        } for d in refs if isinstance(d, dict)]
+        return ToolResult.success({
+            "query": query, "backend": "qianfan",
+            "total": len(results), "count": len(results), "results": results,
+        })
+
+    @staticmethod
+    def _qianfan_build_freshness_filter(freshness: str) -> Optional[Dict[str, Any]]:
+        """Translate a shared freshness token into a `page_time` date range (None = no filter)."""
+        window_days = {"oneDay": 1, "oneWeek": 7, "oneMonth": 30, "oneYear": 365}
+        span = window_days.get(freshness) if freshness else None
+        if not span:
+            return None
+        from datetime import datetime, timedelta
+        today = datetime.now()
+        # The upper bound is tomorrow (exclusive), so pages dated today fall inside the range.
+        bounds = [(today + timedelta(days=offset)).strftime("%Y-%m-%d") for offset in (-span, 1)]
+        return {"range": {"page_time": {"gte": bounds[0], "lt": bounds[1]}}}
+
+    # ------------------------------------------------------------------
+    # LinkAI (plugin)
+    # ------------------------------------------------------------------
+
+    def _search_linkai(self, query: str, count: int, freshness: str) -> ToolResult:
+        api_key = _get_api_key("linkai")
+        api_base = (conf().get("linkai_api_base") or "https://api.link-ai.tech").rstrip("/")
+        url = f"{api_base}/v1/plugin/execute"
 
         from common.utils import get_cloud_headers
         headers = get_cloud_headers(api_key)
 
-        payload = {
-            "code": "web-search",
-            "args": {
-                "query": query,
-                "count": count,
-                "freshness": freshness
-            }
-        }
+        payload = {"code": "web-search", "args": {"query": query, "count": count, "freshness": freshness}}
+        logger.debug(f"[WebSearch] linkai: query='{query}', count={count}")
+        resp = requests.post(url, headers=headers, json=payload, timeout=DEFAULT_TIMEOUT)
 
-        logger.debug(f"[WebSearch] LinkAI search: query='{query}', count={count}")
-
-        response = requests.post(url, headers=headers, json=payload, timeout=DEFAULT_TIMEOUT)
-
-        if response.status_code == 401:
-            return ToolResult.fail("Error: Invalid LINKAI_API_KEY. Please check your API key.")
-        if response.status_code != 200:
-            return ToolResult.fail(f"Error: LinkAI API returned HTTP {response.status_code}")
-
-        data = response.json()
+        if resp.status_code == 401:
+            return ToolResult.fail("Error: Invalid LinkAI API key.")
+        if resp.status_code != 200:
+            return ToolResult.fail(f"Error: LinkAI API returned HTTP {resp.status_code}")
 
+        data = resp.json()
         if not data.get("success"):
             msg = data.get("message") or "Unknown error"
             return ToolResult.fail(f"Error: LinkAI search failed: {msg}")
 
-        return self._format_linkai_results(data, query)
-
-    def _format_linkai_results(self, data: dict, query: str) -> ToolResult:
-        """
-        Format LinkAI API response into unified result structure.
-        LinkAI returns the search data in data.data field, which follows
-        the same Bing-compatible format as Bocha.
-
-        :param data: Raw API response
-        :param query: Original query
-        :return: Formatted ToolResult
-        """
-        raw_data = data.get("data", "")
-
-        # LinkAI may return data as a JSON string
-        if isinstance(raw_data, str):
+        raw = data.get("data", "")
+        if isinstance(raw, str):
             try:
-                raw_data = json.loads(raw_data)
+                raw = json.loads(raw)
             except (json.JSONDecodeError, TypeError):
-                # If data is plain text, return it as a single result
                 return ToolResult.success({
-                    "query": query,
-                    "backend": "linkai",
-                    "total": 1,
-                    "count": 1,
-                    "results": [{"content": raw_data}]
+                    "query": query, "backend": "linkai",
+                    "total": 1, "count": 1, "results": [{"content": raw}],
                 })
 
-        # If the response follows Bing-compatible structure
-        if isinstance(raw_data, dict):
-            web_pages = raw_data.get("webPages", {})
-            pages = web_pages.get("value", [])
-
+        if isinstance(raw, dict):
+            pages = (raw.get("webPages") or {}).get("value", []) or []
             if pages:
                 results = []
-                for page in pages:
-                    result = {
-                        "title": page.get("name", ""),
-                        "url": page.get("url", ""),
-                        "snippet": page.get("snippet", ""),
-                        "siteName": page.get("siteName", ""),
-                        "datePublished": page.get("datePublished") or page.get("dateLastCrawled", ""),
+                for p in pages:
+                    item = {
+                        "title": p.get("name", ""),
+                        "url": p.get("url", ""),
+                        "snippet": p.get("snippet", ""),
+                        "siteName": p.get("siteName", ""),
+                        "datePublished": p.get("datePublished") or p.get("dateLastCrawled", ""),
                     }
-                    if page.get("summary"):
-                        result["summary"] = page["summary"]
-                    results.append(result)
-
-                total = web_pages.get("totalEstimatedMatches", len(results))
+                    if p.get("summary"):
+                        item["summary"] = p["summary"]
+                    results.append(item)
+                total = (raw.get("webPages") or {}).get("totalEstimatedMatches", len(results))
                 return ToolResult.success({
-                    "query": query,
-                    "backend": "linkai",
-                    "total": total,
-                    "count": len(results),
-                    "results": results
+                    "query": query, "backend": "linkai",
+                    "total": total, "count": len(results), "results": results,
                 })
 
-        # Fallback: return raw data
         return ToolResult.success({
-            "query": query,
-            "backend": "linkai",
-            "total": 1,
-            "count": 1,
-            "results": [{"content": str(raw_data)}]
+            "query": query, "backend": "linkai",
+            "total": 1, "count": 1, "results": [{"content": str(raw)}],
         })
diff --git a/bridge/agent_initializer.py b/bridge/agent_initializer.py
index d17dcb0c..7d5afb4a 100644
--- a/bridge/agent_initializer.py
+++ b/bridge/agent_initializer.py
@@ -521,7 +521,7 @@ class AgentInitializer:
                 if tool_name == "web_search":
                     from agent.tools.web_search.web_search import WebSearch
                     if not WebSearch.is_available():
-                        logger.debug("[AgentInitializer] WebSearch skipped - no BOCHA_API_KEY or LINKAI_API_KEY")
+                        logger.debug("[AgentInitializer] WebSearch skipped - no search provider configured")
                         continue
 
                 # Special handling for EnvConfig tool
diff --git a/bridge/bridge.py b/bridge/bridge.py
index 753e394a..c0cb62e4 100644
--- a/bridge/bridge.py
+++ b/bridge/bridge.py
@@ -14,7 +14,9 @@ class Bridge(object):
     def __init__(self):
         self.btype = {
             "chat": const.OPENAI,
-            "voice_to_text": conf().get("voice_to_text", "openai"),
+            # Empty `voice_to_text` (the default in new configs) triggers
+            # the auto-pick below — see _auto_pick_voice_to_text for order.
+            "voice_to_text": conf().get("voice_to_text") or self._auto_pick_voice_to_text(),
             "text_to_voice": conf().get("text_to_voice", "google"),
             "translate": conf().get("translate", "baidu"),
         }
@@ -84,6 +86,46 @@ class Bridge(object):
         self.chat_bots = {}
         self._agent_bridge = None
 
+    def refresh_voice(self):
+        """Re-read the voice providers from config and drop the cached voice bots.
+        The next lookup then builds bots for the new providers. Used by the web
+        console after voice settings are edited; agent_bridge / agent state is untouched.
+        """
+        cfg = conf()
+        v2t_cfg, t2v_cfg = cfg.get("voice_to_text"), cfg.get("text_to_voice")
+        stt = v2t_cfg or self._auto_pick_voice_to_text()
+        tts = cfg.get("text_to_voice", "google")
+        if cfg.get("use_linkai") and cfg.get("linkai_api_key"):
+            if not v2t_cfg or v2t_cfg == "openai":
+                stt = const.LINKAI
+            if not t2v_cfg or t2v_cfg in ("openai", const.TTS_1, const.TTS_1_HD):
+                tts = const.LINKAI
+        self.btype.update(voice_to_text=stt, text_to_voice=tts)
+        for kind in ("voice_to_text", "text_to_voice"):
+            self.bots.pop(kind, None)
+        logger.info(f"[Bridge] voice refreshed: voice_to_text={stt}, text_to_voice={tts}")
+
+    @staticmethod
+    def _auto_pick_voice_to_text() -> str:
+        """Pick an ASR provider from the first configured API key when voice_to_text is unset.
+
+        Order matches the web console: openai → dashscope → zhipu → linkai. Blank values
+        and the "YOUR API KEY" / "YOUR_API_KEY" placeholders count as missing. Returns
+        'openai' when nothing is configured so the original "missing key" error is preserved.
+        """
+        placeholders = ("YOUR API KEY", "YOUR_API_KEY")
+        candidates = (
+            ("open_ai_api_key", "openai"),
+            ("dashscope_api_key", "dashscope"),
+            ("zhipu_ai_api_key", "zhipu"),
+            ("linkai_api_key", "linkai"),
+        )
+        for cfg_key, provider in candidates:
+            value = (conf().get(cfg_key) or "").strip()
+            if value != "" and value not in placeholders:
+                return provider
+        return "openai"
+
     # 模型对应的接口
     def get_bot(self, typename):
         if self.bots.get(typename) is None:
diff --git a/channel/chat_channel.py b/channel/chat_channel.py
index 3251c286..c38dd7c8 100644
--- a/channel/chat_channel.py
+++ b/channel/chat_channel.py
@@ -171,7 +171,13 @@ class ChatChannel(Channel):
             if "desire_rtype" not in context and conf().get("always_reply_voice") and ReplyType.VOICE not in self.NOT_SUPPORT_REPLYTYPE:
                 context["desire_rtype"] = ReplyType.VOICE
         elif context.type == ContextType.VOICE:
-            if "desire_rtype" not in context and conf().get("voice_reply_voice") and ReplyType.VOICE not in self.NOT_SUPPORT_REPLYTYPE:
+            # Voice input replies with voice when either voice_reply_voice
+            # (mirror voice) or the global always_reply_voice toggle is on.
+            if (
+                "desire_rtype" not in context
+                and (conf().get("voice_reply_voice") or conf().get("always_reply_voice"))
+                and ReplyType.VOICE not in self.NOT_SUPPORT_REPLYTYPE
+            ):
                 context["desire_rtype"] = ReplyType.VOICE
         return context
 
@@ -264,6 +270,8 @@ class ChatChannel(Channel):
                 if reply.type == ReplyType.TEXT:
                     reply_text = reply.content
                     if desire_rtype == ReplyType.VOICE and ReplyType.VOICE not in self.NOT_SUPPORT_REPLYTYPE:
+                        # Preserve original text for the "text-then-voice" pattern in _send_reply.
+                        context["voice_reply_text"] = reply.content
                         reply = super().build_text_to_voice(reply.content)
                         return self._decorate_reply(context, reply)
                     if context.get("isgroup", False):
@@ -311,6 +319,15 @@ class ChatChannel(Channel):
                     # 短暂延迟后发送图片
                     time.sleep(0.3)
                     self._send(reply, context)
+                # Send text bubble before voice, unless channel already streamed
+                # the text (feishu) or natively renders STT under the voice (wechatcom).
+                elif reply.type == ReplyType.VOICE and context.get("voice_reply_text") \
+                        and not context.get("feishu_streamed") \
+                        and context.get("channel_type") not in ("wechatcom_app",):
+                    text_reply = Reply(ReplyType.TEXT, context.get("voice_reply_text"))
+                    self._send(text_reply, context)
+                    time.sleep(0.3)
+                    self._send(reply, context)
                 else:
                     self._send(reply, context)
     
diff --git a/channel/dingtalk/dingtalk_channel.py b/channel/dingtalk/dingtalk_channel.py
index d572e35d..b1ae86c2 100644
--- a/channel/dingtalk/dingtalk_channel.py
+++ b/channel/dingtalk/dingtalk_channel.py
@@ -86,6 +86,8 @@ def _check(func):
 
 @singleton
 class DingTalkChanel(ChatChannel, dingtalk_stream.ChatbotHandler):
+    NOT_SUPPORT_REPLYTYPE = []
+
     dingtalk_client_id = conf().get('dingtalk_client_id')
     dingtalk_client_secret = conf().get('dingtalk_client_secret')
 
@@ -870,6 +872,48 @@ class DingTalkChanel(ChatChannel, dingtalk_stream.ChatbotHandler):
                     self.reply_text("抱歉，文件上传失败", incoming_message)
             return
         
+        # Native sampleAudio. Upload only accepts ogg/amr, so convert TTS mp3/wav to amr.
+        elif reply.type == ReplyType.VOICE:
+            logger.info(f"[DingTalk] Sending voice: {reply.content}")
+            access_token = self.get_access_token()
+            if not access_token:
+                logger.error("[DingTalk] Cannot get access token for voice")
+                self.reply_text("抱歉，语音发送失败（无法获取token）", incoming_message)
+                return
+
+            voice_path = reply.content
+            if voice_path.startswith("file://"):
+                voice_path = voice_path[7:]
+
+            amr_path = voice_path
+            duration_ms = 0
+            if not voice_path.lower().endswith((".amr", ".ogg")):
+                try:
+                    from voice.audio_convert import any_to_amr
+                    amr_path = os.path.splitext(voice_path)[0] + ".amr"
+                    duration_ms = int(any_to_amr(voice_path, amr_path) or 0)
+                except Exception as e:
+                    logger.error(f"[DingTalk] Failed to convert voice to amr: {e}")
+                    self.reply_text("抱歉，语音转码失败", incoming_message)
+                    return
+
+            media_id = self.upload_media(amr_path, media_type="voice")
+            if not media_id:
+                logger.error("[DingTalk] Failed to upload voice media")
+                self.reply_text("抱歉，语音上传失败", incoming_message)
+                return
+
+            msg_param = {
+                "mediaId": media_id,
+                "duration": str(duration_ms or 1000),
+            }
+            success = self._send_file_message(
+                access_token, incoming_message, "sampleAudio", msg_param, isgroup
+            )
+            if not success:
+                self.reply_text("抱歉，语音发送失败", incoming_message)
+            return
+
         # 处理文本消息
         elif reply.type == ReplyType.TEXT:
             logger.info(f"[DingTalk] Sending text message, length={len(reply.content)}")
diff --git a/channel/feishu/feishu_channel.py b/channel/feishu/feishu_channel.py
index f479394a..ca18e64b 100644
--- a/channel/feishu/feishu_channel.py
+++ b/channel/feishu/feishu_channel.py
@@ -1515,10 +1515,16 @@ class FeiShuChanel(ChatChannel):
             else:
                 context.type = ContextType.TEXT
             context.content = content.strip()
+            # Text input opts into voice replies only when the always-on toggle is set.
+            if "desire_rtype" not in context and conf().get("always_reply_voice"):
+                context["desire_rtype"] = ReplyType.VOICE
 
         elif context.type == ContextType.VOICE:
-            # 2.语音请求
-            if "desire_rtype" not in context and conf().get("voice_reply_voice"):
+            # 2.语音请求: voice input replies with voice if either
+            # voice_reply_voice (mirror reply) or always_reply_voice is on.
+            if "desire_rtype" not in context and (
+                conf().get("voice_reply_voice") or conf().get("always_reply_voice")
+            ):
                 context["desire_rtype"] = ReplyType.VOICE
 
         return context
diff --git a/channel/web/chat.html b/channel/web/chat.html
index 56ce808f..947e07b7 100644
--- a/channel/web/chat.html
+++ b/channel/web/chat.html
@@ -137,6 +137,11 @@
                             <i class="fas fa-sliders item-icon text-xs w-5 text-center"></i>
                             <span data-i18n="menu_config">配置</span>
                         </a>
+                        <a class="sidebar-item flex items-center gap-3 px-3 py-2 rounded-lg cursor-pointer transition-all duration-150 hover:bg-white/5 hover:text-neutral-200 text-[14px]"
+                           data-view="models">
+                            <i class="fas fa-microchip item-icon text-xs w-5 text-center"></i>
+                            <span data-i18n="menu_models">模型</span>
+                        </a>
                         <a class="sidebar-item flex items-center gap-3 px-3 py-2 rounded-lg cursor-pointer transition-all duration-150 hover:bg-white/5 hover:text-neutral-200 text-[14px]"
                            data-view="skills">
                             <i class="fas fa-bolt item-icon text-xs w-5 text-center"></i>
@@ -417,15 +422,24 @@
                                     </button>
                                 </div>
                                 <div id="slash-menu" class="slash-menu hidden"></div>
-                                <textarea id="chat-input"
-                                          class="flex-1 min-w-0 px-4 py-[10px] rounded-xl border border-slate-200 dark:border-slate-600
-                                                 bg-slate-50 dark:bg-white/5 text-slate-800 dark:text-slate-100
-                                                 placeholder:text-slate-400 dark:placeholder:text-slate-500
-                                                 focus:outline-none focus:ring-0 focus:border-primary-600
-                                                 text-sm leading-relaxed"
-                                          rows="1"
-                                          data-i18n-placeholder="input_placeholder"
-                                          placeholder="输入消息，或输入 / 使用指令"></textarea>
+                                <div class="flex-1 min-w-0 relative flex items-center">
+                                    <textarea id="chat-input"
+                                              class="w-full pl-4 pr-11 py-[10px] rounded-xl border border-slate-200 dark:border-slate-600
+                                                     bg-slate-50 dark:bg-white/5 text-slate-800 dark:text-slate-100
+                                                     placeholder:text-slate-400 dark:placeholder:text-slate-500
+                                                     focus:outline-none focus:ring-0 focus:border-primary-600
+                                                     text-sm leading-relaxed"
+                                              rows="1"
+                                              data-i18n-placeholder="input_placeholder"
+                                              placeholder="输入消息，或输入 / 使用指令"></textarea>
+                                    <button id="mic-btn" type="button"
+                                            class="absolute right-2 top-1/2 -translate-y-1/2 w-8 h-8 flex items-center justify-center rounded-lg
+                                                   text-slate-400 hover:text-primary-500 hover:bg-primary-50 dark:hover:bg-primary-900/20
+                                                   cursor-pointer transition-colors duration-150"
+                                            data-i18n-title="mic_idle_title" title="点击录音 / 再按一次结束">
+                                        <i class="fas fa-microphone text-sm"></i>
+                                    </button>
+                                </div>
                                 <button id="send-btn"
                                         class="flex-shrink-0 w-10 h-10 flex items-center justify-center rounded-lg
                                                bg-primary-400 text-white hover:bg-primary-500
@@ -460,6 +474,11 @@
                                             <i class="fas fa-microchip text-primary-500 text-sm"></i>
                                         </div>
                                         <h3 class="font-semibold text-slate-800 dark:text-slate-100" data-i18n="config_model">模型配置</h3>
+                                        <a class="ml-auto text-xs text-slate-500 dark:text-slate-400 hover:text-primary-500 dark:hover:text-primary-400 cursor-pointer transition-colors flex items-center gap-1"
+                                           onclick="navigateTo('models')">
+                                            <span data-i18n="config_model_advanced">高级配置</span>
+                                            <i class="fas fa-arrow-right text-[10px]"></i>
+                                        </a>
                                     </div>
                                     <div class="space-y-5">
                                         <!-- Provider -->
@@ -850,6 +869,41 @@
                     </div>
                 </div>
 
+                <!-- ====================================================== -->
+                <!-- VIEW: Models                                            -->
+                <!-- ====================================================== -->
+                <div id="view-models" class="view">
+                    <!-- Tailwind JIT safelist: capability-card icon colors are
+                         emitted from JS template strings. Listing them here
+                         (display:none) guarantees the CDN-side compiler picks
+                         them up regardless of render timing. -->
+                    <div class="hidden bg-blue-50 dark:bg-blue-900/30 text-blue-500
+                                       bg-orange-50 dark:bg-orange-900/30 text-orange-500
+                                       bg-purple-50 dark:bg-purple-900/30 text-purple-500
+                                       bg-amber-50 dark:bg-amber-900/30 text-amber-500
+                                       bg-primary-50 dark:bg-primary-900/30 text-primary-500"></div>
+                    <div class="flex-1 overflow-y-auto p-6">
+                        <div class="max-w-4xl mx-auto">
+                            <div class="flex items-center justify-between mb-6">
+                                <div>
+                                    <h2 class="text-xl font-bold text-slate-800 dark:text-slate-100" data-i18n="models_title">模型管理</h2>
+                                    <p class="text-sm text-slate-500 dark:text-slate-400 mt-1" data-i18n="models_desc">统一管理对话、视觉、语音、向量、图像、搜索能力</p>
+                                </div>
+                                <button id="models-add-vendor-btn" onclick="openVendorModal('')"
+                                        class="flex items-center gap-2 px-4 py-2 rounded-lg bg-primary-500 hover:bg-primary-600
+                                               text-white text-sm font-medium cursor-pointer transition-colors duration-150">
+                                    <i class="fas fa-plus text-xs"></i>
+                                    <span data-i18n="models_add_vendor">添加厂商</span>
+                                </button>
+                            </div>
+                            <div id="models-loading" class="flex items-center gap-2 py-12 justify-center text-slate-400 dark:text-slate-500 text-sm">
+                                <i class="fas fa-spinner fa-spin text-xs"></i><span>Loading...</span>
+                            </div>
+                            <div id="models-content" class="grid gap-6 hidden"></div>
+                        </div>
+                    </div>
+                </div>
+
                 <!-- ====================================================== -->
                 <!-- VIEW: Channels                                          -->
                 <!-- ====================================================== -->
@@ -959,7 +1013,7 @@
     </div><!-- /app -->
 
     <!-- Confirm Dialog -->
-    <div id="confirm-dialog-overlay" class="fixed inset-0 bg-black/50 z-[100] hidden flex items-center justify-center">
+    <div id="confirm-dialog-overlay" class="fixed inset-0 bg-black/50 z-[200] hidden flex items-center justify-center">
         <div class="bg-white dark:bg-[#1A1A1A] rounded-2xl border border-slate-200 dark:border-white/10 shadow-xl
                     w-full max-w-sm mx-4 overflow-hidden">
             <div class="p-6">
@@ -984,6 +1038,77 @@
         </div>
     </div>
 
+    <!-- Vendor Credentials Modal -->
+    <div id="vendor-modal-overlay" class="fixed inset-0 bg-black/50 z-[100] hidden flex items-center justify-center">
+        <div class="bg-white dark:bg-[#1A1A1A] rounded-2xl border border-slate-200 dark:border-white/10 shadow-xl
+                    w-full max-w-md mx-4">
+            <div class="p-6">
+                <div class="flex items-center gap-3 mb-5">
+                    <div class="w-10 h-10 rounded-xl bg-primary-50 dark:bg-primary-900/20 flex items-center justify-center flex-shrink-0">
+                        <i class="fas fa-key text-primary-500"></i>
+                    </div>
+                    <div class="min-w-0 flex-1">
+                        <h3 id="vendor-modal-title" class="font-semibold text-slate-800 dark:text-slate-100 text-base"></h3>
+                        <p id="vendor-modal-subtitle" class="text-xs text-slate-500 dark:text-slate-400 mt-0.5 font-mono"></p>
+                    </div>
+                </div>
+
+                <!-- Provider selector (only visible when adding via top button) -->
+                <div id="vendor-modal-picker-wrap" class="mb-4 hidden">
+                    <label class="block text-sm font-medium text-slate-600 dark:text-slate-400 mb-1.5" data-i18n="models_provider">厂商</label>
+                    <div id="vendor-modal-picker" class="cfg-dropdown" tabindex="0">
+                        <div class="cfg-dropdown-selected">
+                            <span class="cfg-dropdown-text">--</span>
+                            <i class="fas fa-chevron-down cfg-dropdown-arrow"></i>
+                        </div>
+                        <div class="cfg-dropdown-menu"></div>
+                    </div>
+                </div>
+
+                <div class="space-y-4">
+                    <div>
+                        <label class="block text-sm font-medium text-slate-600 dark:text-slate-400 mb-1.5">API Key</label>
+                        <input id="vendor-modal-key" type="text" autocomplete="off" data-1p-ignore data-lpignore="true"
+                               class="w-full px-3 py-2 rounded-lg border border-slate-200 dark:border-slate-600
+                                      bg-slate-50 dark:bg-white/5 text-sm text-slate-800 dark:text-slate-100
+                                      focus:outline-none focus:border-primary-500 font-mono transition-colors"
+                               placeholder="sk-...">
+                    </div>
+                    <div id="vendor-modal-base-wrap">
+                        <label class="block text-sm font-medium text-slate-600 dark:text-slate-400 mb-1.5">API Base</label>
+                        <input id="vendor-modal-base" type="text"
+                               class="w-full px-3 py-2 rounded-lg border border-slate-200 dark:border-slate-600
+                                      bg-slate-50 dark:bg-white/5 text-sm text-slate-800 dark:text-slate-100
+                                      focus:outline-none focus:border-primary-500 font-mono transition-colors"
+                               placeholder="https://...../v1">
+                        <p id="vendor-modal-base-hint" class="mt-1.5 text-xs text-slate-400 dark:text-slate-500 hidden">
+                            <i class="fas fa-info-circle mr-1"></i><span data-i18n="models_base_default_hint">留空将使用官方默认地址</span>
+                        </p>
+                    </div>
+                </div>
+            </div>
+            <div class="flex items-center justify-between gap-3 px-6 py-4 border-t border-slate-100 dark:border-white/5 rounded-b-2xl">
+                <button id="vendor-modal-clear"
+                        class="px-3 py-2 rounded-lg text-xs
+                               text-red-500 dark:text-red-400 hover:bg-red-50 dark:hover:bg-red-900/20
+                               cursor-pointer transition-colors duration-150 hidden"
+                        data-i18n="models_clear_credential">清除凭据</button>
+                <span id="vendor-modal-status"
+                      class="flex-1 text-xs text-primary-500 opacity-0 transition-opacity duration-300 text-center"></span>
+                <button id="vendor-modal-cancel"
+                        class="px-4 py-2 rounded-lg border border-slate-200 dark:border-white/10
+                               text-slate-600 dark:text-slate-300 text-sm font-medium
+                               hover:bg-slate-50 dark:hover:bg-white/5
+                               cursor-pointer transition-colors duration-150"
+                        data-i18n="cancel">取消</button>
+                <button id="vendor-modal-save"
+                        class="px-4 py-2 rounded-lg bg-primary-500 hover:bg-primary-600 text-white text-sm font-medium
+                               cursor-pointer transition-colors duration-150 disabled:opacity-50 disabled:cursor-not-allowed"
+                        data-i18n="save">保存</button>
+            </div>
+        </div>
+    </div>
+
     <script defer src="assets/js/console.js"></script>
 </body>
 </html>
diff --git a/channel/web/static/css/console.css b/channel/web/static/css/console.css
index d5caf5b1..35fc307b 100644
--- a/channel/web/static/css/console.css
+++ b/channel/web/static/css/console.css
@@ -725,6 +725,58 @@
     background: rgba(74, 190, 110, 0.15);
     color: #74E9A4;
 }
+/* When an item carries a hint (e.g. brand alias next to a technical model
+   id), label/hint are split into two spans so the hint sits on the right in
+   a dim, smaller weight. Without a hint the row stays a plain text node and
+   uses the default ellipsis behaviour, so no layout regressions for old call
+   sites. */
+.cfg-dropdown-label {
+    flex: 1 1 auto;
+    min-width: 0;
+    overflow: hidden;
+    text-overflow: ellipsis;
+}
+.cfg-dropdown-hint {
+    flex-shrink: 0;
+    margin-left: auto;
+    padding-left: 12px;
+    color: #94a3b8;
+    font-size: 12px;
+    font-weight: 400;
+}
+.dark .cfg-dropdown-hint {
+    color: #64748b;
+}
+.cfg-dropdown-item.active .cfg-dropdown-hint {
+    /* Tint the hint toward the brand colour on the active row so it doesn't
+       fight with the highlighted label tone. */
+    color: rgba(34, 133, 71, 0.65);
+}
+.dark .cfg-dropdown-item.active .cfg-dropdown-hint {
+    color: rgba(116, 233, 164, 0.6);
+}
+/* The active row gets a trailing brand-green checkmark via a Font Awesome
+   pseudo-element so every dropdown (chat / vision / image / asr / tts / etc.)
+   surfaces "this is what's currently selected" without per-call JS plumbing.
+   When a hint is present, the ✓ sits to its right with a small gap; without
+   a hint, margin-left:auto pushes the ✓ flush against the right edge. */
+.cfg-dropdown-item.active::after {
+    content: '\f00c';                  /* FontAwesome check glyph */
+    font-family: 'Font Awesome 6 Free', 'Font Awesome 5 Free', 'FontAwesome';
+    font-weight: 900;
+    margin-left: auto;
+    padding-left: 12px;
+    color: #4abe6e;
+    font-size: 11px;
+    flex-shrink: 0;
+}
+.cfg-dropdown-item.active:has(.cfg-dropdown-hint)::after {
+    /* When hint occupies the auto-margin slot, the ✓ no longer benefits
+       from `margin-left: auto`; replace it with a small fixed gap so the
+       ✓ trails the hint cleanly. */
+    margin-left: 0;
+    padding-left: 10px;
+}
 
 /* API Key masking via CSS (avoids browser password prompts) */
 .cfg-key-masked {
@@ -732,6 +784,77 @@
     text-security: disc;
 }
 
+/* Provider logo image — vendors flagged as `provider-logo-invert-dark`
+   ship a black wordmark that disappears on the dark canvas; we invert their
+   luminance only in dark mode so the brand stays recognizable without
+   touching multi-color marks like Google/MiniMax. */
+.provider-logo-img {
+    object-fit: contain;
+    object-position: center;
+}
+.dark .provider-logo-invert-dark {
+    filter: invert(1) brightness(1.15);
+}
+
+/* Models page — provider dropdown rows.
+   Configured rows look like ordinary picker entries; the .active row's
+   trailing brand-green ✓ already announces "this is what's selected"
+   (handled globally by .cfg-dropdown-item.active::after above).
+   Unconfigured rows are visually subdued and carry a trailing gear icon
+   as a "click to set up" affordance. */
+.cap-provider-label {
+    flex: 1 1 auto;
+    overflow: hidden;
+    text-overflow: ellipsis;
+}
+.cap-provider-gear {
+    margin-left: auto;
+    padding-left: 12px;
+    color: #94a3b8;
+    font-size: 11px;
+    flex-shrink: 0;
+}
+.cap-provider-item.cap-provider-unconfigured {
+    color: #94a3b8;
+}
+.dark .cap-provider-item.cap-provider-unconfigured {
+    color: #64748b;
+}
+.cap-provider-item.cap-provider-unconfigured:hover {
+    color: #475569;
+}
+.dark .cap-provider-item.cap-provider-unconfigured:hover {
+    color: #cbd5e1;
+}
+.cap-provider-item.cap-provider-unconfigured:hover .cap-provider-gear {
+    color: #475569;
+}
+.dark .cap-provider-item.cap-provider-unconfigured:hover .cap-provider-gear {
+    color: #cbd5e1;
+}
+/* If the active row ever lands on an unconfigured vendor (defensive — the
+   click handler normally diverts to the modal), suppress the global ✓ so
+   the gear remains the sole trailing icon and the row keeps reading as
+   "needs setup" rather than "already selected". */
+.cap-provider-item.cap-provider-unconfigured.active::after {
+    content: none;
+}
+
+/* "Add vendor" modal picker — each configured row carries a static
+   brand-green ✓ via decorateVendorModalPicker so users can see what's set
+   up at a glance. The active row's global ✓ is suppressed here to avoid
+   showing two checks side by side on configured + selected rows. */
+.vendor-picker-item.active::after {
+    content: none;
+}
+.vendor-picker-configured-mark {
+    margin-left: auto;
+    padding-left: 12px;
+    color: #4abe6e;
+    font-size: 11px;
+    flex-shrink: 0;
+}
+
 /* Chat Input */
 #chat-input {
     resize: none; height: 42px; max-height: 180px;
@@ -1171,3 +1294,76 @@
     overflow: hidden;
     min-height: 2.5em;  /* ~2 lines at text-sm leading-relaxed */
 }
+
+/* --------------------------------------------------------------------
+ * Voice pill — compact custom audio player used by mic uploads and TTS
+ * replies. Replaces the bulky native <audio controls> with a play/pause
+ * icon + thin progress bar + duration counter so it blends into chat
+ * bubbles without the chrome-grey browser default look.
+ * ------------------------------------------------------------------ */
+.voice-pill {
+    display: inline-flex;
+    align-items: center;
+    gap: 8px;
+    padding: 6px 10px;
+    border-radius: 999px;
+    background: rgba(15, 23, 42, 0.05);
+    color: rgb(71, 85, 105);
+    font-size: 12px;
+    line-height: 1;
+    max-width: 240px;
+    user-select: none;
+    cursor: default;
+}
+.dark .voice-pill {
+    background: rgba(255, 255, 255, 0.08);
+    color: rgb(203, 213, 225);
+}
+.voice-pill[data-loading="1"] {
+    opacity: 0.65;
+}
+.voice-pill-btn {
+    width: 22px;
+    height: 22px;
+    border-radius: 999px;
+    display: inline-flex;
+    align-items: center;
+    justify-content: center;
+    background: var(--color-primary-500, #2563eb);
+    color: #fff;
+    flex-shrink: 0;
+    cursor: pointer;
+    transition: transform 0.1s ease;
+}
+.voice-pill-btn:hover { transform: scale(1.05); }
+.voice-pill-btn i { font-size: 9px; margin-left: 1px; }
+.voice-pill-btn[data-state="play"] i { margin-left: 2px; }
+.voice-pill-btn[data-state="pause"] i { margin-left: 0; }
+.voice-pill-track {
+    flex: 1;
+    height: 3px;
+    border-radius: 999px;
+    background: rgba(100, 116, 139, 0.25);
+    overflow: hidden;
+    min-width: 70px;
+}
+.dark .voice-pill-track {
+    background: rgba(148, 163, 184, 0.25);
+}
+.voice-pill-fill {
+    height: 100%;
+    width: 0%;
+    background: var(--color-primary-500, #2563eb);
+    border-radius: inherit;
+    transition: width 0.1s linear;
+}
+.voice-pill-time {
+    font-variant-numeric: tabular-nums;
+    font-size: 11px;
+    color: inherit;
+    opacity: 0.75;
+    flex-shrink: 0;
+    min-width: 28px;
+    text-align: right;
+}
+.voice-pill audio { display: none; }
diff --git a/channel/web/static/js/console.js b/channel/web/static/js/console.js
index fa094664..4e2d99e5 100644
--- a/channel/web/static/js/console.js
+++ b/channel/web/static/js/console.js
@@ -14,9 +14,70 @@ const I18N = {
     zh: {
         console: '控制台',
         nav_chat: '对话', nav_manage: '管理', nav_monitor: '监控',
-        menu_chat: '对话', menu_config: '配置', menu_skills: '技能',
+        menu_chat: '对话', menu_config: '配置', menu_models: '模型', menu_skills: '技能',
         menu_memory: '记忆', menu_knowledge: '知识', menu_channels: '通道', menu_tasks: '定时',
         menu_logs: '日志',
+        models_title: '模型管理',
+        models_desc: '统一管理对话、视觉、语音、向量、图像、搜索能力',
+        models_section_vendors: '厂商凭据',
+        models_section_vendors_desc: '一处配置，多个能力共享',
+        models_section_capabilities: '模型能力',
+        models_add_vendor: '添加厂商',
+        models_provider: '厂商',
+        models_model: '模型',
+        models_voice: '音色',
+        models_configured: '已配置',
+        models_not_configured: '未配置',
+        models_pick_to_configure: '选择以配置',
+        models_clear_credential: '清除凭据',
+        models_base_default_hint: '留空将使用官方默认地址',
+        models_base_default: '默认',
+        models_capability_chat: '主模型',
+        models_capability_chat_desc: '驱动对话与 Agent 推理',
+        models_capability_vision: '图像理解',
+        models_capability_vision_desc: '识别图片内容；未指定时自动跟随主模型',
+        models_capability_image: '图像生成',
+        models_capability_image_desc: '生成图片，可固定厂商或跟随主模型',
+        models_auto_using: '当前优先使用',
+        models_capability_asr: '语音识别',
+        models_capability_asr_desc: '语音转文字',
+        models_capability_tts: '语音合成',
+        models_capability_tts_desc: '文字转语音',
+        models_capability_embedding: '向量',
+        models_capability_embedding_desc: '记忆与知识的向量化',
+        models_capability_search: '联网搜索',
+        models_capability_search_desc: '实时网页检索能力，在搜索工具中使用',
+        models_strategy_auto: '自动',
+        models_search_strategy_label: '策略',
+        models_search_strategy_fixed: '指定',
+        models_search_strategy_auto_hint: '从已配置厂商中自动选择',
+        models_search_strategy_fixed_hint: '指定使用搜索厂商',
+        models_pending_config: '待配置',
+        models_search_available_label: '可用搜索厂商：',
+        models_search_none_configured: '暂未启用任何搜索厂商，点击添加',
+        models_search_add_provider: '添加厂商',
+        models_search_add_desc: '选择一个搜索厂商进行配置',
+        models_search_bocha_title: '配置博查 API Key',
+        models_search_bocha_desc: '前往博查开放平台创建 API Key',
+        models_search_edit_hint: '点击修改配置',
+        models_unavailable: '不可用',
+        models_set_via_env: '通过环境变量启用',
+        models_dim_label: '维度',
+        models_save_success: '已保存',
+        models_save_failed: '保存失败',
+        models_cleared: '已清除',
+        models_clear_failed: '清除失败',
+        models_embedding_change_title: '更改向量模型',
+        models_embedding_change_msg: '切换向量模型后，已有索引将失效，需要重建。是否继续？',
+        models_embedding_saved_title: '向量模型已更新',
+        models_embedding_saved_msg: '请在聊天框输入 /memory rebuild-index 重建索引。',
+        models_embedding_saved_ok: '去执行',
+        models_pick_provider: '待选择',
+        models_clear_confirm_title: '清除厂商凭据',
+        models_clear_confirm_msg: '确认清除该厂商的 API Key 与 Base URL 吗？相关能力将不再可用。',
+        cancel: '取消',
+        save: '保存',
+        ok: '确定',
         knowledge_title: '知识库', knowledge_desc: '浏览和探索你的知识库',
         knowledge_tab_docs: '文档', knowledge_tab_graph: '图谱',
         knowledge_loading: '加载知识库中...', knowledge_loading_desc: '知识页面将显示在这里',
@@ -33,6 +94,7 @@ const I18N = {
         input_placeholder: '输入消息，或输入 / 使用指令',
         config_title: '配置管理', config_desc: '管理模型和 Agent 配置',
         config_model: '模型配置', config_agent: 'Agent 配置',
+        config_model_advanced: '高级配置',
         config_channel: '通道配置',
         config_agent_enabled: 'Agent 模式',
         config_max_tokens: '最大上下文 Token', config_max_tokens_hint: '对话中 Agent 能输入的最大 Token 长度，超过后会智能压缩处理',
@@ -106,6 +168,17 @@ const I18N = {
         tip_clear_context: '清除上下文',
         tip_attach: '添加附件',
         attach_menu_file: '上传文件',
+        mic_idle_title: '点击录音 / 再按一次结束',
+        mic_recording_title: '录音中，再次点击结束',
+        mic_busy_title: '识别中…',
+        mic_permission_denied: '无法访问麦克风，请检查浏览器权限',
+        mic_too_short: '录音太短，请重试',
+        mic_error: '语音识别失败',
+        speak_msg: '朗读这段回复',
+        voice_reply_mode_label: '语音回复策略',
+        voice_reply_off: '关闭',
+        voice_reply_if_voice: '仅语音问/语音答',
+        voice_reply_always: '总是语音回复',
         attach_menu_folder: '上传文件夹',
         confirm_yes: '确认',
         confirm_cancel: '取消',
@@ -115,9 +188,70 @@ const I18N = {
     en: {
         console: 'Console',
         nav_chat: 'Chat', nav_manage: 'Management', nav_monitor: 'Monitor',
-        menu_chat: 'Chat', menu_config: 'Config', menu_skills: 'Skills',
+        menu_chat: 'Chat', menu_config: 'Config', menu_models: 'Models', menu_skills: 'Skills',
         menu_memory: 'Memory', menu_knowledge: 'Knowledge', menu_channels: 'Channels', menu_tasks: 'Tasks',
         menu_logs: 'Logs',
+        models_title: 'Models',
+        models_desc: 'Manage chat, vision, voice, embedding, image and search capabilities in one place',
+        models_section_vendors: 'Vendor Credentials',
+        models_section_vendors_desc: 'Configured once, shared by every capability',
+        models_section_capabilities: 'Capabilities',
+        models_add_vendor: 'Add Vendor',
+        models_provider: 'Provider',
+        models_model: 'Model',
+        models_voice: 'Voice',
+        models_configured: 'configured',
+        models_not_configured: 'not configured',
+        models_pick_to_configure: 'pick to configure',
+        models_clear_credential: 'Clear credentials',
+        models_base_default_hint: 'Leave blank to use the official default base URL',
+        models_base_default: 'Default',
+        models_capability_chat: 'Main Model',
+        models_capability_chat_desc: 'Powers chat and agent reasoning',
+        models_capability_vision: 'Image Understanding',
+        models_capability_vision_desc: 'Reads images; auto-follows main model when unspecified',
+        models_capability_image: 'Image Generation',
+        models_capability_image_desc: 'Generate images; pin a vendor or follow the main model',
+        models_auto_using: 'Preferred',
+        models_capability_asr: 'Speech Recognition',
+        models_capability_asr_desc: 'Voice to text',
+        models_capability_tts: 'Speech Synthesis',
+        models_capability_tts_desc: 'Text to voice',
+        models_capability_embedding: 'Embedding',
+        models_capability_embedding_desc: 'Vectorizes memory and knowledge',
+        models_capability_search: 'Web Search',
+        models_capability_search_desc: 'Real-time web retrieval',
+        models_strategy_auto: 'auto',
+        models_search_strategy_label: 'Strategy',
+        models_search_strategy_fixed: 'Pinned',
+        models_search_strategy_auto_hint: 'Auto-pick from configured providers',
+        models_search_strategy_fixed_hint: 'Always use a specific provider',
+        models_pending_config: 'Pending setup',
+        models_search_available_label: 'Available:',
+        models_search_none_configured: 'No search provider enabled yet — click add.',
+        models_search_add_provider: 'Add provider',
+        models_search_add_desc: 'Pick a search provider to configure',
+        models_search_bocha_title: 'Configure Bocha API Key',
+        models_search_bocha_desc: 'Create a key at the Bocha open platform.',
+        models_search_edit_hint: 'Click to edit',
+        models_unavailable: 'unavailable',
+        models_set_via_env: 'enable via environment variable',
+        models_dim_label: 'dim',
+        models_save_success: 'Saved',
+        models_save_failed: 'Save failed',
+        models_cleared: 'Cleared',
+        models_clear_failed: 'Clear failed',
+        models_embedding_change_title: 'Change embedding model',
+        models_embedding_change_msg: 'Switching the embedding model invalidates the existing index — a rebuild will be needed. Continue?',
+        models_embedding_saved_title: 'Embedding model updated',
+        models_embedding_saved_msg: 'Send /memory rebuild-index in the chat to rebuild the index.',
+        models_embedding_saved_ok: 'Go',
+        models_pick_provider: 'Pick a provider',
+        models_clear_confirm_title: 'Clear vendor credentials',
+        models_clear_confirm_msg: 'Remove this vendor\'s API Key and Base URL? Capabilities relying on it will stop working.',
+        cancel: 'Cancel',
+        save: 'Save',
+        ok: 'OK',
         knowledge_title: 'Knowledge', knowledge_desc: 'Browse and explore your knowledge base',
         knowledge_tab_docs: 'Documents', knowledge_tab_graph: 'Graph',
         knowledge_loading: 'Loading knowledge base...', knowledge_loading_desc: 'Knowledge pages will be displayed here',
@@ -134,6 +268,7 @@ const I18N = {
         input_placeholder: 'Type a message, or press / for commands',
         config_title: 'Configuration', config_desc: 'Manage model and agent settings',
         config_model: 'Model Configuration', config_agent: 'Agent Configuration',
+        config_model_advanced: 'Advanced',
         config_channel: 'Channel Configuration',
         config_agent_enabled: 'Agent Mode',
         config_max_tokens: 'Max Context Tokens', config_max_tokens_hint: 'Max tokens the Agent can input per conversation, auto-compressed when exceeded',
@@ -207,6 +342,17 @@ const I18N = {
         tip_clear_context: 'Clear Context',
         tip_attach: 'Add Attachment',
         attach_menu_file: 'Upload File',
+        mic_idle_title: 'Click to record, click again to stop',
+        mic_recording_title: 'Recording, click to stop',
+        mic_busy_title: 'Transcribing…',
+        mic_permission_denied: 'Cannot access microphone — check browser permissions',
+        mic_too_short: 'Recording too short, please retry',
+        mic_error: 'Speech recognition failed',
+        speak_msg: 'Read this reply aloud',
+        voice_reply_mode_label: 'Voice reply policy',
+        voice_reply_off: 'Off',
+        voice_reply_if_voice: 'Voice only if voice input',
+        voice_reply_always: 'Always reply with voice',
         attach_menu_folder: 'Upload Folder',
         confirm_yes: 'Confirm',
         confirm_cancel: 'Cancel',
@@ -244,6 +390,18 @@ function toggleLanguage() {
     localStorage.setItem('cow_lang', currentLang);
     applyI18n();
     _applyInputTooltips();
+    // Re-render views whose DOM is built in JS (data-i18n alone does not
+    // cover strings interpolated via t() into innerHTML).
+    try { rerenderDynamicViews(); } catch (e) {}
+}
+
+// Re-draw views whose markup is built in JS once the language has changed.
+// Only the models view is handled, and only when modelsState already holds
+// providers or capabilities (presumably filled by an earlier load - confirm).
+function rerenderDynamicViews() {
+    if (currentView !== 'models' || typeof renderModelsView !== 'function') return;
+    const loaded = modelsState && (modelsState.providers || modelsState.capabilities);
+    if (loaded) renderModelsView();
+}
 
 // Floating tooltip portal for [data-tip-key] elements. Tooltip nodes are
@@ -326,6 +484,7 @@ function toggleTheme() {
 const VIEW_META = {
     chat:     { group: 'nav_chat',    page: 'menu_chat' },
     config:   { group: 'nav_manage',  page: 'menu_config' },
+    models:   { group: 'nav_manage',  page: 'menu_models' },
     skills:   { group: 'nav_manage',  page: 'menu_skills' },
     memory:   { group: 'nav_manage',  page: 'menu_memory' },
     knowledge:{ group: 'nav_manage',  page: 'menu_knowledge' },
@@ -612,6 +771,229 @@ if (!supportsDirectoryUpload && attachFolderOption) {
     attachFolderOption.classList.add('hidden');
 }
 
+// ---------------- Mic button: in-page voice input via the configured ASR provider ----------------
+// Click the mic once to start recording and again to stop. The recording is
+// POSTed to /api/voice/asr; on success the recognised text and audio URL are
+// handed to sendVoiceMessage(). The button is hidden when the browser lacks
+// getUserMedia or MediaRecorder.
+(function setupMicButton() {
+    const micBtn = document.getElementById('mic-btn');
+    if (!micBtn) return;
+    if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia ||
+        typeof window.MediaRecorder === 'undefined') {
+        micBtn.style.display = 'none';
+        return;
+    }
+
+    let mediaRecorder = null;
+    let stream = null;
+    let chunks = [];
+    let recording = false;
+    // True while start() is still acquiring the microphone. Clicks landing in
+    // that window are ignored: a second start() would overwrite `stream` and
+    // `mediaRecorder`, leaving the first stream open with nothing to stop it.
+    let starting = false;
+    // Date.now() taken when the current recording began.
+    let recordStartedAt = 0;
+
+    // Button appearance for the three states: idle / recording / uploading.
+    const setIdle = () => {
+        recording = false;
+        micBtn.classList.remove('text-red-500', 'animate-pulse', 'text-primary-500');
+        micBtn.classList.add('text-slate-400');
+        micBtn.querySelector('i').className = 'fas fa-microphone text-sm';
+        micBtn.title = t('mic_idle_title');
+    };
+    const setRecording = () => {
+        recording = true;
+        micBtn.classList.remove('text-slate-400', 'text-primary-500');
+        micBtn.classList.add('text-red-500', 'animate-pulse');
+        micBtn.querySelector('i').className = 'fas fa-stop text-sm';
+        micBtn.title = t('mic_recording_title');
+    };
+    // Note: setBusy leaves `recording` true, so clicks during the upload fall
+    // through to stop(), which is a no-op once the recorder is inactive.
+    const setBusy = () => {
+        micBtn.classList.remove('text-red-500', 'animate-pulse', 'text-slate-400');
+        micBtn.classList.add('text-primary-500');
+        micBtn.querySelector('i').className = 'fas fa-spinner fa-spin text-sm';
+        micBtn.title = t('mic_busy_title');
+    };
+
+    // First container/codec from the list that this browser can record, or ''
+    // to let MediaRecorder choose its own default.
+    const pickMimeType = () => {
+        const candidates = [
+            'audio/webm;codecs=opus',
+            'audio/webm',
+            'audio/ogg;codecs=opus',
+            'audio/mp4',
+        ];
+        for (const m of candidates) {
+            if (window.MediaRecorder.isTypeSupported && MediaRecorder.isTypeSupported(m)) {
+                return m;
+            }
+        }
+        return '';
+    };
+
+    // Stop every track of the captured stream and drop the reference.
+    const stopStream = () => {
+        if (stream) {
+            // `track`, not `t`: avoid shadowing the global i18n helper t().
+            stream.getTracks().forEach(track => track.stop());
+            stream = null;
+        }
+    };
+
+    // Handle of whichever tip timer is pending: first the 2s "start fading"
+    // timer, then the 250ms "remove from DOM" timer.
+    let _micTipTimer = null;
+    const flashError = (msg) => {
+        console.warn('[mic]', msg);
+        // Pop a small bubble above the mic so the user actually notices it.
+        // The mic lives inside a relatively-positioned wrapper around the
+        // textarea (see chat.html), so we hang the tip off that wrapper.
+        const wrapper = micBtn.parentElement;
+        if (!wrapper) return;
+        let tip = wrapper.querySelector('.mic-tip');
+        if (!tip) {
+            tip = document.createElement('div');
+            tip.className = 'mic-tip absolute right-1 bottom-full mb-2 px-2 py-1 rounded-md '
+                + 'text-xs text-white bg-slate-800/90 dark:bg-slate-700/90 shadow-md '
+                + 'pointer-events-none whitespace-nowrap z-10';
+            wrapper.appendChild(tip);
+        }
+        tip.textContent = msg;
+        tip.style.opacity = '1';
+        if (_micTipTimer) clearTimeout(_micTipTimer);
+        _micTipTimer = setTimeout(() => {
+            tip.style.opacity = '0';
+            tip.style.transition = 'opacity 200ms';
+            // Track the removal timer as well, so an error that arrives
+            // mid-fade cancels it instead of losing its reused bubble.
+            _micTipTimer = setTimeout(() => {
+                _micTipTimer = null;
+                tip.remove();
+            }, 250);
+        }, 2000);
+    };
+
+    // Send the recording to /api/voice/asr as multipart field "file" and
+    // forward the recognised text; any failure is shown via flashError().
+    const upload = async (blob, ext) => {
+        setBusy();
+        const fd = new FormData();
+        fd.append('file', blob, `recording.${ext}`);
+        try {
+            const resp = await fetch('/api/voice/asr', { method: 'POST', body: fd });
+            const data = await resp.json();
+            if (data.status === 'success' && data.text) {
+                // Voice-message UX: drop the recording into the conversation
+                // as a playable bubble with the caption underneath, then
+                // dispatch the recognised text through the regular send path.
+                sendVoiceMessage(data.text, data.audio_url);
+            } else {
+                flashError(data.message || t('mic_error'));
+            }
+        } catch (e) {
+            flashError(t('mic_error') + ': ' + e.message);
+        } finally {
+            setIdle();
+        }
+    };
+
+    // Acquire the microphone, wire up the recorder and start recording.
+    const beginRecording = async () => {
+        try {
+            stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+        } catch (e) {
+            flashError(t('mic_permission_denied'));
+            return;
+        }
+        chunks = [];
+        const mimeType = pickMimeType();
+        try {
+            mediaRecorder = mimeType
+                ? new MediaRecorder(stream, { mimeType })
+                : new MediaRecorder(stream);
+        } catch (e) {
+            stopStream();
+            flashError(t('mic_error') + ': ' + e.message);
+            return;
+        }
+        mediaRecorder.ondataavailable = (ev) => {
+            if (ev.data && ev.data.size > 0) chunks.push(ev.data);
+        };
+        mediaRecorder.onstop = () => {
+            stopStream();
+            const blob = new Blob(chunks, { type: mediaRecorder.mimeType || 'audio/webm' });
+            // Map mime -> extension so the server picks the right file suffix.
+            const mt = (mediaRecorder.mimeType || 'audio/webm').split(';')[0];
+            const extMap = {
+                'audio/webm': 'webm', 'audio/ogg': 'ogg',
+                'audio/mp4': 'm4a',   'audio/mpeg': 'mp3',
+            };
+            const ext = extMap[mt] || 'webm';
+            // 256 bytes ~ container header only, no actual audio. Anything
+            // below that we treat as "tapped by mistake".
+            if (blob.size < 256) {
+                setIdle();
+                flashError(t('mic_too_short'));
+                return;
+            }
+            upload(blob, ext);
+        };
+        // timeslice=250ms: force the recorder to flush a chunk every 250ms.
+        // Without it some browsers wait for stop() before producing any data,
+        // which loses the audio on very short taps.
+        mediaRecorder.start(250);
+        recordStartedAt = Date.now();
+        setRecording();
+    };
+
+    // Click entry point for "not recording": runs beginRecording() at most
+    // once at a time.
+    const start = async () => {
+        if (starting) return;
+        starting = true;
+        try {
+            await beginRecording();
+        } finally {
+            starting = false;
+        }
+    };
+
+    const stop = () => {
+        if (mediaRecorder && mediaRecorder.state !== 'inactive') {
+            mediaRecorder.stop();
+        }
+    };
+
+    // Stop, but never earlier than 350ms after the recording began.
+    const stopWithMinDuration = () => {
+        const elapsed = Date.now() - recordStartedAt;
+        const minMs = 350;
+        if (elapsed < minMs) {
+            // Give the recorder a moment to capture at least one chunk
+            // before we tell it to stop.
+            setTimeout(() => stop(), minMs - elapsed);
+        } else {
+            stop();
+        }
+    };
+
+    micBtn.addEventListener('click', () => {
+        if (recording) {
+            stopWithMinDuration();
+        } else {
+            start();
+        }
+    });
+
+    setIdle();
+})();
+
 // Smart auto-scroll: pause when user scrolls up, resume when near bottom
 let _autoScrollEnabled = true;
 const _SCROLL_THRESHOLD = 80; // px from bottom to re-enable auto-scroll
@@ -1155,6 +1499,88 @@ document.querySelectorAll('.example-card').forEach(card => {
     });
 });
 
+// Voice-message variant of sendMessage(): renders a playable audio bubble
+// with the ASR caption, then dispatches the recognised text to /message
+// through the same SSE/loading flow as a typed message.
+function sendVoiceMessage(text, audioUrl) {
+    text = (text || '').trim();
+    if (!text) return;
+
+    inputHistory.push(text);
+    historyIdx = -1;
+    historySavedDraft = '';
+
+    const ws = document.getElementById('welcome-screen');
+    const isFirstMessage = !!ws;
+    if (ws) ws.remove();
+
+    const titleInfo = isFirstMessage ? { sid: sessionId, userMsg: text } : null;
+    const timestamp = new Date();
+    addUserVoiceMessage(audioUrl, text, timestamp);
+    const loadingEl = addLoadingIndicator();
+
+    const body = {
+        session_id: sessionId,
+        message: text,
+        stream: true,
+        timestamp: timestamp.toISOString(),
+        is_voice: true,
+    };
+
+    const MAX_RETRIES = 2;
+    const RETRY_DELAY_MS = 1000;
+    function postWithRetry(attempt) {
+        fetch('/message', {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify(body)
+        })
+        .then(r => r.json())
+        .then(data => {
+            if (data.status === 'success') {
+                if (data.stream) {
+                    startSSE(data.request_id, loadingEl, timestamp, titleInfo);
+                } else {
+                    loadingContainers[data.request_id] = loadingEl;
+                }
+            } else {
+                loadingEl.remove();
+                addBotMessage(t('error_send'), new Date());
+            }
+        })
+        .catch(err => {
+            if (attempt < MAX_RETRIES) {
+                setTimeout(() => postWithRetry(attempt + 1), RETRY_DELAY_MS * (attempt + 1));
+                return;
+            }
+            loadingEl.remove();
+            addBotMessage(t('error_send'), new Date());
+        });
+    }
+    postWithRetry(0);
+}
+
+function addUserVoiceMessage(audioUrl, caption, timestamp) {
+    const el = document.createElement('div');
+    el.className = 'flex justify-end px-4 sm:px-6 py-3';
+    // Voice-message bubble: compact voice pill on top, ASR caption beneath.
+    // The bubble keeps the same primary tint as a normal user message so
+    // it visually slots into the conversation flow.
+    el.innerHTML = `
+        <div class="max-w-[75%] sm:max-w-[60%]">
+            <div class="bg-slate-100 dark:bg-white/10 text-slate-700 dark:text-slate-200 rounded-2xl px-3 py-2 msg-content user-bubble">
+                <div class="user-voice-slot"></div>
+                ${caption ? `<div class="text-xs mt-1.5 leading-snug text-slate-500 dark:text-slate-400 whitespace-pre-wrap break-words">${escapeHtml(caption)}</div>` : ''}
+            </div>
+            <div class="text-xs text-slate-400 dark:text-slate-500 mt-1.5 text-right">${formatTime(timestamp)}</div>
+        </div>
+    `;
+    el.querySelector('.user-voice-slot').appendChild(renderVoicePill(audioUrl));
+    messagesDiv.appendChild(el);
+    _autoScrollEnabled = true;
+    scrollChatToBottom(true);
+}
+
 function sendMessage() {
     const text = chatInput.value.trim();
     if (!text && pendingAttachments.length === 0) return;
@@ -1264,12 +1690,16 @@ function startSSE(requestId, loadingEl, timestamp, titleInfo) {
                     <div class="agent-steps"></div>
                     <div class="answer-content sse-streaming"></div>
                     <div class="media-content"></div>
+                    <div class="bot-audio-slot"></div>
                 </div>
                 <div class="flex items-center gap-2 mt-1.5">
                     <span class="text-xs text-slate-400 dark:text-slate-500">${formatTime(timestamp)}</span>
                     <button class="copy-msg-btn text-xs text-slate-300 dark:text-slate-600 hover:text-slate-500 dark:hover:text-slate-400 transition-colors cursor-pointer" title="${currentLang === 'zh' ? '复制' : 'Copy'}" style="display:none">
                         <i class="fas fa-copy"></i>
                     </button>
+                    <button class="speak-msg-btn text-xs text-slate-300 dark:text-slate-600 hover:text-slate-500 dark:hover:text-slate-400 transition-colors cursor-pointer" title="${t('speak_msg')}" style="display:none;">
+                        <i class="fas fa-volume-up"></i>
+                    </button>
                 </div>
             </div>
         `;
@@ -1481,11 +1911,12 @@ function startSSE(requestId, loadingEl, timestamp, titleInfo) {
                 scrollChatToBottom();
 
             } else if (item.type === 'done') {
+                // Don't close the stream yet: the backend keeps it open
+                // for a short tail to deliver async attachments such as
+                // TTS audio (`voice_attach`). It will close the stream on
+                // its own via onerror once the tail expires.
                 done = true;
-                es.close();
-                delete activeStreams[requestId];
 
-                // item.content may be empty when "done" is only a stream-close signal after media.
                 const finalText = item.content || accumulatedText;
 
                 if (!botEl && finalText) {
@@ -1499,6 +1930,7 @@ function startSSE(requestId, loadingEl, timestamp, titleInfo) {
                     if (copyBtn && finalText) copyBtn.style.display = '';
                     applyHighlighting(botEl);
                 }
+                renderBotSpeakerButton(botEl, finalText);
                 scrollChatToBottom();
 
                 if (titleInfo) {
@@ -1508,6 +1940,15 @@ function startSSE(requestId, loadingEl, timestamp, titleInfo) {
                     loadSessionList();
                 }
 
+            } else if (item.type === 'voice_attach') {
+                // TTS finished — attach a playable audio element to the
+                // current bot bubble. The stream closes right after.
+                if (botEl && item.url) {
+                    attachAudioToBotBubble(botEl, item.url, { autoplay: true });
+                }
+                es.close();
+                delete activeStreams[requestId];
+
             } else if (item.type === 'error') {
                 done = true;
                 es.close();
@@ -1521,7 +1962,10 @@ function startSSE(requestId, loadingEl, timestamp, titleInfo) {
             es.close();
             delete activeStreams[requestId];
 
-            if (done) return;
+            if (done) {
+                // Normal close after the post-done tail expired; nothing to do.
+                return;
+            }
 
             if (currentReasoningEl) {
                 finalizeThinking(currentReasoningEl, reasoningStartTime, reasoningText);
@@ -1812,21 +2256,174 @@ function createBotMessageEl(content, timestamp, requestId, msg) {
             <div class="bg-white dark:bg-[#1A1A1A] border border-slate-200 dark:border-white/10 rounded-2xl px-4 py-3 text-sm leading-relaxed msg-content text-slate-700 dark:text-slate-200">
                 ${stepsHtml ? `<div class="agent-steps">${stepsHtml}</div>` : ''}
                 <div class="answer-content">${renderMarkdown(displayContent)}</div>
+                <div class="bot-audio-slot"></div>
             </div>
             <div class="flex items-center gap-2 mt-1.5">
                 <span class="text-xs text-slate-400 dark:text-slate-500">${formatTime(timestamp)}</span>
                 <button class="copy-msg-btn text-xs text-slate-300 dark:text-slate-600 hover:text-slate-500 dark:hover:text-slate-400 transition-colors cursor-pointer" title="${currentLang === 'zh' ? '复制' : 'Copy'}">
                     <i class="fas fa-copy"></i>
                 </button>
+                <button class="speak-msg-btn text-xs text-slate-300 dark:text-slate-600 hover:text-slate-500 dark:hover:text-slate-400 transition-colors cursor-pointer" title="${t('speak_msg')}" style="display:none;">
+                    <i class="fas fa-volume-up"></i>
+                </button>
             </div>
         </div>
     `;
     el.querySelector('.answer-content').dataset.rawMd = displayContent;
+    // Existing TTS attachment (history replay): mount the player up-front.
+    const existingAudio = msg && msg.extras && msg.extras.audio && msg.extras.audio.url;
+    if (existingAudio) {
+        attachAudioToBotBubble(el, existingAudio, { autoplay: false });
+    }
+    renderBotSpeakerButton(el, displayContent);
     applyHighlighting(el);
     bindChatKnowledgeLinks(el);
     return el;
 }
 
+// Append (or replace) a small audio player inside a bot bubble's
+// dedicated `.bot-audio-slot`. Used by both live TTS pushes and history
+// replay. Silent failures: never throws.
+function attachAudioToBotBubble(botEl, audioUrl, opts) {
+    try {
+        if (!botEl || !audioUrl) return;
+        const slot = botEl.querySelector('.bot-audio-slot');
+        if (!slot) return;
+        slot.innerHTML = '';
+        slot.style.marginTop = '6px';
+        const pill = renderVoicePill(audioUrl, { autoplay: !!(opts && opts.autoplay) });
+        slot.appendChild(pill);
+        const speakBtn = botEl.querySelector('.speak-msg-btn');
+        if (speakBtn) speakBtn.style.display = 'none';
+    } catch (_) { /* silent */ }
+}
+
+// Build a compact play/pause + progress + duration pill that wraps a
+// hidden <audio>. Returns the root element; safe to embed anywhere.
+function renderVoicePill(audioUrl, opts) {
+    opts = opts || {};
+    const wrap = document.createElement('div');
+    wrap.className = 'voice-pill';
+    wrap.innerHTML = `
+        <button type="button" class="voice-pill-btn" data-state="play" aria-label="play">
+            <i class="fas fa-play"></i>
+        </button>
+        <div class="voice-pill-track"><div class="voice-pill-fill"></div></div>
+        <span class="voice-pill-time">0:00</span>
+        <audio preload="metadata" src="${audioUrl}"></audio>
+    `;
+    const btn = wrap.querySelector('.voice-pill-btn');
+    const fill = wrap.querySelector('.voice-pill-fill');
+    const timeEl = wrap.querySelector('.voice-pill-time');
+    const audio = wrap.querySelector('audio');
+
+    const fmt = (s) => {
+        if (!isFinite(s) || s < 0) s = 0;
+        const m = Math.floor(s / 60);
+        const r = Math.floor(s % 60);
+        return `${m}:${r < 10 ? '0' : ''}${r}`;
+    };
+    const setIcon = (state) => {
+        btn.dataset.state = state;
+        btn.querySelector('i').className = state === 'pause' ? 'fas fa-pause' : 'fas fa-play';
+        btn.setAttribute('aria-label', state === 'pause' ? 'pause' : 'play');
+    };
+
+    audio.addEventListener('loadedmetadata', () => {
+        if (audio.duration && isFinite(audio.duration)) timeEl.textContent = fmt(audio.duration);
+    });
+    audio.addEventListener('timeupdate', () => {
+        const dur = audio.duration || 0;
+        if (dur > 0) {
+            fill.style.width = `${Math.min(100, (audio.currentTime / dur) * 100)}%`;
+            timeEl.textContent = fmt(dur - audio.currentTime);
+        }
+    });
+    audio.addEventListener('ended', () => {
+        setIcon('play');
+        fill.style.width = '0%';
+        timeEl.textContent = fmt(audio.duration || 0);
+    });
+    audio.addEventListener('play',  () => setIcon('pause'));
+    audio.addEventListener('pause', () => setIcon('play'));
+
+    btn.addEventListener('click', (e) => {
+        e.stopPropagation();
+        if (audio.paused) {
+            audio.play().catch(() => {});
+        } else {
+            audio.pause();
+        }
+    });
+
+    if (opts.autoplay) {
+        // Autoplay may be blocked by the browser; fall back silently and
+        // let the user tap the play button.
+        const tryPlay = () => audio.play().catch(() => {});
+        if (audio.readyState >= 2) tryPlay();
+        else audio.addEventListener('canplay', tryPlay, { once: true });
+    }
+    return wrap;
+}
+
+// Show the manual "read aloud" button when TTS is configured but the
+// bubble has no audio yet. Lazily probes capability via /api/models so
+// we don't expose the button when nothing can synthesize speech.
+function renderBotSpeakerButton(botEl, text) {
+    if (!botEl || !text || !text.trim()) return;
+    const btn = botEl.querySelector('.speak-msg-btn');
+    if (!btn) return;
+    if (botEl.querySelector('.bot-audio-slot audio')) return;
+    _isTtsReady().then(ready => {
+        if (!ready) return;
+        btn.style.display = '';
+        btn.onclick = () => _triggerManualTts(btn, botEl, text);
+    });
+}
+
+let _ttsReadyPromise = null;
+let _ttsReadyTs = 0;
+function _isTtsReady() {
+    // Cache for 30s to avoid hammering /api/models on every bubble.
+    if (_ttsReadyPromise && Date.now() - _ttsReadyTs < 30000) {
+        return _ttsReadyPromise;
+    }
+    _ttsReadyTs = Date.now();
+    _ttsReadyPromise = fetch('/api/models')
+        .then(r => r.json())
+        .then(data => {
+            const tts = data && data.capabilities && data.capabilities.tts;
+            if (!tts) return false;
+            return Boolean(tts.current_provider || tts.suggested_provider);
+        })
+        .catch(() => false);
+    return _ttsReadyPromise;
+}
+
+function _triggerManualTts(btn, botEl, text) {
+    if (btn.dataset.busy === '1') return;
+    btn.dataset.busy = '1';
+    const icon = btn.querySelector('i');
+    const prev = icon ? icon.className : '';
+    if (icon) icon.className = 'fas fa-spinner fa-spin';
+    fetch('/api/voice/tts', {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ text, session_id: sessionId }),
+    })
+        .then(r => r.json())
+        .then(data => {
+            if (data && data.status === 'success' && data.audio_url) {
+                attachAudioToBotBubble(botEl, data.audio_url, { autoplay: true });
+            }
+        })
+        .catch(() => {})
+        .finally(() => {
+            btn.dataset.busy = '0';
+            if (icon) icon.className = prev || 'fas fa-volume-up';
+        });
+}
+
 function addUserMessage(content, timestamp, attachments) {
     const el = createUserMessageEl(content, timestamp, attachments);
     messagesDiv.appendChild(el);
@@ -2478,7 +3075,12 @@ let cfgProviderValue = '';
 let cfgModelValue = '';
 
 // --- Custom dropdown helper ---
-function initDropdown(el, options, selectedValue, onChange) {
+function initDropdown(el, options, selectedValue, onChange, opts) {
+    // opts.placeholder: when set AND selectedValue is empty, render that text
+    // in a dim style instead of auto-selecting options[0]. Useful for
+    // "pick or empty" capabilities (asr / embedding) where we want the
+    // user to make an explicit choice.
+    opts = opts || {};
     const textEl = el.querySelector('.cfg-dropdown-text');
     const menuEl = el.querySelector('.cfg-dropdown-menu');
     const selEl = el.querySelector('.cfg-dropdown-selected');
@@ -2491,8 +3093,23 @@ function initDropdown(el, options, selectedValue, onChange) {
         options.forEach(opt => {
             const item = document.createElement('div');
             item.className = 'cfg-dropdown-item' + (opt.value === el._ddValue ? ' active' : '');
-            item.textContent = opt.label;
             item.dataset.value = opt.value;
+            // Hint is an optional dim secondary label rendered on the right
+            // side of the row (e.g. friendly brand name next to a technical
+            // model id). When absent the row degrades to the original
+            // single-string layout.
+            if (opt.hint) {
+                const labelEl = document.createElement('span');
+                labelEl.className = 'cfg-dropdown-label';
+                labelEl.textContent = opt.label;
+                const hintEl = document.createElement('span');
+                hintEl.className = 'cfg-dropdown-hint';
+                hintEl.textContent = opt.hint;
+                item.appendChild(labelEl);
+                item.appendChild(hintEl);
+            } else {
+                item.textContent = opt.label;
+            }
             item.addEventListener('click', (e) => {
                 e.stopPropagation();
                 el._ddValue = opt.value;
@@ -2505,8 +3122,20 @@ function initDropdown(el, options, selectedValue, onChange) {
             menuEl.appendChild(item);
         });
         const sel = options.find(o => o.value === el._ddValue);
-        textEl.textContent = sel ? sel.label : (options[0] ? options[0].label : '--');
-        if (!sel && options[0]) el._ddValue = options[0].value;
+        if (sel) {
+            textEl.textContent = sel.label;
+            textEl.classList.remove('text-slate-400', 'dark:text-slate-500');
+        } else if (opts.placeholder && !el._ddValue) {
+            // No selection yet — show the placeholder in muted style.
+            // Do NOT write a fallback value, so the dropdown stays
+            // "unsaved" until the user explicitly picks.
+            textEl.textContent = opts.placeholder;
+            textEl.classList.add('text-slate-400', 'dark:text-slate-500');
+        } else {
+            textEl.textContent = options[0] ? options[0].label : '--';
+            textEl.classList.remove('text-slate-400', 'dark:text-slate-500');
+            if (options[0]) el._ddValue = options[0].value;
+        }
     }
 
     render();
@@ -3113,12 +3742,14 @@ function closeMemoryViewer() {
 // =====================================================================
 // Custom Confirm Dialog
 // =====================================================================
-function showConfirmDialog({ title, message, okText, cancelText, onConfirm }) {
+function showConfirmDialog({ title, message, okText, cancelText, onConfirm, hideCancel }) {
     const overlay = document.getElementById('confirm-dialog-overlay');
     document.getElementById('confirm-dialog-title').textContent = title || '';
     document.getElementById('confirm-dialog-message').textContent = message || '';
     document.getElementById('confirm-dialog-ok').textContent = okText || 'OK';
-    document.getElementById('confirm-dialog-cancel').textContent = cancelText || t('channels_cancel');
+    const cancelBtn = document.getElementById('confirm-dialog-cancel');
+    cancelBtn.textContent = cancelText || t('channels_cancel');
+    cancelBtn.classList.toggle('hidden', !!hideCancel);
 
     function cleanup() {
         overlay.classList.add('hidden');
@@ -3131,13 +3762,1474 @@ function showConfirmDialog({ title, message, okText, cancelText, onConfirm }) {
     function onOverlayClick(e) { if (e.target === overlay) cleanup(); }
 
     const okBtn = document.getElementById('confirm-dialog-ok');
-    const cancelBtn = document.getElementById('confirm-dialog-cancel');
     okBtn.addEventListener('click', onOk);
     cancelBtn.addEventListener('click', onCancel);
     overlay.addEventListener('click', onOverlayClick);
     overlay.classList.remove('hidden');
 }
 
+// =====================================================================
+// Models View
+// =====================================================================
+// Capability cards rendered on the Models page. Order matters — main model
+// comes first because it transitively decides defaults for vision and image.
+// Icon palette is grouped by capability family:
+//   - chat                       → primary (brand green; the "main" capability)
+//   - vision + image             → blue    (everything visual)
+//   - asr + tts                  → amber   (everything audio)
+//   - embedding                  → purple  (vectors)
+//   - search                     → orange  (retrieval)
+// Each card carries explicit `iconChip` / `iconGlyph` class strings so
+// Tailwind's CDN JIT can see the literal class names — dynamic
+// `bg-${color}-50` strings would not be picked up reliably.
+// Per-entry fields: `id` keys into modelsState.capabilities and the card's
+// DOM id; `icon` is the Font Awesome glyph class; `titleKey` / `descKey`
+// are i18n keys resolved through t().
+// NOTE(review): `editable` and `needsModel` are not read by the rendering
+// code next to this table — presumably consumed by renderCapabilityBody;
+// confirm before relying on them.
+const MODELS_CAPABILITY_DEFS = [
+    { id: 'chat',      icon: 'fa-microchip',        editable: true,  needsModel: true,  titleKey: 'models_capability_chat',      descKey: 'models_capability_chat_desc',
+      iconChip: 'bg-primary-50 dark:bg-primary-900/30',  iconGlyph: 'text-primary-500' },
+    { id: 'vision',    icon: 'fa-eye',              editable: true,  needsModel: true,  titleKey: 'models_capability_vision',    descKey: 'models_capability_vision_desc',
+      iconChip: 'bg-blue-50 dark:bg-blue-900/30',        iconGlyph: 'text-blue-500' },
+    { id: 'image',     icon: 'fa-image',            editable: true,  needsModel: true,  titleKey: 'models_capability_image',     descKey: 'models_capability_image_desc',
+      iconChip: 'bg-blue-50 dark:bg-blue-900/30',        iconGlyph: 'text-blue-500' },
+    { id: 'asr',       icon: 'fa-microphone',       editable: true,  needsModel: false, titleKey: 'models_capability_asr',       descKey: 'models_capability_asr_desc',
+      iconChip: 'bg-amber-50 dark:bg-amber-900/30',      iconGlyph: 'text-amber-500' },
+    { id: 'tts',       icon: 'fa-volume-high',      editable: true,  needsModel: true,  titleKey: 'models_capability_tts',       descKey: 'models_capability_tts_desc',
+      iconChip: 'bg-amber-50 dark:bg-amber-900/30',      iconGlyph: 'text-amber-500' },
+    { id: 'embedding', icon: 'fa-vector-square',    editable: true,  needsModel: false, titleKey: 'models_capability_embedding', descKey: 'models_capability_embedding_desc',
+      iconChip: 'bg-purple-50 dark:bg-purple-900/30',    iconGlyph: 'text-purple-500' },
+    { id: 'search',    icon: 'fa-magnifying-glass', editable: true,  needsModel: false, titleKey: 'models_capability_search',    descKey: 'models_capability_search_desc',
+      iconChip: 'bg-orange-50 dark:bg-orange-900/30',    iconGlyph: 'text-orange-500' },
+];
+
+// Provider logos: when a real SVG exists at
+// `${MODELS_PROVIDER_LOGO_PATH}/<id>.svg` we use it; otherwise we fall back
+// to a neutral monogram chip. SVGs are loaded via an <img> whose onerror
+// handler removes the image, so layout stays stable when files are absent.
+// Vendors whose mark is rendered in pure (or near-pure) black are listed in
+// MODELS_PROVIDER_LOGO_DARK_INVERT — for those, we apply a CSS invert filter
+// in dark mode so the glyph stays visible against #1A1A1A.
+const MODELS_PROVIDER_LOGO_PATH = 'assets/logos';
+const MODELS_PROVIDER_LOGO_DARK_INVERT = new Set([
+    'openai',     // black wordmark
+    'moonshot',   // dark monogram
+    'zhipu',      // dark monogram
+    'custom',     // single-color slider glyph
+]);
+
+// Data behind the Models page: `providers` and `capabilities` are filled
+// from the /api/models response by loadModelsView() and read by the
+// render* helpers.
+let modelsState = { providers: [], capabilities: {} };
+
+// One-shot: { capabilityId, providerId } stashed before a Models reload,
+// consumed by renderCapabilityBody to preselect a just-configured vendor.
+let pendingCapabilitySelection = null;
+
+// `opts.preserveScroll` keeps the page's vertical scroll position across the
+// refresh. We capture it before unhiding the loading skeleton (which collapses
+// content height to zero) and restore it after the new content is mounted.
+// This matters when the user configures a vendor from inside a capability
+// card's dropdown — without preservation, the post-save reload bounces them
+// back to the top of the page, away from the card they were configuring.
+function loadModelsView(opts) {
+    const loading = document.getElementById('models-loading');
+    const content = document.getElementById('models-content');
+    if (!loading || !content) return;
+    const preserveScroll = !!(opts && opts.preserveScroll);
+    // The Models pane has its own scrollable container; capture its position
+    // (not window.scrollY) so we can put the user back exactly where they were.
+    const scroller = document.querySelector('#view-models .overflow-y-auto');
+    const savedTop = preserveScroll && scroller ? scroller.scrollTop : null;
+
+    loading.classList.remove('hidden');
+    content.classList.add('hidden');
+
+    fetch('/api/models').then(r => r.json()).then(data => {
+        if (data.status !== 'success') {
+            loading.innerHTML = `<span class="text-sm text-red-400">${escapeHtml(data.message || 'Failed to load')}</span>`;
+            return;
+        }
+        modelsState.providers = data.providers || [];
+        modelsState.capabilities = data.capabilities || {};
+        renderModelsView();
+        loading.classList.add('hidden');
+        content.classList.remove('hidden');
+        if (savedTop !== null && scroller) {
+            // Wait one frame for the new layout to settle, otherwise the
+            // restored scrollTop snaps to the previous (smaller) max.
+            requestAnimationFrame(() => { scroller.scrollTop = savedTop; });
+        }
+    }).catch(err => {
+        loading.innerHTML = `<span class="text-sm text-red-400">${escapeHtml(String(err))}</span>`;
+    });
+}
+
+function renderModelsView() {
+    const container = document.getElementById('models-content');
+    container.innerHTML = '';
+    container.appendChild(renderVendorsSection());
+    MODELS_CAPABILITY_DEFS.forEach(def => container.appendChild(renderCapabilityCard(def)));
+}
+
+// ---------- Vendor section (Layer 1) -----------------------------------
+
+function renderVendorsSection() {
+    const wrap = document.createElement('div');
+    wrap.className = 'bg-white dark:bg-[#1A1A1A] rounded-xl border border-slate-200 dark:border-white/10 p-6';
+
+    const configured = modelsState.providers.filter(p => p.configured);
+
+    const header = `
+        <div class="flex items-start gap-3 mb-5">
+            <div class="w-9 h-9 rounded-lg bg-primary-50 dark:bg-primary-900/30 flex items-center justify-center flex-shrink-0">
+                <i class="fas fa-key text-primary-500 text-sm"></i>
+            </div>
+            <div class="flex-1 min-w-0">
+                <h3 class="font-semibold text-slate-800 dark:text-slate-100">${t('models_section_vendors')}</h3>
+                <p class="text-xs text-slate-500 dark:text-slate-400 mt-0.5">${t('models_section_vendors_desc')}</p>
+            </div>
+            <span class="text-xs text-slate-400 dark:text-slate-500 mt-2 flex-shrink-0">${configured.length}/${modelsState.providers.length}</span>
+        </div>`;
+
+    let body;
+    if (configured.length === 0) {
+        body = `
+            <div class="flex flex-col items-center justify-center py-8 px-4 rounded-lg border border-dashed border-slate-200 dark:border-white/10">
+                <p class="text-sm text-slate-500 dark:text-slate-400 text-center">${t('models_not_configured')}</p>
+                <button onclick="openVendorModal('')"
+                        class="mt-3 px-3 py-1.5 rounded-lg text-xs font-medium bg-primary-50 dark:bg-primary-900/30 text-primary-600 dark:text-primary-400 hover:bg-primary-100 dark:hover:bg-primary-900/50 cursor-pointer transition-colors">
+                    <i class="fas fa-plus text-[10px] mr-1"></i>${t('models_add_vendor')}
+                </button>
+            </div>`;
+    } else {
+        body = `<div class="grid grid-cols-1 sm:grid-cols-2 gap-3">
+            ${configured.map(renderVendorChip).join('')}
+        </div>`;
+    }
+
+    wrap.innerHTML = header + body;
+    return wrap;
+}
+
+function renderVendorChip(p) {
+    // The masked API key is intentionally not surfaced here; it is shown
+    // inside the edit modal so the chip stays uncluttered and scannable.
+    return `
+        <button onclick="openVendorModal('${escapeHtml(p.id)}')"
+                class="group flex items-center gap-3 px-3 py-2.5 rounded-lg border border-slate-200 dark:border-white/10
+                       bg-slate-50 dark:bg-white/5 hover:border-primary-300 dark:hover:border-primary-500/50
+                       cursor-pointer transition-colors duration-150 text-left">
+            ${renderProviderLogo(p, 28)}
+            <span class="flex-1 min-w-0 text-sm font-medium text-slate-800 dark:text-slate-100 truncate">${escapeHtml(p.label)}</span>
+            <i class="fas fa-pen-to-square text-[11px] text-slate-400 dark:text-slate-500 group-hover:text-primary-500 transition-colors"></i>
+        </button>`;
+}
+
+// Render a uniformly-styled logo for a provider. Tries an SVG asset first; if
+// it 404s the <img> swaps itself for a monogram fallback via onerror.
+function renderProviderLogo(p, sizePx) {
+    const initial = (p.label || p.id || '?').slice(0, 1).toUpperCase();
+    const sz = sizePx || 32;
+    const url = `${MODELS_PROVIDER_LOGO_PATH}/${encodeURIComponent(p.id)}.svg`;
+    const fallbackId = `pl-${p.id}-${Math.random().toString(36).slice(2, 8)}`;
+    const imgClass = MODELS_PROVIDER_LOGO_DARK_INVERT.has(p.id)
+        ? 'absolute inset-0 m-auto provider-logo-img provider-logo-invert-dark'
+        : 'absolute inset-0 m-auto provider-logo-img';
+    return `
+        <span class="relative flex items-center justify-center rounded-lg bg-slate-100 dark:bg-white/10
+                     text-slate-600 dark:text-slate-300 flex-shrink-0 overflow-hidden"
+              style="width:${sz}px;height:${sz}px;">
+            <span id="${fallbackId}" class="text-xs font-bold">${escapeHtml(initial)}</span>
+            <img src="${url}" alt="" aria-hidden="true"
+                 class="${imgClass}"
+                 style="width:${Math.round(sz * 0.65)}px;height:${Math.round(sz * 0.65)}px;"
+                 onload="(function(el){var f=document.getElementById('${fallbackId}');if(f)f.style.display='none';})(this)"
+                 onerror="this.remove();">
+        </span>`;
+}
+
+// ---------- Capability cards (Layer 2) ---------------------------------
+
+function renderCapabilityCard(def) {
+    const cap = modelsState.capabilities[def.id] || {};
+    const wrap = document.createElement('div');
+    wrap.className = 'bg-white dark:bg-[#1A1A1A] rounded-xl border border-slate-200 dark:border-white/10 p-6';
+    wrap.id = `models-card-${def.id}`;
+
+    const headerRight = renderCapabilityHeaderTag(def, cap);
+
+    wrap.innerHTML = `
+        <div class="flex items-start gap-3 mb-5">
+            <div class="w-9 h-9 rounded-lg ${def.iconChip} flex items-center justify-center flex-shrink-0">
+                <i class="fas ${def.icon} ${def.iconGlyph} text-sm"></i>
+            </div>
+            <div class="flex-1 min-w-0">
+                <h3 class="font-semibold text-slate-800 dark:text-slate-100">${t(def.titleKey)}</h3>
+                <p class="text-xs text-slate-500 dark:text-slate-400 mt-0.5">${t(def.descKey)}</p>
+            </div>
+            ${headerRight}
+        </div>
+        <div class="space-y-4" data-cap-body="${def.id}"></div>`;
+
+    const body = wrap.querySelector(`[data-cap-body="${def.id}"]`);
+    renderCapabilityBody(def, cap, body);
+    return wrap;
+}
+
+// HTML for the tag shown at the right edge of a capability card header.
+// Currently always the empty string, i.e. no tag is rendered; `def` and
+// `cap` are accepted but unused.
+function renderCapabilityHeaderTag(def, cap) {
+    return '';
+}
+
+function _searchProviderLabel(cap, providerId) {
+    const list = (cap && cap.providers) || [];
+    const hit = list.find(p => p.id === providerId);
+    return hit ? hit.label : providerId;
+}
+
+// Search card body: strategy picker + (when fixed) provider picker + a
+// status row that surfaces which providers are ready and how to add the
+// missing ones. Three of the four backends piggy-back on model-vendor
+// credentials (zhipu / qianfan / linkai); bocha owns its own key under
+// tools.web_search and gets its own minimal credential modal.
+function renderSearchCapability(def, cap, body) {
+    const providers = cap.providers || [];
+    const configuredIds = cap.configured_providers || [];
+    const hasAny = configuredIds.length > 0;
+    const strategy = cap.strategy || 'auto';
+
+    body.innerHTML = `
+        <div>
+            <label class="block text-sm font-medium text-slate-600 dark:text-slate-400 mb-1.5">${t('models_search_strategy_label')}</label>
+            <div id="cap-search-strategy" class="cfg-dropdown" tabindex="0">
+                <div class="cfg-dropdown-selected">
+                    <span class="cfg-dropdown-text">--</span>
+                    <i class="fas fa-chevron-down cfg-dropdown-arrow"></i>
+                </div>
+                <div class="cfg-dropdown-menu"></div>
+            </div>
+        </div>
+        <div id="cap-search-provider-wrap" class="hidden">
+            <label class="block text-sm font-medium text-slate-600 dark:text-slate-400 mb-1.5">${t('models_provider')}</label>
+            <div id="cap-search-provider" class="cfg-dropdown" tabindex="0">
+                <div class="cfg-dropdown-selected">
+                    <span class="cfg-dropdown-text">--</span>
+                    <i class="fas fa-chevron-down cfg-dropdown-arrow"></i>
+                </div>
+                <div class="cfg-dropdown-menu"></div>
+            </div>
+        </div>
+        <div id="cap-search-summary"></div>
+        <div class="flex items-center justify-end gap-3 pt-1">
+            <span id="cap-search-status" class="text-xs text-primary-500 opacity-0 transition-opacity duration-300"></span>
+            <button onclick="saveSearchCapability()"
+                    class="px-4 py-2 rounded-lg bg-primary-500 hover:bg-primary-600 text-white text-sm font-medium
+                           cursor-pointer transition-colors duration-150 disabled:opacity-50 disabled:cursor-not-allowed">
+                ${t('save')}
+            </button>
+        </div>
+    `;
+
+    // Strategy dropdown — when no provider is configured the strategy
+    // value is meaningless, so we show a "待配置" placeholder instead of
+    // a default selection. Once any provider gets configured the saved
+    // strategy (or "auto") becomes the active value.
+    initDropdown(
+        body.querySelector('#cap-search-strategy'),
+        [
+            { value: 'auto',  label: t('models_strategy_auto'),         hint: t('models_search_strategy_auto_hint') },
+            { value: 'fixed', label: t('models_search_strategy_fixed'), hint: t('models_search_strategy_fixed_hint') },
+        ],
+        hasAny ? strategy : '',
+        (value) => _onSearchStrategyChange(cap, value, body),
+        hasAny ? null : { placeholder: t('models_pending_config') },
+    );
+
+    // Provider dropdown — populated with configured providers only;
+    // unconfigured ones cannot be pinned (they'd silently fall back).
+    const provOpts = configuredIds.map(id => ({
+        value: id,
+        label: _searchProviderLabel(cap, id),
+    }));
+    if (provOpts.length === 0) provOpts.push({ value: '', label: '--' });
+    initDropdown(
+        body.querySelector('#cap-search-provider'),
+        provOpts,
+        cap.fixed_provider || configuredIds[0] || '',
+        () => {},
+    );
+
+    _renderSearchSummary(body, cap);
+    _setSearchProviderPickerVisible(body, strategy === 'fixed' && hasAny);
+}
+
+function _onSearchStrategyChange(cap, value, body) {
+    const configuredIds = cap.configured_providers || [];
+    _setSearchProviderPickerVisible(body, value === 'fixed' && configuredIds.length > 0);
+}
+
+function _setSearchProviderPickerVisible(body, visible) {
+    const wrap = body.querySelector('#cap-search-provider-wrap');
+    if (!wrap) return;
+    if (visible) wrap.classList.remove('hidden');
+    else wrap.classList.add('hidden');
+}
+
+// Search summary line: just lists configured providers + a trailing "+
+// add" button. Unconfigured backends are hidden — the user picks one from
+// a small chooser when they click add. Empty state surfaces the same add
+// button as a primary CTA.
+function _renderSearchSummary(body, cap) {
+    const host = body.querySelector('#cap-search-summary');
+    if (!host) return;
+    const providers = cap.providers || [];
+    const configured = providers.filter(p => p.configured);
+    const missing = providers.filter(p => !p.configured);
+
+    const addBtn = missing.length
+        ? `<button type="button" id="cap-search-add-btn"
+                  class="inline-flex items-center gap-1 px-2 py-0.5 text-[11px] rounded-md cursor-pointer
+                         bg-slate-100 dark:bg-white/5 text-slate-500 dark:text-slate-400
+                         hover:bg-slate-200 dark:hover:bg-white/10 transition-colors">
+              <i class="fas fa-plus text-[10px]"></i>${t('models_search_add_provider')}
+           </button>`
+        : '';
+
+    if (configured.length === 0) {
+        host.innerHTML = `
+            <div class="flex items-center gap-2 text-xs text-slate-500 dark:text-slate-400">
+                <i class="fas fa-circle-info text-[10px] text-amber-500"></i>
+                <span>${t('models_search_none_configured')}</span>
+                ${addBtn}
+            </div>
+        `;
+    } else {
+        const chips = configured.map(p => `
+            <button type="button" data-search-edit-provider="${p.id}"
+                    title="${t('models_search_edit_hint')}"
+                    class="inline-flex items-center gap-1 px-2 py-0.5 text-[11px] rounded-md cursor-pointer
+                           bg-emerald-50 dark:bg-emerald-900/30 text-emerald-600 dark:text-emerald-400
+                           hover:bg-emerald-100 dark:hover:bg-emerald-900/50 transition-colors">
+                <i class="fas fa-check text-[10px]"></i>${escapeHtml(p.label)}
+            </button>
+        `).join('');
+        host.innerHTML = `
+            <div class="flex items-center flex-wrap gap-2 text-xs text-slate-500 dark:text-slate-400">
+                <span>${t('models_search_available_label')}</span>
+                ${chips}
+                ${addBtn}
+            </div>
+        `;
+    }
+
+    const addBtnEl = host.querySelector('#cap-search-add-btn');
+    if (addBtnEl) {
+        addBtnEl.addEventListener('click', (ev) => {
+            ev.preventDefault();
+            openSearchAddProviderPicker(missing);
+        });
+    }
+    host.querySelectorAll('[data-search-edit-provider]').forEach(el => {
+        el.addEventListener('click', (ev) => {
+            ev.preventDefault();
+            const pid = el.getAttribute('data-search-edit-provider');
+            const meta = (cap.providers || []).find(p => p.id === pid);
+            _launchSearchProviderConfig(pid, meta);
+        });
+    });
+}
+
+// Two-step add flow: click "+ 添加厂商" -> chooser dialog -> per-provider
+// credential editor. Bocha lands on the dedicated key modal; the others
+// piggy-back on the existing vendor credential modal.
+function openSearchAddProviderPicker(missingProviders) {
+    if (!missingProviders || missingProviders.length === 0) return;
+    if (missingProviders.length === 1) {
+        _launchSearchProviderConfig(missingProviders[0].id);
+        return;
+    }
+
+    const existing = document.getElementById('search-add-modal');
+    if (existing) existing.remove();
+
+    const rows = missingProviders.map(p => `
+        <button type="button" data-pid="${p.id}"
+                class="w-full flex items-center justify-between px-3 py-2.5 rounded-lg cursor-pointer
+                       bg-slate-50 dark:bg-white/5 hover:bg-slate-100 dark:hover:bg-white/10
+                       text-sm text-slate-700 dark:text-slate-200 transition-colors">
+            <span>${escapeHtml(p.label)}</span>
+            <i class="fas fa-chevron-right text-[10px] text-slate-400"></i>
+        </button>
+    `).join('');
+
+    const modal = document.createElement('div');
+    modal.id = 'search-add-modal';
+    modal.className = 'fixed inset-0 z-50 flex items-center justify-center bg-black/40 backdrop-blur-sm';
+    modal.innerHTML = `
+        <div class="bg-white dark:bg-[#1A1A1A] rounded-xl border border-slate-200 dark:border-white/10
+                    w-full max-w-md mx-4 p-6 shadow-xl">
+            <h3 class="text-lg font-semibold text-slate-800 dark:text-slate-100 mb-1">${t('models_search_add_provider')}</h3>
+            <p class="text-xs text-slate-500 dark:text-slate-400 mb-4">${t('models_search_add_desc')}</p>
+            <div class="space-y-2">${rows}</div>
+            <div class="flex items-center justify-end mt-5">
+                <button type="button" onclick="document.getElementById('search-add-modal').remove()"
+                        class="px-3 py-1.5 rounded-md text-sm text-slate-600 dark:text-slate-300
+                               hover:bg-slate-100 dark:hover:bg-white/5 transition-colors">
+                    ${t('cancel')}
+                </button>
+            </div>
+        </div>
+    `;
+    document.body.appendChild(modal);
+    modal.querySelectorAll('[data-pid]').forEach(el => {
+        el.addEventListener('click', () => {
+            const pid = el.getAttribute('data-pid');
+            modal.remove();
+            _launchSearchProviderConfig(pid);
+        });
+    });
+}
+
+function _launchSearchProviderConfig(providerId, providerMeta) {
+    if (providerId === 'bocha') {
+        openSearchBochaModal(providerMeta);
+    } else {
+        openVendorModal(providerId, () => loadModelsView({ preserveScroll: true }));
+    }
+}
+
+function saveSearchCapability() {
+    const strategyDd = document.getElementById('cap-search-strategy');
+    const providerDd = document.getElementById('cap-search-provider');
+    const strategy = strategyDd ? getDropdownValue(strategyDd) : 'auto';
+    const provider = (strategy === 'fixed' && providerDd) ? getDropdownValue(providerDd) : '';
+
+    fetch('/api/models', {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({
+            action: 'set_capability',
+            capability: 'search',
+            strategy,
+            provider,
+        }),
+    }).then(r => r.json()).then(data => {
+        if (data.status === 'success') {
+            showStatus('cap-search-status', 'models_save_success', false);
+            setTimeout(() => loadModelsView({ preserveScroll: true }), 400);
+        } else {
+            showStatus('cap-search-status', 'models_save_failed', true);
+        }
+    }).catch(() => showStatus('cap-search-status', 'models_save_failed', true));
+}
+
+// Minimal bocha API-key modal. Reuses the existing vendor-modal markup
+// helpers would be nice, but bocha isn't in PROVIDER_MODELS (it's not a
+// model vendor), so we render a tiny dedicated dialog.
+function openSearchBochaModal(providerMeta) {
+    const existing = document.getElementById('search-bocha-modal');
+    if (existing) existing.remove();
+
+    let masked = (providerMeta && providerMeta.api_key_masked) || '';
+    if (!masked) {
+        const searchCap = (modelsState && modelsState.capabilities && modelsState.capabilities.search) || {};
+        const bocha = (searchCap.providers || []).find(p => p.id === 'bocha');
+        if (bocha && bocha.api_key_masked) masked = bocha.api_key_masked;
+    }
+    const hasKey = !!masked;
+    const clearBtnHtml = hasKey
+        ? `<button type="button" id="search-bocha-clear"
+                  class="px-3 py-1.5 rounded-md text-xs text-red-500 dark:text-red-400
+                         hover:bg-red-50 dark:hover:bg-red-900/20 cursor-pointer transition-colors">
+              ${t('models_clear_credential')}
+           </button>`
+        : '';
+
+    const modal = document.createElement('div');
+    modal.id = 'search-bocha-modal';
+    modal.className = 'fixed inset-0 z-50 flex items-center justify-center bg-black/40 backdrop-blur-sm';
+    modal.innerHTML = `
+        <div id="search-bocha-modal-card"
+             class="bg-white dark:bg-[#1A1A1A] rounded-xl border border-slate-200 dark:border-white/10
+                    w-full max-w-md mx-4 p-6 shadow-xl">
+            <h3 class="text-lg font-semibold text-slate-800 dark:text-slate-100 mb-1">${t('models_search_bocha_title')}</h3>
+            <p class="text-xs text-slate-500 dark:text-slate-400 mb-4">${t('models_search_bocha_desc')}</p>
+            <label class="block text-sm font-medium text-slate-600 dark:text-slate-400 mb-1.5">API Key</label>
+            <input id="search-bocha-key" type="text" autocomplete="off" data-1p-ignore data-lpignore="true"
+                   class="w-full px-3 py-2 rounded-lg border border-slate-200 dark:border-slate-600
+                          bg-slate-50 dark:bg-white/5 text-sm text-slate-800 dark:text-slate-100
+                          focus:outline-none focus:border-primary-500 font-mono ${hasKey ? 'cfg-key-masked' : ''}"
+                   value="${escapeHtml(masked)}"
+                   data-masked="${hasKey ? '1' : ''}"
+                   placeholder="sk-..." />
+            <div class="flex items-center justify-between gap-3 mt-5">
+                <div>${clearBtnHtml}</div>
+                <div class="flex items-center gap-3">
+                    <button type="button" onclick="document.getElementById('search-bocha-modal').remove()"
+                            class="px-3 py-1.5 rounded-md text-sm text-slate-600 dark:text-slate-300
+                                   hover:bg-slate-100 dark:hover:bg-white/5 transition-colors">
+                        ${t('cancel')}
+                    </button>
+                    <button type="button" onclick="_saveBochaKey()"
+                            class="px-4 py-1.5 rounded-md bg-primary-500 hover:bg-primary-600 text-white text-sm font-medium
+                                   cursor-pointer transition-colors">
+                        ${t('save')}
+                    </button>
+                </div>
+            </div>
+        </div>
+    `;
+    document.body.appendChild(modal);
+
+    // Reset masked sentinel as soon as the user starts editing so the save
+    // handler can tell apart "kept the existing key" vs "typed a new one".
+    const input = document.getElementById('search-bocha-key');
+    if (input) {
+        const unmask = () => {
+            if (input.dataset.masked === '1') {
+                input.value = '';
+                input.dataset.masked = '';
+                input.classList.remove('cfg-key-masked');
+            }
+        };
+        input.addEventListener('keydown', (e) => {
+            if (e.key === 'Tab' || e.key === 'Escape') return;
+            unmask();
+        });
+        input.addEventListener('paste', unmask);
+        if (!hasKey) setTimeout(() => input.focus(), 50);
+    }
+    const clearBtn = document.getElementById('search-bocha-clear');
+    if (clearBtn) clearBtn.addEventListener('click', _clearBochaKey);
+
+    modal.addEventListener('mousedown', (e) => {
+        if (e.target === modal) modal.remove();
+    });
+    const onKey = (e) => {
+        if (e.key === 'Escape') {
+            modal.remove();
+            document.removeEventListener('keydown', onKey);
+        }
+    };
+    document.addEventListener('keydown', onKey);
+}
+
+function _saveBochaKey() {
+    const input = document.getElementById('search-bocha-key');
+    if (!input) return;
+    // Untouched masked value => no change requested; close silently.
+    if (input.dataset.masked === '1') {
+        const modal = document.getElementById('search-bocha-modal');
+        if (modal) modal.remove();
+        return;
+    }
+    const apiKey = input.value.trim();
+    if (!apiKey) {
+        input.focus();
+        return;
+    }
+    fetch('/api/models', {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ action: 'set_search_credential', api_key: apiKey }),
+    }).then(r => r.json()).then(data => {
+        if (data.status === 'success') {
+            const modal = document.getElementById('search-bocha-modal');
+            if (modal) modal.remove();
+            loadModelsView({ preserveScroll: true });
+        }
+    });
+}
+
+function _clearBochaKey() {
+    fetch('/api/models', {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ action: 'set_search_credential', api_key: '' }),
+    }).then(r => r.json()).then(data => {
+        if (data.status === 'success') {
+            const modal = document.getElementById('search-bocha-modal');
+            if (modal) modal.remove();
+            loadModelsView({ preserveScroll: true });
+        }
+    });
+}
+
+// Render the body of one capability card into `body`.
+//
+// The search card is delegated to renderSearchCapability. Every other
+// card gets a provider dropdown, an optional model dropdown (when
+// `def.needsModel`), an optional embedding-dimension line and a footer
+// with hint slot + status + save button. The TTS card additionally gets a
+// reply-mode dropdown on top and a voice picker after the model picker.
+//
+// Params:
+//   def  - capability definition; reads `def.id` and `def.needsModel`.
+//   cap  - capability payload; reads `strategy`, `current_provider`,
+//          `suggested_provider`, `current_model`, `current_voice`,
+//          `current_dim` and `reply_mode`.
+//   body - card body element whose innerHTML is replaced.
+function renderCapabilityBody(def, cap, body) {
+    if (def.id === 'search') {
+        renderSearchCapability(def, cap, body);
+        return;
+    }
+
+    // Editable cards: provider dropdown + (optional) model dropdown + save row
+    const providerOpts = buildCapabilityProviderOptions(def, cap);
+    const providerHtml = `
+        <div>
+            <label class="block text-sm font-medium text-slate-600 dark:text-slate-400 mb-1.5">${t('models_provider')}</label>
+            <div id="cap-${def.id}-provider" class="cfg-dropdown" tabindex="0">
+                <div class="cfg-dropdown-selected">
+                    <span class="cfg-dropdown-text">--</span>
+                    <i class="fas fa-chevron-down cfg-dropdown-arrow"></i>
+                </div>
+                <div class="cfg-dropdown-menu"></div>
+            </div>
+        </div>`;
+
+    // The model-picker container is emitted whenever the capability needs
+    // a model, so that visibility can be toggled later instead of
+    // re-rendering; for cards that start in auto mode it is hidden further
+    // down via setCapabilityModelPickerVisible.
+    const modelHtml = def.needsModel ? `
+        <div id="cap-${def.id}-model-wrap">
+            <label class="block text-sm font-medium text-slate-600 dark:text-slate-400 mb-1.5">${t('models_model')}</label>
+            <div id="cap-${def.id}-model" class="cfg-dropdown" tabindex="0">
+                <div class="cfg-dropdown-selected">
+                    <span class="cfg-dropdown-text">--</span>
+                    <i class="fas fa-chevron-down cfg-dropdown-arrow"></i>
+                </div>
+                <div class="cfg-dropdown-menu"></div>
+            </div>
+            <div id="cap-${def.id}-model-custom-wrap" class="mt-2 hidden">
+                <input id="cap-${def.id}-model-custom" type="text"
+                       class="w-full px-3 py-2 rounded-lg border border-slate-200 dark:border-slate-600
+                              bg-slate-50 dark:bg-white/5 text-sm text-slate-800 dark:text-slate-100
+                              focus:outline-none focus:border-primary-500 font-mono transition-colors"
+                       placeholder="custom model name">
+            </div>
+        </div>` : '';
+
+    // Embedding card only: show the current vector dimension when known.
+    const dimHtml = (def.id === 'embedding' && cap.current_dim) ? `
+        <p class="text-xs text-slate-400 dark:text-slate-500">
+            <i class="fas fa-cube text-[10px] mr-1"></i>${t('models_dim_label')}: <span class="font-mono">${cap.current_dim}</span>
+        </p>` : '';
+
+    // Footer layout: a "hint slot" (filled later by renderCapabilityHints for
+    // auto-mode cards) sits on the left while status + save stay anchored on
+    // the right. Keeping them on the same row means the save button hugs the
+    // inputs above instead of being pushed down by a separate hint line.
+    const footer = `
+        <div class="flex items-center justify-between gap-3 pt-1">
+            <div data-cap-hint="${def.id}" class="flex-1 min-w-0"></div>
+            <div class="flex items-center gap-3 flex-shrink-0">
+                <span id="cap-${def.id}-status" class="text-xs text-primary-500 opacity-0 transition-opacity duration-300"></span>
+                <button onclick="saveCapability('${def.id}')"
+                        class="px-4 py-2 rounded-lg bg-primary-500 hover:bg-primary-600 text-white text-sm font-medium
+                               cursor-pointer transition-colors duration-150 disabled:opacity-50 disabled:cursor-not-allowed">
+                    ${t('save')}
+                </button>
+            </div>
+        </div>`;
+
+    body.innerHTML = providerHtml + modelHtml + dimHtml + footer;
+
+    // TTS: mount reply-mode above provider; defer off-mode toggle to the end.
+    if (def.id === 'tts') {
+        renderVoiceReplyMode(body, cap.reply_mode || 'off', { skipVisibilityToggle: true });
+        // Voice-timbre picker depends on provider+model; rebuilt by callbacks.
+        const modelWrap = body.querySelector(`#cap-${def.id}-model-wrap`);
+        if (modelWrap) {
+            const voiceWrap = document.createElement('div');
+            voiceWrap.id = `cap-${def.id}-voice-wrap`;
+            voiceWrap.innerHTML = `
+                <label class="block text-sm font-medium text-slate-600 dark:text-slate-400 mb-1.5">${t('models_voice')}</label>
+                <div id="cap-${def.id}-voice" class="cfg-dropdown" tabindex="0">
+                    <div class="cfg-dropdown-selected">
+                        <span class="cfg-dropdown-text">--</span>
+                        <i class="fas fa-chevron-down cfg-dropdown-arrow"></i>
+                    </div>
+                    <div class="cfg-dropdown-menu"></div>
+                </div>
+                <div id="cap-${def.id}-voice-custom-wrap" class="hidden mt-2">
+                    <input id="cap-${def.id}-voice-custom" type="text"
+                           class="w-full px-3 py-2 text-sm rounded-md border border-slate-200 dark:border-slate-700
+                                  bg-white dark:bg-slate-800 text-slate-700 dark:text-slate-200
+                                  placeholder:text-slate-400 dark:placeholder:text-slate-500
+                                  focus:outline-none focus:ring-2 focus:ring-primary-500"
+                           placeholder="voice id" />
+                </div>
+            `;
+            // Place the voice picker directly after the model picker.
+            modelWrap.parentNode.insertBefore(voiceWrap, modelWrap.nextSibling);
+        }
+    }
+
+    // `body` is still detached from `document`; scope lookups locally.
+    const provDd = body.querySelector(`#cap-${def.id}-provider`);
+    // Strip private fields before handing to the generic initDropdown helper.
+    const ddOpts = providerOpts.map(o => ({ value: o.value, label: o.label }));
+
+    // A pending selection (a module-level variable set elsewhere) is
+    // consumed once: it applies only to this capability and only when the
+    // provider is among the options.
+    let pendingProvider = null;
+    if (pendingCapabilitySelection
+            && pendingCapabilitySelection.capabilityId === def.id
+            && providerOpts.some(o => o.value === pendingCapabilitySelection.providerId)) {
+        pendingProvider = pendingCapabilitySelection.providerId;
+        pendingCapabilitySelection = null;
+    }
+
+    // Auto strategy => leave empty sentinel selected. `suggested_provider`
+    // is a UI-only preselect (not persisted until the user clicks Save).
+    // No current + no suggestion => leave unselected with a placeholder.
+    //
+    // Pending-config takes priority over both "auto" and "pick provider":
+    // when no real (non-sentinel) configured option exists, surfacing
+    // "auto" or "pick" misleads the user — there's nothing to auto-route
+    // to or pick from. Force a "pending configuration" placeholder instead
+    // so all capabilities behave consistently on a fresh environment.
+    const hasConfiguredOpt = providerOpts.some(o => !o._isAuto && o._configured);
+    const noSelectionAndNoHint = !cap.current_provider && !cap.suggested_provider;
+    let initialProviderValue;
+    let dropdownPlaceholder = null;
+    if (!hasConfiguredOpt) {
+        initialProviderValue = '';
+        dropdownPlaceholder = { placeholder: t('models_pending_config') };
+    } else {
+        // Precedence: pending selection > auto sentinel ('') > current
+        // provider > suggested provider > first option > ''.
+        initialProviderValue = pendingProvider
+            ? pendingProvider
+            : ((cap.strategy === 'auto' && capabilitySupportsAuto(def.id))
+                ? ''
+                : (cap.current_provider
+                    || cap.suggested_provider
+                    || (noSelectionAndNoHint ? '' : (ddOpts[0] && ddOpts[0].value))
+                    || ''));
+        if (noSelectionAndNoHint) {
+            dropdownPlaceholder = { placeholder: t('models_pick_provider') };
+        }
+    }
+    initDropdown(
+        provDd,
+        ddOpts,
+        initialProviderValue,
+        (value) => onCapabilityProviderChange(def, value, body),
+        dropdownPlaceholder,
+    );
+    // Uses the full option objects (with private fields), not ddOpts.
+    decorateCapabilityProviderDropdown(def, provDd, providerOpts);
+
+    if (def.needsModel) {
+        rebuildCapabilityModelDropdown(def, initialProviderValue, cap.current_model || '', body);
+        // Hide model picker in auto mode — fallback hint below covers it.
+        setCapabilityModelPickerVisible(def, initialProviderValue !== '' || !capabilitySupportsAuto(def.id), body);
+    }
+
+    if (def.id === 'tts') {
+        rebuildCapabilityVoiceDropdown(
+            initialProviderValue,
+            cap.current_voice || '',
+            body,
+            cap.current_model || ''
+        );
+    }
+
+    // Fill the footer hint slot (auto-mode fallback hint, when applicable).
+    renderCapabilityHints(def, cap, body, initialProviderValue);
+
+    // Deferred TTS toggle: runs last so every child created above is
+    // covered when the reply mode is "off".
+    if (def.id === 'tts') {
+        _setTtsConfigVisible(body, (cap.reply_mode || 'off') !== 'off');
+    }
+}
+
+// TTS reply-policy dropdown (off / voice_if_voice / always). Persists on
+// change. When off, hides the rest of the TTS card.
+function renderVoiceReplyMode(host, currentMode, options) {
+    options = options || {};
+    const opts = [
+        { value: 'off',            label: t('voice_reply_off') },
+        { value: 'voice_if_voice', label: t('voice_reply_if_voice') },
+        { value: 'always',         label: t('voice_reply_always') },
+    ];
+    const wrap = document.createElement('div');
+    wrap.id = 'voice-reply-mode-wrap';
+    wrap.innerHTML = `
+        <label class="block text-sm font-medium text-slate-600 dark:text-slate-400 mb-1.5">${t('voice_reply_mode_label')}</label>
+        <div id="voice-reply-mode-dd" class="cfg-dropdown" tabindex="0">
+            <div class="cfg-dropdown-selected">
+                <span class="cfg-dropdown-text">--</span>
+                <i class="fas fa-chevron-down cfg-dropdown-arrow"></i>
+            </div>
+            <div class="cfg-dropdown-menu"></div>
+        </div>
+    `;
+    host.prepend(wrap);
+
+    const dd = wrap.querySelector('#voice-reply-mode-dd');
+    const valid = ['off', 'voice_if_voice', 'always'];
+    const initial = valid.includes(currentMode) ? currentMode : 'off';
+    if (!options.skipVisibilityToggle) _setTtsConfigVisible(host, initial !== 'off');
+    initDropdown(dd, opts, initial, (mode) => {
+        if (!valid.includes(mode)) return;
+        _setTtsConfigVisible(host, mode !== 'off');
+        fetch('/api/models', {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({ action: 'set_voice_reply_mode', mode }),
+        })
+            .then(r => r.json())
+            .then(data => {
+                if (data && data.status === 'success') {
+                    _ttsReadyPromise = null;  // force re-probe on next bubble
+                }
+            })
+            .catch(() => {});
+    });
+}
+
+// Show/hide everything in the TTS card below the reply-mode dropdown.
+function _setTtsConfigVisible(host, visible) {
+    if (!host) return;
+    Array.from(host.children).forEach((child) => {
+        if (child.id === 'voice-reply-mode-wrap') return;
+        child.classList.toggle('hidden', !visible);
+    });
+}
+
+// Toggle wrapper visibility instead of re-rendering so dropdown state survives.
+function setCapabilityModelPickerVisible(def, visible, scope) {
+    const root = scope || document;
+    const wrap = root.querySelector(`#cap-${def.id}-model-wrap`);
+    if (!wrap) return;
+    wrap.classList.toggle('hidden', !visible);
+}
+
+function renderCapabilityHints(def, cap, body, currentProvider) {
+    // Capabilities that can be in "auto" mode show a fallback hint right
+    // under the inputs so users always know what'd actually be hit. The
+    // image card additionally surfaces a "router pending" warning until the
+    // standalone dispatcher lands.
+    // The hint slot is co-located with the save button in the footer row
+    // (see renderCapabilityBody) so the save button stays close to the
+    // inputs above. We just rewrite the slot's innerHTML — emptying it
+    // when the card leaves auto mode, or rendering a one-line hint when
+    // it's in auto mode.
+    const slot = body.querySelector(`[data-cap-hint="${def.id}"]`);
+    if (!slot) return;
+    slot.innerHTML = '';
+
+    if (currentProvider !== '' || !capabilitySupportsAuto(def.id)) return;
+
+    // The hint mirrors what the runtime would actually pick when in auto
+    // mode. fallback_provider/model are pre-computed on the backend (see
+    // _predict_vision_auto, _predict_image_auto) so we can trust them
+    // here without re-implementing the provider chain.
+    const fbProv = cap.fallback_provider || '';
+    const fbModel = cap.fallback_model || '';
+    if (!fbProv && !fbModel) return;
+    // Show the vendor's display label (e.g. "LinkAI") instead of the raw
+    // id ("linkai") when we know it. Falls back to the id when the
+    // provider isn't in our vendor table (rare).
+    const provMeta = modelsState.providers.find(p => p.id === fbProv);
+    const fbProvLabel = (provMeta && provMeta.label) || fbProv;
+    const fbText = fbModel ? `${fbProvLabel} / ${fbModel}` : fbProvLabel;
+    slot.innerHTML = `
+        <p class="flex items-center gap-1.5 text-xs text-slate-400 dark:text-slate-500 min-w-0">
+            <i class="fas fa-circle-info text-[10px] flex-shrink-0"></i>
+            <span class="flex-shrink-0">${t('models_auto_using')}</span>
+            <span class="font-mono text-slate-500 dark:text-slate-400 truncate">${escapeHtml(fbText)}</span>
+        </p>`;
+}
+
+function buildCapabilityProviderOptions(def, cap) {
+    // Show ALL vendors in capability dropdowns so users can see at a glance
+    // who's configured (green check) and who isn't (gray dot, click to set
+    // up). The list order puts configured vendors first; clicking an
+    // unconfigured row opens the vendor modal in-place. ASR/TTS engines that
+    // aren't tracked by PROVIDER_MODELS (azure/baidu/google etc.) are treated
+    // as "always available" — no credential gate.
+    const knownProviderMap = {};
+    modelsState.providers.forEach(p => { knownProviderMap[p.id] = p; });
+
+    const explicitList = cap.providers && cap.providers.length ? cap.providers : null;
+    let providerIds = explicitList ? explicitList.slice() : modelsState.providers.map(p => p.id);
+    if (cap.current_provider && !providerIds.includes(cap.current_provider)) {
+        providerIds = [cap.current_provider, ...providerIds];
+    }
+
+    const opts = providerIds.map(pid => {
+        const meta = knownProviderMap[pid];
+        const tracked = !!meta;
+        const configured = !tracked || !!meta.configured;
+        return {
+            value: pid,
+            label: (meta && meta.label) || pid,
+            _tracked: tracked,
+            _configured: configured,
+        };
+    });
+
+    opts.sort((a, b) => {
+        if (a._configured === b._configured) return 0;
+        return a._configured ? -1 : 1;
+    });
+
+    // Capabilities with a fallback ("auto") strategy expose it as a sentinel
+    // option pinned to the top of the list. We use empty-string as the auto
+    // value so the existing save handler propagates it untouched to the
+    // backend, which interprets "" as "fall back to the main model".
+    // Skip the sentinel when no real vendor is configured — "auto" would
+    // route to nothing useful and the renderer will show "待配置" instead.
+    const hasAnyConfigured = opts.some(o => o._configured);
+    if ((cap.strategy === 'auto' || cap.strategy === 'specified') && hasAnyConfigured) {
+        if (capabilitySupportsAuto(def.id)) {
+            opts.unshift({
+                value: '',
+                label: t('models_strategy_auto'),
+                _tracked: false,
+                _configured: true,
+                _isAuto: true,
+            });
+        }
+    }
+    return opts;
+}
+
+function capabilitySupportsAuto(capId) {
+    // True only for image and vision. Embedding is deliberately excluded: its
+    // runtime fallback is limited to OpenAI/LinkAI, so "auto" would mislead.
+    return ['image', 'vision'].includes(capId);
+}
+
+// After initDropdown renders the capability provider menu, decorate each
+// row with the right-aligned configuration cue:
+//   - configured rows: nothing extra — the .active marker (a brand-green ✓)
+//     already comes from initDropdown's selected-state CSS for the row the
+//     user currently picked. Other configured rows show no chrome, mirroring
+//     a plain "switch to this" selector.
+//   - unconfigured rows: a subdued gear icon hints at "click to configure".
+//     The row's whole click handler is swapped to launch the vendor modal
+//     in place rather than selecting an unusable value.
+function decorateCapabilityProviderDropdown(def, ddEl, opts) {
+    if (!ddEl) return;
+    const menu = ddEl.querySelector('.cfg-dropdown-menu');
+    if (!menu) return;
+
+    const optByValue = {};
+    opts.forEach(o => { optByValue[o.value] = o; });
+
+    menu.querySelectorAll('.cfg-dropdown-item').forEach(item => {
+        const value = item.dataset.value;
+        const opt = optByValue[value];
+        if (!opt) return;
+        item.classList.add('cap-provider-item');
+        if (!opt._configured) item.classList.add('cap-provider-unconfigured');
+
+        // Wrap the label so the trailing affordance lines up via flex:auto.
+        const labelText = item.textContent;
+        item.textContent = '';
+        const labelEl = document.createElement('span');
+        labelEl.className = 'cap-provider-label';
+        labelEl.textContent = labelText;
+        item.appendChild(labelEl);
+
+        if (!opt._configured) {
+            // Trailing gear icon as the "configure this vendor" affordance.
+            const gear = document.createElement('i');
+            gear.className = 'fas fa-gear cap-provider-gear';
+            item.appendChild(gear);
+        }
+
+        if (!opt._configured && opt._tracked) {
+            // Hijack the click: open the vendor modal instead of selecting
+            // an unusable value, and remember which capability the user was
+            // configuring so the post-save reload can preselect the vendor.
+            const newItem = item.cloneNode(true);
+            item.replaceWith(newItem);
+            newItem.addEventListener('click', (e) => {
+                e.stopPropagation();
+                ddEl.classList.remove('open');
+                openVendorModal(value, (savedProviderId) => {
+                    pendingCapabilitySelection = {
+                        capabilityId: def.id,
+                        providerId: savedProviderId || value,
+                    };
+                    loadModelsView({ preserveScroll: true });
+                });
+            });
+        }
+    });
+}
+
+// Lightweight decorator for the "add vendor" modal's provider picker:
+// every configured vendor row gets a trailing brand-green ✓ so the user can
+// see at a glance who's already set up, without having to read each row.
+// Unlike decorateCapabilityProviderDropdown we don't hijack clicks here —
+// picking an unconfigured vendor in this modal *is* the intended action.
+function decorateVendorModalPicker(ddEl, opts) {
+    const menu = ddEl ? ddEl.querySelector('.cfg-dropdown-menu') : null;
+    if (!menu) return;
+
+    const lookup = {};
+    for (const entry of opts) lookup[entry.value] = entry;
+
+    for (const row of menu.querySelectorAll('.cfg-dropdown-item')) {
+        const entry = lookup[row.dataset.value];
+        if (!entry) continue;
+        // The marker class lets CSS suppress the global active-row ✓, which
+        // would otherwise double up on rows both configured and selected.
+        row.classList.add('vendor-picker-item');
+        if (!entry._configured) continue;
+        // Configured vendors get a trailing check icon.
+        const mark = document.createElement('i');
+        mark.className = 'fas fa-check vendor-picker-configured-mark';
+        row.appendChild(mark);
+    }
+}
+
+function rebuildCapabilityModelDropdown(def, providerId, selectedModel, scope) {
+    // `scope` lets the caller (renderCapabilityBody) target a still-detached
+    // subtree. After the card is mounted, callers may pass `document` instead.
+    const root = scope || document;
+    const el = root.querySelector(`#cap-${def.id}-model`);
+    if (!el) return;
+
+    // Prefer the capability-scoped model list when the backend provides one
+    // (vision / image). It reflects the models the runtime can actually
+    // dispatch to for this capability, instead of the vendor's full chat-
+    // model catalog. Fall back to the generic provider.models for chat /
+    // embedding / tts where any vendor model is fair game.
+    //
+    // Entries may be plain strings or {value, hint} objects (image catalog
+    // uses the latter to surface brand aliases like "Nano Banana 2" next to
+    // the technical Gemini model id). We normalize to {value, label, hint}
+    // before handing off to initDropdown.
+    const cap = modelsState.capabilities[def.id] || {};
+    const capModelMap = cap.provider_models || {};
+    let rawList;
+    if (capModelMap[providerId]) {
+        rawList = capModelMap[providerId].slice();
+    } else {
+        const provider = modelsState.providers.find(p => p.id === providerId);
+        rawList = (provider && provider.models) ? provider.models.slice() : [];
+    }
+    const modelValues = [];
+    const opts = rawList.map(entry => {
+        if (typeof entry === 'string') {
+            modelValues.push(entry);
+            return { value: entry, label: entry };
+        }
+        modelValues.push(entry.value);
+        return { value: entry.value, label: entry.label || entry.value, hint: entry.hint || '' };
+    });
+    opts.push({ value: '__custom__', label: currentLang === 'zh' ? '自定义...' : 'Custom...' });
+
+    let initialValue = selectedModel || '';
+    if (initialValue && !modelValues.includes(initialValue)) {
+        initialValue = '__custom__';
+    }
+    if (!initialValue && opts.length) initialValue = opts[0].value;
+
+    initDropdown(el, opts, initialValue, (value) => {
+        const customWrap = document.getElementById(`cap-${def.id}-model-custom-wrap`);
+        if (customWrap) {
+            if (value === '__custom__') {
+                customWrap.classList.remove('hidden');
+                const input = document.getElementById(`cap-${def.id}-model-custom`);
+                if (input && !input.value) input.value = selectedModel || '';
+            } else {
+                customWrap.classList.add('hidden');
+            }
+        }
+        // TTS voice catalog may be scoped per engine model (aggregating
+        // gateways). Rebuild the voice picker whenever the model changes.
+        if (def.id === 'tts') {
+            const provDd = document.getElementById('cap-tts-provider');
+            const provId = provDd ? getDropdownValue(provDd) : '';
+            rebuildCapabilityVoiceDropdown(provId, '', null, value);
+        }
+    });
+
+    const customWrap = root.querySelector(`#cap-${def.id}-model-custom-wrap`);
+    if (customWrap) {
+        if (initialValue === '__custom__') {
+            customWrap.classList.remove('hidden');
+            const input = root.querySelector(`#cap-${def.id}-model-custom`);
+            if (input) input.value = selectedModel || '';
+        } else {
+            customWrap.classList.add('hidden');
+        }
+    }
+}
+
+// TTS-only: rebuild the voice timbre picker against the provider's
+// curated voice list. Hidden whenever that list resolves to empty.
+//
+// Each voice entry may be:
+//   - a bare string  (code = label)
+//   - {value, label, hint?}   so we can show a friendly Chinese name
+//     while persisting the raw API code that the runtime sends.
+function rebuildCapabilityVoiceDropdown(providerId, selectedVoice, scope, modelId) {
+    const root = scope || document;
+    const wrap = root.querySelector(`#cap-tts-voice-wrap`);
+    const el = root.querySelector(`#cap-tts-voice`);
+    if (!wrap || !el) return;
+    const cap = modelsState.capabilities.tts || {};
+    const voicesByProvider = cap.provider_voices || {};
+    let raw = (providerId && voicesByProvider[providerId]) || [];
+    // Some providers (gateways) scope voices by engine model id.
+    if (raw && !Array.isArray(raw) && typeof raw === 'object') {
+        const activeModel = modelId
+            || (root.querySelector(`#cap-tts-model`) ? getDropdownValue(root.querySelector(`#cap-tts-model`)) : '');
+        raw = (activeModel && raw[activeModel]) || [];
+    }
+    if (!raw || raw.length === 0) {
+        wrap.classList.add('hidden');
+        return;
+    }
+    wrap.classList.remove('hidden');
+    // Voice picker: friendly name on the left, raw API code as right-hand
+    // hint. Persisted/sent value is always the raw code.
+    const codes = [];
+    const opts = raw.map(entry => {
+        if (typeof entry === 'string') {
+            codes.push(entry);
+            return { value: entry, label: entry };
+        }
+        codes.push(entry.value);
+        const code = entry.value;
+        const desc = entry.hint || entry.label || code;
+        return {
+            value: code,
+            label: desc,
+            hint: desc === code ? '' : code,
+        };
+    });
+    opts.push({ value: '__custom__', label: currentLang === 'zh' ? '自定义...' : 'Custom...' });
+
+    // Off-catalog values route through the custom branch.
+    let initial = selectedVoice || '';
+    const isCustom = initial && !codes.includes(initial);
+    if (isCustom) initial = '__custom__';
+    if (!initial) initial = codes[0];
+
+    initDropdown(el, opts, initial, (value) => {
+        const customWrap = root.querySelector(`#cap-tts-voice-custom-wrap`);
+        if (!customWrap) return;
+        if (value === '__custom__') {
+            customWrap.classList.remove('hidden');
+            const input = root.querySelector(`#cap-tts-voice-custom`);
+            if (input && !input.value) input.value = isCustom ? selectedVoice : '';
+        } else {
+            customWrap.classList.add('hidden');
+        }
+    });
+
+    const customWrap = root.querySelector(`#cap-tts-voice-custom-wrap`);
+    if (customWrap) {
+        if (initial === '__custom__') {
+            customWrap.classList.remove('hidden');
+            const input = root.querySelector(`#cap-tts-voice-custom`);
+            if (input) input.value = isCustom ? selectedVoice : '';
+        } else {
+            customWrap.classList.add('hidden');
+        }
+    }
+}
+
+function onCapabilityProviderChange(def, providerId, scope) {
+    // Refreshes the model picker, TTS voice picker and hints for a provider.
+    if (def.needsModel) {
+        // An empty provider on an auto-capable capability means auto mode,
+        // where the model picker is hidden rather than rebuilt.
+        const showPicker = !(providerId === '' && capabilitySupportsAuto(def.id));
+        if (showPicker) rebuildCapabilityModelDropdown(def, providerId, '', scope);
+        setCapabilityModelPickerVisible(def, showPicker, scope);
+    }
+    // Voice lists are looked up per provider, so the TTS picker is rebuilt too.
+    if (def.id === 'tts') rebuildCapabilityVoiceDropdown(providerId, '', scope);
+
+    // Hints render into the supplied scope, else the matching data-cap-body node.
+    const container = scope || document.querySelector(`[data-cap-body="${def.id}"]`);
+    if (!container) return;
+    const capState = modelsState.capabilities[def.id] || {};
+    renderCapabilityHints(def, capState, container, providerId);
+}
+
+function getCapabilityModelValue(def) {
+    // Resolves the chosen model id; the custom sentinel reads the text input.
+    const dropdown = def.needsModel ? document.getElementById(`cap-${def.id}-model`) : null;
+    if (!dropdown) return '';
+    const picked = getDropdownValue(dropdown);
+    if (picked !== '__custom__') return picked || '';
+    // Custom entry: the free-text field holds the model id.
+    const customInput = document.getElementById(`cap-${def.id}-model-custom`);
+    if (!customInput) return '';
+    return customInput.value.trim();
+}
+
+function saveCapability(capId) {
+    const def = MODELS_CAPABILITY_DEFS.find(d => d.id === capId);
+    if (!def || !def.editable) return;
+    // Search has its own form (strategy + provider, no model picker).
+    if (capId === 'search') { saveSearchCapability(); return; }
+    const provDd = document.getElementById(`cap-${capId}-provider`);
+    const provider = provDd ? getDropdownValue(provDd) : '';
+    // When the user is in auto mode (provider == ""), the model picker is
+    // hidden and any value left in it is stale; persist an empty model so
+    // the backend treats this as "fall back to the runtime chain".
+    const isAuto = provider === '' && capabilitySupportsAuto(capId);
+    const model = isAuto ? '' : getCapabilityModelValue(def);
+    // TTS carries an extra voice timbre (supports free-text custom ids).
+    let voice = '';
+    if (capId === 'tts' && !isAuto) {
+        const voiceDd = document.getElementById(`cap-${capId}-voice`);
+        voice = voiceDd ? getDropdownValue(voiceDd) : '';
+        if (voice === '__custom__') {
+            const input = document.getElementById(`cap-${capId}-voice-custom`);
+            voice = input ? input.value.trim() : '';
+        }
+    }
+
+    // Embedding changes invalidate any pre-existing vector index because
+    // dimensions / vendor differ. Gate the save behind a confirm, and on
+    // success surface a dedicated info dialog telling the user how to
+    // rebuild — both via the in-app custom dialog, not the native alert.
+    if (capId === 'embedding') {
+        const cap = modelsState.capabilities[capId] || {};
+        const before = (cap.current_provider || '').trim();
+        const after = (provider || '').trim();
+        if (before !== after) {
+            showConfirmDialog({
+                title: t('models_embedding_change_title'),
+                message: t('models_embedding_change_msg'),
+                okText: t('save'),
+                cancelText: t('cancel'),
+                onConfirm: () => _persistCapability(capId, provider, model, () => {
+                    showConfirmDialog({
+                        title: t('models_embedding_saved_title'),
+                        message: t('models_embedding_saved_msg'),
+                        okText: t('models_embedding_saved_ok'),
+                        hideCancel: true,
+                        onConfirm: () => {
+                            navigateTo('chat');
+                            // Defer focus + value set: navigateTo may
+                            // re-render the chat panel; setting value before
+                            // the input is mounted would be lost.
+                            setTimeout(() => {
+                                const input = document.getElementById('chat-input');
+                                if (!input) return;
+                                input.value = '/memory rebuild-index';
+                                input.focus();
+                                // Trigger any input listeners (autosize, send-button enable, etc.)
+                                input.dispatchEvent(new Event('input', { bubbles: true }));
+                            }, 60);
+                        },
+                    });
+                }),
+            });
+            return;
+        }
+    }
+    _persistCapability(capId, provider, model, undefined, { voice });
+}
+
+function _persistCapability(capId, provider, model, onAfterSuccess, extras) {
+    // POSTs the capability selection and reports the outcome in its status slot.
+    const statusId = `cap-${capId}-status`;
+    const reportFailure = () => showStatus(statusId, 'models_save_failed', true);
+    const request = { action: 'set_capability', capability: capId, provider_id: provider, model };
+    if (extras && extras.voice !== undefined) request.voice = extras.voice;
+    fetch('/api/models', {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify(request),
+    }).then(resp => resp.json()).then(result => {
+        if (result.status !== 'success') { reportFailure(); return; }
+        // Flash "Saved" first; the reload that rebuilds the view is delayed.
+        showStatus(statusId, 'models_save_success', false);
+        setTimeout(() => {
+            loadModelsView({ preserveScroll: true });
+            if (onAfterSuccess) onAfterSuccess();
+        }, 400);
+    }).catch(reportFailure);
+}
+
+// ---------- Vendor credential modal ------------------------------------
+
+let vendorModalState = { providerId: '', onSaved: null };
+
+function openVendorModal(providerId, onSaved) {
+    // Opens the vendor credential modal. An empty providerId starts the "add"
+    // flow with the provider picker shown; otherwise the modal edits that
+    // vendor directly. onSaved is kept in vendorModalState for the save path.
+    vendorModalState = { providerId: providerId || '', onSaved: onSaved || null };
+
+    const overlay = document.getElementById('vendor-modal-overlay');
+    const pickerWrap = document.getElementById('vendor-modal-picker-wrap');
+    const keyInput = document.getElementById('vendor-modal-key');
+    const clearBtn = document.getElementById('vendor-modal-clear');
+
+    // Reset any leftover status (e.g. previous "Saved" message)
+    const statusEl = document.getElementById('vendor-modal-status');
+    if (statusEl) {
+        statusEl.textContent = '';
+        statusEl.classList.add('opacity-0');
+    }
+
+    if (!providerId) {
+        // Add flow — show provider picker, default to the first unconfigured one.
+        // We render every configured vendor with a trailing green ✓ via the
+        // dropdown decorator, mirroring the visual language used by the
+        // capability provider dropdowns. The .active row already shows the
+        // currently selected vendor via its own background highlight, so we
+        // intentionally suppress the global active-row ✓ for this picker
+        // (see CSS) — otherwise configured + selected rows would show two.
+        const unconfigured = modelsState.providers.filter(p => !p.configured);
+        const defaultId = (unconfigured[0] && unconfigured[0].id) || (modelsState.providers[0] && modelsState.providers[0].id) || '';
+        pickerWrap.classList.remove('hidden');
+        const pickerEl = document.getElementById('vendor-modal-picker');
+        const pickerOpts = modelsState.providers.map(p => ({
+            value: p.id,
+            label: p.label,
+            _configured: !!p.configured,
+        }));
+        initDropdown(pickerEl, pickerOpts, defaultId, (val) => fillVendorModalForProvider(val));
+        decorateVendorModalPicker(pickerEl, pickerOpts);
+        fillVendorModalForProvider(defaultId);
+    } else {
+        pickerWrap.classList.add('hidden');
+        fillVendorModalForProvider(providerId);
+    }
+
+    overlay.classList.remove('hidden');
+
+    document.getElementById('vendor-modal-cancel').onclick = closeVendorModal;
+    document.getElementById('vendor-modal-save').onclick = saveVendorModal;
+    clearBtn.onclick = clearVendorModal;
+
+    // Once the user edits the masked value, drop the "masked sentinel" flag so
+    // the save handler treats their input as a real new key. The input event
+    // fires after .value has been updated, so comparing here is safe.
+    keyInput.oninput = function () {
+        if (keyInput.dataset.masked === '1' && keyInput.value !== keyInput.dataset.maskedVal) {
+            keyInput.dataset.masked = '';
+        }
+    };
+
+    // Backdrop click closes the modal. Assigned as a property, like the
+    // button handlers above, so every open replaces the previous handler;
+    // addEventListener would stack one more listener each time the modal
+    // is closed through Cancel or Save and then reopened.
+    overlay.onclick = function (e) {
+        if (e.target === overlay) {
+            closeVendorModal();
+        }
+    };
+    keyInput.focus();
+}
+
+function fillVendorModalForProvider(providerId) {
+    // Populates the vendor modal fields from the provider's entry in
+    // modelsState.providers; does nothing when the id is unknown.
+    const vendor = modelsState.providers.find(entry => entry.id === providerId);
+    if (!vendor) return;
+    document.getElementById('vendor-modal-title').textContent = vendor.label;
+    document.getElementById('vendor-modal-subtitle').textContent = vendor.id;
+
+    // ----- API Base -----
+    // The input value always mirrors the base currently in effect, so the
+    // user sees (and can edit) what is used today. The placeholder is kept
+    // for the empty state only and carries the official default, so the two
+    // are never mixed. Vendors without an api_base_field hide the wrapper.
+    const baseWrap = document.getElementById('vendor-modal-base-wrap');
+    const baseInput = document.getElementById('vendor-modal-base');
+    const baseHint = document.getElementById('vendor-modal-base-hint');
+    const hasBaseField = !!vendor.api_base_field;
+    baseWrap.classList.toggle('hidden', !hasBaseField);
+    baseInput.value = hasBaseField ? (vendor.api_base || '') : '';
+    if (hasBaseField) {
+        baseInput.placeholder = vendor.api_base_default || vendor.api_base_placeholder || '';
+        baseHint.classList.add('hidden');
+    }
+
+    // ----- API Key -----
+    // A configured vendor shows its masked key (e.g. "sk-r***zRU") as the
+    // input *value*, in the same dark text as a real entry, which makes
+    // "configured" visually unambiguous. The masked form doubles as a
+    // sentinel: the save handler reads an untouched masked input as "no
+    // change". Otherwise the field is empty with an "sk-..." placeholder.
+    const keyInput = document.getElementById('vendor-modal-key');
+    const maskedKey = (vendor.configured && vendor.api_key_masked)
+        ? vendor.api_key_masked
+        : '';
+    const hasMaskedKey = !!maskedKey;
+    keyInput.value = maskedKey;
+    keyInput.dataset.masked = hasMaskedKey ? '1' : '';
+    keyInput.dataset.maskedVal = maskedKey;
+    keyInput.placeholder = hasMaskedKey ? '' : 'sk-...';
+
+    // The clear button is offered only for configured vendors.
+    const clearBtn = document.getElementById('vendor-modal-clear');
+    clearBtn.classList.toggle('hidden', !vendor.configured);
+
+    // Record which provider the modal now targets (read by save/clear).
+    vendorModalState.providerId = providerId;
+}
+
+function closeVendorModal() {
+    document.getElementById('vendor-modal-overlay').classList.add('hidden');
+}
+
+function saveVendorModal() {
+    // Persists api_base (and api_key, when changed) for the current provider.
+    // On success: closes the modal, then runs onSaved or reloads the view.
+    const providerId = vendorModalState.providerId;
+    if (!providerId) return;
+    const keyInput = document.getElementById('vendor-modal-key');
+    const apiBase = document.getElementById('vendor-modal-base').value.trim();
+
+    // Treat "input still equals the masked value we surfaced on open" as "no
+    // change" — the backend uses missing/empty api_key to skip the field.
+    let apiKey = keyInput.value.trim();
+    const masked = keyInput.dataset.masked === '1';
+    const maskedVal = keyInput.dataset.maskedVal || '';
+    if (masked && apiKey === maskedVal) apiKey = '';
+
+    if (!apiKey && !masked) {
+        // First-time setup with no key entered → nudge the user.
+        keyInput.focus();
+        return;
+    }
+
+    const btn = document.getElementById('vendor-modal-save');
+    btn.disabled = true;
+    const payload = { action: 'set_provider', provider_id: providerId, api_base: apiBase };
+    if (apiKey) payload.api_key = apiKey;
+    fetch('/api/models', {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify(payload),
+    }).then(r => r.json()).then(data => {
+        btn.disabled = false;
+        if (data.status === 'success') {
+            closeVendorModal();
+            const onSaved = vendorModalState.onSaved;
+            if (onSaved) {
+                try { onSaved(providerId); } catch (e) { console.error('vendor modal onSaved callback failed', e); }
+            } else {
+                loadModelsView();
+            }
+        } else {
+            showStatus('vendor-modal-status', 'models_save_failed', true);
+        }
+    }).catch(() => {
+        btn.disabled = false;
+        showStatus('vendor-modal-status', 'models_save_failed', true);
+    });
+}
+
+function clearVendorModal() {
+    // Confirms, then sends delete_provider for the modal's current provider.
+    const targetId = vendorModalState.providerId;
+    if (!targetId) return;
+    const reportFailure = () => showStatus('vendor-modal-status', 'models_clear_failed', true);
+    const deleteCredential = () => {
+        fetch('/api/models', {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({ action: 'delete_provider', provider_id: targetId }),
+        }).then(resp => resp.json()).then(result => {
+            if (result.status !== 'success') { reportFailure(); return; }
+            closeVendorModal();
+            loadModelsView();
+        }).catch(reportFailure);
+    };
+    showConfirmDialog({
+        title: t('models_clear_confirm_title'),
+        message: t('models_clear_confirm_msg'),
+        okText: t('models_clear_credential'),
+        cancelText: t('cancel'),
+        onConfirm: deleteCredential,
+    });
+}
+
 // =====================================================================
 // Channels View
 // =====================================================================
@@ -4283,6 +6375,7 @@ navigateTo = function(viewId) {
 
     // Lazy-load view data
     if (viewId === 'config') loadConfigView();
+    else if (viewId === 'models') loadModelsView();
     else if (viewId === 'skills') loadSkillsView();
     else if (viewId === 'memory') {
         document.getElementById('memory-panel-viewer').classList.add('hidden');
diff --git a/channel/web/static/logos/claudeAPI.svg b/channel/web/static/logos/claudeAPI.svg
new file mode 100644
index 00000000..e9a401b7
--- /dev/null
+++ b/channel/web/static/logos/claudeAPI.svg
@@ -0,0 +1 @@
+<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251656961" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="18432" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M252.8 652.8l167.893333-94.293333 2.773334-8.106667-2.773334-4.48h-8.106666l-28.16-1.706667-96-2.56-83.2-3.413333-80.64-4.266667-20.266667-4.266666L85.333333 504.746667l1.92-12.586667 17.066667-11.52 24.32 2.133333 53.973333 3.626667 81.066667 5.546667 58.666667 3.413333 87.04 9.173333h13.866666l1.92-5.546666-4.693333-3.413334-3.626667-3.413333-83.84-56.746667-90.666666-60.16-47.573334-34.56-25.813333-17.493333-13.013333-16.426667-5.546667-35.84 23.253333-25.813333 31.36 2.133333 7.893334 2.133334 31.786666 24.32 67.84 52.48L401.066667 391.466667l13.013333 10.88 5.12-3.626667 0.64-2.56-5.76-9.813333-48.213333-87.04L314.453333 210.773333l-22.826666-36.693333-5.973334-21.973333a107.861333 107.861333 0 0 1-3.626666-26.026667l26.666666-36.053333L323.413333 85.333333l35.413334 4.693334 14.933333 13.013333 21.973333 50.346667 35.626667 79.36 55.253333 107.733333 16.213334 32 8.746666 29.653333 3.2 9.173334h5.546667v-5.12l4.48-60.8 8.32-74.453334 8.106667-96 2.773333-27.093333 13.44-32.426667 26.666667-17.493333 20.693333 10.026667 17.066667 24.32-2.346667 15.786666-10.24 65.92-19.84 103.253334-13.013333 69.12h7.466666l8.746667-8.746667 34.986667-46.506667 58.666666-73.386666 26.026667-29.226667 30.293333-32.213333 19.413334-15.36h36.693333l27.093333 40.106666-12.16 41.386667-37.76 48-31.36 40.533333-45.013333 60.586667-28.16 48.426667 2.56 3.84 6.613333-0.64 101.546667-21.546667 54.826667-10.026667 65.493333-11.306666 29.653333 13.866666 3.2 14.08-11.733333 28.8-69.973333 17.28-82.133334 16.426667-122.24 29.013333-1.493333 1.066667 1.706667 2.133333 55.04 5.12 23.466666 1.28h57.6l107.306667 7.893334 28.16 18.56 16.853333 22.613333-2.773333 
17.28-43.306667 21.973333-58.24-13.866666-136.106666-32.426667-46.72-11.733333h-6.4v3.84l38.826666 37.973333 71.253334 64.426667 89.173333 82.986666 4.48 20.48-11.52 16.213334-12.16-1.706667-78.506667-58.88-30.293333-26.666667-68.48-57.6h-4.48v5.973334l15.786667 23.04 83.413333 125.226666 4.266667 38.4-5.973334 12.586667-21.546666 7.466667-23.68-4.266667-48.853334-68.48-50.346666-77.226667-40.533334-69.12-4.906666 2.773334-23.893334 258.133333-11.306666 13.226667-26.026667 10.026666-21.546667-16.426666-11.52-26.666667 11.52-52.48 13.866667-68.48 11.306667-54.4 10.24-67.626667 5.973333-22.4-0.426667-1.493333-4.906666 0.64-50.986667 69.973333-77.653333 104.746667-61.44 65.706667-14.72 5.76-25.386667-13.226667 2.346667-23.466667 14.293333-20.906666 84.906667-107.946667 51.2-66.986667 33.066666-38.613333v-5.546667h-2.133333l-225.493333 146.56-40.106667 5.12-17.28-16.213333 2.133333-26.666667 8.106667-8.746666 67.84-46.72h-0.213333l0.853333 0.853333z" fill="#D97757" p-id="18433"></path></svg>
\ No newline at end of file
diff --git a/channel/web/static/logos/custom.svg b/channel/web/static/logos/custom.svg
new file mode 100644
index 00000000..63857648
--- /dev/null
+++ b/channel/web/static/logos/custom.svg
@@ -0,0 +1,10 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="200" height="200" fill="none" stroke="#475569" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round">
+  <!-- Horizontal slider tracks -->
+  <line x1="4" y1="7" x2="20" y2="7"/>
+  <line x1="4" y1="12" x2="20" y2="12"/>
+  <line x1="4" y1="17" x2="20" y2="17"/>
+  <!-- Knobs (filled circles) -->
+  <circle cx="9" cy="7"  r="2.2" fill="#475569" stroke="none"/>
+  <circle cx="15" cy="12" r="2.2" fill="#475569" stroke="none"/>
+  <circle cx="7" cy="17"  r="2.2" fill="#475569" stroke="none"/>
+</svg>
diff --git a/channel/web/static/logos/dashscope.svg b/channel/web/static/logos/dashscope.svg
new file mode 100644
index 00000000..a5801c86
--- /dev/null
+++ b/channel/web/static/logos/dashscope.svg
@@ -0,0 +1 @@
+<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251621200" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="17444" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M1019.364785 620.816931L891.797142 397.807295 946.450846 293.15069a29.097778 29.097778 0 0 0 6.399732-36.393472l-70.184053-126.586684a30.078737 30.078737 0 0 0-24.574968-13.652427H597.4945L539.171949 14.549389a27.348852 27.348852 0 0 0-20.906122-14.549389H380.628607a29.139776 29.139776 0 0 0-24.616967 14.549389v5.545767L225.797108 243.062793H100.919352a29.182775 29.182775 0 0 0-25.513928 13.653427L3.428446 384.11187a32.766624 32.766624 0 0 0 0 29.182775L132.831012 638.096205 74.508461 740.064923a32.766624 32.766624 0 0 0 0 29.05478l66.514207 116.561105a29.905744 29.905744 0 0 0 25.513929 14.505391H427.132654l62.845361 109.222414A30.078737 30.078737 0 0 0 512.762058 1024H660.382859a29.139776 29.139776 0 0 0 24.574968-14.549389l128.463606-224.843558h114.76818a31.91366 31.91366 0 0 0 24.660965-15.444352l66.471208-117.414069a28.158818 28.158818 0 0 0 0-30.9747l0.042999 0.042999z m-161.273228 14.591387L791.57735 512.490479 518.265827 993.964261l-74.748861-122.87484h-273.268525l65.618244-119.205994h139.386147L101.856313 272.244568h143.055993L380.671605 30.121735l68.34913 119.247993-70.184053 122.87484H925.501726l-69.202094 121.936879 137.594222 241.183873H858.134555z" fill="#605BEC" p-id="17445"></path><path d="M499.962596 699.320634l174.371677-274.719464H324.694955z" fill="#605BEC" p-id="17446"></path></svg>
\ No newline at end of file
diff --git a/channel/web/static/logos/deepseek.svg b/channel/web/static/logos/deepseek.svg
new file mode 100644
index 00000000..ae90d3db
--- /dev/null
+++ b/channel/web/static/logos/deepseek.svg
@@ -0,0 +1 @@
+<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251541870" class="icon" viewBox="0 0 1391 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="12864" xmlns:xlink="http://www.w3.org/1999/xlink" width="271.6796875" height="200"><path d="M1333.74443323 82.22042509c-13.80988113-6.90651166-19.77216769 6.25765149-27.83971486 12.94735271-2.7494075 2.15867766-5.09661597 4.96464441-7.44382443 7.55380074-20.17908001 22.01097094-43.75485659 36.47128333-74.589069 34.74465541-45.04943475-2.58915632-83.51757347 11.86958497-117.50810569 47.04629015-7.2285851-43.37779501-31.23798253-69.2740715-67.78939144-85.89149046-19.15315822-8.63156848-38.46813872-17.26470805-51.87582082-36.04080467-9.33227462-13.37940245-11.86958497-28.2701935-16.56243082-42.94417414-2.96778901-8.8483789-5.93557805-17.91199713-15.91514173-19.42338573-10.87194282-1.72662791-15.10760146 7.55380073-19.36996865 15.3228408-16.99448057 31.72344934-23.6040562 66.68491519-22.93005859 102.07685969 1.45797153 79.63383898 34.42258196 143.08073766 99.86633603 188.18516058 7.44539552 5.17831264 9.36055423 10.35819639 7.01334578 17.91042602-4.45089798 15.53808012-9.79260399 30.6456816-14.45874129 46.18376174-2.9693601 9.92771773-7.418687 12.0848243-17.85858007 7.76904006-35.90569092-15.3228408-66.92843413-37.9826719-94.36280776-65.38876592-46.53254371-45.9685224-88.61576053-96.6833077-141.08388229-136.39103645a620.44857966 620.44857966 0 0 0-37.41550843-26.11308694c-53.54746058-53.0887023 7.01334578-96.68173661 21.0416084-101.86162035 14.6472721-5.39512307 5.09818706-23.95440928-42.29845612-23.73916995s-90.74772965 16.40217963-145.99510964 37.98267191c-8.09111351 3.2364454-16.59071043 5.6103624-25.27569597 7.55380074-50.17590143-9.71247839-102.23868196-11.86958497-156.65024201-5.61193348-102.42721275 11.65434565-184.24643792 61.07455278-244.40190308 145.45465466-72.24186053 101.4295706-89.26462071 
216.6721645-68.4115431 336.87626062 21.85071977 126.68012914 85.21592177 231.56295556 182.54651857 313.56914048 100.94410379 85.02739095 217.18433986 126.68012914 349.79847589 118.69584973 80.54978445-4.74940507 170.2181753-15.75489055 271.37751842-103.15776961 25.51921492 12.94892381 52.30629946 18.12880755 96.71001624 22.01254203 34.23248007 3.2364454 67.17038198-1.72662791 92.66288839-7.12175096 39.95124769-8.63156848 37.17198947-46.39900106 22.7399567-53.30394163-117.10276448-55.67942971-91.39501876-33.01959858-114.755556-51.36207439 59.50817604-71.86479892 149.17656689-146.53556459 184.24643795-388.45514546 2.77768711-19.20657529 0.43047867-31.29139958 0-46.82947971-0.21681042-9.49566798 1.88687908-13.16573423 12.54358259-14.24350198 29.32282382-3.45325582 57.80982774-11.65434565 83.9496232-26.32832626 75.85536753-42.29845616 106.47276951-111.78933809 113.70292571-195.09167222 1.07933883-12.73211339-0.21523932-25.89627652-13.40768208-32.58597776M672.59048267 831.93671913c-113.46097785-91.07137422-168.51982701-121.06819563-191.25978372-119.77361748-21.25684774 1.29457817-17.42653031 26.11308695-12.76039301 42.29845614 4.88294773 15.97012989 11.27571295 26.97561536 20.20421747 41.00387801 6.15238845 9.28042865 10.41475564 23.09188086-6.17595481 33.45007725-36.55298001 23.09188086-100.08157538-7.76904006-103.04779332-9.27885757-73.96848843-44.45713381-135.82544403-103.1577696-179.39176984-183.43732658-42.08164574-77.25992199-66.4948133-160.1302064-70.54194114-248.61085317-1.07933883-21.36525295 5.09818706-28.91905367 25.89784762-32.80435928a250.87636497 250.87636497 0 0 1 83.11223228-2.15710656c115.83646593 17.26627914 214.46006978 70.138171 297.11354374 153.8725549 47.18140388 47.69200813 82.87028441 104.66601601 119.66521225 160.34544572 39.08871926 59.12954337 81.17193611 115.45626214 134.71939669 161.63845278 18.90963927 16.18536923 33.98896113 28.48700395 48.44770242 37.55062216-43.56632578 4.96464441-116.26537349 
6.04241215-165.98251663-34.09736632m54.40998899-357.16217477c0-9.49566798 7.44696661-17.04946873 16.80594974-17.04946872q3.18302835 0.05498814 5.71876762 1.07933883a16.91435498 16.91435498 0 0 1 10.84523431 15.97012989 16.83265829 16.83265829 0 0 1-16.77924123 17.04946872 16.6441275 16.6441275 0 0 1-16.59071044-17.04946872m168.95187674 88.48064679c-10.81852576 4.53259466-21.66218896 8.41790022-32.10208201 8.8483789-16.13195215 0.8640995-33.7737218-5.82560173-43.32280686-14.02669155-14.89079105-12.73368447-25.52078602-19.85543545-29.97168398-42.08321683-1.91515871-9.49566798-0.8640995-24.16964861 0.83739098-32.58597774 3.83031742-18.12880755-0.43204976-29.78158209-12.94892381-40.35658891-10.19637413-8.63313957-23.17357754-11.00705657-37.41550843-11.00705657-5.31499747 0-10.19637413-2.37234591-13.81145222-4.31578423a14.16180529 14.16180529 0 0 1-6.15081735-19.85386437c1.48310897-3.02120608 8.71326515-10.35976749 10.41318453-11.65434564 19.34011795-11.2222959 41.64959598-7.55222964 62.25915463 0.8640995 19.1264497 7.9842794 33.55848246 22.65983111 54.4115601 43.37779497 21.25684774 25.03374811 25.08716515 31.94025977 37.17198946 50.71478532 9.57736465 14.67398064 18.29062981 29.78158209 24.22620784 47.04471904 3.64021553 10.79181723-1.0526303 19.63862503-13.59621288 25.03374811" fill="#4D6BFE" p-id="12865"></path></svg>
\ No newline at end of file
diff --git a/channel/web/static/logos/doubao.svg b/channel/web/static/logos/doubao.svg
new file mode 100644
index 00000000..d67b4933
--- /dev/null
+++ b/channel/web/static/logos/doubao.svg
@@ -0,0 +1 @@
+<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779261485522" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="5381" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M958.976 439.808C804.864 336.896 642.56 321.536 642.56 321.536s8.192 235.008-10.752 306.176c-0.512 9.728-11.776 75.264-43.008 157.696-10.752 28.16-24.064 55.296-39.424 81.408-40.96 74.24-89.6 127.488-89.6 127.488 119.808-48.64 205.312-92.672 309.76-175.616 122.88-96.768 229.376-254.464 189.44-378.88z" fill="#37E1BE" p-id="5382"></path><path d="M329.728 395.776c158.208-100.864 308.736-78.848 312.32-74.752 0.512 0.512 1.024 0.512 1.024 0.512 0-14.336-6.656-60.928-13.312-106.496-11.776-60.928-22.528-124.928-23.04-133.632-170.496-139.264-356.864-78.336-448 25.6-61.44 70.144-103.424 169.984-102.4 224.256V762.88c0.512-12.8 1.536-20.48 2.048-20.48 17.92-197.12 271.36-346.624 271.36-346.624z" fill="#A569FF" p-id="5383"></path><path d="M792.064 272.384c-41.984-43.52-87.552-88.576-122.368-125.44-33.28-34.816-59.392-60.928-62.976-65.536 0.512 8.704 11.264 72.704 23.04 133.632 6.656 45.568 12.8 92.672 13.312 106.496 0 0 162.304 15.36 316.416 118.272-0.512 0-83.456-80.384-167.424-167.424zM549.888 866.816c-2.56 1.024-198.656 107.008-292.352-30.72-20.992-30.72-31.744-68.096-33.28-106.496-3.072-74.752 5.12-227.84 105.472-333.824 0 0-253.44 149.504-270.848 346.624-0.512 0.512-2.048 8.192-2.048 20.48-1.024 32.768 4.608 98.304 43.008 155.136 52.224 78.336 193.024 138.752 328.192 85.504l33.28-9.728c-1.024 0.512 47.616-52.224 88.576-126.976z" fill="#1E37FC" p-id="5384"></path></svg>
\ No newline at end of file
diff --git a/channel/web/static/logos/gemini.svg b/channel/web/static/logos/gemini.svg
new file mode 100644
index 00000000..8b63e171
--- /dev/null
+++ b/channel/web/static/logos/gemini.svg
@@ -0,0 +1 @@
+<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251750646" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="29551" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M214.101333 512c0-32.512 5.546667-63.701333 15.36-92.928L57.173333 290.218667A491.861333 491.861333 0 0 0 4.693333 512c0 79.701333 18.858667 154.88 52.394667 221.610667l172.202667-129.066667A290.56 290.56 0 0 1 214.101333 512" fill="#FBBC05" p-id="29552"></path><path d="M516.693333 216.192c72.106667 0 137.258667 25.002667 188.458667 65.962667L854.101333 136.533333C763.349333 59.178667 646.997333 11.392 516.693333 11.392c-202.325333 0-376.234667 113.28-459.52 278.826667l172.373334 128.853333c39.68-118.016 152.832-202.88 287.146666-202.88" fill="#EA4335" p-id="29553"></path><path d="M516.693333 807.808c-134.357333 0-247.509333-84.864-287.232-202.88l-172.288 128.853333c83.242667 165.546667 257.152 278.826667 459.52 278.826667 124.842667 0 244.053333-43.392 333.568-124.757333l-163.584-123.818667c-46.122667 28.458667-104.234667 43.776-170.026666 43.776" fill="#34A853" p-id="29554"></path><path d="M1005.397333 512c0-29.568-4.693333-61.44-11.648-91.008H516.650667V614.4h274.602666c-13.696 65.962667-51.072 116.650667-104.533333 149.632l163.541333 123.818667c93.994667-85.418667 155.136-212.650667 155.136-375.850667" fill="#4285F4" p-id="29555"></path></svg>
\ No newline at end of file
diff --git a/channel/web/static/logos/linkai.svg b/channel/web/static/logos/linkai.svg
new file mode 100644
index 00000000..44628cc3
--- /dev/null
+++ b/channel/web/static/logos/linkai.svg
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="168" height="168" viewBox="0 0 168 168"><image width="168" height="168" xlink:href="data:image/webp;base64,UklGRpIfAABXRUJQVlA4TIYfAAAvp8ApAAFIbhtJkhAOjR3V3f//cGZttzlG9H8C6n+3+zXUO6SXUHUv6DW1tVHT81JMdxWKp1/5/dkZVUnY0gt8ExupkBL6jY5tTy820mOSsBNLINt+w7GdqM/6IR1xQhKPQE6QHlJs/2YjZuynaNvukhNLINt0PyFlA4ktAQmjB9TLr4uBRcgG6QElOboLFqQCx/StX9burtp4VIwTNHe0HJtCti0xM/H0jaMTp0+Y2NMgJ0iXdCRJnxST2JKQk7khuAbEtFASS1dYvr8tVCGwpQJi+krpA67tBtn2BtBcQUASKEMVAqzeIV2ZyQo2UDCQkh64y1pV9W2oqu9Xeur68m89gOO4jSCpIAiN/f9LL7zcPb092YYG2UZqRnAI7w/5kSRJkiJJepplZraFqsruqRbJmWX+/y/2GXvs/wRY+MnSA71HsT3x4S2KjduMDUlgvKRjQBhE3AXLOxQAMACwgLsOC73DVQC8J5AWAIjlDWaEqQMotgFev1CnsOErPcSga8yCfQBFI2i5RfUJTwxQAHeBWOlWBWeHWIkC5q0KMy4HT9hAXAD4KBa6Uj/KqL1mAfBPDOJubJwBoCtU3zZ8ogFigLgLgP/8owu1YkZEFTsABCbFBgCcxfIdc/k06m59A6j2AAAMMMMfALHSV0uYhQBQnY84EmJgavmGb8u3Ueo5RhQAUB1ATO4CYGH5gu+Y9aMMUAGA+MpTaAAAN2OjI2YZCyb2ceIIAHEOio2HAHjPh6Ply2YJj4zaCUDgALYAgGKlk+VvKcCChgcAsQcAiAMxYwMA5hHzAwBwl7sAeDpI++CCQAM60AGWBIeLBcCpAEDsRHECdwFoO6Hl8wnqNUB1GqiIEQAg0AGWyw9mZwAUYPlUH0YAxRhgPblI45Be+eGqgW3bNoKi/de+2NFXX0aEArdtlB3k+EavmL9b2B/gG3gJehEU8GuwFdCcISjg77BPYLnxXPngi4BzeXQvGMxah7zzwXZySdsWTf8Q3y4KXEE8I0FUx6HpXa8YBs12PdKD6Im2fapJEfdI3fMoPiD+cMfhI4N7Lv37Mniy/38tSc6pqu5ZJwICLMGwsULvvTLvvffea1zVDe65ZW7dO/LSR14cFoGoKP+FJz4A5H64StFEG03Y6S8uCheBDBdFnbYQeEWHxzzDQAgEptLC8GWhVIA2/acXxx5chWHTDX+pDACBEJhFMLEoHCRiIATDROaflih0rki8FsPBALe2bdXKPvc+d41waIsGKcctdKLn/t69R2IbSY4kMfbOf1/1/Y/KmAC71bYtk+T8VFVd1V3NAz0rZmZmZmZmZmaWx8zMyzQ8OwzNOMxMzV1djdVV/y/qmYUe/zF3jjJQAMx5jPubJZbKFasEQawlzGLWGmuVwLqfWFMpTApCU75SWLDE0lhiayxBBDxHnqyNYD2xXPk6/zmy+hxZ6/7CAJTFWPI3hR1L55TOWmIpBv1xbAzrCmOQO+ZviaUAFIJMBaAMWK6OIlACnY4ogmVLaKGnz+1NYf21BJZy0FEizDyW2KKH///ZSPr8krRNUrez173ZDnaf4dk2/rRt27Zt2+baNm9U99rBdYy0UZPm9/MtSZIlSZJtEYl6XKq6+///r/8g7+kRoUIxAQvg5/z/U+t1ABenlwgvkUVHRmcACV6Qioza2rCQCCM8Fmzm8p55+rooLKFhFwNy6PFfTdjHiO8E+OzuTByryzCpAOVCxqF99YvFt2mVzxwsBzzbzXjh8IGvP0FlLGozlSuF2EVAohxy6PeBtv
5uGd/6eGIEL3IiAdtCZe2NipT+Qxz2iXeG4JkLdkjczAw/n6C23WTzPe5tr/3zUG0zKQWo7vRGDWRspLpsCmvT2BE8c5ooVjn9q9TBSoYCbx62wW+gbrOCtnTbawU24ZhS4jCE6TNEPfwlImFO20W7lU6/+NToWGcFxyTrbdljPGKfNDzdXiKLMvNa10BxalarDI8ksvu3v2fhQj0n8SN6Z317Noic5WE5WV7GwYhm/DZuthew9dfJLR6DX1LUoKOYDTX/LTAc59Zcf7ZtzOSHYbEM7NDyFagj+YJIGFFppB8wI8mBEtyzVmsz3mO/ZOT+XFQl3b4n3vzh0OI5FAahI983ve/QqV8EI2qKFs7kZcYQUGGJjJ0rEmr0PTLn3ET9HFyPKQjhIIuAI0/ozPe25BPqdtLQBTTPwHIRDimAi5UJ76zui7dHT/zDOYgMtO4p+j7WweAYd335y0FHjkOu+b3/rt8lANuZ5UCzH6/6lsxiy7Oh5I5V8pNs4Zrjhzx8/4aGv24x4kjHG/xK6Q1n9TnL0VjcGAgnsI12SF13271D+P692AhgdLmrOZk8mIxPfbXhEj1692SP/F1LG05FJqZMc7lWd8zQrQnR9lGUqDR4syAkqjZahuQLHm78Nr+nwy/44WDBcxZips/+BGIZC4YG4FiKToFB6x6X2V3sts/uEJLXbur8rAe6mBuAC/Dzfrb/1/dWeJLVzvCUz8U57bufUe24NPQ6wQEqwHCK1DUXTd/Kd6sHbRuVBzbrxvz2aQSSDfBEbuNMniwC1bJLhOAQnY4DDuZMqpFQNJWJCUfz5PZM5nM/14API+D4ie2y6VpiMDYkMDQwkYMZfQxAxkhmXk4tEhFDq4tTY19caiAkd8B4p1zIjjflB4hE3C6qkm1s8uJPUxipBiHIp8QBBtHGAvr2fIVyhk4hpYgyu+YaC4kVJtOpAmN0mHiYKJWYGQdkuwAtxv5uoPd88Ac6SSgSwnCvIJnlyTAnTrtDO0ZTztDZuXQgu0WBiQWmwqO1O3siRg+I+23Sk6FGbf7Z8DDvNchKAu4OYEiqgBlIIFo5ChtjX0u+nTSz6xmvRo0YKmkDPs0s0DvL3h6Yde47bNfGxVsiwvO+FyoGiIF9Aeoq2cnqI9ByLO91UocB5U9TFJa7IIBhb+icQB7Y7vmv7LaH6FneuLWExnFVTEWQiRfoxVzl/azEU7/06EKA1sYOfcg2mSkiQtQgKjc+5QTVwcsNzrTbSx7HjC1amhWXZcvU0EmgQTZjpBwklh+8hFmrnqUT0hUoaAHd77CIIACGDq+6dr1E+BnDwW5bjEQa04nDe2ApCiRg9GQ6wqBYvPQIZgLKbsXI18LHfKQupBMiAUsXvBy3yk1wFlgAFN928MFJOUH5pbiLMYQQJUApJLHsTGBHt0jeCPOgNpJPx9Mvg6mOhRARgXuC3O+VaIYgJzv60dYtRhAaiUUHvPf/gheWpSiQCaIEWDp2Pe1FqtlPt3oMPLFbcR3fRdbMUzoJK0A7IqepkyFePgGc6NYWfJ22ah5J0TYoKGYeveQsieHd0rsSIOiBFJamEbSkxAwYJYvdHzqG2oBdffY+cBxjCIZSYTAhelDll1cH1dHg0JrJwbYBiFtIxg6N9b6LuUJYsmxkABUYH2a+/kvs8gu5HIXdpnGWqJGcpgRPEDjWizwQsS4cOlG6B3t/qK/i6yNvtKZXGNySqBBPsIYHShOSAZkVcpo4YWe9hPSBJk9pQ0jQSyIENnIbtYppduHJMEoMY5hug4/9vgZGgEnr7yyCvCgYouYooA8gcQbN4A3rHiTloWa/Cn2VUlKrybB1SryMmYDie+GMRBd5OWX6A6JVdP3jb8Yd98BYO5NU5aALu0skECsNvJPXbKfeA9svFblvQ0OaQTOVByGrCIoQLvBqhrDdC2dmAFrREUS7pgTLEUZPhckTjwQDZAmBgoDFS1GDry9OefOi8snVkdWnwY9MoGTOvWOkd43uKNgpnE
dnAJG42TT0dZeYJnhfJIMJd2VydfOUAutBSFAzIFNUJ7Kjlq9M153Sr9jxFceZj3kpx6Fnkpr2IWVKPEwSanc2F7Ky2eiUFhgGyzpWuiFjZABDtE8JJKFhNGvD4tb95POW1cHuH3NwYiYSCm7p7NzkTlk7OkqLKfxit8l26CrWqTDWOYgGOiUwubOUiBR4BwQAEXXD96vl53wODp6uUGz/imPBPPDtRopEiWfKec4m198zM/+CNpeqhLjMZXAlFrrFSXyB4QEQIAMIlgdMh44f+lAIwiAph0TL+V/5iBeMIQIZCYSDVwjQDu7wytdNJsMomzWpcbZMTQYyi5snkZmhK5KAjBaR9iCtJzeCR4HTt0IkfZCGD05UY7WzCiQTg+bOwbnKT7SXY84Pq+dGm0qldnOz8ufDjBNHARwYK2T3qFojvdOBoBCqUwdlbqxMWlkzhgOiNoDopaSDh6moFAEBMHdYUAjSHXzK9LDBU/7/sE8iyLAOC80NBcjBCSZ742TZvyIqkUwQAJOZZAeRuAAmTlZOobljzHLT/v2BFAAKg8CpcnOeCgf5kNf/36sW2kwayTeM/Ye3XVoMEgiB+YS5W2U4RY3A+HEoW2Z6q1oDMoAEgehgBoM3FS6fWJA2xBftZ/eDCpBlKCAiUILwKlV7YnQbTbhQYccPkm/wz/dRKzC4IzHpmhA0M9PuyJvBv6dXbqQrMwiWAxwqL5oDxHis1s2PqBVpmPP4adQADMABfdKGsp2+mwxAtIkKydCI+xZrUyediBgzQJK7m5fhmtfEJhxrtF/SSb0ZjZIEQQIVJF2yMMv26fn6JtyhNkRNMl2aCgjxBqAKcskDdYrZR9cDnRlhQ0dnKnOtUisQsKBobSgvHykr5W6++RfYRnymL/KO2zqzHCABkXmPBQYfOf+jrrCbWua5KzsGrdQsyE0Cso84Km5Q2ZOXH70YcQKbZ2QjZznhAuRvjAQS5PjYS/qEqNtmzCKmBcDBOCE7Jb1wLoi7CJJISxweHQaa5e3CFiKxjR26ozaThu61EUACwqUJbWI/iz3GD0GFaxzDOq+zdDf/ZEmoF3gBHJmZG+g7bA+EkQvRzi6KcK5HsGwJSYD7UUCwa2glMhVrQ9Ea2wi1XrMIUy7Vfhhxv2253DCdzC+Mq2jG/DJ3TgwvRUiy7F0i79Chl6WycRWXUGQd8ibv3PwuORliSQYJDAKVsNm1LpGMhVb40IfQsj/9ekAiYdMjyVXvRYcoHX38jx/yZjGehw+ipywLLzCKIATgJkQ5inXrPn0gfBQa3eYNYBIB0RqZs0DUJ2Rq6yqWUKraALQkX9/25TW7UYFBgBjcuvoagXxLNj/1cZvFhx5uBxd95qZ3ESSY0PUeqshvi3J+Fp2l0hrzDyJcCJJYqgoKLl8vk7mHA94gl6P4juNu8MWtFcOpoosqgAogXq4BzNaN6JzJtklE/9K2lMVHMxkCCEgAk1rFSpYk/Vbjpl8SMgdMfvzF2tpxTwgiE8AcDyNw3hr+sWOp51wBbbo3S15BwI9QQDqB74PAFmK+zBzzof1GQYq8ORNNXBwRkUkAKcDLeynMsNHbXwladcT9NY/8l4KogAUo8eAlGSZt8IHyFTnlW+/rP3W8nhdBQniCrky3tVrathLZOaHNocgjJCwlMBJjUAMIUgKQGF5GrCxxDlr4H2qbQmyjRD/d204cjyAhyCrRMDwsg7oIM215ZtElnjZAYub2RMYgvwbYAQEI3Hu93tJG/PxM9bFRN/DmW2nWd5DIWqFiWGLccn5ys273TJGcMnUaGxB8VKLVEKEUdhSxwzl+uhhGYmc0miRqZD+EcxaAAJvKeSgcYovNZjOIr6LQyBAtI1yKugkswCRylO7Khpkm/libJbRKI8WWNjNep5IgM+gLCEEjC5+7TR5wQBrat4UPaWSjZX6yECEAQsAeXWJNbA8aA1QYtDK1WGk3T1RIPIkgaC8cJfIm3O7JB4/llBrCIqQ7d+mxQILIBB
qGCS6LQYMvkqMNWflpM2uzuptIpwIicWPE42XdONROm5shSPFtBg0DynSbPyzfQnel6VfdDJmA+L0XZ2trJFQZkNPISg12SiSYEUgjCCsCQylx7VKJ6NwWr9X931Ab3oOuqAjzDsZhtECBj10CyNsMAfq1VI8sKkg6JVUfBnd6d+VxDg1lBGQVYZlfycmfLwuHcWqhF7F7+mFpQjQEtExUcpYMRvM+85Jt9vYZR8sBmv2aASvZjgcwifbeXibU3ErtFbtNIHyv207+vxh7wt96J38yVZbJP677jBegngFaGW2c77I1VeBxKQQCctYlCitiB+3xAZFQjmqPrhq9Qc12vp5QEogWYZAZbLJn8roOPNiAlg7H2IGxuJF0DsQYcnaYZRBQaYyTW9Aag2vO6EN/0HG3Qe24DLygT1Amian4bVF3M00/S1iKpw3Qej7epQQkElChxzNfXpQdkgcncPlUezhmJjTElOh4xNCB0w0sU2nEUpnwsNXOAiCW+heOV6JCtGrsC1Wjq6+XVr5J+EB5wsBc9Z4sct7+XyIhKokmBEEio8LO6DnW0GXTOEJxlWY3/ZJcVMpA+DTBAEHnloPr+YlDckCcWfEd4524foLqclli2UAGUHgjQGXoqKGORG3oiCPZol10rIxlY4A5g2KV7yUT4JunAR2y9vrw09SCZAIL8CCrMpg58Ako8UcrJ3OuSbAgjMCjAUUgUiBk8JFBxhQ6JiIrfSQHzbazQFHGYTKBQAHpDKr8juwEZ7FbVpmHaYpNmSGLDtd4merIaKgpcDo3yGrZGOv2vOl58J/zUBSA6IRWH86qaE8CmBBMTJUAMCC54SDUBh8SuXqXe9pYCESAX8HLvaohyLSzTkssRl4uoGOWtRQzNnBbEEgCv2LunO4o7IcfpP4TxuNchc6YdDrnvoVBhkCAyoIGj9kyTNOy2qfYQamSeQzJhiYiEw4gQfW6Obcb3d60kw8bVblUPckEnZdjbt7FmmVDOV7xJ1O6wxEZx6XzV/g9nRjPSSmxaNOVtQLrzk0gBFIkEI1oIXfcNhAS37lChHN0iRMQUW1YDSLmYMgEd3PY2NaMXgOITy8TcgcdUYD+EQz7GHzqCA10LE3wWjQh1xbGOcvoWrnd8/fOBEQlU1j2ZQK9/F2Vxo4o325qVh+LGQZhHCoEEAKckOgUMU52snBAf3lUB4Geq4+5XUYHLKvwK8CjayiW+c0ilRrzfOd4bnSZmDyF/8JJUIjEoAfk8KeNii/dABnZxlMBRMJevgJCNYeoEdyQBCbkNYl0aVxD1LhEnPA6TA1vx1TTMxJo0JNMGJjsjfY+DTGMksa5a+il5MK5H0OholUY0OuXzdptaDycTGWFkZpVXeIVp3glWU4DmGCCixFh2GOAb8WNLovKcvJuRDTfQuQxBEwIB+zAYCF1GrcU0AugWtyf3JWxZUgEQSSYEE5AwJ7eecGoE2jjmq+TPrl+e1ElRQLUSgD9/mHY11Zm8QQ6D1mIBJFTLODKw3sgo1JzAgcIYGI6DEsv1sslJE4usDAoR+rrE9SRS3WAoD1BsA78/xSKhJlibVQ+gctI9DRCESSCmESgU6vTkwTtcnh4IuujqBgrxjFAyDD6qIKS/N3z92tv3SsR+SJIJCzkJhegBtRcEqlBcgwI2TuiWSh3K0a+Be40L0WrJe39axaVaeES9FIUYiXHRj45xLkZRQgBhOhxL/Ebz/4a4AE+XKhoTonYe8faTKNkBK0qKdArjeMMJfzINtRGssdP3gflW4BQEjISqAnvifI2rWCaEE7gMiCISl6LuxXC/nFSIcBzdoCylgbQ/hdLtYsBSMHm1yGIVOrnIonaRwp7kPj2b21dF3I5kEmZfYtRDKdh3NDuGJk52anEYmtibmz9+jBHzN+cEIGxCdIiAWK+I2CMzIfS8w2HpqHngn3EzmrX4yX8RAaqJYIiaKQXe/USm8NDfW0hxKEBLRnd3BJ1KSuZxiDVCJGNDmYR5ERYP3
DFchTgvdJIMi0SIBOW5ECnxYNkDTuxcqwMklXuHCAhEILy1+l+mmAxjMROmWOymqU9csRVkKkAqJnEbx6gxOA/b/p1U3zXcBUs0bC3NyZ0HiWAACJny/71eeAt+7+hujiJSP5SKC47gjzhUTcAlWWB1ZaZo8e+iNy3hQ/JZDsyH/o0c4ULAhgcY4MiTH/0/UwcrB2Vu4/qRItT9IASck4APKtY+uqcBzpr2NOL0GNIUztSzQUtvRem2iY+SHZitvgUa9lXVsqKAzUuf9h6+TSIewWSAJTvfm52hjFyAQd866Uh3aT/4kxHwJi6KSYHUwAgrODqvVZr1yTaOnCzRDvNAK2PyAXQUAaBoQk/x2bPGNQGDzrg0wbGsCRkNSoGBNA/ExQXm6ZEzM3pI7oMuvh5ZEJ4PdlDQqQOzmtJH90GgYy1FM7fKTGBDZxsS5ZFEEUJOLm8PjZ/1+GU9N1bvXZyk36LEABZac1D0XrG90y9pgOidarGULOM5Ws4UMaSErd4j+Ey4LE+yTmFtXNKTKg294AivsaqtyOkgATBBLAydvj5dPB0hYqd3nJceDCeKCQBUBWAmQT6tW594WKtjRYzkgpxTbB8bhBHFaCKlDnJ0gG3vCB2R8zOx87kw0rJm8avkNEFRBowTS9XJxWnmb28z1DkNpBb2jG4rG0RdIKEipd+/jJG5IDbHnlHa4QPQ7D2JJ9ZCN4RHhXE8iPLUtS5zPWeLjecc+osIEgwKUjOzzB1wkCFZRngA3YYJR5h1RVCX2yj4kZEeKHTAQ6yakLIgtuBfnYoPqJfJ5ku0zd/XDnm+ybFyyhEMM2MG1fF9V4yDOezu1jS2coL6zizDzm0IPAJSjBWSn7oy4VXmz3X44na0BHNKvYS1ZESMhJkOiQhlvmVy8VujeDDUIZrXFZ0KcLIdFXpPC0ZTVNHiU/BGOf3+15SFLErSLK+SN6Yjv992O3Hs/2DnQHA1r89+fm56mjIKi2BrFdVB0ToT2uNi9UaGYoabuf9L3uxRgdmgAHoHoPoYMIH/P8U1vs+6q7KtsKJogtmpiKHEH90vD2NsBO9ctyP6O66qOx979JBqHDqafIbIA/yDIoCRkx2gqQVdeiuTCwLJWU5p01odXbL/gm1wYMk9WMA7o+r4YAAJKBRDpyd0feSIoCR66S6rivjcO4CZo+9LAF6sjFa0NBTf0W9S3dy3amhipT9jvPIYiKaXwySnHUB6IodYLFoow1zpkBFCIo8JhNsO/rGalWx2dvo9a9pZMsUxksyQIXWB1FrzO5MwillKOsEtQ+hzqPXsc37WY3BPWRIMoT6vkf9yL5cl73rbqQ/oXEt+13YiH1I1TMoZZQYBes0R76IMCEMwISfq8LYfMt6h9qgInnd7dmNZS5iFYNOICowch8R3TM2lSQGxRr1WM81Cej7FtXpEBKG+N+9Uj07pJJ/4VJB/X4Sxvud4P3KF7enY0H0Aj0zL79cCNThSpZ1jMmy+ti/f8Qqpahc44/+huA6NV/fiwHJkSZ1OPXZ0GbOCAtBsN69+3AK8pIEFEboapBZjTRQJJESdHR7l5S8rzEI0BIEw1vLZ81HQbcXvatbMDqQUCZuDwNNi3bnFiPbMHSSNg0G1XvzKi8QsKew4A3439cF7PiWV67XSSMFMwz5UdF0PAGpTngRyweUQWQJ0OzswSVk4o3VA6E16PCIb8DFQ4Y0lYAAQqDEDn0mowY0F1SsDeoCjbiCJ3m2DtAnIghAHV8ywaZ88yc312sEYEBmnN1yMoRFYLgVEUsQYEkAXMF4pZ1DYoyAOD6BJ+5bh4PKxh+jMHl+65VoLRiYSvAc4kboKCzKipHoJta2hnUfXCa6H2KfgYrg5fICfcf6KBNrrZFitcBDkFSnAhBdVlCByITKBOA2ORSxLyNIzOXKygQfUEAA6MPUs+WrMcDq+fcMGIgkRaZkGDRTPSMHE7VBkVtJ97K3leXuFTSZaF/OofLhD7/fiuJ6jTAM9Rb3y/
rFEF3ng4DoKxBJKHJKcaUFDznJ6IC/7z+CZXkekNMAUkGJL55og6OkQ8UhUPmQmb3sb/6/rBZfh6vhCxPRgUtXT2/wUpoQIJXX+qKGvy7m1latgkUuRjGOYAMQqhAKIJggrgzofG+AK1H/ek4IkwHyNCJ2EL1Wa6j/99pxhUZIIuRfzlEi5KB5VomxjUrrvyCVj5DRcZcCARwY1tqw0z5Iv2xUa4R4wYA6CvMKfg+0mJOhwwmWg6qYoEygyTvmyV0UYf0UhATE/WmQ0ZFNzqz5d+vc5XcgA1DVhwzp5VMrrO4ZWqTJY0S07BMSAW4BJjDBw+dn/GwR62TkxWU+pZI+u8WE18yqrCRkWuAsq8PF3oGA+UXLuwZ7Qafx0c/FjM+DfgeqITqMCSwAP37uAh9x9/5dixjbALHLv/7Auy1mSNALMnHnjGvu4Cyvnc574gTWCI0OVYx+J/7IK6nzWE3FxY4iJBWhIvm+II0qI0Vylb2rNI4Xqx3+vyz+ARkGyOgm27uIqWlEKnaInUr8habuL1R+Mxu2/G13cc8DK0VsoHHGyGr/rFb9kkySCKiSqoSi6T98OlmKPDcCGId2PQx9M8PxfMI3FkYhFaEiIY1XO8QOFZdUUmXvUil/+uyXkNMNDAj9NHTZ2MIJsqK8t7F7tSujhCRUsvahNb+XI2RPnZZz3waQgB3gZU1H6KjkVfcjRRISRX/3NEdQ9/PPDviCyDF38pEcneYdR8M4uVa7tNe4d51gvNqNamUn1Oq7QyyvNLxvOFc4rQ+9dFMKj1JizuBhJ7FT6ygGHq4061vu5+1hpY88s97IJu0uBHxZ3Y0DGt++Y//q26EqGfCEpWtHimeNAGR26TRkOTRAgBCtBRAgWgUCEMDut81e17WLD/iV1VWgLoKR4xCn3/2eSydAgKtzEwBDx2G5CoCNr7DCRwHR3lBXiKDKfZjGB8Xc25KXyuUS1LPonOoaxLr9nP9/hkY="/></svg>
\ No newline at end of file
diff --git a/channel/web/static/logos/minimax.svg b/channel/web/static/logos/minimax.svg
new file mode 100644
index 00000000..79557d77
--- /dev/null
+++ b/channel/web/static/logos/minimax.svg
@@ -0,0 +1 @@
+<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251514432" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="11888" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M415.392 475.808v329.984c-22.304 111.744-170.56 82.944-171.2 1.92-0.672-101.824 0-202.976 0-304.064v-117.184c0-14.656-3.2-26.24-16-35.392-24.96-18.72-54.944 3.264-55.584 30.208-1.408 36.16-0.704 71.616-1.408 107.264 0 28.16 0 55.52 0.64 83.648-18.368 123.776-168.32 103.232-171.808 0.704V487.04c0-28.032 54.944-34.624 52.256 7.36-1.792 20.8-0.64 42.272-1.344 62.912-0.64 36.8 55.648 61.6 68.896 1.408 0.64-49.632 0.64-99.264 0.64-149.344 0-62.752 17.824-113.856 84.352-118.624 28.8-2.56 47.968 9.504 66.336 30.304 7.04 7.36 23.68 30.72 24.32 56.16 0 23.456 0.64 46.752 0.64 70.464 0 46.72-0.64 93.76-0.64 140.48 0 30.304 0.64 60.256 0.64 89.856 0 37.536 0 75.552-0.64 113.152-0.64 48.864 58.816 48.16 68.352-0.768 0-57.632 0.64-114.56 0.64-172.192 0-141.984-0.64-283.968-0.64-425.856 0-14.72-2.048-55.584 5.76-70.464 41.504-101.12 167.392-56.96 168.544 26.72 2.432 171.52 0 344.896 0.64 516.8 0 59.616-48.416 46.816-51.104 23.488 0-178.88 0-358.4 0.64-537.024-2.368-44.832-68.832-38.72-72.672-6.592-1.28 36.864-0.64 74.4-1.28 111.232v219.008h0.64l0.448 0.256h-0.064z" fill="#D4367A" p-id="11889"></path><path d="M610.016 473.184v242.336V143.648c21.632-112.512 169.824-83.264 170.464-2.176 0.704 101.12 0 202.912 0.704 304 0 38.784 0 77.728-0.64 116.544 0 15.36 3.776 26.176 16.64 36.032 24.32 18.24 54.24-3.2 55.584-30.592 1.344-35.488 0.64-70.976 0.64-107.328V376.96c18.56-123.776 168.128-103.232 171.264-0.704v310.592c0 28.16-54.304 34.848-51.872-7.296 1.472-21.44 0-267.104 0.768-288.64 1.28-36.16-55.712-61.664-68.928-0.768v148.576c0 63.68-17.856 113.92-84.96 119.36-63.264 1.504-88.704-42.24-90.752-86.432V271.328c0-38.24 0-75.552 0.64-113.088 
0.64-48.864-58.784-48.864-68.896 0.704V831.36c0 14.592 2.048 55.52-5.184 70.432-41.44 101.056-168 56.864-169.152-26.752v-79.616c3.136-53.6 48.416-40.864 50.464-18.176v94.464c2.432 44.928 68.928 39.488 72.064 6.656 1.344-36.896 1.344-73.728 1.344-111.296v-293.824h-0.192v-0.064z" fill="#ED6D48" p-id="11890"></path></svg>
\ No newline at end of file
diff --git a/channel/web/static/logos/moonshot.svg b/channel/web/static/logos/moonshot.svg
new file mode 100644
index 00000000..20d60b5c
--- /dev/null
+++ b/channel/web/static/logos/moonshot.svg
@@ -0,0 +1 @@
+<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251592968" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="16416" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M117.9648 684.6464l342.30272 93.57312v75.34592l209.7152 58.5728A428.99456 428.99456 0 0 1 512 942.08c-176.128 0-327.53664-105.8816-394.0352-257.4336zM83.29216 477.42976l407.30624 112.64-9.6256 37.00736-6.0416 35.0208 383.3856 104.96a432.5376 432.5376 0 0 1-65.10592 70.32832l-688.18944-185.9584A429.4656 429.4656 0 0 1 81.92 512c0-11.63264 0.47104-23.1424 1.37216-34.54976z m57.344-182.4768l429.07648 114.21696a279.94112 279.94112 0 0 0-23.06048 35.55328 201.17504 201.17504 0 0 0-14.70464 34.93888l403.08736 110.26432a426.8032 426.8032 0 0 1-23.552 81.7152L86.54848 448.7168a427.25376 427.25376 0 0 1 54.0672-153.76384z m158.47424-156.75392l404.23424 108.31872a190.2592 190.2592 0 0 0-32.80896 24.90368c-9.13408 8.8064-19.8656 21.4016-32.1536 37.74464l285.24544 77.78304c9.216 30.45376 15.03232 61.8496 17.32608 93.5936L156.61056 269.68064a432.27136 432.27136 0 0 1 142.49984-131.4816zM512 81.92c142.90944 0 269.55776 69.71392 347.7504 176.98816L337.26464 118.90688A428.50304 428.50304 0 0 1 512 81.92z" fill="#000000" p-id="16417"></path></svg>
\ No newline at end of file
diff --git a/channel/web/static/logos/openai.svg b/channel/web/static/logos/openai.svg
new file mode 100644
index 00000000..b7b1fc50
--- /dev/null
+++ b/channel/web/static/logos/openai.svg
@@ -0,0 +1 @@
+<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251225589" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="9015" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M881.664 431.488a218.88 218.88 0 0 0-18.176-177.088A218.624 218.624 0 0 0 628.992 149.76c-40.576-45.824-100.288-71.424-162.176-71.424a219.136 219.136 0 0 0-208 150.4 215.68 215.68 0 0 0-144 104.512 218.944 218.944 0 0 0 26.688 254.912 218.752 218.752 0 0 0 19.2 177.152 217.088 217.088 0 0 0 234.624 104.512 219.136 219.136 0 0 0 162.112 72.512 219.136 219.136 0 0 0 208-150.4 215.68 215.68 0 0 0 144-104.512 219.008 219.008 0 0 0-27.712-256z m-324.288 454.4a158.08 158.08 0 0 1-103.424-37.376c1.088-1.088 4.288-2.176 5.376-3.2l171.712-99.2a28.16 28.16 0 0 0 13.824-24.512V479.488l72.576 41.6c1.024 0 1.024 1.024 1.024 2.112v200.512a160.512 160.512 0 0 1-161.088 162.112z m-347.712-148.288c-19.2-33.088-25.6-71.488-19.2-108.8 1.088 1.024 3.2 2.176 5.376 3.2l171.712 99.2a25.984 25.984 0 0 0 27.712 0l210.112-121.6v84.224c0 1.152 0 2.176-1.024 2.176L430.464 796.16c-76.8 44.8-176 18.176-220.8-58.624z m-44.736-375.424c19.2-32.64 48.896-57.856 84.224-71.488v204.8c0 9.6 5.376 19.2 13.888 24.512l210.176 121.6-72.576 41.6c-1.024 0-2.112 1.088-2.112 0L224.64 582.912a160.448 160.448 0 0 1-59.776-220.8h0.064z m597.312 138.688l-210.112-121.6 72.512-41.6c1.088 0 2.176-1.088 2.176 0l173.824 100.224a161.088 161.088 0 0 1-25.6 291.2V525.44a26.304 26.304 0 0 0-12.8-24.512z m71.488-108.8a23.232 23.232 0 0 0-5.312-3.2L656.64 289.536a26.048 26.048 0 0 0-27.712 0l-210.176 121.6V326.912c0-1.088 0-2.176 1.088-2.176l173.824-100.224a161.152 161.152 0 0 1 220.8 59.712c19.2 32 25.6 70.4 19.2 107.776z m-454.4 149.248l-72.64-41.6c-1.024 0-1.024-1.088-1.024-2.176V297.088A162.048 162.048 0 0 1 467.84 135.04a158.08 158.08 0 0 1 103.424 37.312 22.848 22.848 0 0 1-5.312 3.2L394.24 
274.688a28.16 28.16 0 0 0-13.888 24.512v242.112h-1.088z m39.424-85.312l93.824-54.4 93.888 54.4v107.712l-93.888 54.4-93.824-54.4V456z" fill="#000000" p-id="9016"></path></svg>
\ No newline at end of file
diff --git a/channel/web/static/logos/qianfan.svg b/channel/web/static/logos/qianfan.svg
new file mode 100644
index 00000000..a9356678
--- /dev/null
+++ b/channel/web/static/logos/qianfan.svg
@@ -0,0 +1 @@
+<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251568791" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="14450" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M96.20121136 636.3124965c-0.1472897-113.41305959-0.29457937-226.8261192-0.29457937-340.23917879 0-14.87625845 7.65906378-26.51214381 20.4732666-34.02391789 45.51251353-26.65943349 91.02502705-53.31886698 136.83211997-79.53643141 71.1409192-40.94653321 142.42912809-81.59848704 213.71733698-122.39773055 7.36448439-4.12411126 14.58167909-8.3955122 21.50429441-13.2560719 19.44223878-13.40336159 39.03176725-16.05457598 60.09419263-3.53495252 27.39588193 16.34915535 54.93905355 32.25644163 82.48222516 48.16372793 88.0792333 50.96223197 176.30575629 101.77717426 264.38498958 152.59211653 9.86840908 5.74429781 19.88410785 11.19401627 29.60522725 17.0856038 14.13981003 8.54280189 21.50429441 21.06242535 21.50429443 37.70616007 0 147.73155685 0.29457937 295.46311371-0.1472897 443.19467057 0 15.46541722-7.2171947 28.57419943-21.7988738 36.96971163-34.7603663 20.17868721-70.55176044 38.88447758-104.57567833 59.94690293-48.90017634 30.19438599-100.00969801 56.11737105-148.76258466 86.60633642-29.01606849 18.11663161-59.50503387 34.02391789-89.11026112 50.96223197-13.10878221 7.51177407-26.07027474 15.17083783-39.03176726 22.9771913-13.84523065 8.3955122-27.83775099 8.83738127-41.97756102 0.73644843-56.41195043-32.55102101-112.82390085-65.10204201-169.38314098-97.653063-61.86166887-35.64410444-123.72333775-71.1409192-185.4377169-106.78502365-11.19401627-6.48074626-22.24074286-12.81420285-32.99289009-19.88410785-11.48859565-7.65906378-17.08560379-19.14765941-17.08560378-32.69831069-0.1472897-34.7603663 0.1472897-69.52073264 0.29457938-104.28109895 1.62018657-0.58915875 1.62018657-1.62018657-0.29457938-2.65121438z 
m356.58833414-225.500512c2.20934532-1.76747625 4.41869063-3.68224221 6.77532565-5.15513907 68.93157389-39.62092601 137.86314777-79.24185204 206.94201135-118.86277807 2.79850407-1.62018657 6.48074626-1.62018657 6.62803594-6.18616688 0.1472897-4.8605597-4.12411126-4.71327001-6.77532564-6.18616688-40.65195383-23.56635005-81.59848704-46.83812071-122.10315117-70.84633984-16.79102442-10.01569877-32.84560039-8.54280189-48.45830728 0.58915876-45.9543826 26.51214381-91.46689612 53.61344636-137.27398903 80.42016953-31.96186226 18.70579035-64.21830387 37.11700133-96.32745581 55.67550198-18.41121097 10.60485751-27.54317163 25.33382629-27.24859225 47.72185885 0.88373813 89.55213018 0.58915875 179.10426036 0.14728969 268.65639053-0.1472897 20.17868721 9.27925033 33.58204881 25.33382629 43.15587853 31.3727035 18.70579035 63.18727606 37.11700133 95.14913832 54.93905355 10.89943689 6.03887719 21.06242535 13.99252034 35.79139414 18.41121096V505.51925374c6.48074626 19.58952848 18.55850066 34.02391789 36.67513226 44.6287754 27.83775099 16.20186565 63.18727606 12.51962347 86.31175705-10.45756784 26.95401286-26.65943349 28.72148912-62.89269668 12.81420282-90.14128893-16.34915535-28.42690974-43.59774757-37.55887038-74.38129233-38.73718787z m82.48222517 429.64401928c14.28709972-3.82953187 25.92298506-13.99252034 38.88447758-21.35700473 40.94653321-23.27177067 81.30390766-47.72185885 122.54502023-70.55176046 26.95401286-15.02354815 52.87699792-31.66728287 80.71474891-45.21793415 16.79102442-8.10093283 29.60522723-22.53532223 29.60522726-43.4504579 0.1472897-92.939793 0.29457937-185.73229631 0.14728969-278.6720893 0-11.19401627-5.15513907-13.99252034-13.84523067-7.06990501-26.51214381 20.76784598-57.29568854 34.46578693-86.16446735 51.25681135-54.49718448 31.81457257-109.14165865 63.33456576-163.78613282 95.00184862-8.54280189 4.8605597-11.78317502 10.45756784-11.63588535 20.47326662 0.29457937 96.18016613 0.1472897 192.50762194 0.1472897 288.68778806-0.29457937 3.5349525-1.47289687 
7.65906378 3.38766282 10.8994369z" fill="#066AF3" p-id="14451"></path><path d="M96.20121136 636.3124965c1.91476594 1.03102783 1.91476594 2.06205563 0 3.09308345v-3.09308345z" fill="#4372E0" p-id="14452"></path><path d="M391.3697457 505.37196405c-5.44971845-44.33419602 13.84523065-74.08671296 61.4197998-94.55997955 30.93083443 1.17831749 58.03213699 10.31027814 74.38129233 38.5898982 15.75999659 27.39588193 14.13981003 63.48185543-12.81420282 90.14128893-23.27177067 22.97719129-58.47400606 26.65943349-86.31175705 10.45756783-18.11663161-10.60485751-30.34167568-25.03924691-36.67513226-44.62877541z" fill="#002A9A" p-id="14453"></path></svg>
\ No newline at end of file
diff --git a/channel/web/static/logos/zhipu.svg b/channel/web/static/logos/zhipu.svg
new file mode 100644
index 00000000..e4b55463
--- /dev/null
+++ b/channel/web/static/logos/zhipu.svg
@@ -0,0 +1 @@
+<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251419020" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="10062" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M520.063496 0v77.563152c0 269.231173-144.758953 414.054122-434.212862 434.340854L86.106618 511.968002H76.827198V255.984001l443.236298-255.984001z" fill="#5B55F6" p-id="10063"></path><path d="M520.063496 1023.936004v-77.563152c0-269.231173-144.758953-414.054122-434.212862-434.340854L86.042622 511.968002H76.827198v255.984001l443.236298 255.984001z" fill="#376AF3" p-id="10064"></path><path d="M520.063496 0v77.563152c0 269.231173 144.758953 414.054122 434.276858 434.340854L954.08437 511.968002h9.215424V255.984001L520.063496 0z" fill="#5B55F6" p-id="10065"></path><path d="M520.063496 1023.936004v-77.563152c0-269.231173 144.758953-414.054122 434.276858-434.340854L954.08437 511.968002h9.27942v255.984001l-443.236298 255.984001z" fill="#376AF3" p-id="10066"></path></svg>
\ No newline at end of file
diff --git a/channel/web/web_channel.py b/channel/web/web_channel.py
index 2a5e71b8..0dd5a2d4 100644
--- a/channel/web/web_channel.py
+++ b/channel/web/web_channel.py
@@ -1,15 +1,17 @@
+import datetime
 import hashlib
 import hmac
-import time
 import json
 import logging
 import mimetypes
 import os
+import random
+import shutil
 import threading
 import time
 import uuid
 from queue import Queue, Empty
-from typing import Tuple
+from typing import List, Tuple
 
 import web
 
@@ -294,6 +296,12 @@ class WebChannel(ChatChannel):
                     "timestamp": time.time()
                 })
                 logger.debug(f"SSE done sent for request {request_id}")
+                # Auto-trigger TTS once the bot finishes its text reply. The
+                # synthesis runs in the background so the chat stream is never
+                # blocked; the resulting audio URL is pushed via a follow-up
+                # `voice_attach` SSE event and persisted to messages.extras.
+                if reply.type == ReplyType.TEXT and content.strip():
+                    self._maybe_dispatch_auto_tts(request_id, session_id, content, context)
                 return
 
             # Fallback: polling mode
@@ -340,6 +348,10 @@ class WebChannel(ChatChannel):
         # Use a single-element list as a mutable counter accessible from closure.
         reasoning_chars_sent = [0]
         reasoning_capped_notified = [False]
+        # Captures the first error message emitted by agent_stream so the
+        # subsequent agent_end handler can skip its "empty final_response"
+        # fallback (which would otherwise overwrite the real error).
+        streamed_error: List[str] = []
 
         def on_event(event: dict):
             if request_id not in self.sse_queues:
@@ -398,6 +410,25 @@ class WebChannel(ChatChannel):
                 if tool_calls:
                     q.put({"type": "message_end", "has_tool_calls": True})
 
+            elif event_type == "error":
+                # Agent raised an exception (LLM 401/timeout/etc). Surface the
+                # real message instead of letting the empty-response fallback
+                # below hide it as "(模型未返回任何内容)".
+                err_msg = data.get("error") or "unknown error"
+                logger.warning(
+                    f"[WebChannel] agent_stream emitted error for "
+                    f"request {request_id}: {err_msg}"
+                )
+                # Remember it so the agent_end handler below knows not to
+                # rewrite the message into a generic empty-response notice.
+                streamed_error.append(err_msg)
+                q.put({
+                    "type": "done",
+                    "content": f"❌ {err_msg}",
+                    "request_id": request_id,
+                    "timestamp": time.time(),
+                })
+
             elif event_type == "agent_end":
                 # Safety net: if the agent finishes with an empty final_response,
                 # chat_channel skips _send_reply (because reply.content is empty),
@@ -406,16 +437,21 @@ class WebChannel(ChatChannel):
                 # here so the frontend always gets closure.
                 final_response = data.get("final_response", "")
                 if not final_response or not str(final_response).strip():
-                    logger.warning(
-                        f"[WebChannel] agent_end with empty final_response for "
-                        f"request {request_id}, sending fallback done"
-                    )
-                    q.put({
-                        "type": "done",
-                        "content": "(模型未返回任何内容，请重试或换一种方式描述你的需求)",
-                        "request_id": request_id,
-                        "timestamp": time.time(),
-                    })
+                    if streamed_error:
+                        # Error was already surfaced via the `error` event
+                        # handler above; nothing more to do here.
+                        pass
+                    else:
+                        logger.warning(
+                            f"[WebChannel] agent_end with empty final_response for "
+                            f"request {request_id}, sending fallback done"
+                        )
+                        q.put({
+                            "type": "done",
+                            "content": "(模型未返回任何内容，请重试或换一种方式描述你的需求)",
+                            "request_id": request_id,
+                            "timestamp": time.time(),
+                        })
 
             elif event_type == "file_to_send":
                 file_path = data.get("path", "")
@@ -432,6 +468,156 @@ class WebChannel(ChatChannel):
 
         return on_event
 
+    # ------------------------------------------------------------------
+    # TTS auto-dispatch
+    # ------------------------------------------------------------------
+    @staticmethod
+    def _resolve_voice_reply_mode() -> str:
+        """Return the TTS auto-reply policy: "always", "voice_if_voice" or "off".
+
+        Derived from the `always_reply_voice` / `voice_reply_voice` config
+        flags; `always_reply_voice` wins when both are truthy.
+        """
+        flag_to_mode = (
+            ("always_reply_voice", "always"),
+            ("voice_reply_voice", "voice_if_voice"),
+        )
+        for flag, mode in flag_to_mode:
+            if conf().get(flag, False):
+                return mode
+        return "off"
+
+    # Mirror of ModelsHandler._TTS_PROVIDERS. zhipu is intentionally omitted
+    # from the UI (GLM-TTS prelude beep); pinning it in config.json still works.
+    _TTS_PROVIDERS_SUGGEST_ORDER = ["openai", "minimax", "dashscope", "linkai"]
+
+    @classmethod
+    def _tts_provider_ready(cls) -> bool:
+        """True when `text_to_voice` is set or a suggested provider has a real API key."""
+        placeholders = ("YOUR API KEY", "YOUR_API_KEY")
+        if (conf().get("text_to_voice") or "").strip():
+            return True
+        for provider_id in cls._TTS_PROVIDERS_SUGGEST_ORDER:
+            field = (ConfigHandler.PROVIDER_MODELS.get(provider_id) or {}).get("api_key_field")
+            if not field:
+                continue
+            api_key = (conf().get(field) or "").strip()
+            if api_key and api_key not in placeholders:
+                return True
+        return False
+
+    def _maybe_dispatch_auto_tts(
+        self, request_id: str, session_id: str, text: str, context: dict,
+    ) -> None:
+        """Spawn a daemon thread to synthesize TTS for `text` if policy allows.
+
+        Skipped when the mode is "off", when it is "voice_if_voice" and the
+        context is not flagged `is_voice_input`, or when no provider is ready.
+        """
+        try:
+            mode = self._resolve_voice_reply_mode()
+            skip = mode == "off" or (
+                mode == "voice_if_voice" and not context.get("is_voice_input")
+            )
+            if skip or not self._tts_provider_ready():
+                return
+            worker = threading.Thread(
+                target=self._synthesize_tts_async, daemon=True,
+                args=(request_id, session_id, text),
+            )
+            worker.start()
+        except Exception as e:
+            logger.debug(f"[WebChannel] auto-tts dispatch skipped: {e}")
+
+    def _synthesize_tts_async(
+        self, request_id: str, session_id: str, text: str,
+    ) -> None:
+        """Synthesize `text`, publish the audio and push a `voice_attach` event.
+
+        The URL is also handed to the conversation store for the session
+        (best effort); exceptions are logged here rather than raised.
+        """
+        try:
+            from bridge.bridge import Bridge
+            voice = Bridge().fetch_text_to_voice(text)
+            if not (voice is not None and voice.type == ReplyType.VOICE and voice.content):
+                logger.warning(
+                    f"[WebChannel] TTS produced no audio for request {request_id}: "
+                    f"reply={voice}"
+                )
+                return
+            audio_url = self._publish_tts_audio(voice.content)
+            if not audio_url:
+                logger.warning(f"[WebChannel] TTS publish failed for request {request_id}")
+                return
+            extras = {"audio": {"url": audio_url, "kind": "tts"}}
+            try:
+                from agent.memory import get_conversation_store
+                get_conversation_store().attach_extras_to_last_assistant(session_id, extras)
+            except Exception as e:
+                logger.debug(f"[WebChannel] tts persist skipped: {e}")
+            sse_queue = self.sse_queues.get(request_id)
+            if sse_queue is None:
+                logger.warning(
+                    f"[WebChannel] TTS ready but SSE queue already closed "
+                    f"for request {request_id} (url={audio_url})"
+                )
+                return
+            sse_queue.put({
+                "type": "voice_attach", "url": audio_url,
+                "request_id": request_id, "timestamp": time.time(),
+            })
+            logger.info(f"[WebChannel] TTS voice_attach pushed for request {request_id}: {audio_url}")
+        except Exception as e:
+            # TTS failures are intentionally silent (no user-facing error).
+            logger.warning(f"[WebChannel] TTS synthesis failed: {e}")
+
+    @staticmethod
+    def _publish_tts_audio(src_path: str) -> str:
+        """Move a TTS file into uploads/ under a unique name; return its URL ("" on failure)."""
+        try:
+            if not src_path or not os.path.isfile(src_path):
+                logger.warning(f"[WebChannel] publish_tts_audio missing source: {src_path!r}")
+                return ""
+            ext = os.path.splitext(src_path)[1].lower() or ".mp3"
+            upload_dir = _get_upload_dir()
+            os.makedirs(upload_dir, exist_ok=True)
+            ts = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
+            dst_name = f"voice_reply_{ts}_{uuid.uuid4().hex[:8]}{ext}"  # no same-second clash
+            dst_path = os.path.join(upload_dir, dst_name)
+            shutil.move(src_path, dst_path)
+            logger.debug(f"[WebChannel] publish_tts_audio moved {src_path} -> {dst_path}")
+            return f"/uploads/{dst_name}"
+        except Exception as e:
+            logger.warning(f"[WebChannel] publish_tts_audio failed: {e}")
+            return ""
+
+    @staticmethod
+    def _cleanup_stale_voice_recordings(max_age_seconds: int = 3600) -> None:
+        """Remove `voice_input_*` uploads whose mtime is over max_age_seconds old."""
+        try:
+            upload_dir = _get_upload_dir()
+            if not os.path.isdir(upload_dir):
+                return
+            started_at, deleted = time.time(), 0
+            candidates = [n for n in os.listdir(upload_dir) if n.startswith("voice_input_")]
+            for entry in candidates:
+                path = os.path.join(upload_dir, entry)
+                try:
+                    stale = (
+                        os.path.isfile(path)
+                        and started_at - os.path.getmtime(path) > max_age_seconds
+                    )
+                    if stale:
+                        os.remove(path)
+                        deleted += 1
+                except OSError:
+                    continue
+            if deleted:
+                logger.info(f"[WebChannel] cleaned up {deleted} stale voice recording(s) from {upload_dir}")
+        except Exception as e:
+            logger.warning(f"[WebChannel] voice cleanup failed: {e}")
+
     def upload_file(self):
         """Handle file or directory upload via multipart/form-data."""
         try:
@@ -557,6 +743,10 @@ class WebChannel(ChatChannel):
             prompt = json_data.get('message', '')
             use_sse = json_data.get('stream', True)
             attachments = json_data.get('attachments', [])
+            # Tag the message as originating from voice input so the post-reply
+            # TTS hook can honour the `voice_if_voice` policy (mirrors the
+            # desire_rtype concept used by other channels).
+            is_voice_input = bool(json_data.get('is_voice', False))
 
             # Append file references to the prompt (same format as QQ channel)
             if attachments:
@@ -607,6 +797,11 @@ class WebChannel(ChatChannel):
             context["session_id"] = session_id
             context["receiver"] = session_id
             context["request_id"] = request_id
+            if is_voice_input:
+                # Web channel runs its own TTS post-pipeline via
+                # _maybe_dispatch_auto_tts; don't set desire_rtype here or
+                # chat_channel would synthesize a duplicate VOICE reply.
+                context["is_voice_input"] = True
 
             if use_sse:
                 context["on_event"] = self._make_sse_callback(request_id)
@@ -634,28 +829,40 @@ class WebChannel(ChatChannel):
         q = self.sse_queues[request_id]
         idle_timeout = 600  # 10 minutes without any real event
         deadline = time.time() + idle_timeout
-        done = False
+        # After the main reply is done we keep the stream open for a short
+        # tail so async post-processing (TTS auto-synthesis) can deliver a
+        # `voice_attach` event before the client disconnects.
+        POST_DONE_TAIL_SECONDS = 60
+        post_done = False
+        post_deadline = 0.0
 
         try:
             while time.time() < deadline:
                 try:
                     item = q.get(timeout=1)
                 except Empty:
+                    if post_done and time.time() >= post_deadline:
+                        break
                     yield b": keepalive\n\n"
                     continue
 
-                # Real event received, reset idle deadline
                 deadline = time.time() + idle_timeout
-
                 payload = json.dumps(item, ensure_ascii=False)
                 yield f"data: {payload}\n\n".encode("utf-8")
 
-                if item.get("type") == "done":
-                    done = True
-                    break
+                itype = item.get("type")
+                if itype == "done":
+                    post_done = True
+                    post_deadline = time.time() + POST_DONE_TAIL_SECONDS
+                elif itype == "voice_attach":
+                    # WSGI buffers the previous chunk until the next yield;
+                    # shrink the tail so the generator wakes up quickly to
+                    # emit a couple of keepalive comments that push the
+                    # voice_attach payload through to the browser.
+                    post_done = True
+                    post_deadline = time.time() + 2  # 2s post-attach tail
         finally:
-            if done:
-                self.sse_queues.pop(request_id, None)
+            self.sse_queues.pop(request_id, None)
 
     def poll_response(self):
         """
@@ -703,6 +910,8 @@ class WebChannel(ChatChannel):
         port = conf().get("web_port", 9899)
         is_public_bind = host in ("0.0.0.0", "::")
 
+        self._cleanup_stale_voice_recordings()
+
         # 打印可用渠道类型提示
         logger.info(
             "[WebChannel] 全部可用通道如下，可修改 config.json 配置文件中的 channel_type 字段进行切换，多个通道用逗号分隔：")
@@ -746,10 +955,13 @@ class WebChannel(ChatChannel):
             '/upload', 'UploadHandler',
             '/uploads/(.*)', 'UploadsHandler',
             '/api/file', 'FileServeHandler',
+            '/api/voice/asr', 'VoiceAsrHandler',
+            '/api/voice/tts', 'VoiceTtsHandler',
             '/poll', 'PollHandler',
             '/stream', 'StreamHandler',
             '/chat', 'ChatHandler',
             '/config', 'ConfigHandler',
+            '/api/models', 'ModelsHandler',
             '/api/channels', 'ChannelsHandler',
             '/api/weixin/qrlogin', 'WeixinQrHandler',
             '/api/feishu/register', 'FeishuRegisterHandler',
@@ -869,6 +1081,103 @@ class UploadHandler:
         return WebChannel().upload_file()
 
 
+class VoiceAsrHandler:
+    """Receive a mic recording, persist it under uploads/ and run ASR.
+
+    Responds with JSON {status, text | message, audio_url}; audio_url is sent
+    whenever the recording was saved, so the UI can render a playback bubble.
+    """
+    def POST(self):
+        _require_auth()
+        web.header('Content-Type', 'application/json; charset=utf-8')
+        audio_url = None  # set only after the recording is fully written
+        try:
+            params = _raw_web_input()
+            file_obj = params.get("file")
+            if file_obj is None:
+                return json.dumps({"status": "error", "message": "no audio file"})
+            filename = getattr(file_obj, "filename", "") or "recording.webm"
+            ext = os.path.splitext(filename)[1].lower() or ".webm"
+            if ext not in (".webm", ".ogg", ".opus", ".mp4", ".m4a", ".mp3", ".wav"):
+                ext = ".webm"
+            upload_dir = _get_upload_dir()
+            os.makedirs(upload_dir, exist_ok=True)
+            ts = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
+            # uuid suffix: same-second uploads must never overwrite each other.
+            saved_name = f"voice_input_{ts}_{uuid.uuid4().hex[:8]}{ext}"
+            saved_path = os.path.join(upload_dir, saved_name)
+            with open(saved_path, "wb") as f:
+                f.write(file_obj.file.read() if hasattr(file_obj, "file") else file_obj.value)
+            audio_url = f"/uploads/{saved_name}"
+
+            from bridge.bridge import Bridge
+            reply = Bridge().fetch_voice_to_text(saved_path)
+            if reply is None:
+                return json.dumps({
+                    "status": "error",
+                    "message": "ASR returned no reply",
+                    "audio_url": audio_url,
+                })
+            from bridge.reply import ReplyType
+            if reply.type == ReplyType.TEXT:
+                return json.dumps({
+                    "status": "success",
+                    "text": reply.content or "",
+                    "audio_url": audio_url,
+                })
+            return json.dumps({
+                "status": "error",
+                "message": reply.content or "ASR failed",
+                "audio_url": audio_url,
+            })
+        except Exception as e:
+            logger.exception(f"[VoiceAsrHandler] failed: {e}")
+            err = {"status": "error", "message": str(e)}
+            return json.dumps({**err, "audio_url": audio_url} if audio_url else err)
+
+
+class VoiceTtsHandler:
+    """Synthesize speech on demand for the in-chat "read aloud" button.
+
+    Replies with JSON {status, audio_url | message}; when a session_id is
+    given the URL is also passed to `attach_extras_to_last_assistant`.
+    """
+    def POST(self):
+        _require_auth()
+        web.header('Content-Type', 'application/json; charset=utf-8')
+        try:
+            payload = json.loads(web.data() or b"{}")
+            text = (payload.get("text") or "").strip()
+            session_id = (payload.get("session_id") or "").strip()
+            if not text:
+                return json.dumps({"status": "error", "message": "empty text"})
+            # `@singleton` makes WebChannel a factory function — go via instance.
+            channel = WebChannel()
+            if not channel._tts_provider_ready():
+                return json.dumps({"status": "error", "message": "tts not configured"})
+
+            from bridge.bridge import Bridge
+            voice = Bridge().fetch_text_to_voice(text)
+            if not (voice is not None and voice.type == ReplyType.VOICE and voice.content):
+                reason = getattr(voice, "content", "") or "tts failed"
+                return json.dumps({"status": "error", "message": str(reason)})
+            audio_url = channel._publish_tts_audio(voice.content)
+            if not audio_url:
+                return json.dumps({"status": "error", "message": "publish failed"})
+            if session_id:
+                try:
+                    from agent.memory import get_conversation_store
+                    extras = {"audio": {"url": audio_url, "kind": "tts"}}
+                    get_conversation_store().attach_extras_to_last_assistant(session_id, extras)
+                except Exception as e:
+                    logger.debug(f"[VoiceTtsHandler] persist skipped: {e}")
+
+            return json.dumps({"status": "success", "audio_url": audio_url})
+        except Exception as e:
+            logger.exception(f"[VoiceTtsHandler] failed: {e}")
+            return json.dumps({"status": "error", "message": str(e)})
+
+
 class UploadsHandler:
     def GET(self, file_name):
         _require_auth()
@@ -958,10 +1267,10 @@ class ConfigHandler:
         const.DEEPSEEK_V4_FLASH, const.DEEPSEEK_V4_PRO, const.DEEPSEEK_CHAT, const.DEEPSEEK_REASONER,
         const.MINIMAX_M2_7_HIGHSPEED, const.MINIMAX_M2_7, const.MINIMAX_M2_5, const.MINIMAX_M2_1, const.MINIMAX_M2_1_LIGHTNING,
         const.CLAUDE_4_6_SONNET, const.CLAUDE_4_7_OPUS, const.CLAUDE_4_6_OPUS, const.CLAUDE_4_5_SONNET,
-        const.GEMINI_31_FLASH_LITE_PRE, const.GEMINI_31_PRO_PRE, const.GEMINI_3_FLASH_PRE,
-        const.GPT_54, const.GPT_54_MINI, const.GPT_54_NANO, const.GPT_5, const.GPT_41, const.GPT_4o,
+        const.GEMINI_35_FLASH, const.GEMINI_31_FLASH_LITE_PRE, const.GEMINI_31_PRO_PRE, const.GEMINI_3_FLASH_PRE,
+        const.GPT_55, const.GPT_54, const.GPT_54_MINI, const.GPT_54_NANO, const.GPT_5, const.GPT_41, const.GPT_4o,
         const.GLM_5_1, const.GLM_5_TURBO, const.GLM_5, const.GLM_4_7,
-        const.QWEN36_PLUS, const.QWEN35_PLUS, const.QWEN3_MAX,
+        const.QWEN36_PLUS, const.QWEN37_MAX, const.QWEN35_PLUS, const.QWEN3_MAX,
         const.DOUBAO_SEED_2_PRO, const.DOUBAO_SEED_2_CODE,
         const.KIMI_K2_6, const.KIMI_K2_5, const.KIMI_K2,
         const.ERNIE_5_1, const.ERNIE_5, const.ERNIE_X1_1, const.ERNIE_45_TURBO_128K, const.ERNIE_45_TURBO_32K,
@@ -1009,7 +1318,7 @@ class ConfigHandler:
             "api_base_key": "gemini_api_base",
             "api_base_default": "https://generativelanguage.googleapis.com",
             "api_base_placeholder": _PLACEHOLDER_GEMINI,
-            "models": [const.GEMINI_31_FLASH_LITE_PRE, const.GEMINI_31_PRO_PRE, const.GEMINI_3_FLASH_PRE],
+            "models": [const.GEMINI_35_FLASH, const.GEMINI_31_FLASH_LITE_PRE, const.GEMINI_31_PRO_PRE, const.GEMINI_3_FLASH_PRE],
         }),
         ("openai", {
             "label": "OpenAI",
@@ -1017,7 +1326,7 @@ class ConfigHandler:
             "api_base_key": "open_ai_api_base",
             "api_base_default": "https://api.openai.com/v1",
             "api_base_placeholder": _PLACEHOLDER_V1,
-            "models": [const.GPT_54, const.GPT_54_MINI, const.GPT_54_NANO, const.GPT_5, const.GPT_41, const.GPT_4o],
+            "models": [const.GPT_55, const.GPT_54, const.GPT_54_MINI, const.GPT_54_NANO, const.GPT_5, const.GPT_41, const.GPT_4o],
         }),
         ("zhipu", {
             "label": "智谱AI",
@@ -1033,7 +1342,7 @@ class ConfigHandler:
             "api_base_key": None,
             "api_base_default": None,
             "api_base_placeholder": "",
-            "models": [const.QWEN36_PLUS, const.QWEN35_PLUS, const.QWEN3_MAX],
+            "models": [const.QWEN36_PLUS, const.QWEN37_MAX, const.QWEN35_PLUS, const.QWEN3_MAX],
         }),
         ("doubao", {
             "label": "豆包",
@@ -1059,14 +1368,6 @@ class ConfigHandler:
             "api_base_placeholder": _PLACEHOLDER_QIANFAN,
             "models": [const.ERNIE_5_1, const.ERNIE_5, const.ERNIE_X1_1, const.ERNIE_45_TURBO_128K, const.ERNIE_45_TURBO_32K],
         }),
-        ("modelscope", {
-            "label": "ModelScope",
-            "api_key_field": "modelscope_api_key",
-            "api_base_key": None,
-            "api_base_default": None,
-            "api_base_placeholder": "",
-            "models": [const.QWEN3_5_27B, const.QWEN3_235B_A22B_INSTRUCT_2507],
-        }),
         ("linkai", {
             "label": "LinkAI",
             "api_key_field": "linkai_api_key",
@@ -1212,6 +1513,1165 @@ class ConfigHandler:
             return json.dumps({"status": "error", "message": str(e)})
 
 
+class ModelsHandler:
+    """API for the unified Models console.
+
+    Layered model:
+      Layer 1 (providers): vendor credentials shared across capabilities.
+                            Stored as flat *_api_key / *_api_base fields in
+                            config.json — the same fields ConfigHandler
+                            already manages.
+      Layer 2 (capabilities): which provider/model is used by chat / vision /
+                            asr / tts / embedding / image / search.
+
+    GET  /api/models           -> overview (providers + capabilities)
+    POST /api/models/provider  -> upsert a vendor credential
+    DELETE /api/models/provider -> clear a vendor credential
+    POST /api/models/capability -> set provider/model for a capability
+    """
+
+    # Capability -> provider ids drawn from ConfigHandler.PROVIDER_MODELS.
+    # List order doubles as the auto-suggestion priority: _asr_capability
+    # suggests the first entry whose API key is configured.
+    _ASR_PROVIDERS = ["openai", "dashscope", "zhipu", "linkai"]
+    # Web-console white-list. Other vendors stay usable via direct config.
+    # Order is likewise the suggestion priority used by _tts_capability.
+    _TTS_PROVIDERS = ["openai", "minimax", "dashscope", "linkai"]
+
+    # TTS engine catalog (speech models, not voice timbres). Entries are
+    # either a bare code or {value, hint?} when a friendly label helps.
+    # _tts_capability returns this mapping unchanged as "provider_models".
+    _TTS_PROVIDER_MODELS = {
+        "openai":    ["tts-1", "tts-1-hd", "gpt-4o-mini-tts"],
+        "minimax": [
+            {"value": "speech-2.8-hd",    "hint": "情绪渲染融合语气词,自然听感"},
+            {"value": "speech-2.8-turbo", "hint": "极致生成速度,更自然逼真"},
+            {"value": "speech-2.6-hd",    "hint": "超低延时,归一化升级"},
+            {"value": "speech-2.6-turbo", "hint": "更快更便宜,适合语音聊天/数字人"},
+        ],
+        "dashscope": [
+            {"value": "qwen3-tts-flash", "hint": "覆盖普通话、方言与主流外语"},
+        ],
+        # Aggregating gateway: a single endpoint multiplexes several
+        # underlying TTS engines, selected via the `model` field.
+        # Each engine exposes its own voice catalog (see _TTS_PROVIDER_VOICES).
+        "linkai": [
+            {"value": "tts-1",  "hint": "OpenAI · 多语种通用"},
+            {"value": "doubao", "hint": "字节豆包 · 中文音色丰富"},
+            {"value": "baidu",  "hint": "百度 · 中文主播音色"},
+        ],
+    }
+
+    # Per-provider voice timbres. Entries can be a bare code string
+    # (label = code) or {value, hint?} when a friendly secondary label
+    # helps recognition. We keep `value` as the raw API code so power
+    # users can cross-reference config.json.
+    # Shape: most providers map to a flat list; "linkai" maps
+    # engine model -> list (keys match _TTS_PROVIDER_MODELS["linkai"]).
+    # NOTE(review): some codes look irregular (e.g. a full-width "（" paired
+    # with an ASCII ")", the spelling "Antie"); presumably these are the
+    # vendors' verbatim ids — confirm before normalising any of them.
+    _TTS_PROVIDER_VOICES = {
+        "openai":    [
+            "alloy", "echo", "fable", "onyx", "nova", "shimmer",
+            "ash", "ballad", "coral", "sage", "verse",
+        ],
+        "minimax": [
+            # Mandarin Chinese (full catalog)
+            {"value": "male-qn-qingse",                           "hint": "中文 · 青涩青年（男）"},
+            {"value": "male-qn-jingying",                         "hint": "中文 · 精英青年（男）"},
+            {"value": "male-qn-badao",                            "hint": "中文 · 霸道青年（男）"},
+            {"value": "male-qn-daxuesheng",                       "hint": "中文 · 青年大学生（男）"},
+            {"value": "female-shaonv",                            "hint": "中文 · 少女（女）"},
+            {"value": "female-yujie",                             "hint": "中文 · 御姐（女）"},
+            {"value": "female-chengshu",                          "hint": "中文 · 成熟女性（女）"},
+            {"value": "female-tianmei",                           "hint": "中文 · 甜美女性（女）"},
+            {"value": "male-qn-qingse-jingpin",                   "hint": "中文 · 青涩青年-beta（男）"},
+            {"value": "male-qn-jingying-jingpin",                 "hint": "中文 · 精英青年-beta（男）"},
+            {"value": "male-qn-badao-jingpin",                    "hint": "中文 · 霸道青年-beta（男）"},
+            {"value": "male-qn-daxuesheng-jingpin",               "hint": "中文 · 青年大学生-beta（男）"},
+            {"value": "female-shaonv-jingpin",                    "hint": "中文 · 少女-beta（女）"},
+            {"value": "female-yujie-jingpin",                     "hint": "中文 · 御姐-beta（女）"},
+            {"value": "female-chengshu-jingpin",                  "hint": "中文 · 成熟女性-beta（女）"},
+            {"value": "female-tianmei-jingpin",                   "hint": "中文 · 甜美女性-beta（女）"},
+            {"value": "clever_boy",                               "hint": "中文 · 聪明男童"},
+            {"value": "cute_boy",                                 "hint": "中文 · 可爱男童"},
+            {"value": "lovely_girl",                              "hint": "中文 · 萌萌女童"},
+            {"value": "cartoon_pig",                              "hint": "中文 · 卡通猪小琪"},
+            {"value": "bingjiao_didi",                            "hint": "中文 · 病娇弟弟"},
+            {"value": "junlang_nanyou",                           "hint": "中文 · 俊朗男友"},
+            {"value": "chunzhen_xuedi",                           "hint": "中文 · 纯真学弟"},
+            {"value": "lengdan_xiongzhang",                       "hint": "中文 · 冷淡学长"},
+            {"value": "badao_shaoye",                             "hint": "中文 · 霸道少爷"},
+            {"value": "tianxin_xiaoling",                         "hint": "中文 · 甜心小玲"},
+            {"value": "qiaopi_mengmei",                           "hint": "中文 · 俏皮萌妹"},
+            {"value": "wumei_yujie",                              "hint": "中文 · 妩媚御姐"},
+            {"value": "diadia_xuemei",                            "hint": "中文 · 嗲嗲学妹"},
+            {"value": "danya_xuejie",                             "hint": "中文 · 淡雅学姐"},
+            {"value": "Chinese (Mandarin)_Reliable_Executive",    "hint": "中文 · 沉稳高管"},
+            {"value": "Chinese (Mandarin)_News_Anchor",           "hint": "中文 · 新闻女声"},
+            {"value": "Chinese (Mandarin)_Mature_Woman",          "hint": "中文 · 傲娇御姐"},
+            {"value": "Chinese (Mandarin)_Unrestrained_Young_Man","hint": "中文 · 不羁青年"},
+            {"value": "Arrogant_Miss",                            "hint": "中文 · 嚣张小姐"},
+            {"value": "Robot_Armor",                              "hint": "中文 · 机械战甲"},
+            {"value": "Chinese (Mandarin)_Kind-hearted_Antie",    "hint": "中文 · 热心大婶"},
+            {"value": "Chinese (Mandarin)_HK_Flight_Attendant",   "hint": "中文 · 港普空姐"},
+            {"value": "Chinese (Mandarin)_Humorous_Elder",        "hint": "中文 · 搞笑大爷"},
+            {"value": "Chinese (Mandarin)_Gentleman",             "hint": "中文 · 温润男声"},
+            {"value": "Chinese (Mandarin)_Warm_Bestie",           "hint": "中文 · 温暖闺蜜"},
+            {"value": "Chinese (Mandarin)_Male_Announcer",        "hint": "中文 · 播报男声"},
+            {"value": "Chinese (Mandarin)_Sweet_Lady",            "hint": "中文 · 甜美女声"},
+            {"value": "Chinese (Mandarin)_Southern_Young_Man",    "hint": "中文 · 南方小哥"},
+            {"value": "Chinese (Mandarin)_Wise_Women",            "hint": "中文 · 阅历姐姐"},
+            {"value": "Chinese (Mandarin)_Gentle_Youth",          "hint": "中文 · 温润青年"},
+            {"value": "Chinese (Mandarin)_Warm_Girl",             "hint": "中文 · 温暖少女"},
+            {"value": "Chinese (Mandarin)_Kind-hearted_Elder",    "hint": "中文 · 花甲奶奶"},
+            {"value": "Chinese (Mandarin)_Cute_Spirit",           "hint": "中文 · 憨憨萌兽"},
+            {"value": "Chinese (Mandarin)_Radio_Host",            "hint": "中文 · 电台男主播"},
+            {"value": "Chinese (Mandarin)_Lyrical_Voice",         "hint": "中文 · 抒情男声"},
+            {"value": "Chinese (Mandarin)_Straightforward_Boy",   "hint": "中文 · 率真弟弟"},
+            {"value": "Chinese (Mandarin)_Sincere_Adult",         "hint": "中文 · 真诚青年"},
+            {"value": "Chinese (Mandarin)_Gentle_Senior",         "hint": "中文 · 温柔学姐"},
+            {"value": "Chinese (Mandarin)_Stubborn_Friend",       "hint": "中文 · 嘴硬竹马"},
+            {"value": "Chinese (Mandarin)_Crisp_Girl",            "hint": "中文 · 清脆少女"},
+            {"value": "Chinese (Mandarin)_Pure-hearted_Boy",      "hint": "中文 · 清澈邻家弟弟"},
+            {"value": "Chinese (Mandarin)_Soft_Girl",             "hint": "中文 · 柔和少女"},
+            # Cantonese (full catalog)
+            {"value": "Cantonese_ProfessionalHost（F)",            "hint": "粤语 · 专业女主持"},
+            {"value": "Cantonese_GentleLady",                     "hint": "粤语 · 温柔女声"},
+            {"value": "Cantonese_ProfessionalHost（M)",            "hint": "粤语 · 专业男主持"},
+            {"value": "Cantonese_PlayfulMan",                     "hint": "粤语 · 活泼男声"},
+            {"value": "Cantonese_CuteGirl",                       "hint": "粤语 · 可爱女孩"},
+            {"value": "Cantonese_KindWoman",                      "hint": "粤语 · 善良女声"},
+            # English (curated: 1F + 1M)
+            {"value": "English_Graceful_Lady",                    "hint": "英文 · Graceful Lady（女）"},
+            {"value": "English_Trustworthy_Man",                  "hint": "英文 · Trustworthy Man（男）"},
+            # Japanese (curated: 1F + 1M)
+            {"value": "Japanese_KindLady",                        "hint": "日文 · Kind Lady（女）"},
+            {"value": "Japanese_LoyalKnight",                     "hint": "日文 · Loyal Knight（男）"},
+            # Korean (curated: 1F + 1M)
+            {"value": "Korean_SweetGirl",                         "hint": "韩文 · Sweet Girl（女）"},
+            {"value": "Korean_CheerfulBoyfriend",                 "hint": "韩文 · Cheerful Boyfriend（男）"},
+        ],
+        "dashscope": [
+            {"value": "Cherry",   "hint": "芊悦 · 阳光女声"},
+            {"value": "Serena",   "hint": "苏瑶 · 温柔女声"},
+            {"value": "Chelsie",  "hint": "千雪 · 二次元少女"},
+            {"value": "Ethan",    "hint": "晨煦 · 阳光男声"},
+            {"value": "Moon",     "hint": "月白 · 率性男声"},
+            {"value": "Kai",      "hint": "凯 · 治愈男声"},
+            {"value": "Nofish",   "hint": "不吃鱼 · 设计师男声"},
+            {"value": "Bella",    "hint": "萌宝 · 小萝莉"},
+            {"value": "Bunny",    "hint": "萌小姬 · 萌系少女"},
+            {"value": "Stella",   "hint": "少女阿月 · 元气少女"},
+            {"value": "Neil",     "hint": "阿闻 · 新闻主播"},
+            {"value": "Seren",    "hint": "小婉 · 助眠女声"},
+            {"value": "Jada",     "hint": "上海话 · 阿珍"},
+            {"value": "Dylan",    "hint": "北京话 · 晓东"},
+            {"value": "Sunny",    "hint": "四川话 · 晴儿"},
+            {"value": "Eric",     "hint": "四川话 · 程川"},
+            {"value": "Rocky",    "hint": "粤语 · 阿强"},
+            {"value": "Kiki",     "hint": "粤语 · 阿清"},
+            {"value": "Peter",    "hint": "天津话 · 李彼得"},
+            {"value": "Marcus",   "hint": "陕西话 · 秦川"},
+            {"value": "Roy",      "hint": "闽南语 · 阿杰"},
+        ],
+        # Aggregating gateway: voices are scoped per engine model. The
+        # frontend picks the correct list based on the selected model so
+        # users don't see incompatible timbres for the active engine.
+        "linkai": {
+            "tts-1": [
+                "alloy", "echo", "fable", "onyx", "nova", "shimmer",
+            ],
+            "doubao": [
+                {"value": "zh_female_wanwanxiaohe_moon_bigtts",       "hint": "湾湾小何"},
+                {"value": "BV007_streaming",                          "hint": "亲切女声"},
+                {"value": "BV001_streaming",                          "hint": "通用女声"},
+                {"value": "BV002_streaming",                          "hint": "通用男声"},
+                {"value": "BV051_streaming",                          "hint": "奶气萌娃"},
+                {"value": "zh_female_linjianvhai_moon_bigtts",        "hint": "邻家女孩"},
+                {"value": "BV700_streaming",                          "hint": "灿灿"},
+                {"value": "BV019_streaming",                          "hint": "重庆小伙"},
+                {"value": "BV524_streaming",                          "hint": "日语男声"},
+                {"value": "BV021_streaming",                          "hint": "东北老铁"},
+                {"value": "BV701_streaming",                          "hint": "擎苍"},
+                {"value": "BV113_streaming",                          "hint": "甜宠少御"},
+                {"value": "BV056_streaming",                          "hint": "阳光男声"},
+                {"value": "BV213_streaming",                          "hint": "广西表哥"},
+                {"value": "BV119_streaming",                          "hint": "通用赘婿"},
+                {"value": "BV705_streaming",                          "hint": "炀炀"},
+                {"value": "BV033_streaming",                          "hint": "温柔小哥"},
+                {"value": "BV102_streaming",                          "hint": "儒雅青年"},
+                {"value": "BV522_streaming",                          "hint": "气质女生"},
+                {"value": "BV034_streaming",                          "hint": "知性姐姐 · 双语"},
+                {"value": "BV005_streaming",                          "hint": "活泼女声"},
+                {"value": "zh_female_wanqudashu_moon_bigtts",         "hint": "湾区大叔"},
+                {"value": "zh_female_daimengchuanmei_moon_bigtts",    "hint": "呆萌川妹"},
+                {"value": "zh_male_guozhoudege_moon_bigtts",          "hint": "广州德哥"},
+                {"value": "zh_male_beijingxiaoye_moon_bigtts",        "hint": "北京小爷"},
+                {"value": "zh_male_shaonianzixin_moon_bigtts",        "hint": "少年梓辛 / Brayan"},
+                {"value": "zh_female_meilinvyou_moon_bigtts",         "hint": "魅力女友"},
+                {"value": "zh_male_shenyeboke_moon_bigtts",           "hint": "深夜播客"},
+                {"value": "zh_female_sajiaonvyou_moon_bigtts",        "hint": "柔美女友"},
+                {"value": "zh_female_yuanqinvyou_moon_bigtts",        "hint": "撒娇学妹"},
+                {"value": "zh_male_haoyuxiaoge_moon_bigtts",          "hint": "浩宇小哥"},
+                {"value": "zh_male_guangxiyuanzhou_moon_bigtts",      "hint": "广西远舟"},
+                {"value": "zh_female_meituojieer_moon_bigtts",        "hint": "妹坨洁儿"},
+                {"value": "zh_male_yuzhouzixuan_moon_bigtts",         "hint": "豫州子轩"},
+                {"value": "BV115_streaming",                          "hint": "古风少御"},
+                {"value": "zh_female_gaolengyujie_moon_bigtts",       "hint": "高冷御姐"},
+                {"value": "zh_male_yuanboxiaoshu_moon_bigtts",        "hint": "渊博小叔"},
+                {"value": "zh_male_yangguangqingnian_moon_bigtts",    "hint": "阳光青年"},
+                {"value": "zh_male_aojiaobazong_moon_bigtts",         "hint": "傲娇霸总"},
+                {"value": "zh_male_jingqiangkanye_moon_bigtts",       "hint": "京腔侃爷 / Harmony"},
+                {"value": "zh_female_shuangkuaisisi_moon_bigtts",     "hint": "爽快思思 / Skye"},
+                {"value": "zh_male_wennuanahu_moon_bigtts",           "hint": "温暖阿虎 / Alvin"},
+                {"value": "multi_female_shuangkuaisisi_moon_bigtts",  "hint": "はるこ / Esmeralda"},
+                {"value": "multi_male_jingqiangkanye_moon_bigtts",    "hint": "かずね / Javier or Álvaro"},
+                {"value": "multi_female_gaolengyujie_moon_bigtts",    "hint": "あけみ"},
+                {"value": "multi_male_wanqudashu_moon_bigtts",        "hint": "ひろし / Roberto"},
+                {"value": "ICL_zh_female_bingruoshaonv_tob",          "hint": "病弱少女"},
+                {"value": "ICL_zh_female_huoponvhai_tob",             "hint": "活泼女孩"},
+                {"value": "ICL_zh_female_heainainai_tob",             "hint": "和蔼奶奶"},
+                {"value": "ICL_zh_female_linjuayi_tob",               "hint": "邻居阿姨"},
+                {"value": "zh_female_wenrouxiaoya_moon_bigtts",       "hint": "温柔小雅"},
+                {"value": "zh_female_tianmeixiaoyuan_moon_bigtts",    "hint": "甜美小源"},
+                {"value": "zh_female_qingchezizi_moon_bigtts",        "hint": "清澈梓梓"},
+                {"value": "zh_male_dongfanghaoran_moon_bigtts",       "hint": "东方浩然"},
+                {"value": "zh_male_jieshuoxiaoming_moon_bigtts",      "hint": "解说小明"},
+                {"value": "zh_female_kailangjiejie_moon_bigtts",      "hint": "开朗姐姐"},
+                {"value": "zh_male_linjiananhai_moon_bigtts",         "hint": "邻家男孩"},
+                {"value": "zh_female_tianmeiyueyue_moon_bigtts",      "hint": "甜美悦悦"},
+                {"value": "zh_female_xinlingjitang_moon_bigtts",      "hint": "心灵鸡汤"},
+            ],
+            "baidu": [
+                {"value": "baidu_0",    "hint": "度小美 · 标准女主播"},
+                {"value": "baidu_1",    "hint": "度小宇 · 亲切男声"},
+                {"value": "baidu_3",    "hint": "度逍遥 · 情感男声"},
+                {"value": "baidu_4",    "hint": "度丫丫 · 童声"},
+                {"value": "baidu_5",    "hint": "度小娇 · 成熟女主播"},
+                {"value": "baidu_5003", "hint": "度逍遥 · 情感男声"},
+                {"value": "baidu_5118", "hint": "度小鹿 · 甜美女声"},
+                {"value": "baidu_103",  "hint": "度米朵 · 可爱童声"},
+                {"value": "baidu_106",  "hint": "度博文 · 专业男主播"},
+                {"value": "baidu_110",  "hint": "度小童 · 童声主播"},
+                {"value": "baidu_111",  "hint": "度小萌 · 软萌妹子"},
+                {"value": "baidu_4003", "hint": "度逍遥 · 情感男声"},
+                {"value": "baidu_4100", "hint": "度小雯 · 活力女主播"},
+                {"value": "baidu_4103", "hint": "度米朵 · 可爱女声"},
+                {"value": "baidu_4105", "hint": "度灵儿 · 清澈女声"},
+                {"value": "baidu_4106", "hint": "度博文 · 专业男主播"},
+                {"value": "baidu_4115", "hint": "度小贤 · 电台男主播"},
+                {"value": "baidu_4117", "hint": "度小乔 · 活泼女声"},
+                {"value": "baidu_4119", "hint": "度小鹿 · 甜美女声"},
+                {"value": "baidu_4129", "hint": "度小彦 · 知识男主播"},
+                {"value": "baidu_4140", "hint": "度小新 · 专业女主播"},
+                {"value": "baidu_4143", "hint": "度清风 · 配音男声"},
+                {"value": "baidu_4144", "hint": "度姗姗 · 娱乐女声"},
+                {"value": "baidu_4149", "hint": "度星河 · 广告男声"},
+                {"value": "baidu_4206", "hint": "度博文 · 综艺男声"},
+                {"value": "baidu_4226", "hint": "南方 · 电台女主播"},
+                {"value": "baidu_4254", "hint": "度小清 · 广告女声"},
+                {"value": "baidu_4278", "hint": "度小贝 · 知识女主播"},
+            ],
+        },
+    }
+    # Provider ids for the embedding capability.
+    # NOTE(review): the consumer of this list is not among the methods next
+    # to it — confirm where it is read before reordering.
+    _EMBEDDING_PROVIDERS = ["openai", "dashscope", "doubao", "zhipu", "linkai"]
+
+    # Capability-scoped model catalogs. The chat dropdown can reuse the
+    # provider's generic model list, but vision and image generation are
+    # served by a narrower subset that the runtime actually dispatches to —
+    # see agent/tools/vision/vision.py and skills/image-generation/SKILL.md.
+    # Anything not listed here intentionally hides the model dropdown so
+    # users cannot pin a chat-only model and silently get a 4xx at runtime.
+    # The first entry of each list matters: _predict_vision_auto falls back
+    # to it when the main chat model is not in the provider's list.
+    _VISION_PROVIDER_MODELS = {
+        # OpenAI ordering: const.GPT_55 first, then the GPT_54 family, then
+        # GPT_5 and the GPT_41 / GPT_4o backstops.
+        "openai":    [
+            const.GPT_55,
+            const.GPT_54,
+            const.GPT_54_MINI,
+            const.GPT_54_NANO,
+            const.GPT_5,
+            const.GPT_41,
+            const.GPT_41_MINI,
+            const.GPT_4o,
+        ],
+        "doubao":    [const.DOUBAO_SEED_2_PRO],
+        "moonshot":  [const.KIMI_K2_6],
+        "dashscope": [const.QWEN36_PLUS, const.QWEN35_PLUS, const.QWEN3_MAX],
+        "claudeAPI": [const.CLAUDE_4_6_SONNET, const.CLAUDE_4_7_OPUS, const.CLAUDE_4_6_OPUS],
+        "gemini":    [const.GEMINI_35_FLASH, const.GEMINI_31_FLASH_LITE_PRE, const.GEMINI_31_PRO_PRE, const.GEMINI_3_FLASH_PRE],
+        "qianfan":   [const.ERNIE_45_TURBO_VL],
+        # Zhipu's bot hard-codes the call to glm-5v-turbo regardless of what
+        # name is passed in (see models/zhipuai/zhipuai_bot.py::call_vision),
+        # so listing the chat models here would silently route to the same
+        # endpoint. Surface only the model the runtime can truly dispatch to.
+        "zhipu":     [const.GLM_5V_TURBO],
+        # MiniMax's vision endpoint is similarly hard-coded to MiniMax-Text-01
+        # (see models/minimax/minimax_bot.py::call_vision); the M2.x chat
+        # family is text-only.
+        "minimax":   [const.MINIMAX_TEXT_01],
+        # LinkAI proxies the underlying vendor; surface a curated set of
+        # multimodal models. Order: gpt-4.1-mini → gpt-5.4-mini as the
+        # cross-vendor baselines, then each vendor's recommended default.
+        "linkai":    [
+            const.GPT_41_MINI,
+            const.GPT_54_MINI,
+            const.QWEN36_PLUS,
+            const.DOUBAO_SEED_2_PRO,
+            const.KIMI_K2_6,
+            const.CLAUDE_4_6_SONNET,
+            const.GEMINI_31_FLASH_LITE_PRE,
+        ],
+    }
+
+    # Image-generation catalog. Source of truth: skills/image-generation/SKILL.md.
+    # Listed verbatim (not via const.*) because these are skill-side names
+    # the script forwards directly to the vendor's image endpoint.
+    #
+    # Two shapes are accepted per model entry:
+    #   - bare string                    -> the model id, no hint
+    #   - {"value": ..., "hint": "..."}  -> model id plus a dim secondary label
+    #     rendered on the right of the dropdown row. Useful for surfacing
+    #     brand names (e.g. "Nano Banana 2" next to
+    #     gemini-3.1-flash-image-preview).
+    # The skill itself maps either form to the real vendor endpoint, so the
+    # hint is purely cosmetic. A single provider list may mix both shapes
+    # (see "linkai").
+    _IMAGE_PROVIDER_MODELS = {
+        "openai":    ["gpt-image-2", "gpt-image-1"],
+        "gemini": [
+            {"value": "gemini-3.1-flash-image-preview", "hint": "Nano Banana 2"},
+            {"value": "gemini-3-pro-image-preview",     "hint": "Nano Banana Pro"},
+            {"value": "gemini-2.5-flash-image",         "hint": "Nano Banana"},
+        ],
+        "doubao":    ["seedream-5.0-lite", "seedream-4.5"],
+        "dashscope": ["qwen-image-2.0-pro", "qwen-image-2.0"],
+        "minimax":   ["image-01"],
+        "linkai": [
+            "gpt-image-2",
+            {"value": "gemini-3.1-flash-image-preview", "hint": "Nano Banana 2"},
+            {"value": "gemini-3-pro-image-preview",     "hint": "Nano Banana Pro"},
+            "seedream-5.0-lite",
+        ],
+    }
+
+    @staticmethod
+    def _config_path() -> str:
+        """Locate config.json.
+
+        Returns:
+            The path ``<root>/config.json`` where ``<root>`` is reached by
+            applying ``os.path.dirname`` three times to this module's
+            absolute path.
+        """
+        location = os.path.abspath(__file__)
+        for _ in range(3):
+            location = os.path.dirname(location)
+        return os.path.join(location, "config.json")
+
+    @classmethod
+    def _read_file_config(cls) -> dict:
+        """Load config.json from disk.
+
+        Returns:
+            The parsed JSON document, or ``{}`` when the file does not exist.
+
+        Raises:
+            json.JSONDecodeError: the file exists but is not valid JSON.
+            OSError: the file exists but cannot be opened.
+        """
+        # EAFP: open directly rather than exists()-then-open(), so a file
+        # removed between the two calls cannot surface as an unexpected
+        # FileNotFoundError.
+        try:
+            with open(cls._config_path(), "r", encoding="utf-8") as f:
+                return json.load(f)
+        except FileNotFoundError:
+            return {}
+
+    @classmethod
+    def _write_file_config(cls, data: dict) -> None:
+        """Overwrite config.json with *data*.
+
+        The document is written as UTF-8 JSON with a 4-space indent and
+        non-ASCII characters kept verbatim (``ensure_ascii=False``).
+
+        Raises:
+            TypeError, ValueError: *data* is not JSON-serialisable; the
+                existing file is left untouched in that case.
+            OSError: the file cannot be opened for writing.
+        """
+        # Serialise before opening: open(..., "w") truncates immediately, so
+        # dumping straight into the handle would leave a partial config.json
+        # behind whenever serialisation fails part-way.
+        payload = json.dumps(data, indent=4, ensure_ascii=False)
+        with open(cls._config_path(), "w", encoding="utf-8") as f:
+            f.write(payload)
+
+    @staticmethod
+    def _is_real_key(value: str) -> bool:
+        """Tell whether *value* looks like a user-supplied API key.
+
+        Args:
+            value: Raw config value; may be empty or ``None``.
+
+        Returns:
+            ``False`` for empty / whitespace-only values and for the
+            ``"YOUR API KEY"`` / ``"YOUR_API_KEY"`` placeholders (with or
+            without surrounding whitespace), ``True`` otherwise.
+        """
+        if not isinstance(value, str):
+            # Non-string config values keep plain truthiness semantics.
+            return bool(value)
+        # Ignore surrounding whitespace so "   " or " YOUR_API_KEY " are not
+        # reported as configured credentials.
+        candidate = value.strip()
+        return bool(candidate) and candidate not in ("YOUR API KEY", "YOUR_API_KEY")
+
+    @classmethod
+    def _provider_overview(cls) -> List[dict]:
+        """Describe every provider in ConfigHandler.PROVIDER_MODELS.
+
+        Returns:
+            One dict per provider carrying its id, label, credential field
+            names, masked API key (empty when unconfigured), effective API
+            base (configured value, else the provider default) and model
+            list. Configured providers come first; within each group the
+            PROVIDER_MODELS declaration order is kept.
+        """
+        local_config = conf()
+        items = []
+        for pid, p in ConfigHandler.PROVIDER_MODELS.items():
+            key_field = p.get("api_key_field")
+            base_field = p.get("api_base_key")
+            raw_key = local_config.get(key_field, "") if key_field else ""
+            raw_base = local_config.get(base_field, "") if base_field else ""
+            configured = cls._is_real_key(raw_key)
+            items.append({
+                "id": pid,
+                "label": p["label"],
+                "configured": configured,
+                "api_key_field": key_field,
+                "api_base_field": base_field,
+                "api_key_masked": ConfigHandler._mask_key(raw_key) if configured else "",
+                "api_base": raw_base or (p.get("api_base_default") or ""),
+                "api_base_default": p.get("api_base_default") or "",
+                "api_base_placeholder": p.get("api_base_placeholder") or "",
+                "models": list(p.get("models") or []),
+            })
+        # `items` was built in PROVIDER_MODELS order and list.sort() is
+        # stable, so sorting on the configured flag alone keeps declaration
+        # order inside each group — no per-item keys().index() scan needed.
+        items.sort(key=lambda it: 0 if it["configured"] else 1)
+        return items
+
+    @classmethod
+    def _chat_capability(cls, local_config: dict) -> dict:
+        """Describe the main chat model that drives the agent.
+
+        ``bot_type`` is used as the provider id, with ``"chatGPT"`` mapped
+        to ``"openai"``. When the result is not a known provider and
+        ``use_linkai`` is truthy, the provider is reported as ``"linkai"``.
+        """
+        known_providers = ConfigHandler.PROVIDER_MODELS
+        linkai_on = bool(local_config.get("use_linkai", False))
+
+        bot_type = local_config.get("bot_type") or ""
+        if bot_type == "chatGPT":
+            provider_id = "openai"
+        else:
+            provider_id = bot_type
+        if linkai_on and provider_id not in known_providers:
+            provider_id = "linkai"
+
+        return {
+            "editable": True,
+            "current_provider": provider_id,
+            "current_model": local_config.get("model", ""),
+            "providers": list(known_providers.keys()),
+            "use_linkai": linkai_on,
+        }
+
+    # Auto-fallback order for vision when no explicit model is pinned.
+    # Mirrors agent/tools/vision/vision.py::_resolve_providers — DeepSeek and
+    # other text-only chat bots are intentionally absent, since they cannot
+    # actually serve a vision request. Each entry is
+    #   (provider_id, api_key_field, default_vision_model)
+    # NOTE(review): _predict_vision_auto ignores the api_key_field element —
+    # it resolves the key field through ConfigHandler.PROVIDER_MODELS with a
+    # plain dict lookup — so that column is informational here. An earlier
+    # note said lookups are case-insensitive on api_key_field; presumably
+    # that describes vision.py, confirm there.
+    # LinkAI and OpenAI are not listed: _predict_vision_auto handles them in
+    # separate steps (use_linkai short-circuits first, OpenAI and LinkAI are
+    # tried after this list).
+    _VISION_AUTO_ORDER = [
+        ("moonshot",  "moonshot_api_key",  const.KIMI_K2_6),
+        ("doubao",    "ark_api_key",       const.DOUBAO_SEED_2_PRO),
+        ("dashscope", "dashscope_api_key", const.QWEN36_PLUS),
+        ("claudeAPI", "claude_api_key",    const.CLAUDE_4_6_SONNET),
+        ("gemini",    "gemini_api_key",    const.GEMINI_35_FLASH),
+        ("qianfan",   "qianfan_api_key",   const.ERNIE_45_TURBO_VL),
+        ("zhipu",     "zhipu_ai_api_key",  const.GLM_5V_TURBO),
+        ("minimax",   "minimax_api_key",   const.MINIMAX_TEXT_01),
+    ]
+
+    @classmethod
+    def _predict_vision_auto(cls, local_config: dict) -> dict:
+        """Predict the provider/model used for vision in auto mode.
+
+        Intended to mirror the fallback order of
+        agent/tools/vision/vision.py::_resolve_providers so the UI hint
+        matches what the runtime does when no tools.vision.model is set.
+
+        Returns:
+            ``{"provider": ..., "model": ...}``; both values are empty
+            strings when no prediction is made.
+        """
+        chat_state = cls._chat_capability(local_config)
+        chat_provider = chat_state["current_provider"]
+        chat_model = chat_state["current_model"]
+        linkai_enabled = bool(local_config.get("use_linkai", False))
+        linkai_has_key = cls._is_real_key(local_config.get("linkai_api_key", ""))
+
+        def _resolve(pid: str, model_default: str):
+            # The key field comes from the canonical provider table, so no
+            # field names are hardcoded here.
+            key_field = (ConfigHandler.PROVIDER_MODELS.get(pid) or {}).get("api_key_field")
+            if not (key_field and cls._is_real_key(local_config.get(key_field, ""))):
+                return None
+            # Only report a model the vision runtime can dispatch to. The
+            # chat model name is not trusted blindly: for vendors such as
+            # Zhipu/MiniMax the bot hard-codes the vision model, so the
+            # chat model is shown only when the curated list contains it;
+            # otherwise the provider's first vision-capable model wins.
+            vision_models = cls._VISION_PROVIDER_MODELS.get(pid, [])
+            if chat_model and pid == chat_provider and chat_model in vision_models:
+                chosen = chat_model
+            elif vision_models:
+                chosen = vision_models[0]
+            else:
+                chosen = model_default
+            return {"provider": pid, "model": chosen}
+
+        # 1. use_linkai with a real LinkAI key: no hint at all. LinkAI is a
+        #    proxy and the underlying model it picks is not observable, so
+        #    "LinkAI" without a model would tell the user nothing.
+        if linkai_enabled and linkai_has_key:
+            return {"provider": "", "model": ""}
+
+        # 2. The main bot goes first, but only when it is listed in
+        #    _VISION_PROVIDER_MODELS (used as the "natively supports vision"
+        #    approximation; chat-only families such as DeepSeek are absent).
+        # 3. Then the other discoverable providers in declared order.
+        candidates = []
+        if chat_provider in cls._VISION_PROVIDER_MODELS:
+            candidates.append((chat_provider, chat_model))
+        candidates.extend(
+            (pid, default_model) for pid, _key, default_model in cls._VISION_AUTO_ORDER
+        )
+        for pid, default_model in candidates:
+            hit = _resolve(pid, default_model)
+            if hit:
+                return hit
+
+        # 4. OpenAI raw HTTP
+        if cls._is_real_key(local_config.get("open_ai_api_key", "")):
+            return {"provider": "openai", "model": const.GPT_55}
+
+        # 5. LinkAI as last resort (only reached when use_linkai is off)
+        if linkai_has_key:
+            return {"provider": "linkai", "model": const.GPT_41_MINI}
+
+        return {"provider": "", "model": ""}
+
+    @classmethod
+    def _vision_capability(cls, local_config: dict) -> dict:
+        """Describe the vision capability for the Models console.
+
+        ``tools.vision.model`` (``tool`` is accepted as an alternative key
+        for ``tools``) is the explicit override. Without it the strategy is
+        "auto" and ``fallback_*`` carries the prediction from
+        ``_predict_vision_auto``.
+        """
+        def _dict_or_empty(value):
+            # Malformed (non-dict) config sections are treated as absent.
+            return value if isinstance(value, dict) else {}
+
+        tools_conf = _dict_or_empty(local_config.get("tools") or local_config.get("tool") or {})
+        vision_conf = _dict_or_empty(tools_conf.get("vision") or {})
+        pinned_model = (vision_conf.get("model") or "").strip()
+
+        # For a pinned model, highlight the first vendor whose curated list
+        # contains it; an empty provider keeps the dropdown on "auto" when
+        # the model cannot be attributed to any vendor.
+        pinned_provider = ""
+        if pinned_model:
+            pinned_provider = next(
+                (
+                    pid
+                    for pid, models in cls._VISION_PROVIDER_MODELS.items()
+                    if pinned_model in models
+                ),
+                "",
+            )
+
+        # The fallback_* fields show the predicted dispatch target instead
+        # of the chat-model name.
+        predicted = cls._predict_vision_auto(local_config)
+        strategy = "specified" if pinned_model else "auto"
+
+        return {
+            "editable": True,
+            "strategy": strategy,
+            "user_specified_model": pinned_model,
+            "current_provider": pinned_provider,
+            "current_model": pinned_model,
+            "fallback_provider": predicted["provider"],
+            "fallback_model": predicted["model"],
+            "providers": list(cls._VISION_PROVIDER_MODELS.keys()),
+            "provider_models": cls._VISION_PROVIDER_MODELS,
+        }
+
+    @classmethod
+    def _asr_capability(cls, local_config: dict) -> dict:
+        """Describe the speech-to-text capability for the Models console.
+
+        "Pick or empty": ``current_provider`` is the stripped, lower-cased
+        ``voice_to_text`` value, or "" when unset. Only in the unset case
+        is ``suggested_provider`` filled, with the first ``_ASR_PROVIDERS``
+        entry that has a real API key. The suggestion is a UX hint in the
+        returned dict; this method writes nothing to the config.
+        """
+        chosen = (local_config.get("voice_to_text") or "").strip().lower()
+
+        def _has_real_key(pid: str) -> bool:
+            field = (ConfigHandler.PROVIDER_MODELS.get(pid) or {}).get("api_key_field")
+            return bool(field) and cls._is_real_key(local_config.get(field, ""))
+
+        if chosen:
+            hint = ""
+        else:
+            hint = next(filter(_has_real_key, cls._ASR_PROVIDERS), "")
+
+        return {
+            "editable": True,
+            "current_provider": chosen,
+            "suggested_provider": hint,
+            "current_model": "",
+            "providers": cls._ASR_PROVIDERS,
+        }
+
+    @classmethod
+    def _tts_capability(cls, local_config: dict) -> dict:
+        """Describe text-to-speech state for the picker (vendor, model, voice)."""
+        configured = (local_config.get("text_to_voice") or "").strip().lower()
+        # A vendor outside the white-list is hidden from the picker; this
+        # method only reads config, so the stored value is left as-is.
+        shown = configured if configured in cls._TTS_PROVIDERS else ""
+        hint = ""
+        if not shown:
+            for vendor in cls._TTS_PROVIDERS:
+                field = (ConfigHandler.PROVIDER_MODELS.get(vendor) or {}).get("api_key_field")
+                if field and cls._is_real_key(local_config.get(field, "")):
+                    hint = vendor
+                    break
+        return {
+            "editable": True,
+            "current_provider": shown,
+            "suggested_provider": hint,
+            "current_model": (local_config.get("text_to_voice_model") or "") if shown else "",
+            "current_voice": (local_config.get("tts_voice_id") or "") if shown else "",
+            "providers": cls._TTS_PROVIDERS,
+            "provider_models": cls._TTS_PROVIDER_MODELS,
+            "provider_voices": cls._TTS_PROVIDER_VOICES,
+            "reply_mode": cls._tts_reply_mode(local_config),
+        }
+
+    @staticmethod
+    def _tts_reply_mode(local_config: dict) -> str:
+        """Map the two legacy voice-reply flags to off / voice_if_voice / always."""
+        for flag, mode in (("always_reply_voice", "always"), ("voice_reply_voice", "voice_if_voice")):
+            if local_config.get(flag, False):
+                return mode
+        return "off"
+
+    @classmethod
+    def _embedding_capability(cls, local_config: dict) -> dict:
+        """Embedding picker state ("pick or empty"); unparsable dims give ``None``."""
+        # `suggested_provider` is a UI-only hint: the first vendor with a real key.
+        explicit = (local_config.get("embedding_provider") or "").strip().lower()
+        suggested = ""
+        if not explicit:
+            for pid in cls._EMBEDDING_PROVIDERS:
+                key_field = (ConfigHandler.PROVIDER_MODELS.get(pid) or {}).get("api_key_field")
+                if key_field and cls._is_real_key(local_config.get(key_field, "")):
+                    suggested = pid
+                    break
+        try:
+            current_dim = int(local_config.get("embedding_dimensions") or 0) or None
+        except (TypeError, ValueError):
+            current_dim = None  # hand-edited config: don't fail the whole response
+        return {
+            "editable": True,
+            "current_provider": explicit,
+            "suggested_provider": suggested,
+            "current_model": local_config.get("embedding_model", "") or "",
+            "current_dim": current_dim,
+            "providers": cls._EMBEDDING_PROVIDERS,
+        }
+
+    # Auto-fallback order for image generation; _predict_image_auto returns
+    # the first entry whose API key is configured, so order matters. Mirrors
+    # `_DEFAULT_PROVIDER_ORDER` in skills/image-generation/scripts/generate.py:
+    # OpenAI → Gemini → Seedream(Ark/doubao) → Qwen(dashscope) → MiniMax →
+    # LinkAI. Each entry is (provider-card id, that script's per-provider
+    # DEFAULT_MODEL) so the hint matches what the runtime would request.
+    _IMAGE_AUTO_ORDER = [
+        ("openai",    "gpt-image-2"),
+        ("gemini",    "gemini-3.1-flash-image-preview"),  # nano-banana-2
+        ("doubao",    "seedream-5.0-lite"),
+        ("dashscope", "qwen-image-2.0"),
+        ("minimax",   "image-01"),
+        ("linkai",    "gpt-image-2"),
+    ]
+
+    @classmethod
+    def _predict_image_auto(cls, local_config: dict) -> dict:
+        """Guess the provider/model image generation uses when nothing is pinned.
+
+        Walks ``_IMAGE_AUTO_ORDER`` and returns the first provider whose API
+        key passes ``_is_real_key``; providers with no ``api_key_field`` in
+        ``ConfigHandler.PROVIDER_MODELS`` are skipped.
+
+        An empty provider/model pair is returned when no key is configured,
+        and also when ``use_linkai`` is on with a real LinkAI key — the hint
+        is suppressed there instead of reporting a bare "LinkAI".
+        """
+        nothing = {"provider": "", "model": ""}
+        linkai_on = bool(local_config.get("use_linkai", False))
+        linkai_keyed = cls._is_real_key(local_config.get("linkai_api_key", ""))
+        if linkai_on and linkai_keyed:
+            return nothing
+
+        # First configured vendor in priority order wins.
+        for vendor, model in cls._IMAGE_AUTO_ORDER:
+            meta = ConfigHandler.PROVIDER_MODELS.get(vendor) or {}
+            field = meta.get("api_key_field")
+            if field and cls._is_real_key(local_config.get(field, "")):
+                return {"provider": vendor, "model": model}
+        return nothing
+
+    @classmethod
+    def _image_capability(cls, local_config: dict) -> dict:
+        """Describe image-generation state for the models page.
+
+        The pinned model is read from config["skills"]["image-generation"]["model"];
+        the singular ``skill`` key is consulted only when ``skills`` is
+        missing or empty. Non-dict nodes are treated as empty.
+
+        The highlighted provider card is inferred by looking the pinned model
+        up in ``_IMAGE_PROVIDER_MODELS`` (entries are plain strings or dicts
+        with a ``value`` key). ``fallback_*`` carries the auto-mode
+        prediction from ``_predict_image_auto``.
+        """
+        skills_cfg = local_config.get("skills") or local_config.get("skill") or {}
+        image_cfg = (skills_cfg.get("image-generation") or {}) if isinstance(skills_cfg, dict) else {}
+        if not isinstance(image_cfg, dict):
+            image_cfg = {}
+        pinned_model = (image_cfg.get("model") or "").strip()
+
+        def _model_id(entry) -> str:
+            # Entries are either a bare model name or a {value, hint} mapping.
+            if isinstance(entry, str):
+                return entry
+            return entry.get("value") or ""
+
+        card = ""
+        if pinned_model:
+            card = next(
+                (
+                    vendor
+                    for vendor, catalog in cls._IMAGE_PROVIDER_MODELS.items()
+                    if any(_model_id(item) == pinned_model for item in catalog)
+                ),
+                "",
+            )
+
+        # Auto-mode hint: what _predict_image_auto expects to be dispatched,
+        # so the UI never attributes image generation to a chat-only bot.
+        auto_guess = cls._predict_image_auto(local_config)
+
+        return {
+            "editable": True,
+            "strategy": "specified" if pinned_model else "auto",
+            "current_provider": card,
+            "current_model": pinned_model,
+            "fallback_provider": auto_guess["provider"],
+            "fallback_model": auto_guess["model"],
+            "providers": list(cls._IMAGE_PROVIDER_MODELS.keys()),
+            "provider_models": cls._IMAGE_PROVIDER_MODELS,
+            # The dispatcher honoring a pinned provider isn't wired up yet;
+            # advertise that so the UI can show a "saved but not active" banner.
+            "runtime_active": False,
+            "note": "router_pending",
+        }
+
+    # Canonical provider order; keep in sync with PROVIDER_ORDER in agent/tools/web_search/web_search.py.
+    _SEARCH_PROVIDERS = ("bocha", "qianfan", "zhipu", "linkai")
+
+    # Display labels; _search_capability falls back to the raw id for unlisted providers.
+    _SEARCH_PROVIDER_LABELS = {
+        "bocha":   "博查",
+        "zhipu":   "智谱",
+        "qianfan": "百度千帆",
+        "linkai":  "LinkAI",
+    }
+
+    @classmethod
+    def _search_provider_key(cls, provider: str, local_config: dict) -> str:
+        """Return the raw key for *provider*: config value first, then env var."""
+        if provider == "bocha":
+            tools_cfg = local_config.get("tools") or {}
+            ws_cfg = (tools_cfg.get("web_search") or {}) if isinstance(tools_cfg, dict) else {}
+            return (ws_cfg.get("bocha_api_key") if isinstance(ws_cfg, dict) else "") or os.environ.get("BOCHA_API_KEY", "")
+        shared = {
+            "zhipu": ("zhipu_ai_api_key", "ZHIPUAI_API_KEY"),
+            "qianfan": ("qianfan_api_key", "QIANFAN_API_KEY"),
+            "linkai": ("linkai_api_key", "LINKAI_API_KEY"),
+        }
+        cfg_field, env_name = shared.get(provider, ("", ""))
+        return (local_config.get(cfg_field) or os.environ.get(env_name, "")) if cfg_field else ""
+
+    @classmethod
+    def _search_capability(cls, local_config: dict) -> dict:
+        """Describe web-search state: per-provider status plus the active routing.
+
+        zhipu/qianfan/linkai reuse model-vendor keys; bocha keeps its own key
+        under tools.web_search. A pinned provider with no usable key is reported
+        as unset, and ``current_provider`` falls back to the first configured one.
+        """
+        tools_cfg = local_config.get("tools") or {}
+        search_cfg = (tools_cfg.get("web_search") or {}) if isinstance(tools_cfg, dict) else {}
+        if not isinstance(search_cfg, dict):
+            search_cfg = {}
+
+        entries = []
+        usable = []
+        for vendor in cls._SEARCH_PROVIDERS:
+            key = cls._search_provider_key(vendor, local_config)
+            has_key = cls._is_real_key(key)
+            entries.append({
+                "id": vendor,
+                "label": cls._SEARCH_PROVIDER_LABELS.get(vendor, vendor),
+                "configured": has_key,
+                # Only bocha has a dedicated key; the rest piggy-back on a
+                # model-vendor credential. The frontend picks its editor from this.
+                "needs_dedicated_key": vendor == "bocha",
+                "api_key_masked": ConfigHandler._mask_key(key) if has_key and key else "",
+            })
+            if has_key:
+                usable.append(vendor)
+
+        mode = (search_cfg.get("strategy") or "auto").strip().lower()
+        if mode not in ("auto", "fixed"):
+            mode = "auto"
+        pinned = (search_cfg.get("provider") or "").strip().lower()
+        if pinned and pinned not in usable:
+            pinned = ""
+
+        # Header chip: the pinned backend when honoured, else the first usable one.
+        if mode == "fixed" and pinned:
+            active = pinned
+        else:
+            active = usable[0] if usable else ""
+
+        return {
+            "editable": True,
+            "strategy": mode,
+            "providers": entries,
+            "configured_providers": usable,
+            "current_provider": active,
+            "fixed_provider": pinned,
+            "available": bool(active),
+        }
+
+    @classmethod
+    def _capabilities(cls, local_config: dict) -> dict:
+        """Collect every per-capability descriptor into one mapping."""
+        # Tuple order is the key order of the returned mapping.
+        builders = (
+            ("chat", cls._chat_capability), ("vision", cls._vision_capability),
+            ("asr", cls._asr_capability), ("tts", cls._tts_capability),
+            ("embedding", cls._embedding_capability), ("image", cls._image_capability),
+            ("search", cls._search_capability),
+        )
+        return {name: build(local_config) for name, build in builders}
+
+    def GET(self):
+        """Return the provider overview and capability descriptors as JSON."""
+        _require_auth()
+        web.header("Content-Type", "application/json; charset=utf-8")
+        try:
+            cfg = conf()
+            body = {"status": "success", "providers": self._provider_overview()}
+            body["capabilities"] = self._capabilities(cfg)
+            return json.dumps(body, ensure_ascii=False)
+        except Exception as err:
+            # Failures are reported in-band as a JSON error object.
+            logger.error(f"[ModelsHandler] GET failed: {err}")
+            return json.dumps({"status": "error", "message": str(err)})
+
+    def POST(self):
+        """Dispatch a JSON request to the handler named by its ``action`` field."""
+        _require_auth()
+        web.header("Content-Type", "application/json; charset=utf-8")
+        try:
+            data = json.loads(web.data() or b"{}")
+            action = data.get("action") or ""
+            # Each supported action is served by the `_handle_<action>` method
+            # of the same name; anything else is reported as unknown.
+            known = (
+                "set_provider", "delete_provider", "set_capability",
+                "set_voice_reply_mode", "set_search_credential",
+            )
+            for name in known:
+                if action == name:
+                    return getattr(self, f"_handle_{name}")(data)
+            return json.dumps({"status": "error", "message": f"unknown action: {action!r}"})
+        except Exception as err:
+            logger.error(f"[ModelsHandler] POST failed: {err}")
+            return json.dumps({"status": "error", "message": str(err)})
+
+    def _handle_set_provider(self, data: dict) -> str:
+        """Store a vendor's API key and/or base URL; returns a JSON status string."""
+        provider_id = (data.get("provider_id") or "").strip()
+        meta = ConfigHandler.PROVIDER_MODELS.get(provider_id)
+        if not meta:
+            return json.dumps({"status": "error", "message": f"unknown provider: {provider_id}"})
+
+        # A missing, null, empty or non-string api_key leaves the stored key
+        # untouched (the "edit only base url" flow); clearing a key goes
+        # through action=delete_provider instead.
+        submitted_key = data.get("api_key")
+        new_key = submitted_key.strip() if isinstance(submitted_key, str) else ""
+
+        # For api_base, presence matters: an explicit "" means "reset to
+        # default", while an absent field means "no change".
+        base_given = "api_base" in data
+        new_base = (data.get("api_base") or "").strip() if base_given else None
+
+        live_cfg = conf()
+        disk_cfg = self._read_file_config()
+
+        # Collect the changes first, then apply them to both config copies.
+        updates = {}
+        key_field = meta.get("api_key_field")
+        if key_field and new_key:
+            updates[key_field] = new_key
+        base_field = meta.get("api_base_key")
+        if base_field and base_given:
+            updates[base_field] = new_base
+        for field, value in updates.items():
+            live_cfg[field] = value
+            disk_cfg[field] = value
+
+        if not updates:
+            # e.g. the modal was saved without edits: report a successful
+            # no-op so the frontend can show "Saved" instead of an error.
+            return json.dumps({"status": "success", "provider": provider_id, "noop": True})
+
+        self._write_file_config(disk_cfg)
+        logger.info(f"[ModelsHandler] provider {provider_id} updated: {sorted(updates.keys())}")
+
+        # Vendor credentials feed bot routing for any capability that uses
+        # them, so reset Bridge afterwards.
+        self._reset_bridge()
+        return json.dumps({"status": "success", "provider": provider_id})
+
+    def _handle_delete_provider(self, data: dict) -> str:
+        """Blank a vendor's API key and base URL in memory and on disk."""
+        provider_id = (data.get("provider_id") or "").strip()
+        meta = ConfigHandler.PROVIDER_MODELS.get(provider_id)
+        if not meta:
+            return json.dumps({"status": "error", "message": f"unknown provider: {provider_id}"})
+
+        live_cfg = conf()
+        disk_cfg = self._read_file_config()
+
+        # Fields are written unconditionally — even when they were absent
+        # before — so the in-memory conf() reflects the cleared state
+        # without a restart. Only fields the vendor actually declares are
+        # touched.
+        candidates = (meta.get("api_key_field"), meta.get("api_base_key"))
+        cleared = [name for name in candidates if name]
+        for name in cleared:
+            live_cfg[name] = ""
+            disk_cfg[name] = ""
+
+        self._write_file_config(disk_cfg)
+        logger.info(f"[ModelsHandler] provider {provider_id} cleared: {cleared}")
+        # Same follow-up as a credential update: reset Bridge bot routing.
+        self._reset_bridge()
+        return json.dumps({"status": "success", "provider": provider_id, "cleared": cleared})
+
+    def _handle_set_capability(self, data: dict) -> str:
+        """Route a set_capability request to the setter for its capability."""
+
+        def _text(field: str) -> str:
+            return (data.get(field) or "").strip()
+
+        capability = _text("capability")
+        provider_id = _text("provider_id")
+        model = _text("model")
+
+        if capability == "search":
+            # Search has its own payload shape: strategy + provider.
+            return self._set_search(_text("strategy").lower(), _text("provider").lower())
+        if capability == "tts":
+            return self._set_tts(provider_id, model, _text("voice"))
+        if capability == "asr":
+            return self._set_simple("voice_to_text", provider_id)
+        # The remaining capabilities all take (provider_id, model).
+        setters = {"chat": self._set_chat, "vision": self._set_vision,
+                   "embedding": self._set_embedding, "image": self._set_image}
+        if capability in setters:
+            return setters[capability](provider_id, model)
+        return json.dumps({"status": "error", "message": f"capability not editable: {capability}"})
+
+    def _set_image(self, provider_id: str, model: str) -> str:
+        """Pin (or clear) the image-generation model; returns a JSON status string.
+
+        The value is stored at skills.image-generation.model. ``provider_id``
+        is only logged and echoed back in the response.
+        """
+        live_cfg = conf()
+        disk_cfg = self._read_file_config()
+        for cfg in (live_cfg, disk_cfg):
+            self._set_nested_namespace_value(cfg, "skills", "image-generation", "model", model or "")
+        for cfg in (live_cfg, disk_cfg):
+            self._drop_legacy_namespace(cfg, "skill", "skills", child="image-generation")
+
+        self._write_file_config(disk_cfg)
+
+        # Mirror the change into SKILL_IMAGE_GENERATION_MODEL so anything
+        # reading it from the environment sees the edit without a restart.
+        env_key = "SKILL_IMAGE_GENERATION_MODEL"
+        if not model:
+            os.environ.pop(env_key, None)
+        else:
+            os.environ[env_key] = model
+
+        logger.info(f"[ModelsHandler] image updated: provider_hint={provider_id!r} model={model!r}")
+        return json.dumps({
+            "status": "success", "provider": provider_id,
+            "model": model, "router_pending": True,
+        })
+
+    def _set_chat(self, provider_id: str, model: str) -> str:
+        """Switch the chat vendor and/or model; returns a JSON status string."""
+        if provider_id and provider_id not in ConfigHandler.PROVIDER_MODELS:
+            return json.dumps({"status": "error", "message": f"unknown provider: {provider_id}"})
+
+        live_cfg = conf()
+        disk_cfg = self._read_file_config()
+
+        applied = {}
+        if provider_id:
+            # The openai vendor is stored under the bot type "chatGPT".
+            applied["bot_type"] = "chatGPT" if provider_id == "openai" else provider_id
+            applied["use_linkai"] = provider_id == "linkai"
+        if model:
+            applied["model"] = model
+        if not applied:
+            # Neither a vendor nor a model was supplied: nothing to write.
+            return json.dumps({"status": "success", "applied": {}, "noop": True})
+
+        for key, value in applied.items():
+            live_cfg[key] = value
+            disk_cfg[key] = value
+
+        self._write_file_config(disk_cfg)
+        logger.info(f"[ModelsHandler] chat updated: {applied}")
+        # Reset Bridge bot routing so the new vendor/model is picked up
+        # (presumably on the next request — see _reset_bridge).
+        self._reset_bridge()
+        return json.dumps({"status": "success", "applied": applied})
+
+    def _set_vision(self, provider_id: str, model: str) -> str:
+        """Store the vision model at tools.vision.model, in memory and on disk."""
+        # provider_id is not used here; only the model is stored.
+        live_cfg = conf()
+        disk_cfg = self._read_file_config()
+        for cfg in (disk_cfg, live_cfg):
+            self._set_nested_namespace_value(cfg, "tools", "vision", "model", model)
+        for cfg in (disk_cfg, live_cfg):
+            self._drop_legacy_namespace(cfg, "tool", "tools", child="vision")
+
+        self._write_file_config(disk_cfg)
+        logger.info(f"[ModelsHandler] vision model set: {model!r}")
+        return json.dumps({"status": "success", "model": model})
+
+    @staticmethod
+    def _set_nested_namespace_value(cfg, top: str, name: str, key: str, value):
+        """Set ``cfg[top][name][key] = value``, creating missing dicts."""
+        # Existing dict levels are updated in place; anything else (missing
+        # key, non-dict value) is replaced by a fresh dict.
+        section = cfg.get(top)
+        section = section if isinstance(section, dict) else {}
+        leaf = section.get(name)
+        leaf = leaf if isinstance(leaf, dict) else {}
+        leaf[key] = value
+        section[name] = leaf
+        cfg[top] = section
+
+    @staticmethod
+    def _drop_legacy_namespace(cfg, legacy: str, canonical: str, child: str) -> None:
+        """Drop ``child`` from the deprecated ``legacy`` section; remove it once empty."""
+        # NOTE: ``canonical`` is not used by this helper.
+        section = cfg.get(legacy)
+        if isinstance(section, dict):
+            section.pop(child, None)
+            if not section:
+                cfg.pop(legacy, None)
+            else:
+                cfg[legacy] = section
+
+    def _handle_set_voice_reply_mode(self, data: dict) -> str:
+        """Translate the off / voice_if_voice / always picker into two config flags."""
+        mode = (data.get("mode") or "").strip().lower()
+        # Each mode maps to the legacy (always_reply_voice, voice_reply_voice)
+        # pair that chat_channel.py reads, so all channels share the routing.
+        flag_table = {"off": (False, False), "voice_if_voice": (False, True), "always": (True, False)}
+        if mode not in flag_table:
+            return json.dumps({"status": "error", "message": f"invalid mode: {mode!r}"})
+
+        always, if_voice = flag_table[mode]
+        live_cfg = conf()
+        disk_cfg = self._read_file_config()
+        for cfg in (live_cfg, disk_cfg):
+            cfg["always_reply_voice"] = always
+            cfg["voice_reply_voice"] = if_voice
+        self._write_file_config(disk_cfg)
+        logger.info(
+            f"[ModelsHandler] voice reply mode set: {mode!r} "
+            f"(always_reply_voice={always}, voice_reply_voice={if_voice})"
+        )
+        return json.dumps({"status": "success", "mode": mode})
+
+    def _set_simple(self, key: str, value: str) -> str:
+        """Write one top-level config key to memory and disk; returns JSON status."""
+        live_cfg = conf()
+        disk_cfg = self._read_file_config()
+        live_cfg[key] = disk_cfg[key] = value
+        self._write_file_config(disk_cfg)
+        logger.info(f"[ModelsHandler] {key} set: {value!r}")
+        # Voice-routing keys also trigger a Bridge voice refresh right away.
+        if key == "voice_to_text" or key == "text_to_voice":
+            self._refresh_voice_routing()
+        return json.dumps({"status": "success", key: value})
+
+    def _set_tts(self, provider_id: str, model: str, voice: str = "") -> str:
+        """Store the TTS vendor, model and voice, then refresh voice routing."""
+        live_cfg = conf()
+        disk_cfg = self._read_file_config()
+        settings = (
+            ("text_to_voice", provider_id),
+            ("text_to_voice_model", model),
+            ("tts_voice_id", voice),
+        )
+        for name, value in settings:
+            live_cfg[name] = value
+            disk_cfg[name] = value
+        self._write_file_config(disk_cfg)
+        logger.info(
+            f"[ModelsHandler] tts updated: provider={provider_id!r} "
+            f"model={model!r} voice={voice!r}"
+        )
+        self._refresh_voice_routing()
+        return json.dumps({"status": "success", "provider": provider_id, "model": model, "voice": voice})
+
+    @staticmethod
+    def _refresh_voice_routing() -> None:
+        try:  # best-effort: a failed refresh is logged as a warning, never raised
+            from bridge.bridge import Bridge
+            Bridge().refresh_voice()
+        except Exception as e:
+            logger.warning(f"[ModelsHandler] Bridge voice refresh failed: {e}")
+
+    def _set_embedding(self, provider_id: str, model: str) -> str:
+        """Store the embedding vendor and model; returns a JSON status string.
+
+        A provider without a model is rejected. Both-empty (reset) and
+        both-set are written; so is a model without a provider (not checked).
+        """
+        if provider_id and not model:
+            return json.dumps({
+                "status": "error",
+                "message": "embedding model is required when a provider is selected",
+            })
+        live_cfg = conf()
+        disk_cfg = self._read_file_config()
+        for name, value in (("embedding_provider", provider_id), ("embedding_model", model)):
+            live_cfg[name] = value
+            disk_cfg[name] = value
+        self._write_file_config(disk_cfg)
+        logger.info(f"[ModelsHandler] embedding updated: provider={provider_id!r} model={model!r}")
+        # Nothing is hot-swapped here; the running MemoryManager picks the
+        # change up on the next /memory rebuild-index (see plugins/cow_cli).
+        return json.dumps({"status": "success", "provider": provider_id, "model": model})
+
+    def _set_search(self, strategy: str, provider: str) -> str:
+        """Persist search routing under tools.web_search.{strategy,provider}.
+
+        'auto' always stores an empty provider (the backend is picked at
+        call time). 'fixed' requires a provider from ``_SEARCH_PROVIDERS``;
+        whether its key exists is not checked here — the runtime is expected
+        to fall back to auto when the key is missing.
+        """
+        if strategy == "auto":
+            provider = ""
+        elif strategy == "fixed":
+            if provider not in self._SEARCH_PROVIDERS:
+                return json.dumps({"status": "error", "message": f"unknown provider: {provider!r}"})
+        else:
+            return json.dumps({"status": "error", "message": f"invalid strategy: {strategy!r}"})
+
+        live_cfg = conf()
+        disk_cfg = self._read_file_config()
+        for field, value in (("strategy", strategy), ("provider", provider)):
+            self._set_nested_namespace_value(live_cfg, "tools", "web_search", field, value)
+            self._set_nested_namespace_value(disk_cfg, "tools", "web_search", field, value)
+        self._write_file_config(disk_cfg)
+        logger.info(f"[ModelsHandler] search updated: strategy={strategy!r} provider={provider!r}")
+        return json.dumps({"status": "success", "strategy": strategy, "provider": provider})
+
+    def _handle_set_search_credential(self, data: dict) -> str:
+        """Store the bocha key at tools.web_search.bocha_api_key.
+
+        A missing or non-string ``api_key`` is stored as "". The other search
+        providers reuse model-vendor credentials and go through set_provider.
+        """
+        submitted = data.get("api_key")
+        api_key = submitted.strip() if isinstance(submitted, str) else ""
+        live_cfg = conf()
+        disk_cfg = self._read_file_config()
+        for cfg in (live_cfg, disk_cfg):
+            self._set_nested_namespace_value(cfg, "tools", "web_search", "bocha_api_key", api_key)
+        self._write_file_config(disk_cfg)
+        logger.info(f"[ModelsHandler] search credential set: bocha_api_key={'***' if api_key else ''}")
+        return json.dumps({"status": "success", "provider": "bocha"})
+
+    @staticmethod
+    def _reset_bridge() -> None:
+        try:  # best-effort: a failed reset is logged as a warning, never raised
+            from bridge.bridge import Bridge
+            Bridge().reset_bot()
+            logger.info("[ModelsHandler] Bridge bot routing reset")
+        except Exception as e:
+            logger.warning(f"[ModelsHandler] Bridge reset failed: {e}")
+
+
 class ChannelsHandler:
     """API for managing external channel configurations (feishu, dingtalk, etc)."""
 
@@ -2242,7 +3702,12 @@ class AssetsHandler:
                 raise web.notfound()
 
             if not os.path.exists(full_path) or not os.path.isfile(full_path):
-                logger.error(f"File not found: {full_path}")
+                # Browsers routinely probe optional asset variants (e.g. a
+                # .ttf fallback declared alongside .woff2 in @font-face);
+                # logging these as errors floods the console with harmless
+                # noise. Keep it at debug level — real misconfigurations
+                # will still surface via the network panel.
+                logger.debug(f"Static file not found: {full_path}")
                 raise web.notfound()
 
             # 设置正确的Content-Type
@@ -2257,8 +3722,12 @@ class AssetsHandler:
             with open(full_path, 'rb') as f:
                 return f.read()
 
+        except web.HTTPError:
+            # The 404 path above already logged at debug; re-raise as-is so
+            # web.py returns the original status to the client.
+            raise
         except Exception as e:
-            logger.error(f"Error serving static file: {e}", exc_info=True)  # 添加更详细的错误信息
+            logger.error(f"Error serving static file: {e}", exc_info=True)
             raise web.notfound()
 
 
diff --git a/channel/wecom_bot/wecom_bot_channel.py b/channel/wecom_bot/wecom_bot_channel.py
index 7aaca56b..0fe4500b 100644
--- a/channel/wecom_bot/wecom_bot_channel.py
+++ b/channel/wecom_bot/wecom_bot_channel.py
@@ -81,6 +81,8 @@ def _loads_wecom_ws_json(raw):
 @singleton
 class WecomBotChannel(ChatChannel):
 
+    NOT_SUPPORT_REPLYTYPE = []
+
     def __init__(self):
         super().__init__()
         self.bot_id = ""
@@ -472,6 +474,8 @@ class WecomBotChannel(ChatChannel):
             else:
                 context.type = ContextType.TEXT
             context.content = content.strip()
+            if "desire_rtype" not in context and conf().get("always_reply_voice"):
+                context["desire_rtype"] = ReplyType.VOICE
 
         return context
 
@@ -498,6 +502,8 @@ class WecomBotChannel(ChatChannel):
             self._send_file(reply.content, receiver, is_group, req_id)
         elif reply.type == ReplyType.VIDEO or reply.type == ReplyType.VIDEO_URL:
             self._send_file(reply.content, receiver, is_group, req_id, media_type="video")
+        elif reply.type == ReplyType.VOICE:
+            self._send_voice(reply.content, receiver, is_group, req_id)
         else:
             logger.warning(f"[WecomBot] Unsupported reply type: {reply.type}, falling back to text")
             self._send_text(str(reply.content), receiver, is_group, req_id)
@@ -730,6 +736,65 @@ class WecomBotChannel(ChatChannel):
                 },
             })
 
+    def _send_voice(self, voice_path: str, receiver: str, is_group: bool, req_id: str = None):
+        """Send a native voice reply; http(s) sources are downloaded and non-amr audio is converted first, since WeCom voice media must be amr."""
+        local_path = voice_path
+        if local_path.startswith("file://"):
+            local_path = local_path[7:]
+
+        if local_path.startswith(("http://", "https://")):
+            try:
+                resp = requests.get(local_path, timeout=60)
+                resp.raise_for_status()
+                ext = os.path.splitext(local_path)[1] or ".mp3"
+                tmp_path = f"/tmp/wecom_voice_{uuid.uuid4().hex[:8]}{ext}"
+                with open(tmp_path, "wb") as f:
+                    f.write(resp.content)
+                local_path = tmp_path
+            except Exception as e:
+                logger.error(f"[WecomBot] Failed to download voice for sending: {e}")
+                return
+
+        if not os.path.exists(local_path):
+            logger.error(f"[WecomBot] Voice file not found: {local_path}")
+            return
+
+        amr_path = local_path
+        if not local_path.lower().endswith(".amr"):
+            try:
+                from voice.audio_convert import any_to_amr
+                amr_path = os.path.splitext(local_path)[0] + ".amr"
+                any_to_amr(local_path, amr_path)
+            except Exception as e:
+                logger.error(f"[WecomBot] Failed to convert voice to amr: {e}")
+                return
+
+        media_id = self._upload_media(amr_path, "voice")
+        if not media_id:
+            logger.error("[WecomBot] Failed to upload voice media")
+            return
+
+        if req_id:
+            self._ws_send({
+                "cmd": "aibot_respond_msg",
+                "headers": {"req_id": req_id},
+                "body": {
+                    "msgtype": "voice",
+                    "voice": {"media_id": media_id},
+                },
+            })
+        else:
+            self._ws_send({
+                "cmd": "aibot_send_msg",
+                "headers": {"req_id": self._gen_req_id()},
+                "body": {
+                    "chatid": receiver,
+                    "chat_type": 2 if is_group else 1,
+                    "msgtype": "voice",
+                    "voice": {"media_id": media_id},
+                },
+            })
+
     def _active_send_markdown(self, content: str, receiver: str, is_group: bool):
         """Proactively send markdown message (for scheduled tasks, no req_id)."""
         self._ws_send({
diff --git a/channel/weixin/weixin_channel.py b/channel/weixin/weixin_channel.py
index dba9060f..61f5cbb1 100644
--- a/channel/weixin/weixin_channel.py
+++ b/channel/weixin/weixin_channel.py
@@ -60,6 +60,9 @@ def _save_credentials(cred_path: str, data: dict):
 @singleton
 class WeixinChannel(ChatChannel):
 
+    # ilink bot protocol has no outbound voice item; deliver TTS as a file.
+    NOT_SUPPORT_REPLYTYPE = []
+
     LOGIN_STATUS_IDLE = "idle"
     LOGIN_STATUS_WAITING = "waiting_scan"
     LOGIN_STATUS_SCANNED = "scanned"
@@ -464,6 +467,14 @@ class WeixinChannel(ChatChannel):
             else:
                 context.type = ContextType.TEXT
             context.content = content.strip()
+            if "desire_rtype" not in context and conf().get("always_reply_voice"):
+                context["desire_rtype"] = ReplyType.VOICE
+
+        elif ctype == ContextType.VOICE:
+            if "desire_rtype" not in context and (
+                conf().get("voice_reply_voice") or conf().get("always_reply_voice")
+            ):
+                context["desire_rtype"] = ReplyType.VOICE
 
         return context
 
@@ -486,6 +497,9 @@ class WeixinChannel(ChatChannel):
             self._send_file(reply.content, receiver, context_token)
         elif reply.type in (ReplyType.VIDEO, ReplyType.VIDEO_URL):
             self._send_video(reply.content, receiver, context_token)
+        elif reply.type == ReplyType.VOICE:
+            # ilink has no outbound voice item; deliver TTS as a file attachment.
+            self._send_file(reply.content, receiver, context_token)
         else:
             logger.warning(f"[Weixin] Unsupported reply type: {reply.type}, fallback to text")
             self._send_text(str(reply.content), receiver, context_token)
diff --git a/cli/VERSION b/cli/VERSION
index 815e68dd..09843e3b 100644
--- a/cli/VERSION
+++ b/cli/VERSION
@@ -1 +1 @@
-2.0.8
+2.0.9
diff --git a/common/const.py b/common/const.py
index abe3c2c1..9cfcd63c 100644
--- a/common/const.py
+++ b/common/const.py
@@ -47,6 +47,7 @@ GEMINI_3_FLASH_PRE = "gemini-3-flash-preview"  # Gemini 3 Flash Preview - Agent
 GEMINI_3_PRO_PRE = "gemini-3-pro-preview"  # Gemini 3 Pro Preview
 GEMINI_31_PRO_PRE = "gemini-3.1-pro-preview"  # Gemini 3.1 Pro Preview - Agent推荐模型
 GEMINI_31_FLASH_LITE_PRE = "gemini-3.1-flash-lite-preview"  # Gemini 3.1 Flash Lite Preview - Agent推荐模型
+GEMINI_35_FLASH = "gemini-3.5-flash"  # Gemini 3.5 Flash - Agent推荐模型
 
 # OpenAI
 GPT35 = "gpt-3.5-turbo"
@@ -74,6 +75,7 @@ GPT_5_NANO = "gpt-5-nano"
 GPT_54 = "gpt-5.4"  # GPT-5.4 - Agent recommended model
 GPT_54_MINI = "gpt-5.4-mini"
 GPT_54_NANO = "gpt-5.4-nano"
+GPT_55 = "gpt-5.5"  # GPT-5.5 - top-tier (expensive), not default
 O1 = "o1-preview"
 O1_MINI = "o1-mini"
 WHISPER_1 = "whisper-1"
@@ -104,10 +106,12 @@ QWEN_LONG = "qwen-long"
 QWEN3_MAX = "qwen3-max"  # Qwen3 Max - Agent推荐模型
 QWEN35_PLUS = "qwen3.5-plus"  # Qwen3.5 Plus - Omni model (MultiModalConversation)
 QWEN36_PLUS = "qwen3.6-plus"  # Qwen3.6 Plus - Omni model (MultiModalConversation)
+QWEN37_MAX = "qwen3.7-max"  # Qwen3.7 Max - Agent推荐模型
 QWQ_PLUS = "qwq-plus"
 
 # MiniMax
 MINIMAX_M2_7 = "MiniMax-M2.7"  # MiniMax M2.7 - Latest
+MINIMAX_TEXT_01 = "MiniMax-Text-01"  # MiniMax 多模态 (vision)
 MINIMAX_M2_7_HIGHSPEED = "MiniMax-M2.7-highspeed"  # MiniMax M2.7 highspeed
 MINIMAX_M2_5 = "MiniMax-M2.5"  # MiniMax M2.5
 MINIMAX_M2_1 = "MiniMax-M2.1"  # MiniMax M2.1
@@ -119,6 +123,7 @@ MINIMAX_ABAB6_5 = "abab6.5-chat"  # MiniMax abab6.5
 GLM_5_1 = "glm-5.1"  # 智谱 GLM-5.1 - Agent recommended model (default)
 GLM_5_TURBO = "glm-5-turbo"  # 智谱 GLM-5-Turbo
 GLM_5 = "glm-5"  # 智谱 GLM-5
+GLM_5V_TURBO = "glm-5v-turbo"  # 智谱多模态 (vision)
 GLM_4 = "glm-4"
 GLM_4_PLUS = "glm-4-plus"
 GLM_4_flash = "glm-4-flash"
@@ -183,7 +188,7 @@ MODEL_LIST = [
               "claude", "claude-3-haiku", "claude-3-sonnet", "claude-3-opus", "claude-3.5-sonnet",
 
               # Gemini
-              GEMINI_31_FLASH_LITE_PRE, GEMINI_31_PRO_PRE, GEMINI_3_PRO_PRE, GEMINI_3_FLASH_PRE, GEMINI_25_PRO_PRE, GEMINI_25_FLASH_PRE,
+              GEMINI_35_FLASH, GEMINI_31_FLASH_LITE_PRE, GEMINI_31_PRO_PRE, GEMINI_3_PRO_PRE, GEMINI_3_FLASH_PRE, GEMINI_25_PRO_PRE, GEMINI_25_FLASH_PRE,
               GEMINI_20_FLASH, GEMINI_20_flash_exp, GEMINI_15_PRO, GEMINI_15_flash, GEMINI_PRO, GEMINI,
 
               # OpenAI
@@ -193,7 +198,7 @@ MODEL_LIST = [
               GPT_4o, GPT_4O_0806, GPT_4o_MINI,
               GPT_41, GPT_41_MINI, GPT_41_NANO,
               GPT_5, GPT_5_MINI, GPT_5_NANO,
-              GPT_54, GPT_54_MINI, GPT_54_NANO,
+              GPT_54, GPT_55, GPT_54_MINI, GPT_54_NANO,
               O1, O1_MINI,
 
               # GLM (智谱AI)
@@ -201,7 +206,7 @@ MODEL_LIST = [
               GLM_4_0520, GLM_4_AIR, GLM_4_AIRX, GLM_4_7,
 
               # Qwen (通义千问)
-              QWEN36_PLUS, QWEN35_PLUS, QWEN3_MAX, QWEN_MAX, QWEN_PLUS, QWEN_TURBO, QWEN_LONG,
+              QWEN37_MAX, QWEN36_PLUS, QWEN35_PLUS, QWEN3_MAX, QWEN_MAX, QWEN_PLUS, QWEN_TURBO, QWEN_LONG,
 
               # Doubao (豆包)
               DOUBAO, DOUBAO_SEED_2_CODE, DOUBAO_SEED_2_PRO, DOUBAO_SEED_2_LITE, DOUBAO_SEED_2_MINI,
diff --git a/config-template.json b/config-template.json
index bf7e8b3c..4e4a7d36 100644
--- a/config-template.json
+++ b/config-template.json
@@ -16,8 +16,8 @@
   "open_ai_api_base": "https://api.openai.com/v1",
   "gemini_api_key": "",
   "gemini_api_base": "https://generativelanguage.googleapis.com",
-  "voice_to_text": "openai",
-  "text_to_voice": "openai",
+  "voice_to_text": "",
+  "text_to_voice": "",
   "voice_reply_voice": false,
   "speech_recognition": true,
   "group_speech_recognition": false,
diff --git a/config.py b/config.py
index d172fd3d..6a3a00df 100644
--- a/config.py
+++ b/config.py
@@ -330,8 +330,18 @@ def load_config():
     config_str = read_file(config_path)
     logger.debug("[INIT] config str: {}".format(drag_sensitive(config_str)))
 
-    # 将json字符串反序列化为dict类型
-    config = Config(json.loads(config_str))
+    # 将json字符串反序列化为dict类型。
+    # `object_pairs_hook` lets us catch users who accidentally typed the
+    # same key twice (e.g. two `"tools"` blocks) — json.loads would
+    # otherwise silently drop all but the last occurrence.
+    config = Config(json.loads(config_str, object_pairs_hook=_merge_duplicate_keys))
+
+    # Migrate legacy singular keys (`tool`, `skill`) into the canonical
+    # plural buckets so the rest of the codebase only reads one schema.
+    # Deep-merge so existing `tools`/`skills` entries are preserved and
+    # only missing namespaces are filled in from the legacy section.
+    _merge_legacy_namespace(config, legacy="tool",  canonical="tools")
+    _merge_legacy_namespace(config, legacy="skill", canonical="skills")
 
     # override config with environment variables.
     # Some online deployment platforms (e.g. Railway) deploy project from github directly. So you shouldn't put your secrets like api key in a config file, instead use environment variables to override the default config.
@@ -422,7 +432,7 @@ def load_config():
                 os.environ[env_key] = str(val)
                 injected += 1
 
-    injected += _sync_skill_config_to_env(config.get("skill", {}))
+    injected += _sync_skill_config_to_env(config.get("skills", {}))
 
     if injected:
         logger.info("[INIT] Synced {} config values to environment variables".format(injected))
@@ -430,11 +440,90 @@ def load_config():
     config.load_user_datas()
 
 
+def _deep_merge_dicts(base: dict, incoming: dict) -> dict:
+    """Recursively merge ``incoming`` into ``base`` (incoming wins on leaves)."""
+    for key, val in incoming.items():
+        if (
+            key in base
+            and isinstance(base[key], dict)
+            and isinstance(val, dict)
+        ):
+            _deep_merge_dicts(base[key], val)
+        else:
+            base[key] = val
+    return base
+
+
+def _merge_duplicate_keys(pairs):
+    """object_pairs_hook for json.loads: merge duplicate keys of every decoded
+    object (lists concat, dicts deep-merge, scalars take the latter) instead of dropping."""
+    out = {}
+    duplicates = []
+    for key, val in pairs:
+        if key not in out:
+            out[key] = val
+            continue
+        duplicates.append(key)
+        prev = out[key]
+        if isinstance(prev, dict) and isinstance(val, dict):
+            _deep_merge_dicts(prev, val)
+        elif isinstance(prev, list) and isinstance(val, list):
+            prev.extend(val)
+        else:
+            out[key] = val
+    if duplicates:
+        # logger may not be wired yet — fall back to print so we never lose the warning.
+        unique = sorted(set(duplicates))
+        try:
+            logger.warning("[INIT] config.json has duplicate keys (merged): %s", unique)
+        except Exception:
+            print("[INIT] config.json has duplicate keys (merged):", unique)
+    return out
+
+
+def _merge_legacy_namespace(cfg, legacy: str, canonical: str) -> None:
+    """Fold deprecated singular keys (``tool`` / ``skill``) into their plural
+    canonical counterparts at load time. Canonical entries always win."""
+    legacy_section = cfg.get(legacy)
+    if not isinstance(legacy_section, dict) or not legacy_section:
+        cfg.pop(legacy, None)
+        return
+    canonical_section = cfg.get(canonical)
+    if not isinstance(canonical_section, dict):
+        canonical_section = {}
+    merged_keys = []
+    for name, val in legacy_section.items():
+        if name in canonical_section:
+            if isinstance(canonical_section[name], dict) and isinstance(val, dict):
+                for sub_key, sub_val in val.items():
+                    if (
+                        sub_key in canonical_section[name]
+                        and isinstance(canonical_section[name][sub_key], dict)
+                        and isinstance(sub_val, dict)
+                    ):
+                        _deep_merge_dicts(sub_val, canonical_section[name][sub_key])
+                        canonical_section[name][sub_key] = sub_val
+                    else:
+                        canonical_section[name].setdefault(sub_key, sub_val)
+            continue
+        canonical_section[name] = val
+        merged_keys.append(name)
+    cfg[canonical] = canonical_section
+    cfg.pop(legacy, None)
+    if merged_keys:
+        logger.warning(
+            "[INIT] Legacy config key '{}' is deprecated; merged into '{}': {}. "
+            "Please rename '{}' to '{}' in your config.json.".format(
+                legacy, canonical, merged_keys, legacy, canonical,
+            )
+        )
+
+
 def _sync_skill_config_to_env(skill_section) -> int:
     """Flatten skill-namespaced config into environment variables.
 
-    Mapping rule: ``config["skill"][<name>][<key>]`` -> ``SKILL_<NAME>_<KEY>``
-    (e.g. ``skill["image-generation"].model`` -> ``SKILL_IMAGE_GENERATION_MODEL``).
+    Mapping rule: ``config["skills"][<name>][<key>]`` -> ``SKILL_<NAME>_<KEY>``
+    (e.g. ``skills["image-generation"].model`` -> ``SKILL_IMAGE_GENERATION_MODEL``).
 
     This lets subprocess-based skill scripts read their own settings without
     importing project code. Existing env vars are NOT overwritten so the
diff --git a/docs/channels/index.mdx b/docs/channels/index.mdx
new file mode 100644
index 00000000..9049a51d
--- /dev/null
+++ b/docs/channels/index.mdx
@@ -0,0 +1,39 @@
+---
+title: 通道概览
+description: CowAgent 支持的通道及能力矩阵
+---
+
+CowAgent 支持接入多种聊天通道，启动时通过 `channel_type` 切换。Web 控制台默认开启，可与其他接入通道并行运行。
+
+## 能力矩阵
+
+下表汇总各通道支持的入站消息类型、机器人回复类型与群聊能力，方便按场景选择。
+
+| 通道 | 文本 | 图片 | 文件 | 语音 | 群聊 |
+| --- | :-: | :-: | :-: | :-: | :-: |
+| [微信](/channels/weixin) | ✅ | ✅ | ✅ | ✅ |  |
+| [Web 控制台](/channels/web) | ✅ | ✅ | ✅ | ✅ | |
+| [飞书](/channels/feishu) | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [钉钉](/channels/dingtalk) | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [企微智能机器人](/channels/wecom-bot) | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [QQ](/channels/qq) | ✅ | ✅ | ✅ | | ✅ |
+| [企业微信应用](/channels/wecom) | ✅ | ✅ | ✅ | ✅ | |
+| [公众号](/channels/wechatmp) | ✅ | ✅ | | ✅ | |
+
+- **图片 / 文件 / 语音**列指通道支持收发对应消息类型，具体细节详见各通道文档
+- **群聊**列指可识别并响应群消息
+
+<Tip>
+  每个通道的语音 / 图像能力依赖对应模型厂商的配置，详见 [模型概览](/models)。
+</Tip>
+
+## 通道一览
+
+- [Web 控制台](/channels/web) — 内置浏览器对话和管理面板，默认开启
+- [微信](/channels/weixin) — 通过个人微信扫码登录
+- [飞书](/channels/feishu) — 飞书自建机器人
+- [钉钉](/channels/dingtalk) — 钉钉自建机器人
+- [企微智能机器人](/channels/wecom-bot) — 企业微信智能机器人
+- [QQ](/channels/qq) — QQ 官方机器人开放平台
+- [企业微信应用](/channels/wecom) — 企业微信自建应用接入
+- [公众号](/channels/wechatmp) — 微信公众号（订阅号 / 服务号）
diff --git a/docs/channels/web.mdx b/docs/channels/web.mdx
index 29d9ed97..30bea09b 100644
--- a/docs/channels/web.mdx
+++ b/docs/channels/web.mdx
@@ -59,9 +59,9 @@ Web 控制台是 CowAgent 的默认通道，启动后会自动运行，通过浏
 
 ### 模型管理
 
-支持在线管理模型配置，无需手动编辑配置文件：
+支持在线管理不同模型厂商的文本、图像、语音、向量模型配置，无需手动编辑配置文件：
 
-<img width="850" src="https://cdn.link-ai.tech/doc/20260227173811.png" />
+<img width="850" src="https://cdn.link-ai.tech/doc/20260521212949.png" />
 
 ### 技能管理
 
diff --git a/docs/docs.json b/docs/docs.json
index 3eebadb0..bacc9566 100644
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -181,6 +181,7 @@
               {
                 "group": "接入渠道",
                 "pages": [
+                  "channels/index",
                   "channels/weixin",
                   "channels/web",
                   "channels/feishu",
diff --git a/docs/en/models/qianfan.mdx b/docs/en/models/qianfan.mdx
index aa88d040..a477bed0 100644
--- a/docs/en/models/qianfan.mdx
+++ b/docs/en/models/qianfan.mdx
@@ -40,7 +40,7 @@ To force a specific Vision model, set it explicitly in `config.json`:
 
 ```json
 {
-  "tool": {
+  "tools": {
     "vision": {
       "model": "ernie-4.5-turbo-vl"
     }
diff --git a/docs/en/releases/v2.0.7.mdx b/docs/en/releases/v2.0.7.mdx
index 3519812c..522e5339 100644
--- a/docs/en/releases/v2.0.7.mdx
+++ b/docs/en/releases/v2.0.7.mdx
@@ -11,7 +11,7 @@ New built-in `image-generation` skill supporting text-to-image, image-to-image,
 - **Zero model selection**: Just configure an API key and it works — no need to manually specify a model. You can also name a specific model in conversation (e.g. "draw a cat with seedream")
 - **Flexible control**: Supports `quality`, `size` (512/1K–4K), and `aspect_ratio` parameters, with each provider automatically mapping to its supported values
 - **Image editing**: Pass existing images for editing, style transfer, or multi-image fusion (Seedream supports up to 14 reference images)
-- **Skill-level config**: Pin a default model via `skill.image-generation.model` in `config.json`
+- **Skill-level config**: Pin a default model via `skills.image-generation.model` in `config.json`
 - **Image lightbox**: All images in the Web console now support click-to-enlarge preview
 
 Docs: [Image Generation Skill](https://docs.cowagent.ai/en/skills/image-generation)
diff --git a/docs/en/releases/v2.0.8.mdx b/docs/en/releases/v2.0.8.mdx
index 13a63d3a..a243cced 100644
--- a/docs/en/releases/v2.0.8.mdx
+++ b/docs/en/releases/v2.0.8.mdx
@@ -51,7 +51,7 @@ The voice and streaming building blocks come from a community contribution #2791
 
 ## 🔧 Tools and Safety
 
-- **Vision model selection**: `tool.vision.model` config now actually takes effect, with automatic fallback when unconfigured #2792
+- **Vision model selection**: `tools.vision.model` config now actually takes effect, with automatic fallback when unconfigured #2792
 - **Bash safety prompt**: The destructive-deletion confirm prompt is now scoped to paths outside the workspace — routine in-workspace operations are no longer interrupted
 
 ## 🐛 Other Fixes
diff --git a/docs/en/skills/image-generation.mdx b/docs/en/skills/image-generation.mdx
index 49c0ed7d..307ff5bb 100644
--- a/docs/en/skills/image-generation.mdx
+++ b/docs/en/skills/image-generation.mdx
@@ -87,7 +87,7 @@ Configure ARK_API_KEY as xxx
 To force all image generation through a specific provider's model, add this to `config.json`:
 
 ```json
-"skill": {
+"skills": {
   "image-generation": {
     "model": "seedream-5.0-lite"
   }
diff --git a/docs/en/tools/vision.mdx b/docs/en/tools/vision.mdx
index 9e9da7f5..07f43490 100644
--- a/docs/en/tools/vision.mdx
+++ b/docs/en/tools/vision.mdx
@@ -51,7 +51,7 @@ To specify a particular model for the vision tool, add to `config.json`:
 
 ```json
 {
-    "tool": {
+    "tools": {
         "vision": {
             "model": "ernie-4.5-turbo-vl"
         }
diff --git a/docs/ja/models/qianfan.mdx b/docs/ja/models/qianfan.mdx
index 6e5fde15..4c3651f8 100644
--- a/docs/ja/models/qianfan.mdx
+++ b/docs/ja/models/qianfan.mdx
@@ -40,7 +40,7 @@ description: Baidu Qianfan ERNIE モデル設定
 
 ```json
 {
-  "tool": {
+  "tools": {
     "vision": {
       "model": "ernie-4.5-turbo-vl"
     }
diff --git a/docs/ja/releases/v2.0.7.mdx b/docs/ja/releases/v2.0.7.mdx
index 81390dd0..bcf46778 100644
--- a/docs/ja/releases/v2.0.7.mdx
+++ b/docs/ja/releases/v2.0.7.mdx
@@ -11,7 +11,7 @@ description: CowAgent 2.0.7 - 画像生成スキル（6プロバイダー自動
 - **モデル選択不要**：API Key を設定するだけで使用可能、モデルを手動で指定する必要なし。会話で特定モデルを指名することも可能（例：「seedream で猫を描いて」）
 - **柔軟な制御**：`quality`（画質）、`size`（解像度、512/1K〜4K）、`aspect_ratio`（アスペクト比）パラメータ対応、各プロバイダーが自動的に有効な値にマッピング
 - **画像編集**：既存の画像を渡して編集・スタイル変換・複数画像融合が可能（Seedream は最大 14 枚の参照画像をサポート）
-- **スキルレベル設定**：`config.json` の `skill.image-generation.model` でデフォルトモデルを固定可能
+- **スキルレベル設定**：`config.json` の `skills.image-generation.model` でデフォルトモデルを固定可能
 - **画像ライトボックス**：Web コンソールのすべての画像がクリックで拡大プレビュー対応
 
 ドキュメント：[画像生成スキル](https://docs.cowagent.ai/ja/skills/image-generation)
diff --git a/docs/ja/releases/v2.0.8.mdx b/docs/ja/releases/v2.0.8.mdx
index 4456fb70..310d98b2 100644
--- a/docs/ja/releases/v2.0.8.mdx
+++ b/docs/ja/releases/v2.0.8.mdx
@@ -51,7 +51,7 @@ description: CowAgent 2.0.8 - 飛書チャネル全面アップグレード（
 
 ## 🔧 ツールと安全性
 
-- **Vision モデル選択**：`tool.vision.model` 設定が実際に反映されるようになり、未設定時は自動フォールバック #2792
+- **Vision モデル選択**：`tools.vision.model` 設定が実際に反映されるようになり、未設定時は自動フォールバック #2792
 - **Bash セーフティ確認**：破壊的削除の確認プロンプトをワークスペース外のパスに限定。ワークスペース内の通常操作は中断されません
 
 ## 🐛 その他の修正
diff --git a/docs/ja/skills/image-generation.mdx b/docs/ja/skills/image-generation.mdx
index cafc9eb3..fbf84e4d 100644
--- a/docs/ja/skills/image-generation.mdx
+++ b/docs/ja/skills/image-generation.mdx
@@ -87,7 +87,7 @@ ARK_API_KEY を xxx に設定して
 すべての画像生成を特定のプロバイダーのモデルで固定したい場合、`config.json` に以下を追加：
 
 ```json
-"skill": {
+"skills": {
   "image-generation": {
     "model": "seedream-5.0-lite"
   }
diff --git a/docs/ja/tools/vision.mdx b/docs/ja/tools/vision.mdx
index 0c3c9d9a..2777e27a 100644
--- a/docs/ja/tools/vision.mdx
+++ b/docs/ja/tools/vision.mdx
@@ -51,7 +51,7 @@ Vision ツールで使用するモデルを指定するには、`config.json` 
 
 ```json
 {
-    "tool": {
+    "tools": {
         "vision": {
             "model": "ernie-4.5-turbo-vl"
         }
diff --git a/docs/models/claude.mdx b/docs/models/claude.mdx
index 920f54cd..05b6164c 100644
--- a/docs/models/claude.mdx
+++ b/docs/models/claude.mdx
@@ -1,8 +1,16 @@
 ---
 title: Claude
-description: Claude 模型配置
+description: Anthropic Claude 模型配置（文本对话 + 图像理解）
 ---
 
+Claude 由 Anthropic 提供，支持文本对话与图像理解，主流 Sonnet / Opus 模型均原生支持视觉，无需额外指定 Vision 模型。
+
+<Tip>
+  通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力，无需手动改配置文件。
+</Tip>
+
+## 文本对话
+
 ```json
 {
   "model": "claude-sonnet-4-6",
@@ -14,4 +22,28 @@ description: Claude 模型配置
 | --- | --- |
 | `model` | 支持 `claude-sonnet-4-6`、`claude-opus-4-7`、`claude-opus-4-6`、`claude-sonnet-4-5`、`claude-sonnet-4-0`、`claude-3-5-sonnet-latest` 等，参考 [官方模型](https://docs.anthropic.com/en/docs/about-claude/models/overview) |
 | `claude_api_key` | 在 [Claude 控制台](https://console.anthropic.com/settings/keys) 创建 |
-| `claude_api_base` | 可选，默认为 `https://api.anthropic.com/v1`，修改可接入第三方代理 |
+| `claude_api_base` | 可选，默认为 `https://api.anthropic.com/v1`，可改为第三方代理 |
+
+### 模型选择
+
+| 模型 | 适用场景 |
+| --- | --- |
+| `claude-sonnet-4-6` | 默认推荐，性价比与速度平衡 |
+| `claude-opus-4-7` | 复杂推理与长链路任务，效果最佳但成本更高 |
+| `claude-sonnet-4-5` / `claude-sonnet-4-0` | 上一代旗舰，价格更低 |
+
+## 图像理解
+
+配置 `claude_api_key` 后 Agent 的 Vision 工具会自动使用 Claude 主模型识别图像，无需额外配置。
+
+如需手动指定 Vision 模型，可在配置文件中显式配置：
+
+```json
+{
+  "tools": {
+    "vision": {
+      "model": "claude-sonnet-4-6"
+    }
+  }
+}
+```
diff --git a/docs/models/custom.mdx b/docs/models/custom.mdx
index 907dbac3..2673a8de 100644
--- a/docs/models/custom.mdx
+++ b/docs/models/custom.mdx
@@ -13,7 +13,7 @@ description: 自定义厂商配置，适用于第三方 API 代理和本地模
   与 `openai` 厂商的区别：选择自定义厂商后，通过 `/config model` 切换模型时，不会自动切换厂商类型，始终使用自定义的 API 地址。
 </Note>
 
-## 配置方式
+## 文本对话
 
 ### 第三方 API 代理
 
@@ -35,7 +35,7 @@ description: 自定义厂商配置，适用于第三方 API 代理和本地模
 
 ### 本地模型
 
-本地模型通常不需要 API Key，只需填写 API Base 即可：
+本地模型通常不需要 API Key，只需填写 API Base：
 
 ```json
 {
@@ -53,7 +53,7 @@ description: 自定义厂商配置，适用于第三方 API 代理和本地模
 | [vLLM](https://docs.vllm.ai) | `http://localhost:8000/v1` |
 | [LocalAI](https://localai.io) | `http://localhost:8080/v1` |
 
-## 切换模型
+### 切换模型
 
 自定义厂商下切换模型时，只会修改 `model`，不会改变 `bot_type` 和 API 地址：
 
diff --git a/docs/models/deepseek.mdx b/docs/models/deepseek.mdx
index a522ce98..57b96d55 100644
--- a/docs/models/deepseek.mdx
+++ b/docs/models/deepseek.mdx
@@ -1,9 +1,11 @@
 ---
 title: DeepSeek
-description: DeepSeek 模型配置
+description: DeepSeek 模型配置（文本对话 + 思考模式）
 ---
 
-方式一：官方接入（推荐）：
+DeepSeek 是当前 Agent 模式默认推荐的厂商之一，主打高性价比的文本对话和任务规划能力。
+
+## 文本对话
 
 ```json
 {
@@ -18,20 +20,20 @@ description: DeepSeek 模型配置
 | `deepseek_api_key` | 在 [DeepSeek 平台](https://platform.deepseek.com/api_keys) 创建 |
 | `deepseek_api_base` | 可选，默认为 `https://api.deepseek.com/v1`，可修改为第三方代理地址 |
 
-## 模型选择
+### 模型选择
 
 | 模型 | 适用场景 |
 | --- | --- |
 | `deepseek-v4-flash` | 默认推荐，速度快、成本低 |
-| `deepseek-v4-pro` | 更智能、复杂任务效果更强 |
+| `deepseek-v4-pro` | 更智能，复杂任务效果更强 |
 
 ## 思考模式
 
-V4 系列（`deepseek-v4-flash` / `deepseek-v4-pro`）支持显式的"思考模式"：模型在输出最终回答前，先输出一段思维链（`reasoning_content`），从而提升答案质量。
+V4 系列（`deepseek-v4-flash` / `deepseek-v4-pro`）支持显式的「思考模式」：模型在输出最终回答前，先输出一段思维链（`reasoning_content`），从而提升答案质量。
 
 ### 开关
 
-通过全局配置 `enable_thinking` 控制：
+通过全局配置 `enable_thinking` 控制，也可在 Web 控制台的配置页面中切换：
 
 ```json
 {
@@ -66,16 +68,5 @@ V4 系列（`deepseek-v4-flash` / `deepseek-v4-pro`）支持显式的"思考模
 - **多轮工具调用**：当历史中包含工具调用时，DeepSeek 要求所有 assistant 消息必须回传 `reasoning_content`。CowAgent 会自动处理回传逻辑，跨轮次切换思考开关也不会出错。
 
 <Tip>
-  默认使用 `deepseek-v4-flash`；复杂任务可使用 `deepseek-v4-pro`；需要深度思考可开启 `enable_thinking`。
+  默认使用 `deepseek-v4-flash`；复杂任务可使用 `deepseek-v4-pro`；需要深度推理可开启 `enable_thinking`。
 </Tip>
-
-方式二：OpenAI 兼容方式接入：
-
-```json
-{
-  "model": "deepseek-v4-flash",
-  "bot_type": "openai",
-  "open_ai_api_key": "YOUR_API_KEY",
-  "open_ai_api_base": "https://api.deepseek.com/v1"
-}
-```
diff --git a/docs/models/doubao.mdx b/docs/models/doubao.mdx
index e7440434..cfdc5670 100644
--- a/docs/models/doubao.mdx
+++ b/docs/models/doubao.mdx
@@ -1,17 +1,66 @@
 ---
 title: 豆包 Doubao
-description: 豆包 (火山方舟) 模型配置
+description: 豆包（火山方舟）模型配置（文本 / 图像理解 / 图像生成 / 向量）
 ---
 
+豆包（火山方舟）支持文本对话、图像理解、图像生成（Seedream）和向量能力，一份 `ark_api_key` 即可启用全部能力。
+
+<Tip>
+  通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力，无需手动改配置文件。
+</Tip>
+
+## 文本对话
+
 ```json
 {
-  "model": "doubao-seed-2-0-code-preview-260215",
+  "model": "doubao-seed-2-0-pro-260215",
   "ark_api_key": "YOUR_API_KEY"
 }
 ```
 
 | 参数 | 说明 |
 | --- | --- |
-| `model` | 可填 `doubao-seed-2-0-code-preview-260215`、`doubao-seed-2-0-pro-260215`、`doubao-seed-2-0-lite-260215` 等 |
+| `model` | 可填 `doubao-seed-2-0-pro-260215`、`doubao-seed-2-0-code-preview-260215`、`doubao-seed-2-0-lite-260215` 等 |
 | `ark_api_key` | 在 [火山方舟控制台](https://console.volcengine.com/ark/region:ark+cn-beijing/apikey) 创建 |
 | `ark_base_url` | 可选，默认为 `https://ark.cn-beijing.volces.com/api/v3` |
+
+## 图像理解
+
+配置 `ark_api_key` 后 Agent 的 Vision 工具会自动使用 `doubao-seed-2-0-pro-260215` 识别图像，无需额外配置。
+
+如需手动指定 Vision 模型：
+
+```json
+{
+  "tools": {
+    "vision": {
+      "model": "doubao-seed-2-0-pro-260215"
+    }
+  }
+}
+```
+
+## 图像生成
+
+```json
+{
+  "skills": {
+    "image-generation": {
+      "model": "seedream-5.0-lite"
+    }
+  }
+}
+```
+
+可选模型：`seedream-5.0-lite`、`seedream-4.5`。
+
+## 向量
+
+```json
+{
+  "embedding_provider": "doubao",
+  "embedding_model": "doubao-embedding-vision-251215"
+}
+```
+
+默认模型 `doubao-embedding-vision-251215`（多模态 embedding），可在配置文件中通过 `embedding_dimensions` 指定 1024 或 2048 维。修改 embedding 后需执行 `/memory rebuild-index` 命令重建索引。
diff --git a/docs/models/gemini.mdx b/docs/models/gemini.mdx
index 220e53a2..f1c8991a 100644
--- a/docs/models/gemini.mdx
+++ b/docs/models/gemini.mdx
@@ -1,16 +1,59 @@
 ---
 title: Gemini
-description: Google Gemini 模型配置
+description: Google Gemini 模型配置（文本对话 + 图像理解 + 图像生成）
 ---
 
+Google Gemini 支持文本对话、图像理解和图像生成（Nano Banana 系列），一个 `gemini_api_key` 即可启用全部能力。
+
+<Tip>
+  通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力，无需手动改配置文件。
+</Tip>
+
+## 文本对话
+
 ```json
 {
-  "model": "gemini-3.1-pro-preview",
+  "model": "gemini-3.5-flash",
   "gemini_api_key": "YOUR_API_KEY"
 }
 ```
 
 | 参数 | 说明 |
 | --- | --- |
-| `model` | 支持 `gemini-3.1-flash-lite-preview`、`gemini-3.1-pro-preview`、`gemini-3-flash-preview`、`gemini-3-pro-preview` 等，参考 [官方文档](https://ai.google.dev/gemini-api/docs/models) |
+| `model` | 推荐 `gemini-3.5-flash`，亦支持 `gemini-3.1-pro-preview`、`gemini-3.1-flash-lite-preview`、`gemini-3-flash-preview`、`gemini-3-pro-preview` 等，参考 [官方文档](https://ai.google.dev/gemini-api/docs/models) |
 | `gemini_api_key` | 在 [Google AI Studio](https://aistudio.google.com/app/apikey) 创建 |
+| `gemini_api_base` | 可选，默认为 `https://generativelanguage.googleapis.com`，可改为第三方代理 |
+
+## 图像理解
+
+Gemini 全系列模型均原生支持视觉，配置 `gemini_api_key` 后 Agent 的 Vision 工具会自动使用主模型识别图像，无需额外配置。
+
+如需手动指定 Vision 模型：
+
+```json
+{
+  "tools": {
+    "vision": {
+      "model": "gemini-3.1-flash-lite-preview"
+    }
+  }
+}
+```
+
+## 图像生成
+
+```json
+{
+  "skills": {
+    "image-generation": {
+      "model": "gemini-3.1-flash-image-preview"
+    }
+  }
+}
+```
+
+| 模型 ID | 别名 |
+| --- | --- |
+| `gemini-3.1-flash-image-preview` | Nano Banana 2 |
+| `gemini-3-pro-image-preview` | Nano Banana Pro |
+| `gemini-2.5-flash-image` | Nano Banana |
diff --git a/docs/models/glm.mdx b/docs/models/glm.mdx
index f667efdf..ad5f8fd3 100644
--- a/docs/models/glm.mdx
+++ b/docs/models/glm.mdx
@@ -1,8 +1,16 @@
 ---
 title: 智谱 GLM
-description: 智谱AI GLM 模型配置
+description: 智谱 AI GLM 模型配置（文本 / 图像理解 / 语音识别 / 向量）
 ---
 
+智谱 AI 支持文本对话、图像理解、语音识别（ASR）和向量（Embedding），一份 `zhipu_ai_api_key` 即可启用全部能力。
+
+<Tip>
+  通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力，无需手动改配置文件。
+</Tip>
+
+## 文本对话
+
 ```json
 {
   "model": "glm-5.1",
@@ -13,15 +21,36 @@ description: 智谱AI GLM 模型配置
 | 参数 | 说明 |
 | --- | --- |
 | `model` | 可填 `glm-5.1`、`glm-5-turbo`、`glm-5`、`glm-4.7`、`glm-4-plus`、`glm-4-flash`、`glm-4-air` 等，参考 [模型编码](https://bigmodel.cn/dev/api/normal-model/glm-4) |
-| `zhipu_ai_api_key` | 在 [智谱AI 控制台](https://www.bigmodel.cn/usercenter/proj-mgmt/apikeys) 创建 |
+| `zhipu_ai_api_key` | 在 [智谱 AI 控制台](https://www.bigmodel.cn/usercenter/proj-mgmt/apikeys) 创建 |
+| `zhipu_ai_api_base` | 可选，默认为 `https://open.bigmodel.cn/api/paas/v4` |
 
-也支持 OpenAI 兼容方式接入：
+## 图像理解
+
+智谱 chat 系列模型（`glm-5.1`、`glm-5-turbo` 等）不支持视觉，视觉调用统一路由到 `glm-5v-turbo`。配置 `zhipu_ai_api_key` 后 Agent 的 Vision 工具会自动使用该模型，无需在配置文件中显式指定。
+
+## 语音识别
 
 ```json
 {
-  "bot_type": "openai",
-  "model": "glm-5.1",
-  "open_ai_api_base": "https://open.bigmodel.cn/api/paas/v4",
-  "open_ai_api_key": "YOUR_API_KEY"
+  "voice_to_text": "zhipu",
+  "voice_to_text_model": "glm-asr-2512"
 }
 ```
+
+| 参数 | 说明 |
+| --- | --- |
+| `voice_to_text` | 设为 `zhipu` 启用智谱 ASR |
+| `voice_to_text_model` | 可选，默认 `glm-asr-2512` |
+
+凭证自动复用 `zhipu_ai_api_key`。语音文件建议小于 25MB，超大文件可能被服务端拒绝。
+
+## 向量
+
+```json
+{
+  "embedding_provider": "zhipu",
+  "embedding_model": "embedding-3"
+}
+```
+
+可选模型：`embedding-3`、`embedding-2`。修改 embedding 后需执行 `/memory rebuild-index` 命令重建索引。
diff --git a/docs/models/index.mdx b/docs/models/index.mdx
index afe9798e..99a2f243 100644
--- a/docs/models/index.mdx
+++ b/docs/models/index.mdx
@@ -1,67 +1,45 @@
 ---
 title: 模型概览
-description: CowAgent 支持的模型及推荐选择
+description: CowAgent 支持的模型厂商及能力矩阵
 ---
 
-CowAgent 支持国内外主流厂商的大语言模型，模型接口实现在项目的 `models/` 目录下。
+CowAgent 支持国内外主流厂商的大语言模型，模型接口实现在项目的 `models/` 目录下。除文本对话外，部分厂商还提供视觉理解、图像生成、语音识别、语音合成、向量等能力，可在 Agent 流程中按需调用。
 
 <Note>
-  Agent 模式下推荐使用以下模型，可根据效果及成本综合选择：deepseek-v4-flash、MiniMax-M2.7、claude-sonnet-4-6、gemini-3.1-pro-preview、glm-5.1、qwen3.6-plus、kimi-k2.6、ernie-5.1
+  Agent 模式下推荐使用以下模型，可根据效果及成本综合选择：deepseek-v4-flash、MiniMax-M2.7、claude-sonnet-4-6、gemini-3.5-flash、glm-5.1、qwen3.6-plus、kimi-k2.6、ernie-5.1。
 
-  同时支持使用 [LinkAI](https://link-ai.tech) 平台接口，可灵活切换多种模型，并支持知识库、工作流、插件等 Agent 能力。
+  同时支持使用 [LinkAI](https://link-ai.tech) 平台接口，一个 Key 即可灵活切换多家厂商，并附带知识库、工作流、插件等能力。
 </Note>
 
+
+## 模型能力总览
+
+各厂商提供的能力一览。「文本」列指主对话模型，其余各列表示该厂商是否可承担对应的 Agent 能力。
+
+| 厂商 | 代表模型 | 文本 | 图像理解 | 图像生成 | 语音识别 | 语音合成 | 向量 |
+| --- | --- | :-: | :-: | :-: | :-: | :-: | :-: |
+| [DeepSeek](/models/deepseek) | deepseek-v4-flash / pro | ✅ | | | | | |
+| [MiniMax](/models/minimax) | MiniMax-M2.7 | ✅ | ✅ | ✅ | | ✅ | |
+| [Claude](/models/claude) | claude-opus-4-7 | ✅ | ✅ | | | | |
+| [Gemini](/models/gemini) | gemini-3.5-flash | ✅ | ✅ | ✅ | | | |
+| [OpenAI](/models/openai) | gpt-5.5、o 系列 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [智谱 GLM](/models/glm) | glm-5.1、glm-5v-turbo | ✅ | ✅ | | ✅ | | ✅ |
+| [通义千问](/models/qwen) | qwen3.7-max | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [豆包 Doubao](/models/doubao) | doubao-seed-2.0 系列 | ✅ | ✅ | ✅ | | | ✅ |
+| [Kimi](/models/kimi) | kimi-k2.6 | ✅ | ✅ | | | | |
+| [百度千帆](/models/qianfan) | ernie-5.1 | ✅ | ✅ | | | | |
+| [LinkAI](/models/linkai) | 多厂商 100+ 模型统一接入 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [自定义](/models/custom) | 本地模型 / 三方代理 | ✅ | | | | | |
+
+<Tip>
+  Web 控制台中各项能力（视觉 / 图像 / 语音识别 / 语音合成 / 向量 / 网络搜索）均可独立配置厂商与模型，互相之间不强制绑定。
+</Tip>
+
+
 ## 配置方式
 
-**方式一（推荐）：** 通过 [Web 控制台](/channels/web) 在线管理模型配置，无需手动编辑配置文件：
+**方式一（推荐）：** 通过 [Web 控制台](/channels/web) 在线管理模型与各项能力，无需手动编辑配置文件：
 
-<img width="850" src="https://cdn.link-ai.tech/doc/20260227173811.png" />
+<img width="900" src="https://cdn.link-ai.tech/doc/20260521212527.png" />
 
 **方式二：** 手动编辑 `config.json`，根据所选模型填写对应的模型名称和 API Key。每个模型也支持 OpenAI 兼容方式接入，将 `bot_type` 设为 `openai`，配置 `open_ai_api_base` 和 `open_ai_api_key` 即可。
-
-
-## 支持的模型
-
-<CardGroup cols={2}>
-  <Card title="DeepSeek" href="/models/deepseek">
-    deepseek-v4-flash、deepseek-v4-pro 等
-  </Card>
-  <Card title="百度千帆 / ERNIE" href="/models/qianfan">
-    ernie-5.1、ernie-5.0、ernie-4.5-turbo-128k 等
-  </Card>
-  <Card title="MiniMax" href="/models/minimax">
-    MiniMax-M2.7 等系列模型
-  </Card>
-  <Card title="Claude" href="/models/claude">
-    claude-sonnet-4-6 等
-  </Card>
-  <Card title="Gemini" href="/models/gemini">
-    gemini-3.1-pro-preview 等
-  </Card>
-  <Card title="OpenAI" href="/models/openai">
-    gpt-5.4、gpt-4.1、o 系列等
-  </Card>
-  <Card title="智谱 GLM" href="/models/glm">
-    glm-5.1、glm-5-turbo、glm-5 等系列模型
-  </Card>
-  <Card title="通义千问 Qwen" href="/models/qwen">
-    qwen3.6-plus、qwen3-max 等
-  </Card>
-  <Card title="豆包 Doubao" href="/models/doubao">
-    doubao-seed 系列模型
-  </Card>
-  <Card title="Kimi" href="/models/kimi">
-    kimi-k2.6、kimi-k2.5、kimi-k2 等
-  </Card>
-  <Card title="LinkAI" href="/models/linkai">
-    多模型统一接口 + 知识库
-  </Card>
-  <Card title="自定义" href="/models/custom">
-    第三方代理、本地模型等
-  </Card>
-</CardGroup>
-
-
-<Tip>
-  全部模型名称可参考项目 [`common/const.py`](https://github.com/zhayujie/CowAgent/blob/master/common/const.py) 文件。
-</Tip>
diff --git a/docs/models/kimi.mdx b/docs/models/kimi.mdx
index a75cadea..beb5beaf 100644
--- a/docs/models/kimi.mdx
+++ b/docs/models/kimi.mdx
@@ -1,8 +1,16 @@
 ---
 title: Kimi
-description: Kimi (Moonshot) 模型配置
+description: Kimi（Moonshot）模型配置（文本对话 + 图像理解）
 ---
 
+Kimi 由 Moonshot 提供，支持文本对话与图像理解，`kimi-k2.x` 系列原生支持视觉。
+
+<Tip>
+  通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力，无需手动改配置文件。
+</Tip>
+
+## 文本对话
+
 ```json
 {
   "model": "kimi-k2.6",
@@ -14,14 +22,20 @@ description: Kimi (Moonshot) 模型配置
 | --- | --- |
 | `model` | 可填 `kimi-k2.6`、`kimi-k2.5`、`kimi-k2`、`moonshot-v1-8k`、`moonshot-v1-32k`、`moonshot-v1-128k` |
 | `moonshot_api_key` | 在 [Moonshot 控制台](https://platform.moonshot.cn/console/api-keys) 创建 |
+| `moonshot_base_url` | 可选，默认为 `https://api.moonshot.cn/v1` |
 
-也支持 OpenAI 兼容方式接入：
+## 图像理解
+
+配置 `moonshot_api_key` 后 Agent 的 Vision 工具会自动使用 `kimi-k2.6` 识别图像，无需额外配置。
+
+如需手动指定 Vision 模型：
 
 ```json
 {
-  "bot_type": "openai",
-  "model": "kimi-k2.6",
-  "open_ai_api_base": "https://api.moonshot.cn/v1",
-  "open_ai_api_key": "YOUR_API_KEY"
+  "tools": {
+    "vision": {
+      "model": "kimi-k2.6"
+    }
+  }
 }
 ```
diff --git a/docs/models/linkai.mdx b/docs/models/linkai.mdx
index 776bc7c9..68647ebc 100644
--- a/docs/models/linkai.mdx
+++ b/docs/models/linkai.mdx
@@ -1,9 +1,15 @@
 ---
 title: LinkAI
-description: 通过 LinkAI 平台统一接入多种模型
+description: 通过 LinkAI 平台统一接入文本、视觉、图像、语音与向量能力
 ---
 
-通过 [LinkAI](https://link-ai.tech) 平台可灵活切换 OpenAI、Claude、Gemini、DeepSeek、MiniMax、Qwen、Kimi 等多种模型，并支持知识库、工作流、插件等 Agent 能力。
+通过一份 `linkai_api_key` 即可访问 OpenAI、Claude、Gemini、DeepSeek、MiniMax、Qwen、Kimi、豆包等主流厂商的全部能力。
+
+<Tip>
+  通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力，无需手动改配置文件。
+</Tip>
+
+## 文本对话
 
 ```json
 {
@@ -14,8 +20,84 @@ description: 通过 LinkAI 平台统一接入多种模型
 
 | 参数 | 说明 |
 | --- | --- |
-| `use_linkai` | 设为 `true` 启用 LinkAI 接口 |
+| `use_linkai` | 设为 `true` 启用 |
 | `linkai_api_key` | 在 [控制台](https://link-ai.tech/console/interface) 创建 |
-| `model` | 留空则使用智能体默认模型，可在平台中灵活切换，[模型列表](https://link-ai.tech/console/models) 中的全部模型均可使用 |
+| `model` | 可填写 [模型列表](https://link-ai.tech/console/models) 中任意编码 |
 
-参考 [接口文档](https://docs.link-ai.tech/platform/api) 了解更多。
+前往 [模型服务](https://link-ai.tech/console/models) 了解更多。
+
+## 图像理解
+
+配置完成后 Agent 的 Vision 工具会自动调用网关上的多模态模型，无需额外配置。如需手动指定 Vision 模型：
+
+```json
+{
+  "tools": {
+    "vision": {
+      "model": "gpt-5.4-mini"
+    }
+  }
+}
+```
+
+可选模型：`gpt-4.1-mini`、`gpt-5.4-mini`、`qwen3.6-plus`、`doubao-seed-2-0-pro-260215`、`kimi-k2.6`、`claude-sonnet-4-6`、`gemini-3.1-flash-lite-preview` 等。
+
+## 图像生成
+
+```json
+{
+  "skills": {
+    "image-generation": {
+      "model": "gpt-image-2"
+    }
+  }
+}
+```
+
+| 模型 ID | 别名 / 来源 |
+| --- | --- |
+| `gpt-image-2` | OpenAI |
+| `gemini-3.1-flash-image-preview` | Nano Banana 2 |
+| `gemini-3-pro-image-preview` | Nano Banana Pro |
+| `seedream-5.0-lite` | 字节豆包 Seedream |
+
+## 语音识别
+
+```json
+{
+  "voice_to_text": "linkai"
+}
+```
+
+ASR 固定使用 Whisper，凭证自动复用 `linkai_api_key`。
+
+## 语音合成
+
+语音合成网关下支持多个底层 TTS 引擎，按 `text_to_voice_model` 选择引擎，音色随引擎切换。
+
+```json
+{
+  "text_to_voice": "linkai",
+  "text_to_voice_model": "doubao",
+  "tts_voice_id": "BV001_streaming"
+}
+```
+
+| `text_to_voice_model` | 引擎说明 |
+| --- | --- |
+| `tts-1` | OpenAI · 多语种通用（音色 `alloy` / `nova` / `echo` 等） |
+| `doubao` | 字节豆包 · 中文音色丰富 |
+| `baidu` | 百度 · 中文主播音色 |
+
+不同引擎对应的音色不同，建议在 Web 控制台「模型管理 → 语音合成」中可视化选择。
+
+## 向量
+
+```json
+{
+  "embedding_provider": "linkai",
+  "embedding_model": "text-embedding-3-small"
+}
+```
+
+默认模型 `text-embedding-3-small`（OpenAI 兼容）。修改 embedding 后需执行 `/memory rebuild-index` 命令重建索引。
diff --git a/docs/models/minimax.mdx b/docs/models/minimax.mdx
index 299a7064..8282f88b 100644
--- a/docs/models/minimax.mdx
+++ b/docs/models/minimax.mdx
@@ -1,8 +1,16 @@
 ---
 title: MiniMax
-description: MiniMax 模型配置
+description: MiniMax 模型配置（文本 / 图像理解 / 图像生成 / 语音合成）
 ---
 
+MiniMax 支持文本对话、图像理解、图像生成与语音合成，一份 `minimax_api_key` 即可启用全部能力。
+
+<Tip>
+  通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力，无需手动改配置文件。
+</Tip>
+
+## 文本对话
+
 ```json
 {
   "model": "MiniMax-M2.7",
@@ -12,16 +20,52 @@ description: MiniMax 模型配置
 
 | 参数 | 说明 |
 | --- | --- |
-| `model` | 可填 `MiniMax-M2.7`、`MiniMax-M2.5`、`MiniMax-M2.1`、`MiniMax-M2.1-lightning`、`MiniMax-M2` 等 |
+| `model` | 可填 `MiniMax-M2.7`、`MiniMax-M2.7-highspeed`、`MiniMax-M2.5`、`MiniMax-M2.1`、`MiniMax-M2.1-lightning`、`MiniMax-M2` 等 |
 | `minimax_api_key` | 在 [MiniMax 控制台](https://platform.minimaxi.com/user-center/basic-information/interface-key) 创建 |
 
-也支持 OpenAI 兼容方式接入：
+## 图像理解
+
+MiniMax 的 M2.x 系列 chat 模型本身不支持视觉，视觉调用统一路由到 `MiniMax-Text-01`。配置 `minimax_api_key` 后 Agent 的 Vision 工具会自动使用该模型，无需在配置文件中显式指定。
+
+## 图像生成
 
 ```json
 {
-  "bot_type": "openai",
-  "model": "MiniMax-M2.7",
-  "open_ai_api_base": "https://api.minimaxi.com/v1",
-  "open_ai_api_key": "YOUR_API_KEY"
+  "skills": {
+    "image-generation": {
+      "model": "image-01"
+    }
+  }
 }
 ```
+
+可选模型：`image-01`。
+
+## 语音合成
+
+```json
+{
+  "text_to_voice": "minimax",
+  "text_to_voice_model": "speech-2.8-hd",
+  "tts_voice_id": "female-shaonv"
+}
+```
+
+| 参数 | 说明 |
+| --- | --- |
+| `text_to_voice_model` | `speech-2.8-hd`（情绪渲染、自然听感）、`speech-2.8-turbo`（极速）、`speech-2.6-hd`、`speech-2.6-turbo` |
+| `tts_voice_id` | 音色 ID，支持中文 / 粤语 / 英语 / 日语 / 韩语，共 70+ 种 |
+
+常用音色示例：
+
+| 音色 ID | 说明 |
+| --- | --- |
+| `female-shaonv` | 中文 · 少女（女） |
+| `female-yujie` | 中文 · 御姐（女） |
+| `female-tianmei` | 中文 · 甜美女性（女） |
+| `male-qn-jingying` | 中文 · 精英青年（男） |
+| `male-qn-badao` | 中文 · 霸道青年（男） |
+| `Cantonese_GentleLady` | 粤语 · 温柔女声 |
+| `English_Graceful_Lady` | 英文 · Graceful Lady |
+
+完整音色列表（中文 / 粤语 / 英语 / 日语 / 韩语，共 70+ 种）可参考 [系统音色列表](https://platform.minimaxi.com/docs/faq/system-voice-id)，也可在 Web 控制台的「模型管理 → 语音合成」下拉框中可视化选择。
diff --git a/docs/models/openai.mdx b/docs/models/openai.mdx
index c3406aca..aad83c8f 100644
--- a/docs/models/openai.mdx
+++ b/docs/models/openai.mdx
@@ -1,11 +1,20 @@
 ---
 title: OpenAI
-description: OpenAI 模型配置
+description: OpenAI 模型配置（文本 / 视觉 / 图像 / 语音 / 向量）
 ---
 
+OpenAI 是覆盖最完整的厂商，可同时承担文本对话、视觉理解、图像生成、语音识别（ASR）、语音合成（TTS）和向量（Embedding）能力。一份 `open_ai_api_key` 即可让 Agent 用到全部能力。
+
+<Tip>
+  通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力，无需手动改配置文件。
+</Tip>
+
+
+## 文本对话
+
 ```json
 {
-  "model": "gpt-5.4",
+  "model": "gpt-5.5",
   "open_ai_api_key": "YOUR_API_KEY",
   "open_ai_api_base": "https://api.openai.com/v1"
 }
@@ -13,7 +22,82 @@ description: OpenAI 模型配置
 
 | 参数 | 说明 |
 | --- | --- |
-| `model` | 与 OpenAI 接口的 [model 参数](https://platform.openai.com/docs/models) 一致，支持 o 系列、gpt-5.4、gpt-5.4-mini、gpt-5.4-nano、gpt-5 系列、gpt-4.1 等，Agent 模式推荐使用 `gpt-5.4` |
+| `model` | 与 OpenAI 接口的 [model 参数](https://platform.openai.com/docs/models) 一致，支持 `gpt-5.5`、`gpt-5.4`、`gpt-5.4-mini`、`gpt-5.4-nano`、`gpt-5` 系列、`gpt-4.1`、o 系列等；Agent 模式默认 `gpt-5.5`，追求性价比可改为 `gpt-5.4` |
 | `open_ai_api_key` | 在 [OpenAI 平台](https://platform.openai.com/api-keys) 创建 |
-| `open_ai_api_base` | 可选，修改可接入第三方代理接口 |
-| `bot_type` | 使用 OpenAI 官方模型时无需填写。当通过代理接口使用 Claude 等非 OpenAI 模型时，设为 `openai` |
+| `open_ai_api_base` | 可选，修改可接入第三方代理 |
+| `bot_type` | 使用 OpenAI 官方模型时无需填写；通过兼容协议接入厂商模型时需设为 `openai` |
+
+## 图像理解
+
+`gpt-5.5`、`gpt-5.4`、`gpt-4o`、`gpt-4.1` 等 OpenAI 模型均原生支持视觉，配置 `open_ai_api_key` 后 Agent 的 Vision 工具会自动使用主模型识别图像。若主模型不支持视觉或希望显式指定，可在配置文件中配置：
+
+```json
+{
+  "tools": {
+    "vision": {
+      "model": "gpt-5.4-mini"
+    }
+  }
+}
+```
+
+支持的 Vision 模型：`gpt-5.5`、`gpt-5.4`、`gpt-5.4-mini`、`gpt-5.4-nano`、`gpt-5`、`gpt-4.1`、`gpt-4.1-mini`、`gpt-4o`。
+
+## 图像生成
+
+在配置文件中指定图像生成模型，Agent 调用图像生成技能时会自动路由到 OpenAI：
+
+```json
+{
+  "skills": {
+    "image-generation": {
+      "model": "gpt-image-2"
+    }
+  }
+}
+```
+
+支持的图像生成模型：`gpt-image-2`、`gpt-image-1`。
+
+## 语音识别
+
+```json
+{
+  "voice_to_text": "openai",
+  "voice_to_text_model": "gpt-4o-mini-transcribe"
+}
+```
+
+| 参数 | 说明 |
+| --- | --- |
+| `voice_to_text` | 设为 `openai` 启用 OpenAI 语音识别 |
+| `voice_to_text_model` | 可选，默认 `gpt-4o-mini-transcribe`；也可填 `gpt-4o-transcribe`、`whisper-1` |
+
+凭证自动复用 `open_ai_api_key`。
+
+## 语音合成
+
+```json
+{
+  "text_to_voice": "openai",
+  "text_to_voice_model": "tts-1",
+  "tts_voice_id": "alloy"
+}
+```
+
+| 参数 | 说明 |
+| --- | --- |
+| `text_to_voice_model` | `tts-1`、`tts-1-hd`、`gpt-4o-mini-tts` |
+| `tts_voice_id` | 音色：`alloy`、`echo`、`fable`、`onyx`、`nova`、`shimmer`、`ash`、`ballad`、`coral`、`sage`、`verse` |
+
+## 向量
+
+```json
+{
+  "embedding_provider": "openai",
+  "embedding_model": "text-embedding-3-small"
+}
+```
+
+可选模型：`text-embedding-3-small`、`text-embedding-3-large`、`text-embedding-ada-002`。修改 embedding 后需执行 `/memory rebuild-index` 命令重建索引。
+
diff --git a/docs/models/qianfan.mdx b/docs/models/qianfan.mdx
index 819713e0..bdd87214 100644
--- a/docs/models/qianfan.mdx
+++ b/docs/models/qianfan.mdx
@@ -1,14 +1,20 @@
 ---
 title: 百度千帆
-description: 百度千帆 ERNIE 模型配置
+description: 百度千帆 ERNIE 模型配置（文本对话 + 图像理解）
 ---
 
-方式一：官方接入（推荐）：
+百度千帆提供 ERNIE 系列模型，支持文本对话与图像理解。
+
+<Tip>
+  通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力，无需手动改配置文件。
+</Tip>
+
+## 文本对话
 
 ```json
 {
   "model": "ernie-5.1",
-  "qianfan_api_key": "",
+  "qianfan_api_key": "YOUR_API_KEY",
   "qianfan_api_base": "https://qianfan.baidubce.com/v2"
 }
 ```
@@ -19,7 +25,7 @@ description: 百度千帆 ERNIE 模型配置
 | `qianfan_api_key` | 千帆 API Key，格式通常以 `bce-v3/` 开头 |
 | `qianfan_api_base` | 可选，默认为 `https://qianfan.baidubce.com/v2` |
 
-## 模型选择
+### 模型选择
 
 | 模型 | 适用场景 |
 | --- | --- |
@@ -29,18 +35,18 @@ description: 百度千帆 ERNIE 模型配置
 | `ernie-4.5-turbo-128k` | 长上下文和通用对话 |
 | `ernie-4.5-turbo-32k` | 通用对话，成本和上下文更均衡 |
 
-## Vision 工具
+## 图像理解
 
 配置 `qianfan_api_key` 后，Agent 的 Vision 工具可以自动使用千帆视觉模型：
 
 - 当主模型本身是多模态时（如 `ernie-5.1`、`ernie-5.0`、`ernie-x1.1`、`ernie-4.5-turbo-vl`），直接由主模型识别图像，无需额外配置
 - 当主模型是纯文本时（如 `ernie-4.5-turbo-128k`），Vision 工具会自动 fallback 到 `ernie-4.5-turbo-vl`
 
-如需手动指定 Vision 模型，可在 `config.json` 中显式配置：
+如需手动指定 Vision 模型，可在配置文件中显式配置：
 
 ```json
 {
-  "tool": {
+  "tools": {
     "vision": {
       "model": "ernie-4.5-turbo-vl"
     }
@@ -48,17 +54,6 @@ description: 百度千帆 ERNIE 模型配置
 }
 ```
 
-方式二：OpenAI 兼容方式接入：
-
-```json
-{
-  "model": "ernie-5.1",
-  "bot_type": "openai",
-  "open_ai_api_key": "",
-  "open_ai_api_base": "https://qianfan.baidubce.com/v2"
-}
-```
-
 <Tip>
   新配置推荐使用 `qianfan_api_key`。旧的 `wenxin`、`wenxin-4`、`baidu_wenxin_api_key`、`baidu_wenxin_secret_key` 配置仍保持兼容。
 </Tip>
diff --git a/docs/models/qwen.mdx b/docs/models/qwen.mdx
index 2bc6517d..765bae64 100644
--- a/docs/models/qwen.mdx
+++ b/docs/models/qwen.mdx
@@ -1,8 +1,16 @@
 ---
 title: 通义千问 Qwen
-description: 通义千问模型配置
+description: 通义千问模型配置（文本 / 图像理解 / 图像生成 / 语音识别 / 语音合成 / 向量）
 ---
 
+通义千问（DashScope / 百炼）是国内覆盖最完整的厂商之一，文本、图像理解、图像生成、语音识别、语音合成与向量能力均可用一份 `dashscope_api_key` 启用。
+
+<Tip>
+  通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力，无需手动改配置文件。
+</Tip>
+
+## 文本对话
+
 ```json
 {
   "model": "qwen3.6-plus",
@@ -12,16 +20,93 @@ description: 通义千问模型配置
 
 | 参数 | 说明 |
 | --- | --- |
-| `model` | 可填 `qwen3.6-plus`、`qwen3.5-plus`、`qwen3-max`、`qwen-max`、`qwen-plus`、`qwen-turbo`、`qwq-plus` 等 |
+| `model` | 可填 `qwen3.6-plus`、`qwen3.7-max`、`qwen3.5-plus`、`qwen3-max`、`qwen-max`、`qwen-plus`、`qwen-turbo`、`qwq-plus` 等 |
 | `dashscope_api_key` | 在 [百炼控制台](https://bailian.console.aliyun.com/?tab=model#/api-key) 创建，参考 [官方文档](https://bailian.console.aliyun.com/?tab=api#/api) |
 
-也支持 OpenAI 兼容方式接入：
+## 图像理解
+
+配置 `dashscope_api_key` 后 Agent 的 Vision 工具会自动调用千问的视觉模型识别图像。`qwen3-max` / `qwen3.5-plus` / `qwen3.6-plus` 等模型本身就是多模态；若主模型是纯文本（如 `qwen-turbo`），会自动回落到 `qwen-vl-max`。
+
+如需手动指定 Vision 模型：
 
 ```json
 {
-  "bot_type": "openai",
-  "model": "qwen3.6-plus",
-  "open_ai_api_base": "https://dashscope.aliyuncs.com/compatible-mode/v1",
-  "open_ai_api_key": "YOUR_API_KEY"
+  "tools": {
+    "vision": {
+      "model": "qwen3.6-plus"
+    }
+  }
 }
 ```
+
+支持模型：`qwen3.6-plus`、`qwen3.5-plus`、`qwen3-max`。
+
+## 图像生成
+
+```json
+{
+  "skills": {
+    "image-generation": {
+      "model": "qwen-image-2.0"
+    }
+  }
+}
+```
+
+可选模型：`qwen-image-2.0`、`qwen-image-2.0-pro`。
+
+## 语音识别
+
+```json
+{
+  "voice_to_text": "dashscope",
+  "voice_to_text_model": "qwen3-asr-flash"
+}
+```
+
+| 参数 | 说明 |
+| --- | --- |
+| `voice_to_text` | 设为 `dashscope` 启用通义千问 ASR |
+| `voice_to_text_model` | 可选，默认 `qwen3-asr-flash` |
+
+凭证自动复用 `dashscope_api_key`。单段音频建议小于 10MB、时长不超过 300 秒。
+
+## 语音合成
+
+```json
+{
+  "text_to_voice": "dashscope",
+  "text_to_voice_model": "qwen3-tts-flash",
+  "tts_voice_id": "Cherry"
+}
+```
+
+| 参数 | 说明 |
+| --- | --- |
+| `text_to_voice_model` | 可选，默认 `qwen3-tts-flash`，覆盖普通话、方言与主流外语 |
+| `tts_voice_id` | 音色 ID，详见下方常用列表 |
+
+常用音色示例：
+
+| 音色 ID | 说明 |
+| --- | --- |
+| `Cherry` | 芊悦 · 阳光女声 |
+| `Serena` | 苏瑶 · 温柔女声 |
+| `Ethan` | 晨煦 · 阳光男声 |
+| `Chelsie` | 千雪 · 二次元少女 |
+| `Dylan` | 北京话 · 晓东 |
+| `Rocky` | 粤语 · 阿强 |
+| `Sunny` | 四川话 · 晴儿 |
+
+完整音色（普通话 / 各地方言 / 双语等）可在 Web 控制台的「模型管理 → 语音合成」下拉框中可视化选择。
+
+## 向量
+
+```json
+{
+  "embedding_provider": "dashscope",
+  "embedding_model": "text-embedding-v4"
+}
+```
+
+默认模型 `text-embedding-v4`。修改 embedding 后需执行 `/memory rebuild-index` 命令重建索引。
diff --git a/docs/releases/overview.mdx b/docs/releases/overview.mdx
index 6f685799..f70d6bc8 100644
--- a/docs/releases/overview.mdx
+++ b/docs/releases/overview.mdx
@@ -5,6 +5,7 @@ description: CowAgent 版本更新历史
 
 | 版本 | 日期 | 说明 |
 | --- | --- | --- |
+| [2.0.9](/releases/v2.0.9) | 2026.05.21 | MCP 工具生态接入、模型管理页重构（厂商凭据共享 + 多能力统一调度）、语音系统升级、浏览器持久登录 |
 | [2.0.8](/releases/v2.0.8) | 2026.05.06 | 飞书渠道全面升级（语音、流式输出和Markdown、扫码一键接入）、DeepSeek V4和百度模型新增、定时任务工具增强 |
 | [2.0.7](/releases/v2.0.7) | 2026.04.22 | 图像生成技能（六厂商自动路由）、新模型支持（Kimi K2.6、Claude Opus 4.7、GLM 5.1）、知识库增强、Web 控制台优化 |
 | [2.0.6](/releases/v2.0.6) | 2026.04.14 | 项目更名、知识库系统、梦境记忆蒸馏、上下文智能压缩、Web 控制台多会话及多项优化 |
diff --git a/docs/releases/v2.0.7.mdx b/docs/releases/v2.0.7.mdx
index d9e2275d..b4b6e27b 100644
--- a/docs/releases/v2.0.7.mdx
+++ b/docs/releases/v2.0.7.mdx
@@ -11,7 +11,7 @@ description: CowAgent 2.0.7 - 图像生成技能（六厂商自动路由）、
 - **开箱即用**：配置 API Key 即可使用，无需手动指定模型。也支持在对话中指定特定模型
 - **灵活控制**：支持 `quality`（画质）、`size`（分辨率，512/1K~4K）、`aspect_ratio`（宽高比）等参数，各厂商自动适配有效值
 - **图片编辑**：传入已有图片即可进行编辑、风格迁移、多图融合
-- **Skill 级配置**：支持通过 `config.json` 中的 `skill.image-generation.model` 固定默认模型
+- **Skill 级配置**：支持通过 `config.json` 中的 `skills.image-generation.model` 固定默认模型
 
 相关文档：[图像生成技能](https://docs.cowagent.ai/skills/image-generation)
 
diff --git a/docs/releases/v2.0.8.mdx b/docs/releases/v2.0.8.mdx
index ccb72827..ced1b967 100644
--- a/docs/releases/v2.0.8.mdx
+++ b/docs/releases/v2.0.8.mdx
@@ -46,7 +46,7 @@ description: CowAgent 2.0.8 - 飞书渠道全面升级（语音、流式打字
 
 ## 🔧 工具与安全
 
-- **图像识别模型**：让 `tool.vision.model` 配置真正生效，未配置时自动 fallback #2792 Thanks CNXudiandian
+- **图像识别模型**：让 `tools.vision.model` 配置真正生效，未配置时自动 fallback #2792 Thanks CNXudiandian
 - **Bash 安全确认**：仅对工作区外的破坏性删除做二次确认，工作区内常规操作不再打扰
 
 ## 🐛 其他修复
diff --git a/docs/releases/v2.0.9.mdx b/docs/releases/v2.0.9.mdx
new file mode 100644
index 00000000..f5b3f609
--- /dev/null
+++ b/docs/releases/v2.0.9.mdx
@@ -0,0 +1,92 @@
+---
+title: v2.0.9
+description: CowAgent 2.0.9 - MCP 工具生态接入、模型管理页重构、语音系统升级、浏览器持久登录
+---
+
+## 🧩 MCP 工具生态接入
+
+新增 **MCP（Model Context Protocol）** 工具集成，CowAgent 从固定工具集扩展为开放可插拔的工具生态。任何兼容 MCP 协议的服务（高德地图、Chrome DevTools、Filesystem、Playwright 等）都可作为工具直接接入 Agent。
+
+- **零额外依赖**：原生 JSON-RPC 实现，同时支持 `stdio`（本地进程）和 `sse`（远程 URL）两种传输
+- **兼容主流配置**：兼容 Claude Desktop / Cursor 风格的 `mcpServers` 配置，优先读取 `~/cow/mcp.json`，未配置则回退 `config.json`
+- **异步启动**：MCP 服务在后台线程启动，不阻塞 Agent 初始化；单个服务失败不影响整体
+
+相关文档：[MCP 工具](https://docs.cowagent.ai/tools/mcp) · 社区贡献 #2801 Thanks @yangluxin613
+
+## 🖥️ 模型管理页面重构
+
+「模型」页面整体重新设计，从原来按 LLM 厂商堆叠的列表，重构为 **厂商凭据 + 能力调度** 两层结构：一处配置厂商凭据，对话、图像、语音、向量、搜索等多个能力共享。
+
+- **厂商凭据集中管理**：所有支持厂商（OpenAI / Claude / Gemini / DeepSeek / Qwen / 豆包 / Kimi / 智谱 / MiniMax / 千帆 / LinkAI / Custom 等）的 API Key / API Base 在顶部统一维护，编辑后下方所有能力立即生效
+- **能力卡片**：按主模型、图像理解、图像生成、语音识别、语音合成、向量、联网搜索分卡，每个能力可独立选择厂商和模型，未配置时自动跟随主模型或按默认顺序回退
+
+### 多厂商联网搜索
+
+联网搜索升级为多厂商架构，**输出格式统一**：
+
+- 四家可选：博查（bocha）、百度千帆（qianfan）、智谱（zhipu）、LinkAI
+- 两种调度策略：`auto`（按 bocha > qianfan > zhipu > linkai 顺序自动选第一个已配置的厂商）/ `fixed`（固定指定厂商）
+- 配置 ≥2 家且为 `auto` 时，Agent 可在单次调用中临时指定 `provider` 切换搜索源
+
+### 向量厂商热切换
+
+向量（Embedding）支持多厂商，告别对 OpenAI 的单一依赖：
+
+- 原生支持 `openai` / `dashscope` / `doubao` / `zhipu` / `linkai`
+- **在线重建索引**：切换厂商后执行 `/memory rebuild-index`，无需重启、不会中断当前对话
+- 梦境日记默认排除在向量索引之外，避免反复出现在检索结果中干扰对话
+
+## 🎙️ 语音系统升级
+
+- **TTS 适配更多通道**：个人微信（ilink）、钉钉、企微智能机器人现已原生支持语音回复，开关沿用 `always_reply_voice` / `voice_reply_voice`；触发 TTS 时先发文本气泡再发语音消息，方便对照阅读
+- **新增 ASR 厂商**：百炼（DashScope）、智谱
+- **TTS 多厂商重构**：MiniMax / LinkAI / DashScope / 智谱 TTS 在流式合成、长文本切分、错误回退上更稳
+- **网页麦克风输入**：Web 控制台聊天框新增麦克风按钮，可直接录音发送，自动走 ASR 转文本
+
+## 🌐 浏览器工具
+
+浏览器工具支持三种启动模式，告别「每次开会话都得重新登录」：
+
+- **持久化用户配置（默认）**：复用 `~/.cow/browser_profile`，登录一次后下次自动复用登录态
+- **CDP 模式**：通过 `cdp_endpoint` 附加到手动启动的真实 Chrome，享有完整指纹，适合反爬严格的站点
+- **Fresh 模式**：每次清空环境，适合做隔离任务
+
+此外，浏览器被用户中途关闭后下次调用会自动重新拉起，CDP 模式下不会误杀用户的 Chrome 进程。相关文档：[浏览器工具](https://docs.cowagent.ai/tools/browser) #2809
+
+## 🤖 新模型与模型增强
+
+- **百度 ERNIE 5.1**：新增 `ernie-5.1` 模型
+- **DeepSeek V4 reasoning_effort**：DeepSeek V4 系列支持 `reasoning_effort` 配置思考深度
+- **OpenRouter / Vercel AI Gateway 归因**：调用这两个平台时自动注入归因 Header，平台可正确识别 CowAgent 用量
+- 修复 MiMo 等思考模型在多轮对话中 `reasoning_content` 丢失的问题
+
+## 🚀 启动与运行体验
+
+来自社区的多项体验改进，Thanks @yangluxin613：
+
+- **自动选端口 + 自动开浏览器**：默认端口被占用时自动切换，启动成功后默认打开控制台
+- **Ctrl+C 干净退出**：不再打印一长串堆栈
+- **日志面板**：差异化级别配色、多行日志继承级别、新增级别筛选 Checkbox
+
+## 🔒 部署与安全
+
+- **默认仅本机访问**：Web 控制台 `web_host` 默认 `127.0.0.1`，避免无密码情况下被外网直接访问；显式设为 `0.0.0.0` 且未设密码时会给出提示
+- **前端资源完全本地化**：第三方 CSS / JS 全部本地分发，离线 / 内网环境也能正常加载控制台 #2816 Thanks @TryToMakeUsBetter
+- **支持文件夹上传**：上传区支持整目录一次性上传，路径校验适配 Windows #2815 Thanks @TryToMakeUsBetter
+
+## 🛠 其他改进与修复
+
+- **定时任务防重复执行**：调度器初始化做幂等处理
+- **工具失败状态持久化**：刷新页面或重载历史时失败的工具调用正确显示失败状态 #2822 Thanks @a1094174619
+- **企微机器人非法字符**：修复消息中包含非法控制字符导致投递失败的问题 #2810 Thanks @Jacques-Zhao
+- **飞书文件消息**：飞书通道支持文件消息接收
+- **工具配置合并**：修复用户自定义工具配置（如 `tools.browser`）被工作区默认值整体覆盖的问题，现按字段合并
+- 修复单文件上传偶发 TypeError、切换语言后 JS 动态视图未重渲染等问题
+
+## 📦 升级方式
+
+源码部署可执行 `cow update` 或 `./run.sh update` 一键升级，或手动拉取代码后重启。详见 [更新升级文档](https://docs.cowagent.ai/guide/upgrade)。
+
+> ⚠️ 切换向量厂商后，建议执行一次 `/memory rebuild-index`，让历史记忆按新的向量维度重新入库。
+
+**发布日期**：2026.05.21 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.8...2.0.9)
diff --git a/docs/skills/image-generation.mdx b/docs/skills/image-generation.mdx
index e64cc846..288fd656 100644
--- a/docs/skills/image-generation.mdx
+++ b/docs/skills/image-generation.mdx
@@ -3,149 +3,87 @@ title: image-generation - 图像生成
 description: 文生图 / 图生图 / 多图融合，支持多家厂商自动路由与回退
 ---
 
-通用的图像生成与编辑技能，支持 OpenAI、Gemini、Seedream（火山方舟）、Qwen（百炼）、MiniMax、LinkAI 共六家厂商。不需要手动选模型，脚本会按固定优先级自动挑选已配置的厂商来出图。
+通用的图像生成与编辑技能，支持 OpenAI、Gemini、Seedream（火山方舟）、Qwen（百炼）、MiniMax、LinkAI 共六家厂商。配好任意一家的 Key 即可使用，配多家可享受自动回退。
 
-## 模型选择
-
-`image-generation` 采用「固定优先级 + 自动回退」的策略，配好 Key 就能用：
-
-1. **优先级顺序**：`OpenAI → Gemini → Seedream → Qwen → MiniMax → LinkAI`
-2. **没配 Key 的跳过**：只有设了 API Key 的厂商才会参与
-3. **失败自动切下一家**：遇到 401、模型未开通、网络异常等错误时，会自动试下一个
-4. **指定模型时前置**：如果明确传了某个模型名，对应厂商会被提到最前面先试
-
-### 支持的模型
+## 支持的模型
 
 | 厂商 | 模型 / 别名 | 特点 |
 | --- | --- | --- |
-| OpenAI | `gpt-image-2`、`gpt-image-1` | 通用文生图，高质量、高智能，支持 `quality` 参数控制画质 |
+| OpenAI | `gpt-image-2`、`gpt-image-1` | 通用文生图，高质量，支持 `quality` 控制画质 |
 | Gemini Nano Banana | `nano-banana-2`、`nano-banana-pro`、`nano-banana` | 对应 `gemini-3.1-flash`、`gemini-3-pro`、`gemini-2.5-flash` 的图像版本 |
 | Seedream（火山方舟） | `seedream-5.0-lite`、`seedream-4.5` | 原生 2K–4K，最多 14 张图融合 |
 | Qwen（百炼） | `qwen-image-2.0`、`qwen-image-2.0-pro` | 擅长中文排版和图文融合 |
-| MiniMax | `image-01` | 简单快速的图片生成 |
-| LinkAI | 任意模型 | 通用代理，兜底用 |
+| MiniMax | `image-01` | 简单快速 |
+| LinkAI | 任意模型 | 统一网关，作为兜底 |
 
-<Note>
-默认情况下 Agent 不会主动选模型，而是走自动路由。如果你想用某个特定模型，直接在对话里说就行，比如「用 seedream 画一只猫」或「用 gpt-image-2 生成海报」。也可以通过下面的「自定义配置」固定默认模型。
-</Note>
+## 模型选择
 
-## 自定义配置
+默认走「自动路由 + 失败回退」：
 
-### API Key 配置
+1. 按 `OpenAI → Gemini → Seedream → Qwen → MiniMax → LinkAI` 顺序选第一个已配置的厂商
+2. 遇到 401、模型未开通、网络异常等错误时，自动切到下一家
+3. 用户在对话里指定模型时（如「用 seedream 画一只猫」），对应厂商会被提到最前优先尝试
 
-至少需要配**一个**厂商的 Key，配多个就能享受自动回退能力。有三种配置方式：
-
-#### 方式一：已有模型 Key 自动复用
-
-如果你在 web控制台 或 `config.json` 中配置了对话模型的 Key（比如 `openai_api_key`、`gemini_api_key` 等），启动时这些 Key 会被**自动同步**到对应的环境变量。也就是说，只要你的对话模型能用，图像生成就能直接用同一个 Key，不需要额外配置。
-
-#### 方式二：在 config.json 中配置
-
-在 `config.json` 中直接写对应的 Key 字段即可，支持的字段如下：
+如需固定使用某个模型：
 
 ```json
 {
-  "openai_api_key": "sk-xxx",
-  "openai_api_base": "https://api.openai.com/v1",
-  "gemini_api_key": "AIza-xxx",
-  "ark_api_key": "xxx",
-  "dashscope_api_key": "sk-xxx",
-  "minimax_api_key": "xxx",
-  "linkai_api_key": "xxx"
-}
-```
-
-修改后需要重启生效。每个 Key 还有对应的 `*_api_base` 字段可以自定义接口地址。
-
-#### 方式三：对话中直接配置
-
-在对话里发送 API Key，Agent 会通过 `env_config` 工具自动保存到 `~/cow/.env`，**不需要重启**就能生效。例如：
-
-```
-帮我配置 OPENAI_API_KEY 为 sk-xxx
-```
-
-或者：
-
-```
-设置 ARK_API_KEY 为 xxx
-```
-
-### API Key 一览
-
-| 环境变量 | config.json 字段 | 对应厂商 | 默认 Base URL |
-| --- | --- | --- | --- |
-| `OPENAI_API_KEY` | `openai_api_key` | OpenAI | `https://api.openai.com/v1` |
-| `GEMINI_API_KEY` | `gemini_api_key` | Gemini | `https://generativelanguage.googleapis.com` |
-| `ARK_API_KEY` | `ark_api_key` | 火山方舟（Seedream） | `https://ark.cn-beijing.volces.com/api/v3` |
-| `DASHSCOPE_API_KEY` | `dashscope_api_key` | 阿里百炼（Qwen） | `https://dashscope.aliyuncs.com` |
-| `MINIMAX_API_KEY` | `minimax_api_key` | MiniMax | `https://api.minimaxi.com` |
-| `LINKAI_API_KEY` | `linkai_api_key` | LinkAI | `https://api.link-ai.tech` |
-
-
-### 指定默认模型
-
-如果想让所有图像生成固定走某个厂商的模型，可以在 `config.json` 里加：
-
-```json
-"skill": {
-  "image-generation": {
-    "model": "seedream-5.0-lite"
+  "skills": {
+    "image-generation": {
+      "model": "seedream-5.0-lite"
+    }
   }
 }
 ```
 
-启动时这段配置会被自动转成环境变量 `SKILL_IMAGE_GENERATION_MODEL`，脚本读到后会固定使用这个模型所在的厂商进行生成。
+## 配置 API Key
+
+<Tip>
+  推荐通过 [Web 控制台](/channels/web) 的「模型管理」页面配置，配好的对话模型 Key 会被图像生成技能自动复用，无需重复配置。也可手动编辑配置文件或在对话中通过 `env_config` 工具临时设置。
+</Tip>
+
+凭证统一复用主模型厂商的 Key：
+
+| 字段 | 对应厂商 |
+| --- | --- |
+| `open_ai_api_key` | OpenAI |
+| `gemini_api_key` | Gemini |
+| `ark_api_key` | 火山方舟（Seedream） |
+| `dashscope_api_key` | 阿里百炼（Qwen） |
+| `minimax_api_key` | MiniMax |
+| `linkai_api_key` | LinkAI |
 
 
 ## 开启和关闭
 
-`image-generation` 是内置技能，**会根据 API Key 自动调整状态**：
+技能会根据 API Key 自动调整状态：
 
-- **Key 已配置**：技能正常可用，Agent 收到画图请求时会直接调用
-- **Key 未配置**：技能仍然会出现在上下文中（标记为「需要配置」），Agent 会引导用户去配 Key，而不是直接调用失败
+- **已配置 Key**：Agent 收到画图请求时直接调用
+- **未配置 Key**：技能仍会出现在上下文中（标记为「需要配置」），Agent 会引导用户去配 Key
 
-如果想手动控制，也可以用命令：
+如需手动控制：
 
 ```text
-/skill disable image-generation    # 手动关闭（即使有 Key 也不会被调用）
+/skill disable image-generation    # 关闭
 /skill enable image-generation     # 重新开启
 ```
 
-终端里对应的命令是 `cow skill disable image-generation` / `cow skill enable image-generation`。
+终端等价命令：`cow skill disable image-generation` / `cow skill enable image-generation`。
 
 ## 参数
 
 | 参数 | 类型 | 必填 | 默认 | 说明 |
 | --- | --- | --- | --- | --- |
 | `prompt` | string | 是 | — | 图像描述 |
-| `image_url` | string / list | 否 | null | 编辑用的输入图，支持本地路径或 URL。传多个就是多图融合 |
-| `quality` | string | 否 | auto | `low` / `medium` / `high`，只有部分厂商支持 |
-| `size` | string | 否 | auto | `512` / `1K` / `2K` / `3K` / `4K`，也可以写像素值如 `1024x1024` |
+| `image_url` | string / list | 否 | null | 编辑用的输入图，本地路径或 URL；传列表为多图融合 |
+| `quality` | string | 否 | auto | `low` / `medium` / `high`，仅部分厂商支持 |
+| `size` | string | 否 | auto | `512` / `1K` / `2K` / `3K` / `4K`，或像素值如 `1024x1024` |
 | `aspect_ratio` | string | 否 | null | `1:1` / `3:2` / `2:3` / `16:9` / `9:16` / `21:9`；Gemini 还支持 `1:4` / `4:1` / `1:8` / `8:1` |
 
 <Warning>
-**质量越高、分辨率越大，花的钱越多、等的时间越长。**
-
-- 日常对话和快速预览直接用默认（`auto`），或者 `quality=low` + `size=1K`，大概 20 秒出图
-- 做海报、用户明确要高清的时候再上 `quality=high` + `size=2K/4K`，可能要等 1～5 分钟，取决于不同模型的速度
+  **质量越高、分辨率越大，耗时和成本越高。** 日常对话用默认（`auto`）或 `quality=low` + `size=1K` 即可，约 20 秒出图；做海报或明确要高清时再上 `high` + `2K/4K`，可能需要 1–5 分钟。
 </Warning>
 
-## 输出
-
-成功时返回：
-
-```json
-{
-  "model": "doubao-seedream-5-0-260128",
-  "images": [
-    {"url": "/path/to/output.png"}
-  ]
-}
-```
-
-失败时返回 `{ "error": "..." }`。出错后**不要直接重试**——大概率是配置问题（Key 填错、API 地址不对、模型没开通），让用户修好配置再试。
-
 ## 常见用法
 
 - **文生图**：根据描述生成插画、海报、图标、头像、分镜图等
@@ -153,8 +91,8 @@ description: 文生图 / 图生图 / 多图融合，支持多家厂商自动路
 - **多图融合**：把多张参考图合成一张（换装、角色合影等）
 
 <Note>
-- bash 超时建议设 600 秒。单个厂商的 HTTP 超时是 300 秒，但脚本可能依次尝试多个厂商
-- 输入的图片会自动压缩到 4MB 以内、最长边不超过 4096px
-- Gemini / Seedream / Qwen / MiniMax 不支持 `quality` 参数，传了也没用
-- Seedream 默认出 2K 图，`seedream-5.0-lite` 支持到 3K，`seedream-4.5` 支持到 4K
+- bash 超时建议设 600 秒：单厂商 HTTP 超时 300 秒，脚本可能依次尝试多家
+- 输入图片自动压缩到 4MB 以内、最长边不超过 4096px
+- Gemini / Seedream / Qwen / MiniMax 不支持 `quality` 参数
+- Seedream 默认出 2K 图；`seedream-5.0-lite` 支持到 3K，`seedream-4.5` 支持到 4K
 </Note>
diff --git a/docs/tools/vision.mdx b/docs/tools/vision.mdx
index 66cfdebf..675afe41 100644
--- a/docs/tools/vision.mdx
+++ b/docs/tools/vision.mdx
@@ -40,7 +40,7 @@ Vision 工具采用多级自动选择 + 自动兜底策略，无需手动配置
 
 ```json
 {
-    "tool": {
+    "tools": {
         "vision": {
             "model": "gpt-4.1"
         }
diff --git a/docs/tools/web-search.mdx b/docs/tools/web-search.mdx
index 2622d0c5..928eb633 100644
--- a/docs/tools/web-search.mdx
+++ b/docs/tools/web-search.mdx
@@ -1,32 +1,51 @@
 ---
 title: web_search - 联网搜索
-description: 搜索互联网获取实时信息
+description: 搜索互联网获取实时信息，支持多个搜索厂商
 ---
 
-搜索互联网获取实时信息、新闻、研究等内容。支持两个搜索后端，自动选择可用的后端。
+搜索互联网获取实时信息、新闻、研究等内容。支持博查、百度千帆、智谱、LinkAI 四个后端，配置任意一家即可使用。
 
-## 依赖
+<Tip>
+  推荐通过 [Web 控制台](/channels/web) 的「模型管理 → 搜索」面板可视化配置厂商与策略，无需手动编辑配置文件。
+</Tip>
 
-需要配置至少一个搜索 API Key（通过 `env_config` 工具或工作空间 `.env` 文件配置）：
+## 厂商
 
-| 后端 | 环境变量 | 优先级 | 获取方式 |
-| --- | --- | --- | --- |
-| 博查搜索 | `BOCHA_API_KEY` | 优先使用 | [博查开放平台](https://open.bochaai.com/) |
-| LinkAI 搜索 | `LINKAI_API_KEY` | 可选 | [LinkAI 控制台](https://link-ai.tech/console/interface) |
+| 厂商 | 凭证 | 申请入口 |
+| --- | --- | --- |
+| 博查 Bocha | `tools.web_search.bocha_api_key` | [博查开放平台](https://open.bochaai.com/) |
+| 百度千帆 | 复用 `qianfan_api_key` | [千帆控制台](https://cloud.baidu.com/doc/qianfan/s/2mh4su4uy) |
+| 智谱 Zhipu | 复用 `zhipu_ai_api_key` | [智谱开放平台](https://docs.bigmodel.cn/cn/guide/tools/web-search) |
+| LinkAI | 复用 `linkai_api_key` | [LinkAI 控制台](https://link-ai.tech/console/interface) |
 
-## 参数
+除博查需要单独的 `bocha_api_key` 外，其他三家直接复用对应模型的 API Key，配好模型即同时获得搜索能力。
+
+## 路由策略
+
+```json
+{
+  "tools": {
+    "web_search": {
+      "strategy": "auto",
+      "provider": ""
+    }
+  }
+}
+```
+
+- `auto`（默认）：由 Agent 在已配置的厂商中智能选择，并可在一次任务中多次调用、切换不同厂商以获取更全面的结果；未指定时按 `bocha → qianfan → zhipu → linkai` 顺序兜底。
+- `fixed`：固定使用 `provider` 指定的厂商；该厂商凭证缺失时自动回落到 auto 顺序。
+
+## 工具参数
 
 | 参数 | 类型 | 必填 | 说明 |
 | --- | --- | --- | --- |
 | `query` | string | 是 | 搜索关键词 |
-| `count` | integer | 否 | 返回结果数量（1-50，默认 10） |
-| `freshness` | string | 否 | 时间范围：`noLimit`、`oneDay`、`oneWeek`、`oneMonth`、`oneYear`，或日期范围如 `2025-01-01..2025-02-01` |
+| `count` | integer | 否 | 返回结果数量（1–50，默认 10） |
+| `freshness` | string | 否 | 时间范围：`noLimit`（默认）、`oneDay`、`oneWeek`、`oneMonth`、`oneYear`，或日期范围如 `2025-01-01..2025-02-01` |
 | `summary` | boolean | 否 | 是否返回页面摘要（默认 false） |
-
-## 使用场景
-
-当用户询问最新信息、需要事实核查或获取实时数据时，Agent 会自动调用此工具。
+| `provider` | string | 否 | `auto` 策略下配置了多个厂商时可见，用于单次切换厂商 |
 
 <Note>
-  如果未配置任何搜索 API Key，该工具不会被加载。
+  四家凭证均未配置时，该工具不会注册到 Agent。
 </Note>
diff --git a/models/chatgpt/chat_gpt_bot.py b/models/chatgpt/chat_gpt_bot.py
index 0ec95a25..d5b7703d 100644
--- a/models/chatgpt/chat_gpt_bot.py
+++ b/models/chatgpt/chat_gpt_bot.py
@@ -60,7 +60,7 @@ class ChatGPTBot(Bot, OpenAIImage, OpenAICompatibleBot):
             "timeout": conf().get("request_timeout", None),  # 重试超时时间，在这个时间内，将会自动重试
         }
         # 部分模型暂不支持一些参数，特殊处理
-        if conf_model in [const.O1, const.O1_MINI, const.GPT_5, const.GPT_5_MINI, const.GPT_5_NANO]:
+        if conf_model in [const.O1, const.O1_MINI, const.GPT_5, const.GPT_5_MINI, const.GPT_5_NANO, const.GPT_55]:
             remove_keys = ["temperature", "top_p", "frequency_penalty", "presence_penalty"]
             for key in remove_keys:
                 self.args.pop(key, None)  # 如果键不存在，使用 None 来避免抛出错、
diff --git a/models/gemini/google_gemini_bot.py b/models/gemini/google_gemini_bot.py
index 6716e971..3c9ac9ae 100644
--- a/models/gemini/google_gemini_bot.py
+++ b/models/gemini/google_gemini_bot.py
@@ -38,9 +38,9 @@ class GoogleGeminiBot(Bot):
 
     @property
     def model(self):
-        model_name = conf().get("model") or "gemini-3.1-pro-preview"
+        model_name = conf().get("model") or "gemini-3.5-flash"
         if model_name == "gemini":
-            model_name = "gemini-3.1-pro-preview"
+            model_name = "gemini-3.5-flash"
         return model_name
 
     @property
diff --git a/models/openai_compatible_bot.py b/models/openai_compatible_bot.py
index aba5b327..e669fed2 100644
--- a/models/openai_compatible_bot.py
+++ b/models/openai_compatible_bot.py
@@ -89,8 +89,9 @@ class OpenAICompatibleBot:
                     messages[0] = {"role": "system", "content": system_prompt}
             
             # Build request parameters
+            model_name = kwargs.get("model", api_config.get('model', 'gpt-5.4'))
             request_params = {
-                "model": kwargs.get("model", api_config.get('model', 'gpt-3.5-turbo')),
+                "model": model_name,
                 "messages": messages,
                 "temperature": kwargs.get("temperature", api_config.get('default_temperature', 0.9)),
                 "top_p": kwargs.get("top_p", api_config.get('default_top_p', 1.0)),
@@ -98,6 +99,10 @@ class OpenAICompatibleBot:
                 "presence_penalty": kwargs.get("presence_penalty", api_config.get('default_presence_penalty', 0.0)),
                 "stream": stream
             }
+            # GPT-5 / GPT-5.5 / o1 series only accept default temperature/top_p and reject penalty params
+            if model_name in ("gpt-5", "gpt-5-mini", "gpt-5-nano", "gpt-5.5", "o1", "o1-mini"):
+                for key in ("temperature", "top_p", "frequency_penalty", "presence_penalty"):
+                    request_params.pop(key, None)
             
             # Add max_tokens if specified
             if kwargs.get("max_tokens"):
diff --git a/plugins/cow_cli/cow_cli.py b/plugins/cow_cli/cow_cli.py
index aafa1813..fc721f51 100644
--- a/plugins/cow_cli/cow_cli.py
+++ b/plugins/cow_cli/cow_cli.py
@@ -1056,6 +1056,38 @@ class CowCliPlugin(Plugin):
             logger.warning(f"[CowCli] /memory dream sync failed: {e}")
             return f"❌ 记忆蒸馏失败: {e}"
 
+    @staticmethod
+    def _resolve_active_embedding():
+        """
+        Work out which embedding setup the CURRENT config describes, ignoring
+        whatever provider object a running agent may still hold.
+        Returns a (label, model, dim) tuple; model and dim may be None.
+        """
+        from agent.memory.embedding import EMBEDDING_VENDORS
+        from config import conf
+
+        def _setting(name):
+            return (conf().get(name) or "").strip()
+
+        vendor = _setting("embedding_provider").lower()
+        wanted_model = _setting("embedding_model")
+        try:
+            wanted_dim = int(conf().get("embedding_dimensions") or 0)
+        except (TypeError, ValueError):
+            wanted_dim = 0
+
+        if vendor:
+            defaults = EMBEDDING_VENDORS.get(vendor) or {}
+            model = wanted_model or defaults.get("default_model")
+            dim = wanted_dim if wanted_dim > 0 else defaults.get("default_dimensions")
+            return vendor, model, dim
+        # No explicit provider: legacy auto path, openai key first, then linkai.
+        if _setting("open_ai_api_key"):
+            return "openai (legacy)", "text-embedding-3-small", 1536
+        if _setting("linkai_api_key"):
+            return "linkai (legacy)", "text-embedding-3-small", 1536
+        return "(legacy)", None, None
+
     def _memory_status(self) -> str:
         """Show current memory index status."""
         from agent.memory.embedding import detect_index_dim
@@ -1078,15 +1110,14 @@ class CowCliPlugin(Plugin):
         lines.append(f"  Chunks  : {chunks} (embedded: {embedded})")
         lines.append("")
 
-        # Active provider (from running config + provider instance).
+        # Resolve from the latest config so users see what /memory rebuild-index
+        # will actually run as — not what the cached agent was initialized with.
+        cfg_provider, cfg_model, cfg_dim = self._resolve_active_embedding()
         provider_obj = memory_manager.embedding_provider
-        cfg_provider = (conf().get("embedding_provider") or "").strip().lower() or "(legacy)"
-        if provider_obj is not None:
-            cfg_model = getattr(provider_obj, "model", "?")
-            cfg_dim = getattr(provider_obj, "_dimensions", None) or "?"
+        if cfg_model:
             lines.append(f"  Provider : {cfg_provider}")
             lines.append(f"  Model    : {cfg_model}")
-            lines.append(f"  Dim      : {cfg_dim}")
+            lines.append(f"  Dim      : {cfg_dim if cfg_dim else '?'}")
         else:
             lines.append("  Provider : (未初始化, keyword-only)")
 
@@ -1105,7 +1136,6 @@ class CowCliPlugin(Plugin):
                 )
 
             index_dim = detect_index_dim(memory_manager.storage)
-            cfg_dim = getattr(provider_obj, "_dimensions", None)
             if index_dim is not None and cfg_dim and index_dim != cfg_dim:
                 warnings.append(
                     f"  ⚠️ 索引中存量向量为 {index_dim} 维，与当前配置 {cfg_dim} 维不一致；"
@@ -1129,15 +1159,27 @@ class CowCliPlugin(Plugin):
             )
 
         memory_manager = agent.memory_manager
-        if memory_manager.embedding_provider is None:
+
+        # Rebuild against the LATEST config: build a fresh provider from
+        # config.json and swap it onto memory_manager. The agent's
+        # conversation_history and other state are untouched.
+        try:
+            from bridge.agent_initializer import AgentInitializer
+            fresh_provider = AgentInitializer(bridge=None, agent_bridge=None) \
+                ._init_embedding_provider(memory_manager.config, session_id=session_id)
+        except Exception as e:
+            logger.exception("[CowCli] /memory rebuild-index: build provider failed")
+            return f"⚠️ 无法根据当前配置构造 embedding provider: {e}"
+
+        if fresh_provider is None:
             return (
                 "⚠️ 当前没有可用的 embedding provider。\n"
                 "请检查 config.json 中的 embedding 相关配置 (provider / api key)。"
             )
+        memory_manager.embedding_provider = fresh_provider
 
-        provider_obj = memory_manager.embedding_provider
-        model_label = getattr(provider_obj, "model", "?")
-        dim_label = getattr(provider_obj, "dimensions", "?")
+        model_label = getattr(fresh_provider, "model", "?")
+        dim_label = getattr(fresh_provider, "dimensions", "?")
 
         # SaaS (e_context is None): run synchronously, return final result
         if e_context is None:
@@ -1168,7 +1210,7 @@ class CowCliPlugin(Plugin):
         threading.Thread(target=_run, daemon=True).start()
         return (
             f"🔧 索引重建已启动 (model={model_label}, dim={dim_label})\n\n"
-            f"将清空现有 chunks 并重新 embed 所有记忆文件，完成后会通知你。"
+            f"将重新向量化所有记忆和知识文件，完成后会通知你。"
         )
 
     @staticmethod
diff --git a/skills/image-generation/scripts/generate.py b/skills/image-generation/scripts/generate.py
index 905390b5..9a52f94a 100644
--- a/skills/image-generation/scripts/generate.py
+++ b/skills/image-generation/scripts/generate.py
@@ -1110,7 +1110,7 @@ def main():
     # Model resolution priority:
     #   1. Explicit `model` in the call args (agent / user override)
     #   2. SKILL_IMAGE_GENERATION_MODEL env var (synced from
-    #      config["skill"]["image-generation"]["model"] at startup)
+    #      config["skills"]["image-generation"]["model"] at startup)
     #   3. None → fall back to automatic provider routing (try every
     #      provider with a configured API key in global priority order)
     model = args.get("model") or os.environ.get("SKILL_IMAGE_GENERATION_MODEL") or ""
diff --git a/tests/test_qianfan_provider.py b/tests/test_qianfan_provider.py
index 4c7900e5..99eb4130 100644
--- a/tests/test_qianfan_provider.py
+++ b/tests/test_qianfan_provider.py
@@ -394,7 +394,7 @@ class TestQianfanVisionTool(unittest.TestCase):
             "open_ai_api_key": "",
             "linkai_api_key": "",
             "use_linkai": False,
-            "tool": {},
+            "tools": {},
         }
         if values:
             data.update(values)
@@ -424,7 +424,7 @@ class TestQianfanVisionTool(unittest.TestCase):
     def test_vision_routes_ernie_model_override_to_qianfan(self):
         fake_conf = self._fake_conf({
             "qianfan_api_key": "test-qianfan-key",
-            "tool": {"vision": {"model": "ernie-4.5-turbo-vl-32k"}},
+            "tools": {"vision": {"model": "ernie-4.5-turbo-vl-32k"}},
         })
         fake_bot = MagicMock()
         fake_bot.call_vision = MagicMock()
diff --git a/voice/dashscope/__init__.py b/voice/dashscope/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/voice/dashscope/dashscope_voice.py b/voice/dashscope/dashscope_voice.py
new file mode 100644
index 00000000..746bb59a
--- /dev/null
+++ b/voice/dashscope/dashscope_voice.py
@@ -0,0 +1,175 @@
+# encoding:utf-8
+"""DashScope voice: qwen3-asr-flash (ASR) + qwen3-tts-flash (TTS)
+via dashscope.MultiModalConversation."""
+import datetime
+import os
+import random
+from typing import Optional
+
+import dashscope
+import requests
+from dashscope import MultiModalConversation
+
+from bridge.reply import Reply, ReplyType
+from common.log import logger
+from config import conf
+from voice import audio_convert
+from voice.voice import Voice
+
+
+DEFAULT_ASR_MODEL = "qwen3-asr-flash"
+DEFAULT_TTS_MODEL = "qwen3-tts-flash"
+DEFAULT_TTS_VOICE = "Cherry"
+MAX_DURATION_SECONDS = 300
+MAX_FILE_BYTES = 10 * 1024 * 1024
+
+
+class DashScopeVoice(Voice):
+    """Voice backend on dashscope.MultiModalConversation: ASR (voiceToText), TTS (textToVoice)."""
+
+    def __init__(self):
+        pass  # no instance state; settings are read from conf() on each call
+
+    def voiceToText(self, voice_file: str):
+        """Transcribe voice_file; returns a TEXT Reply, or an ERROR Reply on any failure."""
+        try:
+            # Check the key first so an unconfigured setup fails before any transcoding.
+            api_key = conf().get("dashscope_api_key", "")
+            if not api_key:
+                logger.error("[DashScopeVoice] dashscope_api_key is not configured")
+                return Reply(ReplyType.ERROR, "未配置 DashScope API key")
+            dashscope.api_key = api_key
+
+            voice_file = self._ensure_compatible_format(voice_file)
+            try:
+                size = os.path.getsize(voice_file)
+                if size > MAX_FILE_BYTES:
+                    logger.warning(
+                        f"[DashScopeVoice] audio file {size}B exceeds {MAX_FILE_BYTES}B; "
+                        f"qwen3-asr-flash may reject it"
+                    )
+            except OSError:
+                pass  # the size check only warns; an unreadable size is not fatal here
+
+            model = conf().get("voice_to_text_model") or DEFAULT_ASR_MODEL
+            file_uri = f"file://{os.path.abspath(voice_file)}"
+            response = MultiModalConversation.call(
+                model=model,
+                api_key=api_key,  # passed explicitly, matching textToVoice
+                messages=[{"role": "user", "content": [{"audio": file_uri}]}],
+                result_format="message",
+                asr_options={"enable_itn": False, "enable_lid": True},
+            )
+
+            text = self._extract_text(response)
+            if text is None:
+                logger.error(f"[DashScopeVoice] voiceToText failed: {response}")
+                return Reply(ReplyType.ERROR, "我暂时还无法听清您的语音，请稍后再试吧~")
+            logger.info(f"[DashScopeVoice] voiceToText model={model} text={text}")
+            return Reply(ReplyType.TEXT, text)
+        except Exception as e:
+            logger.exception(f"[DashScopeVoice] voiceToText exception: {e}")
+            return Reply(ReplyType.ERROR, "我暂时还无法听清您的语音，请稍后再试吧~")
+
+    def textToVoice(self, text: str):
+        """Synthesize text; returns a VOICE Reply holding a local file path, or an ERROR Reply."""
+        try:
+            api_key = conf().get("dashscope_api_key", "")
+            if not api_key:
+                logger.error("[DashScopeVoice] dashscope_api_key is not configured")
+                return Reply(ReplyType.ERROR, "未配置 DashScope API key")
+            dashscope.api_key = api_key
+
+            model = conf().get("text_to_voice_model") or DEFAULT_TTS_MODEL
+            voice = conf().get("tts_voice_id") or DEFAULT_TTS_VOICE
+            response = MultiModalConversation.call(
+                model=model,
+                api_key=api_key,
+                text=text,
+                voice=voice,
+                stream=False,
+            )
+
+            url = self._extract_audio_url(response)
+            if not url:
+                logger.error(f"[DashScopeVoice] textToVoice failed: {response}")
+                return Reply(ReplyType.ERROR, "语音合成失败")
+
+            local_path = self._download_audio(url)
+            if not local_path:
+                return Reply(ReplyType.ERROR, "语音合成失败")
+
+            logger.info(f"[DashScopeVoice] textToVoice model={model} voice={voice} file={local_path}")
+            return Reply(ReplyType.VOICE, local_path)
+        except Exception as e:
+            logger.exception(f"[DashScopeVoice] textToVoice exception: {e}")
+            return Reply(ReplyType.ERROR, "语音合成失败")
+
+    @staticmethod
+    def _extract_audio_url(response) -> Optional[str]:
+        """Return the audio URL of a TTS response; None on non-200 status, no URL, or any error."""
+        try:
+            if getattr(response, "status_code", 200) != 200:
+                return None
+            audio = response.output.get("audio") if response.output else None
+            if isinstance(audio, dict):
+                return audio.get("url") or None
+            return getattr(audio, "url", None)
+        except Exception:
+            return None
+
+    @staticmethod
+    def _download_audio(url: str) -> Optional[str]:
+        """Download url into <cwd>/tmp; returns the saved file path, or None on any failure."""
+        try:
+            tmp_dir = os.path.join(os.getcwd(), "tmp")
+            os.makedirs(tmp_dir, exist_ok=True)
+            ts = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
+            ext = os.path.splitext(url.split("?", 1)[0])[1].lower() or ".wav"  # query string dropped
+            if ext not in (".mp3", ".wav", ".m4a", ".aac", ".opus"):
+                ext = ".wav"  # unrecognized extension falls back to .wav
+            dst = os.path.join(tmp_dir, f"dashscope_tts_{ts}_{random.randint(0, 9999)}{ext}")
+            resp = requests.get(url, timeout=60)
+            resp.raise_for_status()
+            with open(dst, "wb") as f:
+                f.write(resp.content)
+            return dst
+        except Exception as e:
+            logger.error(f"[DashScopeVoice] download audio failed: {e}")
+            return None
+
+    @staticmethod
+    def _ensure_compatible_format(voice_file: str) -> str:
+        """Transcode AMR/SILK (not accepted by qwen3-asr-flash) to mp3; other files pass through."""
+        if voice_file.lower().endswith((".amr", ".silk", ".slk")):
+            try:
+                mp3_file = os.path.splitext(voice_file)[0] + ".mp3"
+                audio_convert.any_to_mp3(voice_file, mp3_file)
+                return mp3_file
+            except Exception as e:
+                logger.warning(f"[DashScopeVoice] mp3 convert failed: {e}")
+        return voice_file  # unchanged path; also the fallback when conversion fails
+
+    @staticmethod
+    def _extract_text(response) -> Optional[str]:
+        """Return the stripped transcript of an ASR response; None if empty, non-200, or on error."""
+        try:
+            if getattr(response, "status_code", 200) != 200:
+                return None
+            choices = response.output.get("choices") or []
+            if not choices:
+                return None
+            content = choices[0].get("message", {}).get("content")
+            if isinstance(content, str):
+                return content.strip() or None
+            if isinstance(content, list):
+                parts = []
+                for item in content:
+                    if isinstance(item, dict) and "text" in item:
+                        parts.append(item["text"])
+                    elif isinstance(item, str):
+                        parts.append(item)
+                return "".join(parts).strip() or None
+            return None
+        except Exception:
+            return None
diff --git a/voice/factory.py b/voice/factory.py
index abe7ba57..3be60bbf 100644
--- a/voice/factory.py
+++ b/voice/factory.py
@@ -58,4 +58,12 @@ def create_voice(voice_type):
         from voice.minimax.minimax_voice import MinimaxVoice
 
         return MinimaxVoice()
+    elif voice_type == "dashscope":
+        from voice.dashscope.dashscope_voice import DashScopeVoice
+
+        return DashScopeVoice()
+    elif voice_type == "zhipu" or voice_type == "zhipuai":
+        from voice.zhipuai.zhipuai_voice import ZhipuAIVoice
+
+        return ZhipuAIVoice()
     raise RuntimeError
diff --git a/voice/linkai/linkai_voice.py b/voice/linkai/linkai_voice.py
index 739b5f60..ec59812e 100644
--- a/voice/linkai/linkai_voice.py
+++ b/voice/linkai/linkai_voice.py
@@ -1,16 +1,18 @@
-"""
-google voice service
-"""
+"""LinkAI voice: Whisper ASR + multi-vendor TTS (OpenAI / Doubao / Baidu)
+proxied via https://docs.link-ai.tech/platform/api/voice-speech."""
+import datetime
+import os
 import random
+
 import requests
-from voice import audio_convert
+
 from bridge.reply import Reply, ReplyType
+from common import const
 from common.log import logger
 from config import conf
+from voice import audio_convert
 from voice.voice import Voice
-from common import const
-import os
-import datetime
+
 
 class LinkAIVoice(Voice):
     def __init__(self):
@@ -21,63 +23,67 @@ class LinkAIVoice(Voice):
         try:
             url = conf().get("linkai_api_base", "https://api.link-ai.tech") + "/v1/audio/transcriptions"
             headers = {"Authorization": "Bearer " + conf().get("linkai_api_key")}
-            model = None
-            if not conf().get("text_to_voice") or conf().get("voice_to_text") == "openai":
-                model = const.WHISPER_1
+            # Pin whisper-1: gateway ignores any other ASR model id.
+            model = const.WHISPER_1
             if voice_file.endswith(".amr"):
                 try:
                     mp3_file = os.path.splitext(voice_file)[0] + ".mp3"
                     audio_convert.any_to_mp3(voice_file, mp3_file)
                     voice_file = mp3_file
                 except Exception as e:
-                    logger.warn(f"[LinkVoice] amr file transfer failed, directly send amr voice file: {format(e)}")
-            file = open(voice_file, "rb")
-            file_body = {
-                "file": file
-            }
-            data = {
-                "model": model
-            }
-            res = requests.post(url, files=file_body, headers=headers, data=data, timeout=(5, 60))
-            if res.status_code == 200:
-                text = res.json().get("text")
-            else:
-                res_json = res.json()
-                logger.error(f"[LinkVoice] voiceToText error, status_code={res.status_code}, msg={res_json.get('message')}")
+                    logger.warning(f"[LinkVoice] amr file transfer failed, directly send amr voice file: {e}")
+            with open(voice_file, "rb") as file:
+                res = requests.post(
+                    url,
+                    files={"file": file},
+                    headers=headers,
+                    data={"model": model},
+                    timeout=(5, 60),
+                )
+            if res.status_code != 200:
+                msg = ""
+                try:
+                    msg = res.json().get("message", "")
+                except Exception:
+                    pass
+                logger.error(f"[LinkVoice] voiceToText error, status_code={res.status_code}, msg={msg}")
                 return None
-            reply = Reply(ReplyType.TEXT, text)
+            text = res.json().get("text")
             logger.info(f"[LinkVoice] voiceToText success, text={text}, file name={voice_file}")
+            return Reply(ReplyType.TEXT, text)
         except Exception as e:
             logger.error(e)
             return None
-        return reply
 
     def textToVoice(self, text):
         try:
             url = conf().get("linkai_api_base", "https://api.link-ai.tech") + "/v1/audio/speech"
             headers = {"Authorization": "Bearer " + conf().get("linkai_api_key")}
-            model = const.TTS_1
-            if not conf().get("text_to_voice") or conf().get("text_to_voice") in ["openai", const.TTS_1, const.TTS_1_HD]:
-                model = conf().get("text_to_voice_model") or const.TTS_1
+            # Gateway routes by `model` (tts-1 / doubao / baidu) + `voice` from
+            # that engine's catalog. `app_code` is optional workspace override.
             data = {
-                "model": model,
                 "input": text,
                 "voice": conf().get("tts_voice_id"),
-                "app_code": conf().get("linkai_app_code")
+                "app_code": conf().get("linkai_app_code"),
             }
+            model = conf().get("text_to_voice_model")
+            if model:
+                data["model"] = model
             res = requests.post(url, headers=headers, json=data, timeout=(5, 120))
-            if res.status_code == 200:
-                tmp_file_name = "tmp/" + datetime.datetime.now().strftime('%Y%m%d%H%M%S') + str(random.randint(0, 1000)) + ".mp3"
-                with open(tmp_file_name, 'wb') as f:
-                    f.write(res.content)
-                reply = Reply(ReplyType.VOICE, tmp_file_name)
-                logger.info(f"[LinkVoice] textToVoice success, input={text}, model={model}, voice_id={data.get('voice')}")
-                return reply
-            else:
-                res_json = res.json()
-                logger.error(f"[LinkVoice] textToVoice error, status_code={res.status_code}, msg={res_json.get('message')}")
+            if res.status_code != 200:
+                msg = ""
+                try:
+                    msg = res.json().get("message", "")
+                except Exception:
+                    pass
+                logger.error(f"[LinkVoice] textToVoice error, status_code={res.status_code}, msg={msg}")
                 return None
+            tmp_file_name = "tmp/" + datetime.datetime.now().strftime('%Y%m%d%H%M%S') + str(random.randint(0, 1000)) + ".mp3"
+            os.makedirs(os.path.dirname(tmp_file_name), exist_ok=True)
+            with open(tmp_file_name, 'wb') as f:
+                f.write(res.content)
+            logger.info(f"[LinkVoice] textToVoice success, input={text}, voice_id={data.get('voice')}")
+            return Reply(ReplyType.VOICE, tmp_file_name)
         except Exception as e:
             logger.error(e)
-            # reply = Reply(ReplyType.ERROR, "遇到了一点小问题，请稍后再问我吧")
             return None
diff --git a/voice/minimax/minimax_voice.py b/voice/minimax/minimax_voice.py
index 1446a3f1..8456c479 100644
--- a/voice/minimax/minimax_voice.py
+++ b/voice/minimax/minimax_voice.py
@@ -1,8 +1,7 @@
 # encoding:utf-8
-"""
-MiniMax TTS voice service
-"""
+"""MiniMax TTS via /v1/t2a_v2 (SSE stream, hex-encoded mp3 chunks)."""
 import datetime
+import json
 import random
 import requests
 
@@ -12,24 +11,12 @@ from config import conf
 from voice.voice import Voice
 
 
-MINIMAX_TTS_VOICES = [
-    "English_Graceful_Lady",
-    "English_Insightful_Speaker",
-    "English_radiant_girl",
-    "English_Persuasive_Man",
-    "English_Lucky_Robot",
-    "English_expressive_narrator",
-    "Chinese_Warm_Woman",
-    "Chinese_Gentle_Man",
-]
-
-
 class MinimaxVoice(Voice):
     def __init__(self):
         self.api_key = conf().get("minimax_api_key")
-        self.api_base = conf().get("minimax_api_base") or "https://api.minimax.io"
-        # Strip trailing /v1 if present so we can always append /v1/t2a_v2
-        self.api_base = self.api_base.rstrip("/")
+        # Mainland endpoint matches `sk-api-0-...` keys; override via
+        # `minimax_api_base` for international (api.minimax.io) workspaces.
+        self.api_base = (conf().get("minimax_api_base") or "https://api.minimaxi.com").rstrip("/")
         if self.api_base.endswith("/v1"):
             self.api_base = self.api_base[:-3]
 
@@ -68,12 +55,14 @@ class MinimaxVoice(Voice):
             response = requests.post(url, headers=headers, json=payload, stream=True, timeout=60)
             response.raise_for_status()
 
-            # Parse SSE stream and collect hex-encoded audio chunks
+            # MiniMax returns HTTP 200 even on errors; capture base_resp for diagnostics.
             audio_chunks = []
-            buffer = ""
+            last_base_resp = None
+            event_count = 0
             for raw in response.iter_lines():
                 if not raw:
                     continue
+                event_count += 1
                 line = raw.decode("utf-8") if isinstance(raw, bytes) else raw
                 if not line.startswith("data:"):
                     continue
@@ -81,16 +70,31 @@ class MinimaxVoice(Voice):
                 if not json_str or json_str == "[DONE]":
                     continue
                 try:
-                    import json
                     event_data = json.loads(json_str)
-                    audio_hex = event_data.get("data", {}).get("audio")
-                    if audio_hex:
-                        audio_chunks.append(bytes.fromhex(audio_hex))
                 except Exception:
                     continue
+                base_resp = event_data.get("base_resp") or {}
+                if base_resp:
+                    last_base_resp = base_resp
+                audio_hex = (event_data.get("data") or {}).get("audio")
+                if audio_hex:
+                    try:
+                        audio_chunks.append(bytes.fromhex(audio_hex))
+                    except Exception as e:
+                        logger.warning(f"[MINIMAX] skip bad audio hex chunk: {e}")
 
             if not audio_chunks:
-                logger.error("[MINIMAX] TTS returned no audio data")
+                ct = response.headers.get("Content-Type", "")
+                if last_base_resp and last_base_resp.get("status_code") not in (None, 0):
+                    logger.error(
+                        f"[MINIMAX] TTS failed: status_code={last_base_resp.get('status_code')}, "
+                        f"status_msg={last_base_resp.get('status_msg')}, model={model}, voice_id={voice_id}"
+                    )
+                else:
+                    logger.error(
+                        f"[MINIMAX] TTS returned no audio data, model={model}, voice_id={voice_id}, "
+                        f"url={url}, http={response.status_code}, content_type={ct!r}, events={event_count}"
+                    )
                 return Reply(ReplyType.ERROR, "语音合成失败，未获取到音频数据")
 
             audio_data = b"".join(audio_chunks)
diff --git a/voice/openai/openai_voice.py b/voice/openai/openai_voice.py
index 3ffa00aa..f0db53b4 100644
--- a/voice/openai/openai_voice.py
+++ b/voice/openai/openai_voice.py
@@ -31,7 +31,8 @@ class OpenaiVoice(Voice):
                 "file": file,
             }
             data = {
-                "model": "whisper-1",
+                # Override via `voice_to_text_model` (e.g. fall back to whisper-1).
+                "model": conf().get("voice_to_text_model") or "gpt-4o-mini-transcribe",
             }
             response = requests.post(url, headers=headers, files=files, data=data)
             response_data = response.json()
diff --git a/voice/zhipuai/__init__.py b/voice/zhipuai/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/voice/zhipuai/zhipuai_voice.py b/voice/zhipuai/zhipuai_voice.py
new file mode 100644
index 00000000..1fdcdc7c
--- /dev/null
+++ b/voice/zhipuai/zhipuai_voice.py
@@ -0,0 +1,173 @@
+# encoding:utf-8
+"""ZhipuAI voice: glm-asr-2512 (ASR) + glm-tts (TTS) via BigModel REST API."""
+import datetime
+import os
+import random
+
+import requests
+
+from bridge.reply import Reply, ReplyType
+from common.log import logger
+from config import conf
+from voice import audio_convert
+from voice.voice import Voice
+
+
+DEFAULT_ASR_MODEL = "glm-asr-2512"
+DEFAULT_TTS_MODEL = "glm-tts"
+DEFAULT_TTS_VOICE = "tongtong"
+DEFAULT_API_BASE = "https://open.bigmodel.cn/api/paas/v4"
+MAX_FILE_BYTES = 25 * 1024 * 1024
+REQUEST_TIMEOUT = (5, 60)
+
+
+class ZhipuAIVoice(Voice):
+    def __init__(self):
+        """Stateless: API key, base URL and model names are read from conf() on each call."""
+
+    def voiceToText(self, voice_file: str):
+        """Transcribe an audio file via the ASR endpoint; returns a TEXT Reply, or an ERROR Reply on failure."""
+        failure_msg = "我暂时还无法听清您的语音，请稍后再试吧~"
+        try:
+            # Anything that is not .wav/.mp3 is converted to mp3 first (best effort).
+            audio_path = self._ensure_compatible_format(voice_file)
+            try:
+                byte_count = os.path.getsize(audio_path)
+                if byte_count > MAX_FILE_BYTES:
+                    logger.warning(
+                        f"[ZhipuAIVoice] audio file {byte_count}B exceeds {MAX_FILE_BYTES}B; "
+                        f"glm-asr-2512 may reject it"
+                    )
+            except OSError:
+                pass  # size probe is advisory; open() below surfaces real file errors
+
+            token = conf().get("zhipu_ai_api_key", "")
+            if not token:
+                logger.error("[ZhipuAIVoice] zhipu_ai_api_key is not configured")
+                return Reply(ReplyType.ERROR, "未配置 ZhipuAI API key")
+
+            base_url = (conf().get("zhipu_ai_api_base") or DEFAULT_API_BASE).rstrip("/")
+            model = conf().get("voice_to_text_model") or DEFAULT_ASR_MODEL
+            with open(audio_path, "rb") as stream:
+                response = requests.post(
+                    f"{base_url}/audio/transcriptions",
+                    headers={"Authorization": f"Bearer {token}"},
+                    files={"file": (os.path.basename(audio_path), stream)},
+                    data={"model": model, "stream": "false"},
+                    timeout=REQUEST_TIMEOUT,
+                )
+
+            if response.status_code != 200:
+                logger.error(
+                    f"[ZhipuAIVoice] voiceToText failed: status={response.status_code} "
+                    f"body={response.text[:500]}"
+                )
+                return Reply(ReplyType.ERROR, failure_msg)
+
+            payload = response.json()
+            transcript = (payload.get("text") or "").strip()
+            if transcript:
+                logger.info(f"[ZhipuAIVoice] voiceToText model={model} text={transcript}")
+                return Reply(ReplyType.TEXT, transcript)
+            logger.error(f"[ZhipuAIVoice] voiceToText empty text: {payload}")
+            return Reply(ReplyType.ERROR, failure_msg)
+        except Exception as e:
+            logger.exception(f"[ZhipuAIVoice] voiceToText exception: {e}")
+            return Reply(ReplyType.ERROR, failure_msg)
+
+    def textToVoice(self, text: str):
+        """Synthesize ``text`` via the TTS endpoint and save it under tmp/; returns a VOICE or ERROR Reply."""
+        try:
+            api_key = conf().get("zhipu_ai_api_key", "")
+            if not api_key:
+                logger.error("[ZhipuAIVoice] zhipu_ai_api_key is not configured")
+                return Reply(ReplyType.ERROR, "未配置 ZhipuAI API key")
+
+            api_base = (conf().get("zhipu_ai_api_base") or DEFAULT_API_BASE).rstrip("/")
+            url = f"{api_base}/audio/speech"
+            model = conf().get("text_to_voice_model") or DEFAULT_TTS_MODEL
+            voice_id = conf().get("tts_voice_id") or DEFAULT_TTS_VOICE
+
+            payload = {
+                "model": model,
+                "input": text,
+                "voice": voice_id,
+                "response_format": "wav",
+                "speed": 1.0,
+                "volume": 1.0,
+            }
+            headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
+            response = requests.post(url, headers=headers, json=payload, timeout=REQUEST_TIMEOUT)
+
+            if response.status_code != 200:
+                logger.error(
+                    f"[ZhipuAIVoice] textToVoice failed: status={response.status_code} "
+                    f"body={response.text[:500]} model={model} voice={voice_id}"
+                )
+                return Reply(ReplyType.ERROR, "语音合成失败，请稍后再试")
+
+            # Some errors come back as JSON / SSE with HTTP 200.
+            ct = response.headers.get("Content-Type", "")
+            if "application/json" in ct or "text/event-stream" in ct:
+                try:
+                    err = response.json()
+                except Exception:
+                    err = {"raw": response.text[:500]}
+                logger.error(
+                    f"[ZhipuAIVoice] textToVoice unexpected text response "
+                    f"(content_type={ct}): {err}"
+                )
+                return Reply(ReplyType.ERROR, "语音合成失败，请稍后再试")
+
+            audio_bytes = response.content
+            if not audio_bytes:
+                # An empty HTTP 200 body would otherwise be saved and returned as a 0-byte voice file.
+                logger.error(f"[ZhipuAIVoice] textToVoice returned empty audio: model={model} voice={voice_id}")
+                return Reply(ReplyType.ERROR, "语音合成失败，请稍后再试")
+            ext = self._sniff_audio_ext(audio_bytes) or "wav"
+
+            file_name = (
+                "tmp/" + datetime.datetime.now().strftime("%Y%m%d%H%M%S")
+                + str(random.randint(0, 1000)) + "." + ext
+            )
+            os.makedirs(os.path.dirname(file_name), exist_ok=True)
+            with open(file_name, "wb") as f:
+                f.write(audio_bytes)
+            logger.info(
+                f"[ZhipuAIVoice] textToVoice model={model} voice={voice_id} "
+                f"file={file_name} bytes={len(audio_bytes)} ext={ext}"
+            )
+            return Reply(ReplyType.VOICE, file_name)
+        except Exception as e:
+            logger.exception(f"[ZhipuAIVoice] textToVoice exception: {e}")
+            return Reply(ReplyType.ERROR, "语音合成失败，请稍后再试")
+
+    @staticmethod
+    def _sniff_audio_ext(data: bytes) -> str:
+        """Return 'wav', 'mp3', 'ogg' or 'flac' from the leading magic bytes; '' if unknown or < 12 bytes."""
+        if len(data) < 12:
+            return ""
+        head = bytes(data[:12])
+        if head.startswith(b"RIFF") and head[8:12] == b"WAVE":
+            return "wav"
+        # ID3 tag, or an MPEG Layer III frame sync (layer bits 01) with either CRC setting:
+        # 0xFFFA/0xFFFB = MPEG-1, 0xFFF2/0xFFF3 = MPEG-2, 0xFFE2/0xFFE3 = MPEG-2.5.
+        if head.startswith(b"ID3") or (head[0] == 0xFF and head[1] in (0xFB, 0xFA, 0xF3, 0xF2, 0xE3, 0xE2)):
+            return "mp3"
+        if head.startswith(b"OggS"):
+            return "ogg"
+        return "flac" if head.startswith(b"fLaC") else ""
+
+    @staticmethod
+    def _ensure_compatible_format(voice_file: str) -> str:
+        """Return a path glm-asr-2512 can accept (.wav / .mp3), converting other inputs to mp3."""
+        if voice_file.lower().endswith((".mp3", ".wav")):
+            return voice_file
+        try:
+            converted = os.path.splitext(voice_file)[0] + ".mp3"
+            audio_convert.any_to_mp3(voice_file, converted)
+        except Exception as e:
+            # Best effort: on conversion failure hand back the original file unchanged.
+            logger.warning(f"[ZhipuAIVoice] mp3 convert failed: {e}")
+            return voice_file
+        return converted