Merge branch 'master' into feat/wechatcom-kf-channel

2026-07-17 11:07:11 +08:00 · 2026-05-30 17:17:29 +08:00
parent 99bddb79d6 3c161df526
commit b0ac0731c7
212 changed files with 14439 additions and 3691 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -32,7 +32,6 @@ plugins/banwords/lib/__pycache__
 !plugins/role
 !plugins/keyword
 !plugins/linkai
-!plugins/agent
 !plugins/cow_cli
 client_config.json
 ref/
--- a/README.md
+++ b/README.md
--- a/agent/memory/conversation_store.py
+++ b/agent/memory/conversation_store.py
@@ -44,6 +44,7 @@ CREATE TABLE IF NOT EXISTS messages (
    role         TEXT    NOT NULL,
    content      TEXT    NOT NULL,
    created_at   INTEGER NOT NULL,
+    extras       TEXT    NOT NULL DEFAULT '',
    UNIQUE (session_id, seq)
 );

@@ -67,6 +68,12 @@ _MIGRATION_ADD_CONTEXT_START_SEQ = """
 ALTER TABLE sessions ADD COLUMN context_start_seq INTEGER NOT NULL DEFAULT 0;
 """

+# Generic JSON sidecar for per-message attachments (TTS audio URL, future use).
+# Always optional — readers must tolerate missing column / empty / invalid JSON.
+_MIGRATION_ADD_MSG_EXTRAS = """
+ALTER TABLE messages ADD COLUMN extras TEXT NOT NULL DEFAULT '';
+"""
+
 DEFAULT_MAX_AGE_DAYS: int = 30


@@ -169,20 +176,26 @@ def _group_into_display_turns(
    cur_rest: List[tuple] = []
    started = False

-    for role, raw_content, created_at in rows:
+    for role, raw_content, created_at, raw_extras in rows:
        try:
            content = json.loads(raw_content)
        except Exception:
            content = raw_content
+        try:
+            extras = json.loads(raw_extras) if raw_extras else {}
+            if not isinstance(extras, dict):
+                extras = {}
+        except Exception:
+            extras = {}

        if role == "user" and _is_visible_user_message(content):
            if started:
                groups.append((cur_user, cur_rest))
-            cur_user = (content, created_at)
+            cur_user = (content, created_at, extras)
            cur_rest = []
            started = True
        else:
-            cur_rest.append((role, content, created_at))
+            cur_rest.append((role, content, created_at, extras))

    if started:
        groups.append((cur_user, cur_rest))
@@ -195,7 +208,7 @@ def _group_into_display_turns(
    for user_row, rest in groups:
        # User turn
        if user_row:
-            content, created_at = user_row
+            content, created_at, _u_extras = user_row
            text = _extract_display_text(content)
            if text:
                turns.append({"role": "user", "content": text, "created_at": created_at})
@@ -206,8 +219,11 @@ def _group_into_display_turns(
        tool_results: Dict[str, str] = {}
        final_text = ""
        final_ts: Optional[int] = None
+        merged_extras: Dict[str, Any] = {}

-        for role, content, created_at in rest:
+        for role, content, created_at, extras in rest:
+            if role == "assistant" and isinstance(extras, dict):
+                merged_extras.update(extras)
            if role == "user":
                tool_results.update(_extract_tool_results(content))
            elif role == "assistant":
@@ -256,6 +272,8 @@ def _group_into_display_turns(
                "steps": steps,
                "created_at": final_ts or (user_row[1] if user_row else 0),
            }
+            if merged_extras:
+                turn["extras"] = merged_extras
            turns.append(turn)

    return turns
@@ -411,13 +429,15 @@ class ConversationStore:
                        content = json.dumps(
                            msg.get("content", ""), ensure_ascii=False
                        )
+                        extras_obj = msg.get("extras") or {}
+                        extras = json.dumps(extras_obj, ensure_ascii=False) if extras_obj else ""
                        conn.execute(
                            """
                            INSERT OR IGNORE INTO messages
-                                (session_id, seq, role, content, created_at)
-                            VALUES (?, ?, ?, ?, ?)
+                                (session_id, seq, role, content, created_at, extras)
+                            VALUES (?, ?, ?, ?, ?, ?)
                            """,
-                            (session_id, next_seq, role, content, now),
+                            (session_id, next_seq, role, content, now, extras),
                        )
                        next_seq += 1

@@ -651,6 +671,55 @@ class ConversationStore:
            logger.info(f"[ConversationStore] Pruned {deleted} expired sessions")
        return deleted

+    def attach_extras_to_last_assistant(
+        self,
+        session_id: str,
+        extras: Dict[str, Any],
+    ) -> Optional[int]:
+        """
+        Merge ``extras`` into the latest assistant message of a session.
+
+        Used by post-processing (e.g. TTS) that needs to annotate an already
+        persisted bot reply with attachments such as audio URLs.
+
+        Returns the message seq that was updated, or ``None`` if no assistant
+        message exists or the update could not be applied.
+        """
+        if not extras:
+            return None
+        with self._lock:
+            conn = self._connect()
+            try:
+                row = conn.execute(
+                    """
+                    SELECT seq, extras FROM messages
+                    WHERE session_id = ? AND role = 'assistant'
+                    ORDER BY seq DESC LIMIT 1
+                    """,
+                    (session_id,),
+                ).fetchone()
+                if not row:
+                    return None
+                seq, raw = row
+                try:
+                    cur = json.loads(raw) if raw else {}
+                    if not isinstance(cur, dict):
+                        cur = {}
+                except Exception:
+                    cur = {}
+                cur.update(extras)
+                conn.execute(
+                    "UPDATE messages SET extras = ? WHERE session_id = ? AND seq = ?",
+                    (json.dumps(cur, ensure_ascii=False), session_id, seq),
+                )
+                conn.commit()
+                return seq
+            except Exception as e:
+                logger.warning(f"[ConversationStore] attach_extras failed: {e}")
+                return None
+            finally:
+                conn.close()
+
    def load_history_page(
        self,
        session_id: str,
@@ -698,7 +767,22 @@ class ConversationStore:
                ).fetchone()
                ctx_start = ctx_row[0] if ctx_row else 0

+                # extras column is added by migration; tolerate older DBs that
+                # might miss it by falling back to an empty-string placeholder.
+                try:
                    rows = conn.execute(
+                        """
+                        SELECT seq, role, content, created_at, extras
+                        FROM messages
+                        WHERE session_id = ?
+                        ORDER BY seq ASC
+                        """,
+                        (session_id,),
+                    ).fetchall()
+                except sqlite3.OperationalError:
+                    rows = [
+                        (seq, role, content, created_at, "")
+                        for (seq, role, content, created_at) in conn.execute(
                            """
                            SELECT seq, role, content, created_at
                            FROM messages
@@ -707,6 +791,7 @@ class ConversationStore:
                            """,
                            (session_id,),
                        ).fetchall()
+                    ]
            finally:
                conn.close()

@@ -719,13 +804,16 @@ class ConversationStore:
            include_thinking = False

        # Strip seq for display grouping, but record max seq per visible user group
-        plain_rows = [(role, content, created_at) for _seq, role, content, created_at in rows]
+        plain_rows = [
+            (role, content, created_at, extras_raw)
+            for _seq, role, content, created_at, extras_raw in rows
+        ]
        visible = _group_into_display_turns(plain_rows, include_thinking=include_thinking)

        # Build a mapping: find the seq of each visible user message to annotate context boundary.
        # Walk through rows to find visible user message seqs in order.
        visible_user_seqs: List[int] = []
-        for seq, role, raw_content, _ts in rows:
+        for seq, role, raw_content, _ts, _extras in rows:
            if role != "user":
                continue
            try:
@@ -911,6 +999,18 @@ class ConversationStore:
            except Exception as e:
                logger.warning(f"[ConversationStore] Migration (context_start_seq) failed: {e}")

+        msg_cols = {
+            row[1]
+            for row in conn.execute("PRAGMA table_info(messages)").fetchall()
+        }
+        if "extras" not in msg_cols:
+            try:
+                conn.execute(_MIGRATION_ADD_MSG_EXTRAS)
+                conn.commit()
+                logger.info("[ConversationStore] Migrated: added messages.extras column")
+            except Exception as e:
+                logger.warning(f"[ConversationStore] Migration (extras) failed: {e}")
+
    def _connect(self) -> sqlite3.Connection:
        conn = sqlite3.connect(str(self._db_path), timeout=10)
        conn.execute("PRAGMA journal_mode=WAL")
--- a/agent/memory/embedding/state.py
+++ b/agent/memory/embedding/state.py
@@ -31,9 +31,13 @@ def detect_index_dim(storage) -> Optional[int]:
    if not row or not row["embedding"]:
        return None
    try:
-        emb = json.loads(row["embedding"])
+        raw = row["embedding"]
+        if isinstance(raw, (bytes, bytearray)):
+            # New BLOB format: 4 bytes per float32
+            return len(raw) // 4
+        emb = json.loads(raw)
        return len(emb) if isinstance(emb, list) else None
-    except (json.JSONDecodeError, TypeError):
+    except (json.JSONDecodeError, TypeError, Exception):
        return None


--- a/agent/memory/manager.py
+++ b/agent/memory/manager.py
@@ -13,7 +13,7 @@ from datetime import datetime, timedelta
 from agent.memory.config import MemoryConfig, get_default_memory_config
 from agent.memory.storage import MemoryStorage, MemoryChunk, SearchResult
 from agent.memory.chunker import TextChunker
-from agent.memory.embedding import EmbeddingProvider
+from agent.memory.embedding import EmbeddingProvider, EmbeddingCache
 from agent.memory.summarizer import MemoryFlushManager, create_memory_files_if_needed


@@ -62,6 +62,10 @@ class MemoryManager:
                "[MemoryManager] No embedding provider; memory will use keyword search only"
            )

+        # Cache for query embeddings (avoids redundant API calls within a session)
+        self._embedding_cache = EmbeddingCache()
+
+
        # Initialize memory flush manager
        workspace_dir = self.config.get_workspace()
        self.flush_manager = MemoryFlushManager(
@@ -128,7 +132,14 @@ class MemoryManager:
        vector_results = []
        if self.embedding_provider:
            try:
+                provider_name = type(self.embedding_provider).__name__
+                model_name = getattr(self.embedding_provider, 'model', '')
+                cached = self._embedding_cache.get(query, provider_name, model_name)
+                if cached is not None:
+                    query_embedding = cached
+                else:
                    query_embedding = self.embedding_provider.embed_query(query)
+                    self._embedding_cache.put(query, provider_name, model_name, query_embedding)
                vector_results = self.storage.search_vector(
                    query_embedding=query_embedding,
                    user_id=user_id,
--- a/agent/memory/storage.py
+++ b/agent/memory/storage.py
@@ -5,12 +5,42 @@ Provides vector and keyword search capabilities
 """

 from __future__ import annotations
+import re
 import sqlite3
 import json
 import hashlib
+import threading
 from typing import List, Dict, Optional, Any
 from pathlib import Path
 from dataclasses import dataclass
+try:
+    import numpy as np
+    _HAS_NUMPY = True
+except ImportError:
+    _HAS_NUMPY = False
+    np = None  # type: ignore[assignment]
+
+# UPSERT (INSERT … ON CONFLICT DO UPDATE) requires SQLite ≥ 3.24.0 (2018).
+# Older systems (e.g. CentOS 7 ships SQLite 3.7) fall back to INSERT OR REPLACE,
+# which risks FTS5 rowid drift on chunk updates (see save_chunk docstring).
+_HAS_UPSERT = sqlite3.sqlite_version_info >= (3, 24, 0)
+
+# ---------------------------------------------------------------------------
+# CJK character ranges, compiled once at module load.
+# Covers: CJK Symbols/Punctuation, Japanese kana (hiragana + katakana),
+#         CJK Unified Ideographs + Extension A, Korean syllables (Hangul),
+#         CJK Compatibility Ideographs, and CJK Extension B–F.
+# ---------------------------------------------------------------------------
+_CJK_RANGES = (
+    r'\u3000-\u30ff'          # CJK Symbols/Punctuation + Japanese kana
+    r'\u3400-\u9fff'          # CJK Unified Ideographs (incl. Extension A)
+    r'\uac00-\ud7af'          # Korean syllables (Hangul)
+    r'\uf900-\ufaff'          # CJK Compatibility Ideographs
+    r'\U00020000-\U0002fa1f'  # CJK Extension B–F
+)
+_RE_CONTAINS_CJK   = re.compile(f'[{_CJK_RANGES}]')
+_RE_CJK_WORDS      = re.compile(f'[{_CJK_RANGES}]+')
+_RE_TRIGRAM_TOKENS = re.compile(f'[{_CJK_RANGES}]+|[A-Za-z0-9_]+')


@dataclass
@@ -48,6 +78,10 @@ class MemoryStorage:
        self.db_path = db_path
        self.conn: Optional[sqlite3.Connection] = None
        self.fts5_available = False  # Track FTS5 availability
+        # RLock protects concurrent writes from the same process.
+        # SQLite WAL mode handles read/write concurrency at the file level,
+        # but same-process concurrent writes still need a Python-level lock.
+        self._lock = threading.RLock()
        self._init_db()
    
    def _check_fts5_support(self) -> bool:
@@ -69,6 +103,14 @@ class MemoryStorage:
            
            # Check FTS5 support
            self.fts5_available = self._check_fts5_support()
+            if not _HAS_UPSERT:
+                from common.log import logger
+                logger.warning(
+                    "[MemoryStorage] SQLite %s < 3.24 — UPSERT unavailable. "
+                    "Falling back to INSERT OR REPLACE; FTS5 rowid may drift on "
+                    "chunk updates (rebuild index periodically to recover).",
+                    sqlite3.sqlite_version,
+                )
            if not self.fts5_available:
                from common.log import logger
                logger.debug("[MemoryStorage] FTS5 not available, using LIKE-based keyword search")
@@ -175,6 +217,75 @@ class MemoryStorage:
                )
                self._rebuild_fts5_from_chunks()

+        # Internal key-value store for persistent flags (e.g. backfill tracking)
+        self.conn.execute("""
+            CREATE TABLE IF NOT EXISTS _meta (
+                key TEXT PRIMARY KEY,
+                value TEXT NOT NULL
+            )
+        """)
+
+        # Create trigram FTS5 table for CJK / mixed-language search
+        self.trigram_fts5_available = False
+        if self.fts5_available:
+            try:
+                self.conn.execute("""
+                    CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts_trigram USING fts5(
+                        text,
+                        id UNINDEXED,
+                        user_id UNINDEXED,
+                        path UNINDEXED,
+                        source UNINDEXED,
+                        scope UNINDEXED,
+                        content='chunks',
+                        content_rowid='rowid',
+                        tokenize='trigram case_sensitive 0'
+                    )
+                """)
+                self.conn.execute("""
+                    CREATE TRIGGER IF NOT EXISTS chunks_trigram_ai
+                    AFTER INSERT ON chunks BEGIN
+                        INSERT INTO chunks_fts_trigram(rowid, text, id, user_id, path, source, scope)
+                        VALUES (new.rowid, new.text, new.id, new.user_id, new.path, new.source, new.scope);
+                    END
+                """)
+                self.conn.execute("""
+                    CREATE TRIGGER IF NOT EXISTS chunks_trigram_ad
+                    AFTER DELETE ON chunks BEGIN
+                        DELETE FROM chunks_fts_trigram WHERE rowid = old.rowid;
+                    END
+                """)
+                self.conn.execute("""
+                    CREATE TRIGGER IF NOT EXISTS chunks_trigram_au
+                    AFTER UPDATE ON chunks BEGIN
+                        UPDATE chunks_fts_trigram
+                        SET text=new.text, id=new.id, user_id=new.user_id,
+                            path=new.path, source=new.source, scope=new.scope
+                        WHERE rowid = new.rowid;
+                    END
+                """)
+                # One-time backfill for existing rows.
+                # NOTE: COUNT(*) on an FTS5 content table always returns 0, so we
+                # use a persistent flag in _meta instead of counting trigram rows.
+                backfill_done = self.conn.execute(
+                    "SELECT 1 FROM _meta WHERE key = 'trigram_backfill_done'"
+                ).fetchone()
+                chunks_count = self.conn.execute(
+                    "SELECT COUNT(*) as c FROM chunks"
+                ).fetchone()['c']
+                if chunks_count > 0 and not backfill_done:
+                    self.conn.execute(
+                        "INSERT INTO chunks_fts_trigram(chunks_fts_trigram) VALUES('rebuild')"
+                    )
+                    self.conn.execute(
+                        "INSERT OR REPLACE INTO _meta(key, value) VALUES('trigram_backfill_done', '1')"
+                    )
+                self.trigram_fts5_available = True
+            except Exception:
+                from common.log import logger
+                logger.warning("[MemoryStorage] trigram FTS5 unavailable, CJK search will use LIKE fallback", exc_info=True)
+                self.trigram_fts5_available = False
+
        # Create files metadata table
        self.conn.execute("""
            CREATE TABLE IF NOT EXISTS files (
@@ -299,42 +410,97 @@ class MemoryStorage:
        self.conn.commit()

    def save_chunk(self, chunk: MemoryChunk):
-        """Save a memory chunk"""
-        self.conn.execute("""
-            INSERT OR REPLACE INTO chunks 
-            (id, user_id, scope, source, path, start_line, end_line, text, embedding, hash, metadata, updated_at)
+        """Save a memory chunk (insert or update by id).
+
+        Uses SQLite UPSERT (INSERT … ON CONFLICT DO UPDATE) instead of
+        INSERT OR REPLACE.  INSERT OR REPLACE internally does DELETE+INSERT,
+        which changes the row's rowid.  Because both FTS5 tables use
+        content_rowid='rowid', a new rowid would leave the old FTS index
+        entries pointing at a non-existent rowid and trigger
+        "fts5: missing row N from content table" errors.
+        ON CONFLICT DO UPDATE fires the AFTER UPDATE trigger (chunks_au /
+        chunks_trigram_au) and keeps the original rowid intact.
+        """
+        if _HAS_UPSERT:
+            _SQL = """
+                INSERT INTO chunks
+                (id, user_id, scope, source, path, start_line, end_line,
+                 text, embedding, hash, metadata, updated_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, strftime('%s', 'now'))
-        """, (
-            chunk.id,
-            chunk.user_id,
-            chunk.scope,
-            chunk.source,
-            chunk.path,
-            chunk.start_line,
-            chunk.end_line,
-            chunk.text,
-            json.dumps(chunk.embedding) if chunk.embedding else None,
+                ON CONFLICT(id) DO UPDATE SET
+                    user_id     = excluded.user_id,
+                    scope       = excluded.scope,
+                    source      = excluded.source,
+                    path        = excluded.path,
+                    start_line  = excluded.start_line,
+                    end_line    = excluded.end_line,
+                    text        = excluded.text,
+                    embedding   = excluded.embedding,
+                    hash        = excluded.hash,
+                    metadata    = excluded.metadata,
+                    updated_at  = strftime('%s', 'now')
+            """
+        else:
+            _SQL = """
+                INSERT OR REPLACE INTO chunks
+                (id, user_id, scope, source, path, start_line, end_line,
+                 text, embedding, hash, metadata, updated_at)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, strftime('%s', 'now'))
+            """
+        params = (
+            chunk.id, chunk.user_id, chunk.scope, chunk.source, chunk.path,
+            chunk.start_line, chunk.end_line, chunk.text,
+            self._encode_embedding(chunk.embedding),
            chunk.hash,
-            json.dumps(chunk.metadata) if chunk.metadata else None
-        ))
+            json.dumps(chunk.metadata) if chunk.metadata else None,
+        )
+        with self._lock:
+            self.conn.execute(_SQL, params)
            self.conn.commit()

    def save_chunks_batch(self, chunks: List[MemoryChunk]):
-        """Save multiple chunks in a batch"""
-        self.conn.executemany("""
-            INSERT OR REPLACE INTO chunks 
-            (id, user_id, scope, source, path, start_line, end_line, text, embedding, hash, metadata, updated_at)
+        """Save multiple chunks in a batch (insert or update by id).
+
+        See save_chunk for why UPSERT is used instead of INSERT OR REPLACE.
+        """
+        if _HAS_UPSERT:
+            _SQL = """
+                INSERT INTO chunks
+                (id, user_id, scope, source, path, start_line, end_line,
+                 text, embedding, hash, metadata, updated_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, strftime('%s', 'now'))
-        """, [
+                ON CONFLICT(id) DO UPDATE SET
+                    user_id     = excluded.user_id,
+                    scope       = excluded.scope,
+                    source      = excluded.source,
+                    path        = excluded.path,
+                    start_line  = excluded.start_line,
+                    end_line    = excluded.end_line,
+                    text        = excluded.text,
+                    embedding   = excluded.embedding,
+                    hash        = excluded.hash,
+                    metadata    = excluded.metadata,
+                    updated_at  = strftime('%s', 'now')
+            """
+        else:
+            _SQL = """
+                INSERT OR REPLACE INTO chunks
+                (id, user_id, scope, source, path, start_line, end_line,
+                 text, embedding, hash, metadata, updated_at)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, strftime('%s', 'now'))
+            """
+        params_list = [
            (
                c.id, c.user_id, c.scope, c.source, c.path,
                c.start_line, c.end_line, c.text,
-                json.dumps(c.embedding) if c.embedding else None,
+                self._encode_embedding(c.embedding),
                c.hash,
-                json.dumps(c.metadata) if c.metadata else None
+                json.dumps(c.metadata) if c.metadata else None,
            )
            for c in chunks
-        ])
+        ]
+        with self._lock:
+            self.conn.executemany(_SQL, params_list)
            self.conn.commit()
    
    def get_chunk(self, chunk_id: str) -> Optional[MemoryChunk]:
@@ -356,17 +522,17 @@ class MemoryStorage:
        limit: int = 10
    ) -> List[SearchResult]:
        """
-        Vector similarity search using in-memory cosine similarity
-        (sqlite-vec can be added later for better performance)
+        Vector cosine-similarity search. With numpy, all embeddings are loaded
+        then scored in a single BLAS matrix-vector multiply (~100x faster than
+        the pure-Python per-row loop, which remains the fallback without numpy).
        """
        if scopes is None:
            scopes = ["shared"]
            if user_id:
                scopes.append("user")

-        # Build query
        scope_placeholders = ','.join('?' * len(scopes))
-        params = scopes
+        params = list(scopes)

        if user_id:
            query = f"""
@@ -384,44 +550,88 @@ class MemoryStorage:
            """

        rows = self.conn.execute(query, params).fetchall()
+        if not rows:
+            return []

-        # Calculate cosine similarity. We probe the first row's dim to fail
-        # loudly on a query/index dim mismatch — otherwise every doc would
-        # score 0 silently, leaving the user wondering why search broke.
-        results = []
-        query_dim = len(query_embedding)
-        if rows:
-            first = json.loads(rows[0]['embedding'])
-            if isinstance(first, list) and len(first) != query_dim:
-                raise ValueError(
-                    f"Embedding dim mismatch: query is {query_dim}-dim but "
-                    f"index stores {len(first)}-dim vectors. The configured "
-                    f"embedding model differs from the one that built the "
-                    f"index — run /memory rebuild-index to re-embed."
-                )
-
+        # Parse embeddings and build a (N, D) matrix in one pass.
+        # New rows store BLOB bytes (np.frombuffer); legacy rows fall back to JSON.
+        # Filter out rows whose embedding dimension differs from the query —
+        # mixing dimensions would cause np.array() to produce an object array
+        # and matrix @ q_vec to raise ValueError.
+        expected_dim = len(query_embedding)
+        valid_rows = []
+        vectors = []
        for row in rows:
-            embedding = json.loads(row['embedding'])
-            similarity = self._cosine_similarity(query_embedding, embedding)
+            vec = self._decode_embedding(row['embedding'])
+            if not vec:
+                continue
+            if len(vec) != expected_dim:
+                from common.log import logger
+                logger.warning(
+                    "[MemoryStorage] Skipping chunk %s: embedding dim %d != query dim %d",
+                    row['id'], len(vec), expected_dim
+                )
+                continue
+            valid_rows.append(row)
+            vectors.append(vec)

-            if similarity > 0:
-                results.append((similarity, row))
+        if not vectors:
+            return []

-        # Sort by similarity and limit
-        results.sort(key=lambda x: x[0], reverse=True)
-        results = results[:limit]
+        if _HAS_NUMPY:
+            matrix = np.array(vectors, dtype=np.float32)        # (N, D)
+            q_vec = np.array(query_embedding, dtype=np.float32)  # (D,)

+            # Vectorized cosine similarity: dot(matrix, q) / (||matrix|| * ||q||)
+            dots = matrix @ q_vec                                # (N,)
+            row_norms = np.linalg.norm(matrix, axis=1)           # (N,)
+            q_norm = float(np.linalg.norm(q_vec))
+            denominators = row_norms * q_norm
+            np.maximum(denominators, 1e-10, out=denominators)    # avoid div-by-zero
+            sims = dots / denominators                           # (N,)
+
+            # Select TopK using argpartition (O(N) average), then sort only those K
+            k = min(limit, len(valid_rows))
+            top_idx = np.argpartition(sims, -k)[-k:]
+            top_idx = top_idx[np.argsort(sims[top_idx])[::-1]]
+
+            return [
+                SearchResult(
+                    path=valid_rows[i]['path'],
+                    start_line=valid_rows[i]['start_line'],
+                    end_line=valid_rows[i]['end_line'],
+                    score=float(sims[i]),
+                    snippet=self._truncate_text(valid_rows[i]['text'], 500),
+                    source=valid_rows[i]['source'],
+                    user_id=valid_rows[i]['user_id']
+                )
+                for i in top_idx
+                if sims[i] > 0
+            ]
+        else:
+            # Pure-Python cosine similarity fallback (numpy not installed)
+            import math
+            q = query_embedding
+            q_norm = math.sqrt(sum(x * x for x in q)) or 1e-10
+            scored = []
+            for i, vec in enumerate(vectors):
+                dot = sum(a * b for a, b in zip(vec, q))
+                v_norm = math.sqrt(sum(x * x for x in vec)) or 1e-10
+                sim = dot / (v_norm * q_norm)
+                if sim > 0:
+                    scored.append((sim, valid_rows[i]))
+            scored.sort(key=lambda x: x[0], reverse=True)
            return [
                SearchResult(
                    path=row['path'],
                    start_line=row['start_line'],
                    end_line=row['end_line'],
-                score=score,
+                    score=sim,
                    snippet=self._truncate_text(row['text'], 500),
                    source=row['source'],
                    user_id=row['user_id']
                )
-            for score, row in results
+                for sim, row in scored[:limit]
            ]
    
    def search_keyword(
@@ -445,13 +655,38 @@ class MemoryStorage:
            if user_id:
                scopes.append("user")

-        if self.fts5_available:
+        # Step 1: Standard FTS5 (unicode61) — queries without CJK characters only.
+        # Skipped when query contains any CJK characters: unicode61 tokenises CJK
+        # as individual characters without forming meaningful tokens, so it would
+        # match only the ASCII portion of a mixed query (e.g. "Python" from
+        # "Python教程") and silently discard the CJK part.  Those queries go
+        # directly to Step 2 (trigram), which handles both ASCII and CJK together.
+        fts1_attempted = False
+        if (self.fts5_available
+                and not MemoryStorage._contains_cjk(query)
+                and MemoryStorage._build_fts_query(query)):
+            fts1_attempted = True
            fts_results = self._search_fts5(query, user_id, scopes, limit)
            if fts_results:
                return fts_results

+        # Step 2: Trigram FTS5 — CJK/mixed queries, plus fallback when unicode61
+        # returned nothing (trigram indexes all scripts with 3-char sliding windows,
+        # so it can catch terms that unicode61 tokenisation misses).
+        if self.trigram_fts5_available and (
+            MemoryStorage._contains_cjk(query) or fts1_attempted
+        ):
+            trigram_results = self._search_fts5_trigram(query, user_id, scopes, limit)
+            if trigram_results:
+                return trigram_results
+
+        # Step 3: LIKE fallback — last resort (FTS5 unavailable, or CJK tokens
+        # shorter than 3 characters that trigram cannot match, e.g. a single-char query).
+        if not self.fts5_available or MemoryStorage._contains_cjk(query):
            return self._search_like(query, user_id, scopes, limit)

+        return []
+    
    def _search_fts5(
        self,
        query: str,
@@ -471,7 +706,7 @@ class MemoryStorage:
            sql_query = f"""
                SELECT chunks.*, bm25(chunks_fts) as rank
                FROM chunks_fts
-                JOIN chunks ON chunks.id = chunks_fts.id
+                JOIN chunks ON chunks.rowid = chunks_fts.rowid
                WHERE chunks_fts MATCH ? 
                AND chunks.scope IN ({scope_placeholders})
                AND (chunks.scope = 'shared' OR chunks.user_id = ?)
@@ -483,7 +718,7 @@ class MemoryStorage:
            sql_query = f"""
                SELECT chunks.*, bm25(chunks_fts) as rank
                FROM chunks_fts
-                JOIN chunks ON chunks.id = chunks_fts.id
+                JOIN chunks ON chunks.rowid = chunks_fts.rowid
                WHERE chunks_fts MATCH ? 
                AND chunks.scope IN ({scope_placeholders})
                ORDER BY rank
@@ -505,11 +740,9 @@ class MemoryStorage:
                )
                for row in rows
            ]
-        except Exception as e:
+        except Exception:
            from common.log import logger
-            logger.error(
-                f"[MemoryStorage] FTS5 search failed (caller will fall back to LIKE): {e}"
-            )
+            logger.warning("[MemoryStorage] _search_fts5 failed, returning empty", exc_info=True)
            return []

    def _search_like(
@@ -522,12 +755,11 @@ class MemoryStorage:
        """LIKE-based search.

        Used as the keyword-search fallback when FTS5 is unavailable, fails,
-        or returns empty. Supports both CJK runs and ASCII word tokens so it
-        can serve as a true safety net for any query.
+        or returns empty. Supports both CJK runs (1+ chars) and ASCII word
+        tokens (3+ chars) so it can serve as a true safety net for any query.
        """
-        import re
-        # CJK runs (2+ chars) + ASCII word tokens (3+ chars to avoid noise)
-        cjk_words = re.findall(r'[\u4e00-\u9fff]{2,}', query)
+        # CJK runs (1+ chars, wide Unicode range) + ASCII words (3+ chars to avoid noise)
+        cjk_words = _RE_CJK_WORDS.findall(query)
        ascii_words = [t for t in re.findall(r'[A-Za-z0-9_]+', query) if len(t) >= 3]
        words = cjk_words + ascii_words
        if not words:
@@ -565,28 +797,37 @@ class MemoryStorage:
        
        try:
            rows = self.conn.execute(sql_query, params).fetchall()
-            return [
-                SearchResult(
+            results = []
+            for row in rows:
+                # Dynamic score: reward chunks that contain more of the query words.
+                # Use all tokens (CJK + ASCII) so pure-ASCII queries are not skipped.
+                # matched_count is always ≥1 because the WHERE clause uses OR, but
+                # guard defensively so unexpected zero-match rows are never surfaced.
+                text_lower = row['text'].lower()
+                matched_count = sum(1 for w in words if w.lower() in text_lower)
+                if matched_count == 0:
+                    continue
+                score = min(0.85, 0.3 + 0.15 * matched_count)
+                results.append(SearchResult(
                    path=row['path'],
                    start_line=row['start_line'],
                    end_line=row['end_line'],
-                    score=0.5,  # Fixed score for LIKE search
+                    score=score,
                    snippet=self._truncate_text(row['text'], 500),
                    source=row['source'],
                    user_id=row['user_id']
-                )
-                for row in rows
-            ]
-        except Exception as e:
+                ))
+            results.sort(key=lambda r: r.score, reverse=True)
+            return results
+        except Exception:
            from common.log import logger
-            logger.error(f"[MemoryStorage] LIKE search failed: {e}")
+            logger.warning("[MemoryStorage] _search_like failed, returning empty", exc_info=True)
            return []

    def delete_by_path(self, path: str):
        """Delete all chunks from a file"""
-        self.conn.execute("""
-            DELETE FROM chunks WHERE path = ?
-        """, (path,))
+        with self._lock:
+            self.conn.execute("DELETE FROM chunks WHERE path = ?", (path,))
            self.conn.commit()

    def get_file_hash(self, path: str) -> Optional[str]:
@@ -598,6 +839,7 @@ class MemoryStorage:

    def update_file_metadata(self, path: str, source: str, file_hash: str, mtime: int, size: int):
        """Update file metadata"""
+        with self._lock:
            self.conn.execute("""
                INSERT OR REPLACE INTO files (path, source, hash, mtime, size, updated_at)
                VALUES (?, ?, ?, ?, ?, strftime('%s', 'now'))
@@ -632,7 +874,8 @@ class MemoryStorage:
                self.conn.close()
                self.conn = None  # Mark as closed
            except Exception as e:
-                print(f"⚠️  Error closing database connection: {e}")
+                from common.log import logger
+                logger.warning("[MemoryStorage] Error closing database connection: %s", e)
    
    def __del__(self):
        """Destructor to ensure connection is closed"""
@@ -643,6 +886,32 @@ class MemoryStorage:
    
    # Helper methods

+    @staticmethod
+    def _encode_embedding(embedding: Optional[List[float]]) -> Optional[bytes]:
+        """Encode embedding as float32 BLOB bytes (~6x smaller and faster than JSON).
+        Falls back to struct.pack when numpy is unavailable."""
+        if embedding is None:
+            return None
+        if _HAS_NUMPY:
+            return np.array(embedding, dtype=np.float32).tobytes()
+        import struct
+        return struct.pack(f'{len(embedding)}f', *embedding)
+
+    @staticmethod
+    def _decode_embedding(raw) -> Optional[List[float]]:
+        """Decode embedding from BLOB bytes or legacy JSON string.
+        Handles both numpy and numpy-free environments."""
+        if raw is None:
+            return None
+        if isinstance(raw, (bytes, bytearray)):
+            if _HAS_NUMPY:
+                return np.frombuffer(raw, dtype=np.float32).tolist()
+            import struct
+            n = len(raw) // 4
+            return list(struct.unpack(f'{n}f', raw))
+        # Legacy JSON format written by older versions
+        return json.loads(raw)
+
    def _row_to_chunk(self, row) -> MemoryChunk:
        """Convert database row to MemoryChunk"""
        return MemoryChunk(
@@ -654,31 +923,88 @@ class MemoryStorage:
            start_line=row['start_line'],
            end_line=row['end_line'],
            text=row['text'],
-            embedding=json.loads(row['embedding']) if row['embedding'] else None,
+            embedding=self._decode_embedding(row['embedding']),
            hash=row['hash'],
            metadata=json.loads(row['metadata']) if row['metadata'] else None
        )
    
    @staticmethod
-    def _cosine_similarity(vec1: List[float], vec2: List[float]) -> float:
-        """Calculate cosine similarity between two vectors"""
-        if len(vec1) != len(vec2):
-            return 0.0
-        
-        dot_product = sum(a * b for a, b in zip(vec1, vec2))
-        norm1 = sum(a * a for a in vec1) ** 0.5
-        norm2 = sum(b * b for b in vec2) ** 0.5
-        
-        if norm1 == 0 or norm2 == 0:
-            return 0.0
-        
-        return dot_product / (norm1 * norm2)
+    def _contains_cjk(text: str) -> bool:
+        """Check if text contains CJK or related characters (Chinese, Japanese, Korean)."""
+        return bool(_RE_CONTAINS_CJK.search(text))
    
    @staticmethod
-    def _contains_cjk(text: str) -> bool:
-        """Check if text contains CJK (Chinese/Japanese/Korean) characters"""
-        import re
-        return bool(re.search(r'[\u4e00-\u9fff]', text))
+    def _build_trigram_query(raw_query: str) -> Optional[str]:
+        """
+        Build FTS5 MATCH query for the trigram tokenizer.
+        Extracts CJK sequences (including single characters) and ASCII words,
+        joining them with AND so all terms must appear in the matched chunk.
+        """
+        tokens = _RE_TRIGRAM_TOKENS.findall(raw_query)
+        tokens = [t for t in tokens if t]
+        if not tokens:
+            return None
+        # Escape embedded double-quotes (FTS5 uses "" inside quoted phrases)
+        quoted = [f'"{t.replace(chr(34), chr(34)*2)}"' for t in tokens]
+        return ' AND '.join(quoted)
+
+    def _search_fts5_trigram(
+        self,
+        query: str,
+        user_id: Optional[str],
+        scopes: List[str],
+        limit: int
+    ) -> List[SearchResult]:
+        """Trigram FTS5 search — handles CJK and mixed queries with BM25 ranking."""
+        trigram_query = self._build_trigram_query(query)
+        if not trigram_query:
+            return []
+
+        scope_placeholders = ','.join('?' * len(scopes))
+        params = [trigram_query] + list(scopes)
+
+        if user_id:
+            sql = f"""
+                SELECT chunks.*, bm25(chunks_fts_trigram) as rank
+                FROM chunks_fts_trigram
+                JOIN chunks ON chunks.rowid = chunks_fts_trigram.rowid
+                WHERE chunks_fts_trigram MATCH ?
+                AND chunks.scope IN ({scope_placeholders})
+                AND (chunks.scope = 'shared' OR chunks.user_id = ?)
+                ORDER BY rank
+                LIMIT ?
+            """
+            params.extend([user_id, limit])
+        else:
+            sql = f"""
+                SELECT chunks.*, bm25(chunks_fts_trigram) as rank
+                FROM chunks_fts_trigram
+                JOIN chunks ON chunks.rowid = chunks_fts_trigram.rowid
+                WHERE chunks_fts_trigram MATCH ?
+                AND chunks.scope IN ({scope_placeholders})
+                ORDER BY rank
+                LIMIT ?
+            """
+            params.append(limit)
+
+        try:
+            rows = self.conn.execute(sql, params).fetchall()
+            return [
+                SearchResult(
+                    path=row['path'],
+                    start_line=row['start_line'],
+                    end_line=row['end_line'],
+                    score=self._bm25_rank_to_score(row['rank']),
+                    snippet=self._truncate_text(row['text'], 500),
+                    source=row['source'],
+                    user_id=row['user_id']
+                )
+                for row in rows
+            ]
+        except Exception:
+            from common.log import logger
+            logger.warning("[MemoryStorage] _search_fts5_trigram failed, returning empty", exc_info=True)
+            return []

    @staticmethod
    def _build_fts_query(raw_query: str) -> Optional[str]:
@@ -688,7 +1014,6 @@ class MemoryStorage:
        Works best for English and word-based languages.
        For CJK characters, LIKE search will be used as fallback.
        """
-        import re
        # Extract words (primarily English words and numbers)
        tokens = re.findall(r'[A-Za-z0-9_]+', raw_query)
        if not tokens:
@@ -701,9 +1026,22 @@ class MemoryStorage:
    
    @staticmethod
    def _bm25_rank_to_score(rank: float) -> float:
-        """Convert BM25 rank to 0-1 score"""
-        normalized = max(0, rank) if rank is not None else 999
-        return 1 / (1 + normalized)
+        """Convert SQLite BM25 rank to a [0, 1) relevance score.
+
+        SQLite's bm25() returns a non-positive float (0 or negative).
+        More negative = more relevant.  The previous max(0, rank) clipped
+        every negative value to 0, making every score 1/(1+0) = 1.0 and
+        destroying all ranking information.
+
+        abs(rank) / (1 + abs(rank)) maps |rank| to [0, 1) (stronger match →
+        closer to 1); the result is rescaled to [0.3, 0.99).  None yields 0.0.
+        """
+        if rank is None:
+            return 0.0
+        # Add a floor of 0.3 so any FTS5 match always exceeds typical
+        # min_score thresholds (default 0.1).  Small-corpus ranks close to
+        # 0 would otherwise produce score≈0 and be filtered out downstream.
+        return 0.3 + 0.69 * (abs(rank) / (1.0 + abs(rank)))
    
    @staticmethod
    def _truncate_text(text: str, max_chars: int) -> str:
--- a/agent/protocol/init.py
+++ b/agent/protocol/init.py
@@ -3,6 +3,11 @@ from .agent_stream import AgentStreamExecutor
 from .task import Task, TaskType, TaskStatus
 from .result import AgentResult, AgentAction, AgentActionType, ToolResult
 from .models import LLMModel, LLMRequest, ModelFactory
+from .cancel import (
+    AgentCancelledError,
+    CancelTokenRegistry,
+    get_cancel_registry,
+)

 __all__ = [
    'Agent', 
@@ -16,5 +21,8 @@ __all__ = [
    'ToolResult',
    'LLMModel',
    'LLMRequest', 
-    'ModelFactory'
+    'ModelFactory',
+    'AgentCancelledError',
+    'CancelTokenRegistry',
+    'get_cancel_registry',
 ]
--- a/agent/protocol/agent.py
+++ b/agent/protocol/agent.py
@@ -365,7 +365,8 @@ class Agent:

        return action

-    def run_stream(self, user_message: str, on_event=None, clear_history: bool = False, skill_filter=None) -> str:
+    def run_stream(self, user_message: str, on_event=None, clear_history: bool = False,
+                   skill_filter=None, cancel_event=None) -> str:
        """
        Execute single agent task with streaming (based on tool-call)

@@ -374,6 +375,7 @@ class Agent:
        - Multi-turn reasoning based on tool-call
        - Event callbacks
        - Persistent conversation history across calls
+        - User-initiated cancellation via ``cancel_event``

        Args:
            user_message: User message
@@ -381,6 +383,11 @@ class Agent:
                     event = {"type": str, "timestamp": float, "data": dict}
            clear_history: If True, clear conversation history before this call (default: False)
            skill_filter: Optional list of skill names to include in this run
+            cancel_event: Optional threading.Event polled at agent checkpoints.
+                When set, the loop exits at the next safe point, appends a
+                "_(Cancelled by user)_" assistant note, and returns the
+                partial response ("_(Cancelled)_" when it is empty). ``messages``
+                stays in a valid state (tool_use/tool_result pairs preserved).

        Returns:
            Final response text
@@ -424,7 +431,8 @@ class Agent:
            max_turns=self.max_steps,
            on_event=on_event,
            messages=messages_copy,  # Pass copied message history
-            max_context_turns=max_context_turns
+            max_context_turns=max_context_turns,
+            cancel_event=cancel_event,
        )

        # Execute
--- a/agent/protocol/agent_stream.py
+++ b/agent/protocol/agent_stream.py
@@ -7,11 +7,19 @@ import json
 import time
 from typing import List, Dict, Any, Optional, Callable, Tuple

+from agent.protocol.cancel import AgentCancelledError
 from agent.protocol.models import LLMRequest, LLMModel
 from agent.protocol.message_utils import sanitize_claude_messages, compress_turn_to_text_only
 from agent.tools.base_tool import BaseTool, ToolResult
 from common.log import logger

+# Optional: repair malformed JSON args from non-strict providers (e.g. unescaped quotes in long content).
+try:
+    from json_repair import repair_json as _repair_json
+    _HAS_JSON_REPAIR = True
+except ImportError:
+    _HAS_JSON_REPAIR = False
+

 # Maximum number of characters of model "reasoning / thinking" content to persist
 # in conversation history. The full reasoning is still streamed to the UI in real
@@ -44,6 +52,30 @@ def _truncate_reasoning_for_storage(text: str) -> str:
    return head + _REASONING_TRUNCATE_MARKER.format(omitted=omitted) + tail


+def _parse_tool_args(args_str: str, finish_reason: Optional[str]) -> Tuple[dict, Optional[str]]:
+    """Parse tool args JSON. Returns (args, error_msg); error_msg is None on success.
+
+    On JSONDecodeError: detect truncation first (skip repair, surface max_tokens hint);
+    otherwise try json-repair for escape issues; finally fall back to the raw decoder error.
+    """
+    if not args_str:
+        return {}, None
+    try:
+        return json.loads(args_str), None
+    except json.JSONDecodeError as e:
+        if finish_reason in ("length", "max_tokens") or not args_str.rstrip().endswith("}"):
+            return {}, "Output truncated (max_tokens reached). Split content into smaller chunks across multiple tool calls."
+        if _HAS_JSON_REPAIR:
+            try:
+                repaired = _repair_json(args_str, return_objects=True)
+                if isinstance(repaired, dict):
+                    logger.warning(f"Tool args JSON repaired ({len(args_str)} chars)")
+                    return repaired, None
+            except Exception:
+                pass
+        return {}, f"Invalid JSON in tool arguments: {e.msg}"
+
+
 class AgentStreamExecutor:
    """
    Agent Stream Executor
@@ -64,7 +96,8 @@ class AgentStreamExecutor:
            max_turns: int = 50,
            on_event: Optional[Callable] = None,
            messages: Optional[List[Dict]] = None,
-            max_context_turns: int = 30
+            max_context_turns: int = 30,
+            cancel_event=None,
    ):
        """
        Initialize stream executor
@@ -78,6 +111,10 @@ class AgentStreamExecutor:
            on_event: Event callback function
            messages: Optional existing message history (for persistent conversations)
            max_context_turns: Maximum number of conversation turns to keep in context
+            cancel_event: Optional threading.Event used to signal user cancel.
+                Checked at every safe point (turn boundary, before tool execution,
+                during LLM streaming). When set, raises AgentCancelledError which
+                run_stream catches to gracefully wind down.
        """
        self.agent = agent
        self.model = model
@@ -87,6 +124,7 @@ class AgentStreamExecutor:
        self.max_turns = max_turns
        self.on_event = on_event
        self.max_context_turns = max_context_turns
+        self.cancel_event = cancel_event

        # Message history - use provided messages or create new list
        self.messages = messages if messages is not None else []
@@ -97,6 +135,73 @@ class AgentStreamExecutor:
        # Track files to send (populated by read tool)
        self.files_to_send = []  # List of file metadata dicts

+    def _check_cancelled(self) -> None:
+        """Raise AgentCancelledError if the user requested cancellation.
+
+        Called at safe points (turn start, between tool calls, between LLM
+        chunks). Cheap to call: just an Event.is_set() probe.
+        """
+        if self.cancel_event is not None and self.cancel_event.is_set():
+            raise AgentCancelledError("agent cancelled by user")
+
+    def _handle_cancelled(self, partial_response: str) -> None:
+        """Wind down ``self.messages`` after a user-initiated cancel.
+
+        The messages list may be in any of these states when we get here:
+          (a) Last message is an assistant message containing tool_use
+              blocks but the matching tool_result has not been appended yet.
+          (b) Last message is an assistant text-only reply (cancel happened
+              right before the next turn started).
+          (c) Last message is a user tool_result message and we cancelled
+              between turns.
+
+        For (a) we MUST synthesise tool_result blocks, otherwise the next
+        request will fail Claude/OpenAI's strict pairing validation. For
+        (b)/(c) the state is already valid and we just append a small
+        cancellation note so the user/LLM both see the boundary clearly.
+        """
+        try:
+            # Step 1: close any orphaned tool_use in the trailing assistant
+            # message by injecting matching tool_result blocks.
+            if self.messages and isinstance(self.messages[-1], dict) \
+                    and self.messages[-1].get("role") == "assistant":
+                last = self.messages[-1]
+                content = last.get("content")
+                if isinstance(content, list):
+                    pending_tool_use_ids = [
+                        block.get("id")
+                        for block in content
+                        if isinstance(block, dict) and block.get("type") == "tool_use"
+                    ]
+                    pending_tool_use_ids = [tid for tid in pending_tool_use_ids if tid]
+                    if pending_tool_use_ids:
+                        tool_result_blocks = [
+                            {
+                                "type": "tool_result",
+                                "tool_use_id": tid,
+                                "content": "Cancelled by user before this tool finished.",
+                                "is_error": True,
+                            }
+                            for tid in pending_tool_use_ids
+                        ]
+                        self.messages.append({
+                            "role": "user",
+                            "content": tool_result_blocks,
+                        })
+                        logger.info(
+                            f"[Agent] Injected {len(tool_result_blocks)} cancellation "
+                            f"tool_result blocks to keep message history valid"
+                        )
+
+            # Step 2: append a stable "interrupted" marker so the LLM sees a
+            # clear stop boundary on the next turn.
+            self.messages.append({
+                "role": "assistant",
+                "content": [{"type": "text", "text": "_(Cancelled by user)_"}],
+            })
+        except Exception as e:
+            logger.warning(f"[Agent] _handle_cancelled cleanup failed: {e}")
+
    def _emit_event(self, event_type: str, data: dict = None):
        """Emit event"""
        if self.on_event:
@@ -270,8 +375,13 @@ class AgentStreamExecutor:
        final_response = ""
        turn = 0

+        cancelled = False
        try:
            while turn < self.max_turns:
+                # Check at the very top of every turn so a cancel arriving
+                # between turns short-circuits cleanly.
+                self._check_cancelled()
+
                turn += 1
                logger.info(f"[Agent] 第 {turn} 轮")
                self._emit_event("turn_start", {"turn": turn})
@@ -375,6 +485,8 @@ class AgentStreamExecutor:

                try:
                    for tool_call in tool_calls:
+                        # Honour cancel between tool invocations within the same turn
+                        self._check_cancelled()
                        result = self._execute_tool(tool_call)
                        tool_results.append(result)
                        
@@ -557,6 +669,15 @@ class AgentStreamExecutor:
                        self.messages.pop(prompt_insert_idx)
                        logger.debug("[Agent] Removed injected max-steps prompt from message history")

+        except AgentCancelledError:
+            # User-initiated stop: wind down message history cleanly so the
+            # next turn is unaffected; channels emit a "cancelled" UI event.
+            cancelled = True
+            logger.info(f"[Agent] 🛑 已被用户中止 (第 {turn} 轮)")
+            self._handle_cancelled(final_response)
+            if not final_response or not final_response.strip():
+                final_response = "_(Cancelled)_"
+
        except Exception as e:
            logger.error(f"❌ Agent执行错误: {e}")
            self._emit_event("error", {"error": str(e)})
@@ -564,8 +685,11 @@ class AgentStreamExecutor:

        finally:
            final_response = final_response.strip() if final_response else final_response
-            logger.info(f"[Agent] 🏁 完成 ({turn}轮)")
-            self._emit_event("agent_end", {"final_response": final_response})
+            if cancelled:
+                # Emit before agent_end so channels can mark UI as cancelled
+                self._emit_event("agent_cancelled", {"final_response": final_response})
+            logger.info(f"[Agent] 🏁 完成 ({turn}轮)" + (" [cancelled]" if cancelled else ""))
+            self._emit_event("agent_end", {"final_response": final_response, "cancelled": cancelled})

        return final_response

@@ -603,15 +727,24 @@ class AgentStreamExecutor:
        except Exception as e:
            logger.debug(f"[Agent] MCP sync skipped: {e}")

-        # Prepare tool definitions (OpenAI/Claude format)
+        # Prepare tool definitions. Prefer get_json_schema() when it yields
+        # real properties (lets tools augment schema at runtime), otherwise
+        # fall back to the static `tool.params` (MCP tools rely on this).
        tools_schema = None
        if self.tools:
            tools_schema = []
            for tool in self.tools.values():
+                input_schema = tool.params
+                try:
+                    dynamic = (tool.get_json_schema() or {}).get("parameters") or {}
+                    if dynamic.get("properties"):
+                        input_schema = dynamic
+                except Exception:
+                    pass
                tools_schema.append({
                    "name": tool.name,
                    "description": tool.description,
-                    "input_schema": tool.params  # Claude uses input_schema
+                    "input_schema": input_schema,
                })

        # Create request
@@ -635,7 +768,32 @@ class AgentStreamExecutor:
        try:
            stream = self.model.call_stream(request)

+            # Probe cancel every N chunks to bound reaction time without
+            # checking on every token.
+            _cancel_probe_counter = 0
+            _CANCEL_PROBE_EVERY = 8
+
            for chunk in stream:
+                _cancel_probe_counter += 1
+                if _cancel_probe_counter >= _CANCEL_PROBE_EVERY:
+                    _cancel_probe_counter = 0
+                    if self.cancel_event is not None and self.cancel_event.is_set():
+                        # Persist partial text only; tool_use args may be
+                        # truncated mid-stream and would fail validation.
+                        logger.info("[Agent] cancel detected mid-stream, aborting LLM call")
+                        if full_content:
+                            partial_msg = {
+                                "role": "assistant",
+                                "content": [{"type": "text", "text": full_content}],
+                            }
+                            self.messages.append(partial_msg)
+                        self._emit_event("message_end", {
+                            "content": full_content,
+                            "tool_calls": [],
+                            "cancelled": True,
+                        })
+                        raise AgentCancelledError("cancelled during LLM streaming")
+
                # Check for errors
                if isinstance(chunk, dict) and chunk.get("error"):
                    # Extract error message from nested structure
@@ -729,6 +887,10 @@ class AgentStreamExecutor:
                    elif isinstance(choice, dict) and choice.get("_gemini_raw_parts"):
                        gemini_raw_parts = choice["_gemini_raw_parts"]

+        except AgentCancelledError:
+            # Must propagate untouched; never treat as a retryable error.
+            raise
+
        except Exception as e:
            error_str = str(e)
            error_str_lower = error_str.lower()
@@ -842,26 +1004,17 @@ class AgentStreamExecutor:
                import uuid
                tool_id = f"call_{uuid.uuid4().hex[:24]}"

-            try:
-                # Safely get arguments, handle None case
            args_str = tc.get("arguments") or ""
-                arguments = json.loads(args_str) if args_str else {}
-            except json.JSONDecodeError as e:
-                # Handle None or invalid arguments safely
-                args_str = tc.get('arguments') or ""
-                args_preview = args_str[:200] if len(args_str) > 200 else args_str
-                logger.error(f"Failed to parse tool arguments for {tc['name']}")
-                logger.error(f"Arguments length: {len(args_str)} chars")
-                logger.error(f"Arguments preview: {args_preview}...")
-                logger.error(f"JSON decode error: {e}")
-
-                # Return a clear error message to the LLM instead of empty dict
-                # This helps the LLM understand what went wrong
+            arguments, parse_err = _parse_tool_args(args_str, stop_reason)
+            if parse_err:
+                logger.error(
+                    f"Tool args parse failed for {tc['name']} ({len(args_str)} chars): {parse_err}"
+                )
                tool_calls.append({
                    "id": tool_id,
                    "name": tc["name"],
                    "arguments": {},
-                    "_parse_error": f"Invalid JSON in tool arguments: {args_preview}... Error: {str(e)}. Tip: For large content, consider splitting into smaller chunks or using a different approach."
+                    "_parse_error": parse_err,
                })
                continue

@@ -949,14 +1102,11 @@ class AgentStreamExecutor:
        tool_id = tool_call["id"]
        arguments = tool_call["arguments"]

-        # Check if there was a JSON parse error
        if "_parse_error" in tool_call:
-            parse_error = tool_call["_parse_error"]
-            logger.error(f"Skipping tool execution due to parse error: {parse_error}")
            result = {
                "status": "error",
-                "result": f"Failed to parse tool arguments. {parse_error}. Please ensure your tool call uses valid JSON format with all required parameters.",
-                "execution_time": 0
+                "result": tool_call["_parse_error"],
+                "execution_time": 0,
            }
            self._record_tool_result(tool_name, arguments, False)
            return result
--- a/agent/protocol/cancel.py
+++ b/agent/protocol/cancel.py
@@ -0,0 +1,121 @@
+"""
+Cancel token registry for aborting in-flight agent runs.
+
+A user cancel (web Cancel button, /cancel command) sets a threading.Event
+that the agent loop polls at safe checkpoints. Tokens are keyed by
+request_id (preferred) and tracked under session_id as a fallback. Entries
+are released after the run completes to keep the registry bounded.
+
+No project deps — importable from any layer without circular imports.
+"""
+
+from __future__ import annotations
+
+import threading
+from typing import Dict, Optional
+
+
+class AgentCancelledError(Exception):
+    """Raised inside the agent loop when a stop has been requested.
+
+    The agent stream executor catches this, appends a "_(Cancelled by user)_"
+    note to the message history (preserving tool_use/tool_result integrity)
+    and returns a partial response to the caller.
+    """
+
+
+class _CancelEntry:
+    __slots__ = ("event", "session_id")
+
+    def __init__(self, session_id: Optional[str]):
+        self.event = threading.Event()
+        self.session_id = session_id
+
+
+class CancelTokenRegistry:
+    """In-process registry mapping request_id -> cancel Event.
+
+    Thread-safe. Singleton via module-level ``_registry``.
+    """
+
+    def __init__(self):
+        self._lock = threading.Lock()
+        self._by_request: Dict[str, _CancelEntry] = {}
+        # session_id -> set of request_ids currently in flight (usually 1).
+        self._by_session: Dict[str, set] = {}
+
+    def register(self, request_id: str, session_id: Optional[str] = None) -> threading.Event:
+        """Create (or return existing) cancel event for a request.
+
+        Returns the threading.Event the caller should poll via ``is_set()``.
+        """
+        if not request_id:
+            return threading.Event()
+        with self._lock:
+            entry = self._by_request.get(request_id)
+            if entry is None:
+                entry = _CancelEntry(session_id)
+                self._by_request[request_id] = entry
+                if session_id:
+                    self._by_session.setdefault(session_id, set()).add(request_id)
+            return entry.event
+
+    def get_event(self, request_id: str) -> Optional[threading.Event]:
+        if not request_id:
+            return None
+        with self._lock:
+            entry = self._by_request.get(request_id)
+            return entry.event if entry else None
+
+    def cancel_request(self, request_id: str) -> bool:
+        """Trigger cancel for a specific request. Returns True when matched."""
+        if not request_id:
+            return False
+        with self._lock:
+            entry = self._by_request.get(request_id)
+        if entry is None:
+            return False
+        entry.event.set()
+        return True
+
+    def cancel_session(self, session_id: str) -> int:
+        """Trigger cancel for every in-flight request of a session.
+
+        Returns the number of requests cancelled (0 when nothing was running).
+        """
+        if not session_id:
+            return 0
+        with self._lock:
+            request_ids = list(self._by_session.get(session_id, ()))
+            entries = [self._by_request[r] for r in request_ids if r in self._by_request]
+        for entry in entries:
+            entry.event.set()
+        return len(entries)
+
+    def unregister(self, request_id: str) -> None:
+        """Remove an entry once the agent run is done. Safe to call twice."""
+        if not request_id:
+            return
+        with self._lock:
+            entry = self._by_request.pop(request_id, None)
+            if entry and entry.session_id:
+                bucket = self._by_session.get(entry.session_id)
+                if bucket is not None:
+                    bucket.discard(request_id)
+                    if not bucket:
+                        self._by_session.pop(entry.session_id, None)
+
+    def has_active(self, session_id: str) -> bool:
+        if not session_id:
+            return False
+        with self._lock:
+            bucket = self._by_session.get(session_id)
+            return bool(bucket)
+
+
+_registry = CancelTokenRegistry()
+
+
+def get_cancel_registry() -> CancelTokenRegistry:
+    """Module-level accessor for the singleton registry."""
+    return _registry
--- a/agent/tools/browser/browser_service.py
+++ b/agent/tools/browser/browser_service.py
@@ -15,7 +15,7 @@ import threading
 from typing import Optional, Dict, Any, List, Callable

 from common.log import logger
-from common.utils import expand_path
+from common.utils import expand_path, is_cloud_deployment


 _DEFAULT_USER_DATA_DIR = "~/.cow/browser_profile"
@@ -436,6 +436,20 @@ class BrowserService:
        if self._headless:
            launch_args.append("--no-sandbox")

+        if is_cloud_deployment():
+            launch_args.extend([
+                "--disable-gpu",
+                "--disable-software-rasterizer",
+                "--disable-extensions",
+                "--disable-background-networking",
+                "--disable-background-timer-throttling",
+                "--disable-renderer-backgrounding",
+                "--disable-features=site-per-process,TranslateUI,IsolateOrigins",
+                "--no-zygote",
+                "--js-flags=--max-old-space-size=384",
+                "--memory-pressure-off",
+            ])
+
        extra_args = self._config.get("launch_args", [])
        if extra_args:
            launch_args.extend(extra_args)
--- a/agent/tools/browser/browser_tool.py
+++ b/agent/tools/browser/browser_tool.py
@@ -145,7 +145,8 @@ class BrowserTool(BaseTool):
        url = args.get("url", "").strip()
        if not url:
            return ToolResult.fail("Error: 'url' is required for navigate action")
-        if not url.startswith(("http://", "https://")):
+        # Only auto-prepend https:// for bare hosts; preserve file://, about:, data:, etc.
+        if "://" not in url and not url.startswith(("about:", "data:")):
            url = "https://" + url
        timeout = args.get("timeout", 30000)
        service = self._get_service()
--- a/agent/tools/mcp/mcp_client.py
+++ b/agent/tools/mcp/mcp_client.py
@@ -1,8 +1,8 @@
 """
 MCP (Model Context Protocol) client module.

-Implements JSON-RPC 2.0 over stdio and SSE transports without any external
-MCP SDK dependency.
+Implements JSON-RPC 2.0 over stdio, SSE and Streamable HTTP transports
+without any external MCP SDK dependency.
 """

 import json
@@ -17,18 +17,29 @@ from typing import Optional
 from common.log import logger


+# Aliases accepted for the Streamable HTTP transport type
+_STREAMABLE_HTTP_ALIASES = {"streamable-http", "streamable_http", "streamablehttp", "http"}
+
+
 class McpClient:
-    """Single MCP Server client supporting stdio and SSE transports."""
+    """Single MCP Server client supporting stdio, SSE and Streamable HTTP transports."""

    def __init__(self, config: dict):
        """
        config examples:
          stdio:           {"name": "filesystem", "type": "stdio", "command": "npx", "args": [...]}
          SSE:             {"name": "my-api",    "type": "sse",   "url": "http://localhost:8000/sse"}
+          streamable-http: {"name": "pubmed",    "type": "streamable-http", "url": "https://x/mcp"}
        """
        self.config = config
        self.name: str = config.get("name", "unknown")
-        self.transport: str = config.get("type", "stdio")
+        raw_transport: str = config.get("type", "stdio")
+        # Normalize streamable-http aliases to a single internal key
+        self.transport: str = (
+            "streamable-http"
+            if raw_transport.lower() in _STREAMABLE_HTTP_ALIASES
+            else raw_transport
+        )

        # stdio state
        self._proc: Optional[subprocess.Popen] = None
@@ -37,6 +48,11 @@ class McpClient:
        self._sse_url: Optional[str] = None
        self._post_url: Optional[str] = None  # endpoint for sending messages (resolved from SSE)

+        # Streamable HTTP state
+        self._http_url: Optional[str] = None
+        self._http_headers: dict = {}  # extra headers from user config (e.g. Authorization)
+        self._http_session_id: Optional[str] = None  # Mcp-Session-Id assigned by the server
+
        # Shared state
        self._next_id = 1
        self._id_lock = threading.Lock()
@@ -54,6 +70,8 @@ class McpClient:
                return self._init_stdio()
            elif self.transport == "sse":
                return self._init_sse()
+            elif self.transport == "streamable-http":
+                return self._init_streamable_http()
            else:
                logger.warning(f"[MCP:{self.name}] Unknown transport type: {self.transport!r}")
                return False
@@ -109,6 +127,21 @@ class McpClient:
                    pass
            self._proc = None
            logger.debug(f"[MCP:{self.name}] stdio process terminated")
+
+        # Best-effort streamable-http session termination
+        if self.transport == "streamable-http" and self._http_session_id and self._http_url:
+            try:
+                req = urllib.request.Request(
+                    self._http_url,
+                    method="DELETE",
+                    headers={"Mcp-Session-Id": self._http_session_id, **self._http_headers},
+                )
+                with urllib.request.urlopen(req, timeout=5):
+                    pass
+            except Exception:
+                pass
+            self._http_session_id = None
+
        self._initialized = False

    # ------------------------------------------------------------------
@@ -234,6 +267,120 @@ class McpClient:
            raw = resp.read().decode("utf-8")
            return json.loads(raw)

+    # ------------------------------------------------------------------
+    # Streamable HTTP transport (MCP spec 2025-03-26)
+    # ------------------------------------------------------------------
+
+    def _init_streamable_http(self) -> bool:
+        """Read 'url' and 'headers' from the config, then run the MCP handshake."""
+        endpoint = self.config.get("url")
+        if not endpoint:
+            logger.warning(f"[MCP:{self.name}] streamable-http config missing 'url'")
+            return False
+        self._http_url = endpoint
+
+        # Optional user headers (e.g. {"Authorization": "Bearer xxx"}); a non-dict value is ignored.
+        configured = self.config.get("headers") or {}
+        if isinstance(configured, dict):
+            self._http_headers = dict((str(name), str(value)) for name, value in configured.items())
+        return self._handshake()
+
+    def _streamable_http_send(self, message: dict) -> dict:
+        """POST a JSON-RPC request and return the response (JSON or SSE-wrapped).
+
+        Delegates to ``_streamable_http_post`` with ``expect_response=True``.
+        """
+        return self._streamable_http_post(message, expect_response=True)
+
+    def _streamable_http_post(self, message: dict, expect_response: bool) -> dict:
+        """
+        POST a JSON-RPC message over Streamable HTTP.
+
+        Per the spec, the response Content-Type can be either:
+          - application/json   -> single JSON-RPC response in body
+          - text/event-stream  -> SSE stream; we read until we get a matching response
+
+        Args:
+            message: JSON-serializable JSON-RPC message; its "id" (if any) is
+                used to pick the matching response out of an SSE stream.
+            expect_response: False for notifications; the body is then drained
+                and {} is returned.
+
+        Returns:
+            The decoded response, or {} for notifications, HTTP 202 and
+            empty bodies.
+
+        Raises:
+            IOError: the server answered with an HTTP error status, or an SSE
+                stream ended before a matching response arrived.
+        """
+        body = json.dumps(message).encode("utf-8")
+        headers = {
+            "Content-Type": "application/json",
+            "Accept": "application/json, text/event-stream",
+        }
+        if self._http_session_id:
+            headers["Mcp-Session-Id"] = self._http_session_id
+        # User headers are applied last, so they override the defaults above.
+        headers.update(self._http_headers)
+
+        req = urllib.request.Request(
+            self._http_url,
+            data=body,
+            method="POST",
+            headers=headers,
+        )
+
+        try:
+            resp = urllib.request.urlopen(req, timeout=30)
+        except urllib.error.HTTPError as e:
+            # Surface the server-provided error body for easier debugging
+            detail = ""
+            try:
+                # HTTPError doubles as the response object; close it once read.
+                with e:
+                    detail = e.read().decode("utf-8", errors="ignore")
+            except Exception:
+                pass
+            # Chain the original error so the HTTP failure stays in the traceback.
+            raise IOError(
+                f"[MCP:{self.name}] streamable-http HTTP {e.code}: {detail[:200]}"
+            ) from e
+
+        with resp:
+            # Capture session id assigned by the server (if any)
+            session_id = resp.headers.get("Mcp-Session-Id")
+            if session_id and not self._http_session_id:
+                self._http_session_id = session_id
+
+            status = resp.status if hasattr(resp, "status") else resp.getcode()
+
+            # Notifications: server may reply with 202 Accepted and no body
+            if not expect_response or status == 202:
+                try:
+                    resp.read()
+                except Exception:
+                    pass
+                return {}
+
+            content_type = (resp.headers.get("Content-Type") or "").lower()
+            expected_id = message.get("id")
+
+            if "text/event-stream" in content_type:
+                return self._read_sse_response(resp, expected_id)
+
+            raw = resp.read().decode("utf-8")
+            if not raw:
+                return {}
+            return json.loads(raw)
+
+    def _read_sse_response(self, resp, expected_id) -> dict:
+        """Read an SSE stream and return the first JSON-RPC response with matching id.
+
+        Args:
+            resp: iterable of raw byte lines (the open HTTP response).
+            expected_id: id of the request being answered; None accepts the
+                first response seen.
+
+        Returns:
+            The decoded JSON-RPC response object.
+
+        Raises:
+            IOError: the stream ended before a matching response arrived.
+        """
+        data_buf: list = []
+        for raw_line in resp:
+            line = raw_line.decode("utf-8").rstrip("\n\r")
+            if line == "":
+                # End of an SSE event, attempt to parse accumulated data
+                if data_buf:
+                    payload = "\n".join(data_buf)
+                    data_buf = []
+                    try:
+                        decoded = json.loads(payload)
+                    except json.JSONDecodeError:
+                        continue
+                    # One event may carry a single message or a batch (JSON array).
+                    for msg in decoded if isinstance(decoded, list) else [decoded]:
+                        # Skip non-objects, and anything carrying "method": that is a
+                        # notification or a server-initiated request, whose id can
+                        # coincide with ours without being the answer to it.
+                        if not isinstance(msg, dict) or "method" in msg or "id" not in msg:
+                            continue
+                        if expected_id is None or msg.get("id") == expected_id:
+                            return msg
+                continue
+            if line.startswith(":"):
+                continue  # SSE comment / keepalive
+            if line.startswith("data:"):
+                data_buf.append(line[len("data:"):].lstrip())
+            # Ignore 'event:' / 'id:' lines; we only care about JSON-RPC payloads
+
+        raise IOError(f"[MCP:{self.name}] streamable-http SSE stream closed before response")
+
    # ------------------------------------------------------------------
    # Common JSON-RPC helpers
    # ------------------------------------------------------------------
@@ -267,6 +414,8 @@ class McpClient:
                return self._stdio_send(message)
            elif self.transport == "sse":
                return self._sse_send(message)
+            elif self.transport == "streamable-http":
+                return self._streamable_http_send(message)
            else:
                raise ValueError(f"[MCP:{self.name}] Unsupported transport: {self.transport}")

@@ -291,6 +440,11 @@ class McpClient:
                    pass
            except Exception:
                pass  # notifications are fire-and-forget
+        elif self.transport == "streamable-http":
+            try:
+                self._streamable_http_post(notification, expect_response=False)
+            except Exception:
+                pass  # notifications are fire-and-forget

    def _handshake(self) -> bool:
        """Perform the MCP initialize / notifications/initialized handshake."""
--- a/agent/tools/scheduler/integration.py
+++ b/agent/tools/scheduler/integration.py
@@ -57,34 +57,44 @@ def init_scheduler(agent_bridge) -> bool:
                _task_store = TaskStore(store_path)
                logger.debug(f"[Scheduler] Task store initialized: {store_path}")

-            # Create execute callback
+            # Create execute callback. Returns True on success, False to ask
+            # the scheduler to retry on the next tick (e.g. channel not yet
+            # ready right after process start).
            def execute_task_callback(task: dict):
-                """Callback to execute a scheduled task"""
                try:
                    action = task.get("action", {})
                    action_type = action.get("type")
+                    channel_type = action.get("channel_type", "unknown")
+                    receiver = action.get("receiver", "")
+
+                    if not _is_channel_ready(channel_type, receiver):
+                        logger.warning(
+                            f"[Scheduler] Task {task.get('id')}: channel "
+                            f"'{channel_type}' not ready for receiver={receiver} "
+                            f"(no inbound msg cached since restart?); deferring"
+                        )
+                        return False

                    if action_type == "agent_task":
-                        _execute_agent_task(task, agent_bridge)
+                        return _execute_agent_task(task, agent_bridge)
                    elif action_type == "send_message":
-                        # Legacy support for old tasks
-                        _execute_send_message(task, agent_bridge)
+                        return _execute_send_message(task, agent_bridge)
                    elif action_type == "tool_call":
-                        # Legacy support for old tasks
-                        _execute_tool_call(task, agent_bridge)
+                        return _execute_tool_call(task, agent_bridge)
                    elif action_type == "skill_call":
-                        # Legacy support for old tasks
-                        _execute_skill_call(task, agent_bridge)
+                        return _execute_skill_call(task, agent_bridge)
                    else:
                        logger.warning(f"[Scheduler] Unknown action type: {action_type}")
+                        return True
                except Exception as e:
                    logger.error(f"[Scheduler] Error executing task {task.get('id')}: {e}")
+                    return False

            # Create scheduler service
            _scheduler_service = SchedulerService(_task_store, execute_task_callback)
            _scheduler_service.start()

-            logger.debug("[Scheduler] Scheduler service initialized and started")
+            logger.info("[Scheduler] Service initialized and started")
            return True

        except Exception as e:
@@ -92,6 +102,40 @@ def init_scheduler(agent_bridge) -> bool:
            return False


+def _is_channel_ready(channel_type: str, receiver: str) -> bool:
+    """Best-effort readiness probe for outbound channels.
+
+    Returns False when we know the send will drop (e.g. weixin not yet
+    logged in, web session has no polling queue), so the scheduler can
+    defer instead of consuming the task. Unknown channels return True
+    to preserve previous behaviour.
+
+    A task without a receiver also returns True: it can never become
+    ready, so it is passed on to the executor, which logs the missing
+    receiver and consumes the task instead of deferring it on every tick.
+
+    Args:
+        channel_type: channel name from the task action ("weixin", "web", ...).
+        receiver: target id looked up in the channel's per-receiver state.
+
+    Returns:
+        True when the send may proceed or readiness cannot be determined,
+        False when the channel is known not to be ready for this receiver.
+    """
+    if not channel_type or channel_type == "unknown":
+        return True
+    if not receiver:
+        # Malformed task: deferring would retry forever, so let it through.
+        return True
+    try:
+        from channel.channel_factory import create_channel
+        channel = create_channel(channel_type)
+        if channel is None:
+            return False
+
+        if channel_type == "weixin":
+            # Ready only once a context token is cached for this receiver.
+            tokens = getattr(channel, "_context_tokens", None)
+            return bool(tokens) and receiver in tokens
+
+        if channel_type == "web":
+            # Ready only while the session has a queue registered.
+            queues = getattr(channel, "session_queues", None)
+            return bool(queues) and receiver in queues
+
+        return True
+    except Exception as e:
+        # A failing probe must not block delivery: fall back to "ready".
+        logger.warning(f"[Scheduler] Channel readiness check failed for {channel_type}: {e}")
+        return True
+
+
 def get_task_store():
    """Get the global task store instance"""
    return _task_store
@@ -145,13 +189,10 @@ def _remember_delivered_output(
        )


-def _execute_agent_task(task: dict, agent_bridge):
+def _execute_agent_task(task: dict, agent_bridge) -> bool:
    """
-    Execute an agent_task action - let Agent handle the task
-    
-    Args:
-        task: Task dictionary
-        agent_bridge: AgentBridge instance
+    Execute an agent_task action - let Agent handle the task.
+    Returns True on successful delivery, False to retry next tick.
    """
    try:
        action = task.get("action", {})
@@ -162,11 +203,11 @@ def _execute_agent_task(task: dict, agent_bridge):
        
        if not task_description:
            logger.error(f"[Scheduler] Task {task['id']}: No task_description specified")
-            return
+            return True  # malformed task, don't loop forever
        
        if not receiver:
            logger.error(f"[Scheduler] Task {task['id']}: No receiver specified")
-            return
+            return True
        
        # Check for unsupported channels
        if channel_type == "dingtalk":
@@ -210,50 +251,46 @@ def _execute_agent_task(task: dict, agent_bridge):
            # Don't clear history - scheduler tasks use isolated session_id so they won't pollute user conversations
            reply = agent_bridge.agent_reply(task_description, context=context, on_event=None, clear_history=False)

-            if reply and reply.content:
-                # Send the reply via channel
-                from channel.channel_factory import create_channel
+            if not (reply and reply.content):
+                logger.error(f"[Scheduler] Task {task['id']}: No result from agent execution")
+                return True  # agent ran but produced nothing; don't loop

-                try:
+            from channel.channel_factory import create_channel
            channel = create_channel(channel_type)
-                    if channel:
-                        # For web channel, register request_id
+            if not channel:
+                logger.error(f"[Scheduler] Failed to create channel: {channel_type}")
+                return False
+
            if channel_type == "web" and hasattr(channel, 'request_to_session'):
                request_id = context.get("request_id")
                if request_id:
                    channel.request_to_session[request_id] = receiver
-                                logger.debug(f"[Scheduler] Registered request_id {request_id} -> session {receiver}")

-                        # Send the reply
+            try:
                channel.send(reply, context)
-                        _remember_delivered_output(agent_bridge, task, channel_type, reply.content)
-                        logger.info(f"[Scheduler] Task {task['id']} executed successfully, result sent to {receiver}")
-                    else:
-                        logger.error(f"[Scheduler] Failed to create channel: {channel_type}")
            except Exception as e:
                logger.error(f"[Scheduler] Failed to send result: {e}")
-            else:
-                logger.error(f"[Scheduler] Task {task['id']}: No result from agent execution")
+                return False
+
+            _remember_delivered_output(agent_bridge, task, channel_type, reply.content)
+            logger.info(f"[Scheduler] Task {task['id']} executed successfully, result sent to {receiver}")
+            return True

        except Exception as e:
            logger.error(f"[Scheduler] Failed to execute task via Agent: {e}")
            import traceback
            logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
+            return False

    except Exception as e:
        logger.error(f"[Scheduler] Error in _execute_agent_task: {e}")
        import traceback
        logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
+        return False


-def _execute_send_message(task: dict, agent_bridge):
-    """
-    Execute a send_message action
-    
-    Args:
-        task: Task dictionary
-        agent_bridge: AgentBridge instance
-    """
+def _execute_send_message(task: dict, agent_bridge) -> bool:
+    """Execute a send_message action. Returns True/False for delivery."""
    try:
        action = task.get("action", {})
        content = action.get("content", "")
@@ -263,7 +300,7 @@ def _execute_send_message(task: dict, agent_bridge):
        
        if not receiver:
            logger.error(f"[Scheduler] Task {task['id']}: No receiver specified")
-            return
+            return True
        
        # Create context for sending message
        context = Context(ContextType.TEXT, content)
@@ -308,41 +345,35 @@ def _execute_send_message(task: dict, agent_bridge):
        # Get channel and send
        from channel.channel_factory import create_channel
        
-        try:
        channel = create_channel(channel_type)
-            if channel:
-                # For web channel, register the request_id to session mapping
+        if not channel:
+            logger.error(f"[Scheduler] Failed to create channel: {channel_type}")
+            return False
+
        if channel_type == "web" and hasattr(channel, 'request_to_session'):
            channel.request_to_session[request_id] = receiver
-                    logger.debug(f"[Scheduler] Registered request_id {request_id} -> session {receiver}")

+        try:
            channel.send(reply, context)
-                _remember_delivered_output(agent_bridge, task, channel_type, content)
-                logger.info(f"[Scheduler] Task {task['id']} executed: sent message to {receiver}")
-            else:
-                logger.error(f"[Scheduler] Failed to create channel: {channel_type}")
        except Exception as e:
            logger.error(f"[Scheduler] Failed to send message: {e}")
-            import traceback
-            logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
+            return False
+
+        _remember_delivered_output(agent_bridge, task, channel_type, content)
+        logger.info(f"[Scheduler] Task {task['id']} executed: sent message to {receiver}")
+        return True

    except Exception as e:
        logger.error(f"[Scheduler] Error in _execute_send_message: {e}")
        import traceback
        logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
+        return False


-def _execute_tool_call(task: dict, agent_bridge):
-    """
-    Execute a tool_call action
-    
-    Args:
-        task: Task dictionary
-        agent_bridge: AgentBridge instance
-    """
+def _execute_tool_call(task: dict, agent_bridge) -> bool:
+    """Execute a tool_call action. Returns True/False for delivery."""
    try:
        action = task.get("action", {})
-        # Support both old and new field names
        tool_name = action.get("call_name") or action.get("tool_name")
        tool_params = action.get("call_params") or action.get("tool_params", {})
        result_prefix = action.get("result_prefix", "")
@@ -352,90 +383,70 @@ def _execute_tool_call(task: dict, agent_bridge):

        if not tool_name:
            logger.error(f"[Scheduler] Task {task['id']}: No tool_name specified")
-            return
-        
+            return True
        if not receiver:
            logger.error(f"[Scheduler] Task {task['id']}: No receiver specified")
-            return
+            return True

-        # Get tool manager and create tool instance
        from agent.tools.tool_manager import ToolManager
-        tool_manager = ToolManager()
-        tool = tool_manager.create_tool(tool_name)
-        
+        tool = ToolManager().create_tool(tool_name)
        if not tool:
            logger.error(f"[Scheduler] Task {task['id']}: Tool '{tool_name}' not found")
-            return
+            return True

-        # Execute tool
        logger.info(f"[Scheduler] Task {task['id']}: Executing tool '{tool_name}' with params {tool_params}")
        result = tool.execute(tool_params)
-        
-        # Get result content
-        if hasattr(result, 'result'):
-            content = result.result
-        else:
-            content = str(result)
-        
-        # Add prefix if specified
+        content = result.result if hasattr(result, 'result') else str(result)
        if result_prefix:
            content = f"{result_prefix}\n\n{content}"

-        # Send result as message
        context = Context(ContextType.TEXT, content)
        context["receiver"] = receiver
        context["isgroup"] = is_group
        context["session_id"] = receiver

-        # Channel-specific context setup
+        request_id = None
        if channel_type == "web":
-            # Web channel needs request_id
            import uuid
            request_id = f"scheduler_{task['id']}_{uuid.uuid4().hex[:8]}"
            context["request_id"] = request_id
-            logger.debug(f"[Scheduler] Generated request_id for web channel: {request_id}")
        elif channel_type == "feishu":
            context["receive_id_type"] = "chat_id" if is_group else "open_id"
            context["msg"] = None
-            logger.debug(f"[Scheduler] Feishu: receive_id_type={context['receive_id_type']}, is_group={is_group}, receiver={receiver}")
        elif channel_type == "wecom_bot":
            context["msg"] = None

        reply = Reply(ReplyType.TEXT, content)

-        # Get channel and send
        from channel.channel_factory import create_channel
+        channel = create_channel(channel_type)
+        if not channel:
+            logger.error(f"[Scheduler] Failed to create channel: {channel_type}")
+            return False
+
+        if channel_type == "web" and request_id and hasattr(channel, 'request_to_session'):
+            channel.request_to_session[request_id] = receiver

        try:
-            channel = create_channel(channel_type)
-            if channel:
-                if channel_type == "web" and hasattr(channel, 'request_to_session'):
-                    channel.request_to_session[request_id] = receiver
-                    logger.debug(f"[Scheduler] Registered request_id {request_id} -> session {receiver}")
-
            channel.send(reply, context)
-                _remember_delivered_output(agent_bridge, task, channel_type, content)
-                logger.info(f"[Scheduler] Task {task['id']} executed: sent tool result to {receiver}")
-            else:
-                logger.error(f"[Scheduler] Failed to create channel: {channel_type}")
        except Exception as e:
            logger.error(f"[Scheduler] Failed to send tool result: {e}")
+            return False
+
+        _remember_delivered_output(agent_bridge, task, channel_type, content)
+        logger.info(f"[Scheduler] Task {task['id']} executed: sent tool result to {receiver}")
+        return True

    except Exception as e:
        logger.error(f"[Scheduler] Error in _execute_tool_call: {e}")
+        return False


-def _execute_skill_call(task: dict, agent_bridge):
-    """
-    Execute a skill_call action by asking Agent to run the skill
-    
-    Args:
-        task: Task dictionary
-        agent_bridge: AgentBridge instance
-    """
+def _execute_skill_call(task: dict, agent_bridge) -> bool:
+    """Execute a skill_call action by asking Agent to run the skill.
+    Returns True/False for delivery."""
    try:
        action = task.get("action", {})
-        # Support both old and new field names
        skill_name = action.get("call_name") or action.get("skill_name")
        skill_params = action.get("call_params") or action.get("skill_params", {})
        result_prefix = action.get("result_prefix", "")
@@ -445,32 +456,24 @@ def _execute_skill_call(task: dict, agent_bridge):

        if not skill_name:
            logger.error(f"[Scheduler] Task {task['id']}: No skill_name specified")
-            return
-        
+            return True
        if not receiver:
            logger.error(f"[Scheduler] Task {task['id']}: No receiver specified")
-            return
+            return True

        logger.info(f"[Scheduler] Task {task['id']}: Executing skill '{skill_name}' with params {skill_params}")

-        # Create a unique session_id for this scheduled task to avoid polluting user's conversation
-        # Format: scheduler_<receiver>_<task_id> to ensure isolation
        scheduler_session_id = f"scheduler_{receiver}_{task['id']}"
-        
-        # Build a natural language query for the Agent to execute the skill
-        # Format: "Use skill-name to do something with params"
        param_str = ", ".join([f"{k}={v}" for k, v in skill_params.items()])
        query = f"Use {skill_name} skill"
        if param_str:
            query += f" with {param_str}"

-        # Create context for Agent
        context = Context(ContextType.TEXT, query)
        context["receiver"] = receiver
        context["isgroup"] = is_group
        context["session_id"] = scheduler_session_id

-        # Channel-specific setup
        if channel_type == "web":
            import uuid
            request_id = f"scheduler_{task['id']}_{uuid.uuid4().hex[:8]}"
@@ -481,49 +484,48 @@ def _execute_skill_call(task: dict, agent_bridge):
        elif channel_type == "wecom_bot":
            context["msg"] = None

-        # Use Agent to execute the skill
        try:
-            # Don't clear history - scheduler tasks use isolated session_id so they won't pollute user conversations
            reply = agent_bridge.agent_reply(query, context=context, on_event=None, clear_history=False)
-            
-            if reply and reply.content:
-                content = reply.content
-                
-                # Add prefix if specified
-                if result_prefix:
-                    content = f"{result_prefix}\n\n{content}"
-                
-                # Send the result via channel
-                from channel.channel_factory import create_channel
-                
-                try:
-                    channel = create_channel(channel_type)
-                    if channel:
-                        # For web channel, register request_id
-                        if channel_type == "web" and hasattr(channel, 'request_to_session'):
-                            req_id = context.get("request_id")
-                            if req_id:
-                                channel.request_to_session[req_id] = receiver
-                                logger.debug(f"[Scheduler] Registered request_id {req_id} -> session {receiver}")
-                        
-                        channel.send(Reply(ReplyType.TEXT, content), context)
-                        _remember_delivered_output(agent_bridge, task, channel_type, content)
-                except Exception as e:
-                    logger.error(f"[Scheduler] Failed to send skill result: {e}")
-                
-                logger.info(f"[Scheduler] Task {task['id']} executed: skill result sent to {receiver}")
-            else:
-                logger.error(f"[Scheduler] Task {task['id']}: No result from skill execution")
-                
        except Exception as e:
            logger.error(f"[Scheduler] Failed to execute skill via Agent: {e}")
            import traceback
            logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
+            return False
+
+        if not (reply and reply.content):
+            logger.error(f"[Scheduler] Task {task['id']}: No result from skill execution")
+            return True
+
+        content = reply.content
+        if result_prefix:
+            content = f"{result_prefix}\n\n{content}"
+
+        from channel.channel_factory import create_channel
+        channel = create_channel(channel_type)
+        if not channel:
+            logger.error(f"[Scheduler] Failed to create channel: {channel_type}")
+            return False
+
+        if channel_type == "web" and hasattr(channel, 'request_to_session'):
+            req_id = context.get("request_id")
+            if req_id:
+                channel.request_to_session[req_id] = receiver
+
+        try:
+            channel.send(Reply(ReplyType.TEXT, content), context)
+        except Exception as e:
+            logger.error(f"[Scheduler] Failed to send skill result: {e}")
+            return False
+
+        _remember_delivered_output(agent_bridge, task, channel_type, content)
+        logger.info(f"[Scheduler] Task {task['id']} executed: skill result sent to {receiver}")
+        return True

    except Exception as e:
        logger.error(f"[Scheduler] Error in _execute_skill_call: {e}")
        import traceback
        logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
+        return False


 def attach_scheduler_to_tool(tool, context: Context = None):
--- a/agent/tools/scheduler/scheduler_service.py
+++ b/agent/tools/scheduler/scheduler_service.py
@@ -52,7 +52,6 @@ class SchedulerService:
            self.running = True
            self.thread = threading.Thread(target=self._run_loop, daemon=True)
            self.thread.start()
-            logger.debug("[Scheduler] Service started")
    
    def stop(self):
        """Stop the scheduler service"""
@@ -67,7 +66,7 @@ class SchedulerService:
    
    def _run_loop(self):
        """Main scheduler loop"""
-        logger.debug("[Scheduler] Scheduler loop started")
+        logger.info("[Scheduler] Scheduler loop started")
        
        while self.running:
            try:
@@ -84,12 +83,18 @@ class SchedulerService:
        
        for task in tasks:
            try:
-                # Check if task is due
                if self._is_task_due(task, now):
                    logger.info(f"[Scheduler] Executing task: {task['id']} - {task['name']}")
-                    self._execute_task(task)
+                    ok = self._execute_task(task)
+                    if not ok:
+                        # Leave next_run_at as-is so the next loop retries.
+                        # Cron tasks within the catch-up window will keep
+                        # firing; beyond it _is_task_due reschedules (or drops 'once' tasks).
+                        logger.warning(
+                            f"[Scheduler] Task {task['id']} delivery failed, will retry next tick"
+                        )
+                        continue

-                    # Update next run time
                    next_run = self._calculate_next_run(task, now)
                    if next_run:
                        self.task_store.update_task(task['id'], {
@@ -97,7 +102,6 @@ class SchedulerService:
                            "last_run_at": now.isoformat()
                        })
                    else:
-                        # One-time task completed, remove it
                        self.task_store.delete_task(task['id'])
                        logger.info(f"[Scheduler] One-time task completed and removed: {task['id']}")
            except Exception as e:
@@ -128,22 +132,27 @@ class SchedulerService:
        try:
            next_run = _parse_naive_local(next_run_str)

-            # Check if task is overdue (e.g., service restart)
            if next_run < now:
                time_diff = (now - next_run).total_seconds()
-                
-                # If overdue by more than 5 minutes, skip this run and schedule next
-                if time_diff > 300:  # 5 minutes
-                    logger.warning(f"[Scheduler] Task {task['id']} is overdue by {int(time_diff)}s, skipping and scheduling next run")
-                    
-                    # For one-time tasks, remove them directly
                schedule = task.get("schedule", {})
-                    if schedule.get("type") == "once":
+                schedule_type = schedule.get("type")
+
+                # Catch-up window: fire if we're within 10 minutes of the
+                # scheduled tick. Beyond that we'd rather skip than push a
+                # stale daily report to the user.
+                if time_diff <= 600:
+                    return True
+
+                logger.warning(
+                    f"[Scheduler] Task {task['id']} is overdue by {int(time_diff)}s, "
+                    f"skipping and scheduling next run"
+                )
+
+                if schedule_type == "once":
                    self.task_store.delete_task(task['id'])
                    logger.info(f"[Scheduler] One-time task {task['id']} expired, removed")
                    return False

-                    # For recurring tasks, calculate next run from now
                next_next_run = self._calculate_next_run(task, now)
                if next_next_run:
                    self.task_store.update_task(task['id'], {
@@ -213,20 +222,22 @@ class SchedulerService:
        
        return None
    
-    def _execute_task(self, task: dict):
+    def _execute_task(self, task: dict) -> bool:
        """
-        Execute a task
+        Execute a task.

-        Args:
-            task: Task dictionary
+        Returns True if delivery succeeded (caller should advance state),
+        False if it failed (caller should keep next_run_at so the next
+        loop iteration retries). Callback may return None for legacy
+        behaviour, treated as success.
        """
        try:
-            # Call the execute callback
-            self.execute_callback(task)
+            result = self.execute_callback(task)
+            return False if result is False else True
        except Exception as e:
            logger.error(f"[Scheduler] Error executing task {task['id']}: {e}")
-            # Update task with error
            self.task_store.update_task(task['id'], {
                "last_error": str(e),
                "last_error_at": datetime.now().isoformat()
            })
+            return False
--- a/agent/tools/vision/vision.py
+++ b/agent/tools/vision/vision.py
@@ -3,7 +3,7 @@ Vision tool - Analyze images using Vision API.
 Supports local files (auto base64-encoded) and HTTP URLs.

 Provider resolution:
-  - tool.vision.model (if set) means "prefer this model first; fall back to
+  - tools.vision.model (if set) means "prefer this model first; fall back to
    other configured providers if it fails". The model name is mapped to its
    native provider (e.g. doubao-* → Doubao, kimi-* → Moonshot, gpt-* →
    OpenAI/LinkAI). That provider is tried first, then the standard auto
@@ -53,14 +53,15 @@ _DISCOVERABLE_MODELS = [
    ("ark_api_key", const.DOUBAO, const.DOUBAO_SEED_2_PRO, "Doubao"),
    ("dashscope_api_key", const.QWEN_DASHSCOPE, const.QWEN36_PLUS, "DashScope"),
    ("claude_api_key", const.CLAUDEAPI, const.CLAUDE_4_6_SONNET, "Claude"),
-    ("gemini_api_key", const.GEMINI, const.GEMINI_31_FLASH_LITE_PRE, "Gemini"),
+    ("gemini_api_key", const.GEMINI, const.GEMINI_35_FLASH, "Gemini"),
    ("qianfan_api_key", const.QIANFAN, const.ERNIE_45_TURBO_VL, "Qianfan"),
    ("zhipu_ai_api_key", const.ZHIPU_AI, const.GLM_4_7, "ZhipuAI"),
    ("minimax_api_key", const.MiniMax, const.MINIMAX_M2_7, "MiniMax"),
+    ("mimo_api_key", const.MIMO, const.MIMO_V2_5_PRO, "MiMo"),
 ]

 # Model name prefix → discoverable provider display_name.
-# Used to auto-route tool.vision.model to its native provider.
+# Used to auto-route tools.vision.model to its native provider.
 # Matched case-insensitively; longest prefix wins.
 _MODEL_PREFIX_TO_PROVIDER = [
    ("doubao-", "Doubao"),
@@ -73,11 +74,29 @@ _MODEL_PREFIX_TO_PROVIDER = [
    ("glm-", "ZhipuAI"),
    ("minimax-", "MiniMax"),
    ("abab", "MiniMax"),
+    ("mimo-", "MiMo"),
 ]

 # Model prefixes that natively belong to OpenAI / LinkAI (raw HTTP providers).
 _OPENAI_MODEL_PREFIXES = ("gpt-", "o1-", "o3-", "o4-", "chatgpt-")

+# Maps the UI provider id (persisted in tools.vision.provider) to the internal
+# display name used in VisionProvider.name. Keep in sync with _DISCOVERABLE_MODELS
+# and the openai/linkai branches in _route_by_model_name.
+_PROVIDER_ID_TO_DISPLAY = {
+    "openai": "OpenAI",
+    "linkai": "LinkAI",
+    "moonshot": "Moonshot",
+    "doubao": "Doubao",
+    "dashscope": "DashScope",
+    "claudeAPI": "Claude",
+    "gemini": "Gemini",
+    "qianfan": "Qianfan",
+    "zhipu": "ZhipuAI",
+    "minimax": "MiniMax",
+    "mimo": "MiMo",
+}
+

@dataclass
 class VisionProvider:
@@ -154,7 +173,7 @@ class Vision(BaseTool):

        # Default model is only used as a last-resort placeholder for providers
        # whose VisionProvider.model_override is None (e.g. raw OpenAI provider
-        # when the user did not configure tool.vision.model).
+        # when the user did not configure tools.vision.model).
        return self._call_with_fallback(providers, DEFAULT_MODEL, question, image_content)

    def _call_with_fallback(self, providers: List[VisionProvider], model: str,
@@ -193,12 +212,12 @@ class Vision(BaseTool):
        """
        Build an ordered list of providers to try.

-        Semantics of `tool.vision.model`:
+        Semantics of `tools.vision.model`:
          "Prefer this model first; fall back to other configured providers
           if it fails."

        Order:
-          1. The provider that natively serves `tool.vision.model` (if any
+          1. The provider that natively serves `tools.vision.model` (if any
             and its API key is configured) — using the user-specified model
             name verbatim.
          2. Auto-discovery chain as fallback:
@@ -211,10 +230,16 @@ class Vision(BaseTool):
        are de-duplicated to avoid retrying the same endpoint twice.
        """
        user_model = self._resolve_user_vision_model()
+        user_provider = self._resolve_user_vision_provider()
        providers: List[VisionProvider] = []

-        # Step 1: preferred provider derived from tool.vision.model
-        if user_model:
+        # Step 1: preferred provider — explicit `tools.vision.provider`
+        # wins so custom model names can still be routed correctly. Falls
+        # through to model-name prefix inference when provider is unset.
+        preferred = None
+        if user_provider and user_model:
+            preferred = self._route_by_provider_id(user_provider, user_model)
+        if not preferred and user_model:
            preferred = self._route_by_model_name(user_model)
        if preferred:
            providers.extend(preferred)
@@ -251,11 +276,11 @@ class Vision(BaseTool):

    @staticmethod
    def _resolve_user_vision_model() -> Optional[str]:
-        """Read tool.vision.model from config; return None if unset/blank."""
-        tool_conf = conf().get("tool", {})
-        if not isinstance(tool_conf, dict):
+        """Read tools.vision.model (singular ``tool`` kept as runtime fallback)."""
+        tools_conf = conf().get("tools") or conf().get("tool") or {}
+        if not isinstance(tools_conf, dict):
            return None
-        vision_conf = tool_conf.get("vision", {})
+        vision_conf = tools_conf.get("vision", {})
        if not isinstance(vision_conf, dict):
            return None
        m = vision_conf.get("model")
@@ -263,6 +288,24 @@ class Vision(BaseTool):
            return m.strip()
        return None

+    @staticmethod
+    def _resolve_user_vision_provider() -> Optional[str]:
+        """Read tools.vision.provider — the UI-persisted vendor id.
+
+        Lets users pin a vendor for custom model names that prefix-inference
+        can't recognize. Returns None when unset/blank.
+        """
+        tools_conf = conf().get("tools") or conf().get("tool") or {}
+        if not isinstance(tools_conf, dict):
+            return None
+        vision_conf = tools_conf.get("vision", {})
+        if not isinstance(vision_conf, dict):
+            return None
+        p = vision_conf.get("provider")
+        if isinstance(p, str) and p.strip():
+            return p.strip()
+        return None
+
    @staticmethod
    def _infer_provider_from_model(model_name: str) -> Optional[str]:
        """
@@ -279,6 +322,54 @@ class Vision(BaseTool):
                return display_name
        return None

+    def _route_by_provider_id(self, provider_id: str, user_model: str) -> Optional[List[VisionProvider]]:
+        """Route by the UI-persisted provider id.
+
+        Returns:
+          - [provider] : provider id is known and its key is configured.
+          - None       : unknown id, missing key, or no usable vision bot.
+                         Caller falls through to model-name-based routing.
+        """
+        display_name = _PROVIDER_ID_TO_DISPLAY.get(provider_id)
+        if not display_name:
+            return None
+
+        # OpenAI / LinkAI use raw HTTP providers, not the discoverable bot path.
+        if provider_id == "openai":
+            p = self._build_openai_provider(user_model)
+            return [p] if p else None
+        if provider_id == "linkai":
+            p = self._build_linkai_provider(user_model)
+            return [p] if p else None
+
+        # Discoverable bot-backed providers.
+        for config_key, bot_type, _default_model, name in _DISCOVERABLE_MODELS:
+            if name != display_name:
+                continue
+            api_key = conf().get(config_key, "")
+            if not api_key or not api_key.strip():
+                logger.warning(f"[Vision] tools.vision.provider='{provider_id}' "
+                               f"but '{config_key}' is not configured. Falling back.")
+                return None
+            try:
+                from models.bot_factory import create_bot
+                bot = create_bot(bot_type)
+                if not hasattr(bot, 'call_vision'):
+                    logger.warning(f"[Vision] '{display_name}' bot does not implement call_vision.")
+                    return None
+            except Exception as e:
+                logger.warning(f"[Vision] Failed to create '{display_name}' bot: {e}")
+                return None
+            return [VisionProvider(
+                name=display_name,
+                api_key="",
+                api_base="",
+                model_override=user_model,
+                use_bot=True,
+                fallback_bot=bot,
+            )]
+        return None
+
    def _route_by_model_name(self, user_model: str) -> Optional[List[VisionProvider]]:
        """
        Try to build a provider list using the user-specified model name.
@@ -303,7 +394,7 @@ class Vision(BaseTool):
                self._append_provider(providers, lambda: self._build_linkai_provider(user_model))
            if providers:
                return providers
-            logger.warning(f"[Vision] tool.vision.model='{user_model}' looks like an OpenAI "
+            logger.warning(f"[Vision] tools.vision.model='{user_model}' looks like an OpenAI "
                           f"model but neither OPENAI_API_KEY nor LINKAI_API_KEY is configured.")
            return None  # fall through to auto

@@ -317,7 +408,7 @@ class Vision(BaseTool):
                continue
            api_key = conf().get(config_key, "")
            if not api_key or not api_key.strip():
-                logger.warning(f"[Vision] tool.vision.model='{user_model}' routes to "
+                logger.warning(f"[Vision] tools.vision.model='{user_model}' routes to "
                               f"'{display_name}' but '{config_key}' is not configured. "
                               f"Falling back to auto-discovery.")
                return None  # fall through to auto
@@ -452,8 +543,8 @@ class Vision(BaseTool):
        if not self._main_bot_supports_vision(bot):
            return None

-        # Use the configured main model name; do NOT inject tool.vision.model
-        # here, because by the time we reach this branch the tool.vision.model
+        # Use the configured main model name; do NOT inject tools.vision.model
+        # here, because by the time we reach this branch the tools.vision.model
        # routing has already been attempted (and either matched the main bot
        # or failed to find a provider).
        main_model_name = conf().get("model") or None
--- a/agent/tools/web_search/web_search.py
+++ b/agent/tools/web_search/web_search.py
@@ -1,13 +1,27 @@
-"""
-Web Search tool - Search the web using Bocha or LinkAI search API.
-Supports two backends with unified response format:
-  1. Bocha Search (primary, requires BOCHA_API_KEY)
-  2. LinkAI Search (fallback, requires LINKAI_API_KEY)
+"""Web Search tool. Supports four backends with a unified response format:
+  - bocha   (https://open.bochaai.com)
+  - zhipu   (https://docs.bigmodel.cn/cn/guide/tools/web-search)
+  - qianfan (https://cloud.baidu.com/doc/qianfan/s/2mh4su4uy)
+  - linkai  (https://link-ai.tech, fallback)
+
+Provider selection
+  - strategy 'auto' (default): pick the first configured provider in the
+    canonical order [bocha, qianfan, zhipu, linkai]. When the caller passes
+    an explicit `provider` it overrides the pick; an invalid/unconfigured
+    one silently falls back to the auto order.
+  - strategy 'fixed': use the configured provider; if its credential is
+    missing at call time, silently fall back to auto order (no card hint).
+
+Credentials
+  - bocha   : tools.web_search.bocha_api_key  ->  env BOCHA_API_KEY
+  - zhipu   : conf.zhipu_ai_api_key            ->  env ZHIPUAI_API_KEY
+  - qianfan : conf.qianfan_api_key             ->  env QIANFAN_API_KEY
+  - linkai  : conf.linkai_api_key              ->  env LINKAI_API_KEY
 """

-import os
 import json
-from typing import Dict, Any, Optional
+import os
+from typing import Any, Dict, List, Optional

 import requests

@@ -16,12 +30,63 @@ from common.log import logger
 from config import conf


-# Default timeout for API requests (seconds)
 DEFAULT_TIMEOUT = 30

+# Canonical fallback order. Empirically ordered by Chinese real-time
+# quality + relevance: bocha (best overall), qianfan (best for hot news),
+# zhipu (strong on long-form articles), linkai (cloud aggregator, last
+# resort).
+PROVIDER_ORDER = ("bocha", "qianfan", "zhipu", "linkai")
+
+PROVIDER_LABELS = {
+    "bocha":   "Bocha",
+    "zhipu":   "Zhipu",
+    "qianfan": "Baidu Qianfan",
+    "linkai":  "LinkAI",
+}
+
+
+def _tools_web_search_conf() -> dict:
+    """Return the tools.web_search config block (dict-like)."""
+    tools_cfg = conf().get("tools") or {}
+    if not isinstance(tools_cfg, dict):
+        return {}
+    block = tools_cfg.get("web_search") or {}
+    return block if isinstance(block, dict) else {}
+
+
+def _get_api_key(provider: str) -> str:
+    """Resolve API key for a provider, with conf -> env fallback."""
+    if provider == "bocha":
+        key = (_tools_web_search_conf().get("bocha_api_key") or "").strip()
+        return key or os.environ.get("BOCHA_API_KEY", "").strip()
+    if provider == "zhipu":
+        key = (conf().get("zhipu_ai_api_key") or "").strip()
+        return key or os.environ.get("ZHIPUAI_API_KEY", "").strip()
+    if provider == "qianfan":
+        key = (conf().get("qianfan_api_key") or "").strip()
+        return key or os.environ.get("QIANFAN_API_KEY", "").strip()
+    if provider == "linkai":
+        key = (conf().get("linkai_api_key") or "").strip()
+        return key or os.environ.get("LINKAI_API_KEY", "").strip()
+    return ""
+
+
+def configured_providers() -> List[str]:
+    """Return configured providers in canonical order."""
+    return [p for p in PROVIDER_ORDER if _get_api_key(p)]
+
+
+def _configured_strategy() -> str:
+    return (_tools_web_search_conf().get("strategy") or "auto").strip().lower()
+
+
+def _configured_provider() -> str:
+    return (_tools_web_search_conf().get("provider") or "").strip().lower()
+

 class WebSearch(BaseTool):
-    """Tool for searching the web using Bocha or LinkAI search API"""
+    """Tool for searching the web across multiple providers."""

    name: str = "web_search"
    description: str = "Search the web for real-time information. Returns titles, URLs, and snippets."
@@ -55,264 +120,368 @@ class WebSearch(BaseTool):

    def __init__(self, config: dict = None):
        self.config = config or {}
-        self._backend = None  # Will be resolved on first execute

    @staticmethod
    def is_available() -> bool:
-        """Check if web search is available (at least one API key is configured)"""
-        return bool(os.environ.get("BOCHA_API_KEY") or os.environ.get("LINKAI_API_KEY"))
+        """Tool is offered to the agent when at least one provider has a key."""
+        return bool(configured_providers())

-    def _resolve_backend(self) -> Optional[str]:
-        """
-        Determine which search backend to use.
-        Priority: Bocha > LinkAI
+    @classmethod
+    def get_json_schema(cls) -> dict:
+        """Augment the static schema with a `provider` field — only when the
+        user has ≥2 providers configured AND strategy is 'auto'. Otherwise
+        the backend picks silently and exposing the field would only waste
+        the agent's tokens."""
+        schema = {
+            "name": cls.name,
+            "description": cls.description,
+            "parameters": json.loads(json.dumps(cls.params)),  # deep copy
+        }
+        if _configured_strategy() != "auto":
+            return schema
+        available = configured_providers()
+        if len(available) < 2:
+            return schema

-        :return: 'bocha', 'linkai', or None
+        schema["parameters"]["properties"]["provider"] = {
+            "type": "string",
+            "enum": available,
+            "description": "Optional. Specifies the search backend. You may switch between providers when the user wants results from a particular source or from multiple sources.",
+        }
+        return schema
+
+    # ------------------------------------------------------------------
+    # Provider resolution
+    # ------------------------------------------------------------------
+
+    def _resolve_provider(self, requested: Optional[str]) -> Optional[str]:
+        """Pick a provider for this call.
+
+        Priority: caller-supplied (if configured) > fixed strategy (if
+        configured) > first configured in PROVIDER_ORDER. Silent fallback
+        when the desired one has no key.
        """
-        if os.environ.get("BOCHA_API_KEY"):
-            return "bocha"
-        if os.environ.get("LINKAI_API_KEY"):
-            return "linkai"
+        available = configured_providers()
+        if not available:
            return None

-    def execute(self, args: Dict[str, Any]) -> ToolResult:
-        """
-        Execute web search
+        if requested:
+            req = requested.strip().lower()
+            if req in available:
+                return req
+            logger.warning(f"[WebSearch] requested provider '{requested}' unavailable, falling back")

-        :param args: Search parameters (query, count, freshness, summary)
-        :return: Search results
-        """
-        query = args.get("query", "").strip()
+        if _configured_strategy() == "fixed":
+            pinned = _configured_provider()
+            if pinned in available:
+                return pinned
+            if pinned:
+                logger.warning(f"[WebSearch] pinned provider '{pinned}' unavailable, falling back to auto")
+
+        return available[0]
+
+    @staticmethod
+    def _resolution_reason(requested: Optional[str], chosen: str) -> str:
+        """Human-readable explanation for why `chosen` won the resolver."""
+        if requested and requested.strip().lower() == chosen:
+            return "caller-requested"
+        strategy = _configured_strategy()
+        if strategy == "fixed" and _configured_provider() == chosen:
+            return "fixed-strategy"
+        return "auto-fallback"
+
+    # ------------------------------------------------------------------
+    # Entry point
+    # ------------------------------------------------------------------
+
+    def execute(self, args: Dict[str, Any]) -> ToolResult:
+        query = (args.get("query") or "").strip()
        if not query:
            return ToolResult.fail("Error: 'query' parameter is required")

        count = args.get("count", 10)
        freshness = args.get("freshness", "noLimit")
        summary = args.get("summary", False)
-
-        # Validate count
        if not isinstance(count, int) or count < 1 or count > 50:
            count = 10

-        # Resolve backend
-        backend = self._resolve_backend()
-        if not backend:
+        requested = args.get("provider")
+        provider = self._resolve_provider(requested)
+        if not provider:
            return ToolResult.fail(
-                "Error: No search API key configured. "
-                "Please set BOCHA_API_KEY or LINKAI_API_KEY using env_config tool.\n"
-                "  - Bocha Search: https://open.bocha.cn\n"
-                "  - LinkAI Search: https://link-ai.tech"
+                "Error: No search provider configured. "
+                "Configure one of BOCHA_API_KEY / zhipu_ai_api_key / qianfan_api_key / linkai_api_key."
+            )
+
+        # Always log the routing decision so multi-provider deployments can
+        # tell at a glance which backend served any given query.
+        available = configured_providers()
+        reason = self._resolution_reason(requested, provider)
+        q_preview = query if len(query) <= 60 else (query[:57] + "...")
+        logger.info(
+            f"[WebSearch] provider={provider} reason={reason} "
+            f"available={list(available)} query={q_preview!r} count={count} freshness={freshness}"
        )

        try:
-            if backend == "bocha":
+            if provider == "bocha":
                return self._search_bocha(query, count, freshness, summary)
-            else:
+            if provider == "zhipu":
+                return self._search_zhipu(query, count, freshness)
+            if provider == "qianfan":
+                return self._search_qianfan(query, count, freshness)
+            if provider == "linkai":
                return self._search_linkai(query, count, freshness)
+            return ToolResult.fail(f"Error: Unknown provider '{provider}'")
        except requests.Timeout:
            return ToolResult.fail(f"Error: Search request timed out after {DEFAULT_TIMEOUT}s")
        except requests.ConnectionError:
            return ToolResult.fail("Error: Failed to connect to search API")
        except Exception as e:
-            logger.error(f"[WebSearch] Unexpected error: {e}", exc_info=True)
+            logger.error(f"[WebSearch] Unexpected error ({provider}): {e}", exc_info=True)
            return ToolResult.fail(f"Error: Search failed - {str(e)}")

+    # ------------------------------------------------------------------
+    # Bocha
+    # ------------------------------------------------------------------
+
    def _search_bocha(self, query: str, count: int, freshness: str, summary: bool) -> ToolResult:
-        """
-        Search using Bocha API
-
-        :param query: Search query
-        :param count: Number of results
-        :param freshness: Time range filter
-        :param summary: Whether to include summary
-        :return: Formatted search results
-        """
-        api_key = os.environ.get("BOCHA_API_KEY", "")
-        url = "https://api.bocha.cn/v1/web-search"
-
+        api_key = _get_api_key("bocha")
+        url = "https://api.bochaai.com/v1/web-search"
        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
-            "Accept": "application/json"
+            "Accept": "application/json",
        }
+        payload = {"query": query, "count": count, "freshness": freshness, "summary": summary}

-        payload = {
-            "query": query,
-            "count": count,
-            "freshness": freshness,
-            "summary": summary
-        }
+        logger.debug(f"[WebSearch] bocha: query='{query}', count={count}")
+        resp = requests.post(url, headers=headers, json=payload, timeout=DEFAULT_TIMEOUT)

-        logger.debug(f"[WebSearch] Bocha search: query='{query}', count={count}")
+        if resp.status_code == 401:
+            return ToolResult.fail("Error: Invalid bocha API key.")
+        if resp.status_code == 403:
+            return ToolResult.fail("Error: bocha API — insufficient balance. Top up at https://open.bochaai.com")
+        if resp.status_code == 429:
+            return ToolResult.fail("Error: bocha API rate limit reached.")
+        if resp.status_code != 200:
+            return ToolResult.fail(f"Error: bocha API returned HTTP {resp.status_code}")

-        response = requests.post(url, headers=headers, json=payload, timeout=DEFAULT_TIMEOUT)
-
-        if response.status_code == 401:
-            return ToolResult.fail("Error: Invalid BOCHA_API_KEY. Please check your API key.")
-        if response.status_code == 403:
-            return ToolResult.fail("Error: Bocha API - insufficient balance. Please top up at https://open.bocha.cn")
-        if response.status_code == 429:
-            return ToolResult.fail("Error: Bocha API rate limit reached. Please try again later.")
-        if response.status_code != 200:
-            return ToolResult.fail(f"Error: Bocha API returned HTTP {response.status_code}")
-
-        data = response.json()
-
-        # Check API-level error code
+        data = resp.json()
        api_code = data.get("code")
        if api_code is not None and api_code != 200:
            msg = data.get("msg") or "Unknown error"
-            return ToolResult.fail(f"Error: Bocha API error (code={api_code}): {msg}")
-
-        # Extract and format results
-        return self._format_bocha_results(data, query)
-
-    def _format_bocha_results(self, data: dict, query: str) -> ToolResult:
-        """
-        Format Bocha API response into unified result structure
-
-        :param data: Raw API response
-        :param query: Original query
-        :return: Formatted ToolResult
-        """
-        search_data = data.get("data", {})
-        web_pages = search_data.get("webPages", {})
-        pages = web_pages.get("value", [])
-
-        if not pages:
-            return ToolResult.success({
-                "query": query,
-                "backend": "bocha",
-                "total": 0,
-                "results": [],
-                "message": "No results found"
-            })
+            return ToolResult.fail(f"Error: bocha API error (code={api_code}): {msg}")

+        pages = (data.get("data") or {}).get("webPages", {}).get("value", []) or []
        results = []
-        for page in pages:
-            result = {
-                "title": page.get("name", ""),
-                "url": page.get("url", ""),
-                "snippet": page.get("snippet", ""),
-                "siteName": page.get("siteName", ""),
-                "datePublished": page.get("datePublished") or page.get("dateLastCrawled", ""),
+        for p in pages:
+            item = {
+                "title": p.get("name", ""),
+                "url": p.get("url", ""),
+                "snippet": p.get("snippet", ""),
+                "siteName": p.get("siteName", ""),
+                "datePublished": p.get("datePublished") or p.get("dateLastCrawled", ""),
            }
-            # Include summary only if present
-            if page.get("summary"):
-                result["summary"] = page["summary"]
-            results.append(result)
-
-        total = web_pages.get("totalEstimatedMatches", len(results))
-
+            if p.get("summary"):
+                item["summary"] = p["summary"]
+            results.append(item)
+        total = (data.get("data") or {}).get("webPages", {}).get("totalEstimatedMatches", len(results))
        return ToolResult.success({
-            "query": query,
-            "backend": "bocha",
-            "total": total,
-            "count": len(results),
-            "results": results
+            "query": query, "backend": "bocha",
+            "total": total, "count": len(results), "results": results,
        })

+    # ------------------------------------------------------------------
+    # Zhipu
+    # ------------------------------------------------------------------
+
+    def _search_zhipu(self, query: str, count: int, freshness: str) -> ToolResult:
+        """Query the Zhipu ``web_search`` endpoint and return normalized results.
+
+        :param query: Search text; only its first 70 characters are sent
+        :param count: Desired result count, clamped to 1..50 (10 when falsy)
+        :param freshness: Recency token; unrecognised values are sent as "noLimit"
+        :return: ToolResult with the unified result dict, or a failure message
+        """
+        api_key = _get_api_key("zhipu")
+        api_base = conf().get("zhipu_ai_api_base") or "https://open.bigmodel.cn/api/paas/v4"
+        url = f"{api_base.rstrip('/')}/web_search"
+        headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
+
+        # Zhipu expects `search_query` <= 70 chars, so cut long queries instead of having them rejected.
+        short_query = (query or "")[:70]
+        known_engines = ("search_std", "search_pro", "search_pro_sogou", "search_pro_quark")
+        engine = (_tools_web_search_conf().get("zhipu_search_engine") or "search_pro").strip().lower()
+        engine = engine if engine in known_engines else "search_pro"
+        known_recency = ("oneDay", "oneWeek", "oneMonth", "oneYear", "noLimit")
+        payload: Dict[str, Any] = {
+            "search_engine": engine,
+            "search_query": short_query,
+            "search_intent": False,
+            "count": max(1, min(int(count or 10), 50)),
+            "search_recency_filter": freshness if freshness in known_recency else "noLimit",
+        }
+        size_opt = (_tools_web_search_conf().get("zhipu_content_size") or "").strip().lower()
+        if size_opt in ("medium", "high"):
+            payload["content_size"] = size_opt
+
+        logger.debug(f"[WebSearch] zhipu: query='{short_query}', count={payload['count']}, engine={engine}")
+        resp = requests.post(url, headers=headers, json=payload, timeout=DEFAULT_TIMEOUT)
+        status = resp.status_code
+        if status == 401:
+            return ToolResult.fail("Error: Invalid Zhipu API key.")
+        if status != 200:
+            return ToolResult.fail(f"Error: Zhipu API returned HTTP {status}: {resp.text[:200]}")
+
+        data = resp.json()
+        # An HTTP 200 body can still carry a business error as {"error": {"code", "message"}}.
+        if isinstance(data, dict) and data.get("error"):
+            err = data["error"] or {}
+            return ToolResult.fail(f"Error: Zhipu returned {err.get('code')}: {err.get('message','')}")
+
+        # Accept results at the top level or under "data", and either field spelling per item.
+        items = data.get("search_result") or (data.get("data") or {}).get("search_result") or []
+        results = [
+            {
+                "title": it.get("title", ""),
+                "url": it.get("link") or it.get("url", ""),
+                "snippet": it.get("content") or it.get("snippet", ""),
+                "siteName": it.get("media") or it.get("siteName", ""),
+                "datePublished": it.get("publish_date") or it.get("datePublished", ""),
+            }
+            for it in items
+        ]
+        n = len(results)
+        return ToolResult.success({"query": query, "backend": "zhipu", "total": n, "count": n, "results": results})
+
+    # ------------------------------------------------------------------
+    # Qianfan (Baidu)
+    # ------------------------------------------------------------------
+
+    def _search_qianfan(self, query: str, count: int, freshness: str) -> ToolResult:
+        """Query the Qianfan ``ai_search/web_search`` endpoint and return normalized results.
+
+        :param query: Search text, sent as a single user message
+        :param count: Desired result count, clamped to 1..50 (10 when falsy)
+        :param freshness: Recency token, converted to a page_time date range when recognised
+        :return: ToolResult with the unified result dict, or a failure message
+        """
+        api_key = _get_api_key("qianfan")
+        api_base = conf().get("qianfan_api_base") or "https://qianfan.baidubce.com/v2"
+        url = f"{api_base.rstrip('/')}/ai_search/web_search"
+        headers = {
+            "Authorization": f"Bearer {api_key}",
+            "Content-Type": "application/json",
+            "X-Appbuilder-From": "cow",
+        }
+        count = max(1, min(int(count or 10), 50))
+        payload: Dict[str, Any] = {
+            "messages": [{"role": "user", "content": query}],
+            "search_source": "baidu_search_v2",
+            "resource_type_filter": [{"type": "web", "top_k": count}],
+        }
+        # Freshness goes out as a date-range filter, not a named token; it is omitted when there is none.
+        date_filter = self._qianfan_build_freshness_filter(freshness)
+        if date_filter:
+            payload["search_filter"] = date_filter
+
+        logger.debug(f"[WebSearch] qianfan: query='{query}', count={count}, freshness={freshness!r}")
+        resp = requests.post(url, headers=headers, json=payload, timeout=DEFAULT_TIMEOUT)
+        if resp.status_code == 401:
+            return ToolResult.fail("Error: Invalid Qianfan API key.")
+        if resp.status_code != 200:
+            return ToolResult.fail(f"Error: Qianfan API returned HTTP {resp.status_code}: {resp.text[:200]}")
+
+        data = resp.json()
+        # An HTTP 200 body with a truthy "code" is reported as a business error.
+        if isinstance(data, dict) and data.get("code"):
+            return ToolResult.fail(f"Error: Qianfan returned {data.get('code')}: {data.get('message','')}")
+
+        results = [
+            {
+                "title": ref.get("title", ""),
+                "url": ref.get("url", ""),
+                "snippet": (ref.get("content") or "")[:200],
+                "siteName": ref.get("web_anchor") or ref.get("website") or "",
+                "datePublished": ref.get("date", ""),
+            }
+            for ref in (data.get("references") or [])
+        ]
+        n = len(results)
+        return ToolResult.success({"query": query, "backend": "qianfan", "total": n, "count": n, "results": results})
+
+    @staticmethod
+    def _qianfan_build_freshness_filter(freshness: str) -> Optional[Dict[str, Any]]:
+        """Translate a shared freshness token into a ``page_time`` range filter, or None."""
+        from datetime import datetime, timedelta
+        # Empty, "noLimit" and unrecognised tokens have no day window -> no filter.
+        window_days = {"oneDay": 1, "oneWeek": 7, "oneMonth": 30, "oneYear": 365}.get(freshness or "")
+        if not window_days:
+            return None
+        today = datetime.now()
+        lower = (today - timedelta(days=window_days)).strftime("%Y-%m-%d")
+        upper = (today + timedelta(days=1)).strftime("%Y-%m-%d")  # tomorrow's date
+        return {"range": {"page_time": {"gte": lower, "lt": upper}}}
+
+    # ------------------------------------------------------------------
+    # LinkAI (plugin)
+    # ------------------------------------------------------------------
+
    def _search_linkai(self, query: str, count: int, freshness: str) -> ToolResult:
-        """
-        Search using LinkAI plugin API
-
-        :param query: Search query
-        :param count: Number of results
-        :param freshness: Time range filter
-        :return: Formatted search results
-        """
-        api_key = os.environ.get("LINKAI_API_KEY", "")
-        api_base = conf().get("linkai_api_base", "https://api.link-ai.tech")
-        url = f"{api_base.rstrip('/')}/v1/plugin/execute"
+        api_key = _get_api_key("linkai")
+        api_base = (conf().get("linkai_api_base") or "https://api.link-ai.tech").rstrip("/")
+        url = f"{api_base}/v1/plugin/execute"

        from common.utils import get_cloud_headers
        headers = get_cloud_headers(api_key)

-        payload = {
-            "code": "web-search",
-            "args": {
-                "query": query,
-                "count": count,
-                "freshness": freshness
-            }
-        }
+        payload = {"code": "web-search", "args": {"query": query, "count": count, "freshness": freshness}}
+        logger.debug(f"[WebSearch] linkai: query='{query}', count={count}")
+        resp = requests.post(url, headers=headers, json=payload, timeout=DEFAULT_TIMEOUT)

-        logger.debug(f"[WebSearch] LinkAI search: query='{query}', count={count}")
-
-        response = requests.post(url, headers=headers, json=payload, timeout=DEFAULT_TIMEOUT)
-
-        if response.status_code == 401:
-            return ToolResult.fail("Error: Invalid LINKAI_API_KEY. Please check your API key.")
-        if response.status_code != 200:
-            return ToolResult.fail(f"Error: LinkAI API returned HTTP {response.status_code}")
-
-        data = response.json()
+        if resp.status_code == 401:
+            return ToolResult.fail("Error: Invalid LinkAI API key.")
+        if resp.status_code != 200:
+            return ToolResult.fail(f"Error: LinkAI API returned HTTP {resp.status_code}")

+        data = resp.json()
        if not data.get("success"):
            msg = data.get("message") or "Unknown error"
            return ToolResult.fail(f"Error: LinkAI search failed: {msg}")

-        return self._format_linkai_results(data, query)
-
-    def _format_linkai_results(self, data: dict, query: str) -> ToolResult:
-        """
-        Format LinkAI API response into unified result structure.
-        LinkAI returns the search data in data.data field, which follows
-        the same Bing-compatible format as Bocha.
-
-        :param data: Raw API response
-        :param query: Original query
-        :return: Formatted ToolResult
-        """
-        raw_data = data.get("data", "")
-
-        # LinkAI may return data as a JSON string
-        if isinstance(raw_data, str):
+        raw = data.get("data", "")
+        if isinstance(raw, str):
            try:
-                raw_data = json.loads(raw_data)
+                raw = json.loads(raw)
            except (json.JSONDecodeError, TypeError):
-                # If data is plain text, return it as a single result
                return ToolResult.success({
-                    "query": query,
-                    "backend": "linkai",
-                    "total": 1,
-                    "count": 1,
-                    "results": [{"content": raw_data}]
+                    "query": query, "backend": "linkai",
+                    "total": 1, "count": 1, "results": [{"content": raw}],
                })

-        # If the response follows Bing-compatible structure
-        if isinstance(raw_data, dict):
-            web_pages = raw_data.get("webPages", {})
-            pages = web_pages.get("value", [])
-
+        if isinstance(raw, dict):
+            pages = (raw.get("webPages") or {}).get("value", []) or []
            if pages:
                results = []
-                for page in pages:
-                    result = {
-                        "title": page.get("name", ""),
-                        "url": page.get("url", ""),
-                        "snippet": page.get("snippet", ""),
-                        "siteName": page.get("siteName", ""),
-                        "datePublished": page.get("datePublished") or page.get("dateLastCrawled", ""),
+                for p in pages:
+                    item = {
+                        "title": p.get("name", ""),
+                        "url": p.get("url", ""),
+                        "snippet": p.get("snippet", ""),
+                        "siteName": p.get("siteName", ""),
+                        "datePublished": p.get("datePublished") or p.get("dateLastCrawled", ""),
                    }
-                    if page.get("summary"):
-                        result["summary"] = page["summary"]
-                    results.append(result)
-
-                total = web_pages.get("totalEstimatedMatches", len(results))
+                    if p.get("summary"):
+                        item["summary"] = p["summary"]
+                    results.append(item)
+                total = (raw.get("webPages") or {}).get("totalEstimatedMatches", len(results))
                return ToolResult.success({
-                    "query": query,
-                    "backend": "linkai",
-                    "total": total,
-                    "count": len(results),
-                    "results": results
+                    "query": query, "backend": "linkai",
+                    "total": total, "count": len(results), "results": results,
                })

-        # Fallback: return raw data
        return ToolResult.success({
-            "query": query,
-            "backend": "linkai",
-            "total": 1,
-            "count": 1,
-            "results": [{"content": str(raw_data)}]
+            "query": query, "backend": "linkai",
+            "total": 1, "count": 1, "results": [{"content": str(raw)}],
        })
--- a/app.py
+++ b/app.py
@@ -289,6 +289,16 @@ def _warmup_mcp_tools():
        logger.warning(f"[App] MCP warmup failed (non-fatal): {e}")


+def _warmup_scheduler():
+    """Eagerly build the AgentBridge at boot (best-effort; errors are only logged)."""
+    try:
+        from bridge.bridge import Bridge
+        bridge = Bridge()
+        bridge.get_agent_bridge()  # side effect: AgentBridge init starts the scheduler
+    except Exception as warmup_err:
+        logger.warning(f"[App] Scheduler warmup failed: {warmup_err}")
+
+
 def _sync_builtin_skills():
    """Sync builtin skills from project skills/ to workspace skills/ on startup."""
    import shutil
@@ -354,6 +364,8 @@ def run():
        # latency isn't dominated by npx package downloads.
        _warmup_mcp_tools()

+        _warmup_scheduler()
+
        logger.info(f"[App] Starting channels: {channel_names}")

        _channel_mgr = ChannelManager()
--- a/bridge/agent_bridge.py
+++ b/bridge/agent_bridge.py
@@ -5,7 +5,7 @@ Agent Bridge - Integrates Agent system with existing COW bridge
 import os
 from typing import Optional, List

-from agent.protocol import Agent, LLMModel, LLMRequest
+from agent.protocol import Agent, LLMModel, LLMRequest, get_cancel_registry
 from bridge.agent_event_handler import AgentEventHandler
 from bridge.agent_initializer import AgentInitializer
 from bridge.bridge import Bridge
@@ -285,6 +285,15 @@ class AgentBridge:
        
        # Create helper instances
        self.initializer = AgentInitializer(bridge, self)
+
+        # Eager-start the scheduler so cron tasks fire without waiting
+        # for the first user message. init_scheduler is idempotent.
+        try:
+            from agent.tools.scheduler.integration import init_scheduler
+            if init_scheduler(self):
+                self.scheduler_initialized = True
+        except Exception as e:
+            logger.warning(f"[AgentBridge] Eager scheduler init failed: {e}")
    def create_agent(self, system_prompt: str, tools: List = None, **kwargs) -> Agent:
        """
        Create the super agent with COW integration
@@ -390,10 +399,21 @@ class AgentBridge:
        """
        session_id = None
        agent = None
+        request_id = None
+        cancel_event = None
        try:
            # Extract session_id from context for user isolation
            if context:
                session_id = context.kwargs.get("session_id") or context.get("session_id")
+                request_id = context.kwargs.get("request_id") or context.get("request_id")
+
+            # Register a cancel token. Prefer per-turn request_id (web),
+            # fall back to session_id (IM channels). The Event is polled by
+            # AgentStreamExecutor at safe checkpoints.
+            registry = get_cancel_registry()
+            token_key = request_id or session_id
+            if token_key:
+                cancel_event = registry.register(token_key, session_id=session_id)

            # Get agent for this session (will auto-initialize if needed)
            agent = self.get_agent(session_id=session_id)
@@ -449,7 +469,8 @@ class AgentBridge:
                response = agent.run_stream(
                    user_message=query,
                    on_event=event_handler.handle_event,
-                    clear_history=clear_history
+                    clear_history=clear_history,
+                    cancel_event=cancel_event,
                )
            finally:
                # Restore original tools
@@ -459,6 +480,13 @@ class AgentBridge:
                # Log execution summary
                event_handler.log_summary()

+                # Release cancel token; keep registry bounded.
+                if token_key:
+                    try:
+                        registry.unregister(token_key)
+                    except Exception:
+                        pass
+
            # Persist new messages generated during this run
            if session_id:
                channel_type = (context.get("channel_type") or "") if context else ""
@@ -512,6 +540,12 @@ class AgentBridge:
                        logger.info(f"[AgentBridge] Cleared DB for session after error: {session_id}")
                except Exception as db_err:
                    logger.warning(f"[AgentBridge] Failed to clear DB after error: {db_err}")
+            # Release cancel token on error path too (idempotent).
+            if cancel_event is not None and (request_id or session_id):
+                try:
+                    get_cancel_registry().unregister(request_id or session_id)
+                except Exception:
+                    pass
            return Reply(ReplyType.ERROR, f"Agent error: {str(e)}")
    
    def _schedule_mcp_hot_reload(self, agent):
--- a/bridge/agent_event_handler.py
+++ b/bridge/agent_event_handler.py
@@ -2,8 +2,12 @@
 Agent Event Handler - Handles agent events and thinking process output
 """

+from common import const
 from common.log import logger

+# Cap intermediate thinking messages on weixin to stay within send quota.
+WEIXIN_THINKING_INSTANT_MAX = 7
+

 class AgentEventHandler:
    """
@@ -11,17 +15,9 @@ class AgentEventHandler:
    """

    def __init__(self, context=None, original_callback=None):
-        """
-        Initialize event handler
-        
-        Args:
-            context: COW context (for accessing channel)
-            original_callback: Original event callback to chain
-        """
        self.context = context
        self.original_callback = original_callback

-        # Get channel for sending intermediate messages
        self.channel = None
        if context:
            self.channel = context.kwargs.get("channel") if hasattr(context, "kwargs") else None
@@ -29,17 +25,17 @@ class AgentEventHandler:
        self.current_content = ""
        self.turn_number = 0

-    def handle_event(self, event):
-        """
-        Main event handler
+        channel_type = ""
+        if context and hasattr(context, "kwargs"):
+            channel_type = context.kwargs.get("channel_type", "") or ""
+        self._is_weixin = channel_type == const.WEIXIN
+        self._thinking_sent_count = 0
+        self._merged_buf: list[str] = []

-        Args:
-            event: Event dict with type and data
-        """
+    def handle_event(self, event):
        event_type = event.get("type")
        data = event.get("data", {})

-        # Dispatch to specific handlers
        if event_type == "turn_start":
            self._handle_turn_start(data)
        elif event_type == "message_update":
@@ -52,23 +48,21 @@ class AgentEventHandler:
            self._handle_tool_execution_start(data)
        elif event_type == "tool_execution_end":
            self._handle_tool_execution_end(data)
+        elif event_type == "agent_end":
+            self._handle_agent_end(data)

-        # Call original callback if provided
        if self.original_callback:
            self.original_callback(event)

    def _handle_turn_start(self, data):
-        """Handle turn start event"""
        self.turn_number = data.get("turn", 0)
        self.current_content = ""

    def _handle_message_update(self, data):
-        """Handle message update event (streaming content text)"""
        delta = data.get("delta", "")
        self.current_content += delta

    def _handle_message_end(self, data):
-        """Handle message end event"""
        tool_calls = data.get("tool_calls", [])

        if tool_calls:
@@ -78,26 +72,48 @@ class AgentEventHandler:
        else:
            if self.current_content.strip():
                logger.debug(f"💬 {self.current_content.strip()[:200]}{'...' if len(self.current_content) > 200 else ''}")
+            # Drain weixin buffer before final reply leaves chat_channel
+            self._flush_merged_now()

        self.current_content = ""

+    def _handle_agent_end(self, data):
+        self._flush_merged_now()  # on "agent_end": send whatever thinking text is still buffered
+
    def _handle_tool_execution_start(self, data):
-        """Handle tool execution start event - logged by agent_stream.py"""
        pass

    def _handle_tool_execution_end(self, data):
-        """Handle tool execution end event - logged by agent_stream.py"""
        pass

    def _send_to_channel(self, message):
-        """
-        Try to send intermediate message to channel.
-        Skipped in SSE mode because thinking text is already streamed via on_event.
-        """
        if self.context and self.context.get("on_event"):
            return
+        if not self.channel:
+            return

-        if self.channel:
+        if not self._is_weixin:
+            self._do_send(message)
+            return
+
+        if self._thinking_sent_count < WEIXIN_THINKING_INSTANT_MAX:
+            self._do_send(message)
+            self._thinking_sent_count += 1
+            return
+
+        self._merged_buf.append(message)
+
+    def _flush_merged_now(self):
+        """Send all buffered thinking messages as one merged text, then reset the buffer."""
+        if not (buffered := self._merged_buf):
+            return
+        combined = "\n\n".join(buffered)
+        self._merged_buf = []
+        logger.debug(f"[AgentEventHandler] Flushing {len(buffered)} merged thinking msgs, len={len(combined)}")
+        self._do_send(combined)
+        self._thinking_sent_count += 1
+
+    def _do_send(self, message):
        try:
            from bridge.reply import Reply, ReplyType
            reply = Reply(ReplyType.TEXT, message)
@@ -106,7 +122,4 @@ class AgentEventHandler:
            logger.debug(f"[AgentEventHandler] Failed to send to channel: {e}")

    def log_summary(self):
-        """Log execution summary - simplified"""
-        # Summary removed as per user request
-        # Real-time logging during execution is sufficient
        pass
--- a/bridge/agent_initializer.py
+++ b/bridge/agent_initializer.py
@@ -521,7 +521,7 @@ class AgentInitializer:
                if tool_name == "web_search":
                    from agent.tools.web_search.web_search import WebSearch
                    if not WebSearch.is_available():
-                        logger.debug("[AgentInitializer] WebSearch skipped - no BOCHA_API_KEY or LINKAI_API_KEY")
+                        logger.debug("[AgentInitializer] WebSearch skipped - no search provider configured")
                        continue

                # Special handling for EnvConfig tool
--- a/bridge/bridge.py
+++ b/bridge/bridge.py
@@ -14,7 +14,9 @@ class Bridge(object):
    def __init__(self):
        self.btype = {
            "chat": const.OPENAI,
-            "voice_to_text": conf().get("voice_to_text", "openai"),
+            # Empty `voice_to_text` (the default in new configs) triggers
+            # the auto-pick below — see _auto_pick_voice_to_text for order.
+            "voice_to_text": conf().get("voice_to_text") or self._auto_pick_voice_to_text(),
            "text_to_voice": conf().get("text_to_voice", "google"),
            "translate": conf().get("translate", "baidu"),
        }
@@ -61,6 +63,10 @@ class Bridge(object):
            if model_type and model_type.startswith("deepseek"):
                self.btype["chat"] = const.DEEPSEEK

+            # 小米 MiMo 系列模型，全部以 mimo- 开头
+            if model_type and model_type.startswith("mimo-"):
+                self.btype["chat"] = const.MIMO
+
            if model_type and isinstance(model_type, str):
                lowered_model_type = model_type.lower()
                if lowered_model_type == const.QIANFAN or lowered_model_type.startswith("ernie"):
@@ -84,6 +90,46 @@ class Bridge(object):
        self.chat_bots = {}
        self._agent_bridge = None

+    def refresh_voice(self):
+        """Reload both voice providers from config and evict their cached bots.
+
+        Meant for the web console after voice settings are edited; agent state is untouched.
+        """
+        cfg = conf()
+        v2t_cfg, t2v_cfg = cfg.get("voice_to_text"), cfg.get("text_to_voice")
+        chosen = {"voice_to_text": v2t_cfg or self._auto_pick_voice_to_text(),
+                  "text_to_voice": cfg.get("text_to_voice", "google")}
+        if cfg.get("use_linkai") and cfg.get("linkai_api_key"):
+            if not v2t_cfg or v2t_cfg in ["openai"]:
+                chosen["voice_to_text"] = const.LINKAI
+            if not t2v_cfg or t2v_cfg in ["openai", const.TTS_1, const.TTS_1_HD]:
+                chosen["text_to_voice"] = const.LINKAI
+        self.btype.update(chosen)
+        for kind in chosen:
+            self.bots.pop(kind, None)
+        logger.info(f"[Bridge] voice refreshed: voice_to_text={chosen['voice_to_text']}, text_to_voice={chosen['text_to_voice']}")
+
+    @staticmethod
+    def _auto_pick_voice_to_text() -> str:
+        """Choose the speech-to-text provider when ``voice_to_text`` is unset.
+
+        The first provider whose api key holds a real (non-placeholder) value
+        wins, tried in the order openai, dashscope, zhipu, linkai. With no
+        usable key the result is "openai", keeping the existing missing-key error.
+        """
+        placeholders = ("YOUR API KEY", "YOUR_API_KEY")
+        candidates = (
+            ("open_ai_api_key", "openai"),
+            ("dashscope_api_key", "dashscope"),
+            ("zhipu_ai_api_key", "zhipu"),
+            ("linkai_api_key", "linkai"),
+        )
+        for conf_key, provider in candidates:
+            key_value = (conf().get(conf_key) or "").strip()
+            if key_value and key_value not in placeholders:
+                return provider
+        return "openai"
+
    # 模型对应的接口
    def get_bot(self, typename):
        if self.bots.get(typename) is None:
--- a/channel/channel_factory.py
+++ b/channel/channel_factory.py
@@ -42,6 +42,12 @@ def create_channel(channel_type) -> Channel:
    elif channel_type == const.QQ:
        from channel.qq.qq_channel import QQChannel
        ch = QQChannel()
+    elif channel_type == const.TELEGRAM:
+        from channel.telegram.telegram_channel import TelegramChannel
+        ch = TelegramChannel()
+    elif channel_type == const.SLACK:
+        from channel.slack.slack_channel import SlackChannel
+        ch = SlackChannel()
    elif channel_type in (const.WEIXIN, "wx"):
        from channel.weixin.weixin_channel import WeixinChannel
        ch = WeixinChannel()
--- a/channel/chat_channel.py
+++ b/channel/chat_channel.py
@@ -171,7 +171,13 @@ class ChatChannel(Channel):
            if "desire_rtype" not in context and conf().get("always_reply_voice") and ReplyType.VOICE not in self.NOT_SUPPORT_REPLYTYPE:
                context["desire_rtype"] = ReplyType.VOICE
        elif context.type == ContextType.VOICE:
-            if "desire_rtype" not in context and conf().get("voice_reply_voice") and ReplyType.VOICE not in self.NOT_SUPPORT_REPLYTYPE:
+            # Voice input replies with voice when either voice_reply_voice
+            # (mirror voice) or the global always_reply_voice toggle is on.
+            if (
+                "desire_rtype" not in context
+                and (conf().get("voice_reply_voice") or conf().get("always_reply_voice"))
+                and ReplyType.VOICE not in self.NOT_SUPPORT_REPLYTYPE
+            ):
                context["desire_rtype"] = ReplyType.VOICE
        return context

@@ -264,6 +270,8 @@ class ChatChannel(Channel):
                if reply.type == ReplyType.TEXT:
                    reply_text = reply.content
                    if desire_rtype == ReplyType.VOICE and ReplyType.VOICE not in self.NOT_SUPPORT_REPLYTYPE:
+                        # Preserve original text for the "text-then-voice" pattern in _send_reply.
+                        context["voice_reply_text"] = reply.content
                        reply = super().build_text_to_voice(reply.content)
                        return self._decorate_reply(context, reply)
                    if context.get("isgroup", False):
@@ -311,6 +319,15 @@ class ChatChannel(Channel):
                    # 短暂延迟后发送图片
                    time.sleep(0.3)
                    self._send(reply, context)
+                # Send text bubble before voice, unless channel already streamed
+                # the text (feishu) or natively renders STT under the voice (wechatcom).
+                elif reply.type == ReplyType.VOICE and context.get("voice_reply_text") \
+                        and not context.get("feishu_streamed") \
+                        and context.get("channel_type") not in ("wechatcom_app",):
+                    text_reply = Reply(ReplyType.TEXT, context.get("voice_reply_text"))
+                    self._send(text_reply, context)
+                    time.sleep(0.3)
+                    self._send(reply, context)
                else:
                    self._send(reply, context)
    
@@ -421,8 +438,21 @@ class ChatChannel(Channel):

        return func

+    # Chat commands that must bypass the per-session serial queue,
+    # otherwise /cancel would queue behind the task it tries to cancel.
+    # Use /cancel (not /stop) to avoid colliding with `cow stop` CLI.
+    _BYPASS_QUEUE_COMMANDS = ("/cancel",)
+
    def produce(self, context: Context):
        session_id = context["session_id"]
+
+        # Fast path: /cancel must not enter the queue.
+        if context.type == ContextType.TEXT and context.content:
+            stripped = context.content.strip().lower()
+            if stripped in self._BYPASS_QUEUE_COMMANDS:
+                self._handle_cancel_command(context, session_id)
+                return
+
        with self.lock:
            if session_id not in self.sessions:
                self.sessions[session_id] = [
@@ -434,6 +464,29 @@ class ChatChannel(Channel):
            else:
                self.sessions[session_id][0].put(context)

+    def _handle_cancel_command(self, context: Context, session_id: str) -> None:
+        """Handle the ``/cancel`` fast path for one session.
+
+        Asks the cancel registry to cancel the session's runs, then sends a
+        text reply via ``_send_reply`` saying whether anything was cancelled.
+        Executes inline on the calling thread; any exception is logged as a
+        warning instead of being raised.
+        """
+        try:
+            from agent.protocol import get_cancel_registry
+            from bridge.reply import Reply, ReplyType
+
+            registry = get_cancel_registry()
+            hit_count = registry.cancel_session(session_id)
+            if hit_count > 0:
+                reply_text = "🛑 已中止"
+            else:
+                reply_text = "当前没有可中止的任务。"
+            logger.info(f"[chat_channel] /cancel fast-path: session={session_id}, cancelled={hit_count}")
+            self._send_reply(context, Reply(ReplyType.TEXT, reply_text))
+        except Exception as cancel_err:
+            logger.warning(f"[chat_channel] /cancel fast-path failed: {cancel_err}")
+
    # 消费者函数，单独线程，用于从消息队列中取出消息并处理
    def consume(self):
        while True:
--- a/channel/dingtalk/dingtalk_channel.py
+++ b/channel/dingtalk/dingtalk_channel.py
@@ -86,6 +86,8 @@ def _check(func):

@singleton
 class DingTalkChanel(ChatChannel, dingtalk_stream.ChatbotHandler):
+    NOT_SUPPORT_REPLYTYPE = []
+
    dingtalk_client_id = conf().get('dingtalk_client_id')
    dingtalk_client_secret = conf().get('dingtalk_client_secret')

@@ -870,6 +872,48 @@ class DingTalkChanel(ChatChannel, dingtalk_stream.ChatbotHandler):
                    self.reply_text("抱歉，文件上传失败", incoming_message)
            return
        
+        # Native sampleAudio. Upload only accepts ogg/amr, so convert TTS mp3/wav to amr.
+        elif reply.type == ReplyType.VOICE:
+            logger.info(f"[DingTalk] Sending voice: {reply.content}")
+            access_token = self.get_access_token()
+            if not access_token:
+                logger.error("[DingTalk] Cannot get access token for voice")
+                self.reply_text("抱歉，语音发送失败（无法获取token）", incoming_message)
+                return
+
+            voice_path = reply.content
+            if voice_path.startswith("file://"):
+                voice_path = voice_path[7:]
+
+            amr_path = voice_path
+            duration_ms = 0
+            if not voice_path.lower().endswith((".amr", ".ogg")):
+                try:
+                    from voice.audio_convert import any_to_amr
+                    amr_path = os.path.splitext(voice_path)[0] + ".amr"
+                    duration_ms = int(any_to_amr(voice_path, amr_path) or 0)
+                except Exception as e:
+                    logger.error(f"[DingTalk] Failed to convert voice to amr: {e}")
+                    self.reply_text("抱歉，语音转码失败", incoming_message)
+                    return
+
+            media_id = self.upload_media(amr_path, media_type="voice")
+            if not media_id:
+                logger.error("[DingTalk] Failed to upload voice media")
+                self.reply_text("抱歉，语音上传失败", incoming_message)
+                return
+
+            msg_param = {
+                "mediaId": media_id,
+                "duration": str(duration_ms or 1000),
+            }
+            success = self._send_file_message(
+                access_token, incoming_message, "sampleAudio", msg_param, isgroup
+            )
+            if not success:
+                self.reply_text("抱歉，语音发送失败", incoming_message)
+            return
+
        # 处理文本消息
        elif reply.type == ReplyType.TEXT:
            logger.info(f"[DingTalk] Sending text message, length={len(reply.content)}")
--- a/channel/feishu/feishu_channel.py
+++ b/channel/feishu/feishu_channel.py
@@ -752,6 +752,9 @@ class FeiShuChanel(ChatChannel):
        init_in_flight = [False]
        # 一旦初始化失败就长期标记为 disabled，本次回复不再尝试任何流式调用
        disabled = [False]
+        # True after agent_cancelled: agent_end stops rewriting the card
+        # with stale final_response and just finalizes current content.
+        cancelled = [False]
        lock = threading.Lock()

        # ---- 异步推送队列 ----------------------------------------------------
@@ -1076,18 +1079,42 @@ class FeiShuChanel(ChatChannel):
                    message_id[0] = None
                    sequence[0] = 0

+            elif event_type == "agent_cancelled":
+                # Lock channel into "no-rewrite" mode: the subsequent
+                # agent_end's final_response is from the last *completed*
+                # turn (the user already saw it), so rewriting the card
+                # would duplicate it visually.
+                with lock:
+                    cancelled[0] = True
+
            elif event_type == "agent_end":
                # 最终回复：用 final_response 覆盖当前流式卡片，然后关闭流式模式。
                final_response = data.get("final_response", "")
-                if not final_response:
-                    return
-                final_text = str(final_response)
                # 标记 streamed 让 chat_channel 跳过 send()
                context["feishu_streamed"] = True

                with lock:
+                    was_cancelled = cancelled[0]
                    has_card = card_id[0] is not None
                    init_busy = init_in_flight[0]
+                    pending_text = current_text[0]
+
+                if was_cancelled:
+                    # Cancelled path: finalize the in-flight card with
+                    # partial output (or a short marker if empty); drop
+                    # stale final_response to avoid duplicating last turn.
+                    if has_card:
+                        _drain_push_queue()
+                        partial = (pending_text or "").rstrip()
+                        final_text = partial or "_(已中止)_"
+                        _stream_update_text(final_text)
+                        _close_streaming_mode(final_text)
+                    push_queue.put(None)
+                    return
+
+                if not final_response:
+                    return
+                final_text = str(final_response)

                # 罕见情况：agent_end 触发时还没创建过卡片（极快返回 / 没有
                # message_update），主动创建一张承载 final_text。
@@ -1515,10 +1542,16 @@ class FeiShuChanel(ChatChannel):
            else:
                context.type = ContextType.TEXT
            context.content = content.strip()
+            # Text input opts into voice replies only when the always-on toggle is set.
+            if "desire_rtype" not in context and conf().get("always_reply_voice"):
+                context["desire_rtype"] = ReplyType.VOICE

        elif context.type == ContextType.VOICE:
-            # 2.语音请求
-            if "desire_rtype" not in context and conf().get("voice_reply_voice"):
+            # 2.语音请求: voice input replies with voice if either
+            # voice_reply_voice (mirror reply) or always_reply_voice is on.
+            if "desire_rtype" not in context and (
+                conf().get("voice_reply_voice") or conf().get("always_reply_voice")
+            ):
                context["desire_rtype"] = ReplyType.VOICE

        return context
--- a/channel/slack/init.py
+++ b/channel/slack/init.py
@@ -0,0 +1 @@
+
--- a/channel/slack/slack_channel.py
+++ b/channel/slack/slack_channel.py
@@ -0,0 +1,506 @@
+"""
+Slack channel via Bolt for Python (Socket Mode).
+
+Features:
+- Direct message & channel chat (text / image / file)
+- Channel trigger: @mention or reply in a thread the bot is in (configurable)
+- /cancel fast-path matches Web channel behaviour
+- Socket Mode: no public IP / callback URL required, works behind NAT
+
+Implementation note:
+    slack_bolt's SocketModeHandler is blocking and runs its own background
+    threads. We start it in a dedicated thread so the rest of cow (sync) stays
+    untouched. Inbound events are dispatched onto cow's existing sync
+    ChatChannel.produce() pipeline; outbound send() calls the Slack Web API
+    client directly (it is sync-safe).
+"""
+
+import os
+import re
+import threading
+
+import requests
+
+from bridge.context import Context, ContextType
+from bridge.reply import Reply, ReplyType
+from channel.chat_channel import ChatChannel, check_prefix
+from channel.slack.slack_message import SlackMessage
+from common.expired_dict import ExpiredDict
+from common.log import logger
+from common.singleton import singleton
+from config import conf
+
+
+@singleton
+class SlackChannel(ChatChannel):
+    NOT_SUPPORT_REPLYTYPE = []
+
+    def __init__(self):
+        """Set up empty connection state and relax the shared trigger config."""
+        super().__init__()
+
+        # Credentials and bot identity; startup() fills these in.
+        self.bot_token = self.app_token = ""
+        # bot_user_id is used to strip @mention and ignore self messages
+        self.bot_user_id = ""
+
+        # Bolt app / Socket Mode handler / Web client / worker thread.
+        self._app = self._handler = self._client = self._loop_thread = None
+
+        # Idempotent dedup (Slack retries event delivery on slow ack); entries
+        # are kept for one hour.
+        one_hour = 60 * 60
+        self._received_msgs = ExpiredDict(one_hour)
+
+        # Triggering is decided in _should_reply_in_channel, so the generic
+        # group whitelist / prefix checks are switched off here, aligned with
+        # the telegram / feishu channels.
+        settings = conf()
+        settings["group_name_white_list"] = ["ALL_GROUP"]
+        settings["single_chat_prefix"] = [""]
+
+    # ------------------------------------------------------------------
+    # Lifecycle
+    # ------------------------------------------------------------------
+
+    def startup(self):
+        """Connect to Slack over Socket Mode and block until the connection ends.
+
+        Steps, in order: read and validate both tokens, import slack_bolt
+        lazily, build the Bolt App and resolve the bot's own user id via
+        auth_test, register the event listeners, then run the Socket Mode
+        handler in a daemon thread and join it.
+
+        Every failure before the thread is started is logged, passed to
+        report_startup_error(), and makes this method return early.
+        """
+        self.bot_token = conf().get("slack_bot_token", "")
+        self.app_token = conf().get("slack_app_token", "")
+        if not self.bot_token or not self.app_token:
+            err = "[Slack] slack_bot_token and slack_app_token are both required"
+            logger.error(err)
+            self.report_startup_error(err)
+            return
+
+        # Guard against the common mistake of swapping the two tokens:
+        # bot token must start with xoxb-, app-level token with xapp-.
+        if not self.bot_token.startswith("xoxb-") or not self.app_token.startswith("xapp-"):
+            err = (
+                "[Slack] token type mismatch: slack_bot_token must start with 'xoxb-' "
+                "and slack_app_token must start with 'xapp-' (they look swapped)"
+            )
+            logger.error(err)
+            self.report_startup_error(err)
+            return
+
+        # Imported here rather than at module level so a missing slack_bolt
+        # package becomes a reported startup error instead of an import crash.
+        try:
+            from slack_bolt import App
+            from slack_bolt.adapter.socket_mode import SocketModeHandler
+        except ImportError:
+            err = (
+                "[Slack] slack_bolt is not installed. "
+                "Run: pip install slack_bolt"
+            )
+            logger.error(err)
+            self.report_startup_error(err)
+            return
+
+        try:
+            self._app = App(token=self.bot_token)
+            self._client = self._app.client
+
+            # Resolve our own bot user id (needed for @mention strip / self-ignore)
+            auth = self._client.auth_test()
+            self.bot_user_id = auth.get("user_id", "")
+            self.name = self.bot_user_id  # ChatChannel uses self.name to strip @-mention
+            logger.info(f"[Slack] Bot logged in as user_id={self.bot_user_id}, team={auth.get('team')}")
+        except Exception as e:
+            err = f"[Slack] auth_test failed: {e}"
+            logger.error(err)
+            self.report_startup_error(err)
+            return
+
+        self._register_handlers()
+
+        self._handler = SocketModeHandler(self._app, self.app_token)
+
+        def _run():
+            # Thread body. Success is reported *before* start() because start()
+            # is the blocking call (see the module docstring); a failure raised
+            # by start() is therefore reported after success was already sent.
+            try:
+                logger.info("[Slack] Starting Socket Mode connection...")
+                self.report_startup_success()
+                logger.info("[Slack] ✅ Slack bot ready, listening for events")
+                self._handler.start()
+            except Exception as e:
+                logger.error(f"[Slack] socket mode crashed: {e}", exc_info=True)
+                self.report_startup_error(str(e))
+            finally:
+                logger.info("[Slack] socket mode exited")
+
+        self._loop_thread = threading.Thread(target=_run, daemon=True, name="slack-socket")
+        self._loop_thread.start()
+        # Block startup() until the handler thread exits, matching other channels'
+        # behaviour (startup is a blocking call).
+        self._loop_thread.join()
+
+    def _register_handlers(self):
+        """Attach the Bolt listeners for `app_mention` and `message` events."""
+        bolt_app = self._app
+
+        # NOTE: the listener parameter names (event, ack) are kept as-is;
+        # presumably Bolt injects arguments by name -- verify before renaming.
+        def _on_app_mention(event, ack):
+            # The bot is @-mentioned in a channel: always a group message.
+            ack()
+            self._handle_event(event, is_group=True)
+
+        def _on_message(event, ack):
+            # DMs and channel messages (including thread replies).
+            ack()
+            self._handle_message_event(event)
+
+        # Same registration order as the listeners are defined above.
+        bolt_app.event("app_mention")(_on_app_mention)
+        bolt_app.event("message")(_on_message)
+
+    def stop(self):
+        """Close the Socket Mode handler and wait up to 10s for its thread."""
+        logger.info("[Slack] stop() called")
+
+        handler = self._handler
+        if handler is not None:
+            try:
+                handler.close()
+            except Exception as e:
+                # Best effort: a failing close must not prevent shutdown.
+                logger.warning(f"[Slack] handler close error: {e}")
+
+        worker = self._loop_thread
+        if worker and worker.is_alive():
+            try:
+                worker.join(timeout=10)
+            except Exception:
+                pass
+
+        logger.info("[Slack] stop() completed")
+
+    # ------------------------------------------------------------------
+    # Inbound: slack event -> ChatMessage -> ChatChannel.produce
+    # ------------------------------------------------------------------
+
+    def _handle_message_event(self, event: dict):
+        """Filter a raw `message` event and forward it to _handle_event.
+
+        Dropped without further processing: anything carrying a bot_id, the
+        bot_message / message_changed / message_deleted subtypes, and messages
+        authored by this bot. Channel-type messages (channel, group, mpim) are
+        forwarded only when _should_reply_in_channel() agrees; everything else
+        is treated as a single chat. Exceptions are logged, never raised.
+        """
+        skipped_subtypes = ("bot_message", "message_changed", "message_deleted")
+        group_channel_types = ("channel", "group", "mpim")
+        try:
+            logger.debug(
+                f"[Slack] message event: channel_type={event.get('channel_type')}, "
+                f"subtype={event.get('subtype')}, user={event.get('user')}, "
+                f"ts={event.get('ts')}, thread_ts={event.get('thread_ts')}"
+            )
+
+            # Bot traffic (ours included) and edit/delete notifications.
+            if event.get("bot_id"):
+                return
+            if event.get("subtype") in skipped_subtypes:
+                return
+            if event.get("user") == self.bot_user_id:
+                return
+
+            # Explicit @bot in a channel arrives through the app_mention
+            # listener; plain channel messages get through here only when the
+            # trigger policy allows it.
+            in_channel = event.get("channel_type", "") in group_channel_types
+            if in_channel and not self._should_reply_in_channel(event):
+                return
+
+            self._handle_event(event, is_group=in_channel)
+        except Exception as e:
+            logger.error(f"[Slack] _handle_message_event error: {e}", exc_info=True)
+
+    def _handle_event(self, event: dict, is_group: bool):
+        """Parse event -> build SlackMessage -> produce().
+
+        Processing order:
+        1. Drop events without a channel, and events whose "channel:ts" key
+           was already seen (dedup).
+        2. Parse the event (this downloads an attached file, if any).
+        3. In channels, remove the bot's own <@id> mention from text/caption.
+        4. Media with a caption is merged into a single TEXT query; media
+           without a caption is stored in the file cache and the method
+           returns, waiting for a later text message in the same session.
+        5. TEXT: "/cancel" or "cancel" takes the cancel fast-path; otherwise
+           any cached files of the session are appended as [tag: path] refs.
+        6. The context is composed, tagged with Slack routing keys, and
+           handed to produce().
+
+        All exceptions are logged and swallowed.
+        """
+        try:
+            channel_id = event.get("channel", "")
+            ts = event.get("ts", "")
+            if not channel_id:
+                return
+
+            # Idempotent dedup
+            msg_uid = f"{channel_id}:{ts}"
+            if self._received_msgs.get(msg_uid):
+                return
+            self._received_msgs[msg_uid] = True
+
+            # Parse type + download media if needed.
+            ctype, content, caption = self._parse_event(event)
+            if ctype is None:
+                logger.debug(f"[Slack] unsupported message type, skip. event={event}")
+                return
+
+            # Strip <@bot_user_id> mention from channel text
+            if is_group and self.bot_user_id:
+                if ctype == ContextType.TEXT and content:
+                    content = self._strip_at_mention(content)
+                if caption:
+                    caption = self._strip_at_mention(caption)
+
+            slack_msg = SlackMessage(
+                event,
+                is_group=is_group,
+                bot_user_id=self.bot_user_id,
+                ctype=ctype,
+                content=content,
+            )
+            slack_msg.is_at = is_group  # if we reached here in a channel, bot is mentioned/threaded
+
+            from channel.file_cache import get_file_cache
+            file_cache = get_file_cache()
+            session_id = self._compute_session_id(event, is_group)
+
+            # Media + caption together: treat as a complete query and bypass the cache
+            if ctype in (ContextType.IMAGE, ContextType.FILE) and caption:
+                tag = "image" if ctype == ContextType.IMAGE else "file"
+                merged_text = f"{caption}\n[{tag}: {content}]"
+                slack_msg.ctype = ContextType.TEXT
+                slack_msg.content = merged_text
+                ctype = ContextType.TEXT
+                logger.info(f"[Slack] Media+caption merged for session {session_id}")
+                # fallthrough to the TEXT branch below
+                # (the local `content` still holds the file path here; only
+                # slack_msg.content carries the merged text)
+
+            elif ctype == ContextType.IMAGE:
+                file_cache.add(session_id, content, file_type="image")
+                logger.info(f"[Slack] Image cached for session {session_id}, waiting for query...")
+                return
+            elif ctype == ContextType.FILE:
+                file_cache.add(session_id, content, file_type="file")
+                logger.info(f"[Slack] File cached for session {session_id}: {content}")
+                return
+
+            if ctype == ContextType.TEXT:
+                # Fast-path: /cancel mirrors Web channel behaviour
+                if (content or "").strip().lower() in ("/cancel", "cancel"):
+                    self._do_cancel(session_id, channel_id, event)
+                    return
+
+                # Attach files cached earlier for this session, then clear them
+                # so they are only sent with one query.
+                cached_files = file_cache.get(session_id)
+                if cached_files:
+                    refs = []
+                    for fi in cached_files:
+                        ftype = fi["type"]
+                        tag = ftype if ftype in ("image", "video") else "file"
+                        refs.append(f"[{tag}: {fi['path']}]")
+                    slack_msg.content = (slack_msg.content or "") + "\n" + "\n".join(refs)
+                    file_cache.clear(session_id)
+                    logger.info(f"[Slack] Attached {len(cached_files)} cached file(s) to query")
+
+            # Reply in the originating thread when present, else start one on this msg
+            thread_ts = event.get("thread_ts") or ts
+
+            context = self._compose_context(
+                slack_msg.ctype,
+                slack_msg.content,
+                isgroup=is_group,
+                msg=slack_msg,
+                # Replies go back into the thread, no manual @mention needed
+                no_need_at=True,
+            )
+            if context:
+                # These overwrite the session_id / receiver that
+                # _compose_context derived from the message object.
+                context["session_id"] = session_id
+                context["receiver"] = channel_id
+                context["slack_channel"] = channel_id
+                # Threading is only used in channels; DMs reply at top level.
+                context["slack_thread_ts"] = thread_ts if is_group else None
+                self.produce(context)
+            logger.debug(f"[Slack] received: type={ctype}, content={str(slack_msg.content)[:80]}")
+        except Exception as e:
+            logger.error(f"[Slack] _handle_event error: {e}", exc_info=True)
+
+    def _do_cancel(self, session_id: str, channel_id: str, event: dict):
+        """Cancel the session's running task directly and post the outcome.
+
+        The agent is not involved: the cancel registry is called right away
+        and a short status line is posted into the message's thread (or as a
+        thread on the message itself). Errors are logged, not raised.
+        """
+        try:
+            from agent.protocol import get_cancel_registry
+
+            registry = get_cancel_registry()
+            cancelled = registry.cancel_session(session_id)
+            if cancelled:
+                text = "Current task cancelled."
+            else:
+                text = "No running task to cancel."
+
+            reply_ts = event.get("thread_ts") or event.get("ts")
+            self._client.chat_postMessage(channel=channel_id, text=text, thread_ts=reply_ts)
+            logger.info(f"[Slack] /cancel session={session_id}, cancelled={cancelled}")
+        except Exception as e:
+            logger.error(f"[Slack] /cancel error: {e}", exc_info=True)
+
+    def _parse_event(self, event: dict):
+        """Classify a Slack event and return (ctype, content, caption).
+
+        - With attachments, only the first file is handled: it is downloaded
+          and `content` is its local path; `caption` is the message text.
+          The type is IMAGE for an ``image/*`` mimetype, FILE otherwise.
+        - Without attachments, non-empty text gives (TEXT, text, "").
+        - (None, None, "") means there is nothing usable: no text, no
+          download URL, or the download failed.
+        """
+        unsupported = (None, None, "")
+        text = (event.get("text") or "").strip()
+        attachments = event.get("files") or []
+
+        if not attachments:
+            return (ContextType.TEXT, text, "") if text else unsupported
+
+        first = attachments[0]
+        mimetype = (first.get("mimetype") or "").lower()
+        url = first.get("url_private_download") or first.get("url_private")
+        name = first.get("name") or first.get("id") or "file"
+        if not url:
+            return unsupported
+
+        path = self._download_file(url, name)
+        if not path:
+            return unsupported
+
+        if mimetype.startswith("image/"):
+            media_type = ContextType.IMAGE
+        else:
+            media_type = ContextType.FILE
+        return (media_type, path, text)
+
+    def _download_file(self, url: str, name: str):
+        """Download a Slack private file (requires bot token auth) to local tmp dir.
+
+        The local file name is the sanitized ``name`` prefixed with a short
+        digest of ``url``. Distinct uploads that share a file name therefore
+        land in different files instead of overwriting each other, while a
+        re-download of the same URL reuses the same path.
+
+        Args:
+            url: Slack file URL; fetched with the bot token as Bearer auth.
+            name: Original file name, used (sanitized) for the local file.
+
+        Returns:
+            The local path on success, or None when anything fails (the
+            error is logged).
+        """
+        import hashlib  # local import: only needed for the name prefix
+
+        try:
+            headers = {"Authorization": f"Bearer {self.bot_token}"}
+            # Replace everything outside [A-Za-z0-9_.-] and make the name
+            # unique per source URL.
+            safe_name = re.sub(r"[^\w.\-]", "_", name)
+            url_tag = hashlib.sha256(url.encode("utf-8")).hexdigest()[:10]
+            local_path = os.path.join(SlackMessage.get_tmp_dir(), f"{url_tag}_{safe_name}")
+
+            # With stream=True the connection stays open until the body is
+            # consumed or the response is closed; the context manager closes
+            # it on every path, including errors.
+            with requests.get(url, headers=headers, timeout=60, stream=True) as resp:
+                resp.raise_for_status()
+                with open(local_path, "wb") as fp:
+                    for chunk in resp.iter_content(chunk_size=8192):
+                        if chunk:
+                            fp.write(chunk)
+            logger.debug(f"[Slack] downloaded {name} -> {local_path}")
+            return local_path
+        except Exception as e:
+            logger.error(f"[Slack] download_file failed ({name}): {e}")
+            return None
+
+    # ------------------------------------------------------------------
+    # Channel trigger logic
+    # ------------------------------------------------------------------
+
+    def _should_reply_in_channel(self, event: dict) -> bool:
+        """Decide whether to reply to a plain channel message (no @mention).
+
+        Explicit @bot mentions are delivered through the app_mention
+        listener, so this only covers the remaining channel messages. The
+        policy comes from the `slack_group_trigger` setting:
+
+        - ``all``: reply to every message.
+        - ``mention_only``: never reply here.
+        - anything else (default ``mention_or_reply``): reply when the
+          message carries a ``thread_ts``, i.e. it is part of a thread.
+
+        NOTE(review): the thread check does not verify that the bot has
+        actually taken part in that thread.
+        """
+        mode = conf().get("slack_group_trigger", "mention_or_reply")
+        if mode in ("all", "mention_only"):
+            return mode == "all"
+        # mention_or_reply: follow up only within an existing thread
+        return bool(event.get("thread_ts"))
+
+    def _strip_at_mention(self, content: str) -> str:
+        """Remove every <@BOT_USER_ID> token (case-insensitive) and trim the result.
+
+        Empty content, or an unknown bot user id, returns `content` unchanged.
+        """
+        if not (content and self.bot_user_id):
+            return content
+        mention = "<@" + re.escape(self.bot_user_id) + ">"
+        return re.sub(mention, "", content, flags=re.IGNORECASE).strip()
+
+    @staticmethod
+    def _compute_session_id(event: dict, is_group: bool) -> str:
+        """Build the session key for an event.
+
+        Single chats are keyed by user. Channel chats are keyed by channel,
+        plus the user when `group_shared_session` is turned off.
+        """
+        user_id = event.get("user", "")
+        if not is_group:
+            return f"slack_user_{user_id}"
+
+        channel_key = f"slack_channel_{event.get('channel', '')}"
+        if conf().get("group_shared_session", True):
+            return channel_key
+        return f"{channel_key}_{user_id}"
+
+    # ------------------------------------------------------------------
+    # Override _compose_context: skip the parent's group whitelist/at checks
+    # (already handled via _should_reply_in_channel). Same idea as telegram.
+    # ------------------------------------------------------------------
+
+    def _compose_context(self, ctype: ContextType, content, **kwargs):
+        """Build the Context for a Slack message; no whitelist or @ checks here.
+
+        `kwargs` are stored as the context's kwargs; the message object is
+        then read back through ``context["msg"]``. Session id and receiver are
+        derived from that message. TEXT content matching the configured
+        `image_create_prefix` becomes an IMAGE_CREATE context with the prefix
+        removed. A VOICE reply is requested according to `always_reply_voice`
+        (TEXT and VOICE input) and `voice_reply_voice` (VOICE input only),
+        unless `desire_rtype` is already set.
+        """
+        context = Context(ctype, content)
+        context.kwargs = kwargs
+        if "channel_type" not in context:
+            context["channel_type"] = self.channel_type
+        if "origin_ctype" not in context:
+            context["origin_ctype"] = ctype
+
+        cmsg = context["msg"]
+        if not cmsg.is_group:
+            session_id = cmsg.from_user_id
+        elif conf().get("group_shared_session", True):
+            session_id = cmsg.other_user_id
+        else:
+            session_id = f"{cmsg.from_user_id}:{cmsg.other_user_id}"
+        context["session_id"] = session_id
+        context["receiver"] = cmsg.other_user_id
+
+        # Config switches that may turn the reply into voice for this input type.
+        if ctype == ContextType.TEXT:
+            matched_prefix = check_prefix(content, conf().get("image_create_prefix"))
+            if matched_prefix:
+                content = content.replace(matched_prefix, "", 1)
+            context.type = ContextType.IMAGE_CREATE if matched_prefix else ContextType.TEXT
+            context.content = (content or "").strip()
+            voice_switches = ("always_reply_voice",)
+        elif ctype == ContextType.VOICE:
+            voice_switches = ("voice_reply_voice", "always_reply_voice")
+        else:
+            voice_switches = ()
+
+        if (
+            voice_switches
+            and "desire_rtype" not in context
+            and any(conf().get(switch) for switch in voice_switches)
+        ):
+            context["desire_rtype"] = ReplyType.VOICE
+
+        return context
+
+    # ------------------------------------------------------------------
+    # Outbound: ChatChannel.send -> Slack Web API
+    # ------------------------------------------------------------------
+
+    def send(self, reply: Reply, context: Context):
+        """Deliver `reply` to the Slack channel/thread recorded in `context`.
+
+        The reply is dropped with a warning when the client is not ready or
+        the context has no `slack_channel`. Send errors are logged, not raised.
+        """
+        if self._client is None:
+            logger.warning("[Slack] client not ready, drop reply")
+            return
+
+        channel_id = context.get("slack_channel")
+        if not channel_id:
+            logger.warning("[Slack] no slack_channel in context, drop reply")
+            return
+        thread_ts = context.get("slack_thread_ts")
+
+        try:
+            self._do_send(reply, channel_id, thread_ts)
+        except Exception as e:
+            logger.error(f"[Slack] send failed: {e}", exc_info=True)
+        else:
+            logger.info(f"[Slack] sent reply (type={reply.type}, channel={channel_id})")
+
+    def _do_send(self, reply: Reply, channel_id: str, thread_ts):
+        """Translate one Reply into the matching Slack Web API call(s).
+
+        - TEXT / INFO / ERROR: posted as messages, split into chunks of at
+          most 3500 characters; empty text sends nothing.
+        - IMAGE: content is rewound with seek(0) and uploaded as "image.png".
+        - IMAGE_URL: a ``file://`` URL is uploaded from disk, any other URL is
+          posted as text.
+        - VOICE / FILE: uploaded as a file, with `reply.text_content` (when
+          present and non-empty) as the initial comment.
+        - Anything else: ``str(content)`` posted as text.
+        """
+        kind = reply.type
+        payload = reply.content
+
+        def post_text(message):
+            self._client.chat_postMessage(channel=channel_id, text=message, thread_ts=thread_ts)
+
+        if kind in (ReplyType.TEXT, ReplyType.INFO, ReplyType.ERROR):
+            text = "" if payload is None else str(payload)
+            if text:
+                # Slack caps a message around 40k chars; split conservatively
+                for chunk in _split_text(text, 3500):
+                    post_text(chunk)
+            return
+
+        if kind == ReplyType.IMAGE:
+            # Already a local BytesIO; upload it directly
+            payload.seek(0)
+            self._client.files_upload_v2(
+                channel=channel_id, file=payload, filename="image.png", thread_ts=thread_ts,
+            )
+            return
+
+        if kind == ReplyType.IMAGE_URL:
+            url = str(payload)
+            if not url.startswith("file://"):
+                # Post the URL as text; Slack will unfurl it as an image preview
+                post_text(url)
+                return
+            self._client.files_upload_v2(
+                channel=channel_id, file=url[7:], thread_ts=thread_ts,
+            )
+            return
+
+        if kind in (ReplyType.VOICE, ReplyType.FILE):
+            is_file_url = isinstance(payload, str) and payload.startswith("file://")
+            local = payload[7:] if is_file_url else payload
+            caption = getattr(reply, "text_content", None) or None
+            self._client.files_upload_v2(
+                channel=channel_id, file=local, initial_comment=caption, thread_ts=thread_ts,
+            )
+            return
+
+        # Fallback: send as plain text
+        post_text(str(payload))
+
+
+def _split_text(text: str, limit: int):
+    """Yield pieces of `text`, each at most `limit` characters long.
+
+    Text that already fits is yielded unchanged (including the empty string).
+    Otherwise whole lines are packed together, line endings kept, so a piece
+    never breaks inside a line unless that single line is longer than `limit`;
+    such a line is cut into `limit`-sized slices.
+    """
+    if len(text) <= limit:
+        yield text
+        return
+
+    pending = ""
+    for line in text.splitlines(keepends=True):
+        # Flush what has been collected if this line would not fit with it.
+        if pending and len(pending) + len(line) > limit:
+            yield pending
+            pending = ""
+        # Hard-split single lines that exceed the limit
+        while len(line) > limit:
+            head, line = line[:limit], line[limit:]
+            yield head
+        pending += line
+    if pending:
+        yield pending
--- a/channel/slack/slack_message.py
+++ b/channel/slack/slack_message.py
@@ -0,0 +1,60 @@
+"""
+Slack message adapter.
+
+Convert a Slack event payload into cow's unified ChatMessage.
+File downloads are NOT performed here; the channel layer downloads files
+on demand because it needs the bot token for authenticated download URLs.
+"""
+import os
+
+from bridge.context import ContextType
+from channel.chat_message import ChatMessage
+from common.utils import expand_path
+from config import conf
+
+
+class SlackMessage(ChatMessage):
+    """Adapter exposing a Slack event through the unified ChatMessage fields."""
+
+    def __init__(self, event: dict, is_group: bool = False, bot_user_id: str = "",
+                 ctype: ContextType = ContextType.TEXT, content: str = ""):
+        """Populate the ChatMessage fields from a Slack event.
+
+        `ctype` and `content` are supplied by the channel layer, which has
+        already parsed the event (and downloaded any file).
+        """
+        super().__init__(event)
+
+        # Identity and timing. Slack's `ts` is a decimal string; only its
+        # integer part is kept, falling back to 0 when it cannot be parsed.
+        self.msg_id = event.get("client_msg_id") or event.get("ts") or ""
+        raw_ts = event.get("ts", 0)
+        try:
+            self.create_time = int(float(raw_ts))
+        except (TypeError, ValueError):
+            self.create_time = 0
+        self.ctype = ctype
+        self.content = content
+
+        # Sender and recipient; user ids double as nicknames.
+        sender_id = event.get("user", "unknown")
+        channel_id = event.get("channel", "")
+        bot_label = bot_user_id or "slack_bot"
+        self.from_user_id = sender_id
+        self.from_user_nickname = sender_id
+        self.to_user_id = bot_label
+        self.to_user_nickname = bot_label
+
+        self.is_group = is_group
+        if not is_group:
+            # DM: use channel_id so replies go back to the same DM channel
+            self.other_user_id = channel_id or sender_id
+            self.other_user_nickname = sender_id
+        else:
+            # Channel chat: the "other" party is the channel, the actual
+            # user is the sender.
+            self.other_user_id = channel_id
+            self.other_user_nickname = channel_id
+            self.actual_user_id = sender_id
+            self.actual_user_nickname = sender_id
+
+        # Whether the bot was triggered by @-mention (set by channel layer)
+        self.is_at = False
+
+    @staticmethod
+    def get_tmp_dir() -> str:
+        """Return `<agent_workspace>/tmp`, creating the directory if needed.
+
+        `agent_workspace` is read from the config and defaults to ``~/cow``.
+        """
+        workspace_root = expand_path(conf().get("agent_workspace", "~/cow"))
+        download_dir = os.path.join(workspace_root, "tmp")
+        os.makedirs(download_dir, exist_ok=True)
+        return download_dir
--- a/channel/telegram/init.py
+++ b/channel/telegram/init.py
--- a/channel/telegram/telegram_channel.py
+++ b/channel/telegram/telegram_channel.py
@@ -0,0 +1,719 @@
+"""
+Telegram channel via Bot API (long polling mode).
+
+Features:
+- Single chat & group chat (text / photo / voice / video / document)
+- Group trigger: @mention or reply-to-bot (configurable)
+- /cancel fast-path matches Web channel behaviour
+- Auto-register bot commands menu on startup (mirrors Web slash menu)
+- Optional HTTP/SOCKS5 proxy support for restricted networks
+
+Implementation note:
+    python-telegram-bot is async-first. We run the bot inside a dedicated
+    thread with its own asyncio loop so the rest of cow (which is sync)
+    stays untouched. Inbound updates are dispatched onto cow's existing
+    sync ChatChannel.produce() pipeline; outbound send() schedules
+    coroutines back onto that loop via asyncio.run_coroutine_threadsafe.
+"""
+
+import asyncio
+import os
+import re
+import threading
+
+from bridge.context import Context, ContextType
+from bridge.reply import Reply, ReplyType
+from channel.chat_channel import ChatChannel, check_prefix
+from channel.telegram.telegram_message import TelegramMessage
+from common.expired_dict import ExpiredDict
+from common.log import logger
+from common.singleton import singleton
+from config import conf
+
+# Bot command menu, aligned with Web slash commands.
+# Each entry is a (command, description) pair; _async_main wraps every pair in
+# a telegram.BotCommand and registers the list via set_my_commands.
+# Top-level commands only; sub-commands are entered with a space (e.g. "/skill list").
+TELEGRAM_BOT_COMMANDS = [
+    ("help", "Show command help"),
+    ("status", "Show running status"),
+    ("context", "View/clear conversation context (sub: clear)"),
+    ("skill", "Manage skills (list/search/install/...)"),
+    ("memory", "Manage memory (sub: dream)"),
+    ("knowledge", "Manage knowledge base (list/on/off)"),
+    ("config", "Show current config"),
+    ("cancel", "Cancel running agent task"),
+    ("logs", "Show recent logs"),
+    ("version", "Show version"),
+]
+
+
+@singleton
+class TelegramChannel(ChatChannel):
+    NOT_SUPPORT_REPLYTYPE = []
+
+    def __init__(self):
+        """Initialise idle runtime state; the bot objects are created later by startup()."""
+        super().__init__()
+
+        # Credentials / identity, filled in during startup
+        self.bot_token = ""
+        self.bot_username = ""  # used for @-mention matching
+
+        # python-telegram-bot objects plus the loop and thread that host them
+        self._bot = None
+        self._application = None
+        self._loop = None
+        self._loop_thread = None
+        self._stop_event = threading.Event()
+
+        # One-hour idempotency window; TG occasionally redelivers the same
+        # update on flaky networks
+        self._received_msgs = ExpiredDict(60 * 60)
+
+        # Disable group whitelist / prefix checks (triggering is decided in
+        # _should_reply_in_group), aligned with feishu / wecom_bot channels.
+        overrides = (
+            ("group_name_white_list", ["ALL_GROUP"]),
+            ("single_chat_prefix", [""]),
+        )
+        for key, value in overrides:
+            conf()[key] = value
+
+    # ------------------------------------------------------------------
+    # Lifecycle
+    # ------------------------------------------------------------------
+
+    def startup(self):
+        """Start the Telegram bot and block until its event-loop thread exits.
+
+        Reads ``telegram_token`` from config and imports python-telegram-bot
+        lazily. When the token is empty or the import fails, the error is
+        logged, passed to report_startup_error(), and the method returns
+        without starting anything.
+        """
+        self.bot_token = conf().get("telegram_token", "")
+        if not self.bot_token:
+            err = "[Telegram] telegram_token is required"
+            logger.error(err)
+            self.report_startup_error(err)
+            return
+
+        # Imported here so a missing dependency becomes a reported startup
+        # error instead of an import failure of this module.
+        try:
+            from telegram.ext import (
+                Application,
+                MessageHandler,
+                CommandHandler,
+                filters,
+            )
+        except ImportError:
+            err = (
+                "[Telegram] python-telegram-bot is not installed. "
+                "Run: pip install python-telegram-bot"
+            )
+            logger.error(err)
+            self.report_startup_error(err)
+            return
+
+        # Run the asyncio event loop in a dedicated thread so the sync cow body
+        # is untouched.
+        self._loop = asyncio.new_event_loop()
+
+        def _run_loop():
+            # Thread target: drive _async_main to completion on self._loop,
+            # report a crash as a startup error, and always close the loop.
+            asyncio.set_event_loop(self._loop)
+            try:
+                self._loop.run_until_complete(self._async_main(Application, MessageHandler, CommandHandler, filters))
+            except Exception as e:
+                logger.error(f"[Telegram] event loop crashed: {e}", exc_info=True)
+                self.report_startup_error(str(e))
+            finally:
+                try:
+                    self._loop.close()
+                except Exception:
+                    pass
+                logger.info("[Telegram] event loop exited")
+
+        self._loop_thread = threading.Thread(target=_run_loop, daemon=True, name="telegram-loop")
+        self._loop_thread.start()
+        # Block startup() until the loop thread exits, matching other channels'
+        # behaviour (startup is a blocking call).
+        self._loop_thread.join()
+
+    async def _async_main(self, Application, MessageHandler, CommandHandler, filters):
+        """Build Application, register handlers, and run polling.
+
+        The python-telegram-bot classes are passed in by startup(), which
+        imports them lazily. Returns early, after reporting a startup error,
+        when get_me() fails. Otherwise it polls until ``_stop_event`` is set
+        and then stops and shuts down the application.
+        """
+        builder = Application.builder().token(self.bot_token)
+
+        # Proxy: prefer telegram_proxy config, fall back to HTTPS_PROXY env var
+        proxy_url = conf().get("telegram_proxy", "") or os.environ.get("HTTPS_PROXY", "")
+        if proxy_url:
+            try:
+                builder = builder.proxy(proxy_url).get_updates_proxy(proxy_url)
+                logger.info(f"[Telegram] using proxy: {proxy_url}")
+            except Exception as e:
+                logger.warning(f"[Telegram] proxy config failed, fallback to direct: {e}")
+
+        # Media uploads (photo/voice/video/document) over a proxy can be slow,
+        # bump read/write/connect/pool timeouts.
+        builder = (
+            builder
+            .read_timeout(60)
+            .write_timeout(120)
+            .connect_timeout(30)
+            .pool_timeout(30)
+        )
+
+        application = builder.build()
+        self._application = application
+        self._bot = application.bot
+
+        # Fetch our own username (needed for @-mention matching in groups)
+        try:
+            me = await self._bot.get_me()
+            self.bot_username = me.username or ""
+            self.name = self.bot_username  # ChatChannel uses self.name to strip @-mention
+            logger.info(f"[Telegram] Bot logged in as @{self.bot_username} (id={me.id})")
+        except Exception as e:
+            err = f"[Telegram] get_me failed: {e}"
+            logger.error(err)
+            self.report_startup_error(err)
+            return
+
+        # Register the command menu (failure is non-fatal)
+        if conf().get("telegram_register_commands", True):
+            try:
+                from telegram import BotCommand
+                cmds = [BotCommand(name, desc) for name, desc in TELEGRAM_BOT_COMMANDS]
+                await self._bot.set_my_commands(cmds)
+                logger.info(f"[Telegram] Registered {len(cmds)} bot commands")
+            except Exception as e:
+                logger.warning(f"[Telegram] set_my_commands failed: {e}")
+
+        # Handlers:
+        # 1) /cancel uses the fast-path
+        application.add_handler(CommandHandler("cancel", self._on_cancel))
+        # 2) Normal messages (text + media)
+        application.add_handler(MessageHandler(filters.ALL & ~filters.COMMAND, self._on_message))
+        # 3) Other slash commands are forwarded as plain text for the agent to handle
+        application.add_handler(MessageHandler(filters.COMMAND, self._on_command_passthrough))
+
+        # Start polling. drop_pending_updates avoids replaying backlog after restart.
+        # Transient "Server disconnected" / RemoteProtocolError during get_updates
+        # are common over proxies/flaky networks; PTB's network loop auto-retries,
+        # so we only need to keep the noise down (see _quiet_polling_network_errors).
+        self._quiet_polling_network_errors()
+        logger.info("[Telegram] Starting long polling...")
+        await application.initialize()
+        await application.start()
+        await application.updater.start_polling(
+            drop_pending_updates=True,
+            # Long-poll hold time on the server side; smaller value = reconnect more
+            # often but each hung connection fails faster.
+            timeout=30,
+            # Retry forever on transient get_updates network errors instead of giving up.
+            bootstrap_retries=-1,
+        )
+        self.report_startup_success()
+        logger.info("[Telegram] ✅ Telegram bot ready, polling for updates")
+
+        # Block until stop(); _stop_event is a threading.Event set from another
+        # thread, so it is polled here rather than awaited.
+        try:
+            while not self._stop_event.is_set():
+                await asyncio.sleep(0.5)
+        finally:
+            # Tear down in reverse order of startup; a failure in one step
+            # skips the remaining steps and is only logged.
+            try:
+                await application.updater.stop()
+                await application.stop()
+                await application.shutdown()
+            except Exception as e:
+                logger.warning(f"[Telegram] shutdown error: {e}")
+
+    @staticmethod
+    def _quiet_polling_network_errors():
+        """Silence PTB's 'Exception happened while polling for updates' log records.
+
+        A logging filter is attached to the ``telegram.ext.Updater``,
+        ``telegram.ext._updater`` and ``telegram.ext`` loggers. Records whose
+        rendered message contains one of the known transient-network phrases
+        are dropped and replaced by a one-line DEBUG breadcrumb on cow's own
+        logger; every other record passes through unchanged.
+        """
+        import logging
+
+        noisy_phrases = (
+            "Exception happened while polling for updates",
+            "Server disconnected without sending a response",
+        )
+
+        class _PollingNoiseFilter(logging.Filter):
+            _NEEDLES = noisy_phrases
+
+            def filter(self, record: logging.LogRecord) -> bool:
+                try:
+                    rendered = record.getMessage()
+                except Exception:
+                    # A record that cannot be formatted is never suppressed
+                    return True
+                for phrase in self._NEEDLES:
+                    if phrase in rendered:
+                        # Keep a single-line breadcrumb at DEBUG, drop the traceback.
+                        logger.debug(f"[Telegram] transient polling network error (auto-retrying): {rendered.splitlines()[0]}")
+                        return False
+                return True
+
+        shared_filter = _PollingNoiseFilter()
+        target_loggers = ("telegram.ext.Updater", "telegram.ext._updater", "telegram.ext")
+        for logger_name in target_loggers:
+            logging.getLogger(logger_name).addFilter(shared_filter)
+
+    def stop(self):
+        """Signal the polling loop to exit and wait up to 10 seconds for its thread."""
+        logger.info("[Telegram] stop() called")
+        self._stop_event.set()
+        worker = self._loop_thread
+        if worker and worker.is_alive():
+            try:
+                worker.join(timeout=10)
+            except Exception:
+                # Best effort: a failed join must not break shutdown
+                pass
+        logger.info("[Telegram] stop() completed")
+
+    # ------------------------------------------------------------------
+    # Inbound: telegram update -> ChatMessage -> ChatChannel.produce
+    # ------------------------------------------------------------------
+
+    async def _on_cancel(self, update, _context):
+        """Handle /cancel directly: cancel the session's running task and reply with the outcome.
+
+        Any failure is logged and, when possible, reported back to the chat.
+        """
+        try:
+            from agent.protocol import get_cancel_registry
+
+            session_id = self._compute_session_id(update)
+            registry = get_cancel_registry()
+            cancelled = registry.cancel_session(session_id)
+            if cancelled:
+                outcome = "Current task cancelled."
+            else:
+                outcome = "No running task to cancel."
+            await update.effective_message.reply_text(outcome)
+            logger.info(f"[Telegram] /cancel session={session_id}, cancelled={cancelled}")
+        except Exception as err:
+            logger.error(f"[Telegram] /cancel error: {err}", exc_info=True)
+            try:
+                await update.effective_message.reply_text(f"⚠️ /cancel failed: {err}")
+            except Exception:
+                # Nothing more to do if even the error reply cannot be sent
+                pass
+
+    async def _on_command_passthrough(self, update, _context):
+        """All non-/cancel commands fall through to plain message handling.
+
+        Registered for ``filters.COMMAND`` after the dedicated /cancel handler;
+        it simply delegates to _on_message with the same arguments.
+        """
+        await self._on_message(update, _context)
+
+    async def _on_message(self, update, _context):
+        """Telegram update entry: parse message -> build ChatMessage -> produce().
+
+        Steps, in order: dedup on ``chat.id:message_id``, group trigger gate,
+        message parsing (including media download), @-mention stripping,
+        file-cache handling, and finally dispatch via produce(). Standalone
+        images/files are cached and the method returns without dispatching;
+        they are attached to the next text query of the same session.
+        All exceptions are caught and logged.
+        """
+        try:
+            message = update.effective_message
+            chat = update.effective_chat
+            if not message or not chat:
+                return
+
+            # Idempotent dedup
+            msg_uid = f"{chat.id}:{message.message_id}"
+            if self._received_msgs.get(msg_uid):
+                return
+            self._received_msgs[msg_uid] = True
+
+            is_group = chat.type in ("group", "supergroup")
+
+            # Debug log: helpful when group messages are silently dropped
+            if is_group:
+                logger.debug(
+                    f"[Telegram] group update received: chat_id={chat.id}, "
+                    f"text={(message.text or message.caption or '')[:40]!r}, "
+                    f"reply_to_bot={bool(message.reply_to_message and message.reply_to_message.from_user and message.reply_to_message.from_user.username == self.bot_username)}"
+                )
+
+            # Group trigger gate (silently drop if not triggered)
+            if is_group and not self._should_reply_in_group(update):
+                logger.debug(f"[Telegram] group message not triggered (need @{self.bot_username} or reply), skip")
+                return
+
+            # Parse message type + download media if needed.
+            # Media messages with caption return both the local path and the caption text.
+            ctype, content, caption = await self._parse_message(message)
+            if ctype is None:
+                logger.debug(f"[Telegram] unsupported message type, skip. msg={message}")
+                return
+
+            # Strip @bot mention for group text/caption
+            if is_group and self.bot_username:
+                if ctype == ContextType.TEXT and content:
+                    content = self._strip_at_mention(content)
+                if caption:
+                    caption = self._strip_at_mention(caption)
+
+            tg_msg = TelegramMessage(
+                update,
+                is_group=is_group,
+                bot_username=self.bot_username,
+                ctype=ctype,
+                content=content,
+            )
+            tg_msg.is_at = is_group  # If we got here in a group, the bot is mentioned/replied
+
+            # File cache: standalone media goes into cache, the next text query attaches them
+            from channel.file_cache import get_file_cache
+            file_cache = get_file_cache()
+            session_id = self._compute_session_id(update)
+
+            # Media + caption together: treat as a complete query and bypass the cache
+            if ctype in (ContextType.IMAGE, ContextType.FILE) and caption:
+                tag = "image" if ctype == ContextType.IMAGE else "file"
+                merged_text = f"{caption}\n[{tag}: {content}]"
+                tg_msg.ctype = ContextType.TEXT
+                tg_msg.content = merged_text
+                ctype = ContextType.TEXT
+                logger.info(f"[Telegram] Media+caption merged for session {session_id}")
+                # fallthrough to the TEXT branch below
+
+            elif ctype == ContextType.IMAGE:
+                file_cache.add(session_id, content, file_type="image")
+                logger.info(f"[Telegram] Image cached for session {session_id}, waiting for query...")
+                return
+            elif ctype == ContextType.FILE:
+                file_cache.add(session_id, content, file_type="file")
+                logger.info(f"[Telegram] File cached for session {session_id}: {content}")
+                return
+
+            # Text query: append references to any files cached earlier for
+            # this session, then clear the cache so they are attached only once.
+            if ctype == ContextType.TEXT:
+                cached_files = file_cache.get(session_id)
+                if cached_files:
+                    refs = []
+                    for fi in cached_files:
+                        ftype = fi["type"]
+                        tag = ftype if ftype in ("image", "video") else "file"
+                        refs.append(f"[{tag}: {fi['path']}]")
+                    tg_msg.content = (tg_msg.content or "") + "\n" + "\n".join(refs)
+                    file_cache.clear(session_id)
+                    logger.info(f"[Telegram] Attached {len(cached_files)} cached file(s) to query")
+
+            # Dispatch to cow main pipeline (reuses ChatChannel._compose_context routing)
+            context = self._compose_context(
+                tg_msg.ctype,
+                tg_msg.content,
+                isgroup=is_group,
+                msg=tg_msg,
+            )
+            if context:
+                # Override the session/receiver chosen by _compose_context with
+                # the Telegram-specific values and record where send() must reply.
+                context["session_id"] = session_id
+                context["receiver"] = str(chat.id)
+                context["telegram_chat_id"] = chat.id
+                context["telegram_reply_to_msg_id"] = message.message_id if is_group else None
+                self.produce(context)
+            logger.debug(f"[Telegram] received: type={ctype}, content={str(tg_msg.content)[:80]}")
+
+        except Exception as e:
+            logger.error(f"[Telegram] _on_message error: {e}", exc_info=True)
+
+    async def _parse_message(self, message):
+        """Classify a telegram message and download its media when it has any.
+
+        Returns a ``(ctype, content, caption)`` tuple:
+
+        - content is the stripped text for ContextType.TEXT, otherwise the
+          local path of the downloaded file
+        - caption is the stripped text accompanying a media message; empty
+          for plain text
+        - ``(None, None, "")`` when the message type is unsupported or the
+          download failed
+        """
+        unsupported = (None, None, "")
+        caption = (message.caption or "").strip()
+
+        if message.photo:
+            # The last entry of message.photo is used as the largest rendition
+            local_path = await self._download_file(message.photo[-1].file_id, suffix=".jpg")
+            if not local_path:
+                return unsupported
+            return (ContextType.IMAGE, local_path, caption)
+
+        if message.voice or message.audio:
+            if message.voice:
+                clip, extension = message.voice, ".ogg"
+            else:
+                clip = message.audio
+                # Derive the extension from the MIME subtype, defaulting to mp3
+                if getattr(clip, "mime_type", ""):
+                    extension = "." + clip.mime_type.split("/")[-1]
+                else:
+                    extension = ".mp3"
+            local_path = await self._download_file(clip.file_id, suffix=extension)
+            if not local_path:
+                return unsupported
+            return (ContextType.VOICE, local_path, caption)
+
+        if message.video or message.video_note:
+            clip = message.video or message.video_note
+            local_path = await self._download_file(clip.file_id, suffix=".mp4")
+            if not local_path:
+                return unsupported
+            return (ContextType.FILE, local_path, caption)
+
+        if message.document:
+            document = message.document
+            extension = ""
+            if document.file_name and "." in document.file_name:
+                extension = "." + document.file_name.rsplit(".", 1)[-1]
+            local_path = await self._download_file(
+                document.file_id, suffix=extension, original_name=document.file_name
+            )
+            if not local_path:
+                return unsupported
+            # Image-typed documents (user picked "send as file") are treated as images
+            is_image = (document.mime_type or "").lower().startswith("image/")
+            kind = ContextType.IMAGE if is_image else ContextType.FILE
+            return (kind, local_path, caption)
+
+        if message.text:
+            return (ContextType.TEXT, message.text.strip(), "")
+
+        return unsupported
+
+    async def _download_file(self, file_id: str, suffix: str = "", original_name: str = ""):
+        """Download a Telegram file into the local tmp dir.
+
+        Args:
+            file_id: Telegram file identifier passed to bot.get_file().
+            suffix: Extension appended to ``file_id`` when no usable name is given.
+            original_name: Sender-supplied file name. Only its final path
+                component is kept, prefixed with ``file_id``.
+
+        Returns:
+            The local path on success, or None on any failure (the error is logged).
+        """
+        try:
+            f = await self._bot.get_file(file_id)
+            tmp_dir = TelegramMessage.get_tmp_dir()
+            # The name comes from the sender, so drop any directory part
+            # (either separator style) to keep the download inside tmp_dir.
+            leaf = os.path.basename((original_name or "").replace("\\", "/"))
+            if leaf in ("", ".", ".."):
+                safe_name = f"{file_id}{suffix or ''}"
+            else:
+                # Prefix with file_id to avoid name collisions
+                safe_name = f"{file_id}_{leaf}"
+            local_path = os.path.join(tmp_dir, safe_name)
+            await f.download_to_drive(custom_path=local_path)
+            logger.debug(f"[Telegram] downloaded file_id={file_id} -> {local_path}")
+            return local_path
+        except Exception as e:
+            logger.error(f"[Telegram] download_file failed (file_id={file_id}): {e}")
+            return None
+
+    # ------------------------------------------------------------------
+    # Group trigger logic
+    # ------------------------------------------------------------------
+
+    def _should_reply_in_group(self, update) -> bool:
+        """Apply the ``telegram_group_trigger`` setting to a group update.
+
+        ``"all"`` answers everything. Otherwise an @-mention of the bot always
+        triggers, and in the default ``"mention_or_reply"`` mode a reply to
+        one of the bot's own messages triggers as well.
+        """
+        trigger_mode = conf().get("telegram_group_trigger", "mention_or_reply")
+        if trigger_mode == "all":
+            return True
+
+        message = update.effective_message
+        if not message:
+            return False
+
+        # 1) Mentioned
+        if self.bot_username and self._is_mentioned(message, self.bot_username):
+            return True
+
+        # 2) Reply to a bot message (only in mention_or_reply mode)
+        if trigger_mode != "mention_or_reply":
+            return False
+        replied = message.reply_to_message
+        return bool(
+            replied
+            and replied.from_user
+            and replied.from_user.username == self.bot_username
+        )
+
+    @staticmethod
+    def _is_mentioned(message, bot_username: str) -> bool:
+        """Return True when the message text (or caption) @-mentions the bot.
+
+        The match is case-insensitive and must end at a username boundary, so
+        ``@mybot`` is not found inside ``@mybot_dev``. Only the text is
+        inspected: a username mention always appears literally in it, which
+        avoids concatenating ``entities`` and ``caption_entities`` (a tuple
+        and a list cannot be added together).
+
+        Args:
+            message: Object exposing ``text`` and ``caption`` attributes.
+            bot_username: Bot username without the leading ``@``.
+        """
+        if not bot_username:
+            return False
+        text = message.text or message.caption or ""
+        # Usernames are built from letters, digits and underscores; refuse a
+        # match that continues with one of those characters.
+        mention = re.compile(r"@" + re.escape(bot_username) + r"(?![A-Za-z0-9_])", re.IGNORECASE)
+        return mention.search(text) is not None
+
+    def _strip_at_mention(self, content: str) -> str:
+        """Remove every ``@bot_username`` from group text and trim the result.
+
+        Matching is case-insensitive and stops at a username boundary, so a
+        longer handle such as ``@mybot_dev`` is left intact. Empty content, or
+        an unknown bot username, returns ``content`` unchanged.
+        """
+        if not content or not self.bot_username:
+            return content
+        # Do not match when the handle continues with another username character
+        pattern = re.compile(
+            r"@" + re.escape(self.bot_username) + r"(?![A-Za-z0-9_])", re.IGNORECASE
+        )
+        return pattern.sub("", content).strip()
+
+    @staticmethod
+    def _compute_session_id(update) -> str:
+        """Derive the session key for an update.
+
+        Private chats use ``tg_user_<user id>``. Groups use
+        ``tg_group_<chat id>`` when ``group_shared_session`` is enabled (the
+        default) and ``tg_group_<chat id>_<user id>`` otherwise.
+
+        NOTE(review): ``update.effective_user`` is dereferenced without a None
+        check - confirm callers never pass updates that have no user.
+        """
+        chat, user = update.effective_chat, update.effective_user
+        if chat.type not in ("group", "supergroup"):
+            return f"tg_user_{user.id}"
+        if conf().get("group_shared_session", True):
+            return f"tg_group_{chat.id}"
+        return f"tg_group_{chat.id}_{user.id}"
+
+    # ------------------------------------------------------------------
+    # Override _compose_context: skip the parent's group whitelist/at checks
+    # (already handled in _on_message via _should_reply_in_group). Same idea
+    # as the feishu channel.
+    # ------------------------------------------------------------------
+
+    def _compose_context(self, ctype: ContextType, content, **kwargs):
+        """Build the Context for a message without the parent's whitelist/@ checks.
+
+        Fills in ``channel_type`` and ``origin_ctype`` when absent, derives
+        ``session_id`` and ``receiver`` from ``kwargs["msg"]``, turns text
+        that starts with an image-create prefix into IMAGE_CREATE, and sets
+        ``desire_rtype`` to VOICE according to the voice-reply settings.
+        """
+        context = Context(ctype, content)
+        context.kwargs = kwargs
+        if "channel_type" not in context:
+            context["channel_type"] = self.channel_type
+        if "origin_ctype" not in context:
+            context["origin_ctype"] = ctype
+
+        cmsg = context["msg"]
+        if not cmsg.is_group:
+            session_key = cmsg.from_user_id
+        elif conf().get("group_shared_session", True):
+            session_key = cmsg.other_user_id
+        else:
+            session_key = f"{cmsg.from_user_id}:{cmsg.other_user_id}"
+        context["session_id"] = session_key
+        context["receiver"] = cmsg.other_user_id
+
+        if ctype == ContextType.TEXT:
+            matched_prefix = check_prefix(content, conf().get("image_create_prefix"))
+            if matched_prefix:
+                # Drop the first occurrence of the prefix and switch to image creation
+                content = content.replace(matched_prefix, "", 1)
+                context.type = ContextType.IMAGE_CREATE
+            else:
+                context.type = ContextType.TEXT
+            context.content = (content or "").strip()
+            if "desire_rtype" not in context:
+                if conf().get("always_reply_voice"):
+                    context["desire_rtype"] = ReplyType.VOICE
+        elif ctype == ContextType.VOICE:
+            if "desire_rtype" not in context:
+                if conf().get("voice_reply_voice") or conf().get("always_reply_voice"):
+                    context["desire_rtype"] = ReplyType.VOICE
+
+        return context
+
+    # ------------------------------------------------------------------
+    # Outbound: ChatChannel.send -> Telegram API
+    # ------------------------------------------------------------------
+
+    def send(self, reply: Reply, context: Context):
+        """Deliver a reply by running _async_send on the bot's event loop.
+
+        Called from synchronous code; the coroutine is scheduled on the loop
+        thread and awaited for at most 180 seconds. The reply is dropped with
+        a warning when the bot is not ready or the context carries no
+        ``telegram_chat_id``. Failures are logged, never raised.
+        """
+        not_ready = self._loop is None or self._bot is None
+        if not_ready:
+            logger.warning("[Telegram] bot not ready, drop reply")
+            return
+
+        chat_id = context.get("telegram_chat_id")
+        reply_to = context.get("telegram_reply_to_msg_id")
+        if chat_id is None:
+            logger.warning("[Telegram] no telegram_chat_id in context, drop reply")
+            return
+
+        try:
+            pending = asyncio.run_coroutine_threadsafe(
+                self._async_send(reply, chat_id, reply_to), self._loop
+            )
+            # Media uploads through a proxy can be slow; let PTB's own timeouts win
+            pending.result(timeout=180)
+        except Exception as err:
+            logger.error(f"[Telegram] send failed: {err}")
+
+    # Number of retries for transient network errors (proxy hiccups etc.)
+    _SEND_RETRIES = 2
+    _SEND_RETRY_BACKOFF = 2.0  # seconds
+
+    async def _send_with_retry(self, send_fn, *, label: str):
+        """Await ``send_fn()`` and retry it on transient Telegram network errors.
+
+        Up to ``_SEND_RETRIES`` extra attempts are made, sleeping
+        ``_SEND_RETRY_BACKOFF * attempt_number`` seconds between them. Once
+        the attempts are exhausted the last NetworkError/TimedOut is raised;
+        any other exception propagates immediately.
+
+        Args:
+            send_fn: Zero-argument callable returning a fresh awaitable per call.
+            label: Short name of the API call, used in the warning log.
+        """
+        from telegram.error import NetworkError, TimedOut
+
+        total_attempts = self._SEND_RETRIES + 1
+        failure = None
+        for attempt_no in range(1, total_attempts + 1):
+            try:
+                return await send_fn()
+            except (NetworkError, TimedOut) as exc:
+                failure = exc
+            # Only reached after a transient failure
+            if attempt_no >= total_attempts:
+                break
+            wait = self._SEND_RETRY_BACKOFF * attempt_no
+            logger.warning(
+                f"[Telegram] {label} transient error (attempt {attempt_no}/"
+                f"{total_attempts}): {failure}; retry in {wait}s"
+            )
+            await asyncio.sleep(wait)
+        raise failure
+
+    async def _async_send(self, reply: Reply, chat_id, reply_to_msg_id):
+        """Send one reply to Telegram, choosing the API call from the reply type.
+
+        TEXT/INFO/ERROR replies are split into chunks of at most 4000
+        characters. IMAGE expects a seekable binary stream. IMAGE_URL, VOICE
+        and FILE accept a ``file://`` URL or a local path (IMAGE_URL also a
+        remote URL). Any other type is sent as plain text. Every API call goes
+        through _send_with_retry; errors are logged and never raised.
+
+        Args:
+            reply: The reply to deliver (``type`` and ``content`` are read).
+            chat_id: Target Telegram chat id.
+            reply_to_msg_id: Message id to reply to, or None.
+        """
+        try:
+            rtype = reply.type
+            content = reply.content
+
+            if rtype == ReplyType.TEXT or rtype == ReplyType.INFO or rtype == ReplyType.ERROR:
+                # Telegram caps a single text message at 4096 chars; auto-split
+                text = str(content) if content is not None else ""
+                if not text:
+                    return
+                for chunk in _split_text(text, 4000):
+                    await self._send_with_retry(
+                        # Bind the chunk as a default so each lambda keeps its own text
+                        lambda c=chunk: self._bot.send_message(
+                            chat_id=chat_id,
+                            text=c,
+                            reply_to_message_id=reply_to_msg_id,
+                            # Avoid failing the whole send if reply_to was deleted
+                            allow_sending_without_reply=True,
+                        ),
+                        label="send_message",
+                    )
+
+            elif rtype == ReplyType.IMAGE:
+                # Already a local BytesIO; send it directly
+                async def _send_stream_photo():
+                    # Rewind on every attempt: a failed upload leaves the
+                    # stream at EOF, so a retry would otherwise send no data.
+                    content.seek(0)
+                    return await self._bot.send_photo(
+                        chat_id=chat_id,
+                        photo=content,
+                        reply_to_message_id=reply_to_msg_id,
+                        allow_sending_without_reply=True,
+                    )
+                await self._send_with_retry(_send_stream_photo, label="send_photo")
+
+            elif rtype == ReplyType.IMAGE_URL:
+                url = str(content)
+                if url.startswith("file://"):
+                    local = url[7:]
+                    # Open inside the callable so each retry gets a fresh stream
+                    async def _send_local_photo():
+                        with open(local, "rb") as f:
+                            return await self._bot.send_photo(
+                                chat_id=chat_id, photo=f,
+                                reply_to_message_id=reply_to_msg_id,
+                                allow_sending_without_reply=True,
+                            )
+                    await self._send_with_retry(_send_local_photo, label="send_photo(file)")
+                else:
+                    await self._send_with_retry(
+                        lambda: self._bot.send_photo(
+                            chat_id=chat_id, photo=url,
+                            reply_to_message_id=reply_to_msg_id,
+                            allow_sending_without_reply=True,
+                        ),
+                        label="send_photo(url)",
+                    )
+
+            elif rtype == ReplyType.VOICE:
+                local = content[7:] if isinstance(content, str) and content.startswith("file://") else content
+                async def _send_voice():
+                    with open(local, "rb") as f:
+                        return await self._bot.send_voice(
+                            chat_id=chat_id, voice=f,
+                            reply_to_message_id=reply_to_msg_id,
+                            allow_sending_without_reply=True,
+                        )
+                await self._send_with_retry(_send_voice, label="send_voice")
+
+            elif rtype == ReplyType.FILE:
+                # Videos go through send_video, everything else through send_document
+                local = content[7:] if isinstance(content, str) and content.startswith("file://") else content
+                # File replies may carry an accompanying text caption
+                caption = getattr(reply, "text_content", None) or None
+                is_video = isinstance(local, str) and local.lower().endswith(
+                    (".mp4", ".mov", ".avi", ".mkv", ".webm")
+                )
+
+                async def _send_file():
+                    with open(local, "rb") as f:
+                        if is_video:
+                            return await self._bot.send_video(
+                                chat_id=chat_id, video=f, caption=caption,
+                                reply_to_message_id=reply_to_msg_id,
+                                allow_sending_without_reply=True,
+                            )
+                        return await self._bot.send_document(
+                            chat_id=chat_id, document=f, caption=caption,
+                            reply_to_message_id=reply_to_msg_id,
+                            allow_sending_without_reply=True,
+                        )
+                await self._send_with_retry(_send_file, label="send_video" if is_video else "send_document")
+
+            else:
+                # Fallback: send as plain text
+                await self._send_with_retry(
+                    lambda: self._bot.send_message(
+                        chat_id=chat_id, text=str(content),
+                        reply_to_message_id=reply_to_msg_id,
+                        allow_sending_without_reply=True,
+                    ),
+                    label="send_message(fallback)",
+                )
+
+            logger.info(f"[Telegram] sent reply (type={rtype}, chat_id={chat_id})")
+
+        except Exception as e:
+            logger.error(f"[Telegram] _async_send error: {e}", exc_info=True)
+
+
+def _split_text(text: str, limit: int):
+    """Yield pieces of ``text`` no longer than ``limit`` characters.
+
+    Text that already fits is yielded unchanged. Otherwise whole lines (line
+    endings kept) are packed greedily into each piece, and a single line
+    longer than ``limit`` is cut into ``limit``-sized slices.
+    """
+    if len(text) <= limit:
+        yield text
+        return
+
+    pending = []
+    pending_len = 0
+    for piece in text.splitlines(keepends=True):
+        # Flush the accumulated lines when this one would overflow the piece
+        if pending and pending_len + len(piece) > limit:
+            yield "".join(pending)
+            pending = []
+            pending_len = 0
+        # Hard-split single lines that exceed the limit
+        while len(piece) > limit:
+            head, piece = piece[:limit], piece[limit:]
+            yield head
+        pending.append(piece)
+        pending_len += len(piece)
+
+    if pending:
+        yield "".join(pending)
--- a/channel/telegram/telegram_message.py
+++ b/channel/telegram/telegram_message.py
@@ -0,0 +1,62 @@
+"""
+Telegram message adapter.
+
+Convert a python-telegram-bot Update into cow's unified ChatMessage.
+File downloads are NOT performed here; the channel layer triggers
+bot.get_file() on demand because it requires the async event loop.
+"""
+import os
+
+from bridge.context import ContextType
+from channel.chat_message import ChatMessage
+from common.utils import expand_path
+from config import conf
+
+
+class TelegramMessage(ChatMessage):
+    """Unified ChatMessage view over a python-telegram-bot Update."""
+
+    def __init__(self, update, is_group: bool = False, bot_username: str = "",
+                 ctype: ContextType = ContextType.TEXT, content: str = ""):
+        """Populate the ChatMessage fields from ``update``.
+
+        ``ctype`` and ``content`` are stored as given; the caller decides
+        them. ``bot_username`` fills the receiver fields, falling back to
+        ``"telegram_bot"`` when empty.
+        """
+        super().__init__(update)
+        tg_msg = update.effective_message
+        tg_chat = update.effective_chat
+        sender = update.effective_user
+
+        # Message identity and timestamp (empty / zero when unavailable)
+        if tg_msg:
+            self.msg_id = str(tg_msg.message_id)
+            self.create_time = int(tg_msg.date.timestamp()) if tg_msg.date else 0
+        else:
+            self.msg_id = ""
+            self.create_time = 0
+        self.ctype = ctype
+        self.content = content
+
+        # Sender: prefer the full name, then the username, then the id
+        if sender:
+            sender_id = str(sender.id)
+            sender_nick = sender.full_name or sender.username
+        else:
+            sender_id = "unknown"
+            sender_nick = "unknown"
+        self.from_user_id = sender_id
+        self.from_user_nickname = sender_nick or sender_id
+
+        # Receiver is always the bot itself
+        bot_name = bot_username or "telegram_bot"
+        self.to_user_id = bot_name
+        self.to_user_nickname = bot_name
+
+        self.is_group = is_group
+        if not is_group:
+            # Private chat: the counterpart is the sender
+            self.other_user_id = sender_id
+            self.other_user_nickname = self.from_user_nickname
+        else:
+            # Group chat: the counterpart is the group, the actual user is the sender
+            group_id = str(tg_chat.id)
+            self.other_user_id = group_id
+            self.other_user_nickname = tg_chat.title or group_id
+            self.actual_user_id = sender_id
+            self.actual_user_nickname = self.from_user_nickname
+
+        # Set by the channel layer once it detects an @-mention or a reply to the bot
+        self.is_at = False
+
+    @staticmethod
+    def get_tmp_dir() -> str:
+        """Return ``<agent_workspace>/tmp``, creating it if necessary."""
+        download_dir = os.path.join(
+            expand_path(conf().get("agent_workspace", "~/cow")),
+            "tmp",
+        )
+        os.makedirs(download_dir, exist_ok=True)
+        return download_dir
--- a/channel/web/chat.html
+++ b/channel/web/chat.html
@@ -137,6 +137,11 @@
                            <i class="fas fa-sliders item-icon text-xs w-5 text-center"></i>
                            <span data-i18n="menu_config">配置</span>
                        </a>
+                        <a class="sidebar-item flex items-center gap-3 px-3 py-2 rounded-lg cursor-pointer transition-all duration-150 hover:bg-white/5 hover:text-neutral-200 text-[14px]"
+                           data-view="models">
+                            <i class="fas fa-microchip item-icon text-xs w-5 text-center"></i>
+                            <span data-i18n="menu_models">模型</span>
+                        </a>
                        <a class="sidebar-item flex items-center gap-3 px-3 py-2 rounded-lg cursor-pointer transition-all duration-150 hover:bg-white/5 hover:text-neutral-200 text-[14px]"
                           data-view="skills">
                            <i class="fas fa-bolt item-icon text-xs w-5 text-center"></i>
@@ -417,8 +422,9 @@
                                    </button>
                                </div>
                                <div id="slash-menu" class="slash-menu hidden"></div>
+                                <div class="flex-1 min-w-0 relative flex items-center">
                                    <textarea id="chat-input"
-                                          class="flex-1 min-w-0 px-4 py-[10px] rounded-xl border border-slate-200 dark:border-slate-600
+                                              class="w-full pl-4 pr-11 py-[10px] rounded-xl border border-slate-200 dark:border-slate-600
                                                     bg-slate-50 dark:bg-white/5 text-slate-800 dark:text-slate-100
                                                     placeholder:text-slate-400 dark:placeholder:text-slate-500
                                                     focus:outline-none focus:ring-0 focus:border-primary-600
@@ -426,12 +432,20 @@
                                              rows="1"
                                              data-i18n-placeholder="input_placeholder"
                                              placeholder="输入消息，或输入 / 使用指令"></textarea>
+                                    <button id="mic-btn" type="button"
+                                            class="absolute right-2 top-1/2 -translate-y-1/2 w-8 h-8 flex items-center justify-center rounded-lg
+                                                   text-slate-400 hover:text-primary-500 hover:bg-primary-50 dark:hover:bg-primary-900/20
+                                                   cursor-pointer transition-colors duration-150"
+                                            data-i18n-title="mic_idle_title" title="点击录音 / 再按一次结束">
+                                        <i class="fas fa-microphone text-sm"></i>
+                                    </button>
+                                </div>
                                <button id="send-btn"
                                        class="flex-shrink-0 w-10 h-10 flex items-center justify-center rounded-lg
                                               bg-primary-400 text-white hover:bg-primary-500
                                               disabled:bg-slate-300 dark:disabled:bg-slate-600
                                               disabled:cursor-not-allowed cursor-pointer transition-colors duration-150"
-                                        disabled onclick="sendMessage()">
+                                        disabled>
                                    <i class="fas fa-paper-plane text-sm"></i>
                                </button>
                            </div>
@@ -460,6 +474,11 @@
                                            <i class="fas fa-microchip text-primary-500 text-sm"></i>
                                        </div>
                                        <h3 class="font-semibold text-slate-800 dark:text-slate-100" data-i18n="config_model">模型配置</h3>
+                                        <a class="ml-auto text-xs text-slate-500 dark:text-slate-400 hover:text-primary-500 dark:hover:text-primary-400 cursor-pointer transition-colors flex items-center gap-1"
+                                           onclick="navigateTo('models')">
+                                            <span data-i18n="config_model_advanced">高级配置</span>
+                                            <i class="fas fa-arrow-right text-[10px]"></i>
+                                        </a>
                                    </div>
                                    <div class="space-y-5">
                                        <!-- Provider -->
@@ -850,6 +869,41 @@
                    </div>
                </div>

+                <!-- ====================================================== -->
+                <!-- VIEW: Models                                            -->
+                <!-- ====================================================== -->
+                <div id="view-models" class="view">
+                    <!-- Tailwind JIT safelist: capability-card icon colors are
+                         emitted from JS template strings. Listing them here
+                         (display:none) guarantees the CDN-side compiler picks
+                         them up regardless of render timing. -->
+                    <div class="hidden bg-blue-50 dark:bg-blue-900/30 text-blue-500
+                                       bg-orange-50 dark:bg-orange-900/30 text-orange-500
+                                       bg-purple-50 dark:bg-purple-900/30 text-purple-500
+                                       bg-amber-50 dark:bg-amber-900/30 text-amber-500
+                                       bg-primary-50 dark:bg-primary-900/30 text-primary-500"></div>
+                    <div class="flex-1 overflow-y-auto p-6">
+                        <div class="max-w-4xl mx-auto">
+                            <div class="flex items-center justify-between mb-6">
+                                <div>
+                                    <h2 class="text-xl font-bold text-slate-800 dark:text-slate-100" data-i18n="models_title">模型管理</h2>
+                                    <p class="text-sm text-slate-500 dark:text-slate-400 mt-1" data-i18n="models_desc">统一管理对话、视觉、语音、向量、图像、搜索能力</p>
+                                </div>
+                                <button id="models-add-vendor-btn" onclick="openVendorModal('')"
+                                        class="flex items-center gap-2 px-4 py-2 rounded-lg bg-primary-500 hover:bg-primary-600
+                                               text-white text-sm font-medium cursor-pointer transition-colors duration-150">
+                                    <i class="fas fa-plus text-xs"></i>
+                                    <span data-i18n="models_add_vendor">添加厂商</span>
+                                </button>
+                            </div>
+                            <div id="models-loading" class="flex items-center gap-2 py-12 justify-center text-slate-400 dark:text-slate-500 text-sm">
+                                <i class="fas fa-spinner fa-spin text-xs"></i><span>Loading...</span>
+                            </div>
+                            <div id="models-content" class="grid gap-6 hidden"></div>
+                        </div>
+                    </div>
+                </div>
+
                <!-- ====================================================== -->
                <!-- VIEW: Channels                                          -->
                <!-- ====================================================== -->
@@ -959,7 +1013,7 @@
    </div><!-- /app -->

    <!-- Confirm Dialog -->
-    <div id="confirm-dialog-overlay" class="fixed inset-0 bg-black/50 z-[100] hidden flex items-center justify-center">
+    <div id="confirm-dialog-overlay" class="fixed inset-0 bg-black/50 z-[200] hidden flex items-center justify-center">
        <div class="bg-white dark:bg-[#1A1A1A] rounded-2xl border border-slate-200 dark:border-white/10 shadow-xl
                    w-full max-w-sm mx-4 overflow-hidden">
            <div class="p-6">
@@ -984,6 +1038,77 @@
        </div>
    </div>

+    <!-- Vendor Credentials Modal -->
+    <div id="vendor-modal-overlay" class="fixed inset-0 bg-black/50 z-[100] hidden flex items-center justify-center">
+        <div class="bg-white dark:bg-[#1A1A1A] rounded-2xl border border-slate-200 dark:border-white/10 shadow-xl
+                    w-full max-w-md mx-4">
+            <div class="p-6">
+                <div class="flex items-center gap-3 mb-5">
+                    <div class="w-10 h-10 rounded-xl bg-primary-50 dark:bg-primary-900/20 flex items-center justify-center flex-shrink-0">
+                        <i class="fas fa-key text-primary-500"></i>
+                    </div>
+                    <div class="min-w-0 flex-1">
+                        <h3 id="vendor-modal-title" class="font-semibold text-slate-800 dark:text-slate-100 text-base"></h3>
+                        <p id="vendor-modal-subtitle" class="text-xs text-slate-500 dark:text-slate-400 mt-0.5 font-mono"></p>
+                    </div>
+                </div>
+
+                <!-- Provider selector (only visible when adding via top button) -->
+                <div id="vendor-modal-picker-wrap" class="mb-4 hidden">
+                    <label class="block text-sm font-medium text-slate-600 dark:text-slate-400 mb-1.5" data-i18n="models_provider">厂商</label>
+                    <div id="vendor-modal-picker" class="cfg-dropdown" tabindex="0">
+                        <div class="cfg-dropdown-selected">
+                            <span class="cfg-dropdown-text">--</span>
+                            <i class="fas fa-chevron-down cfg-dropdown-arrow"></i>
+                        </div>
+                        <div class="cfg-dropdown-menu"></div>
+                    </div>
+                </div>
+
+                <div class="space-y-4">
+                    <div>
+                        <label class="block text-sm font-medium text-slate-600 dark:text-slate-400 mb-1.5">API Key</label>
+                        <input id="vendor-modal-key" type="text" autocomplete="off" data-1p-ignore data-lpignore="true"
+                               class="w-full px-3 py-2 rounded-lg border border-slate-200 dark:border-slate-600
+                                      bg-slate-50 dark:bg-white/5 text-sm text-slate-800 dark:text-slate-100
+                                      focus:outline-none focus:border-primary-500 font-mono transition-colors"
+                               placeholder="sk-...">
+                    </div>
+                    <div id="vendor-modal-base-wrap">
+                        <label class="block text-sm font-medium text-slate-600 dark:text-slate-400 mb-1.5">API Base</label>
+                        <input id="vendor-modal-base" type="text"
+                               class="w-full px-3 py-2 rounded-lg border border-slate-200 dark:border-slate-600
+                                      bg-slate-50 dark:bg-white/5 text-sm text-slate-800 dark:text-slate-100
+                                      focus:outline-none focus:border-primary-500 font-mono transition-colors"
+                               placeholder="https://...../v1">
+                        <p id="vendor-modal-base-hint" class="mt-1.5 text-xs text-slate-400 dark:text-slate-500 hidden">
+                            <i class="fas fa-info-circle mr-1"></i><span data-i18n="models_base_default_hint">留空将使用官方默认地址</span>
+                        </p>
+                    </div>
+                </div>
+            </div>
+            <div class="flex items-center justify-between gap-3 px-6 py-4 border-t border-slate-100 dark:border-white/5 rounded-b-2xl">
+                <button id="vendor-modal-clear"
+                        class="px-3 py-2 rounded-lg text-xs
+                               text-red-500 dark:text-red-400 hover:bg-red-50 dark:hover:bg-red-900/20
+                               cursor-pointer transition-colors duration-150 hidden"
+                        data-i18n="models_clear_credential">清除凭据</button>
+                <span id="vendor-modal-status"
+                      class="flex-1 text-xs text-primary-500 opacity-0 transition-opacity duration-300 text-center"></span>
+                <button id="vendor-modal-cancel"
+                        class="px-4 py-2 rounded-lg border border-slate-200 dark:border-white/10
+                               text-slate-600 dark:text-slate-300 text-sm font-medium
+                               hover:bg-slate-50 dark:hover:bg-white/5
+                               cursor-pointer transition-colors duration-150"
+                        data-i18n="cancel">取消</button>
+                <button id="vendor-modal-save"
+                        class="px-4 py-2 rounded-lg bg-primary-500 hover:bg-primary-600 text-white text-sm font-medium
+                               cursor-pointer transition-colors duration-150 disabled:opacity-50 disabled:cursor-not-allowed"
+                        data-i18n="save">保存</button>
+            </div>
+        </div>
+    </div>
+
    <script defer src="assets/js/console.js"></script>
 </body>
 </html>
--- a/channel/web/static/css/console.css
+++ b/channel/web/static/css/console.css
@@ -725,6 +725,58 @@
    background: rgba(74, 190, 110, 0.15);
    color: #74E9A4;
 }
+/* When an item carries a hint (e.g. brand alias next to a technical model
+   id), label/hint are split into two spans so the hint sits on the right in
+   a dim, smaller weight. Without a hint the row stays a plain text node and
+   uses the default ellipsis behaviour, so no layout regressions for old call
+   sites. */
+.cfg-dropdown-label {
+    flex: 1 1 auto;
+    min-width: 0;
+    overflow: hidden;
+    text-overflow: ellipsis;
+}
+.cfg-dropdown-hint {
+    flex-shrink: 0;
+    margin-left: auto;
+    padding-left: 12px;
+    color: #94a3b8;
+    font-size: 12px;
+    font-weight: 400;
+}
+.dark .cfg-dropdown-hint {
+    color: #64748b;
+}
+.cfg-dropdown-item.active .cfg-dropdown-hint {
+    /* Tint the hint toward the brand colour on the active row so it doesn't
+       fight with the highlighted label tone. */
+    color: rgba(34, 133, 71, 0.65);
+}
+.dark .cfg-dropdown-item.active .cfg-dropdown-hint {
+    color: rgba(116, 233, 164, 0.6);
+}
+/* The active row gets a trailing brand-green checkmark via a Font Awesome
+   pseudo-element so every dropdown (chat / vision / image / asr / tts / etc.)
+   surfaces "this is what's currently selected" without per-call JS plumbing.
+   When a hint is present, the ✓ sits to its right with a small gap; without
+   a hint, margin-left:auto pushes the ✓ flush against the right edge. */
+.cfg-dropdown-item.active::after {
+    content: '\f00c';                  /* FontAwesome check glyph */
+    font-family: 'Font Awesome 6 Free', 'Font Awesome 5 Free', 'FontAwesome';
+    font-weight: 900;
+    margin-left: auto;
+    padding-left: 12px;
+    color: #4abe6e;
+    font-size: 11px;
+    flex-shrink: 0;
+}
+.cfg-dropdown-item.active:has(.cfg-dropdown-hint)::after {
+    /* When hint occupies the auto-margin slot, the ✓ no longer benefits
+       from `margin-left: auto`; replace it with a small fixed gap so the
+       ✓ trails the hint cleanly. */
+    margin-left: 0;
+    padding-left: 10px;
+}

 /* API Key masking via CSS (avoids browser password prompts) */
 .cfg-key-masked {
@@ -732,6 +784,77 @@
    text-security: disc;
 }

+/* Provider logo image — vendors flagged as `provider-logo-invert-dark`
+   ship a black wordmark that disappears on the dark canvas; we invert their
+   luminance only in dark mode so the brand stays recognizable without
+   touching multi-color marks like Google/MiniMax. */
+.provider-logo-img {
+    object-fit: contain;
+    object-position: center;
+}
+.dark .provider-logo-invert-dark {
+    filter: invert(1) brightness(1.15);
+}
+
+/* Models page — provider dropdown rows.
+   Configured rows look like ordinary picker entries; the .active row's
+   trailing brand-green ✓ already announces "this is what's selected"
+   (handled globally by .cfg-dropdown-item.active::after above).
+   Unconfigured rows are visually subdued and carry a trailing gear icon
+   as a "click to set up" affordance. */
+.cap-provider-label {
+    flex: 1 1 auto;
+    overflow: hidden;
+    text-overflow: ellipsis;
+}
+.cap-provider-gear {
+    margin-left: auto;
+    padding-left: 12px;
+    color: #94a3b8;
+    font-size: 11px;
+    flex-shrink: 0;
+}
+.cap-provider-item.cap-provider-unconfigured {
+    color: #94a3b8;
+}
+.dark .cap-provider-item.cap-provider-unconfigured {
+    color: #64748b;
+}
+.cap-provider-item.cap-provider-unconfigured:hover {
+    color: #475569;
+}
+.dark .cap-provider-item.cap-provider-unconfigured:hover {
+    color: #cbd5e1;
+}
+.cap-provider-item.cap-provider-unconfigured:hover .cap-provider-gear {
+    color: #475569;
+}
+.dark .cap-provider-item.cap-provider-unconfigured:hover .cap-provider-gear {
+    color: #cbd5e1;
+}
+/* If the active row ever lands on an unconfigured vendor (defensive — the
+   click handler normally diverts to the modal), suppress the global ✓ so
+   the gear remains the sole trailing icon and the row keeps reading as
+   "needs setup" rather than "already selected". */
+.cap-provider-item.cap-provider-unconfigured.active::after {
+    content: none;
+}
+
+/* "Add vendor" modal picker — each configured row carries a static
+   brand-green ✓ via decorateVendorModalPicker so users can see what's set
+   up at a glance. The active row's global ✓ is suppressed here to avoid
+   showing two checks side by side on configured + selected rows. */
+.vendor-picker-item.active::after {
+    content: none;
+}
+.vendor-picker-configured-mark {
+    margin-left: auto;
+    padding-left: 12px;
+    color: #4abe6e;
+    font-size: 11px;
+    flex-shrink: 0;
+}
+
 /* Chat Input */
 #chat-input {
    resize: none; height: 42px; max-height: 180px;
@@ -1171,3 +1294,108 @@
    overflow: hidden;
    min-height: 2.5em;  /* ~2 lines at text-sm leading-relaxed */
 }
+
+/* --------------------------------------------------------------------
+ * Voice pill — compact custom audio player used by mic uploads and TTS
+ * replies. Replaces the bulky native <audio controls> with a play/pause
+ * icon + thin progress bar + duration counter so it blends into chat
+ * bubbles without the chrome-grey browser default look.
+ * ------------------------------------------------------------------ */
+.voice-pill {
+    display: inline-flex;
+    align-items: center;
+    gap: 8px;
+    padding: 6px 10px;
+    border-radius: 999px;
+    background: rgba(15, 23, 42, 0.05);
+    color: rgb(71, 85, 105);
+    font-size: 12px;
+    line-height: 1;
+    max-width: 240px;
+    user-select: none;
+    cursor: default;
+}
+.dark .voice-pill {
+    background: rgba(255, 255, 255, 0.08);
+    color: rgb(203, 213, 225);
+}
+.voice-pill[data-loading="1"] {
+    opacity: 0.65;
+}
+.voice-pill-btn {
+    width: 22px;
+    height: 22px;
+    border-radius: 999px;
+    display: inline-flex;
+    align-items: center;
+    justify-content: center;
+    background: var(--color-primary-500, #2563eb);
+    color: #fff;
+    flex-shrink: 0;
+    cursor: pointer;
+    transition: transform 0.1s ease;
+}
+.voice-pill-btn:hover { transform: scale(1.05); }
+.voice-pill-btn i { font-size: 9px; margin-left: 1px; }
+.voice-pill-btn[data-state="play"] i { margin-left: 2px; }
+.voice-pill-btn[data-state="pause"] i { margin-left: 0; }
+.voice-pill-track {
+    flex: 1;
+    height: 3px;
+    border-radius: 999px;
+    background: rgba(100, 116, 139, 0.25);
+    overflow: hidden;
+    min-width: 70px;
+}
+.dark .voice-pill-track {
+    background: rgba(148, 163, 184, 0.25);
+}
+.voice-pill-fill {
+    height: 100%;
+    width: 0%;
+    background: var(--color-primary-500, #2563eb);
+    border-radius: inherit;
+    transition: width 0.1s linear;
+}
+.voice-pill-time {
+    font-variant-numeric: tabular-nums;
+    font-size: 11px;
+    color: inherit;
+    opacity: 0.75;
+    flex-shrink: 0;
+    min-width: 28px;
+    text-align: right;
+}
+.voice-pill audio { display: none; }
+
+/* Send button toggles into a Stop button while an SSE stream is in flight.
+   Match the look of the disabled send button (light grey block + white
+   glyph) so it reads as the same visual element: idle as far as sending
+   is concerned, but still clickable to stop the stream. */
+#send-btn.send-btn-cancel {
+    background-color: rgb(203 213 225) !important; /* slate-300, == disabled send-btn */
+    color: white !important;
+}
+#send-btn.send-btn-cancel:hover {
+    background-color: rgb(148 163 184) !important; /* slate-400 */
+}
+#send-btn.send-btn-cancel:disabled {
+    background-color: rgb(226 232 240) !important; /* slate-200, while stop is in flight */
+    color: white !important;
+    cursor: progress;
+}
+.dark #send-btn.send-btn-cancel {
+    background-color: rgb(71 85 105) !important; /* slate-600, == dark disabled send-btn */
+    color: white !important;
+}
+.dark #send-btn.send-btn-cancel:hover {
+    background-color: rgb(100 116 139) !important; /* slate-500 */
+}
+.dark #send-btn.send-btn-cancel:disabled {
+    background-color: rgb(51 65 85) !important; /* slate-700 */
+    color: rgb(203 213 225) !important;
+}
+
+.agent-cancelled-tag {
+    font-style: italic;
+}
--- a/channel/web/static/js/console.js
+++ b/channel/web/static/js/console.js
--- a/channel/web/static/logos/claudeAPI.svg
+++ b/channel/web/static/logos/claudeAPI.svg
@@ -0,0 +1 @@
+<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251656961" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="18432" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M252.8 652.8l167.893333-94.293333 2.773334-8.106667-2.773334-4.48h-8.106666l-28.16-1.706667-96-2.56-83.2-3.413333-80.64-4.266667-20.266667-4.266666L85.333333 504.746667l1.92-12.586667 17.066667-11.52 24.32 2.133333 53.973333 3.626667 81.066667 5.546667 58.666667 3.413333 87.04 9.173333h13.866666l1.92-5.546666-4.693333-3.413334-3.626667-3.413333-83.84-56.746667-90.666666-60.16-47.573334-34.56-25.813333-17.493333-13.013333-16.426667-5.546667-35.84 23.253333-25.813333 31.36 2.133333 7.893334 2.133334 31.786666 24.32 67.84 52.48L401.066667 391.466667l13.013333 10.88 5.12-3.626667 0.64-2.56-5.76-9.813333-48.213333-87.04L314.453333 210.773333l-22.826666-36.693333-5.973334-21.973333a107.861333 107.861333 0 0 1-3.626666-26.026667l26.666666-36.053333L323.413333 85.333333l35.413334 4.693334 14.933333 13.013333 21.973333 50.346667 35.626667 79.36 55.253333 107.733333 16.213334 32 8.746666 29.653333 3.2 9.173334h5.546667v-5.12l4.48-60.8 8.32-74.453334 8.106667-96 2.773333-27.093333 13.44-32.426667 26.666667-17.493333 20.693333 10.026667 17.066667 24.32-2.346667 15.786666-10.24 65.92-19.84 103.253334-13.013333 69.12h7.466666l8.746667-8.746667 34.986667-46.506667 58.666666-73.386666 26.026667-29.226667 30.293333-32.213333 19.413334-15.36h36.693333l27.093333 40.106666-12.16 41.386667-37.76 48-31.36 40.533333-45.013333 60.586667-28.16 48.426667 2.56 3.84 6.613333-0.64 101.546667-21.546667 54.826667-10.026667 65.493333-11.306666 29.653333 13.866666 3.2 14.08-11.733333 28.8-69.973333 17.28-82.133334 16.426667-122.24 29.013333-1.493333 1.066667 1.706667 2.133333 55.04 5.12 23.466666 1.28h57.6l107.306667 7.893334 28.16 18.56 16.853333 22.613333-2.773333 
17.28-43.306667 21.973333-58.24-13.866666-136.106666-32.426667-46.72-11.733333h-6.4v3.84l38.826666 37.973333 71.253334 64.426667 89.173333 82.986666 4.48 20.48-11.52 16.213334-12.16-1.706667-78.506667-58.88-30.293333-26.666667-68.48-57.6h-4.48v5.973334l15.786667 23.04 83.413333 125.226666 4.266667 38.4-5.973334 12.586667-21.546666 7.466667-23.68-4.266667-48.853334-68.48-50.346666-77.226667-40.533334-69.12-4.906666 2.773334-23.893334 258.133333-11.306666 13.226667-26.026667 10.026666-21.546667-16.426666-11.52-26.666667 11.52-52.48 13.866667-68.48 11.306667-54.4 10.24-67.626667 5.973333-22.4-0.426667-1.493333-4.906666 0.64-50.986667 69.973333-77.653333 104.746667-61.44 65.706667-14.72 5.76-25.386667-13.226667 2.346667-23.466667 14.293333-20.906666 84.906667-107.946667 51.2-66.986667 33.066666-38.613333v-5.546667h-2.133333l-225.493333 146.56-40.106667 5.12-17.28-16.213333 2.133333-26.666667 8.106667-8.746666 67.84-46.72h-0.213333l0.853333 0.853333z" fill="#D97757" p-id="18433"></path></svg>
--- a/channel/web/static/logos/custom.svg
+++ b/channel/web/static/logos/custom.svg
@@ -0,0 +1,10 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="200" height="200" fill="none" stroke="#475569" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round">
+  <!-- Horizontal slider tracks -->
+  <line x1="4" y1="7" x2="20" y2="7"/>
+  <line x1="4" y1="12" x2="20" y2="12"/>
+  <line x1="4" y1="17" x2="20" y2="17"/>
+  <!-- Knobs (filled circles) -->
+  <circle cx="9" cy="7"  r="2.2" fill="#475569" stroke="none"/>
+  <circle cx="15" cy="12" r="2.2" fill="#475569" stroke="none"/>
+  <circle cx="7" cy="17"  r="2.2" fill="#475569" stroke="none"/>
+</svg>
--- a/channel/web/static/logos/dashscope.svg
+++ b/channel/web/static/logos/dashscope.svg
@@ -0,0 +1 @@
+<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251621200" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="17444" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M1019.364785 620.816931L891.797142 397.807295 946.450846 293.15069a29.097778 29.097778 0 0 0 6.399732-36.393472l-70.184053-126.586684a30.078737 30.078737 0 0 0-24.574968-13.652427H597.4945L539.171949 14.549389a27.348852 27.348852 0 0 0-20.906122-14.549389H380.628607a29.139776 29.139776 0 0 0-24.616967 14.549389v5.545767L225.797108 243.062793H100.919352a29.182775 29.182775 0 0 0-25.513928 13.653427L3.428446 384.11187a32.766624 32.766624 0 0 0 0 29.182775L132.831012 638.096205 74.508461 740.064923a32.766624 32.766624 0 0 0 0 29.05478l66.514207 116.561105a29.905744 29.905744 0 0 0 25.513929 14.505391H427.132654l62.845361 109.222414A30.078737 30.078737 0 0 0 512.762058 1024H660.382859a29.139776 29.139776 0 0 0 24.574968-14.549389l128.463606-224.843558h114.76818a31.91366 31.91366 0 0 0 24.660965-15.444352l66.471208-117.414069a28.158818 28.158818 0 0 0 0-30.9747l0.042999 0.042999z m-161.273228 14.591387L791.57735 512.490479 518.265827 993.964261l-74.748861-122.87484h-273.268525l65.618244-119.205994h139.386147L101.856313 272.244568h143.055993L380.671605 30.121735l68.34913 119.247993-70.184053 122.87484H925.501726l-69.202094 121.936879 137.594222 241.183873H858.134555z" fill="#605BEC" p-id="17445"></path><path d="M499.962596 699.320634l174.371677-274.719464H324.694955z" fill="#605BEC" p-id="17446"></path></svg>
--- a/channel/web/static/logos/deepseek.svg
+++ b/channel/web/static/logos/deepseek.svg
--- a/channel/web/static/logos/doubao.svg
+++ b/channel/web/static/logos/doubao.svg
@@ -0,0 +1 @@
+<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779261485522" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="5381" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M958.976 439.808C804.864 336.896 642.56 321.536 642.56 321.536s8.192 235.008-10.752 306.176c-0.512 9.728-11.776 75.264-43.008 157.696-10.752 28.16-24.064 55.296-39.424 81.408-40.96 74.24-89.6 127.488-89.6 127.488 119.808-48.64 205.312-92.672 309.76-175.616 122.88-96.768 229.376-254.464 189.44-378.88z" fill="#37E1BE" p-id="5382"></path><path d="M329.728 395.776c158.208-100.864 308.736-78.848 312.32-74.752 0.512 0.512 1.024 0.512 1.024 0.512 0-14.336-6.656-60.928-13.312-106.496-11.776-60.928-22.528-124.928-23.04-133.632-170.496-139.264-356.864-78.336-448 25.6-61.44 70.144-103.424 169.984-102.4 224.256V762.88c0.512-12.8 1.536-20.48 2.048-20.48 17.92-197.12 271.36-346.624 271.36-346.624z" fill="#A569FF" p-id="5383"></path><path d="M792.064 272.384c-41.984-43.52-87.552-88.576-122.368-125.44-33.28-34.816-59.392-60.928-62.976-65.536 0.512 8.704 11.264 72.704 23.04 133.632 6.656 45.568 12.8 92.672 13.312 106.496 0 0 162.304 15.36 316.416 118.272-0.512 0-83.456-80.384-167.424-167.424zM549.888 866.816c-2.56 1.024-198.656 107.008-292.352-30.72-20.992-30.72-31.744-68.096-33.28-106.496-3.072-74.752 5.12-227.84 105.472-333.824 0 0-253.44 149.504-270.848 346.624-0.512 0.512-2.048 8.192-2.048 20.48-1.024 32.768 4.608 98.304 43.008 155.136 52.224 78.336 193.024 138.752 328.192 85.504l33.28-9.728c-1.024 0.512 47.616-52.224 88.576-126.976z" fill="#1E37FC" p-id="5384"></path></svg>
--- a/channel/web/static/logos/gemini.svg
+++ b/channel/web/static/logos/gemini.svg
@@ -0,0 +1 @@
+<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251750646" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="29551" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M214.101333 512c0-32.512 5.546667-63.701333 15.36-92.928L57.173333 290.218667A491.861333 491.861333 0 0 0 4.693333 512c0 79.701333 18.858667 154.88 52.394667 221.610667l172.202667-129.066667A290.56 290.56 0 0 1 214.101333 512" fill="#FBBC05" p-id="29552"></path><path d="M516.693333 216.192c72.106667 0 137.258667 25.002667 188.458667 65.962667L854.101333 136.533333C763.349333 59.178667 646.997333 11.392 516.693333 11.392c-202.325333 0-376.234667 113.28-459.52 278.826667l172.373334 128.853333c39.68-118.016 152.832-202.88 287.146666-202.88" fill="#EA4335" p-id="29553"></path><path d="M516.693333 807.808c-134.357333 0-247.509333-84.864-287.232-202.88l-172.288 128.853333c83.242667 165.546667 257.152 278.826667 459.52 278.826667 124.842667 0 244.053333-43.392 333.568-124.757333l-163.584-123.818667c-46.122667 28.458667-104.234667 43.776-170.026666 43.776" fill="#34A853" p-id="29554"></path><path d="M1005.397333 512c0-29.568-4.693333-61.44-11.648-91.008H516.650667V614.4h274.602666c-13.696 65.962667-51.072 116.650667-104.533333 149.632l163.541333 123.818667c93.994667-85.418667 155.136-212.650667 155.136-375.850667" fill="#4285F4" p-id="29555"></path></svg>
--- a/channel/web/static/logos/linkai.svg
+++ b/channel/web/static/logos/linkai.svg
--- a/channel/web/static/logos/minimax.svg
+++ b/channel/web/static/logos/minimax.svg
@@ -0,0 +1 @@
+<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251514432" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="11888" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M415.392 475.808v329.984c-22.304 111.744-170.56 82.944-171.2 1.92-0.672-101.824 0-202.976 0-304.064v-117.184c0-14.656-3.2-26.24-16-35.392-24.96-18.72-54.944 3.264-55.584 30.208-1.408 36.16-0.704 71.616-1.408 107.264 0 28.16 0 55.52 0.64 83.648-18.368 123.776-168.32 103.232-171.808 0.704V487.04c0-28.032 54.944-34.624 52.256 7.36-1.792 20.8-0.64 42.272-1.344 62.912-0.64 36.8 55.648 61.6 68.896 1.408 0.64-49.632 0.64-99.264 0.64-149.344 0-62.752 17.824-113.856 84.352-118.624 28.8-2.56 47.968 9.504 66.336 30.304 7.04 7.36 23.68 30.72 24.32 56.16 0 23.456 0.64 46.752 0.64 70.464 0 46.72-0.64 93.76-0.64 140.48 0 30.304 0.64 60.256 0.64 89.856 0 37.536 0 75.552-0.64 113.152-0.64 48.864 58.816 48.16 68.352-0.768 0-57.632 0.64-114.56 0.64-172.192 0-141.984-0.64-283.968-0.64-425.856 0-14.72-2.048-55.584 5.76-70.464 41.504-101.12 167.392-56.96 168.544 26.72 2.432 171.52 0 344.896 0.64 516.8 0 59.616-48.416 46.816-51.104 23.488 0-178.88 0-358.4 0.64-537.024-2.368-44.832-68.832-38.72-72.672-6.592-1.28 36.864-0.64 74.4-1.28 111.232v219.008h0.64l0.448 0.256h-0.064z" fill="#D4367A" p-id="11889"></path><path d="M610.016 473.184v242.336V143.648c21.632-112.512 169.824-83.264 170.464-2.176 0.704 101.12 0 202.912 0.704 304 0 38.784 0 77.728-0.64 116.544 0 15.36 3.776 26.176 16.64 36.032 24.32 18.24 54.24-3.2 55.584-30.592 1.344-35.488 0.64-70.976 0.64-107.328V376.96c18.56-123.776 168.128-103.232 171.264-0.704v310.592c0 28.16-54.304 34.848-51.872-7.296 1.472-21.44 0-267.104 0.768-288.64 1.28-36.16-55.712-61.664-68.928-0.768v148.576c0 63.68-17.856 113.92-84.96 119.36-63.264 1.504-88.704-42.24-90.752-86.432V271.328c0-38.24 0-75.552 0.64-113.088 
0.64-48.864-58.784-48.864-68.896 0.704V831.36c0 14.592 2.048 55.52-5.184 70.432-41.44 101.056-168 56.864-169.152-26.752v-79.616c3.136-53.6 48.416-40.864 50.464-18.176v94.464c2.432 44.928 68.928 39.488 72.064 6.656 1.344-36.896 1.344-73.728 1.344-111.296v-293.824h-0.192v-0.064z" fill="#ED6D48" p-id="11890"></path></svg>
--- a/channel/web/static/logos/moonshot.svg
+++ b/channel/web/static/logos/moonshot.svg
@@ -0,0 +1 @@
+<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251592968" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="16416" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M117.9648 684.6464l342.30272 93.57312v75.34592l209.7152 58.5728A428.99456 428.99456 0 0 1 512 942.08c-176.128 0-327.53664-105.8816-394.0352-257.4336zM83.29216 477.42976l407.30624 112.64-9.6256 37.00736-6.0416 35.0208 383.3856 104.96a432.5376 432.5376 0 0 1-65.10592 70.32832l-688.18944-185.9584A429.4656 429.4656 0 0 1 81.92 512c0-11.63264 0.47104-23.1424 1.37216-34.54976z m57.344-182.4768l429.07648 114.21696a279.94112 279.94112 0 0 0-23.06048 35.55328 201.17504 201.17504 0 0 0-14.70464 34.93888l403.08736 110.26432a426.8032 426.8032 0 0 1-23.552 81.7152L86.54848 448.7168a427.25376 427.25376 0 0 1 54.0672-153.76384z m158.47424-156.75392l404.23424 108.31872a190.2592 190.2592 0 0 0-32.80896 24.90368c-9.13408 8.8064-19.8656 21.4016-32.1536 37.74464l285.24544 77.78304c9.216 30.45376 15.03232 61.8496 17.32608 93.5936L156.61056 269.68064a432.27136 432.27136 0 0 1 142.49984-131.4816zM512 81.92c142.90944 0 269.55776 69.71392 347.7504 176.98816L337.26464 118.90688A428.50304 428.50304 0 0 1 512 81.92z" fill="#000000" p-id="16417"></path></svg>
--- a/channel/web/static/logos/openai.svg
+++ b/channel/web/static/logos/openai.svg
@@ -0,0 +1 @@
+<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251225589" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="9015" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M881.664 431.488a218.88 218.88 0 0 0-18.176-177.088A218.624 218.624 0 0 0 628.992 149.76c-40.576-45.824-100.288-71.424-162.176-71.424a219.136 219.136 0 0 0-208 150.4 215.68 215.68 0 0 0-144 104.512 218.944 218.944 0 0 0 26.688 254.912 218.752 218.752 0 0 0 19.2 177.152 217.088 217.088 0 0 0 234.624 104.512 219.136 219.136 0 0 0 162.112 72.512 219.136 219.136 0 0 0 208-150.4 215.68 215.68 0 0 0 144-104.512 219.008 219.008 0 0 0-27.712-256z m-324.288 454.4a158.08 158.08 0 0 1-103.424-37.376c1.088-1.088 4.288-2.176 5.376-3.2l171.712-99.2a28.16 28.16 0 0 0 13.824-24.512V479.488l72.576 41.6c1.024 0 1.024 1.024 1.024 2.112v200.512a160.512 160.512 0 0 1-161.088 162.112z m-347.712-148.288c-19.2-33.088-25.6-71.488-19.2-108.8 1.088 1.024 3.2 2.176 5.376 3.2l171.712 99.2a25.984 25.984 0 0 0 27.712 0l210.112-121.6v84.224c0 1.152 0 2.176-1.024 2.176L430.464 796.16c-76.8 44.8-176 18.176-220.8-58.624z m-44.736-375.424c19.2-32.64 48.896-57.856 84.224-71.488v204.8c0 9.6 5.376 19.2 13.888 24.512l210.176 121.6-72.576 41.6c-1.024 0-2.112 1.088-2.112 0L224.64 582.912a160.448 160.448 0 0 1-59.776-220.8h0.064z m597.312 138.688l-210.112-121.6 72.512-41.6c1.088 0 2.176-1.088 2.176 0l173.824 100.224a161.088 161.088 0 0 1-25.6 291.2V525.44a26.304 26.304 0 0 0-12.8-24.512z m71.488-108.8a23.232 23.232 0 0 0-5.312-3.2L656.64 289.536a26.048 26.048 0 0 0-27.712 0l-210.176 121.6V326.912c0-1.088 0-2.176 1.088-2.176l173.824-100.224a161.152 161.152 0 0 1 220.8 59.712c19.2 32 25.6 70.4 19.2 107.776z m-454.4 149.248l-72.64-41.6c-1.024 0-1.024-1.088-1.024-2.176V297.088A162.048 162.048 0 0 1 467.84 135.04a158.08 158.08 0 0 1 103.424 37.312 22.848 22.848 0 0 1-5.312 3.2L394.24 
274.688a28.16 28.16 0 0 0-13.888 24.512v242.112h-1.088z m39.424-85.312l93.824-54.4 93.888 54.4v107.712l-93.888 54.4-93.824-54.4V456z" fill="#000000" p-id="9016"></path></svg>
--- a/channel/web/static/logos/qianfan.svg
+++ b/channel/web/static/logos/qianfan.svg
@@ -0,0 +1 @@
+<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251568791" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="14450" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M96.20121136 636.3124965c-0.1472897-113.41305959-0.29457937-226.8261192-0.29457937-340.23917879 0-14.87625845 7.65906378-26.51214381 20.4732666-34.02391789 45.51251353-26.65943349 91.02502705-53.31886698 136.83211997-79.53643141 71.1409192-40.94653321 142.42912809-81.59848704 213.71733698-122.39773055 7.36448439-4.12411126 14.58167909-8.3955122 21.50429441-13.2560719 19.44223878-13.40336159 39.03176725-16.05457598 60.09419263-3.53495252 27.39588193 16.34915535 54.93905355 32.25644163 82.48222516 48.16372793 88.0792333 50.96223197 176.30575629 101.77717426 264.38498958 152.59211653 9.86840908 5.74429781 19.88410785 11.19401627 29.60522725 17.0856038 14.13981003 8.54280189 21.50429441 21.06242535 21.50429443 37.70616007 0 147.73155685 0.29457937 295.46311371-0.1472897 443.19467057 0 15.46541722-7.2171947 28.57419943-21.7988738 36.96971163-34.7603663 20.17868721-70.55176044 38.88447758-104.57567833 59.94690293-48.90017634 30.19438599-100.00969801 56.11737105-148.76258466 86.60633642-29.01606849 18.11663161-59.50503387 34.02391789-89.11026112 50.96223197-13.10878221 7.51177407-26.07027474 15.17083783-39.03176726 22.9771913-13.84523065 8.3955122-27.83775099 8.83738127-41.97756102 0.73644843-56.41195043-32.55102101-112.82390085-65.10204201-169.38314098-97.653063-61.86166887-35.64410444-123.72333775-71.1409192-185.4377169-106.78502365-11.19401627-6.48074626-22.24074286-12.81420285-32.99289009-19.88410785-11.48859565-7.65906378-17.08560379-19.14765941-17.08560378-32.69831069-0.1472897-34.7603663 0.1472897-69.52073264 0.29457938-104.28109895 1.62018657-0.58915875 1.62018657-1.62018657-0.29457938-2.65121438z 
m356.58833414-225.500512c2.20934532-1.76747625 4.41869063-3.68224221 6.77532565-5.15513907 68.93157389-39.62092601 137.86314777-79.24185204 206.94201135-118.86277807 2.79850407-1.62018657 6.48074626-1.62018657 6.62803594-6.18616688 0.1472897-4.8605597-4.12411126-4.71327001-6.77532564-6.18616688-40.65195383-23.56635005-81.59848704-46.83812071-122.10315117-70.84633984-16.79102442-10.01569877-32.84560039-8.54280189-48.45830728 0.58915876-45.9543826 26.51214381-91.46689612 53.61344636-137.27398903 80.42016953-31.96186226 18.70579035-64.21830387 37.11700133-96.32745581 55.67550198-18.41121097 10.60485751-27.54317163 25.33382629-27.24859225 47.72185885 0.88373813 89.55213018 0.58915875 179.10426036 0.14728969 268.65639053-0.1472897 20.17868721 9.27925033 33.58204881 25.33382629 43.15587853 31.3727035 18.70579035 63.18727606 37.11700133 95.14913832 54.93905355 10.89943689 6.03887719 21.06242535 13.99252034 35.79139414 18.41121096V505.51925374c6.48074626 19.58952848 18.55850066 34.02391789 36.67513226 44.6287754 27.83775099 16.20186565 63.18727606 12.51962347 86.31175705-10.45756784 26.95401286-26.65943349 28.72148912-62.89269668 12.81420282-90.14128893-16.34915535-28.42690974-43.59774757-37.55887038-74.38129233-38.73718787z m82.48222517 429.64401928c14.28709972-3.82953187 25.92298506-13.99252034 38.88447758-21.35700473 40.94653321-23.27177067 81.30390766-47.72185885 122.54502023-70.55176046 26.95401286-15.02354815 52.87699792-31.66728287 80.71474891-45.21793415 16.79102442-8.10093283 29.60522723-22.53532223 29.60522726-43.4504579 0.1472897-92.939793 0.29457937-185.73229631 0.14728969-278.6720893 0-11.19401627-5.15513907-13.99252034-13.84523067-7.06990501-26.51214381 20.76784598-57.29568854 34.46578693-86.16446735 51.25681135-54.49718448 31.81457257-109.14165865 63.33456576-163.78613282 95.00184862-8.54280189 4.8605597-11.78317502 10.45756784-11.63588535 20.47326662 0.29457937 96.18016613 0.1472897 192.50762194 0.1472897 288.68778806-0.29457937 3.5349525-1.47289687 
7.65906378 3.38766282 10.8994369z" fill="#066AF3" p-id="14451"></path><path d="M96.20121136 636.3124965c1.91476594 1.03102783 1.91476594 2.06205563 0 3.09308345v-3.09308345z" fill="#4372E0" p-id="14452"></path><path d="M391.3697457 505.37196405c-5.44971845-44.33419602 13.84523065-74.08671296 61.4197998-94.55997955 30.93083443 1.17831749 58.03213699 10.31027814 74.38129233 38.5898982 15.75999659 27.39588193 14.13981003 63.48185543-12.81420282 90.14128893-23.27177067 22.97719129-58.47400606 26.65943349-86.31175705 10.45756783-18.11663161-10.60485751-30.34167568-25.03924691-36.67513226-44.62877541z" fill="#002A9A" p-id="14453"></path></svg>
--- a/channel/web/static/logos/zhipu.svg
+++ b/channel/web/static/logos/zhipu.svg
@@ -0,0 +1 @@
+<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251419020" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="10062" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M520.063496 0v77.563152c0 269.231173-144.758953 414.054122-434.212862 434.340854L86.106618 511.968002H76.827198V255.984001l443.236298-255.984001z" fill="#5B55F6" p-id="10063"></path><path d="M520.063496 1023.936004v-77.563152c0-269.231173-144.758953-414.054122-434.212862-434.340854L86.042622 511.968002H76.827198v255.984001l443.236298 255.984001z" fill="#376AF3" p-id="10064"></path><path d="M520.063496 0v77.563152c0 269.231173 144.758953 414.054122 434.276858 434.340854L954.08437 511.968002h9.215424V255.984001L520.063496 0z" fill="#5B55F6" p-id="10065"></path><path d="M520.063496 1023.936004v-77.563152c0-269.231173 144.758953-414.054122 434.276858-434.340854L954.08437 511.968002h9.27942v255.984001l-443.236298 255.984001z" fill="#376AF3" p-id="10066"></path></svg>
--- a/channel/web/web_channel.py
+++ b/channel/web/web_channel.py
--- a/channel/wechatmp/passive_reply.py
+++ b/channel/wechatmp/passive_reply.py
@@ -103,14 +103,21 @@ class Query:
                task_running = True
                waiting_until = request_time + 4
                while time.time() < waiting_until:
-                    if from_user in channel.running:
-                        time.sleep(0.1)
-                    else:
+                    if from_user not in channel.running:
                        task_running = False
                        break
+                    # Task still running, but if it has already produced cached
+                    # segments (e.g. multi-turn thinking output), return them now
+                    # instead of forcing the user to wait for the whole task. The
+                    # remaining segments are fetched by the user's next message.
+                    if channel.cache_dict.get(from_user):
+                        break
+                    time.sleep(0.1)

                reply_text = ""
-                if task_running:
+                # Only fall back to retry / "thinking" hint when the task is still
+                # running AND there is nothing cached to send yet.
+                if task_running and not channel.cache_dict.get(from_user):
                    if request_cnt < 3:
                        # waiting for timeout (the POST request will be closed by Wechat official server)
                        time.sleep(2)
@@ -131,8 +138,22 @@ class Query:

                # Only one request can access to the cached data
                try:
-                    (reply_type, reply_content) = channel.cache_dict[from_user].pop(0)
-                    if not channel.cache_dict[from_user]:  # If popping the message makes the list empty, delete the user entry from cache
+                    # WeChat passive reply allows only a single reply per request.
+                    # To avoid forcing the user to send an extra message for every
+                    # segment of multi-turn agent output, drain all consecutive
+                    # cached text segments at once and merge them into one reply.
+                    # Media (voice/image) can only be returned one at a time, so it
+                    # stops the merge and is returned on its own.
+                    cached = channel.cache_dict[from_user]
+                    if cached[0][0] == "text":
+                        reply_type = "text"
+                        merged_parts = []
+                        while cached and cached[0][0] == "text":
+                            merged_parts.append(cached.pop(0)[1])
+                        reply_content = "\n\n".join(merged_parts)
+                    else:
+                        (reply_type, reply_content) = cached.pop(0)
+                    if not channel.cache_dict[from_user]:  # If draining empties the list, delete the user entry from cache
                        del channel.cache_dict[from_user]
                except IndexError:
                    return "success"
--- a/channel/wechatmp/wechatmp_channel.py
+++ b/channel/wechatmp/wechatmp_channel.py
@@ -134,8 +134,14 @@ class WechatMPChannel(ChatChannel):

            elif reply.type == ReplyType.IMAGE_URL:  # 从网络下载图片
                img_url = reply.content
-                pic_res = requests.get(img_url, stream=True)
                image_storage = io.BytesIO()
+                if img_url.startswith("file://") or os.path.isfile(img_url):
+                    # Local file produced by the agent (e.g. a generated image)
+                    local_path = img_url[len("file://"):] if img_url.startswith("file://") else img_url
+                    with open(local_path, "rb") as f:
+                        image_storage.write(f.read())
+                else:
+                    pic_res = requests.get(img_url, stream=True)
                    for block in pic_res.iter_content(1024):
                        image_storage.write(block)
                image_storage.seek(0)
@@ -258,8 +264,14 @@ class WechatMPChannel(ChatChannel):
                logger.info("[wechatmp] Do send voice to {}".format(receiver))
            elif reply.type == ReplyType.IMAGE_URL:  # 从网络下载图片
                img_url = reply.content
-                pic_res = requests.get(img_url, stream=True)
                image_storage = io.BytesIO()
+                if img_url.startswith("file://") or os.path.isfile(img_url):
+                    # Local file produced by the agent (e.g. a generated image)
+                    local_path = img_url[len("file://"):] if img_url.startswith("file://") else img_url
+                    with open(local_path, "rb") as f:
+                        image_storage.write(f.read())
+                else:
+                    pic_res = requests.get(img_url, stream=True)
                    for block in pic_res.iter_content(1024):
                        image_storage.write(block)
                image_storage.seek(0)
--- a/channel/wecom_bot/wecom_bot_channel.py
+++ b/channel/wecom_bot/wecom_bot_channel.py
@@ -81,6 +81,8 @@ def _loads_wecom_ws_json(raw):
@singleton
 class WecomBotChannel(ChatChannel):

+    NOT_SUPPORT_REPLYTYPE = []
+
    def __init__(self):
        super().__init__()
        self.bot_id = ""
@@ -438,6 +440,17 @@ class WecomBotChannel(ChatChannel):
                    state["current"] = ""
                _push_stream(state, force=True)

+            elif event_type == "agent_cancelled":
+                # Flush partial output and strip trailing "---" separator
+                # left over from previous turn, to avoid a dangling divider.
+                if state["current"]:
+                    state["committed"] += state["current"]
+                    state["current"] = ""
+                state["committed"] = state["committed"].rstrip()
+                if state["committed"].endswith("---"):
+                    state["committed"] = state["committed"][:-3].rstrip()
+                _push_stream(state, force=True)
+
        return on_event

    # ------------------------------------------------------------------
@@ -472,6 +485,8 @@ class WecomBotChannel(ChatChannel):
            else:
                context.type = ContextType.TEXT
            context.content = content.strip()
+            if "desire_rtype" not in context and conf().get("always_reply_voice"):
+                context["desire_rtype"] = ReplyType.VOICE

        return context

@@ -498,6 +513,8 @@ class WecomBotChannel(ChatChannel):
            self._send_file(reply.content, receiver, is_group, req_id)
        elif reply.type == ReplyType.VIDEO or reply.type == ReplyType.VIDEO_URL:
            self._send_file(reply.content, receiver, is_group, req_id, media_type="video")
+        elif reply.type == ReplyType.VOICE:
+            self._send_voice(reply.content, receiver, is_group, req_id)
        else:
            logger.warning(f"[WecomBot] Unsupported reply type: {reply.type}, falling back to text")
            self._send_text(str(reply.content), receiver, is_group, req_id)
@@ -730,6 +747,65 @@ class WecomBotChannel(ChatChannel):
                },
            })

+    def _send_voice(self, voice_path: str, receiver: str, is_group: bool, req_id: str = None):
+        """Send a native voice reply. WeCom voice media must be amr.
+
+        ``voice_path`` may be a local path, a ``file://`` URI or an
+        ``http(s)://`` URL. Remote audio is downloaded into the system temp
+        directory, anything that is not ``.amr`` is converted with
+        ``any_to_amr``, the result is uploaded as ``voice`` media and the
+        returned media_id is sent over the websocket. Download, missing-file,
+        conversion and upload failures are logged and abort the send.
+
+        Args:
+            voice_path: Location of the audio to send.
+            receiver: Chat id; only used when there is no ``req_id``.
+            is_group: Selects ``chat_type`` 2 (True) or 1 (False) when there
+                is no ``req_id``.
+            req_id: If given, reply via ``aibot_respond_msg`` for that
+                request; otherwise send via ``aibot_send_msg``.
+        """
+        # Function-scope imports: only this method needs them.
+        import tempfile
+        from urllib.parse import urlparse
+
+        local_path = voice_path
+        if local_path.startswith("file://"):
+            local_path = local_path[7:]
+
+        # Temp files created by this call; deleted in ``finally`` below.
+        scratch_files = []
+        try:
+            if local_path.startswith(("http://", "https://")):
+                try:
+                    resp = requests.get(local_path, timeout=60)
+                    resp.raise_for_status()
+                    # Derive the extension from the URL *path* only; taking it
+                    # from the full URL would drag a query string (and any "/"
+                    # in it) into the temp file name.
+                    ext = os.path.splitext(urlparse(local_path).path)[1] or ".mp3"
+                    tmp_path = os.path.join(
+                        tempfile.gettempdir(),
+                        f"wecom_voice_{uuid.uuid4().hex[:8]}{ext}",
+                    )
+                    # Registered before writing so a partial file is removed too.
+                    scratch_files.append(tmp_path)
+                    with open(tmp_path, "wb") as f:
+                        f.write(resp.content)
+                    local_path = tmp_path
+                except Exception as e:
+                    logger.error(f"[WecomBot] Failed to download voice for sending: {e}")
+                    return
+
+            if not os.path.exists(local_path):
+                logger.error(f"[WecomBot] Voice file not found: {local_path}")
+                return
+
+            amr_path = local_path
+            if not local_path.lower().endswith(".amr"):
+                try:
+                    from voice.audio_convert import any_to_amr
+                    amr_path = os.path.splitext(local_path)[0] + ".amr"
+                    if scratch_files:
+                        # The converted copy of a download is just as
+                        # disposable as the download itself. A conversion of
+                        # a caller-supplied local file is left in place.
+                        scratch_files.append(amr_path)
+                    any_to_amr(local_path, amr_path)
+                except Exception as e:
+                    logger.error(f"[WecomBot] Failed to convert voice to amr: {e}")
+                    return
+
+            media_id = self._upload_media(amr_path, "voice")
+            if not media_id:
+                logger.error("[WecomBot] Failed to upload voice media")
+                return
+
+            if req_id:
+                # Passive reply bound to the inbound request.
+                self._ws_send({
+                    "cmd": "aibot_respond_msg",
+                    "headers": {"req_id": req_id},
+                    "body": {
+                        "msgtype": "voice",
+                        "voice": {"media_id": media_id},
+                    },
+                })
+            else:
+                # Proactive send: needs an explicit chat target.
+                self._ws_send({
+                    "cmd": "aibot_send_msg",
+                    "headers": {"req_id": self._gen_req_id()},
+                    "body": {
+                        "chatid": receiver,
+                        "chat_type": 2 if is_group else 1,
+                        "msgtype": "voice",
+                        "voice": {"media_id": media_id},
+                    },
+                })
+        finally:
+            # The messages above carry only media_id, so the scratch files
+            # are not needed once we get here (on success or on abort).
+            for path in scratch_files:
+                try:
+                    os.remove(path)
+                except OSError:
+                    pass
+
    def _active_send_markdown(self, content: str, receiver: str, is_group: bool):
        """Proactively send markdown message (for scheduled tasks, no req_id)."""
        self._ws_send({
--- a/channel/weixin/weixin_channel.py
+++ b/channel/weixin/weixin_channel.py
@@ -47,19 +47,24 @@ def _load_credentials(cred_path: str) -> dict:


 def _save_credentials(cred_path: str, data: dict):
-    """Save credentials to JSON file."""
+    """Atomically save credentials to a JSON file (tmp + rename).
+
+    ``data`` is dumped to ``<cred_path>.tmp`` and that file is then moved
+    over ``cred_path`` with ``os.replace``, so a reader never sees a
+    half-written credentials file. The temp file is created with mode
+    0o600 so the secrets are not readable by other users while they are
+    being written.
+    """
    os.makedirs(os.path.dirname(cred_path), exist_ok=True)
-    with open(cred_path, "w") as f:
+    tmp_path = f"{cred_path}.tmp"
+    # os.open takes the permission bits at creation time; plain open()
+    # would first create the file with umask-default permissions.
+    fd = os.open(tmp_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
+    with os.fdopen(fd, "w") as f:
        json.dump(data, f, indent=2)
    try:
-        os.chmod(cred_path, 0o600)
+        # Still needed: a leftover temp file from an earlier run keeps its
+        # old mode, because O_CREAT does not touch an existing file's bits.
+        os.chmod(tmp_path, 0o600)
    except Exception:
        pass
+    os.replace(tmp_path, cred_path)


@singleton
 class WeixinChannel(ChatChannel):

+    # ilink bot protocol has no outbound voice item; deliver TTS as a file.
+    NOT_SUPPORT_REPLYTYPE = []
+
    LOGIN_STATUS_IDLE = "idle"
    LOGIN_STATUS_WAITING = "waiting_scan"
    LOGIN_STATUS_SCANNED = "scanned"
@@ -70,7 +75,10 @@ class WeixinChannel(ChatChannel):
        self.api = None
        self._stop_event = threading.Event()
        self._poll_thread = None
-        self._context_tokens = {}  # user_id -> context_token
+        # user_id -> context_token. Guarded by _context_tokens_lock for any
+        # mutation that races with disk persistence.
+        self._context_tokens = {}
+        self._context_tokens_lock = threading.Lock()
        self._received_msgs = ExpiredDict(60 * 60 * 7.1)
        self._get_updates_buf = ""
        self._credentials_path = ""
@@ -92,12 +100,19 @@ class WeixinChannel(ChatChannel):
            conf().get("weixin_credentials_path", "~/.weixin_cow_credentials.json")
        )

-        if not token:
+        # Always load credentials so we can restore context_tokens even when
+        # the bot token itself comes from config.
        creds = _load_credentials(self._credentials_path)
+        if not token:
            token = creds.get("token", "")
            if creds.get("base_url"):
                base_url = creds["base_url"]

+        # Restore persisted context_tokens so scheduler can deliver pushes
+        # immediately after restart, without waiting for the user to ping
+        # the bot first.
+        self._restore_context_tokens_from_creds(creds)
+
        if not token:
            token, base_url = self._login_with_retry(base_url)
            if not token:
@@ -137,6 +152,11 @@ class WeixinChannel(ChatChannel):
    def _relogin(self) -> bool:
        """Re-login after session expiry. Returns True on success."""
        base_url = self.api.base_url if self.api else DEFAULT_BASE_URL
+        # Clearing the whole credentials file is intentional: the new login
+        # will issue a fresh `token` and persisted context_tokens belong to
+        # the previous bot identity, so they must not survive.
+        with self._context_tokens_lock:
+            self._context_tokens.clear()
            if os.path.exists(self._credentials_path):
                try:
                    os.remove(self._credentials_path)
@@ -153,9 +173,62 @@ class WeixinChannel(ChatChannel):
            cdn_base_url=self.api.cdn_base_url if self.api else CDN_BASE_URL,
        )
        self.login_status = self.LOGIN_STATUS_OK
-        self._context_tokens.clear()
        return True

+    # ── Context token persistence ──────────────────────────────────────
+    # ilink requires every outbound send to echo the context_token from the
+    # user's latest inbound message. We mirror the in-memory map into the
+    # credentials JSON so scheduled pushes survive process restarts.
+    # All mutation + disk IO is serialized via _context_tokens_lock so that
+    # concurrent updates can never lose each other's writes.
+
+    def _restore_context_tokens_from_creds(self, creds: dict) -> None:
+        """Copy persisted per-user context_tokens from ``creds`` into memory.
+
+        Reads ``creds["context_tokens"]`` and stores every entry whose key
+        and value are both strings (value non-empty) in
+        ``self._context_tokens`` while holding ``_context_tokens_lock``.
+        A non-dict ``creds`` or a missing / non-dict ``context_tokens``
+        value is ignored silently.
+        """
+        saved = creds.get("context_tokens") if isinstance(creds, dict) else None
+        if not isinstance(saved, dict):
+            return
+        # Filter first, then apply all valid entries in one step under the lock.
+        usable = {
+            uid: tok
+            for uid, tok in saved.items()
+            if isinstance(uid, str) and isinstance(tok, str) and tok
+        }
+        with self._context_tokens_lock:
+            self._context_tokens.update(usable)
+        if usable:
+            logger.info(f"[Weixin] Restored {len(usable)} context_tokens from credentials")
+
+    def _persist_context_tokens_locked(self) -> None:
+        """Write the in-memory token map into the credentials file.
+
+        The caller must already hold ``_context_tokens_lock``. The current
+        credentials are re-read, their ``context_tokens`` entry is replaced
+        with a copy of ``self._context_tokens`` and the result is saved
+        back. Does nothing when no credentials path is set; any error is
+        logged as a warning instead of being raised.
+        """
+        cred_path = self._credentials_path
+        if cred_path:
+            try:
+                on_disk = _load_credentials(cred_path) or {}
+                on_disk["context_tokens"] = dict(self._context_tokens)
+                _save_credentials(cred_path, on_disk)
+            except Exception as e:
+                logger.warning(f"[Weixin] Failed to persist context_tokens: {e}")
+
+    def _update_context_token(self, user_id: str, token: str) -> None:
+        """Remember ``token`` for ``user_id``; persist only if it changed.
+
+        Empty ``user_id`` or ``token`` is ignored. The comparison, the
+        in-memory update and the flush to disk all happen while holding
+        ``_context_tokens_lock``.
+        """
+        if not (user_id and token):
+            return
+        with self._context_tokens_lock:
+            is_new = self._context_tokens.get(user_id) != token
+            if is_new:
+                self._context_tokens[user_id] = token
+                self._persist_context_tokens_locked()
+
+    def _invalidate_context_token(self, user_id: str) -> None:
+        """Forget the cached token of ``user_id`` (used after -14 / send rejection).
+
+        Nothing happens for an empty ``user_id`` or a user without a cached
+        token. Otherwise the entry is removed, the removal is logged and
+        the token map is flushed to disk, all under ``_context_tokens_lock``.
+        """
+        if not user_id:
+            return
+        with self._context_tokens_lock:
+            if user_id in self._context_tokens:
+                del self._context_tokens[user_id]
+                logger.info(f"[Weixin] Invalidated stale context_token for {user_id}")
+                self._persist_context_tokens_locked()
+
    # ── QR Login ───────────────────────────────────────────────────────

    @staticmethod
@@ -388,7 +461,7 @@ class WeixinChannel(ChatChannel):
        context_token = raw_msg.get("context_token", "")

        if context_token and from_user:
-            self._context_tokens[from_user] = context_token
+            self._update_context_token(from_user, context_token)

        cdn_base_url = self.api.cdn_base_url if self.api else CDN_BASE_URL
        try:
@@ -464,6 +537,14 @@ class WeixinChannel(ChatChannel):
            else:
                context.type = ContextType.TEXT
            context.content = content.strip()
+            if "desire_rtype" not in context and conf().get("always_reply_voice"):
+                context["desire_rtype"] = ReplyType.VOICE
+
+        elif ctype == ContextType.VOICE:
+            if "desire_rtype" not in context and (
+                conf().get("voice_reply_voice") or conf().get("always_reply_voice")
+            ):
+                context["desire_rtype"] = ReplyType.VOICE

        return context

@@ -486,6 +567,9 @@ class WeixinChannel(ChatChannel):
            self._send_file(reply.content, receiver, context_token)
        elif reply.type in (ReplyType.VIDEO, ReplyType.VIDEO_URL):
            self._send_video(reply.content, receiver, context_token)
+        elif reply.type == ReplyType.VOICE:
+            # ilink has no outbound voice item; deliver TTS as a file attachment.
+            self._send_file(reply.content, receiver, context_token)
        else:
            logger.warning(f"[Weixin] Unsupported reply type: {reply.type}, fallback to text")
            self._send_text(str(reply.content), receiver, context_token)
@@ -496,10 +580,30 @@ class WeixinChannel(ChatChannel):
            return msg.context_token
        return self._context_tokens.get(receiver, "")

+    def _check_send_response(self, resp, receiver: str) -> None:
+        """Drop the receiver's cached context_token if a send reported -14.
+
+        ilink reports ret/errcode = -14 when the session (and with it any
+        cached context_token) is no longer valid. The bot can re-login on
+        its own, so the only thing to do here is forget the per-user token
+        so that later pushes do not keep retrying with it. Responses that
+        are not dicts are ignored.
+        """
+        if not isinstance(resp, dict):
+            return
+        status_codes = (resp.get("ret"), resp.get("errcode"))
+        if not any(code == -14 for code in status_codes):
+            return
+        logger.warning(
+            f"[Weixin] Send returned -14 (session expired) for "
+            f"receiver={receiver}; dropping cached context_token"
+        )
+        self._invalidate_context_token(receiver)
+
    def _send_text(self, text: str, receiver: str, context_token: str):
        if len(text) <= TEXT_CHUNK_LIMIT:
            try:
-                self.api.send_text(receiver, text, context_token)
+                resp = self.api.send_text(receiver, text, context_token)
+                self._check_send_response(resp, receiver)
                logger.debug(f"[Weixin] Text sent to {receiver}, len={len(text)}")
            except Exception as e:
                logger.error(f"[Weixin] Failed to send text: {e}")
@@ -508,7 +612,8 @@ class WeixinChannel(ChatChannel):
        chunks = self._split_text(text, TEXT_CHUNK_LIMIT)
        for i, chunk in enumerate(chunks):
            try:
-                self.api.send_text(receiver, chunk, context_token)
+                resp = self.api.send_text(receiver, chunk, context_token)
+                self._check_send_response(resp, receiver)
                logger.debug(f"[Weixin] Text chunk {i+1}/{len(chunks)} sent to {receiver}, len={len(chunk)}")
            except Exception as e:
                logger.error(f"[Weixin] Failed to send text chunk {i+1}/{len(chunks)}: {e}")
@@ -542,13 +647,14 @@ class WeixinChannel(ChatChannel):
            return
        try:
            result = upload_media_to_cdn(self.api, local_path, receiver, media_type=1)
-            self.api.send_image_item(
+            resp = self.api.send_image_item(
                to=receiver,
                context_token=context_token,
                encrypt_query_param=result["encrypt_query_param"],
                aes_key_b64=result["aes_key_b64"],
                ciphertext_size=result["ciphertext_size"],
            )
+            self._check_send_response(resp, receiver)
            logger.info(f"[Weixin] Image sent to {receiver}")
        except Exception as e:
            logger.error(f"[Weixin] Image send failed: {e}")
@@ -561,7 +667,7 @@ class WeixinChannel(ChatChannel):
            return
        try:
            result = upload_media_to_cdn(self.api, local_path, receiver, media_type=3)
-            self.api.send_file_item(
+            resp = self.api.send_file_item(
                to=receiver,
                context_token=context_token,
                encrypt_query_param=result["encrypt_query_param"],
@@ -569,6 +675,7 @@ class WeixinChannel(ChatChannel):
                file_name=os.path.basename(local_path),
                file_size=result["raw_size"],
            )
+            self._check_send_response(resp, receiver)
            logger.info(f"[Weixin] File sent to {receiver}")
        except Exception as e:
            logger.error(f"[Weixin] File send failed: {e}")
@@ -581,13 +688,14 @@ class WeixinChannel(ChatChannel):
            return
        try:
            result = upload_media_to_cdn(self.api, local_path, receiver, media_type=2)
-            self.api.send_video_item(
+            resp = self.api.send_video_item(
                to=receiver,
                context_token=context_token,
                encrypt_query_param=result["encrypt_query_param"],
                aes_key_b64=result["aes_key_b64"],
                ciphertext_size=result["ciphertext_size"],
            )
+            self._check_send_response(resp, receiver)
            logger.info(f"[Weixin] Video sent to {receiver}")
        except Exception as e:
            logger.error(f"[Weixin] Video send failed: {e}")
--- a/cli/VERSION
+++ b/cli/VERSION
@@ -1 +1 @@
-2.0.8
+2.0.9
--- a/common/const.py
+++ b/common/const.py
@@ -15,6 +15,7 @@ ZHIPU_AI = "zhipu"
 MOONSHOT = "moonshot"
 MiniMax = "minimax"
 DEEPSEEK = "deepseek"
+MIMO = "mimo"  # 小米 MiMo 大模型
 CUSTOM = "custom"  # custom OpenAI-compatible API, bot_type won't auto-switch on model change
 MODELSCOPE = "modelscope"

@@ -29,8 +30,9 @@ CLAUDE_35_SONNET = "claude-3-5-sonnet-latest"  # 带 latest 标签的模型名
 CLAUDE_35_SONNET_1022 = "claude-3-5-sonnet-20241022"  # 带具体日期的模型名称，会固定为该日期发布的模型
 CLAUDE_35_SONNET_0620 = "claude-3-5-sonnet-20240620"
 CLAUDE_4_OPUS = "claude-opus-4-0"
+CLAUDE_4_8_OPUS = "claude-opus-4-8"      # Claude Opus 4.8 - Agent推荐模型
 CLAUDE_4_7_OPUS = "claude-opus-4-7"      # Claude Opus 4.7
-CLAUDE_4_6_OPUS = "claude-opus-4-6"      # Claude Opus 4.6 - Agent推荐模型
+CLAUDE_4_6_OPUS = "claude-opus-4-6"      # Claude Opus 4.6
 CLAUDE_4_SONNET = "claude-sonnet-4-0"    # Claude Sonnet 4.0
 CLAUDE_4_5_SONNET = "claude-sonnet-4-5"  # Claude Sonnet 4.5 - Agent推荐模型
 CLAUDE_4_6_SONNET = "claude-sonnet-4-6"  # Claude Sonnet 4.6 - Agent推荐模型
@@ -47,6 +49,7 @@ GEMINI_3_FLASH_PRE = "gemini-3-flash-preview"  # Gemini 3 Flash Preview - Agent
 GEMINI_3_PRO_PRE = "gemini-3-pro-preview"  # Gemini 3 Pro Preview
 GEMINI_31_PRO_PRE = "gemini-3.1-pro-preview"  # Gemini 3.1 Pro Preview - Agent推荐模型
 GEMINI_31_FLASH_LITE_PRE = "gemini-3.1-flash-lite-preview"  # Gemini 3.1 Flash Lite Preview - Agent推荐模型
+GEMINI_35_FLASH = "gemini-3.5-flash"  # Gemini 3.5 Flash - Agent推荐模型

 # OpenAI
 GPT35 = "gpt-3.5-turbo"
@@ -74,6 +77,7 @@ GPT_5_NANO = "gpt-5-nano"
 GPT_54 = "gpt-5.4"  # GPT-5.4 - Agent recommended model
 GPT_54_MINI = "gpt-5.4-mini"
 GPT_54_NANO = "gpt-5.4-nano"
+GPT_55 = "gpt-5.5"  # GPT-5.5 - top-tier (expensive), not default
 O1 = "o1-preview"
 O1_MINI = "o1-mini"
 WHISPER_1 = "whisper-1"
@@ -104,10 +108,12 @@ QWEN_LONG = "qwen-long"
 QWEN3_MAX = "qwen3-max"  # Qwen3 Max - Agent推荐模型
 QWEN35_PLUS = "qwen3.5-plus"  # Qwen3.5 Plus - Omni model (MultiModalConversation)
 QWEN36_PLUS = "qwen3.6-plus"  # Qwen3.6 Plus - Omni model (MultiModalConversation)
+QWEN37_MAX = "qwen3.7-max"  # Qwen3.7 Max - Agent推荐模型
 QWQ_PLUS = "qwq-plus"

 # MiniMax
 MINIMAX_M2_7 = "MiniMax-M2.7"  # MiniMax M2.7 - Latest
+MINIMAX_TEXT_01 = "MiniMax-Text-01"  # MiniMax 多模态 (vision)
 MINIMAX_M2_7_HIGHSPEED = "MiniMax-M2.7-highspeed"  # MiniMax M2.7 highspeed
 MINIMAX_M2_5 = "MiniMax-M2.5"  # MiniMax M2.5
 MINIMAX_M2_1 = "MiniMax-M2.1"  # MiniMax M2.1
@@ -119,6 +125,7 @@ MINIMAX_ABAB6_5 = "abab6.5-chat"  # MiniMax abab6.5
 GLM_5_1 = "glm-5.1"  # 智谱 GLM-5.1 - Agent recommended model (default)
 GLM_5_TURBO = "glm-5-turbo"  # 智谱 GLM-5-Turbo
 GLM_5 = "glm-5"  # 智谱 GLM-5
+GLM_5V_TURBO = "glm-5v-turbo"  # 智谱多模态 (vision)
 GLM_4 = "glm-4"
 GLM_4_PLUS = "glm-4-plus"
 GLM_4_flash = "glm-4-flash"
@@ -135,6 +142,13 @@ KIMI_K2 = "kimi-k2"
 KIMI_K2_5 = "kimi-k2.5"
 KIMI_K2_6 = "kimi-k2.6"  # Kimi K2.6 - Agent recommended model (default)

+# 小米 MiMo
+MIMO_V2_5_PRO = "mimo-v2.5-pro"      # MiMo V2.5 Pro - 旗舰，长上下文（默认推荐）
+MIMO_V2_5 = "mimo-v2.5"              # MiMo V2.5 - 多模态（文/图/音/视频）
+MIMO_V2_PRO = "mimo-v2-pro"          # MiMo V2 Pro
+MIMO_V2_OMNI = "mimo-v2-omni"        # MiMo V2 Omni - 多模态
+MIMO_V2_FLASH = "mimo-v2-flash"      # MiMo V2 Flash - 极速版
+
 # Doubao (Volcengine Ark)
 DOUBAO = "doubao"
 DOUBAO_SEED_2_CODE = "doubao-seed-2-0-code-preview-260215"
@@ -177,13 +191,16 @@ MODEL_LIST = [
              # MiniMax
              MiniMax, MINIMAX_M2_7, MINIMAX_M2_7_HIGHSPEED, MINIMAX_M2_5, MINIMAX_M2_1, MINIMAX_M2_1_LIGHTNING, MINIMAX_M2, MINIMAX_ABAB6_5,

+              # 小米 MiMo
+              MIMO, MIMO_V2_5_PRO, MIMO_V2_5, MIMO_V2_PRO, MIMO_V2_OMNI, MIMO_V2_FLASH,
+
              # Claude
-              CLAUDE3, CLAUDE_4_6_SONNET, CLAUDE_4_7_OPUS, CLAUDE_4_6_OPUS, CLAUDE_4_OPUS, CLAUDE_4_5_SONNET, CLAUDE_4_SONNET, CLAUDE_3_OPUS, CLAUDE_3_OPUS_0229,
+              CLAUDE3, CLAUDE_4_8_OPUS, CLAUDE_4_7_OPUS, CLAUDE_4_6_SONNET, CLAUDE_4_6_OPUS, CLAUDE_4_OPUS, CLAUDE_4_5_SONNET, CLAUDE_4_SONNET, CLAUDE_3_OPUS, CLAUDE_3_OPUS_0229,
              CLAUDE_35_SONNET, CLAUDE_35_SONNET_1022, CLAUDE_35_SONNET_0620, CLAUDE_3_SONNET, CLAUDE_3_HAIKU,
              "claude", "claude-3-haiku", "claude-3-sonnet", "claude-3-opus", "claude-3.5-sonnet",

              # Gemini
-              GEMINI_31_FLASH_LITE_PRE, GEMINI_31_PRO_PRE, GEMINI_3_PRO_PRE, GEMINI_3_FLASH_PRE, GEMINI_25_PRO_PRE, GEMINI_25_FLASH_PRE,
+              GEMINI_35_FLASH, GEMINI_31_FLASH_LITE_PRE, GEMINI_31_PRO_PRE, GEMINI_3_PRO_PRE, GEMINI_3_FLASH_PRE, GEMINI_25_PRO_PRE, GEMINI_25_FLASH_PRE,
              GEMINI_20_FLASH, GEMINI_20_flash_exp, GEMINI_15_PRO, GEMINI_15_flash, GEMINI_PRO, GEMINI,

              # OpenAI
@@ -193,7 +210,7 @@ MODEL_LIST = [
              GPT_4o, GPT_4O_0806, GPT_4o_MINI,
              GPT_41, GPT_41_MINI, GPT_41_NANO,
              GPT_5, GPT_5_MINI, GPT_5_NANO,
-              GPT_54, GPT_54_MINI, GPT_54_NANO,
+              GPT_54, GPT_55, GPT_54_MINI, GPT_54_NANO,
              O1, O1_MINI,

              # GLM (智谱AI)
@@ -201,7 +218,7 @@ MODEL_LIST = [
              GLM_4_0520, GLM_4_AIR, GLM_4_AIRX, GLM_4_7,

              # Qwen (通义千问)
-              QWEN36_PLUS, QWEN35_PLUS, QWEN3_MAX, QWEN_MAX, QWEN_PLUS, QWEN_TURBO, QWEN_LONG,
+              QWEN37_MAX, QWEN36_PLUS, QWEN35_PLUS, QWEN3_MAX, QWEN_MAX, QWEN_PLUS, QWEN_TURBO, QWEN_LONG,

              # Doubao (豆包)
              DOUBAO, DOUBAO_SEED_2_CODE, DOUBAO_SEED_2_PRO, DOUBAO_SEED_2_LITE, DOUBAO_SEED_2_MINI,
@@ -227,4 +244,6 @@ DINGTALK = "dingtalk"
 WECOM_BOT = "wecom_bot"
 QQ = "qq"
 WEIXIN = "weixin"
-WECHAT_KF = "wechat_kf"  # WeCom customer service (微信客服) channel
+WECHAT_KF = "wechat_kf"
+TELEGRAM = "telegram"
+SLACK = "slack"
--- a/common/utils.py
+++ b/common/utils.py
@@ -117,6 +117,18 @@ def expand_path(path: str) -> str:
    return expanded


+def is_cloud_deployment() -> bool:
+    if os.environ.get("CLOUD_DEPLOYMENT_ID"):
+        return True
+    try:
+        from config import conf
+        if conf().get("cloud_deployment_id"):
+            return True
+    except Exception:
+        pass
+    return False
+
+
 def get_cloud_headers(api_key: str) -> dict:
    """
    Build standard headers for LinkAI API requests,
--- a/config-template.json
+++ b/config-template.json
@@ -16,8 +16,8 @@
  "open_ai_api_base": "https://api.openai.com/v1",
  "gemini_api_key": "",
  "gemini_api_base": "https://generativelanguage.googleapis.com",
-  "voice_to_text": "openai",
-  "text_to_voice": "openai",
+  "voice_to_text": "",
+  "text_to_voice": "",
  "voice_reply_voice": false,
  "speech_recognition": true,
  "group_speech_recognition": false,
--- a/config.py
+++ b/config.py
@@ -173,6 +173,15 @@ available_setting = {
    # 企微智能机器人配置(长连接模式)
    "wecom_bot_id": "",  # 企微智能机器人BotID
    "wecom_bot_secret": "",  # 企微智能机器人长连接Secret
+    # Telegram 配置
+    "telegram_token": "",  # 从 @BotFather 申请的 bot token
+    "telegram_proxy": "",  # 可选的 HTTP/SOCKS5 代理，例如 http://127.0.0.1:7890 或 socks5://127.0.0.1:1080（留空则走系统环境变量）
+    "telegram_group_trigger": "mention_or_reply",  # 群聊触发方式: mention_or_reply(@或回复触发,推荐) | mention_only(仅@) | all(所有消息)
+    "telegram_register_commands": True,  # 启动时是否自动向 BotFather 注册命令菜单（与 web 端 slash 命令一致）
+    # Slack 配置（Socket Mode，无需公网 IP）
+    "slack_bot_token": "",  # Bot User OAuth Token，形如 xoxb-...
+    "slack_app_token": "",  # App-Level Token（开启 Socket Mode 后生成），形如 xapp-...
+    "slack_group_trigger": "mention_or_reply",  # 频道触发方式: mention_or_reply(@或线程内回复,推荐) | mention_only(仅@) | all(所有消息)
    # 微信配置
    "weixin_token": "",  # 微信登录后获取的bot_token，留空则启动时自动扫码登录
    "weixin_base_url": "https://ilinkai.weixin.qq.com",  # Weixin ilink API base URL
@@ -181,7 +190,7 @@ available_setting = {
    # chatgpt指令自定义触发词
    "clear_memory_commands": ["#清除记忆"],  # 重置会话指令，必须以#开头
    # channel配置
-    "channel_type": "",  # 通道类型，支持多渠道同时运行。单个: "feishu"，多个: "feishu, dingtalk" 或 ["feishu", "dingtalk"]。可选值: web,feishu,dingtalk,wecom_bot,weixin,wechatmp,wechatmp_service,wechatcom_app,wechat_kf
+    "channel_type": "",  # 通道类型，支持多渠道同时运行。单个: "feishu"，多个: "feishu, dingtalk" 或 ["feishu", "dingtalk"]。可选值: web,feishu,dingtalk,wecom_bot,weixin,wechatmp,wechatmp_service,wechatcom_app,wechat_kf,telegram,slack
    "web_console": True,  # 是否自动启动Web控制台（默认启动）。设为False可禁用
    "subscribe_msg": "",  # 订阅消息, 支持: wechatmp, wechatmp_service, wechatcom_app
    "debug": False,  # 是否开启debug模式，开启后会打印更多日志
@@ -216,10 +225,14 @@ available_setting = {
    "Minimax_base_url": "",
    "deepseek_api_key": "",
    "deepseek_api_base": "https://api.deepseek.com/v1",
+    # 小米 MiMo 大模型
+    "mimo_api_key": "",
+    "mimo_api_base": "https://api.xiaomimimo.com/v1",
    "web_host": "",  # Web console bind address; empty means auto
    "web_port": 9899,
    "web_password": "",  # Web console password; empty means no authentication required
    "web_session_expire_days": 30,  # Auth session expiry in days
+    "web_file_serve_root": "~",  # Root dir the /api/file endpoint may serve; "/" allows the whole filesystem
    "agent": True,  # 是否开启Agent模式
    "agent_workspace": "~/cow",  # agent工作空间路径，用于存储skills、memory等
    "agent_max_context_tokens": 50000,  # Agent模式下最大上下文tokens
@@ -337,8 +350,18 @@ def load_config():
    config_str = read_file(config_path)
    logger.debug("[INIT] config str: {}".format(drag_sensitive(config_str)))

-    # 将json字符串反序列化为dict类型
-    config = Config(json.loads(config_str))
+    # 将json字符串反序列化为dict类型。
+    # `object_pairs_hook` lets us catch users who accidentally typed the
+    # same key twice (e.g. two `"tools"` blocks) — json.loads would
+    # otherwise silently drop all but the last occurrence.
+    config = Config(json.loads(config_str, object_pairs_hook=_merge_duplicate_keys))
+
+    # Migrate legacy singular keys (`tool`, `skill`) into the canonical
+    # plural buckets so the rest of the codebase only reads one schema.
+    # Deep-merge so existing `tools`/`skills` entries are preserved and
+    # only missing namespaces are filled in from the legacy section.
+    _merge_legacy_namespace(config, legacy="tool",  canonical="tools")
+    _merge_legacy_namespace(config, legacy="skill", canonical="skills")

    # override config with environment variables.
    # Some online deployment platforms (e.g. Railway) deploy project from github directly. So you shouldn't put your secrets like api key in a config file, instead use environment variables to override the default config.
@@ -398,6 +421,8 @@ def load_config():
        "minimax_api_base": "MINIMAX_API_BASE",
        "deepseek_api_key": "DEEPSEEK_API_KEY",
        "deepseek_api_base": "DEEPSEEK_API_BASE",
+        "mimo_api_key": "MIMO_API_KEY",
+        "mimo_api_base": "MIMO_API_BASE",
        "qianfan_api_key": "QIANFAN_API_KEY",
        "qianfan_api_base": "QIANFAN_API_BASE",
        "zhipu_ai_api_key": "ZHIPU_AI_API_KEY",
@@ -434,7 +459,7 @@ def load_config():
                os.environ[env_key] = str(val)
                injected += 1

-    injected += _sync_skill_config_to_env(config.get("skill", {}))
+    injected += _sync_skill_config_to_env(config.get("skills", {}))

    if injected:
        logger.info("[INIT] Synced {} config values to environment variables".format(injected))
@@ -442,11 +467,90 @@ def load_config():
    config.load_user_datas()


+def _deep_merge_dicts(base: dict, incoming: dict) -> dict:
+    """Recursively merge ``incoming`` into ``base`` (incoming wins on leaves)."""
+    for key, val in incoming.items():
+        if (
+            key in base
+            and isinstance(base[key], dict)
+            and isinstance(val, dict)
+        ):
+            _deep_merge_dicts(base[key], val)
+        else:
+            base[key] = val
+    return base
+
+
+def _merge_duplicate_keys(pairs):
+    """object_pairs_hook for json.loads: merge duplicate keys within each JSON
+    object at any depth (dicts deep-merge, lists concat, otherwise last wins)."""
+    out = {}
+    duplicates = []
+    for key, val in pairs:
+        if key not in out:
+            out[key] = val
+            continue
+        duplicates.append(key)
+        prev = out[key]
+        if isinstance(prev, dict) and isinstance(val, dict):
+            _deep_merge_dicts(prev, val)
+        elif isinstance(prev, list) and isinstance(val, list):
+            prev.extend(val)
+        else:
+            out[key] = val
+    if duplicates:
+        # logger may not be wired yet — fall back to print so we never lose the warning.
+        unique = sorted(set(duplicates))
+        try:
+            logger.warning("[INIT] config.json has duplicate keys (merged): %s", unique)
+        except Exception:
+            print("[INIT] config.json has duplicate keys (merged):", unique)
+    return out
+
+
+def _merge_legacy_namespace(cfg, legacy: str, canonical: str) -> None:
+    """Fold deprecated singular keys (``tool`` / ``skill``) into their plural
+    canonical counterparts at load time. Canonical entries always win."""
+    legacy_section = cfg.get(legacy)
+    if not isinstance(legacy_section, dict) or not legacy_section:
+        cfg.pop(legacy, None)
+        return
+    canonical_section = cfg.get(canonical)
+    if not isinstance(canonical_section, dict):
+        canonical_section = {}
+    merged_keys = []
+    for name, val in legacy_section.items():
+        if name in canonical_section:
+            if isinstance(canonical_section[name], dict) and isinstance(val, dict):
+                for sub_key, sub_val in val.items():
+                    if (
+                        sub_key in canonical_section[name]
+                        and isinstance(canonical_section[name][sub_key], dict)
+                        and isinstance(sub_val, dict)
+                    ):
+                        _deep_merge_dicts(sub_val, canonical_section[name][sub_key])
+                        canonical_section[name][sub_key] = sub_val
+                    else:
+                        canonical_section[name].setdefault(sub_key, sub_val)
+            continue
+        canonical_section[name] = val
+        merged_keys.append(name)
+    cfg[canonical] = canonical_section
+    cfg.pop(legacy, None)
+    if merged_keys:
+        logger.warning(
+            "[INIT] Legacy config key '{}' is deprecated; merged into '{}': {}. "
+            "Please rename '{}' to '{}' in your config.json.".format(
+                legacy, canonical, merged_keys, legacy, canonical,
+            )
+        )
+
+
 def _sync_skill_config_to_env(skill_section) -> int:
    """Flatten skill-namespaced config into environment variables.

-    Mapping rule: ``config["skill"][<name>][<key>]`` -> ``SKILL_<NAME>_<KEY>``
-    (e.g. ``skill["image-generation"].model`` -> ``SKILL_IMAGE_GENERATION_MODEL``).
+    Mapping rule: ``config["skills"][<name>][<key>]`` -> ``SKILL_<NAME>_<KEY>``
+    (e.g. ``skills["image-generation"].model`` -> ``SKILL_IMAGE_GENERATION_MODEL``).

    This lets subprocess-based skill scripts read their own settings without
    importing project code. Existing env vars are NOT overwritten so the
--- a/docs/README.md
+++ b/docs/README.md
@@ -0,0 +1,30 @@
+# Documentation
+
+This directory contains the Mintlify documentation site for the project.
+
+## Prerequisites
+
+- Node.js v20.17.0 or higher (LTS recommended)
+
+## Install the CLI (one-time, global)
+
+```bash
+npm i -g mint
+```
+
+## Run the docs locally
+
+From this `docs/` directory:
+
+```bash
+mint dev
+```
+
+Then open http://localhost:3000 (or the port Mint reports if 3000 is in use).
+
+> The first run downloads the Mint preview framework (~90 MB) into `~/.mintlify/`.
+> Subsequent runs start instantly from the local cache.
+
+## More
+
+- Mintlify docs: https://www.mintlify.com/docs
--- a/docs/channels/index.mdx
+++ b/docs/channels/index.mdx
@@ -0,0 +1,43 @@
+---
+title: 通道概览
+description: CowAgent 支持的通道及能力矩阵
+---
+
+CowAgent 支持接入多种聊天通道，启动时通过 `channel_type` 切换。Web 控制台默认开启，可与其他接入通道并行运行。
+
+## 能力矩阵
+
+下表汇总各通道支持的入站消息类型、机器人回复类型与群聊能力，方便按场景选择。
+
+| 通道 | 文本 | 图片 | 文件 | 语音 | 群聊 |
+| --- | :-: | :-: | :-: | :-: | :-: |
+| [微信](/channels/weixin) | ✅ | ✅ | ✅ | ✅ |  |
+| [Web 控制台](/channels/web) | ✅ | ✅ | ✅ | ✅ | |
+| [飞书](/channels/feishu) | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [钉钉](/channels/dingtalk) | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [企微智能机器人](/channels/wecom-bot) | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [QQ](/channels/qq) | ✅ | ✅ | ✅ | | ✅ |
+| [企业微信应用](/channels/wecom) | ✅ | ✅ | ✅ | ✅ | |
+| [公众号](/channels/wechatmp) | ✅ | ✅ | | ✅ | |
+| [Telegram](/channels/telegram) | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [Slack](/channels/slack) | ✅ | ✅ | ✅ | | ✅ |
+
+- **图片 / 文件 / 语音**三列表示通道支持收发对应类型的消息，具体细节详见各通道文档。
+- **群聊**列表示通道可识别并响应群消息。
+
+<Tip>
+  每个通道的语音 / 图像能力依赖对应模型厂商的配置，详见 [模型概览](/models)。
+</Tip>
+
+## 通道一览
+
+- [Web 控制台](/channels/web) — 内置浏览器对话和管理面板，默认开启
+- [微信](/channels/weixin) — 通过个人微信扫码登录
+- [飞书](/channels/feishu) — 飞书自建机器人
+- [钉钉](/channels/dingtalk) — 钉钉自建机器人
+- [企微智能机器人](/channels/wecom-bot) — 企业微信智能机器人
+- [QQ](/channels/qq) — QQ 官方机器人开放平台
+- [企业微信应用](/channels/wecom) — 企业微信自建应用接入
+- [公众号](/channels/wechatmp) — 微信公众号（订阅号 / 服务号）
+- [Telegram](/channels/telegram) — 海外 IM，5 分钟接入，无需公网 IP
+- [Slack](/channels/slack) — 团队协作 IM，Socket Mode 接入，无需公网 IP
--- a/docs/channels/slack.mdx
+++ b/docs/channels/slack.mdx
@@ -0,0 +1,118 @@
+---
+title: Slack
+description: 将 CowAgent 接入 Slack App
+---
+
+> 通过 Slack App 的 **Socket Mode** 接入 CowAgent，支持私聊（DM）与频道（@机器人 / 线程内回复触发）。Socket Mode 基于长连接，无需公网 IP 与回调地址，开箱即用。
+
+## 一、接入步骤
+
+### 步骤一：创建 Slack App
+
+1. 打开 [Slack API 应用管理页](https://api.slack.com/apps)，点击 **Create New App** → **From scratch**。
+2. 填写 **App Name**（如 `CowAgent`），选择要安装的 **Workspace**，点击创建。
+
+### 步骤二：开启 Socket Mode 并获取 App Token
+
+1. 左侧菜单进入 **Settings → Socket Mode**，打开 **Enable Socket Mode**。
+2. 系统会提示生成一个 **App-Level Token**，作用域勾选 `connections:write`，生成后保存这串以 `xapp-` 开头的 Token。
+
+<Tip>
+  Socket Mode 通过 WebSocket 长连接接收事件，无需在公网暴露回调 URL，适合本地或内网部署。
+</Tip>
+
+### 步骤三：配置 Bot 权限并安装
+
+1. 进入 **Features → OAuth & Permissions**，在 **Bot Token Scopes** 中点击 **Add an OAuth Scope**，逐项添加以下权限：
+
+   ```
+   app_mentions:read
+   channels:history
+   chat:write
+   commands
+   files:read
+   files:write
+   groups:history
+   im:history
+   mpim:history
+   users:read
+   ```
+
+   <Note>
+     `files:read` / `files:write` 用于图片、文件的收发；若仅需文本对话可省略。
+   </Note>
+
+2. 进入 **Features → Event Subscriptions**，打开 **Enable Events**，在 **Subscribe to bot events** 中点击 **Add Bot User Event** 添加以下事件：
+
+   ```
+   app_mention
+   message.im
+   message.channels
+   ```
+
+   <Note>
+     如需在私有频道使用，再添加 `message.groups`。
+   </Note>
+3. 进入 **Features → App Home**，在 **Show Tabs** 区域勾选 **Messages Tab**，并勾选下方的 **Allow users to send Slash commands and messages from the messages tab**（允许用户从消息标签页发送消息），否则私聊输入框会被关闭、无法给机器人发消息。
+4. 回到 **OAuth & Permissions**，点击 **Install to Workspace** 完成安装，安装后获取以 `xoxb-` 开头的 **Bot User OAuth Token**。
+
+<Tip>
+  若 Slack 客户端仍提示「向此应用发送消息的功能已关闭」，请确认已完成上一步的 App Home 设置，并刷新或重启 Slack 客户端（必要时把 App 从对话列表移除后重新打开）。
+</Tip>
+
+### 步骤四：接入 CowAgent
+
+<Tabs>
+  <Tab title="Web 控制台（推荐）">
+    打开 Web 控制台（本地链接：http://127.0.0.1:9899 ），选择 **通道** 菜单，点击 **接入通道**，选择 **Slack**，分别填入 Bot Token（`xoxb-`）和 App Token（`xapp-`），点击接入即可。
+  </Tab>
+  <Tab title="配置文件">
+    在 `config.json` 中添加以下配置后启动：
+
+    ```json
+    {
+      "channel_type": "slack",
+      "slack_bot_token": "xoxb-xxxxxxxxxxxx",
+      "slack_app_token": "xapp-xxxxxxxxxxxx",
+      "slack_group_trigger": "mention_or_reply"
+    }
+    ```
+
+    | 参数 | 说明 | 默认值 |
+    | --- | --- | --- |
+    | `slack_bot_token` | Bot User OAuth Token，形如 `xoxb-...` | - |
+    | `slack_app_token` | App-Level Token（开启 Socket Mode 后生成），形如 `xapp-...` | - |
+    | `slack_group_trigger` | 频道触发方式：`mention_or_reply`（@或线程内回复）/ `mention_only`（仅@） / `all`（所有消息） | `mention_or_reply` |
+  </Tab>
+</Tabs>
+
+启动 Cow 后，日志中出现以下输出即表示接入成功：
+
+```
+[Slack] Bot logged in as user_id=U0XXXXXXX, team=Txxxxxxxx
+[Slack] ✅ Slack bot ready, listening for events
+```
+
+## 二、功能说明
+
+| 功能 | 支持情况 |
+| --- | --- |
+| 私聊（DM） | ✅ |
+| 频道（@机器人 / 线程内回复） | ✅ |
+| 文本消息 | ✅ 收发 |
+| 图片消息 | ✅ 收发 |
+| 文件消息 | ✅ 收发（PDF / Word / Excel 等） |
+| 线程回复 | ✅ 回复发送至触发消息所在线程 |
+
+<Note>
+  Slack 通过线程（Thread）组织对话。机器人会把回复发送到触发消息所在的线程，频道内更整洁。
+</Note>
+
+## 三、使用
+
+完成接入后：
+
+- **私聊（DM）**：在 Slack 左侧 **Apps** 中找到你的 App，直接发消息对话。
+- **频道**：把 App 邀请进频道（`/invite @你的App`），使用 `@你的App 你好` 触发对话；后续在同一线程内直接回复即可继续对话。
+
+发送图片或文件时，可以在附件的输入框中 **添加文字说明**（描述/问题）一并发送，机器人会结合附件回答。也支持先发附件再发问题，两条消息会自动合并提问。
--- a/docs/channels/telegram.mdx
+++ b/docs/channels/telegram.mdx
@@ -0,0 +1,112 @@
+---
+title: Telegram
+description: 将 CowAgent 接入 Telegram Bot
+---
+
+> 通过 Telegram Bot API 接入 CowAgent，支持单聊与群聊（@机器人 / 回复机器人触发），使用 Long Polling 模式无需公网 IP，开箱即用。
+
+
+## 一、接入步骤
+
+### 步骤一：通过 BotFather 创建 Bot
+
+1. 在 Telegram 中搜索并打开官方账号 [@BotFather](https://t.me/BotFather)。
+2. 发送 `/newbot` 命令，按提示输入：
+   - **Bot 名称**（显示名，可中文，例如 `My CowAgent Bot`）
+   - **Bot 用户名**（必须以 `bot` 结尾，例如 `my_cowagent_bot`）
+3. 创建成功后，BotFather 会返回一段 **HTTP API Token**（形如 `123456789:ABCdefGhIJKlmNoPQRsTUVwxyZ`），妥善保存。
+
+<Tip>
+  这个 Token 等同于 Bot 的密码，请勿泄露。如果意外泄漏可向 `@BotFather` 发送 `/revoke` 重置。
+</Tip>
+
+### 步骤二：（群聊使用）关闭 Privacy Mode
+
+仅使用单聊可跳过此步。Telegram Bot 默认开启 **Privacy Mode**，群聊中只能收到带 `@bot` 的命令（如 `/start@your_bot`）以及对 bot 消息的 reply；**普通的 `@bot 你好` 文字消息收不到**，会导致群聊无响应。
+
+向 `@BotFather` 发送：
+
+1. `/setprivacy`
+2. 选择刚才创建的 bot
+3. 选择 `Disable`
+
+<Note>
+  若设置后群聊仍无响应，可尝试把 Bot 从群里移除并重新拉入。
+</Note>
+
+### 步骤三：接入 CowAgent
+
+<Tabs>
+  <Tab title="Web 控制台（推荐）">
+    打开 Web 控制台（本地链接：http://127.0.0.1:9899 ），选择 **通道** 菜单，点击 **接入通道**，选择 **Telegram**，填入 Bot Token，点击接入即可。
+  </Tab>
+  <Tab title="配置文件">
+    在 `config.json` 中添加以下配置后启动：
+
+    ```json
+    {
+      "channel_type": "telegram",
+      "telegram_token": "123456789:ABCdefGhIJKlmNoPQRsTUVwxyZ",
+      "telegram_group_trigger": "mention_or_reply"
+    }
+    ```
+
+    | 参数 | 说明 | 默认值 |
+    | --- | --- | --- |
+    | `telegram_token` | BotFather 返回的 HTTP API Token | - |
+    | `telegram_group_trigger` | 群聊触发方式：`mention_or_reply`（@或回复机器人）/ `mention_only`（仅@） / `all`（所有消息） | `mention_or_reply` |
+    | `telegram_register_commands` | 启动时是否自动向 BotFather 注册命令菜单 | `true` |
+    | `telegram_proxy` | （可选）代理地址，如 `http://127.0.0.1:7890`、`socks5://127.0.0.1:1080`；运行环境无法直连 `api.telegram.org` 时配置，留空则使用环境变量 `HTTPS_PROXY` | `""` |
+  </Tab>
+</Tabs>
+
+启动 Cow 后，日志中出现以下输出即表示接入成功：
+
+```
+[Telegram] Bot logged in as @my_cowagent_bot (id=123456789)
+[Telegram] Registered 10 bot commands
+[Telegram] ✅ Telegram bot ready, polling for updates
+```
+
+## 二、功能说明
+
+| 功能 | 支持情况 |
+| --- | --- |
+| 单聊 | ✅ |
+| 群聊（@机器人 / 回复机器人） | ✅ |
+| 文本消息 | ✅ 收发 |
+| 图片消息 | ✅ 收发 |
+| 语音消息 | ✅ 收发（接收 OGG/Opus，发送 OGG/Opus） |
+| 视频消息 | ✅ 收发 |
+| 文件消息 | ✅ 收发（PDF / Word / Excel 等） |
+| 命令菜单 | ✅ 与 Web 控制台 slash 命令一致 |
+
+### 命令菜单
+
+启动时会自动向 BotFather 注册命令菜单，用户在 Telegram 输入框输入 `/` 会出现下拉提示：
+
+| 命令 | 说明 |
+| --- | --- |
+| `/help` | 显示命令帮助 |
+| `/status` | 查看运行状态 |
+| `/context` | 查看对话上下文（`/context clear` 清除） |
+| `/skill` | 技能管理（`/skill list`、`/skill install` 等） |
+| `/memory` | 记忆管理（`/memory dream`） |
+| `/knowledge` | 知识库管理（`/knowledge list` / `on` / `off`） |
+| `/config` | 查看当前配置 |
+| `/cancel` | 中止当前正在运行的 Agent 任务 |
+| `/logs` | 查看最近日志 |
+| `/version` | 查看版本 |
+
+<Note>
+  Telegram 命令菜单只能展示一级命令，子命令通过空格输入即可，例如 `/skill list`、`/context clear`。
+</Note>
+
+## 三、使用
+
+完成接入后：
+
+- **单聊**：在 Telegram 中搜索你创建的 Bot 用户名（如 `@my_cowagent_bot`），点击 `Start` 即可开始对话。
+- **群聊**：把 Bot 拉进群，使用 `@bot 你好` 或 **回复 Bot 的某条消息** 触发对话。若群聊无响应，请检查 Privacy Mode 是否已按 [步骤二](#步骤二-群聊使用-关闭-privacy-mode) 关闭。
+
+发送图片或文件时，可以直接在附件上方的输入框中 **添加 Caption**（描述/问题）一并发送，机器人会结合附件回答。也支持先发附件再发问题，两条消息会自动合并提问。
--- a/docs/channels/web.mdx
+++ b/docs/channels/web.mdx
@@ -59,9 +59,9 @@ Web 控制台是 CowAgent 的默认通道，启动后会自动运行，通过浏

 ### 模型管理

-支持在线管理模型配置，无需手动编辑配置文件：
+支持在线管理不同模型厂商的文本、图像、语音、向量模型配置，无需手动编辑配置文件：

-<img width="850" src="https://cdn.link-ai.tech/doc/20260227173811.png" />
+<img width="850" src="https://cdn.link-ai.tech/doc/20260521212949.png" />

 ### 技能管理

--- a/docs/cli/general.mdx
+++ b/docs/cli/general.mdx
@@ -39,6 +39,14 @@ Mode:    agent
 Session: 12 messages | 8 skills loaded
 ```

+## cancel
+
+中止当前会话正在运行的 Agent 任务。在 Agent 执行长时间任务（例如多轮工具调用、长流式输出）时，可随时发送 `/cancel`，Agent 会在下一次工具执行前停止。Web 端、微信、企业微信、飞书等各通道均可使用。
+
+```text
+/cancel
+```
+
 ## config

 查看或修改运行时配置。修改后立即生效，无需重启服务。
--- a/docs/cli/index.mdx
+++ b/docs/cli/index.mdx
@@ -57,6 +57,7 @@ Others:
 | --- | --- |
 | `/help` | 显示命令帮助 |
 | `/status` | 查看服务状态和配置 |
+| `/cancel` | 中止当前正在运行的 Agent 任务 |
 | `/config` | 查看或修改运行时配置 |
 | `/skill` | 管理技能（安装、卸载、启用、禁用等） |
 | `/memory dream [N]` | 手动触发记忆蒸馏（默认 3 天，最大 30） |
@@ -82,6 +83,7 @@ Others:
 | version | ✓ | ✓ |
 | status | ✓ | ✓ |
 | logs | ✓ | ✓ |
+| cancel | ✗ | ✓ |
 | config | ✗ | ✓ |
 | context | — | ✓ |
 | memory (子命令) | ✗ | ✓ |
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -38,6 +38,12 @@
      {
        "language": "zh",
        "default": true,
+        "navbar": {
+          "links": [
+            { "label": "官网", "href": "https://cowagent.ai/?lang=zh" },
+            { "label": "GitHub", "href": "https://github.com/zhayujie/CowAgent" }
+          ]
+        },
        "tabs": [
          {
            "tab": "项目介绍",
@@ -82,6 +88,7 @@
                  "models/doubao",
                  "models/kimi",
                  "models/qianfan",
+                  "models/mimo",
                  "models/linkai",
                  "models/coding-plan",
                  "models/custom"
@@ -181,6 +188,7 @@
              {
                "group": "接入渠道",
                "pages": [
+                  "channels/index",
                  "channels/weixin",
                  "channels/web",
                  "channels/feishu",
@@ -189,7 +197,9 @@
                  "channels/qq",
                  "channels/wecom",
                  "channels/wechat-kf",
-                  "channels/wechatmp"
+                  "channels/wechatmp",
+                  "channels/telegram",
+                  "channels/slack"
                ]
              }
            ]
@@ -216,6 +226,7 @@
                "group": "发布记录",
                "pages": [
                  "releases/overview",
+                  "releases/v2.0.9",
                  "releases/v2.0.8",
                  "releases/v2.0.7",
                  "releases/v2.0.6",
@@ -233,6 +244,12 @@
      },
      {
        "language": "en",
+        "navbar": {
+          "links": [
+            { "label": "Website", "href": "https://cowagent.ai/" },
+            { "label": "GitHub", "href": "https://github.com/zhayujie/CowAgent" }
+          ]
+        },
        "tabs": [
          {
            "tab": "Introduction",
@@ -254,7 +271,8 @@
                "group": "Installation",
                "pages": [
                  "en/guide/quick-start",
-                  "en/guide/manual-install"
+                  "en/guide/manual-install",
+                  "en/guide/upgrade"
                ]
              }
            ]
@@ -276,6 +294,7 @@
                  "en/models/doubao",
                  "en/models/kimi",
                  "en/models/qianfan",
+                  "en/models/mimo",
                  "en/models/linkai",
                  "en/models/coding-plan",
                  "en/models/custom"
@@ -331,6 +350,7 @@
                "pages": [
                  "en/skills/index",
                  "en/skills/install",
+                  "en/skills/create",
                  "en/skills/hub"
                ]
              },
@@ -374,6 +394,7 @@
              {
                "group": "Platforms",
                "pages": [
+                  "en/channels/index",
                  "en/channels/weixin",
                  "en/channels/web",
                  "en/channels/feishu",
@@ -382,7 +403,9 @@
                  "en/channels/qq",
                  "en/channels/wecom",
                  "en/channels/wechat-kf",
-                  "en/channels/wechatmp"
+                  "en/channels/wechatmp",
+                  "en/channels/telegram",
+                  "en/channels/slack"
                ]
              }
            ]
@@ -397,7 +420,7 @@
                  "en/cli/process",
                  "en/cli/skill",
                  "en/cli/memory-knowledge",
-                  "en/cli/chat"
+                  "en/cli/general"
                ]
              }
            ]
@@ -409,6 +432,7 @@
                "group": "Release Notes",
                "pages": [
                  "en/releases/overview",
+                  "en/releases/v2.0.9",
                  "en/releases/v2.0.8",
                  "en/releases/v2.0.7",
                  "en/releases/v2.0.6",
@@ -426,6 +450,12 @@
      },
      {
        "language": "ja",
+        "navbar": {
+          "links": [
+            { "label": "ウェブサイト", "href": "https://cowagent.ai/" },
+            { "label": "GitHub", "href": "https://github.com/zhayujie/CowAgent" }
+          ]
+        },
        "tabs": [
          {
            "tab": "紹介",
@@ -470,6 +500,7 @@
                  "ja/models/doubao",
                  "ja/models/kimi",
                  "ja/models/qianfan",
+                  "ja/models/mimo",
                  "ja/models/linkai",
                  "ja/models/coding-plan",
                  "ja/models/custom"
@@ -569,6 +600,7 @@
              {
                "group": "プラットフォーム",
                "pages": [
+                  "ja/channels/index",
                  "ja/channels/weixin",
                  "ja/channels/web",
                  "ja/channels/feishu",
@@ -577,7 +609,9 @@
                  "ja/channels/qq",
                  "ja/channels/wecom",
                  "ja/channels/wechat-kf",
-                  "ja/channels/wechatmp"
+                  "ja/channels/wechatmp",
+                  "ja/channels/telegram",
+                  "ja/channels/slack"
                ]
              }
            ]
@@ -604,6 +638,7 @@
                "group": "リリースノート",
                "pages": [
                  "ja/releases/overview",
+                  "ja/releases/v2.0.9",
                  "ja/releases/v2.0.8",
                  "ja/releases/v2.0.7",
                  "ja/releases/v2.0.6",
--- a/docs/en/README.md
+++ b/docs/en/README.md
@@ -1,250 +0,0 @@
-<p align="center"><img src="https://github.com/user-attachments/assets/eca9a9ec-8534-4615-9e0f-96c5ac1d10a3" alt="CowAgent" width="550" /></p>
-
-<p align="center">
-  <a href="https://github.com/zhayujie/CowAgent/releases/latest"><img src="https://img.shields.io/github/v/release/zhayujie/CowAgent" alt="Latest release"></a>
-  <a href="https://github.com/zhayujie/CowAgent/blob/master/LICENSE"><img src="https://img.shields.io/github/license/zhayujie/CowAgent" alt="License: MIT"></a>
-  <a href="https://github.com/zhayujie/CowAgent"><img src="https://img.shields.io/github/stars/zhayujie/CowAgent?style=flat-square" alt="Stars"></a> <br/>
-  [<a href="https://github.com/zhayujie/CowAgent/blob/master/README.md">中文</a>] | [English] | [<a href="https://github.com/zhayujie/CowAgent/blob/master/docs/ja/README.md">日本語</a>]
-</p>
-
-**CowAgent** is an AI super assistant powered by LLMs, capable of autonomous task planning, operating computers and external resources, creating and executing Skills, and continuously growing with long-term memory and a personal knowledge base. It supports flexible model switching, handles text, voice, images, and files, and can be integrated into WeChat, Web, Feishu, DingTalk, WeCom Bot, WeCom App, and WeChat Official Account — running 7×24 hours on your personal computer or server.
-
-<p align="center">
-  <a href="https://cowagent.ai/">🌐 Website</a> &nbsp;·&nbsp;
-  <a href="https://docs.cowagent.ai/en/intro/index">📖 Docs</a> &nbsp;·&nbsp;
-  <a href="https://docs.cowagent.ai/en/guide/quick-start">🚀 Quick Start</a> &nbsp;·&nbsp;
-  <a href="https://skills.cowagent.ai/">🧩 Skill Hub</a> &nbsp;·&nbsp;
-  <a href="https://link-ai.tech/cowagent/create">☁️ Try Online</a>
-</p>
-
-## Introduction
-
-> CowAgent is both an out-of-the-box AI super assistant and a highly extensible Agent framework. You can extend it with new model interfaces, channels, built-in tools, and the Skills system to flexibly implement various customization needs.
-
-- ✅ **Autonomous Task Planning**: Understands complex tasks and autonomously plans execution, continuously thinking and invoking tools until goals are achieved.
-- ✅ **Long-term Memory**: Automatically persists conversation memory to local files and databases, including core memory, daily memory, and Deep Dream distillation, with keyword and vector retrieval support.
-- ✅ **Personal Knowledge Base**: Automatically organizes structured knowledge with cross-references to build a knowledge graph, with web-based visualization and conversational management.
-- ✅ **Skills System**: Implements a Skills creation and execution engine, supports installing skills from [Skill Hub](https://skills.cowagent.ai), GitHub, etc., or creating custom Skills through conversation.
-- ✅ **Tool System**: Built-in tools for file I/O, terminal execution, browser automation, scheduled tasks, messaging, and more — autonomously invoked by the Agent.
-- ✅ **CLI System**: Provides terminal commands and in-chat commands for process management, skill installation, configuration, and more.
-- ✅ **Multimodal Messages**: Supports parsing, processing, generating, and sending text, images, voice, files, and other message types.
-- ✅ **Multiple Model Support**: Supports DeepSeek, MiniMax, Claude, Gemini, OpenAI, GLM, Qwen, Doubao, Kimi, and other mainstream model providers.
-- ✅ **Multi-platform Deployment**: Runs on local computers or servers, integrable into WeChat, Web, Feishu, DingTalk, WeChat Official Account, and WeCom applications.
-
-## Disclaimer
-
-1. This project follows the [MIT License](/LICENSE) and is intended for technical research and learning. Users must comply with local laws, regulations, policies, and corporate bylaws. Any illegal or rights-infringing use is prohibited.
-2. Agent mode consumes more tokens than normal chat mode. Choose models based on effectiveness and cost. Agent has access to the host OS — please deploy in trusted environments.
-3. CowAgent focuses on open-source development and does not participate in, authorize, or issue any cryptocurrency.
-
-## Demo
-
-Try online (no deployment needed): [CowAgent](https://link-ai.tech/cowagent/create)
-
-## Changelog
-
-> **2026.04.14:** [v2.0.6](https://github.com/zhayujie/CowAgent/releases/tag/2.0.6) — Knowledge Base, Deep Dream Memory Distillation, Smart Context Compression, Web Console upgrades.
-
-> **2026.04.01:** [v2.0.5](https://github.com/zhayujie/CowAgent/releases/tag/2.0.5) — Cow CLI, Skill Hub open source, Browser tool, WeCom Bot QR scan, and more.
-
-> **2026.02.27:** [v2.0.2](https://github.com/zhayujie/CowAgent/releases/tag/2.0.2) — Web console overhaul (streaming chat, model/skill/memory/channel/scheduler/log management), multi-channel concurrent running, session persistence, new models including Gemini 3.1 Pro / Claude 4.6 Sonnet / Qwen3.5 Plus.
-
-> **2026.02.13:** [v2.0.1](https://github.com/zhayujie/CowAgent/releases/tag/2.0.1) — Built-in Web Search tool, smart context trimming, runtime info dynamic update, Windows compatibility, fixes for scheduler memory loss, Feishu connection issues, and more.
-
-> **2026.02.03:** [v2.0.0](https://github.com/zhayujie/CowAgent/releases/tag/2.0.0) — Full upgrade to AI super assistant with multi-step task planning, long-term memory, built-in tools, Skills framework, new models, and optimized channels.
-
-> **2025.05.23:** [v1.7.6](https://github.com/zhayujie/CowAgent/releases/tag/1.7.6) — Web channel optimization, AgentMesh multi-agent plugin, Baidu TTS, claude-4-sonnet/opus support.
-
-> **2025.04.11:** [v1.7.5](https://github.com/zhayujie/CowAgent/releases/tag/1.7.5) — wechatferry protocol, DeepSeek model, Tencent Cloud voice, ModelScope and Gitee-AI support.
-
-> **2024.12.13:** [v1.7.4](https://github.com/zhayujie/CowAgent/releases/tag/1.7.4) — Gemini 2.0 model, Web channel, memory leak fix.
-
-Full changelog: [Release Notes](https://docs.cowagent.ai/en/releases/overview)
-
-<br/>
-
-## 🚀 Quick Start
-
-The project provides a one-click script for installation, configuration, startup, and management:
-
-**Linux / macOS:**
-```bash
-bash <(curl -fsSL https://cdn.link-ai.tech/code/cow/run.sh)
-```
-
-**Windows (PowerShell):**
-```powershell
-irm https://cdn.link-ai.tech/code/cow/run.ps1 | iex
-```
-
-After running, the Web service starts by default. Access `http://localhost:9899/chat` to chat.
-
-Script usage: [One-click Install](https://docs.cowagent.ai/en/guide/quick-start). After installation, you can also use `cow start`, `cow stop`, and other [CLI commands](https://docs.cowagent.ai/en/cli/index) to manage the service.
-
-### Manual Installation
-
-**1. Clone the project**
-
-```bash
-git clone https://github.com/zhayujie/CowAgent
-cd CowAgent/
-```
-
-**2. Install dependencies**
-
-```bash
-pip3 install -r requirements.txt
-pip3 install -r requirements-optional.txt   # optional but recommended
-```
-
-**3. Install Cow CLI (recommended)**
-
-```bash
-pip3 install -e .
-```
-
-After installation, use `cow` commands to manage the service (start, stop, update, etc.) and skills. See [Command Docs](https://docs.cowagent.ai/en/cli/index).
-
-**4. Install browser (optional)**
-
-If you need the Agent to operate a browser (visit web pages, fill forms, etc.):
-
-```bash
-cow install-browser
-```
-
-This auto-installs `playwright` and Chromium. See [Browser Tool Docs](https://docs.cowagent.ai/en/tools/browser).
-
-**5. Configure**
-
-```bash
-cp config-template.json config.json
-```
-
-Fill in your model API key and channel type in `config.json`. See the [configuration docs](https://docs.cowagent.ai/en/guide/manual-install) for details.
-
-**6. Run**
-
-```bash
-cow start              # recommended, requires Cow CLI
-python3 app.py         # or run directly
-```
-
-For server deployment, use `cow` commands to manage the service:
-
-```bash
-cow start              # start in background
-cow stop               # stop service
-cow restart            # restart service
-cow status             # check running status
-cow logs               # view logs
-cow update             # pull latest code and restart
-```
-
-Or use the traditional way:
-
-```bash
-nohup python3 app.py & tail -f nohup.out
-```
-
-### Docker Deployment
-
-```bash
-curl -O https://cdn.link-ai.tech/code/cow/docker-compose.yml
-# Edit docker-compose.yml with your config
-sudo docker compose up -d
-sudo docker logs -f chatgpt-on-wechat
-```
-
-<br/>
-
-## Models
-
-Supports mainstream model providers. Recommended models for Agent mode:
-
-| Provider | Recommended Model |
-| --- | --- |
-| DeepSeek | `deepseek-v4-flash` |
-| MiniMax | `MiniMax-M2.7` |
-| Claude | `claude-sonnet-4-6` |
-| Gemini | `gemini-3.1-pro-preview` |
-| OpenAI | `gpt-5.4` |
-| GLM | `glm-5.1` |
-| Qwen | `qwen3.6-plus` |
-| Doubao | `doubao-seed-2-0-code-preview-260215` |
-| Kimi | `kimi-k2.6` |
-
-For detailed configuration of each model, see the [Models documentation](https://docs.cowagent.ai/en/models/index).
-
-### Coding Plan
-
-Coding Plan is a monthly subscription package offered by various providers, ideal for high-frequency Agent usage. All providers can be accessed via OpenAI-compatible mode:
-
-```json
-{
-  "bot_type": "openai",
-  "model": "MODEL_NAME",
-  "open_ai_api_base": "PROVIDER_CODING_PLAN_API_BASE",
-  "open_ai_api_key": "YOUR_API_KEY"
-}
-```
-
-- `bot_type`: Must be `openai`
-- `model`: Model name supported by the provider
-- `open_ai_api_base`: Provider's Coding Plan API Base (different from standard pay-as-you-go)
-- `open_ai_api_key`: Provider's Coding Plan API Key
-
-> Note: Coding Plan API Base and API Key are usually separate from standard pay-as-you-go ones. Please obtain them from each provider's platform.
-
-Supported providers include Alibaba Cloud, MiniMax, Zhipu GLM, Kimi, Volcengine, and more. For detailed configuration of each provider, see the [Coding Plan documentation](https://docs.cowagent.ai/en/models/coding-plan).
-
-<br/>
-
-## Channels
-
-Supports multiple platforms. Set `channel_type` in `config.json` to switch:
-
-| Channel | `channel_type` | Docs |
-| --- | --- | --- |
-| WeChat | `weixin` | [WeChat Setup](https://docs.cowagent.ai/en/channels/weixin) |
-| Web (default) | `web` | [Web Channel](https://docs.cowagent.ai/en/channels/web) |
-| Feishu | `feishu` | [Feishu Setup](https://docs.cowagent.ai/en/channels/feishu) |
-| DingTalk | `dingtalk` | [DingTalk Setup](https://docs.cowagent.ai/en/channels/dingtalk) |
-| WeCom Bot | `wecom_bot` | [WeCom Bot Setup](https://docs.cowagent.ai/en/channels/wecom-bot) |
-| WeCom App | `wechatcom_app` | [WeCom Setup](https://docs.cowagent.ai/en/channels/wecom) |
-| WeChat MP | `wechatmp` / `wechatmp_service` | [WeChat MP Setup](https://docs.cowagent.ai/en/channels/wechatmp) |
-| Terminal | `terminal` | — |
-
-Multiple channels can be enabled simultaneously, separated by commas: `"channel_type": "feishu,dingtalk"`.
-
-<br/>
-
-## Enterprise Services
-
-<a href="https://link-ai.tech" target="_blank"><img width="720" src="https://cdn.link-ai.tech/image/link-ai-intro.jpg"></a>
-
-> [LinkAI](https://link-ai.tech/) is a one-stop AI agent platform for enterprises and developers, integrating multimodal LLMs, knowledge bases, Agent plugins, and workflows. Supports one-click integration with mainstream platforms, SaaS and private deployment.
-
-<br/>
-
-## 🔗 Related Projects
-
-- [Cow Skill Hub](https://github.com/zhayujie/cow-skill-hub): Open skill marketplace for AI Agents — browse, search, install, and publish skills for CowAgent, OpenClaw, Claude Code, and more.
-- [bot-on-anything](https://github.com/zhayujie/bot-on-anything): Lightweight and highly extensible LLM application framework supporting Slack, Telegram, Discord, Gmail, and more.
-- [AgentMesh](https://github.com/MinimalFuture/AgentMesh): Open-source Multi-Agent framework for complex problem solving through agent team collaboration.
-
-## 🔎 FAQ
-
-FAQs: <https://github.com/zhayujie/CowAgent/wiki/FAQs>
-
-## 🛠️ Contributing
-
-Welcome to add new channels, referring to the [Feishu channel](https://github.com/zhayujie/CowAgent/blob/master/channel/feishu/feishu_channel.py) as an example. Also welcome to contribute new Skills, see the [Skill Creation docs](https://docs.cowagent.ai/en/skills/create), or submit to [Skill Hub](https://skills.cowagent.ai/submit).
-
-## ✉ Contact
-
-Welcome to submit PRs and Issues, and support the project with a 🌟 Star. For questions, check the [FAQ list](https://github.com/zhayujie/CowAgent/wiki/FAQs) or search [Issues](https://github.com/zhayujie/CowAgent/issues).
-
-## 🌟 Contributors
-
-![cow contributors](https://contrib.rocks/image?repo=zhayujie/CowAgent&max=1000)
--- a/docs/en/channels/feishu.mdx
+++ b/docs/en/channels/feishu.mdx
@@ -15,8 +15,11 @@ description: Integrate CowAgent into Feishu via a custom enterprise app

 No need to manually create an app on the Feishu Developer Platform. Start the Cow project, open the web console (default `http://127.0.0.1:9899/`), go to **Channels**, click **Add Channel**, choose **Feishu**, then under the **Scan QR** tab click **One-click Create Feishu App** and scan with the **Feishu App** to complete app creation and connection automatically.

+<img src="https://cdn.link-ai.tech/doc/20260505181126.png" width="800"/>
+
 <Note>
-  The created app comes with all required permissions (messaging, card read/write, group events, etc.) and event subscriptions pre-configured. Currently only the Feishu mainland version is supported (Lark international not yet supported).
+  1. Requires `lark-oapi` ≥ 1.5.5.
+  2. The created app comes with all required permissions (messaging, card read/write, group events, etc.) and event subscriptions pre-configured — no manual setup on the developer console needed. Currently only the Feishu mainland version is supported (Lark international not yet supported).
 </Note>

 When starting from CLI without `feishu_app_id` configured, the QR code is also printed to the terminal.
--- a/docs/en/channels/index.mdx
+++ b/docs/en/channels/index.mdx
@@ -0,0 +1,43 @@
+---
+title: Channels Overview
+description: Channels supported by CowAgent and their capability matrix
+---
+
+CowAgent supports multiple chat channels. Switch between them at startup via `channel_type`. The Web Console is enabled by default and can run in parallel with other channels.
+
+## Capability Matrix
+
+The table below summarizes the inbound message types, bot reply types, and group chat capabilities supported by each channel, making it easy to choose by scenario.
+
+| Channel | Text | Image | File | Voice | Group Chat |
+| --- | :-: | :-: | :-: | :-: | :-: |
+| [WeChat](/en/channels/weixin) | ✅ | ✅ | ✅ | ✅ |  |
+| [Web Console](/en/channels/web) | ✅ | ✅ | ✅ | ✅ | |
+| [Feishu](/en/channels/feishu) | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [DingTalk](/en/channels/dingtalk) | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [WeCom Bot](/en/channels/wecom-bot) | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [QQ](/en/channels/qq) | ✅ | ✅ | ✅ | | ✅ |
+| [WeCom App](/en/channels/wecom) | ✅ | ✅ | ✅ | ✅ | |
+| [Official Account](/en/channels/wechatmp) | ✅ | ✅ | | ✅ | |
+| [Telegram](/en/channels/telegram) | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [Slack](/en/channels/slack) | ✅ | ✅ | ✅ | | ✅ |
+
+- The **Image / File / Voice** columns indicate that the channel can send and receive the corresponding message types; see each channel's docs for details
+- The **Group Chat** column indicates the ability to recognize and respond to group messages
+
+<Tip>
+  The voice / image capabilities of each channel depend on the configuration of the corresponding model provider. See [Models Overview](/en/models/index) for details.
+</Tip>
+
+## Channel List
+
+- [Web Console](/en/channels/web) — built-in browser-based chat and management panel, enabled by default
+- [WeChat](/en/channels/weixin) — log in via personal WeChat QR scan
+- [Feishu](/en/channels/feishu) — Feishu custom bot
+- [DingTalk](/en/channels/dingtalk) — DingTalk custom bot
+- [WeCom Bot](/en/channels/wecom-bot) — WeCom AI Bot via WebSocket long connection
+- [QQ](/en/channels/qq) — QQ Official Bot open platform
+- [WeCom App](/en/channels/wecom) — WeCom custom app integration
+- [Official Account](/en/channels/wechatmp) — WeChat Official Account (subscription / service)
+- [Telegram](/en/channels/telegram) — global IM, 5-minute setup, no public IP needed
+- [Slack](/en/channels/slack) — team collaboration IM, Socket Mode integration, no public IP needed
--- a/docs/en/channels/slack.mdx
+++ b/docs/en/channels/slack.mdx
@@ -0,0 +1,118 @@
+---
+title: Slack
+description: Integrate CowAgent with a Slack App
+---
+
+> Integrate CowAgent into Slack via a Slack App in **Socket Mode**. Supports direct messages (DM) and channels (triggered by @mention or replying within a thread). Socket Mode uses a persistent WebSocket connection — no public IP or callback URL required, works out of the box.
+
+## 1. Setup
+
+### Step 1: Create a Slack App
+
+1. Open the [Slack API apps page](https://api.slack.com/apps), click **Create New App** → **From scratch**.
+2. Enter an **App Name** (e.g. `CowAgent`), pick the **Workspace** to install into, and create it.
+
+### Step 2: Enable Socket Mode and get the App Token
+
+1. In the left sidebar go to **Settings → Socket Mode** and turn on **Enable Socket Mode**.
+2. You will be prompted to generate an **App-Level Token** with the `connections:write` scope. Save this token starting with `xapp-`.
+
+<Tip>
+  Socket Mode receives events over a WebSocket connection, so you don't need to expose a public callback URL — ideal for local or intranet deployments.
+</Tip>
+
+### Step 3: Configure bot scopes and install
+
+1. Go to **Features → OAuth & Permissions**, click **Add an OAuth Scope** under **Bot Token Scopes**, and add the following scopes one by one:
+
+   ```
+   app_mentions:read
+   channels:history
+   chat:write
+   commands
+   files:read
+   files:write
+   groups:history
+   im:history
+   mpim:history
+   users:read
+   ```
+
+   <Note>
+     `files:read` / `files:write` are used for sending/receiving images and files; omit them if you only need text conversations.
+   </Note>
+
+2. Go to **Features → Event Subscriptions**, turn on **Enable Events**, and under **Subscribe to bot events** click **Add Bot User Event** to add:
+
+   ```
+   app_mention
+   message.im
+   message.channels
+   ```
+
+   <Note>
+     Add `message.groups` if you need to use the bot in private channels.
+   </Note>
+3. Go to **Features → App Home**, enable **Messages Tab** under **Show Tabs**, and check **Allow users to send Slash commands and messages from the messages tab**. Otherwise the DM input box is disabled and users cannot message the bot.
+4. Back in **OAuth & Permissions**, click **Install to Workspace**. After installing, copy the **Bot User OAuth Token** starting with `xoxb-`.
+
+<Tip>
+  If the Slack client still shows "Sending messages to this app has been turned off", make sure you completed the App Home step above, then refresh or restart the Slack client (remove the app from your conversations and reopen it if needed).
+</Tip>
+
+### Step 4: Connect to CowAgent
+
+<Tabs>
+  <Tab title="Web Console (Recommended)">
+    Open the Web Console (default `http://127.0.0.1:9899`), go to **Channels**, click **Add Channel**, choose **Slack**, paste the Bot Token (`xoxb-`) and App Token (`xapp-`), and click connect.
+  </Tab>
+  <Tab title="Config File">
+    Add the following to `config.json` and start Cow:
+
+    ```json
+    {
+      "channel_type": "slack",
+      "slack_bot_token": "xoxb-xxxxxxxxxxxx",
+      "slack_app_token": "xapp-xxxxxxxxxxxx",
+      "slack_group_trigger": "mention_or_reply"
+    }
+    ```
+
+    | Key | Description | Default |
+    | --- | --- | --- |
+    | `slack_bot_token` | Bot User OAuth Token, like `xoxb-...` | - |
+    | `slack_app_token` | App-Level Token (generated after enabling Socket Mode), like `xapp-...` | - |
+    | `slack_group_trigger` | Channel trigger: `mention_or_reply` (@ or reply in thread) / `mention_only` (@ only) / `all` (all messages) | `mention_or_reply` |
+  </Tab>
+</Tabs>
+
+The integration is ready when you see logs like:
+
+```
+[Slack] Bot logged in as user_id=U0XXXXXXX, team=Txxxxxxxx
+[Slack] ✅ Slack bot ready, listening for events
+```
+
+## 2. Capabilities
+
+| Feature | Support |
+| --- | --- |
+| Direct message (DM) | ✅ |
+| Channel (@bot / reply in thread) | ✅ |
+| Text messages | ✅ send / receive |
+| Image messages | ✅ send / receive |
+| File messages | ✅ send / receive (PDF / Word / Excel, etc.) |
+| Thread replies | ✅ replies are posted to the thread of the triggering message |
+
+<Note>
+  Slack organizes conversations into threads. The bot posts replies into the thread of the triggering message, keeping channels tidy.
+</Note>
+
+## 3. Usage
+
+Once connected:
+
+- **Direct message (DM)**: find your App under **Apps** in the Slack sidebar and message it directly.
+- **Channel**: invite the App into a channel (`/invite @your-app`), then trigger it with `@your-app hello`; continue the conversation by replying within the same thread.
+
+When sending an image or file, you can **add a text caption** (description / question) in the attachment input — the bot will answer based on both. Sending an attachment first and then a follow-up question also works; the two messages are merged automatically.
--- a/docs/en/channels/telegram.mdx
+++ b/docs/en/channels/telegram.mdx
@@ -0,0 +1,111 @@
+---
+title: Telegram
+description: Integrate CowAgent with Telegram via the Bot API
+---
+
+> Integrate CowAgent into Telegram via the official Bot API. Supports private chat and group chat (triggered by @mention or replying to the bot). Uses Long Polling — no public IP required, works out of the box.
+
+
+## 1. Setup
+
+### Step 1: Create a Bot via BotFather
+
+1. Open the official account [@BotFather](https://t.me/BotFather) in Telegram.
+2. Send `/newbot` and follow the prompts:
+   - **Bot name** (display name, e.g. `My CowAgent Bot`)
+   - **Bot username** (must end with `bot`, e.g. `my_cowagent_bot`)
+3. Once created, BotFather returns an **HTTP API Token** (e.g. `123456789:ABCdefGhIJKlmNoPQRsTUVwxyZ`). Keep it safe.
+
+<Tip>
+  The token is the password of your bot — never share it. If it leaks, send `/revoke` to `@BotFather` to reset it.
+</Tip>
+
+### Step 2: (Group chat only) Disable Privacy Mode
+
+Skip this step if you only use private chat. Telegram bots run in **Privacy Mode** by default — in groups they can only see commands suffixed with `@bot` (e.g. `/start@your_bot`) and replies to bot messages; **plain `@bot hello` text messages are not delivered**, so the bot will appear unresponsive in groups.
+
+Send the following to `@BotFather`:
+
+1. `/setprivacy`
+2. Pick the bot you just created
+3. Choose `Disable`
+
+<Note>
+  If the bot is still silent in groups after this, try removing it from the group and adding it back.
+</Note>
+
+### Step 3: Connect to CowAgent
+
+<Tabs>
+  <Tab title="Web Console (Recommended)">
+    Open the Web Console (default `http://127.0.0.1:9899`), go to **Channels**, click **Add Channel**, choose **Telegram**, paste the Bot Token, and click connect.
+  </Tab>
+  <Tab title="Config File">
+    Add the following to `config.json` and start Cow:
+
+    ```json
+    {
+      "channel_type": "telegram",
+      "telegram_token": "123456789:ABCdefGhIJKlmNoPQRsTUVwxyZ",
+      "telegram_group_trigger": "mention_or_reply"
+    }
+    ```
+
+    | Key | Description | Default |
+    | --- | --- | --- |
+    | `telegram_token` | HTTP API Token returned by BotFather | - |
+    | `telegram_group_trigger` | Group trigger: `mention_or_reply` (@ or reply) / `mention_only` (@ only) / `all` (all messages) | `mention_or_reply` |
+    | `telegram_register_commands` | Whether to register the bot's command menu with Telegram on startup | `true` |
+  </Tab>
+</Tabs>
+
+The integration is ready when you see logs like:
+
+```
+[Telegram] Bot logged in as @my_cowagent_bot (id=123456789)
+[Telegram] Registered 10 bot commands
+[Telegram] ✅ Telegram bot ready, polling for updates
+```
+
+## 2. Capabilities
+
+| Feature | Support |
+| --- | --- |
+| Private chat | ✅ |
+| Group chat (@bot / reply to bot) | ✅ |
+| Text messages | ✅ send / receive |
+| Image messages | ✅ send / receive |
+| Voice messages | ✅ send / receive (OGG/Opus) |
+| Video messages | ✅ send / receive |
+| File messages | ✅ send / receive (PDF / Word / Excel, etc.) |
+| Command menu | ✅ aligned with Web Console slash commands |
+
+### Command Menu
+
+On startup, the channel registers its command menu with Telegram. Typing `/` in a chat with the bot then shows a dropdown of the commands:
+
+| Command | Description |
+| --- | --- |
+| `/help` | Show command help |
+| `/status` | View runtime status |
+| `/context` | View conversation context (`/context clear` to clear) |
+| `/skill` | Skill management (`/skill list`, `/skill install`, ...) |
+| `/memory` | Memory management (`/memory dream`) |
+| `/knowledge` | Knowledge base (`/knowledge list` / `on` / `off`) |
+| `/config` | View current config |
+| `/cancel` | Cancel the running Agent task |
+| `/logs` | View recent logs |
+| `/version` | Show version |
+
+<Note>
+  Telegram's command menu only displays top-level commands; subcommands are entered with a space, e.g. `/skill list`, `/context clear`.
+</Note>
+
+## 3. Usage
+
+Once connected:
+
+- **Private chat**: search for your bot username (e.g. `@my_cowagent_bot`) in Telegram, click `Start` and chat away.
+- **Group chat**: add the bot to a group, then trigger it with `@bot hello` or by **replying to one of the bot's messages**. If the bot doesn't respond in groups, double-check Privacy Mode in [Step 2](#step-2-group-chat-only-disable-privacy-mode).
+
+When sending an image or file, you can **add a caption** (description / question) directly in the attachment input — the bot will answer based on both. Sending an attachment first and then a follow-up question also works; the two messages are merged automatically.
--- a/docs/en/channels/web.mdx
+++ b/docs/en/channels/web.mdx
@@ -1,23 +1,32 @@
 ---
 title: Web Console
-description: Use CowAgent through the web console
+description: Use CowAgent through the Web Console
 ---

-The Web Console is CowAgent's default channel. It starts automatically after launch, allowing you to chat with the Agent through a browser and manage models, skills, memory, channels, and other configurations online.
+The Web Console is CowAgent's default channel. It runs automatically once started, letting you chat with the Agent in a browser and manage models, skills, memory, channels, and other configuration online.

 ## Configuration

 ```json
 {
  "channel_type": "web",
-  "web_port": 9899
+  "web_host": "0.0.0.0",
+  "web_port": 9899,
+  "web_password": "",
+  "enable_thinking": false
 }
 ```

 | Parameter | Description | Default |
 | --- | --- | --- |
 | `channel_type` | Set to `web` | `web` |
+| `web_host` | Web service listen address. When left empty, the service listens on `127.0.0.1` (local only); set it to `0.0.0.0` to allow public access, and configure a password when you do | `""` |
 | `web_port` | Web service listen port | `9899` |
+| `web_password` | Access password. Leave empty to disable password protection; recommended when listening on `0.0.0.0` | `""` |
+| `web_session_expire_days` | Login session validity in days | `30` |
+| `enable_thinking` | Whether to enable deep thinking mode | `false` |
+
+Once a password is configured, you must enter it to log in when accessing the console. The login session is kept for 30 days by default, so restarting the service during that period does not require re-login. The password can also be changed online from the "Configuration" page in the console.

 ## Access URL

@@ -34,13 +43,13 @@ After starting the project, visit:

 ### Chat Interface

-Supports streaming output with real-time display of the Agent's reasoning process and tool calls, providing intuitive observation of the Agent's decision-making:
+Supports streaming output with real-time display of the Agent's reasoning process and tool calls, providing intuitive observation of the Agent's decision-making. Deep thinking can be toggled via configuration or the "Agent Configuration" switch in the console.

 <img width="850" src="https://cdn.link-ai.tech/doc/20260227180120.png" />

 #### Multi-Session Management

-The chat interface supports multi-session management. All session records are persistently stored in a SQLite database:
+The chat interface supports multi-session management. All session records are persistently stored in the database:

 - **Session List**: Click the history icon on the left to expand/collapse the session list panel, with scroll-to-load support for all historical sessions
 - **AI-Generated Titles**: After the first exchange in a new session, the model is automatically called to generate a short summary title
@@ -50,9 +59,9 @@ The chat interface supports multi-session management. All session records are pe

 ### Model Management

-Manage model configurations online without manually editing config files:
+Manage text, image, voice, and embedding model configurations for different providers online — no need to edit config files manually:

-<img width="850" src="https://cdn.link-ai.tech/doc/20260227173811.png" />
+<img width="850" src="https://cdn.link-ai.tech/doc/20260521212949.png" />

 ### Skill Management

@@ -80,6 +89,6 @@ View and manage scheduled tasks online, including one-time tasks, fixed interval

 ### Logs

-View Agent runtime logs in real-time for monitoring and troubleshooting:
+View Agent runtime logs in real time for monitoring and troubleshooting:

 <img width="850" src="https://cdn.link-ai.tech/doc/20260227173514.png" />
--- a/docs/en/channels/wecom-bot.mdx
+++ b/docs/en/channels/wecom-bot.mdx
@@ -3,39 +3,54 @@ title: WeCom Bot
 description: Connect CowAgent to WeCom AI Bot (WebSocket long connection)
 ---

-Connect CowAgent via WeCom AI Bot, supporting both direct messages and group chats. No public IP required — uses WebSocket long connection with Markdown rendering and streaming output.
+> Connect CowAgent via WeCom AI Bot, supporting both internal direct messages and group chats. No public IP required — uses a WebSocket long connection, with Markdown rendering and streaming output.

 <Note>
-  WeCom Bot and WeCom App are two different integration methods. WeCom Bot uses WebSocket long connection, requiring no public IP or domain, making it easier to set up.
+  WeCom Bot and WeCom App are two different integration methods. WeCom Bot uses a WebSocket long connection and requires no public IP or domain, making setup much simpler.
 </Note>

-## 1. Create an AI Bot
+## 1. Connection methods
+
+### Option A: One-click QR scan (recommended)
+
+No need to create the bot ahead of time. Start CowAgent and open the Web console (local URL: http://127.0.0.1:9899/), go to the **Channels** tab, click **Connect Channel**, choose **WeCom Bot**, switch to **QR scan** mode, and scan the QR code with **WeCom** — bot creation and connection complete automatically.
+
+<img src="https://cdn.link-ai.tech/doc/20260401121213.png" width="800"/>
+
+<Note>
+  After a successful scan, you can further configure the bot (name, avatar, visibility scope, etc.) in **WeCom Workbench → AI Bot**.
+</Note>
+
+### Option B: Manual creation
+
+Create the AI Bot in WeCom and obtain the Bot ID and Secret, then connect via the Web console or config file.
+
+**Step 1: Create the AI Bot**

 1. Open the WeCom client, go to **Workbench**, and click **AI Bot**:

 <img src="https://cdn.link-ai.tech/doc/20260316180959.png" width="800"/>

-2. Click **Create Bot** → **Manual Creation**:
+2. Click **Create Bot → Manual Creation**:

-<img src="https://cdn.link-ai.tech/doc/20260316181118.png" width="600"/>
+<img src="https://cdn.link-ai.tech/doc/20260316181118.png" width="800"/>

 3. Scroll to the bottom of the right panel and select **API Mode**:

-<img src="https://cdn.link-ai.tech/doc/20260316181215.png" width="600"/>
+<img src="https://cdn.link-ai.tech/doc/20260316181215.png" width="800"/>

-4. Set the bot name, avatar, and visibility scope. Select **Long Connection** mode, note down the **Bot ID** and **Secret**, then click Save.
+4. Set the bot name, avatar, and visibility scope. Choose **Long Connection** mode, note down the **Bot ID** and **Secret**, then click Save.

-## 2. Configuration
+**Step 2: Connect to CowAgent**

-### Option A: Web Console
+<Tabs>
+  <Tab title="Web Console">
+    Open the Web console, go to the **Channels** tab, click **Connect Channel**, choose **WeCom Bot**, switch to **Manual** mode, enter the Bot ID and Secret, and click Connect.

-Start the program and open the Web console (local access: http://127.0.0.1:9899). Go to the **Channels** tab, click **Connect Channel**, select **WeCom Bot**, fill in the Bot ID and Secret from the previous step, and click Connect.
-
-<img src="https://cdn.link-ai.tech/doc/20260316181711.png" width="600"/>
-
-### Option B: Config File
-
-Add the following to your `config.json`:
+    <img src="https://cdn.link-ai.tech/doc/20260316181711.png" width="800"/>
+  </Tab>
+  <Tab title="Config File">
+    Add the following to `config.json`, then start CowAgent:

    ```json
    {
@@ -48,26 +63,28 @@ Add the following to your `config.json`:
    | Parameter | Description |
    | --- | --- |
    | `wecom_bot_id` | Bot ID of the AI Bot |
-| `wecom_bot_secret` | Secret for the AI Bot |
+    | `wecom_bot_secret` | Secret of the AI Bot |
+  </Tab>
+</Tabs>

-After configuration, start the program. The log message `[WecomBot] Subscribe success` indicates a successful connection.
+The log line `[WecomBot] Subscribe success` confirms the connection is established.

-## 3. Supported Features
+## 2. Supported features

 | Feature | Status |
 | --- | --- |
-| Direct Messages | ✅ |
-| Group Chat (@bot) | ✅ |
-| Text Messages | ✅ Send & Receive |
-| Image Messages | ✅ Send & Receive |
-| File Messages | ✅ Send & Receive |
-| Streaming Reply | ✅ |
-| Scheduled Push | ✅ |
+| Direct chat | ✅ |
+| Group chat (@bot) | ✅ |
+| Text messages | ✅ Send / Receive |
+| Image messages | ✅ Send / Receive |
+| File messages | ✅ Send / Receive |
+| Streaming replies | ✅ |
+| Scheduled push messages | ✅ |

-## 4. Usage
+## 3. Usage

-Search for the bot name in WeCom to start a direct conversation.
+Search for the bot's name inside WeCom to start a direct chat.

-To use in group chats, add the bot to a group and @mention it to send messages.
+To use the bot in an internal group chat, add it to the group and @-mention it.

 <img src="https://cdn.link-ai.tech/doc/20260316182902.png" width="800"/>
--- a/docs/en/channels/weixin.mdx
+++ b/docs/en/channels/weixin.mdx
@@ -1,19 +1,21 @@
 ---
 title: WeChat
-description: Connect CowAgent to personal WeChat
+description: Connect CowAgent to personal WeChat (via the official API)
 ---

-> Connect CowAgent to your personal WeChat. Simply scan a QR code to log in — no public IP required. Supports text, image, voice, file, and video messages.
+> Connect CowAgent to your personal WeChat — scan to log in, no public IP required. Supports text, image, voice, file, and video messages in 1-on-1 chats. Backed by WeChat's official API; safe to use. After connecting, a bot assistant is added to your conversation list without affecting normal account usage.

-## 1. Configuration
+## 1. Setup and run

-### Option A: Web Console
+### Option A: Web console

-Start the program and open the Web console (local access: http://127.0.0.1:9899). Go to the **Channels** tab, click **Connect Channel**, select **WeChat**, and follow the prompts to scan the QR code.
+Start CowAgent and open the Web console (local URL: http://127.0.0.1:9899/). Go to the **Channels** tab, click **Connect Channel**, select **WeChat**, and follow the prompts to scan the QR code and log in.

-### Option B: Config File
+<img src="https://cdn.link-ai.tech/doc/20260322195114.png" width="800" />

-Set `channel_type` to `weixin` in your `config.json`:
+### Option B: Config file
+
+Set `channel_type` to `weixin` in `config.json`:

 ```json
 {
@@ -21,52 +23,49 @@ Set `channel_type` to `weixin` in your `config.json`:
 }
 ```

-After starting the program, a QR code will be displayed in the terminal. Scan it with WeChat and confirm on your phone to complete login.
+After starting CowAgent, a QR code is displayed in the terminal. Scan it with WeChat to complete login.
+
+<img src="https://cdn.link-ai.tech/doc/20260322195509.png" width="800" />

 <Note>
-  For backward compatibility, setting `channel_type` to `wx` also activates the WeChat channel.
+  1. For backward compatibility, setting `channel_type` to `wx` also activates the WeChat channel.
+  2. The WeChat client must be on version **8.0.69** or higher.
 </Note>

-## 2. Parameters
+## 2. Usage

-| Parameter | Description | Default |
-| --- | --- | --- |
-| `channel_type` | Set to `weixin` or `wx` | — |
+Once authorized, the integration completes and you can start chatting. A bot assistant is created in your WeChat conversation list, leaving normal account usage unaffected.

-Login credentials are automatically saved to `~/.weixin_cow_credentials.json`. To force a re-login, delete this file and restart.
+> You can find the bot at any time by searching for **"微信ClawBot"**. You may also rename it, change its avatar, pin it to the top of your conversation list, and so on.
+
+<img src="https://cdn.link-ai.tech/doc/83ae8251d896219fde4803f4205205be.jpg" width="250" />

 ## 3. Login

-### QR Code Login
+### QR code login

-On first startup, a QR code is displayed in the terminal (valid for approximately 2 minutes). Scan it with WeChat and confirm on your phone.
+On first startup, a QR code appears in the terminal (valid for around 2 minutes). Scan it with WeChat and confirm on your phone to log in.

- The QR code automatically refreshes when it expires
- The `qrcode` dependency is already included in `requirements.txt`, enabling QR code rendering directly in the terminal
+- The QR code refreshes automatically when it expires
+- The `qrcode` dependency is already included in `requirements.txt`, so the QR code renders directly in the terminal once dependencies are installed

-### Credential Persistence
+### Credential persistence

-After successful login, credentials are saved to `~/.weixin_cow_credentials.json`. Subsequent startups will reuse the saved credentials without requiring a new scan.
+After a successful login, credentials are saved to `~/.weixin_cow_credentials.json`. Subsequent startups reuse the saved credentials with no need to re-scan.

-To force a re-login, delete the credentials file and restart the program.
+To force a re-login, delete the credentials file and restart.

-### Session Expiry
+### Session expiry

-When the WeChat session expires (errcode -14), the program automatically clears old credentials and initiates a new QR login — no manual intervention required.
+When the WeChat session expires (errcode `-14`), CowAgent automatically clears old credentials and initiates a new QR login — no manual intervention required.

-## 4. Supported Features
+## 4. Supported features

 | Feature | Status |
 | --- | --- |
-| Direct Messages | ✅ |
-| Text Messages | ✅ Send & Receive |
-| Image Messages | ✅ Send & Receive |
-| File Messages | ✅ Send & Receive |
-| Video Messages | ✅ Send & Receive |
-| Voice Messages | ✅ Receive |
-
-## 5. Notes
-
-1. Ensure network access to `ilinkai.weixin.qq.com`.
-2. Media files (images, files, videos) are transferred via CDN with AES-128-ECB encryption, handled automatically by the program.
-3. A stable network connection is recommended to avoid frequent disconnections that would require re-scanning.
+| Direct messages | ✅ |
+| Text messages | ✅ Send & Receive |
+| Image messages | ✅ Send & Receive |
+| File messages | ✅ Send & Receive |
+| Video messages | ✅ Send & Receive |
+| Voice messages | ✅ Receive (built-in speech recognition) |
--- a/docs/en/cli/general.mdx
+++ b/docs/en/cli/general.mdx
@@ -25,6 +25,14 @@ View current session and service status, including process info, model configura
 /status
 ```

+## cancel
+
+Abort the agent task currently running in this session. When the agent is busy with a long task (e.g. multi-turn tool calls or a long streaming response), send `/cancel` and the agent will stop before the next tool execution. Available across all channels — Web, WeChat, WeCom, Feishu, etc.
+
+```text
+/cancel
+```
+
 ## config

 View or modify runtime configuration. Changes take effect immediately without restarting.
--- a/docs/en/cli/index.mdx
+++ b/docs/en/cli/index.mdx
@@ -57,6 +57,7 @@ In the Web console or any connected channel, type `/` to see command suggestions
 | --- | --- |
 | `/help` | Show command help |
 | `/status` | View service status and configuration |
+| `/cancel` | Abort the currently running agent task |
 | `/config` | View or modify runtime configuration |
 | `/skill` | Manage skills (install, uninstall, enable, disable, etc.) |
 | `/memory dream [N]` | Manually trigger memory distillation (default 3 days, max 30) |
@@ -80,6 +81,7 @@ In the Web console or any connected channel, type `/` to see command suggestions
 | version | ✓ | ✓ |
 | status | ✓ | ✓ |
 | logs | ✓ | ✓ |
+| cancel | ✗ | ✓ |
 | config | ✗ | ✓ |
 | context | — | ✓ |
 | memory (subcommands) | ✗ | ✓ |
--- a/docs/en/cli/skill.mdx
+++ b/docs/en/cli/skill.mdx
@@ -19,6 +19,24 @@ cow skill list
 ```
 </CodeGroup>

+Example output:
+
+```
+📦 Installed skills (3/4)
+
+✅ pptx
+   Use this skill any time a .pptx file is involved…
+   Source: cowhub
+
+✅ skill-creator
+   Create, install, or update skills…
+   Source: builtin
+
+⏸️ image-vision (disabled)
+   Image understanding and visual analysis
+   Source: builtin
+```
+
 **Browse the Skill Hub** (view all available skills):

 <CodeGroup>
--- a/docs/en/guide/manual-install.mdx
+++ b/docs/en/guide/manual-install.mdx
@@ -81,7 +81,7 @@ nohup python3 app.py & tail -f nohup.out
 ```

 <Tip>
-  If deploying on a server, open port `9899` in your firewall or security group to access the Web console. It's recommended to restrict access to specific IPs for security.
+  **Deploying on a server?** By default, the Web console only listens on `127.0.0.1` (local access only). Set `web_host` to `0.0.0.0` in `config.json` to make the console reachable from outside, and set `web_password` to protect it. Don't forget to open port `9899` in your firewall or security group — ideally restricted to specific IPs.
 </Tip>

 ## Docker Deployment
@@ -113,7 +113,7 @@ sudo docker logs -f chatgpt-on-wechat
 ```

 <Tip>
-  If deploying on a server, open port `9899` in your firewall or security group to access the Web console. It's recommended to restrict access to specific IPs for security.
+  **Running in Docker?** Set `WEB_HOST` to `0.0.0.0` in `docker-compose.yml` so the console is reachable from outside the container, and set `WEB_PASSWORD` to protect it. Make sure port `9899` is mapped to the host and open in your firewall or security group.
 </Tip>

 ## Core Configuration
--- a/docs/en/guide/quick-start.mdx
+++ b/docs/en/guide/quick-start.mdx
@@ -33,6 +33,10 @@ The script automatically performs these steps:

 By default, the Web console starts after installation. Access `http://localhost:9899` to begin chatting.

+<Note>
+  **Deploying on a server?** By default, the Web console only listens on `127.0.0.1` (local access only). Set `web_host` to `0.0.0.0` in `config.json` to make the console reachable from outside, and set `web_password` to protect it. Don't forget to open port `9899` in your firewall or security group — ideally restricted to specific IPs.
+</Note>
+
 ## Management Commands

 After installation, use the `cow` command to manage the service:
--- a/docs/en/guide/upgrade.mdx
+++ b/docs/en/guide/upgrade.mdx
@@ -0,0 +1,61 @@
+---
+title: Upgrade
+description: How to upgrade CowAgent
+---
+
+## Recommended: One-line upgrade
+
+Use `cow update` to pull the latest code and restart the service in one step:
+
+```bash
+cow update
+```
+
+The command runs the following automatically:
+
+1. Pull the latest code (`git pull`)
+2. Stop the running service
+3. Update Python dependencies
+4. Reinstall the CLI
+5. Start the service
+
+<Note>
+  If the Cow CLI is not installed, `./run.sh update` performs the same operations.
+</Note>
+
+## Manual upgrade
+
+Run the following inside the project root:
+
+```bash
+git pull
+pip3 install -r requirements.txt
+pip3 install -e .
+```
+
+Then restart the service:
+
+```bash
+# Using Cow CLI (recommended)
+cow restart
+
+# Or using run.sh
+./run.sh restart
+
+# Or restart manually with nohup
+kill $(ps -ef | grep app.py | grep -v grep | awk '{print $2}')
+nohup python3 app.py & tail -f nohup.out
+```
+
+## Docker upgrade
+
+Run the following in the directory containing `docker-compose.yml`:
+
+```bash
+sudo docker compose pull
+sudo docker compose up -d
+```
+
+<Tip>
+  Back up `config.json` before upgrading. For Docker deployments, mount the workspace directory as a volume to persist data across upgrades.
+</Tip>
--- a/docs/en/intro/architecture.mdx
+++ b/docs/en/intro/architecture.mdx
@@ -9,7 +9,7 @@ CowAgent 2.0 has evolved from a simple chatbot into a super intelligent assistan

 CowAgent's architecture consists of the following core modules:

-<img src="https://cdn.link-ai.tech/doc/cow-agent-arch-en.jpg.jpg" alt="CowAgent Architecture" />
+<img src="https://cdn.jsdelivr.net/gh/zhayujie/cowagent-assets@main/architecture/en/architecture.jpg" alt="CowAgent Architecture" />

 | Module | Description |
 | --- | --- |
@@ -39,8 +39,8 @@ The Agent workspace is located at `~/cow` by default and stores system prompts,

 ```
 ~/cow/
-├── system.md          # Agent system prompt
-├── user.md            # User profile
+├── SYSTEM.md          # Agent system prompt
+├── USER.md            # User profile
 ├── MEMORY.md          # Core memory
 ├── memory/            # Long-term memory storage
 │   └── YYYY-MM-DD.md  # Daily memory
@@ -67,9 +67,10 @@ Configure Agent mode parameters in `config.json`:
 {
  "agent": true,
  "agent_workspace": "~/cow",
-  "agent_max_context_tokens": 40000,
-  "agent_max_context_turns": 30,
-  "agent_max_steps": 15
+  "agent_max_context_tokens": 50000,
+  "agent_max_context_turns": 20,
+  "agent_max_steps": 20,
+  "enable_thinking": false
 }
 ```

@@ -77,7 +78,8 @@ Configure Agent mode parameters in `config.json`:
 | --- | --- | --- |
 | `agent` | Enable Agent mode | `true` |
 | `agent_workspace` | Workspace path | `~/cow` |
-| `agent_max_context_tokens` | Max context tokens | `40000` |
-| `agent_max_context_turns` | Max context turns | `30` |
-| `agent_max_steps` | Max decision steps per task | `15` |
+| `agent_max_context_tokens` | Max context tokens | `50000` |
+| `agent_max_context_turns` | Max context turns | `20` |
+| `agent_max_steps` | Max decision steps per task | `20` |
+| `enable_thinking` | Enable deep-thinking mode | `false` |
 | `knowledge` | Enable personal knowledge base | `true` |
--- a/docs/en/intro/features.mdx
+++ b/docs/en/intro/features.mdx
@@ -84,7 +84,7 @@ Secrets required by skills are stored in an environment variable file, managed b

 The Skills system provides infinite extensibility for the Agent. Each Skill consists of a description file, execution scripts (optional), and resources (optional), describing how to complete specific types of tasks. Skills allow the Agent to follow instructions for complex workflows, invoke tools, or integrate third-party systems.

- **[Skill Hub](https://skills.cowagent.ai/):** An open skill marketplace featuring official, community, and third-party skills. Install with one command.
+- [Skill Hub](https://skills.cowagent.ai/): An open skill marketplace featuring official, community, and third-party skills. Install with one command.
 - **Built-in skills:** Located in the project's `skills/` directory, including skill creator, image recognition, LinkAI agent, web fetch, and more. Built-in skills are automatically enabled based on dependency conditions (API keys, system commands, etc.).
 - **Custom skills:** Created by users through conversation, stored in the workspace (`~/cow/skills/`), capable of implementing any complex business process or third-party integration.

--- a/docs/en/intro/index.mdx
+++ b/docs/en/intro/index.mdx
@@ -1,53 +1,60 @@
 ---
 title: Introduction
-description: CowAgent - AI Super Assistant powered by LLMs
+description: CowAgent - Open-source super AI assistant and Agent Harness
 ---

-<img src="https://cdn.link-ai.tech/doc/78c5dd674e2c828642ecc0406669fed7.png" alt="CowAgent" width="600px"/>
+<div align="center">
+  <img src="https://cdn.link-ai.tech/doc/78c5dd674e2c828642ecc0406669fed7.png" alt="CowAgent" width="450px"/>
+</div>

-**CowAgent** is an AI super assistant powered by LLMs with autonomous task planning, long-term memory, skills system, multimodal messages, multiple model support, and multi-platform deployment.
+**CowAgent** is an open-source super AI assistant and Agent Harness. It proactively plans tasks, runs tools and skills, and autonomously grows with memory and knowledge.

-CowAgent can proactively think and plan tasks, operate computers and external resources, create and execute Skills, and continuously grow with long-term memory. It supports flexible switching between multiple models, handles text, voice, images, files and other multimodal messages, and can be integrated into WeChat, web, Feishu, DingTalk, WeCom, and WeChat Official Account. It runs 7x24 hours on your personal computer or server.
+CowAgent is lightweight, easy to deploy, and built to extend. Plug in any major LLM provider and run it across the Web and major IM platforms, 24/7, on a personal computer or server.

+<CardGroup cols={2}>
  <Card title="GitHub" icon="github" href="https://github.com/zhayujie/CowAgent">
-  github.com/zhayujie/CowAgent
+    Open-source repository — Star and contribute
  </Card>
+  <Card title="Try Online" icon="cloud" href="https://link-ai.tech/cowagent/create">
+    No setup required — experience CowAgent instantly
+  </Card>
+</CardGroup>

 ## Core Capabilities

 <CardGroup cols={2}>
  <Card title="Autonomous Task Planning" icon="brain" href="/en/intro/architecture">
-    Understands complex tasks and autonomously plans execution, continuously thinking and invoking tools until goals are achieved. Supports accessing file systems, terminals, browsers, schedulers, and other system resources through tools.
+    Decomposes complex tasks and executes them step by step, looping over tools and skills until the goal is reached.
  </Card>
-  <Card title="Long-term Memory" icon="database" href="/en/memory">
-    Three-tier memory flow (context → daily memory → global memory) with daily Deep Dream distillation, keyword and vector retrieval support.
+  <Card title="Long-term Memory" icon="database" href="/en/memory/index">
+    Three-tier architecture (context → daily → core), automatic Deep Dream distillation, hybrid keyword + vector retrieval.
  </Card>
-  <Card title="Knowledge Base" icon="book" href="/en/knowledge">
-    Automatically organizes structured knowledge with knowledge graph visualization, building a continuously growing knowledge network through cross-references.
+  <Card title="Personal Knowledge Base" icon="book" href="/en/knowledge/index">
+    Auto-curates structured knowledge into a Markdown wiki and builds an evolving knowledge graph with visual browsing.
  </Card>
  <Card title="Skills System" icon="puzzle-piece" href="/en/skills/index">
-    Implements a Skills creation and execution engine with built-in skills, and supports custom Skills development through natural language conversation.
+    A complete skill creation and execution engine. Install from Skill Hub or generate custom skills via natural-language conversation.
  </Card>
-  <Card title="Multimodal Messages" icon="image" href="/en/channels/web">
-    Supports parsing, processing, generating, and sending text, images, voice, files, and other message types.
+  <Card title="Multimodal Messaging" icon="image" href="/en/channels/web">
+    First-class support for text, images, voice, and files — recognition, generation, and delivery.
  </Card>
  <Card title="Tool System" icon="wrench" href="/en/tools/index">
-    Built-in tools for file I/O, terminal execution, browser automation, scheduled tasks, messaging, and more. The Agent autonomously invokes tools to accomplish complex tasks.
+    Built-in file I/O, terminal, browser, scheduler, memory retrieval, web search, and more — with native MCP integration.
  </Card>
  <Card title="Command System" icon="terminal" href="/en/cli/index">
-    Provides terminal CLI and in-chat commands for process management, skill installation, configuration, context inspection, and other common operations.
+    Terminal CLI and in-chat commands for process management, skill installation, configuration, and context inspection.
  </Card>
-  <Card title="Multiple Model Support" icon="microchip" href="/en/models/index">
-    Supports mainstream model providers including OpenAI, Claude, Gemini, DeepSeek, MiniMax, GLM, Qwen, Kimi, Doubao, and more.
+  <Card title="Pluggable Models" icon="microchip" href="/en/models/index">
+    Claude, GPT, Gemini, DeepSeek, Qwen, GLM, Kimi, MiniMax, Doubao, and more — swap providers from the Web console with one click.
  </Card>
-  <Card title="Multi-platform Deployment" icon="server" href="/en/channels/weixin">
-    Runs on local computers or servers, integrable into WeChat, web, Feishu, DingTalk, WeChat Official Account, and WeCom applications.
+  <Card title="Multi-channel Integration" icon="server" href="/en/channels/index">
+    A single Agent simultaneously serves Web, WeChat, Feishu, DingTalk, WeCom, QQ, and Official Accounts.
  </Card>
 </CardGroup>

-## Quick Experience
+## Quick Start

-Run the following command in your terminal for one-click install, configuration, and startup:
+Run one of the commands below to install, configure, and start CowAgent in a single step:

 <Tabs>
  <Tab title="Linux / macOS">
@@ -62,25 +69,25 @@ Run the following command in your terminal for one-click install, configuration,
  </Tab>
 </Tabs>

-By default, the Web service starts after running. Access `http://localhost:9899/chat` to chat in the web interface.
+Once started, open `http://localhost:9899` to access the **Web console** — the unified place to chat, configure providers, connect channels, and install skills.

 <CardGroup cols={2}>
  <Card title="Quick Start" icon="rocket" href="/en/guide/quick-start">
    Complete installation and run guide
  </Card>
  <Card title="Architecture" icon="sitemap" href="/en/intro/architecture">
-    CowAgent system architecture design
+    CowAgent system architecture
  </Card>
 </CardGroup>

 ## Disclaimer

-1. This project follows the [MIT License](https://github.com/zhayujie/CowAgent/blob/master/LICENSE) and is intended for technical research and learning. Users must comply with local laws, regulations, policies, and corporate bylaws. Any illegal or rights-infringing use is prohibited.
-2. Agent mode consumes more tokens than normal chat mode. Choose models based on effectiveness and cost. Agent has access to the host operating system — deploy with caution.
-3. CowAgent focuses on open-source development and does not participate in, authorize, or issue any cryptocurrency.
+1. This project is licensed under the [MIT License](https://github.com/zhayujie/CowAgent/blob/master/LICENSE) and is intended for technical research and learning. You are responsible for complying with applicable laws and regulations in your jurisdiction; the maintainers assume no liability for any consequences arising from use of this project.
+2. **Cost & safety:** Agent mode consumes substantially more tokens than plain chat — pick models that balance quality and cost. The Agent has access to your local operating system; deploy only in trusted environments.
+3. CowAgent is a pure open-source project and does not participate in, authorize, or issue any cryptocurrency.

 ## Community

-Add our assistant on WeChat to join the open-source community:
+Scan the WeChat QR code to join the open-source community group:

 <img width="140" src="https://img-1317903499.cos.ap-guangzhou.myqcloud.com/docs/open-community.png" />
--- a/docs/en/knowledge/index.mdx
+++ b/docs/en/knowledge/index.mdx
@@ -5,6 +5,10 @@ description: CowAgent personal knowledge base — structured knowledge accumulat

 The personal knowledge base is the Agent's long-term structured knowledge store, saved in the `knowledge/` directory within the workspace. Unlike memory, which is organized by timeline, the knowledge base organizes content by topic — articles, conversation insights, and learning materials are structured into interlinked Markdown pages, forming a continuously growing knowledge network.

+<Frame>
+  <img src="https://cdn.link-ai.tech/doc/20260413105435.png" width="800" />
+</Frame>
+
 ## Core Concepts

 ### Knowledge vs Memory
@@ -43,7 +47,7 @@ Knowledge writing is an autonomous Agent behavior, triggered in these scenarios:
 Each knowledge page includes cross-reference links to related pages, gradually building a knowledge graph.

 <Frame>
-  <img src="https://gist.github.com/user-attachments/assets/3ce92f78-1863-4820-8fa8-660c0f2b7f09" alt="Conversational knowledge ingest" />
+  <img src="https://cdn.link-ai.tech/doc/20260413110104.png" width="800" />
 </Frame>

 ## Knowledge Retrieval
@@ -63,11 +67,11 @@ The web console provides a dedicated "Knowledge" module with:
 - **Chat integration** — Knowledge document links referenced in Agent replies are clickable for direct navigation

 <Frame>
-  <img src="https://gist.github.com/user-attachments/assets/b7b9d6be-0ac1-4c65-803b-2c6b36bd59a7" alt="Knowledge document browsing" />
+  <img src="https://cdn.link-ai.tech/doc/17aad553d3e9e428c52ff9dc31726fda.png" width="800" />
 </Frame>

 <Frame>
-  <img src="https://gist.github.com/user-attachments/assets/44ae68ca-96cc-40b9-ab33-cdbec34c2379" alt="Knowledge graph visualization" />
+  <img src="https://cdn.link-ai.tech/doc/20260413105402.png" width="800" />
 </Frame>

 ## CLI Commands
--- a/docs/en/memory/index.mdx
+++ b/docs/en/memory/index.mdx
@@ -27,7 +27,7 @@ The Agent automatically persists conversation content to long-term memory throug

 - **On context trimming** — When conversation turns or tokens exceed the configured limit, the oldest half of the context is trimmed, and the discarded content is summarized by LLM into key information and written to the daily memory file. The summary is also asynchronously injected into the retained context for conversational continuity
 - **Daily scheduled summary** — A full summary is automatically triggered at 23:55 every day, ensuring memory is preserved even on low-activity days (skipped if content hasn't changed)
- **[Deep Dream (memory distillation)](/en/memory/deep-dream)** — Runs automatically after the daily summary, distilling daily memories into MEMORY.md and generating a dream diary
+- [Deep Dream (memory distillation)](/en/memory/deep-dream) — Runs automatically after the daily summary, distilling daily memories into MEMORY.md and generating a dream diary
 - **On API context overflow** — When the model API returns a context overflow error, the current conversation summary is saved as an emergency measure

 All memory writes run asynchronously in a background thread (LLM summarization + file writing), never blocking normal conversation replies.
--- a/docs/en/models/claude.mdx
+++ b/docs/en/models/claude.mdx
@@ -1,17 +1,50 @@
 ---
 title: Claude
-description: Claude model configuration
+description: Anthropic Claude model configuration (Text Chat + Image Understanding)
 ---

+Claude is provided by Anthropic and supports both text chat and image understanding. The mainstream Sonnet / Opus models natively support vision, so no separate Vision model needs to be specified.
+
+<Tip>
+  All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file.
+</Tip>
+
+## Text Chat
+
 ```json
 {
-  "model": "claude-sonnet-4-6",
+  "model": "claude-opus-4-8",
  "claude_api_key": "YOUR_API_KEY"
 }
 ```

 | Parameter | Description |
 | --- | --- |
-| `model` | Options include `claude-sonnet-4-6`, `claude-opus-4-7`, `claude-opus-4-6`, `claude-sonnet-4-5`, `claude-sonnet-4-0`, `claude-3-5-sonnet-latest`, etc. See [official models](https://docs.anthropic.com/en/docs/about-claude/models/overview) |
-| `claude_api_key` | Create at [Claude Console](https://console.anthropic.com/settings/keys) |
-| `claude_api_base` | Optional. Defaults to `https://api.anthropic.com/v1`. Change to use third-party proxy |
+| `model` | Supports `claude-opus-4-8`, `claude-opus-4-7`, `claude-sonnet-4-6`, `claude-opus-4-6`, `claude-sonnet-4-5`, `claude-sonnet-4-0`, `claude-3-5-sonnet-latest`, etc. See [official models](https://docs.anthropic.com/en/docs/about-claude/models/overview) |
+| `claude_api_key` | Create one in the [Claude Console](https://console.anthropic.com/settings/keys) |
+| `claude_api_base` | Optional, defaults to `https://api.anthropic.com/v1`. Can be changed to a third-party proxy |
+
+### Model Selection
+
+| Model | Use Case |
+| --- | --- |
+| `claude-opus-4-8` | Recommended default and latest flagship; best for complex reasoning and long-running tasks |
+| `claude-opus-4-7` | Previous-generation Opus flagship |
+| `claude-sonnet-4-6` | Balances performance and speed at a lower cost |
+| `claude-opus-4-6` / `claude-sonnet-4-5` / `claude-sonnet-4-0` | Earlier flagships at a lower price |
+
+## Image Understanding
+
+Once `claude_api_key` is configured, the Agent's Vision tool automatically uses the Claude main model to recognize images, with no extra setup required.
+
+To manually specify a Vision model, set it explicitly in the configuration file:
+
+```json
+{
+  "tools": {
+    "vision": {
+      "model": "claude-sonnet-4-6"
+    }
+  }
+}
+```
--- a/docs/en/models/coding-plan.mdx
+++ b/docs/en/models/coding-plan.mdx
@@ -77,7 +77,7 @@ Reference: [China Key](https://platform.minimaxi.com/docs/coding-plan/quickstart

 ---

-## Zhipu GLM
+## GLM

 ```json
 {
--- a/docs/en/models/custom.mdx
+++ b/docs/en/models/custom.mdx
@@ -1,26 +1,26 @@
 ---
 title: Custom
-description: Custom provider for third-party APIs and local models
+description: Custom vendor configuration for third-party API proxies and local models
 ---

-For models accessed via OpenAI-compatible APIs, such as:
+For model services accessed via the OpenAI-compatible protocol or locally deployed models, such as:

- **Third-party API proxies**: Use a unified API Base to call multiple models
- **Local models**: Models deployed locally via Ollama, vLLM, LocalAI, etc.
- **Private deployments**: Self-hosted model services within your organization
+- **Third-party API proxies**: call multiple models through a unified API base
+- **Local models**: models deployed locally with tools like Ollama, vLLM, LocalAI
+- **Private deployments**: model services deployed inside an enterprise

 <Note>
-  Unlike the `openai` provider, switching models under the Custom provider will not auto-switch the provider type. Your custom API address is always preserved.
+  Difference from the `openai` vendor: when a custom vendor is selected, switching models via `/config model` does not automatically switch the vendor type — the custom API address is always used.
 </Note>

-## Configuration
+## Text Chat

-### Third-party API Proxy
+### Third-party API proxy

 ```json
 {
  "bot_type": "custom",
-  "model": "deepseek-v4-flash",
+  "model": "",
  "custom_api_key": "YOUR_API_KEY",
  "custom_api_base": "https://{your-proxy.com}/v1"
 }
@@ -29,13 +29,13 @@ For models accessed via OpenAI-compatible APIs, such as:
 | Parameter | Description |
 | --- | --- |
 | `bot_type` | Must be set to `custom` |
-| `model` | Model name, any model supported by your proxy service |
-| `custom_api_key` | API key provided by your proxy service |
-| `custom_api_base` | API base URL, must be OpenAI-compatible |
+| `model` | Model name; any model name supported by the proxy service |
+| `custom_api_key` | API key provided by the proxy service |
+| `custom_api_base` | API endpoint provided by the proxy service; must be OpenAI-compatible |

-### Local Models
+### Local models

-Local models typically don't require an API key — just set the API base:
+Local models usually do not require an API key — only the API base needs to be filled in:

 ```json
 {
@@ -45,7 +45,7 @@ Local models typically don't require an API key — just set the API base:
 }
 ```

-Common local deployment tools and their default addresses:
+Common local deployment tools and their default endpoints:

 | Tool | Default API Base |
 | --- | --- |
@@ -53,9 +53,9 @@ Common local deployment tools and their default addresses:
 | [vLLM](https://docs.vllm.ai) | `http://localhost:8000/v1` |
 | [LocalAI](https://localai.io) | `http://localhost:8080/v1` |

-## Switching Models
+### Switching Models

-Under the Custom provider, switching models only changes `model` without affecting `bot_type` or the API address:
+Switching models under a custom vendor only changes `model` — `bot_type` and the API endpoint remain unchanged:

 ```
 /config model qwen3.5:27b
--- a/docs/en/models/deepseek.mdx
+++ b/docs/en/models/deepseek.mdx
@@ -1,9 +1,11 @@
 ---
 title: DeepSeek
-description: DeepSeek model configuration
+description: DeepSeek model configuration (Text Chat + Thinking Mode)
 ---

-Option 1: Native integration (recommended):
+DeepSeek is one of the default recommended vendors in Agent mode, focused on cost-effective text chat and task planning.
+
+## Text Chat

 ```json
 {
@@ -14,24 +16,24 @@ Option 1: Native integration (recommended):

 | Parameter | Description |
 | --- | --- |
-| `model` | Supports `deepseek-v4-flash` (default) and `deepseek-v4-pro` |
-| `deepseek_api_key` | Create at [DeepSeek Platform](https://platform.deepseek.com/api_keys) |
+| `model` | Supports `deepseek-v4-flash` (default) and `deepseek-v4-pro` |
+| `deepseek_api_key` | Create one on the [DeepSeek Platform](https://platform.deepseek.com/api_keys) |
 | `deepseek_api_base` | Optional, defaults to `https://api.deepseek.com/v1`. Can be changed to a third-party proxy |

-## Model Selection
+### Model Selection

 | Model | Use Case |
 | --- | --- |
-| `deepseek-v4-flash` | Default: fast and cost-effective |
-| `deepseek-v4-pro` | Stronger on complex tasks |
+| `deepseek-v4-flash` | Recommended default; fast and low-cost |
+| `deepseek-v4-pro` | Smarter; better for complex tasks |

 ## Thinking Mode

-The V4 series (`deepseek-v4-flash` / `deepseek-v4-pro`) supports an explicit "thinking mode": the model emits a chain-of-thought (`reasoning_content`) before the final answer to improve answer quality.
+The V4 series (`deepseek-v4-flash` / `deepseek-v4-pro`) supports an explicit "thinking mode": before producing the final answer, the model emits a chain of thought (`reasoning_content`) to improve answer quality.

 ### Toggle

-Controlled by the global `enable_thinking` setting:
+Controlled by the global `enable_thinking` config, and can also be toggled from the Web Console's configuration page:

 ```json
 {
@@ -39,12 +41,12 @@ Controlled by the global `enable_thinking` setting:
 }
 ```

- `true`: thinking is on across all channels. The Web console renders the reasoning trace; IM channels (WeChat / WeCom / DingTalk / Feishu) don't render it but still benefit from higher answer quality.
- `false`: thinking off, faster responses with lower first-token latency.
+- `true`: the model thinks before answering across all channels. The Web Console displays the thinking process; IM channels (WeChat / WeCom / DingTalk / Feishu) do not show it but still get better answers.
+- `false`: thinking is disabled, responses are faster, and time-to-first-token is lower.

 ### Reasoning Effort

-Under thinking mode, `reasoning_effort` controls how hard the model thinks:
+Under thinking mode, `reasoning_effort` controls reasoning intensity:

 ```json
 {
@@ -55,27 +57,16 @@ Under thinking mode, `reasoning_effort` controls how hard the model thinks:

 | Value | Use Case |
 | --- | --- |
-| `high` (default) | Day-to-day agent tasks; balanced thinking depth and latency |
-| `max` | Complex coding, long-horizon planning, strict-constraint tasks. Deeper reasoning at the cost of more output tokens and higher latency |
+| `high` (default) | Day-to-day Agent tasks; balanced reasoning and speed |
+| `max` | Complex coding, long-horizon planning, strictly constrained tasks; deeper reasoning but more time and output tokens |

-`reasoning_effort` only takes effect when `enable_thinking` is `true`. It is silently ignored on models that do not support thinking mode.
+`reasoning_effort` only takes effect when `enable_thinking` is `true`; it is ignored automatically when the model does not support thinking mode.

-### Notes
+### Behavior Notes

- **Sampling parameters**: under thinking mode, `temperature`, `top_p`, `presence_penalty`, and `frequency_penalty` are silently ignored by the server (no error). CowAgent skips sending them automatically.
- **Multi-turn tool calls**: once the history contains any tool-call turn, DeepSeek requires `reasoning_content` on every assistant message. CowAgent handles the round-trip automatically, including across mid-session toggles of the thinking switch.
+- **Sampling parameters**: in thinking mode, `temperature`, `top_p`, `presence_penalty`, and `frequency_penalty` are ignored by the server (without errors). CowAgent automatically skips them.
+- **Multi-turn tool calls**: when the history contains tool calls, DeepSeek requires every assistant message to include `reasoning_content`. CowAgent handles this automatically, so toggling thinking mode across turns will not cause errors.

 <Tip>
-  Start with `deepseek-v4-flash`; switch to `deepseek-v4-pro` for harder tasks; enable `enable_thinking` when you want deeper reasoning.
+  `deepseek-v4-flash` is used by default; switch to `deepseek-v4-pro` for complex tasks; enable `enable_thinking` when deep reasoning is needed.
 </Tip>
-
-Option 2: OpenAI-compatible configuration:
-
-```json
-{
-  "model": "deepseek-v4-flash",
-  "bot_type": "openai",
-  "open_ai_api_key": "YOUR_API_KEY",
-  "open_ai_api_base": "https://api.deepseek.com/v1"
-}
-```
--- a/docs/en/models/doubao.mdx
+++ b/docs/en/models/doubao.mdx
@@ -1,17 +1,66 @@
 ---
-title: Doubao (ByteDance)
-description: Doubao (Volcano Ark) model configuration
+title: Doubao
+description: Doubao (Volcengine Ark) model configuration (Text / Image Understanding / Image Generation / Embedding)
 ---

+Doubao (Volcengine Ark) supports text chat, image understanding, image generation (Seedream), and embedding. A single `ark_api_key` enables all capabilities.
+
+<Tip>
+  All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file.
+</Tip>
+
+## Text Chat
+
 ```json
 {
-  "model": "doubao-seed-2-0-code-preview-260215",
+  "model": "doubao-seed-2-0-pro-260215",
  "ark_api_key": "YOUR_API_KEY"
 }
 ```

 | Parameter | Description |
 | --- | --- |
-| `model` | Options include `doubao-seed-2-0-code-preview-260215`, `doubao-seed-2-0-pro-260215`, `doubao-seed-2-0-lite-260215`, etc. |
-| `ark_api_key` | Create at [Volcano Ark Console](https://console.volcengine.com/ark/region:ark+cn-beijing/apikey) |
-| `ark_base_url` | Optional. Defaults to `https://ark.cn-beijing.volces.com/api/v3` |
+| `model` | Can be `doubao-seed-2-0-pro-260215`, `doubao-seed-2-0-code-preview-260215`, `doubao-seed-2-0-lite-260215`, etc. |
+| `ark_api_key` | Create one in the [Volcengine Ark Console](https://console.volcengine.com/ark/region:ark+cn-beijing/apikey) |
+| `ark_base_url` | Optional, defaults to `https://ark.cn-beijing.volces.com/api/v3` |
+
+## Image Understanding
+
+Once `ark_api_key` is configured, the Agent's Vision tool automatically uses `doubao-seed-2-0-pro-260215` to recognize images, with no extra setup required.
+
+To manually specify a Vision model:
+
+```json
+{
+  "tools": {
+    "vision": {
+      "model": "doubao-seed-2-0-pro-260215"
+    }
+  }
+}
+```
+
+## Image Generation
+
+```json
+{
+  "skills": {
+    "image-generation": {
+      "model": "seedream-5.0-lite"
+    }
+  }
+}
+```
+
+Available models: `seedream-5.0-lite`, `seedream-4.5`.
+
+## Embedding
+
+```json
+{
+  "embedding_provider": "doubao",
+  "embedding_model": "doubao-embedding-vision-251215"
+}
+```
+
+The default model is `doubao-embedding-vision-251215` (multimodal embedding); the dimension (1024 or 2048) can be set via `embedding_dimensions` in the configuration file. After changing the embedding provider, model, or dimension, run `/memory rebuild-index` to rebuild the index.
--- a/docs/en/models/gemini.mdx
+++ b/docs/en/models/gemini.mdx
@@ -1,16 +1,59 @@
 ---
 title: Gemini
-description: Google Gemini model configuration
+description: Google Gemini model configuration (Text Chat + Image Understanding + Image Generation)
 ---

+Google Gemini supports text chat, image understanding, and image generation (Nano Banana series). A single `gemini_api_key` enables all capabilities.
+
+<Tip>
+  All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file.
+</Tip>
+
+## Text Chat
+
 ```json
 {
-  "model": "gemini-3.1-pro-preview",
+  "model": "gemini-3.5-flash",
  "gemini_api_key": "YOUR_API_KEY"
 }
 ```

 | Parameter | Description |
 | --- | --- |
-| `model` | Options include `gemini-3.1-flash-lite-preview`, `gemini-3.1-pro-preview`, `gemini-3-flash-preview`, `gemini-3-pro-preview`, etc. See [official docs](https://ai.google.dev/gemini-api/docs/models) |
-| `gemini_api_key` | Create at [Google AI Studio](https://aistudio.google.com/app/apikey) |
+| `model` | Recommended: `gemini-3.5-flash`; also supports `gemini-3.1-pro-preview`, `gemini-3.1-flash-lite-preview`, `gemini-3-flash-preview`, `gemini-3-pro-preview`, etc. See [official docs](https://ai.google.dev/gemini-api/docs/models) |
+| `gemini_api_key` | Create one in [Google AI Studio](https://aistudio.google.com/app/apikey) |
+| `gemini_api_base` | Optional, defaults to `https://generativelanguage.googleapis.com`. Can be changed to a third-party proxy |
+
+## Image Understanding
+
+All Gemini models natively support vision. Once `gemini_api_key` is configured, the Agent's Vision tool automatically uses the main model to recognize images, with no extra setup required.
+
+To manually specify a Vision model:
+
+```json
+{
+  "tools": {
+    "vision": {
+      "model": "gemini-3.1-flash-lite-preview"
+    }
+  }
+}
+```
+
+## Image Generation
+
+```json
+{
+  "skills": {
+    "image-generation": {
+      "model": "gemini-3.1-flash-image-preview"
+    }
+  }
+}
+```
+
+| Model ID | Alias |
+| --- | --- |
+| `gemini-3.1-flash-image-preview` | Nano Banana 2 |
+| `gemini-3-pro-image-preview` | Nano Banana Pro |
+| `gemini-2.5-flash-image` | Nano Banana |
--- a/docs/en/models/glm.mdx
+++ b/docs/en/models/glm.mdx
@@ -1,8 +1,16 @@
 ---
-title: GLM (Zhipu AI)
-description: Zhipu AI GLM model configuration
+title: GLM
+description: Zhipu AI GLM model configuration (Text / Image Understanding / Speech-to-Text / Embedding)
 ---

+Zhipu AI supports text chat, image understanding, speech-to-text (ASR), and embedding. A single `zhipu_ai_api_key` enables all capabilities.
+
+<Tip>
+  All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file.
+</Tip>
+
+## Text Chat
+
 ```json
 {
  "model": "glm-5.1",
@@ -12,16 +20,37 @@ description: Zhipu AI GLM model configuration

 | Parameter | Description |
 | --- | --- |
-| `model` | Options include `glm-5.1`, `glm-5-turbo`, `glm-5`, `glm-4.7`, `glm-4-plus`, `glm-4-flash`, `glm-4-air`, etc. See [model codes](https://bigmodel.cn/dev/api/normal-model/glm-4) |
-| `zhipu_ai_api_key` | Create at [Zhipu AI Console](https://www.bigmodel.cn/usercenter/proj-mgmt/apikeys) |
+| `model` | Can be `glm-5.1`, `glm-5-turbo`, `glm-5`, `glm-4.7`, `glm-4-plus`, `glm-4-flash`, `glm-4-air`, etc. See [model codes](https://bigmodel.cn/dev/api/normal-model/glm-4) |
+| `zhipu_ai_api_key` | Create one in the [Zhipu AI Console](https://www.bigmodel.cn/usercenter/proj-mgmt/apikeys) |
+| `zhipu_ai_api_base` | Optional, defaults to `https://open.bigmodel.cn/api/paas/v4` |

-OpenAI-compatible configuration is also supported:
+## Image Understanding
+
+Zhipu's chat models (`glm-5.1`, `glm-5-turbo`, etc.) do not support vision; vision calls are uniformly routed to `glm-5v-turbo`. Once `zhipu_ai_api_key` is configured, the Agent's Vision tool automatically uses this model, with no need to specify it explicitly in the configuration file.
+
+## Speech-to-Text (ASR)

 ```json
 {
-  "bot_type": "openai",
-  "model": "glm-5.1",
-  "open_ai_api_base": "https://open.bigmodel.cn/api/paas/v4",
-  "open_ai_api_key": "YOUR_API_KEY"
+  "voice_to_text": "zhipu",
+  "voice_to_text_model": "glm-asr-2512"
 }
 ```
+
+| Parameter | Description |
+| --- | --- |
+| `voice_to_text` | Set to `zhipu` to enable Zhipu ASR |
+| `voice_to_text_model` | Optional, defaults to `glm-asr-2512` |
+
+Credentials are automatically reused from `zhipu_ai_api_key`. Audio files should be smaller than 25MB; oversized files may be rejected by the server.
+
+## Embedding
+
+```json
+{
+  "embedding_provider": "zhipu",
+  "embedding_model": "embedding-3"
+}
+```
+
+Available models: `embedding-3`, `embedding-2`. After changing the embedding provider or model, run `/memory rebuild-index` to rebuild the index.
--- a/docs/en/models/index.mdx
+++ b/docs/en/models/index.mdx
@@ -1,58 +1,38 @@
 ---
 title: Models Overview
-description: Supported models and recommended choices for CowAgent
+description: Model vendors supported by CowAgent and their capability matrix
 ---

-CowAgent supports mainstream LLMs from domestic and international providers. Model interfaces are implemented in the project's `models/` directory.
+CowAgent supports a wide range of mainstream large language models. Model interfaces live under the project's `models/` directory. Beyond text chat, several vendors also provide vision understanding, image generation, speech-to-text, text-to-speech, and embeddings — all of which can be invoked on demand in the Agent flow.

-<Note>
-  For Agent mode, the following models are recommended based on quality and cost: deepseek-v4-flash, MiniMax-M2.7, claude-sonnet-4-6, gemini-3.1-pro-preview, glm-5.1, qwen3.6-plus, kimi-k2.6, ernie-5.1
-</Note>
+## Capability Matrix

-## Configuration
+A snapshot of each vendor's capabilities. "Text" refers to the main chat model; the remaining columns show which Agent capabilities the vendor can power.

-Configure the model name and API key in `config.json` according to your chosen model. Each model also supports OpenAI-compatible access by setting `bot_type` to `openai` and configuring `open_ai_api_base` and `open_ai_api_key`.
-
-You can also use the [LinkAI](https://link-ai.tech) platform interface to flexibly switch between multiple models with support for knowledge base, workflows, and other Agent capabilities.
-
-## Supported Models
-
-<CardGroup cols={2}>
-  <Card title="DeepSeek" href="/en/models/deepseek">
-    deepseek-v4-flash, deepseek-v4-pro, and more
-  </Card>
-  <Card title="Baidu Qianfan / ERNIE" href="/en/models/qianfan">
-    ernie-5.1, ernie-5.0, ernie-4.5-turbo-128k, and more
-  </Card>
-  <Card title="MiniMax" href="/en/models/minimax">
-    MiniMax-M2.7 and other series models
-  </Card>
-  <Card title="Claude" href="/en/models/claude">
-    claude-sonnet-4-6 and more
-  </Card>
-  <Card title="Gemini" href="/en/models/gemini">
-    gemini-3.1-pro-preview and more
-  </Card>
-  <Card title="OpenAI" href="/en/models/openai">
-    gpt-5.4, gpt-4.1, o-series and more
-  </Card>
-  <Card title="GLM (Zhipu AI)" href="/en/models/glm">
-    glm-5.1, glm-5-turbo, glm-5 and other series models
-  </Card>
-  <Card title="Qwen (Tongyi Qianwen)" href="/en/models/qwen">
-    qwen3.6-plus, qwen3-max and more
-  </Card>
-  <Card title="Doubao (ByteDance)" href="/en/models/doubao">
-    doubao-seed series models
-  </Card>
-  <Card title="Kimi" href="/en/models/kimi">
-    kimi-k2.6, kimi-k2.5, kimi-k2 and more
-  </Card>
-  <Card title="LinkAI" href="/en/models/linkai">
-    Unified multi-model interface + knowledge base
-  </Card>
-</CardGroup>
+| Vendor | Representative Models | Text | Vision | Image Gen | STT | TTS | Embedding |
+| --- | --- | :-: | :-: | :-: | :-: | :-: | :-: |
+| [DeepSeek](/en/models/deepseek) | deepseek-v4-flash / pro | ✅ | | | | | |
+| [MiniMax](/en/models/minimax) | MiniMax-M2.7 | ✅ | ✅ | ✅ | | ✅ | |
+| [Claude](/en/models/claude) | claude-opus-4-8 | ✅ | ✅ | | | | |
+| [Gemini](/en/models/gemini) | gemini-3.5-flash | ✅ | ✅ | ✅ | | | |
+| [OpenAI](/en/models/openai) | gpt-5.5, o-series | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [GLM](/en/models/glm) | glm-5.1, glm-5v-turbo | ✅ | ✅ | | ✅ | | ✅ |
+| [Qwen](/en/models/qwen) | qwen3.7-max | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [Doubao](/en/models/doubao) | doubao-seed-2.0 series | ✅ | ✅ | ✅ | | | ✅ |
+| [Kimi](/en/models/kimi) | kimi-k2.6 | ✅ | ✅ | | | | |
+| [ERNIE](/en/models/qianfan) | ernie-5.1 | ✅ | ✅ | | | | |
+| [MiMo](/en/models/mimo) | mimo-v2.5-pro / v2.5 | ✅ | ✅ | | | ✅ | |
+| [LinkAI](/en/models/linkai) | 100+ models from multiple vendors | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [Custom](/en/models/custom) | Local models / third-party proxies | ✅ | | | | | |

 <Tip>
-  For a full list of model names, refer to the project's [`common/const.py`](https://github.com/zhayujie/CowAgent/blob/master/common/const.py) file.
+  Every capability in the Web console (Vision / Image / STT / TTS / Embedding / Web Search) can be configured independently with its own vendor and model — there is no forced binding between them.
 </Tip>
+
+## How to Configure
+
+**Option 1 (recommended):** Manage models and capabilities online via the [Web console](/en/channels/web), with no need to edit the configuration file:
+
+<img width="900" src="https://cdn.jsdelivr.net/gh/zhayujie/cowagent-assets@main/screenshots/en/web-console-models-config.png" />
+
+**Option 2:** Edit `config.json` manually and fill in the model name and API key for the selected vendor. Every model also supports OpenAI-compatible access — just set `bot_type` to `openai` and configure `open_ai_api_base` and `open_ai_api_key`.
--- a/docs/en/models/kimi.mdx
+++ b/docs/en/models/kimi.mdx
@@ -1,8 +1,16 @@
 ---
-title: Kimi (Moonshot)
-description: Kimi (Moonshot) model configuration
+title: Kimi
+description: Kimi (Moonshot) model configuration (Text Chat + Image Understanding)
 ---

+Kimi is provided by Moonshot and supports both text chat and image understanding. The `kimi-k2.x` series natively supports vision.
+
+<Tip>
+  All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file.
+</Tip>
+
+## Text Chat
+
 ```json
 {
  "model": "kimi-k2.6",
@@ -12,16 +20,22 @@ description: Kimi (Moonshot) model configuration

 | Parameter | Description |
 | --- | --- |
-| `model` | Options include `kimi-k2.6`, `kimi-k2.5`, `kimi-k2`, `moonshot-v1-8k`, `moonshot-v1-32k`, `moonshot-v1-128k` |
-| `moonshot_api_key` | Create at [Moonshot Console](https://platform.moonshot.cn/console/api-keys) |
+| `model` | Can be `kimi-k2.6`, `kimi-k2.5`, `kimi-k2`, `moonshot-v1-8k`, `moonshot-v1-32k`, `moonshot-v1-128k` |
+| `moonshot_api_key` | Create one in the [Moonshot Console](https://platform.moonshot.cn/console/api-keys) |
+| `moonshot_base_url` | Optional, defaults to `https://api.moonshot.cn/v1` |

-OpenAI-compatible configuration is also supported:
+## Image Understanding
+
+Once `moonshot_api_key` is configured, the Agent's Vision tool automatically uses `kimi-k2.6` to recognize images, with no extra setup required.
+
+To manually specify a Vision model:

 ```json
 {
-  "bot_type": "openai",
-  "model": "kimi-k2.6",
-  "open_ai_api_base": "https://api.moonshot.cn/v1",
-  "open_ai_api_key": "YOUR_API_KEY"
+  "tools": {
+    "vision": {
+      "model": "kimi-k2.6"
+    }
+  }
 }
 ```
--- a/docs/en/models/linkai.mdx
+++ b/docs/en/models/linkai.mdx
@@ -1,9 +1,15 @@
 ---
 title: LinkAI
-description: Unified access to multiple models via LinkAI platform
+description: Access text, vision, image, speech, and embedding capabilities through the LinkAI platform
 ---

-The [LinkAI](https://link-ai.tech) platform lets you flexibly switch between OpenAI, Claude, Gemini, DeepSeek, MiniMax, Qwen, Kimi, and other models, with support for knowledge base, workflows, plugins, and other Agent capabilities.
+A single `linkai_api_key` gives you access to all capabilities of mainstream vendors such as OpenAI, Claude, Gemini, DeepSeek, MiniMax, Qwen, Kimi, and Doubao.
+
+<Tip>
+  All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file.
+</Tip>
+
+## Text Chat

 ```json
 {
@@ -14,8 +20,84 @@ The [LinkAI](https://link-ai.tech) platform lets you flexibly switch between Ope

 | Parameter | Description |
 | --- | --- |
-| `use_linkai` | Set to `true` to enable LinkAI interface |
-| `linkai_api_key` | Create at [LinkAI Console](https://link-ai.tech/console/interface) |
-| `model` | Leave empty to use the agent's default model. Can be switched flexibly on the platform. All models in the [model list](https://link-ai.tech/console/models) are supported |
+| `use_linkai` | Set to `true` to enable |
+| `linkai_api_key` | Create one in the [Console](https://link-ai.tech/console/interface) |
+| `model` | Can be any code from the [model list](https://link-ai.tech/console/models) |

-See the [API documentation](https://docs.link-ai.tech/platform/api) for more details.
+See [Model Service](https://link-ai.tech/console/models) for more.
+
+## Image Understanding
+
+Once configured, the Agent's Vision tool automatically calls multimodal models via the gateway, with no extra setup required. To manually specify a Vision model:
+
+```json
+{
+  "tools": {
+    "vision": {
+      "model": "gpt-5.4-mini"
+    }
+  }
+}
+```
+
+Available models: `gpt-4.1-mini`, `gpt-5.4-mini`, `qwen3.6-plus`, `doubao-seed-2-0-pro-260215`, `kimi-k2.6`, `claude-sonnet-4-6`, `gemini-3.1-flash-lite-preview`, etc.
+
+## Image Generation
+
+```json
+{
+  "skills": {
+    "image-generation": {
+      "model": "gpt-image-2"
+    }
+  }
+}
+```
+
+| Model ID | Alias |
+| --- | --- |
+| `gpt-image-2` | OpenAI |
+| `gemini-3.1-flash-image-preview` | Nano Banana 2 |
+| `gemini-3-pro-image-preview` | Nano Banana Pro |
+| `seedream-5.0-lite` | ByteDance Doubao Seedream |
+
+## Speech-to-Text (ASR)
+
+```json
+{
+  "voice_to_text": "linkai"
+}
+```
+
+ASR uses Whisper by default; credentials are automatically reused from `linkai_api_key`.
+
+## Text-to-Speech (TTS)
+
+The TTS gateway supports multiple underlying engines. The engine is selected by `text_to_voice_model`, and the available voices change with the engine.
+
+```json
+{
+  "text_to_voice": "linkai",
+  "text_to_voice_model": "doubao",
+  "tts_voice_id": "BV001_streaming"
+}
+```
+
+| `text_to_voice_model` | Engine |
+| --- | --- |
+| `tts-1` | OpenAI · Multi-language (voices such as `alloy`, `nova`, and `echo`) |
+| `doubao` | ByteDance Doubao · Rich Chinese voices |
+| `baidu` | Baidu · Chinese broadcaster voices |
+
+Voices differ by engine; we recommend selecting them visually in the Web Console under "Model Management → Text-to-Speech".
+
+## Embedding
+
+```json
+{
+  "embedding_provider": "linkai",
+  "embedding_model": "text-embedding-3-small"
+}
+```
+
+The default model is `text-embedding-3-small` (OpenAI-compatible). After changing the embedding provider or model, run `/memory rebuild-index` to rebuild the index.
--- a/docs/en/models/mimo.mdx
+++ b/docs/en/models/mimo.mdx
@@ -0,0 +1,136 @@
+---
+title: MiMo
+description: Xiaomi MiMo model configuration (Text Chat + Image Understanding + Text-to-Speech)
+---
+
+Xiaomi MiMo is a native omni-modal large model. A single `mimo_api_key` enables text chat, image understanding, and text-to-speech all at once.
+
+<Tip>
+  All capabilities below can be configured in one place via the "Model Management" page in the Web Console — no need to manually edit the configuration file.
+</Tip>
+
+## Text Chat
+
+```json
+{
+  "model": "mimo-v2.5-pro",
+  "mimo_api_key": "YOUR_API_KEY",
+  "mimo_api_base": "https://api.xiaomimimo.com/v1"
+}
+```
+
+| Parameter | Description |
+| --- | --- |
+| `model` | Default recommendation: `mimo-v2.5-pro`; `mimo-v2.5` is also supported |
+| `mimo_api_key` | Create one in the [MiMo Open Platform](https://platform.xiaomimimo.com/console/api-keys) |
+| `mimo_api_base` | Optional, defaults to `https://api.xiaomimimo.com/v1` |
+
+### Model Selection
+
+| Model | Use Case |
+| --- | --- |
+| `mimo-v2.5-pro` | Flagship: native omni-modal + Agent capability, up to 1M tokens context |
+| `mimo-v2.5` | General-purpose, native omni-modal (text / image / video / audio) |
+
+## Thinking Mode
+
+The MiMo V2.5 series enables "thinking mode" by default: the model emits `reasoning_content` (chain-of-thought) before the final answer, improving performance on complex tasks.
+
+Use the global `enable_thinking` flag to toggle visibility (also switchable from the Web Console settings):
+
+```json
+{
+  "enable_thinking": true
+}
+```
+
+## Image Understanding
+
+Once `mimo_api_key` is configured, the Agent's Vision tool can automatically use MiMo's vision models:
+
+- When the main model itself is multimodal (`mimo-v2.5-pro` / `mimo-v2.5`), images are handled directly by the main model with no extra setup.
+- When the main model belongs to another vendor, the Vision tool falls back to `mimo-v2.5-pro`.
+
+To force a specific Vision model, set it explicitly in the configuration:
+
+```json
+{
+  "tools": {
+    "vision": {
+      "provider": "mimo",
+      "model": "mimo-v2.5-pro"
+    }
+  }
+}
+```
+
+## Text-to-Speech (TTS)
+
+```json
+{
+  "text_to_voice": "mimo",
+  "text_to_voice_model": "mimo-v2.5-tts",
+  "tts_voice_id": "冰糖"
+}
+```
+
+| Parameter | Description |
+| --- | --- |
+| `text_to_voice_model` | Currently only `mimo-v2.5-tts` (preset voices + singing mode) |
+| `tts_voice_id` | Preset voice name (Chinese voice IDs use the Chinese name directly) |
+
+### Preset Voices
+
+| Voice ID | Description |
+| --- | --- |
+| `Mia` | English · Female |
+| `Chloe` | English · Female |
+| `Milo` | English · Male |
+| `Dean` | English · Male |
+| `冰糖` | Chinese · Female (default) |
+| `茉莉` | Chinese · Female |
+| `苏打` | Chinese · Male |
+| `白桦` | Chinese · Male |
+
+
+You can also pick a voice visually from the Web Console under "Model Management → Text-to-Speech".
+
+### Style Control
+
+MiMo TTS supports embedding **audio tags** in the synthesis text to control emotion, tone, dialect, persona, and even singing. Tags must appear in the **text that will be synthesized to speech (i.e. the Agent's reply)**, with the overall style tag placed at the very beginning:
+
+```
+(style)content-to-synthesize
+```
+
+Half-width `()`, full-width `（）`, and `[]` brackets are all accepted. Both Chinese and English style descriptors work — pick whichever language expresses the timbre most precisely. Common examples:
+
+| Category | Example tags |
+| --- | --- |
+| Basic emotions | `happy` `sad` `angry` `fear` `surprised` `excited` `aggrieved` `calm` `indifferent` |
+| Compound emotions | `wistful` `relieved` `helpless` `guilty` `at ease` `uneasy` `touched` |
+| Overall tone | `gentle` `aloof` `lively` `serious` `languid` `playful` `deep` `sharp` `cutting` |
+| Voice character | `magnetic` `mellow` `bright` `ethereal` `childlike` `aged` `sweet` `husky` |
+| Persona | `squeaky` `mature lady` `young boy` `uncle` `Taiwanese accent` |
+| Dialect | `Northeastern` `Sichuan` `Henan` `Cantonese` |
+| Role-play | `Sun Wukong` `Lin Daiyu` |
+| Singing | `sing` / `singing` |
+
+Examples:
+
+- `(magnetic)The night is deep, and the city is still breathing.`
+- `(gentle)Take a breath. You've got this.`
+- `(serious)This is the final warning before the system reboots.`
+- `(singing)Oh, when the saints go marching in…`
+
+You can also insert fine-grained audio tags at any position in the text to control breathing, laughter, pauses, etc. For example:
+
+```
+(nervous, deep breath) Phew… stay calm, stay calm. (faster pace) I've rehearsed this intro fifty times, it'll be fine.
+```
+
+See the [MiMo speech synthesis documentation](https://platform.xiaomimimo.com/docs/zh-CN/usage-guide/speech-synthesis-v2.5) for the full tag list.
+
+<Tip>
+  When CowAgent calls TTS, the Agent's reply text (including any `(...)` tags) is forwarded directly to MiMo for synthesis. Tell the model in its persona / system prompt to "prefix replies with a `(style)` tag to control the tone", and IM channels (WeChat / Feishu / DingTalk / WeCom) will play voice replies with the corresponding emotion, dialect, or even singing.
+</Tip>
--- a/docs/en/models/minimax.mdx
+++ b/docs/en/models/minimax.mdx
@@ -1,8 +1,16 @@
 ---
 title: MiniMax
-description: MiniMax model configuration
+description: MiniMax model configuration (Text / Image Understanding / Image Generation / Text-to-Speech)
 ---

+MiniMax supports text chat, image understanding, image generation, and text-to-speech. A single `minimax_api_key` enables all capabilities.
+
+<Tip>
+  All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file.
+</Tip>
+
+## Text Chat
+
 ```json
 {
  "model": "MiniMax-M2.7",
@@ -12,16 +20,52 @@ description: MiniMax model configuration

 | Parameter | Description |
 | --- | --- |
-| `model` | Options include `MiniMax-M2.7`, `MiniMax-M2.5`, `MiniMax-M2.1`, `MiniMax-M2.1-lightning`, `MiniMax-M2`, etc. |
-| `minimax_api_key` | Create at [MiniMax Console](https://platform.minimaxi.com/user-center/basic-information/interface-key) |
+| `model` | Can be `MiniMax-M2.7`, `MiniMax-M2.7-highspeed`, `MiniMax-M2.5`, `MiniMax-M2.1`, `MiniMax-M2.1-lightning`, `MiniMax-M2`, etc. |
+| `minimax_api_key` | Create one in the [MiniMax Console](https://platform.minimaxi.com/user-center/basic-information/interface-key) |

-OpenAI-compatible configuration is also supported:
+## Image Understanding
+
+MiniMax's M2.x chat models do not support vision natively, so all vision calls are routed to `MiniMax-Text-01`. Once `minimax_api_key` is configured, the Agent's Vision tool uses this model automatically; you do not need to specify it in the configuration file.
+
+## Image Generation

 ```json
 {
-  "bot_type": "openai",
-  "model": "MiniMax-M2.7",
-  "open_ai_api_base": "https://api.minimaxi.com/v1",
-  "open_ai_api_key": "YOUR_API_KEY"
+  "skills": {
+    "image-generation": {
+      "model": "image-01"
+    }
+  }
 }
 ```
+
+Available models: `image-01`.
+
+## Text-to-Speech (TTS)
+
+```json
+{
+  "text_to_voice": "minimax",
+  "text_to_voice_model": "speech-2.8-hd",
+  "tts_voice_id": "female-shaonv"
+}
+```
+
+| Parameter | Description |
+| --- | --- |
+| `text_to_voice_model` | `speech-2.8-hd` (emotional rendering, natural sound), `speech-2.8-turbo` (ultra-fast), `speech-2.6-hd`, `speech-2.6-turbo` |
+| `tts_voice_id` | Voice ID; supports Chinese / Cantonese / English / Japanese / Korean — 70+ voices in total |
+
+Common voice examples:
+
+| Voice ID | Description |
+| --- | --- |
+| `female-shaonv` | Chinese · Young Girl (Female) |
+| `female-yujie` | Chinese · Mature Lady (Female) |
+| `female-tianmei` | Chinese · Sweet Female (Female) |
+| `male-qn-jingying` | Chinese · Elite Youth (Male) |
+| `male-qn-badao` | Chinese · Dominant Youth (Male) |
+| `Cantonese_GentleLady` | Cantonese · Gentle Female Voice |
+| `English_Graceful_Lady` | English · Graceful Lady |
+
+For the full voice list (70+ voices across Chinese / Cantonese / English / Japanese / Korean), see the [system voice list](https://platform.minimaxi.com/docs/faq/system-voice-id), or select visually in the Web Console under "Model Management → Text-to-Speech".
--- a/docs/en/models/openai.mdx
+++ b/docs/en/models/openai.mdx
@@ -1,11 +1,20 @@
 ---
 title: OpenAI
-description: OpenAI model configuration
+description: OpenAI model configuration (Text / Vision / Image / Speech / Embedding)
 ---

+OpenAI offers the most complete coverage and can simultaneously serve text chat, vision understanding, image generation, speech-to-text (ASR), text-to-speech (TTS), and embedding. A single `open_ai_api_key` lets the Agent use all of these capabilities.
+
+<Tip>
+  All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file.
+</Tip>
+
+
+## Text Chat
+
 ```json
 {
-  "model": "gpt-5.4",
+  "model": "gpt-5.5",
  "open_ai_api_key": "YOUR_API_KEY",
  "open_ai_api_base": "https://api.openai.com/v1"
 }
@@ -13,7 +22,82 @@ description: OpenAI model configuration

 | Parameter | Description |
 | --- | --- |
-| `model` | Matches the [model parameter](https://platform.openai.com/docs/models) of the OpenAI API. Supports o-series, gpt-5.4, gpt-5 series, gpt-4.1, etc. Recommended for Agent mode: `gpt-5.4` |
-| `open_ai_api_key` | Create at [OpenAI Platform](https://platform.openai.com/api-keys) |
-| `open_ai_api_base` | Optional. Change to use third-party proxy |
-| `bot_type` | Not required for official OpenAI models. Set to `openai` when using Claude or other non-OpenAI models via proxy |
+| `model` | Same as OpenAI's [model parameter](https://platform.openai.com/docs/models); supports `gpt-5.5`, `gpt-5.4`, `gpt-5.4-mini`, `gpt-5.4-nano`, the `gpt-5` series, `gpt-4.1`, the o-series, etc. Agent mode defaults to `gpt-5.5`; use `gpt-5.4` for better cost-efficiency |
+| `open_ai_api_key` | Create one on the [OpenAI Platform](https://platform.openai.com/api-keys) |
+| `open_ai_api_base` | Optional; change it to access a third-party proxy |
+| `bot_type` | Not required when using OpenAI's official models; set to `openai` when accessing other vendors via the compatible protocol |
+
+## Image Understanding
+
+OpenAI models such as `gpt-5.5`, `gpt-5.4`, `gpt-4o`, and `gpt-4.1` natively support vision. Once `open_ai_api_key` is configured, the Agent's Vision tool automatically uses the main model to recognize images. If the main model does not support vision, or you want to use a specific vision model, set it explicitly in the configuration file:
+
+```json
+{
+  "tools": {
+    "vision": {
+      "model": "gpt-5.4-mini"
+    }
+  }
+}
+```
+
+Supported Vision models: `gpt-5.5`, `gpt-5.4`, `gpt-5.4-mini`, `gpt-5.4-nano`, `gpt-5`, `gpt-4.1`, `gpt-4.1-mini`, `gpt-4o`.
+
+## Image Generation
+
+Specify the image generation model in the configuration file; the Agent automatically routes image generation skill calls to OpenAI:
+
+```json
+{
+  "skills": {
+    "image-generation": {
+      "model": "gpt-image-2"
+    }
+  }
+}
+```
+
+Supported image generation models: `gpt-image-2`, `gpt-image-1`.
+
+## Speech-to-Text (ASR)
+
+```json
+{
+  "voice_to_text": "openai",
+  "voice_to_text_model": "gpt-4o-mini-transcribe"
+}
+```
+
+| Parameter | Description |
+| --- | --- |
+| `voice_to_text` | Set to `openai` to enable OpenAI speech-to-text |
+| `voice_to_text_model` | Optional, defaults to `gpt-4o-mini-transcribe`; can also be `gpt-4o-transcribe`, `whisper-1` |
+
+Credentials are automatically reused from `open_ai_api_key`.
+
+## Text-to-Speech (TTS)
+
+```json
+{
+  "text_to_voice": "openai",
+  "text_to_voice_model": "tts-1",
+  "tts_voice_id": "alloy"
+}
+```
+
+| Parameter | Description |
+| --- | --- |
+| `text_to_voice_model` | `tts-1`, `tts-1-hd`, `gpt-4o-mini-tts` |
+| `tts_voice_id` | Voices: `alloy`, `echo`, `fable`, `onyx`, `nova`, `shimmer`, `ash`, `ballad`, `coral`, `sage`, `verse` |
+
+## Embedding
+
+```json
+{
+  "embedding_provider": "openai",
+  "embedding_model": "text-embedding-3-small"
+}
+```
+
+Available models: `text-embedding-3-small`, `text-embedding-3-large`, `text-embedding-ada-002`. After changing the embedding model, run `/memory rebuild-index` to rebuild the index.
+
--- a/docs/en/models/qianfan.mdx
+++ b/docs/en/models/qianfan.mdx
@@ -1,6 +1,6 @@
 ---
-title: Baidu Qianfan / ERNIE
-description: Baidu Qianfan ERNIE model configuration
+title: ERNIE
+description: ERNIE model configuration (Baidu Qianfan)
 ---

 Option 1: Native integration (recommended):
@@ -40,7 +40,7 @@ To force a specific Vision model, set it explicitly in `config.json`:

 ```json
 {
-  "tool": {
+  "tools": {
    "vision": {
      "model": "ernie-4.5-turbo-vl"
    }
--- a/docs/en/models/qwen.mdx
+++ b/docs/en/models/qwen.mdx
@@ -1,8 +1,16 @@
 ---
-title: Qwen (Tongyi Qianwen)
-description: Tongyi Qianwen model configuration
+title: Qwen
+description: Qwen model configuration (Text / Image Understanding / Image Generation / Speech-to-Text / Text-to-Speech / Embedding)
 ---

+Qwen (Alibaba DashScope / Bailian) is one of the most fully featured vendors. Text chat, image understanding, image generation, speech-to-text, text-to-speech, and embedding can all be enabled with a single `dashscope_api_key`.
+
+<Tip>
+  All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file.
+</Tip>
+
+## Text Chat
+
 ```json
 {
  "model": "qwen3.6-plus",
@@ -12,16 +20,93 @@ description: Tongyi Qianwen model configuration

 | Parameter | Description |
 | --- | --- |
-| `model` | Options include `qwen3.6-plus`, `qwen3.5-plus`, `qwen3-max`, `qwen-max`, `qwen-plus`, `qwen-turbo`, `qwq-plus`, etc. |
-| `dashscope_api_key` | Create at [Bailian Console](https://bailian.console.aliyun.com/?tab=model#/api-key). See [official docs](https://bailian.console.aliyun.com/?tab=api#/api) |
+| `model` | Can be `qwen3.6-plus`, `qwen3.7-max`, `qwen3.5-plus`, `qwen3-max`, `qwen-max`, `qwen-plus`, `qwen-turbo`, `qwq-plus`, etc. |
+| `dashscope_api_key` | Create one in the [Bailian Console](https://bailian.console.aliyun.com/?tab=model#/api-key); see the [official docs](https://bailian.console.aliyun.com/?tab=api#/api) |

-OpenAI-compatible configuration is also supported:
+## Image Understanding
+
+Once `dashscope_api_key` is configured, the Agent's Vision tool automatically calls Qwen's vision models to recognize images. Models such as `qwen3-max`, `qwen3.5-plus`, and `qwen3.6-plus` are already multimodal; if the main model is text-only (e.g. `qwen-turbo`), the Vision tool automatically falls back to `qwen-vl-max`.
+
+To manually specify a Vision model:

 ```json
 {
-  "bot_type": "openai",
-  "model": "qwen3.6-plus",
-  "open_ai_api_base": "https://dashscope.aliyuncs.com/compatible-mode/v1",
-  "open_ai_api_key": "YOUR_API_KEY"
+  "tools": {
+    "vision": {
+      "model": "qwen3.6-plus"
+    }
+  }
 }
 ```
+
+Supported models: `qwen3.6-plus`, `qwen3.5-plus`, `qwen3-max`.
+
+## Image Generation
+
+```json
+{
+  "skills": {
+    "image-generation": {
+      "model": "qwen-image-2.0"
+    }
+  }
+}
+```
+
+Available models: `qwen-image-2.0`, `qwen-image-2.0-pro`.
+
+## Speech-to-Text (ASR)
+
+```json
+{
+  "voice_to_text": "dashscope",
+  "voice_to_text_model": "qwen3-asr-flash"
+}
+```
+
+| Parameter | Description |
+| --- | --- |
+| `voice_to_text` | Set to `dashscope` to enable Qwen ASR |
+| `voice_to_text_model` | Optional, defaults to `qwen3-asr-flash` |
+
+Credentials are automatically reused from `dashscope_api_key`. A single audio segment should be smaller than 10 MB and no longer than 300 seconds.
+
+## Text-to-Speech (TTS)
+
+```json
+{
+  "text_to_voice": "dashscope",
+  "text_to_voice_model": "qwen3-tts-flash",
+  "tts_voice_id": "Cherry"
+}
+```
+
+| Parameter | Description |
+| --- | --- |
+| `text_to_voice_model` | Optional, defaults to `qwen3-tts-flash`; covers Mandarin, dialects, and major foreign languages |
+| `tts_voice_id` | Voice ID; see the common list below |
+
+Common voice examples:
+
+| Voice ID | Description |
+| --- | --- |
+| `Cherry` | Qianyue · Sunny Female Voice |
+| `Serena` | Suyao · Gentle Female Voice |
+| `Ethan` | Chenxu · Sunny Male Voice |
+| `Chelsie` | Qianxue · Anime Girl |
+| `Dylan` | Beijing Dialect · Xiaodong |
+| `Rocky` | Cantonese · Aqiang |
+| `Sunny` | Sichuan Dialect · Qing'er |
+
+The full voice list (Mandarin / regional dialects / bilingual, etc.) can be selected visually in the Web Console under "Model Management → Text-to-Speech".
+
+## Embedding
+
+```json
+{
+  "embedding_provider": "dashscope",
+  "embedding_model": "text-embedding-v4"
+}
+```
+
+The default model is `text-embedding-v4`. After changing the embedding model, run `/memory rebuild-index` to rebuild the index.
--- a/docs/en/releases/overview.mdx
+++ b/docs/en/releases/overview.mdx
@@ -5,12 +5,15 @@ description: CowAgent version history

 | Version | Date | Description |
 | --- | --- | --- |
+| [2.0.9](/en/releases/v2.0.9) | 2026.05.22 | Model management console, MCP protocol support, browser persistent login, new models (gpt-5.5, gemini-3.5-flash, qwen3.7-max, etc.), deployment hardening |
+| [2.0.8](/en/releases/v2.0.8) | 2026.05.06 | Major Feishu channel upgrade (voice, streaming and Markdown, one-click QR-scan setup), DeepSeek V4 and Baidu models, scheduler tool enhancements |
 | [2.0.7](/en/releases/v2.0.7) | 2026.04.22 | Image Generation Skill (6-provider auto-routing), new models (Kimi K2.6, Claude Opus 4.7, GLM 5.1), knowledge base and Web Console improvements |
-| [2.0.6](/en/releases/v2.0.6) | 2026.04.14 | Knowledge Base, Deep Dream Memory Distillation, Smart Context Compression, Web Console upgrades |
+| [2.0.6](/en/releases/v2.0.6) | 2026.04.14 | Project rename, Knowledge Base system, Deep Dream Memory Distillation, Smart Context Compression, Web Console multi-session and various improvements |
 | [2.0.5](/en/releases/v2.0.5) | 2026.04.01 | Cow CLI, Skill Hub open source, Browser tool, WeCom Bot QR scan, and more |
 | [2.0.4](/en/releases/v2.0.4) | 2026.03.22 | Personal WeChat channel, new model support, Japanese docs, script refactoring and bug fixes |
+| [2.0.3](/en/releases/v2.0.3) | 2026.03.18 | WeCom Smart Bot and QQ channels, Coding Plan support, multiple new models, Web file processing, memory system upgrade |
 | [2.0.2](/en/releases/v2.0.2) | 2026.02.27 | Web Console upgrade, multi-channel concurrency, session persistence |
-| [2.0.1](/en/releases/v2.0.1) | 2026.02.27 | Built-in Web Search tool, smart context management, multiple fixes |
+| [2.0.1](/en/releases/v2.0.1) | 2026.02.13 | Built-in Web Search tool, smart context management, multiple fixes |
 | [2.0.0](/en/releases/v2.0.0) | 2026.02.03 | Full upgrade to AI super assistant |
 | 1.7.6 | 2025.05.23 | Web Channel optimization, AgentMesh plugin |
 | 1.7.5 | 2025.04.11 | DeepSeek model |
@@ -21,6 +24,8 @@ description: CowAgent version history
 | 1.6.9 | 2024.07.19 | gpt-4o-mini, Alibaba voice recognition |
 | 1.6.8 | 2024.07.05 | Claude 3.5, Gemini 1.5 Pro |
 | 1.6.0 | 2024.04.26 | Kimi integration, gpt-4-turbo upgrade |
+| 1.5.8 | 2024.03.26 | GLM-4, Claude-3, edge-tts |
+| 1.5.2 | 2023.11.10 | Feishu channel, image recognition chat |
 | 1.5.0 | 2023.11.10 | gpt-4-turbo, dall-e-3, tts multimodal |
 | 1.0.0 | 2022.12.12 | Project created, first ChatGPT integration |

--- a/Show More
+++ b/Show More
				`@@ -0,0 +1 @@`
				<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251656961" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="18432" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M252.8 652.8l167.893333-94.293333 2.773334-8.106667-2.773334-4.48h-8.106666l-28.16-1.706667-96-2.56-83.2-3.413333-80.64-4.266667-20.266667-4.266666L85.333333 504.746667l1.92-12.586667 17.066667-11.52 24.32 2.133333 53.973333 3.626667 81.066667 5.546667 58.666667 3.413333 87.04 9.173333h13.866666l1.92-5.546666-4.693333-3.413334-3.626667-3.413333-83.84-56.746667-90.666666-60.16-47.573334-34.56-25.813333-17.493333-13.013333-16.426667-5.546667-35.84 23.253333-25.813333 31.36 2.133333 7.893334 2.133334 31.786666 24.32 67.84 52.48L401.066667 391.466667l13.013333 10.88 5.12-3.626667 0.64-2.56-5.76-9.813333-48.213333-87.04L314.453333 210.773333l-22.826666-36.693333-5.973334-21.973333a107.861333 107.861333 0 0 1-3.626666-26.026667l26.666666-36.053333L323.413333 85.333333l35.413334 4.693334 14.933333 13.013333 21.973333 50.346667 35.626667 79.36 55.253333 107.733333 16.213334 32 8.746666 29.653333 3.2 9.173334h5.546667v-5.12l4.48-60.8 8.32-74.453334 8.106667-96 2.773333-27.093333 13.44-32.426667 26.666667-17.493333 20.693333 10.026667 17.066667 24.32-2.346667 15.786666-10.24 65.92-19.84 103.253334-13.013333 69.12h7.466666l8.746667-8.746667 34.986667-46.506667 58.666666-73.386666 26.026667-29.226667 30.293333-32.213333 19.413334-15.36h36.693333l27.093333 40.106666-12.16 41.386667-37.76 48-31.36 40.533333-45.013333 60.586667-28.16 48.426667 2.56 3.84 6.613333-0.64 101.546667-21.546667 54.826667-10.026667 65.493333-11.306666 29.653333 13.866666 3.2 14.08-11.733333 28.8-69.973333 17.28-82.133334 16.426667-122.24 29.013333-1.493333 1.066667 1.706667 2.133333 55.04 5.12 23.466666 1.28h57.6l107.306667 7.893334 28.16 18.56 16.853333 22.613333-2.773333 
17.28-43.306667 21.973333-58.24-13.866666-136.106666-32.426667-46.72-11.733333h-6.4v3.84l38.826666 37.973333 71.253334 64.426667 89.173333 82.986666 4.48 20.48-11.52 16.213334-12.16-1.706667-78.506667-58.88-30.293333-26.666667-68.48-57.6h-4.48v5.973334l15.786667 23.04 83.413333 125.226666 4.266667 38.4-5.973334 12.586667-21.546666 7.466667-23.68-4.266667-48.853334-68.48-50.346666-77.226667-40.533334-69.12-4.906666 2.773334-23.893334 258.133333-11.306666 13.226667-26.026667 10.026666-21.546667-16.426666-11.52-26.666667 11.52-52.48 13.866667-68.48 11.306667-54.4 10.24-67.626667 5.973333-22.4-0.426667-1.493333-4.906666 0.64-50.986667 69.973333-77.653333 104.746667-61.44 65.706667-14.72 5.76-25.386667-13.226667 2.346667-23.466667 14.293333-20.906666 84.906667-107.946667 51.2-66.986667 33.066666-38.613333v-5.546667h-2.133333l-225.493333 146.56-40.106667 5.12-17.28-16.213333 2.133333-26.666667 8.106667-8.746666 67.84-46.72h-0.213333l0.853333 0.853333z" fill="#D97757" p-id="18433"></path></svg>
@@ -1 +1 @@
 .0.8
 .0.9