Merge branch 'master' into feat/wechatcom-kf-channel

This commit is contained in:
zhayujie
2026-05-30 17:17:29 +08:00
committed by GitHub
212 changed files with 14439 additions and 3691 deletions

1
.gitignore vendored
View File

@@ -32,7 +32,6 @@ plugins/banwords/lib/__pycache__
!plugins/role !plugins/role
!plugins/keyword !plugins/keyword
!plugins/linkai !plugins/linkai
!plugins/agent
!plugins/cow_cli !plugins/cow_cli
client_config.json client_config.json
ref/ ref/

1049
README.md

File diff suppressed because it is too large Load Diff

View File

@@ -44,6 +44,7 @@ CREATE TABLE IF NOT EXISTS messages (
role TEXT NOT NULL, role TEXT NOT NULL,
content TEXT NOT NULL, content TEXT NOT NULL,
created_at INTEGER NOT NULL, created_at INTEGER NOT NULL,
extras TEXT NOT NULL DEFAULT '',
UNIQUE (session_id, seq) UNIQUE (session_id, seq)
); );
@@ -67,6 +68,12 @@ _MIGRATION_ADD_CONTEXT_START_SEQ = """
ALTER TABLE sessions ADD COLUMN context_start_seq INTEGER NOT NULL DEFAULT 0; ALTER TABLE sessions ADD COLUMN context_start_seq INTEGER NOT NULL DEFAULT 0;
""" """
# Generic JSON sidecar for per-message attachments (TTS audio URL, future use).
# Always optional — readers must tolerate missing column / empty / invalid JSON.
_MIGRATION_ADD_MSG_EXTRAS = """
ALTER TABLE messages ADD COLUMN extras TEXT NOT NULL DEFAULT '';
"""
DEFAULT_MAX_AGE_DAYS: int = 30 DEFAULT_MAX_AGE_DAYS: int = 30
@@ -169,20 +176,26 @@ def _group_into_display_turns(
cur_rest: List[tuple] = [] cur_rest: List[tuple] = []
started = False started = False
for role, raw_content, created_at in rows: for role, raw_content, created_at, raw_extras in rows:
try: try:
content = json.loads(raw_content) content = json.loads(raw_content)
except Exception: except Exception:
content = raw_content content = raw_content
try:
extras = json.loads(raw_extras) if raw_extras else {}
if not isinstance(extras, dict):
extras = {}
except Exception:
extras = {}
if role == "user" and _is_visible_user_message(content): if role == "user" and _is_visible_user_message(content):
if started: if started:
groups.append((cur_user, cur_rest)) groups.append((cur_user, cur_rest))
cur_user = (content, created_at) cur_user = (content, created_at, extras)
cur_rest = [] cur_rest = []
started = True started = True
else: else:
cur_rest.append((role, content, created_at)) cur_rest.append((role, content, created_at, extras))
if started: if started:
groups.append((cur_user, cur_rest)) groups.append((cur_user, cur_rest))
@@ -195,7 +208,7 @@ def _group_into_display_turns(
for user_row, rest in groups: for user_row, rest in groups:
# User turn # User turn
if user_row: if user_row:
content, created_at = user_row content, created_at, _u_extras = user_row
text = _extract_display_text(content) text = _extract_display_text(content)
if text: if text:
turns.append({"role": "user", "content": text, "created_at": created_at}) turns.append({"role": "user", "content": text, "created_at": created_at})
@@ -206,8 +219,11 @@ def _group_into_display_turns(
tool_results: Dict[str, str] = {} tool_results: Dict[str, str] = {}
final_text = "" final_text = ""
final_ts: Optional[int] = None final_ts: Optional[int] = None
merged_extras: Dict[str, Any] = {}
for role, content, created_at in rest: for role, content, created_at, extras in rest:
if role == "assistant" and isinstance(extras, dict):
merged_extras.update(extras)
if role == "user": if role == "user":
tool_results.update(_extract_tool_results(content)) tool_results.update(_extract_tool_results(content))
elif role == "assistant": elif role == "assistant":
@@ -256,6 +272,8 @@ def _group_into_display_turns(
"steps": steps, "steps": steps,
"created_at": final_ts or (user_row[1] if user_row else 0), "created_at": final_ts or (user_row[1] if user_row else 0),
} }
if merged_extras:
turn["extras"] = merged_extras
turns.append(turn) turns.append(turn)
return turns return turns
@@ -411,13 +429,15 @@ class ConversationStore:
content = json.dumps( content = json.dumps(
msg.get("content", ""), ensure_ascii=False msg.get("content", ""), ensure_ascii=False
) )
extras_obj = msg.get("extras") or {}
extras = json.dumps(extras_obj, ensure_ascii=False) if extras_obj else ""
conn.execute( conn.execute(
""" """
INSERT OR IGNORE INTO messages INSERT OR IGNORE INTO messages
(session_id, seq, role, content, created_at) (session_id, seq, role, content, created_at, extras)
VALUES (?, ?, ?, ?, ?) VALUES (?, ?, ?, ?, ?, ?)
""", """,
(session_id, next_seq, role, content, now), (session_id, next_seq, role, content, now, extras),
) )
next_seq += 1 next_seq += 1
@@ -651,6 +671,55 @@ class ConversationStore:
logger.info(f"[ConversationStore] Pruned {deleted} expired sessions") logger.info(f"[ConversationStore] Pruned {deleted} expired sessions")
return deleted return deleted
def attach_extras_to_last_assistant(
    self,
    session_id: str,
    extras: Dict[str, Any],
) -> Optional[int]:
    """
    Merge ``extras`` into the latest assistant message of a session.

    Used by post-processing (e.g. TTS) that needs to annotate an already
    persisted bot reply with attachments such as audio URLs.

    The stored ``extras`` JSON object is read, updated with the given keys
    (new values win on key collisions) and written back. A stored value
    that is empty, not valid JSON, or not a JSON object is treated as ``{}``.

    Args:
        session_id: Session whose most recent assistant row is annotated.
        extras: Keys to merge into that row's ``extras`` object.

    Returns:
        The ``seq`` of the updated message, or ``None`` when ``extras`` is
        empty, the session has no assistant message, or the update could
        not be applied (the failure is logged, never raised).
    """
    if not extras:
        return None

    with self._lock:
        conn = None
        try:
            # Connect inside the guarded region so a failure to open the
            # database also honours the "return None" contract instead of
            # propagating to the caller.
            conn = self._connect()
            row = conn.execute(
                """
                SELECT seq, extras FROM messages
                WHERE session_id = ? AND role = 'assistant'
                ORDER BY seq DESC LIMIT 1
                """,
                (session_id,),
            ).fetchone()
            if not row:
                return None

            seq, raw = row
            try:
                merged = json.loads(raw) if raw else {}
            except (TypeError, ValueError):
                # Unparseable sidecar data is discarded rather than fatal.
                merged = {}
            if not isinstance(merged, dict):
                merged = {}
            merged.update(extras)

            conn.execute(
                "UPDATE messages SET extras = ? WHERE session_id = ? AND seq = ?",
                (json.dumps(merged, ensure_ascii=False), session_id, seq),
            )
            conn.commit()
            return seq
        except Exception as e:
            logger.warning(f"[ConversationStore] attach_extras failed: {e}")
            return None
        finally:
            if conn is not None:
                conn.close()
def load_history_page( def load_history_page(
self, self,
session_id: str, session_id: str,
@@ -698,15 +767,31 @@ class ConversationStore:
).fetchone() ).fetchone()
ctx_start = ctx_row[0] if ctx_row else 0 ctx_start = ctx_row[0] if ctx_row else 0
rows = conn.execute( # extras column is added by migration; tolerate older DBs that
""" # might miss it by falling back to a NULL literal.
SELECT seq, role, content, created_at try:
FROM messages rows = conn.execute(
WHERE session_id = ? """
ORDER BY seq ASC SELECT seq, role, content, created_at, extras
""", FROM messages
(session_id,), WHERE session_id = ?
).fetchall() ORDER BY seq ASC
""",
(session_id,),
).fetchall()
except sqlite3.OperationalError:
rows = [
(seq, role, content, created_at, "")
for (seq, role, content, created_at) in conn.execute(
"""
SELECT seq, role, content, created_at
FROM messages
WHERE session_id = ?
ORDER BY seq ASC
""",
(session_id,),
).fetchall()
]
finally: finally:
conn.close() conn.close()
@@ -719,13 +804,16 @@ class ConversationStore:
include_thinking = False include_thinking = False
# Strip seq for display grouping, but record max seq per visible user group # Strip seq for display grouping, but record max seq per visible user group
plain_rows = [(role, content, created_at) for _seq, role, content, created_at in rows] plain_rows = [
(role, content, created_at, extras_raw)
for _seq, role, content, created_at, extras_raw in rows
]
visible = _group_into_display_turns(plain_rows, include_thinking=include_thinking) visible = _group_into_display_turns(plain_rows, include_thinking=include_thinking)
# Build a mapping: find the seq of each visible user message to annotate context boundary. # Build a mapping: find the seq of each visible user message to annotate context boundary.
# Walk through rows to find visible user message seqs in order. # Walk through rows to find visible user message seqs in order.
visible_user_seqs: List[int] = [] visible_user_seqs: List[int] = []
for seq, role, raw_content, _ts in rows: for seq, role, raw_content, _ts, _extras in rows:
if role != "user": if role != "user":
continue continue
try: try:
@@ -911,6 +999,18 @@ class ConversationStore:
except Exception as e: except Exception as e:
logger.warning(f"[ConversationStore] Migration (context_start_seq) failed: {e}") logger.warning(f"[ConversationStore] Migration (context_start_seq) failed: {e}")
msg_cols = {
row[1]
for row in conn.execute("PRAGMA table_info(messages)").fetchall()
}
if "extras" not in msg_cols:
try:
conn.execute(_MIGRATION_ADD_MSG_EXTRAS)
conn.commit()
logger.info("[ConversationStore] Migrated: added messages.extras column")
except Exception as e:
logger.warning(f"[ConversationStore] Migration (extras) failed: {e}")
def _connect(self) -> sqlite3.Connection: def _connect(self) -> sqlite3.Connection:
conn = sqlite3.connect(str(self._db_path), timeout=10) conn = sqlite3.connect(str(self._db_path), timeout=10)
conn.execute("PRAGMA journal_mode=WAL") conn.execute("PRAGMA journal_mode=WAL")

View File

@@ -31,9 +31,13 @@ def detect_index_dim(storage) -> Optional[int]:
if not row or not row["embedding"]: if not row or not row["embedding"]:
return None return None
try: try:
emb = json.loads(row["embedding"]) raw = row["embedding"]
if isinstance(raw, (bytes, bytearray)):
# New BLOB format: 4 bytes per float32
return len(raw) // 4
emb = json.loads(raw)
return len(emb) if isinstance(emb, list) else None return len(emb) if isinstance(emb, list) else None
except (json.JSONDecodeError, TypeError): except (json.JSONDecodeError, TypeError, Exception):
return None return None

View File

@@ -13,7 +13,7 @@ from datetime import datetime, timedelta
from agent.memory.config import MemoryConfig, get_default_memory_config from agent.memory.config import MemoryConfig, get_default_memory_config
from agent.memory.storage import MemoryStorage, MemoryChunk, SearchResult from agent.memory.storage import MemoryStorage, MemoryChunk, SearchResult
from agent.memory.chunker import TextChunker from agent.memory.chunker import TextChunker
from agent.memory.embedding import EmbeddingProvider from agent.memory.embedding import EmbeddingProvider, EmbeddingCache
from agent.memory.summarizer import MemoryFlushManager, create_memory_files_if_needed from agent.memory.summarizer import MemoryFlushManager, create_memory_files_if_needed
@@ -61,7 +61,11 @@ class MemoryManager:
logger.info( logger.info(
"[MemoryManager] No embedding provider; memory will use keyword search only" "[MemoryManager] No embedding provider; memory will use keyword search only"
) )
# Cache for query embeddings (avoids redundant API calls within a session)
self._embedding_cache = EmbeddingCache()
# Initialize memory flush manager # Initialize memory flush manager
workspace_dir = self.config.get_workspace() workspace_dir = self.config.get_workspace()
self.flush_manager = MemoryFlushManager( self.flush_manager = MemoryFlushManager(
@@ -128,7 +132,14 @@ class MemoryManager:
vector_results = [] vector_results = []
if self.embedding_provider: if self.embedding_provider:
try: try:
query_embedding = self.embedding_provider.embed_query(query) provider_name = type(self.embedding_provider).__name__
model_name = getattr(self.embedding_provider, 'model', '')
cached = self._embedding_cache.get(query, provider_name, model_name)
if cached is not None:
query_embedding = cached
else:
query_embedding = self.embedding_provider.embed_query(query)
self._embedding_cache.put(query, provider_name, model_name, query_embedding)
vector_results = self.storage.search_vector( vector_results = self.storage.search_vector(
query_embedding=query_embedding, query_embedding=query_embedding,
user_id=user_id, user_id=user_id,

View File

@@ -5,12 +5,42 @@ Provides vector and keyword search capabilities
""" """
from __future__ import annotations from __future__ import annotations
import re
import sqlite3 import sqlite3
import json import json
import hashlib import hashlib
import threading
from typing import List, Dict, Optional, Any from typing import List, Dict, Optional, Any
from pathlib import Path from pathlib import Path
from dataclasses import dataclass from dataclasses import dataclass
try:
import numpy as np
_HAS_NUMPY = True
except ImportError:
_HAS_NUMPY = False
np = None # type: ignore[assignment]
# UPSERT (INSERT … ON CONFLICT DO UPDATE) requires SQLite ≥ 3.24.0 (2018).
# Older systems (e.g. CentOS 7 ships SQLite 3.7) fall back to INSERT OR REPLACE,
# which risks FTS5 rowid drift on chunk updates (see save_chunk docstring).
_HAS_UPSERT = sqlite3.sqlite_version_info >= (3, 24, 0)
# ---------------------------------------------------------------------------
# CJK character ranges, compiled once at module load.
# Covers: CJK Symbols/Punctuation, Japanese kana (hiragana + katakana),
# CJK Unified Ideographs + Extension A, Korean syllables (Hangul),
# CJK Compatibility Ideographs, and CJK Extensions B–F.
# ---------------------------------------------------------------------------
# The fragments below are adjacent string literals, so they concatenate into
# one character-class body that is interpolated into the patterns further down.
_CJK_RANGES = (
    r'\u3000-\u30ff' # CJK Symbols/Punctuation + Japanese kana
    r'\u3400-\u9fff' # CJK Unified Ideographs (incl. Extension A)
    r'\uac00-\ud7af' # Korean syllables (Hangul)
    r'\uf900-\ufaff' # CJK Compatibility Ideographs
    r'\U00020000-\U0002fa1f' # CJK Extensions B–F (supplementary plane)
)
# Matches a single character that falls in any of the ranges above.
_RE_CONTAINS_CJK = re.compile(f'[{_CJK_RANGES}]')
# Matches a run of one or more consecutive characters from the ranges above.
_RE_CJK_WORDS = re.compile(f'[{_CJK_RANGES}]+')
# Matches either a CJK run or an ASCII word token (letters, digits, underscore).
_RE_TRIGRAM_TOKENS = re.compile(f'[{_CJK_RANGES}]+|[A-Za-z0-9_]+')
@dataclass @dataclass
@@ -48,6 +78,10 @@ class MemoryStorage:
self.db_path = db_path self.db_path = db_path
self.conn: Optional[sqlite3.Connection] = None self.conn: Optional[sqlite3.Connection] = None
self.fts5_available = False # Track FTS5 availability self.fts5_available = False # Track FTS5 availability
# RLock protects concurrent writes from the same process.
# SQLite WAL mode handles read/write concurrency at the file level,
# but same-process concurrent writes still need a Python-level lock.
self._lock = threading.RLock()
self._init_db() self._init_db()
def _check_fts5_support(self) -> bool: def _check_fts5_support(self) -> bool:
@@ -69,6 +103,14 @@ class MemoryStorage:
# Check FTS5 support # Check FTS5 support
self.fts5_available = self._check_fts5_support() self.fts5_available = self._check_fts5_support()
if not _HAS_UPSERT:
from common.log import logger
logger.warning(
"[MemoryStorage] SQLite %s < 3.24 — UPSERT unavailable. "
"Falling back to INSERT OR REPLACE; FTS5 rowid may drift on "
"chunk updates (rebuild index periodically to recover).",
sqlite3.sqlite_version,
)
if not self.fts5_available: if not self.fts5_available:
from common.log import logger from common.log import logger
logger.debug("[MemoryStorage] FTS5 not available, using LIKE-based keyword search") logger.debug("[MemoryStorage] FTS5 not available, using LIKE-based keyword search")
@@ -175,6 +217,75 @@ class MemoryStorage:
) )
self._rebuild_fts5_from_chunks() self._rebuild_fts5_from_chunks()
# Internal key-value store for persistent flags (e.g. backfill tracking)
self.conn.execute("""
CREATE TABLE IF NOT EXISTS _meta (
key TEXT PRIMARY KEY,
value TEXT NOT NULL
)
""")
# Create trigram FTS5 table for CJK / mixed-language search
self.trigram_fts5_available = False
if self.fts5_available:
try:
self.conn.execute("""
CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts_trigram USING fts5(
text,
id UNINDEXED,
user_id UNINDEXED,
path UNINDEXED,
source UNINDEXED,
scope UNINDEXED,
content='chunks',
content_rowid='rowid',
tokenize='trigram case_sensitive 0'
)
""")
self.conn.execute("""
CREATE TRIGGER IF NOT EXISTS chunks_trigram_ai
AFTER INSERT ON chunks BEGIN
INSERT INTO chunks_fts_trigram(rowid, text, id, user_id, path, source, scope)
VALUES (new.rowid, new.text, new.id, new.user_id, new.path, new.source, new.scope);
END
""")
self.conn.execute("""
CREATE TRIGGER IF NOT EXISTS chunks_trigram_ad
AFTER DELETE ON chunks BEGIN
DELETE FROM chunks_fts_trigram WHERE rowid = old.rowid;
END
""")
self.conn.execute("""
CREATE TRIGGER IF NOT EXISTS chunks_trigram_au
AFTER UPDATE ON chunks BEGIN
UPDATE chunks_fts_trigram
SET text=new.text, id=new.id, user_id=new.user_id,
path=new.path, source=new.source, scope=new.scope
WHERE rowid = new.rowid;
END
""")
# One-time backfill for existing rows.
# NOTE: COUNT(*) on an FTS5 content table always returns 0, so we
# use a persistent flag in _meta instead of counting trigram rows.
backfill_done = self.conn.execute(
"SELECT 1 FROM _meta WHERE key = 'trigram_backfill_done'"
).fetchone()
chunks_count = self.conn.execute(
"SELECT COUNT(*) as c FROM chunks"
).fetchone()['c']
if chunks_count > 0 and not backfill_done:
self.conn.execute(
"INSERT INTO chunks_fts_trigram(chunks_fts_trigram) VALUES('rebuild')"
)
self.conn.execute(
"INSERT OR REPLACE INTO _meta(key, value) VALUES('trigram_backfill_done', '1')"
)
self.trigram_fts5_available = True
except Exception:
from common.log import logger
logger.warning("[MemoryStorage] trigram FTS5 unavailable, CJK search will use LIKE fallback", exc_info=True)
self.trigram_fts5_available = False
# Create files metadata table # Create files metadata table
self.conn.execute(""" self.conn.execute("""
CREATE TABLE IF NOT EXISTS files ( CREATE TABLE IF NOT EXISTS files (
@@ -186,7 +297,7 @@ class MemoryStorage:
updated_at INTEGER DEFAULT (strftime('%s', 'now')) updated_at INTEGER DEFAULT (strftime('%s', 'now'))
) )
""") """)
self.conn.commit() self.conn.commit()
def _fts5_state_inconsistent(self) -> bool: def _fts5_state_inconsistent(self) -> bool:
@@ -299,43 +410,98 @@ class MemoryStorage:
self.conn.commit() self.conn.commit()
def save_chunk(self, chunk: MemoryChunk):
    """Save a memory chunk, inserting it or updating the row with the same id.

    When the linked SQLite supports it, UPSERT (INSERT … ON CONFLICT DO
    UPDATE) is used instead of INSERT OR REPLACE. INSERT OR REPLACE
    internally does DELETE+INSERT, which changes the row's rowid. Because
    both FTS5 tables use content_rowid='rowid', a new rowid would leave the
    old FTS index entries pointing at a non-existent rowid and trigger
    "fts5: missing row N from content table" errors. ON CONFLICT DO UPDATE
    fires the AFTER UPDATE triggers (chunks_au / chunks_trigram_au) and keeps
    the original rowid intact. Older SQLite builds fall back to
    INSERT OR REPLACE.

    The write runs inside the connection's transaction context manager, so
    it is committed on success and rolled back if the statement raises;
    a failed write therefore never leaves an open transaction behind on
    the shared connection.

    Args:
        chunk: The chunk to persist; ``chunk.id`` is the conflict key.

    Raises:
        sqlite3.Error: If the statement fails (after rolling back).
    """
    if _HAS_UPSERT:
        sql = """
            INSERT INTO chunks
            (id, user_id, scope, source, path, start_line, end_line,
             text, embedding, hash, metadata, updated_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, strftime('%s', 'now'))
            ON CONFLICT(id) DO UPDATE SET
                user_id = excluded.user_id,
                scope = excluded.scope,
                source = excluded.source,
                path = excluded.path,
                start_line = excluded.start_line,
                end_line = excluded.end_line,
                text = excluded.text,
                embedding = excluded.embedding,
                hash = excluded.hash,
                metadata = excluded.metadata,
                updated_at = strftime('%s', 'now')
        """
    else:
        sql = """
            INSERT OR REPLACE INTO chunks
            (id, user_id, scope, source, path, start_line, end_line,
             text, embedding, hash, metadata, updated_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, strftime('%s', 'now'))
        """
    params = (
        chunk.id, chunk.user_id, chunk.scope, chunk.source, chunk.path,
        chunk.start_line, chunk.end_line, chunk.text,
        self._encode_embedding(chunk.embedding),
        chunk.hash,
        json.dumps(chunk.metadata) if chunk.metadata else None,
    )
    with self._lock:
        # `with self.conn` commits on success and rolls back on error.
        with self.conn:
            self.conn.execute(sql, params)
def save_chunks_batch(self, chunks: List[MemoryChunk]):
    """Save multiple chunks in one transaction (insert or update by id).

    UPSERT is used when the linked SQLite supports it so that updating an
    existing chunk keeps its rowid, which both FTS5 tables reference via
    content_rowid='rowid'; INSERT OR REPLACE (DELETE+INSERT, new rowid) is
    only the fallback for older SQLite builds.

    The whole batch is atomic: it runs inside the connection's transaction
    context manager, so if any row fails the rows already written by this
    call are rolled back instead of lingering in an open transaction and
    being committed later by an unrelated write.

    Args:
        chunks: Chunks to persist; each ``id`` is the conflict key.

    Raises:
        sqlite3.Error: If any statement fails (after rolling back the batch).
    """
    if _HAS_UPSERT:
        sql = """
            INSERT INTO chunks
            (id, user_id, scope, source, path, start_line, end_line,
             text, embedding, hash, metadata, updated_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, strftime('%s', 'now'))
            ON CONFLICT(id) DO UPDATE SET
                user_id = excluded.user_id,
                scope = excluded.scope,
                source = excluded.source,
                path = excluded.path,
                start_line = excluded.start_line,
                end_line = excluded.end_line,
                text = excluded.text,
                embedding = excluded.embedding,
                hash = excluded.hash,
                metadata = excluded.metadata,
                updated_at = strftime('%s', 'now')
        """
    else:
        sql = """
            INSERT OR REPLACE INTO chunks
            (id, user_id, scope, source, path, start_line, end_line,
             text, embedding, hash, metadata, updated_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, strftime('%s', 'now'))
        """
    params_list = [
        (
            c.id, c.user_id, c.scope, c.source, c.path,
            c.start_line, c.end_line, c.text,
            self._encode_embedding(c.embedding),
            c.hash,
            json.dumps(c.metadata) if c.metadata else None,
        )
        for c in chunks
    ]
    with self._lock:
        # `with self.conn` commits on success and rolls back on error,
        # so a mid-batch failure cannot persist a partial batch.
        with self.conn:
            self.conn.executemany(sql, params_list)
def get_chunk(self, chunk_id: str) -> Optional[MemoryChunk]: def get_chunk(self, chunk_id: str) -> Optional[MemoryChunk]:
"""Get a chunk by ID""" """Get a chunk by ID"""
@@ -356,21 +522,21 @@ class MemoryStorage:
limit: int = 10 limit: int = 10
) -> List[SearchResult]: ) -> List[SearchResult]:
""" """
Vector similarity search using in-memory cosine similarity Vector similarity search using numpy-vectorized cosine similarity.
(sqlite-vec can be added later for better performance) All embeddings are loaded then scored in a single BLAS matrix-vector
multiply, which is ~100x faster than the pure-Python per-row loop.
""" """
if scopes is None: if scopes is None:
scopes = ["shared"] scopes = ["shared"]
if user_id: if user_id:
scopes.append("user") scopes.append("user")
# Build query
scope_placeholders = ','.join('?' * len(scopes)) scope_placeholders = ','.join('?' * len(scopes))
params = scopes params = list(scopes)
if user_id: if user_id:
query = f""" query = f"""
SELECT * FROM chunks SELECT * FROM chunks
WHERE scope IN ({scope_placeholders}) WHERE scope IN ({scope_placeholders})
AND (scope = 'shared' OR user_id = ?) AND (scope = 'shared' OR user_id = ?)
AND embedding IS NOT NULL AND embedding IS NOT NULL
@@ -378,51 +544,95 @@ class MemoryStorage:
params.append(user_id) params.append(user_id)
else: else:
query = f""" query = f"""
SELECT * FROM chunks SELECT * FROM chunks
WHERE scope IN ({scope_placeholders}) WHERE scope IN ({scope_placeholders})
AND embedding IS NOT NULL AND embedding IS NOT NULL
""" """
rows = self.conn.execute(query, params).fetchall() rows = self.conn.execute(query, params).fetchall()
if not rows:
return []
# Calculate cosine similarity. We probe the first row's dim to fail # Parse embeddings and build a (N, D) matrix in one pass.
# loudly on a query/index dim mismatch — otherwise every doc would # New rows store BLOB bytes (np.frombuffer); legacy rows fall back to JSON.
# score 0 silently, leaving the user wondering why search broke. # Filter out rows whose embedding dimension differs from the query —
results = [] # mixing dimensions would cause np.array() to produce an object array
query_dim = len(query_embedding) # and matrix @ q_vec to raise ValueError.
if rows: expected_dim = len(query_embedding)
first = json.loads(rows[0]['embedding']) valid_rows = []
if isinstance(first, list) and len(first) != query_dim: vectors = []
raise ValueError(
f"Embedding dim mismatch: query is {query_dim}-dim but "
f"index stores {len(first)}-dim vectors. The configured "
f"embedding model differs from the one that built the "
f"index — run /memory rebuild-index to re-embed."
)
for row in rows: for row in rows:
embedding = json.loads(row['embedding']) vec = self._decode_embedding(row['embedding'])
similarity = self._cosine_similarity(query_embedding, embedding) if not vec:
continue
if len(vec) != expected_dim:
from common.log import logger
logger.warning(
"[MemoryStorage] Skipping chunk %s: embedding dim %d != query dim %d",
row['id'], len(vec), expected_dim
)
continue
valid_rows.append(row)
vectors.append(vec)
if similarity > 0: if not vectors:
results.append((similarity, row)) return []
# Sort by similarity and limit if _HAS_NUMPY:
results.sort(key=lambda x: x[0], reverse=True) matrix = np.array(vectors, dtype=np.float32) # (N, D)
results = results[:limit] q_vec = np.array(query_embedding, dtype=np.float32) # (D,)
return [ # Vectorized cosine similarity: dot(matrix, q) / (||matrix|| * ||q||)
SearchResult( dots = matrix @ q_vec # (N,)
path=row['path'], row_norms = np.linalg.norm(matrix, axis=1) # (N,)
start_line=row['start_line'], q_norm = float(np.linalg.norm(q_vec))
end_line=row['end_line'], denominators = row_norms * q_norm
score=score, np.maximum(denominators, 1e-10, out=denominators) # avoid div-by-zero
snippet=self._truncate_text(row['text'], 500), sims = dots / denominators # (N,)
source=row['source'],
user_id=row['user_id'] # Select TopK using argpartition (O(N) average), then sort only those K
) k = min(limit, len(valid_rows))
for score, row in results top_idx = np.argpartition(sims, -k)[-k:]
] top_idx = top_idx[np.argsort(sims[top_idx])[::-1]]
return [
SearchResult(
path=valid_rows[i]['path'],
start_line=valid_rows[i]['start_line'],
end_line=valid_rows[i]['end_line'],
score=float(sims[i]),
snippet=self._truncate_text(valid_rows[i]['text'], 500),
source=valid_rows[i]['source'],
user_id=valid_rows[i]['user_id']
)
for i in top_idx
if sims[i] > 0
]
else:
# Pure-Python cosine similarity fallback (numpy not installed)
import math
q = query_embedding
q_norm = math.sqrt(sum(x * x for x in q)) or 1e-10
scored = []
for i, vec in enumerate(vectors):
dot = sum(a * b for a, b in zip(vec, q))
v_norm = math.sqrt(sum(x * x for x in vec)) or 1e-10
sim = dot / (v_norm * q_norm)
if sim > 0:
scored.append((sim, valid_rows[i]))
scored.sort(key=lambda x: x[0], reverse=True)
return [
SearchResult(
path=row['path'],
start_line=row['start_line'],
end_line=row['end_line'],
score=sim,
snippet=self._truncate_text(row['text'], 500),
source=row['source'],
user_id=row['user_id']
)
for sim, row in scored[:limit]
]
def search_keyword( def search_keyword(
self, self,
@@ -445,12 +655,37 @@ class MemoryStorage:
if user_id: if user_id:
scopes.append("user") scopes.append("user")
if self.fts5_available: # Step 1: Standard FTS5 (unicode61) — pure ASCII queries only.
# Skipped when query contains any CJK characters: unicode61 tokenises CJK
# as individual characters without forming meaningful tokens, so it would
# match only the ASCII portion of a mixed query (e.g. "Python" from
# "Python教程") and silently discard the CJK part. Those queries go
# directly to Step 2 (trigram), which handles both ASCII and CJK together.
fts1_attempted = False
if (self.fts5_available
and not MemoryStorage._contains_cjk(query)
and MemoryStorage._build_fts_query(query)):
fts1_attempted = True
fts_results = self._search_fts5(query, user_id, scopes, limit) fts_results = self._search_fts5(query, user_id, scopes, limit)
if fts_results: if fts_results:
return fts_results return fts_results
return self._search_like(query, user_id, scopes, limit) # Step 2: Trigram FTS5 — CJK/mixed queries, plus fallback when unicode61
# returned nothing (trigram indexes all scripts with 3-char sliding windows,
# so it can catch terms that unicode61 tokenisation misses).
if self.trigram_fts5_available and (
MemoryStorage._contains_cjk(query) or fts1_attempted
):
trigram_results = self._search_fts5_trigram(query, user_id, scopes, limit)
if trigram_results:
return trigram_results
# Step 3: LIKE fallback — last resort (FTS5 unavailable, or CJK tokens
# shorter than 3 characters that trigram cannot match, e.g. a single-char query).
if not self.fts5_available or MemoryStorage._contains_cjk(query):
return self._search_like(query, user_id, scopes, limit)
return []
def _search_fts5( def _search_fts5(
self, self,
@@ -471,7 +706,7 @@ class MemoryStorage:
sql_query = f""" sql_query = f"""
SELECT chunks.*, bm25(chunks_fts) as rank SELECT chunks.*, bm25(chunks_fts) as rank
FROM chunks_fts FROM chunks_fts
JOIN chunks ON chunks.id = chunks_fts.id JOIN chunks ON chunks.rowid = chunks_fts.rowid
WHERE chunks_fts MATCH ? WHERE chunks_fts MATCH ?
AND chunks.scope IN ({scope_placeholders}) AND chunks.scope IN ({scope_placeholders})
AND (chunks.scope = 'shared' OR chunks.user_id = ?) AND (chunks.scope = 'shared' OR chunks.user_id = ?)
@@ -483,7 +718,7 @@ class MemoryStorage:
sql_query = f""" sql_query = f"""
SELECT chunks.*, bm25(chunks_fts) as rank SELECT chunks.*, bm25(chunks_fts) as rank
FROM chunks_fts FROM chunks_fts
JOIN chunks ON chunks.id = chunks_fts.id JOIN chunks ON chunks.rowid = chunks_fts.rowid
WHERE chunks_fts MATCH ? WHERE chunks_fts MATCH ?
AND chunks.scope IN ({scope_placeholders}) AND chunks.scope IN ({scope_placeholders})
ORDER BY rank ORDER BY rank
@@ -505,13 +740,11 @@ class MemoryStorage:
) )
for row in rows for row in rows
] ]
except Exception as e: except Exception:
from common.log import logger from common.log import logger
logger.error( logger.warning("[MemoryStorage] _search_fts5 failed, returning empty", exc_info=True)
f"[MemoryStorage] FTS5 search failed (caller will fall back to LIKE): {e}"
)
return [] return []
def _search_like( def _search_like(
self, self,
query: str, query: str,
@@ -522,12 +755,11 @@ class MemoryStorage:
"""LIKE-based search. """LIKE-based search.
Used as the keyword-search fallback when FTS5 is unavailable, fails, Used as the keyword-search fallback when FTS5 is unavailable, fails,
or returns empty. Supports both CJK runs and ASCII word tokens so it or returns empty. Supports both CJK runs (1+ chars) and ASCII word
can serve as a true safety net for any query. tokens (3+ chars) so it can serve as a true safety net for any query.
""" """
import re # CJK runs (1+ chars, wide Unicode range) + ASCII words (3+ chars to avoid noise)
# CJK runs (2+ chars) + ASCII word tokens (3+ chars to avoid noise) cjk_words = _RE_CJK_WORDS.findall(query)
cjk_words = re.findall(r'[\u4e00-\u9fff]{2,}', query)
ascii_words = [t for t in re.findall(r'[A-Za-z0-9_]+', query) if len(t) >= 3] ascii_words = [t for t in re.findall(r'[A-Za-z0-9_]+', query) if len(t) >= 3]
words = cjk_words + ascii_words words = cjk_words + ascii_words
if not words: if not words:
@@ -565,44 +797,54 @@ class MemoryStorage:
try: try:
rows = self.conn.execute(sql_query, params).fetchall() rows = self.conn.execute(sql_query, params).fetchall()
return [ results = []
SearchResult( for row in rows:
# Dynamic score: reward chunks that contain more of the query words.
# Use all tokens (CJK + ASCII) so pure-ASCII queries are not skipped.
# matched_count is always ≥1 because the WHERE clause uses OR, but
# guard defensively so unexpected zero-match rows are never surfaced.
text_lower = row['text'].lower()
matched_count = sum(1 for w in words if w.lower() in text_lower)
if matched_count == 0:
continue
score = min(0.85, 0.3 + 0.15 * matched_count)
results.append(SearchResult(
path=row['path'], path=row['path'],
start_line=row['start_line'], start_line=row['start_line'],
end_line=row['end_line'], end_line=row['end_line'],
score=0.5, # Fixed score for LIKE search score=score,
snippet=self._truncate_text(row['text'], 500), snippet=self._truncate_text(row['text'], 500),
source=row['source'], source=row['source'],
user_id=row['user_id'] user_id=row['user_id']
) ))
for row in rows results.sort(key=lambda r: r.score, reverse=True)
] return results
except Exception as e: except Exception:
from common.log import logger from common.log import logger
logger.error(f"[MemoryStorage] LIKE search failed: {e}") logger.warning("[MemoryStorage] _search_like failed, returning empty", exc_info=True)
return [] return []
def delete_by_path(self, path: str): def delete_by_path(self, path: str):
"""Delete all chunks from a file""" """Delete all chunks from a file"""
self.conn.execute(""" with self._lock:
DELETE FROM chunks WHERE path = ? self.conn.execute("DELETE FROM chunks WHERE path = ?", (path,))
""", (path,)) self.conn.commit()
self.conn.commit()
def get_file_hash(self, path: str) -> Optional[str]: def get_file_hash(self, path: str) -> Optional[str]:
"""Get stored file hash""" """Get stored file hash"""
row = self.conn.execute(""" row = self.conn.execute("""
SELECT hash FROM files WHERE path = ? SELECT hash FROM files WHERE path = ?
""", (path,)).fetchone() """, (path,)).fetchone()
return row['hash'] if row else None return row['hash'] if row else None
def update_file_metadata(self, path: str, source: str, file_hash: str, mtime: int, size: int): def update_file_metadata(self, path: str, source: str, file_hash: str, mtime: int, size: int):
"""Update file metadata""" """Update file metadata"""
self.conn.execute(""" with self._lock:
INSERT OR REPLACE INTO files (path, source, hash, mtime, size, updated_at) self.conn.execute("""
VALUES (?, ?, ?, ?, ?, strftime('%s', 'now')) INSERT OR REPLACE INTO files (path, source, hash, mtime, size, updated_at)
""", (path, source, file_hash, mtime, size)) VALUES (?, ?, ?, ?, ?, strftime('%s', 'now'))
self.conn.commit() """, (path, source, file_hash, mtime, size))
self.conn.commit()
def get_stats(self) -> Dict[str, int]: def get_stats(self) -> Dict[str, int]:
"""Get storage statistics""" """Get storage statistics"""
@@ -632,7 +874,8 @@ class MemoryStorage:
self.conn.close() self.conn.close()
self.conn = None # Mark as closed self.conn = None # Mark as closed
except Exception as e: except Exception as e:
print(f"⚠️ Error closing database connection: {e}") from common.log import logger
logger.warning("[MemoryStorage] Error closing database connection: %s", e)
def __del__(self): def __del__(self):
"""Destructor to ensure connection is closed""" """Destructor to ensure connection is closed"""
@@ -642,7 +885,33 @@ class MemoryStorage:
pass # Ignore errors during cleanup pass # Ignore errors during cleanup
# Helper methods # Helper methods
@staticmethod
def _encode_embedding(embedding: Optional[List[float]]) -> Optional[bytes]:
"""Encode embedding as float32 BLOB bytes (~6x smaller and faster than JSON).
Falls back to struct.pack when numpy is unavailable."""
if embedding is None:
return None
if _HAS_NUMPY:
return np.array(embedding, dtype=np.float32).tobytes()
import struct
return struct.pack(f'{len(embedding)}f', *embedding)
@staticmethod
def _decode_embedding(raw) -> Optional[List[float]]:
"""Decode embedding from BLOB bytes or legacy JSON string.
Handles both numpy and numpy-free environments."""
if raw is None:
return None
if isinstance(raw, (bytes, bytearray)):
if _HAS_NUMPY:
return np.frombuffer(raw, dtype=np.float32).tolist()
import struct
n = len(raw) // 4
return list(struct.unpack(f'{n}f', raw))
# Legacy JSON format written by older versions
return json.loads(raw)
def _row_to_chunk(self, row) -> MemoryChunk: def _row_to_chunk(self, row) -> MemoryChunk:
"""Convert database row to MemoryChunk""" """Convert database row to MemoryChunk"""
return MemoryChunk( return MemoryChunk(
@@ -654,32 +923,89 @@ class MemoryStorage:
start_line=row['start_line'], start_line=row['start_line'],
end_line=row['end_line'], end_line=row['end_line'],
text=row['text'], text=row['text'],
embedding=json.loads(row['embedding']) if row['embedding'] else None, embedding=self._decode_embedding(row['embedding']),
hash=row['hash'], hash=row['hash'],
metadata=json.loads(row['metadata']) if row['metadata'] else None metadata=json.loads(row['metadata']) if row['metadata'] else None
) )
@staticmethod @staticmethod
def _cosine_similarity(vec1: List[float], vec2: List[float]) -> float: def _contains_cjk(text: str) -> bool:
"""Calculate cosine similarity between two vectors""" """Check if text contains CJK or related characters (Chinese, Japanese, Korean)."""
if len(vec1) != len(vec2): return bool(_RE_CONTAINS_CJK.search(text))
return 0.0
dot_product = sum(a * b for a, b in zip(vec1, vec2))
norm1 = sum(a * a for a in vec1) ** 0.5
norm2 = sum(b * b for b in vec2) ** 0.5
if norm1 == 0 or norm2 == 0:
return 0.0
return dot_product / (norm1 * norm2)
@staticmethod @staticmethod
def _contains_cjk(text: str) -> bool: def _build_trigram_query(raw_query: str) -> Optional[str]:
"""Check if text contains CJK (Chinese/Japanese/Korean) characters""" """
import re Build FTS5 MATCH query for the trigram tokenizer.
return bool(re.search(r'[\u4e00-\u9fff]', text)) Extracts CJK sequences (including single characters) and ASCII words,
joining them with AND so all terms must appear in the matched chunk.
"""
tokens = _RE_TRIGRAM_TOKENS.findall(raw_query)
tokens = [t for t in tokens if t]
if not tokens:
return None
# Escape embedded double-quotes (FTS5 uses "" inside quoted phrases)
quoted = [f'"{t.replace(chr(34), chr(34)*2)}"' for t in tokens]
return ' AND '.join(quoted)
def _search_fts5_trigram(
self,
query: str,
user_id: Optional[str],
scopes: List[str],
limit: int
) -> List[SearchResult]:
"""Trigram FTS5 search — handles CJK and mixed queries with BM25 ranking."""
trigram_query = self._build_trigram_query(query)
if not trigram_query:
return []
scope_placeholders = ','.join('?' * len(scopes))
params = [trigram_query] + list(scopes)
if user_id:
sql = f"""
SELECT chunks.*, bm25(chunks_fts_trigram) as rank
FROM chunks_fts_trigram
JOIN chunks ON chunks.rowid = chunks_fts_trigram.rowid
WHERE chunks_fts_trigram MATCH ?
AND chunks.scope IN ({scope_placeholders})
AND (chunks.scope = 'shared' OR chunks.user_id = ?)
ORDER BY rank
LIMIT ?
"""
params.extend([user_id, limit])
else:
sql = f"""
SELECT chunks.*, bm25(chunks_fts_trigram) as rank
FROM chunks_fts_trigram
JOIN chunks ON chunks.rowid = chunks_fts_trigram.rowid
WHERE chunks_fts_trigram MATCH ?
AND chunks.scope IN ({scope_placeholders})
ORDER BY rank
LIMIT ?
"""
params.append(limit)
try:
rows = self.conn.execute(sql, params).fetchall()
return [
SearchResult(
path=row['path'],
start_line=row['start_line'],
end_line=row['end_line'],
score=self._bm25_rank_to_score(row['rank']),
snippet=self._truncate_text(row['text'], 500),
source=row['source'],
user_id=row['user_id']
)
for row in rows
]
except Exception:
from common.log import logger
logger.warning("[MemoryStorage] _search_fts5_trigram failed, returning empty", exc_info=True)
return []
@staticmethod @staticmethod
def _build_fts_query(raw_query: str) -> Optional[str]: def _build_fts_query(raw_query: str) -> Optional[str]:
""" """
@@ -688,7 +1014,6 @@ class MemoryStorage:
Works best for English and word-based languages. Works best for English and word-based languages.
For CJK characters, LIKE search will be used as fallback. For CJK characters, LIKE search will be used as fallback.
""" """
import re
# Extract words (primarily English words and numbers) # Extract words (primarily English words and numbers)
tokens = re.findall(r'[A-Za-z0-9_]+', raw_query) tokens = re.findall(r'[A-Za-z0-9_]+', raw_query)
if not tokens: if not tokens:
@@ -701,9 +1026,22 @@ class MemoryStorage:
@staticmethod @staticmethod
def _bm25_rank_to_score(rank: float) -> float: def _bm25_rank_to_score(rank: float) -> float:
"""Convert BM25 rank to 0-1 score""" """Convert SQLite BM25 rank to a [0, 1) relevance score.
normalized = max(0, rank) if rank is not None else 999
return 1 / (1 + normalized) SQLite's bm25() returns a non-positive float (0 or negative).
More negative = more relevant. max(0, rank) would clip every
negative value to 0, making every score 1/(1+0) = 1.0 and
destroying all ranking information.
abs(rank) / (1 + abs(rank)) maps the absolute relevance magnitude
to [0, 1): larger |rank| (stronger match) → score closer to 1.
"""
if rank is None:
return 0.0
# Add a floor of 0.3 so any FTS5 match always exceeds typical
# min_score thresholds (default 0.1). Small-corpus ranks close to
# 0 would otherwise produce score≈0 and be filtered out downstream.
return 0.3 + 0.69 * (abs(rank) / (1.0 + abs(rank)))
@staticmethod @staticmethod
def _truncate_text(text: str, max_chars: int) -> str: def _truncate_text(text: str, max_chars: int) -> str:

View File

@@ -3,6 +3,11 @@ from .agent_stream import AgentStreamExecutor
from .task import Task, TaskType, TaskStatus from .task import Task, TaskType, TaskStatus
from .result import AgentResult, AgentAction, AgentActionType, ToolResult from .result import AgentResult, AgentAction, AgentActionType, ToolResult
from .models import LLMModel, LLMRequest, ModelFactory from .models import LLMModel, LLMRequest, ModelFactory
from .cancel import (
AgentCancelledError,
CancelTokenRegistry,
get_cancel_registry,
)
__all__ = [ __all__ = [
'Agent', 'Agent',
@@ -16,5 +21,8 @@ __all__ = [
'ToolResult', 'ToolResult',
'LLMModel', 'LLMModel',
'LLMRequest', 'LLMRequest',
'ModelFactory' 'ModelFactory',
] 'AgentCancelledError',
'CancelTokenRegistry',
'get_cancel_registry',
]

View File

@@ -365,7 +365,8 @@ class Agent:
return action return action
def run_stream(self, user_message: str, on_event=None, clear_history: bool = False, skill_filter=None) -> str: def run_stream(self, user_message: str, on_event=None, clear_history: bool = False,
skill_filter=None, cancel_event=None) -> str:
""" """
Execute single agent task with streaming (based on tool-call) Execute single agent task with streaming (based on tool-call)
@@ -374,6 +375,7 @@ class Agent:
- Multi-turn reasoning based on tool-call - Multi-turn reasoning based on tool-call
- Event callbacks - Event callbacks
- Persistent conversation history across calls - Persistent conversation history across calls
- User-initiated cancellation via ``cancel_event``
Args: Args:
user_message: User message user_message: User message
@@ -381,6 +383,11 @@ class Agent:
event = {"type": str, "timestamp": float, "data": dict} event = {"type": str, "timestamp": float, "data": dict}
clear_history: If True, clear conversation history before this call (default: False) clear_history: If True, clear conversation history before this call (default: False)
skill_filter: Optional list of skill names to include in this run skill_filter: Optional list of skill names to include in this run
cancel_event: Optional threading.Event polled at agent checkpoints.
When set, the loop exits at the next safe point, injects a
"[Interrupted by user]" assistant note, and returns the
partial response. ``messages`` stays in a valid state
(tool_use/tool_result pairs preserved).
Returns: Returns:
Final response text Final response text
@@ -424,7 +431,8 @@ class Agent:
max_turns=self.max_steps, max_turns=self.max_steps,
on_event=on_event, on_event=on_event,
messages=messages_copy, # Pass copied message history messages=messages_copy, # Pass copied message history
max_context_turns=max_context_turns max_context_turns=max_context_turns,
cancel_event=cancel_event,
) )
# Execute # Execute

View File

@@ -7,11 +7,19 @@ import json
import time import time
from typing import List, Dict, Any, Optional, Callable, Tuple from typing import List, Dict, Any, Optional, Callable, Tuple
from agent.protocol.cancel import AgentCancelledError
from agent.protocol.models import LLMRequest, LLMModel from agent.protocol.models import LLMRequest, LLMModel
from agent.protocol.message_utils import sanitize_claude_messages, compress_turn_to_text_only from agent.protocol.message_utils import sanitize_claude_messages, compress_turn_to_text_only
from agent.tools.base_tool import BaseTool, ToolResult from agent.tools.base_tool import BaseTool, ToolResult
from common.log import logger from common.log import logger
# Optional: repair malformed JSON args from non-strict providers (e.g. unescaped quotes in long content).
try:
from json_repair import repair_json as _repair_json
_HAS_JSON_REPAIR = True
except ImportError:
_HAS_JSON_REPAIR = False
# Maximum number of characters of model "reasoning / thinking" content to persist # Maximum number of characters of model "reasoning / thinking" content to persist
# in conversation history. The full reasoning is still streamed to the UI in real # in conversation history. The full reasoning is still streamed to the UI in real
@@ -44,6 +52,30 @@ def _truncate_reasoning_for_storage(text: str) -> str:
return head + _REASONING_TRUNCATE_MARKER.format(omitted=omitted) + tail return head + _REASONING_TRUNCATE_MARKER.format(omitted=omitted) + tail
def _parse_tool_args(args_str: str, finish_reason: Optional[str]) -> Tuple[dict, Optional[str]]:
"""Parse tool args JSON. Returns (args, error_msg); error_msg is None on success.
On JSONDecodeError: detect truncation first (skip repair, surface max_tokens hint);
otherwise try json-repair for escape issues; finally fall back to the raw decoder error.
"""
if not args_str:
return {}, None
try:
return json.loads(args_str), None
except json.JSONDecodeError as e:
if finish_reason in ("length", "max_tokens") or not args_str.rstrip().endswith("}"):
return {}, "Output truncated (max_tokens reached). Split content into smaller chunks across multiple tool calls."
if _HAS_JSON_REPAIR:
try:
repaired = _repair_json(args_str, return_objects=True)
if isinstance(repaired, dict):
logger.warning(f"Tool args JSON repaired ({len(args_str)} chars)")
return repaired, None
except Exception:
pass
return {}, f"Invalid JSON in tool arguments: {e.msg}"
class AgentStreamExecutor: class AgentStreamExecutor:
""" """
Agent Stream Executor Agent Stream Executor
@@ -64,7 +96,8 @@ class AgentStreamExecutor:
max_turns: int = 50, max_turns: int = 50,
on_event: Optional[Callable] = None, on_event: Optional[Callable] = None,
messages: Optional[List[Dict]] = None, messages: Optional[List[Dict]] = None,
max_context_turns: int = 30 max_context_turns: int = 30,
cancel_event=None,
): ):
""" """
Initialize stream executor Initialize stream executor
@@ -78,6 +111,10 @@ class AgentStreamExecutor:
on_event: Event callback function on_event: Event callback function
messages: Optional existing message history (for persistent conversations) messages: Optional existing message history (for persistent conversations)
max_context_turns: Maximum number of conversation turns to keep in context max_context_turns: Maximum number of conversation turns to keep in context
cancel_event: Optional threading.Event used to signal user cancel.
Checked at every safe point (turn boundary, before tool execution,
during LLM streaming). When set, raises AgentCancelledError which
run_stream catches to gracefully wind down.
""" """
self.agent = agent self.agent = agent
self.model = model self.model = model
@@ -87,6 +124,7 @@ class AgentStreamExecutor:
self.max_turns = max_turns self.max_turns = max_turns
self.on_event = on_event self.on_event = on_event
self.max_context_turns = max_context_turns self.max_context_turns = max_context_turns
self.cancel_event = cancel_event
# Message history - use provided messages or create new list # Message history - use provided messages or create new list
self.messages = messages if messages is not None else [] self.messages = messages if messages is not None else []
@@ -97,6 +135,73 @@ class AgentStreamExecutor:
# Track files to send (populated by read tool) # Track files to send (populated by read tool)
self.files_to_send = [] # List of file metadata dicts self.files_to_send = [] # List of file metadata dicts
def _check_cancelled(self) -> None:
"""Raise AgentCancelledError if the user requested cancellation.
Called at safe points (turn start, between tool calls, between LLM
chunks). Cheap to call: just an Event.is_set() probe.
"""
if self.cancel_event is not None and self.cancel_event.is_set():
raise AgentCancelledError("agent cancelled by user")
def _handle_cancelled(self, partial_response: str) -> None:
"""Wind down ``self.messages`` after a user-initiated cancel.
The messages list may be in any of these states when we get here:
(a) Last message is an assistant message containing tool_use
blocks but the matching tool_result has not been appended yet.
(b) Last message is an assistant text-only reply (cancel happened
right before the next turn started).
(c) Last message is a user tool_result message and we cancelled
between turns.
For (a) we MUST synthesise tool_result blocks, otherwise the next
request will fail Claude/OpenAI's strict pairing validation. For
(b)/(c) the state is already valid and we just append a small
cancellation note so the user/LLM both see the boundary clearly.
"""
try:
# Step 1: close any orphaned tool_use in the trailing assistant
# message by injecting matching tool_result blocks.
if self.messages and isinstance(self.messages[-1], dict) \
and self.messages[-1].get("role") == "assistant":
last = self.messages[-1]
content = last.get("content")
if isinstance(content, list):
pending_tool_use_ids = [
block.get("id")
for block in content
if isinstance(block, dict) and block.get("type") == "tool_use"
]
pending_tool_use_ids = [tid for tid in pending_tool_use_ids if tid]
if pending_tool_use_ids:
tool_result_blocks = [
{
"type": "tool_result",
"tool_use_id": tid,
"content": "Cancelled by user before this tool finished.",
"is_error": True,
}
for tid in pending_tool_use_ids
]
self.messages.append({
"role": "user",
"content": tool_result_blocks,
})
logger.info(
f"[Agent] Injected {len(tool_result_blocks)} cancellation "
f"tool_result blocks to keep message history valid"
)
# Step 2: append a stable "interrupted" marker so the LLM sees a
# clear stop boundary on the next turn.
self.messages.append({
"role": "assistant",
"content": [{"type": "text", "text": "_(Cancelled by user)_"}],
})
except Exception as e:
logger.warning(f"[Agent] _handle_cancelled cleanup failed: {e}")
def _emit_event(self, event_type: str, data: dict = None): def _emit_event(self, event_type: str, data: dict = None):
"""Emit event""" """Emit event"""
if self.on_event: if self.on_event:
@@ -270,8 +375,13 @@ class AgentStreamExecutor:
final_response = "" final_response = ""
turn = 0 turn = 0
cancelled = False
try: try:
while turn < self.max_turns: while turn < self.max_turns:
# Check at the very top of every turn so a cancel arriving
# between turns short-circuits cleanly.
self._check_cancelled()
turn += 1 turn += 1
logger.info(f"[Agent] 第 {turn}") logger.info(f"[Agent] 第 {turn}")
self._emit_event("turn_start", {"turn": turn}) self._emit_event("turn_start", {"turn": turn})
@@ -375,6 +485,8 @@ class AgentStreamExecutor:
try: try:
for tool_call in tool_calls: for tool_call in tool_calls:
# Honour cancel between tool invocations within the same turn
self._check_cancelled()
result = self._execute_tool(tool_call) result = self._execute_tool(tool_call)
tool_results.append(result) tool_results.append(result)
@@ -557,6 +669,15 @@ class AgentStreamExecutor:
self.messages.pop(prompt_insert_idx) self.messages.pop(prompt_insert_idx)
logger.debug("[Agent] Removed injected max-steps prompt from message history") logger.debug("[Agent] Removed injected max-steps prompt from message history")
except AgentCancelledError:
# User-initiated stop: wind down message history cleanly so the
# next turn is unaffected; channels emit a "cancelled" UI event.
cancelled = True
logger.info(f"[Agent] 🛑 已被用户中止 (第 {turn} 轮)")
self._handle_cancelled(final_response)
if not final_response or not final_response.strip():
final_response = "_(Cancelled)_"
except Exception as e: except Exception as e:
logger.error(f"❌ Agent执行错误: {e}") logger.error(f"❌ Agent执行错误: {e}")
self._emit_event("error", {"error": str(e)}) self._emit_event("error", {"error": str(e)})
@@ -564,8 +685,11 @@ class AgentStreamExecutor:
finally: finally:
final_response = final_response.strip() if final_response else final_response final_response = final_response.strip() if final_response else final_response
logger.info(f"[Agent] 🏁 完成 ({turn}轮)") if cancelled:
self._emit_event("agent_end", {"final_response": final_response}) # Emit before agent_end so channels can mark UI as cancelled
self._emit_event("agent_cancelled", {"final_response": final_response})
logger.info(f"[Agent] 🏁 完成 ({turn}轮)" + (" [cancelled]" if cancelled else ""))
self._emit_event("agent_end", {"final_response": final_response, "cancelled": cancelled})
return final_response return final_response
@@ -603,15 +727,24 @@ class AgentStreamExecutor:
except Exception as e: except Exception as e:
logger.debug(f"[Agent] MCP sync skipped: {e}") logger.debug(f"[Agent] MCP sync skipped: {e}")
# Prepare tool definitions (OpenAI/Claude format) # Prepare tool definitions. Prefer get_json_schema() when it yields
# real properties (lets tools augment schema at runtime), otherwise
# fall back to the static `tool.params` (MCP tools rely on this).
tools_schema = None tools_schema = None
if self.tools: if self.tools:
tools_schema = [] tools_schema = []
for tool in self.tools.values(): for tool in self.tools.values():
input_schema = tool.params
try:
dynamic = (tool.get_json_schema() or {}).get("parameters") or {}
if dynamic.get("properties"):
input_schema = dynamic
except Exception:
pass
tools_schema.append({ tools_schema.append({
"name": tool.name, "name": tool.name,
"description": tool.description, "description": tool.description,
"input_schema": tool.params # Claude uses input_schema "input_schema": input_schema,
}) })
# Create request # Create request
@@ -635,7 +768,32 @@ class AgentStreamExecutor:
try: try:
stream = self.model.call_stream(request) stream = self.model.call_stream(request)
# Probe cancel every N chunks to bound reaction time without
# checking on every token.
_cancel_probe_counter = 0
_CANCEL_PROBE_EVERY = 8
for chunk in stream: for chunk in stream:
_cancel_probe_counter += 1
if _cancel_probe_counter >= _CANCEL_PROBE_EVERY:
_cancel_probe_counter = 0
if self.cancel_event is not None and self.cancel_event.is_set():
# Persist partial text only; tool_use args may be
# truncated mid-stream and would fail validation.
logger.info("[Agent] cancel detected mid-stream, aborting LLM call")
if full_content:
partial_msg = {
"role": "assistant",
"content": [{"type": "text", "text": full_content}],
}
self.messages.append(partial_msg)
self._emit_event("message_end", {
"content": full_content,
"tool_calls": [],
"cancelled": True,
})
raise AgentCancelledError("cancelled during LLM streaming")
# Check for errors # Check for errors
if isinstance(chunk, dict) and chunk.get("error"): if isinstance(chunk, dict) and chunk.get("error"):
# Extract error message from nested structure # Extract error message from nested structure
@@ -729,6 +887,10 @@ class AgentStreamExecutor:
elif isinstance(choice, dict) and choice.get("_gemini_raw_parts"): elif isinstance(choice, dict) and choice.get("_gemini_raw_parts"):
gemini_raw_parts = choice["_gemini_raw_parts"] gemini_raw_parts = choice["_gemini_raw_parts"]
except AgentCancelledError:
# Must propagate untouched; never treat as a retryable error.
raise
except Exception as e: except Exception as e:
error_str = str(e) error_str = str(e)
error_str_lower = error_str.lower() error_str_lower = error_str.lower()
@@ -842,26 +1004,17 @@ class AgentStreamExecutor:
import uuid import uuid
tool_id = f"call_{uuid.uuid4().hex[:24]}" tool_id = f"call_{uuid.uuid4().hex[:24]}"
try: args_str = tc.get("arguments") or ""
# Safely get arguments, handle None case arguments, parse_err = _parse_tool_args(args_str, stop_reason)
args_str = tc.get("arguments") or "" if parse_err:
arguments = json.loads(args_str) if args_str else {} logger.error(
except json.JSONDecodeError as e: f"Tool args parse failed for {tc['name']} ({len(args_str)} chars): {parse_err}"
# Handle None or invalid arguments safely )
args_str = tc.get('arguments') or ""
args_preview = args_str[:200] if len(args_str) > 200 else args_str
logger.error(f"Failed to parse tool arguments for {tc['name']}")
logger.error(f"Arguments length: {len(args_str)} chars")
logger.error(f"Arguments preview: {args_preview}...")
logger.error(f"JSON decode error: {e}")
# Return a clear error message to the LLM instead of empty dict
# This helps the LLM understand what went wrong
tool_calls.append({ tool_calls.append({
"id": tool_id, "id": tool_id,
"name": tc["name"], "name": tc["name"],
"arguments": {}, "arguments": {},
"_parse_error": f"Invalid JSON in tool arguments: {args_preview}... Error: {str(e)}. Tip: For large content, consider splitting into smaller chunks or using a different approach." "_parse_error": parse_err,
}) })
continue continue
@@ -949,14 +1102,11 @@ class AgentStreamExecutor:
tool_id = tool_call["id"] tool_id = tool_call["id"]
arguments = tool_call["arguments"] arguments = tool_call["arguments"]
# Check if there was a JSON parse error
if "_parse_error" in tool_call: if "_parse_error" in tool_call:
parse_error = tool_call["_parse_error"]
logger.error(f"Skipping tool execution due to parse error: {parse_error}")
result = { result = {
"status": "error", "status": "error",
"result": f"Failed to parse tool arguments. {parse_error}. Please ensure your tool call uses valid JSON format with all required parameters.", "result": tool_call["_parse_error"],
"execution_time": 0 "execution_time": 0,
} }
self._record_tool_result(tool_name, arguments, False) self._record_tool_result(tool_name, arguments, False)
return result return result

121
agent/protocol/cancel.py Normal file
View File

@@ -0,0 +1,121 @@
"""
Cancel token registry for aborting in-flight agent runs.
A user cancel (web Cancel button, /cancel command) sets a threading.Event
that the agent loop polls at safe checkpoints. Tokens are keyed by
request_id (preferred) and tracked under session_id as a fallback. Entries
are released after the run completes to keep the registry bounded.
No project deps — importable from any layer without circular imports.
"""
from __future__ import annotations
import threading
from typing import Dict, Optional
class AgentCancelledError(Exception):
    """Signal that the running agent loop must stop because a cancel was requested.

    Raised from the agent's cancellation checkpoints. The stream executor
    catches it, repairs the message history so tool_use/tool_result pairs
    stay matched, and hands the partial response back to the caller.
    """
class _CancelEntry:
__slots__ = ("event", "session_id")
def __init__(self, session_id: Optional[str]):
self.event = threading.Event()
self.session_id = session_id
class CancelTokenRegistry:
    """Thread-safe, in-process map from request_id to its cancel Event.

    A secondary index groups in-flight request ids by session_id so a whole
    session can be cancelled at once. All access to both maps happens under
    one lock.
    """

    def __init__(self):
        self._lock = threading.Lock()
        # request_id -> entry holding the Event and the owning session id.
        self._by_request: Dict[str, _CancelEntry] = {}
        # session_id -> request ids currently registered for that session.
        self._by_session: Dict[str, set] = {}

    def register(self, request_id: str, session_id: Optional[str] = None) -> threading.Event:
        """Return the cancel event for ``request_id``, creating it if needed.

        A falsy ``request_id`` is not tracked: a fresh, unregistered Event
        is returned so callers can still poll ``is_set()``.
        """
        if not request_id:
            return threading.Event()
        with self._lock:
            known = self._by_request.get(request_id)
            if known is not None:
                return known.event
            created = _CancelEntry(session_id)
            self._by_request[request_id] = created
            if session_id:
                self._by_session.setdefault(session_id, set()).add(request_id)
            return created.event

    def get_event(self, request_id: str) -> Optional[threading.Event]:
        """Return the registered event for ``request_id``, or None if unknown."""
        if not request_id:
            return None
        with self._lock:
            found = self._by_request.get(request_id)
            if found:
                return found.event
            return None

    def cancel_request(self, request_id: str) -> bool:
        """Set the cancel event of one request; True when the request was registered."""
        if not request_id:
            return False
        with self._lock:
            target = self._by_request.get(request_id)
        if target is None:
            return False
        target.event.set()
        return True

    def cancel_session(self, session_id: str) -> int:
        """Set the cancel event of every registered request of a session.

        Returns the number of requests signalled (0 when none were found).
        """
        if not session_id:
            return 0
        with self._lock:
            live = [
                self._by_request[rid]
                for rid in self._by_session.get(session_id, ())
                if rid in self._by_request
            ]
        # Events are set after the lock is released.
        for item in live:
            item.event.set()
        return len(live)

    def unregister(self, request_id: str) -> None:
        """Drop a request from both maps; a no-op for unknown ids."""
        if not request_id:
            return
        with self._lock:
            removed = self._by_request.pop(request_id, None)
            if not removed or not removed.session_id:
                return
            members = self._by_session.get(removed.session_id)
            if members is None:
                return
            members.discard(request_id)
            if not members:
                # Last request of the session: drop the empty bucket.
                self._by_session.pop(removed.session_id, None)

    def has_active(self, session_id: str) -> bool:
        """Return True when the session has at least one registered request."""
        if not session_id:
            return False
        with self._lock:
            return bool(self._by_session.get(session_id))
# Single registry instance created at import time and returned by get_cancel_registry().
_registry = CancelTokenRegistry()


def get_cancel_registry() -> CancelTokenRegistry:
    """Return the module-level ``CancelTokenRegistry`` instance."""
    return _registry

View File

@@ -15,7 +15,7 @@ import threading
from typing import Optional, Dict, Any, List, Callable from typing import Optional, Dict, Any, List, Callable
from common.log import logger from common.log import logger
from common.utils import expand_path from common.utils import expand_path, is_cloud_deployment
_DEFAULT_USER_DATA_DIR = "~/.cow/browser_profile" _DEFAULT_USER_DATA_DIR = "~/.cow/browser_profile"
@@ -436,6 +436,20 @@ class BrowserService:
if self._headless: if self._headless:
launch_args.append("--no-sandbox") launch_args.append("--no-sandbox")
if is_cloud_deployment():
launch_args.extend([
"--disable-gpu",
"--disable-software-rasterizer",
"--disable-extensions",
"--disable-background-networking",
"--disable-background-timer-throttling",
"--disable-renderer-backgrounding",
"--disable-features=site-per-process,TranslateUI,IsolateOrigins",
"--no-zygote",
"--js-flags=--max-old-space-size=384",
"--memory-pressure-off",
])
extra_args = self._config.get("launch_args", []) extra_args = self._config.get("launch_args", [])
if extra_args: if extra_args:
launch_args.extend(extra_args) launch_args.extend(extra_args)

View File

@@ -145,7 +145,8 @@ class BrowserTool(BaseTool):
url = args.get("url", "").strip() url = args.get("url", "").strip()
if not url: if not url:
return ToolResult.fail("Error: 'url' is required for navigate action") return ToolResult.fail("Error: 'url' is required for navigate action")
if not url.startswith(("http://", "https://")): # Only auto-prepend https:// for bare hosts; preserve file://, about:, data:, etc.
if "://" not in url and not url.startswith(("about:", "data:")):
url = "https://" + url url = "https://" + url
timeout = args.get("timeout", 30000) timeout = args.get("timeout", 30000)
service = self._get_service() service = self._get_service()

View File

@@ -1,8 +1,8 @@
""" """
MCP (Model Context Protocol) client module. MCP (Model Context Protocol) client module.
Implements JSON-RPC 2.0 over stdio and SSE transports without any external Implements JSON-RPC 2.0 over stdio, SSE and Streamable HTTP transports
MCP SDK dependency. without any external MCP SDK dependency.
""" """
import json import json
@@ -17,18 +17,29 @@ from typing import Optional
from common.log import logger from common.log import logger
# Aliases accepted for the Streamable HTTP transport type
_STREAMABLE_HTTP_ALIASES = {"streamable-http", "streamable_http", "streamablehttp", "http"}
class McpClient: class McpClient:
"""Single MCP Server client supporting stdio and SSE transports.""" """Single MCP Server client supporting stdio, SSE and Streamable HTTP transports."""
def __init__(self, config: dict): def __init__(self, config: dict):
""" """
config examples: config examples:
stdio: {"name": "filesystem", "type": "stdio", "command": "npx", "args": [...]} stdio: {"name": "filesystem", "type": "stdio", "command": "npx", "args": [...]}
SSE: {"name": "my-api", "type": "sse", "url": "http://localhost:8000/sse"} SSE: {"name": "my-api", "type": "sse", "url": "http://localhost:8000/sse"}
streamable-http: {"name": "pubmed", "type": "streamable-http", "url": "https://x/mcp"}
""" """
self.config = config self.config = config
self.name: str = config.get("name", "unknown") self.name: str = config.get("name", "unknown")
self.transport: str = config.get("type", "stdio") raw_transport: str = config.get("type", "stdio")
# Normalize streamable-http aliases to a single internal key
self.transport: str = (
"streamable-http"
if raw_transport.lower() in _STREAMABLE_HTTP_ALIASES
else raw_transport
)
# stdio state # stdio state
self._proc: Optional[subprocess.Popen] = None self._proc: Optional[subprocess.Popen] = None
@@ -37,6 +48,11 @@ class McpClient:
self._sse_url: Optional[str] = None self._sse_url: Optional[str] = None
self._post_url: Optional[str] = None # endpoint for sending messages (resolved from SSE) self._post_url: Optional[str] = None # endpoint for sending messages (resolved from SSE)
# Streamable HTTP state
self._http_url: Optional[str] = None
self._http_headers: dict = {} # extra headers from user config (e.g. Authorization)
self._http_session_id: Optional[str] = None # Mcp-Session-Id assigned by the server
# Shared state # Shared state
self._next_id = 1 self._next_id = 1
self._id_lock = threading.Lock() self._id_lock = threading.Lock()
@@ -54,6 +70,8 @@ class McpClient:
return self._init_stdio() return self._init_stdio()
elif self.transport == "sse": elif self.transport == "sse":
return self._init_sse() return self._init_sse()
elif self.transport == "streamable-http":
return self._init_streamable_http()
else: else:
logger.warning(f"[MCP:{self.name}] Unknown transport type: {self.transport!r}") logger.warning(f"[MCP:{self.name}] Unknown transport type: {self.transport!r}")
return False return False
@@ -109,6 +127,21 @@ class McpClient:
pass pass
self._proc = None self._proc = None
logger.debug(f"[MCP:{self.name}] stdio process terminated") logger.debug(f"[MCP:{self.name}] stdio process terminated")
# Best-effort streamable-http session termination
if self.transport == "streamable-http" and self._http_session_id and self._http_url:
try:
req = urllib.request.Request(
self._http_url,
method="DELETE",
headers={"Mcp-Session-Id": self._http_session_id, **self._http_headers},
)
with urllib.request.urlopen(req, timeout=5):
pass
except Exception:
pass
self._http_session_id = None
self._initialized = False self._initialized = False
# ------------------------------------------------------------------ # ------------------------------------------------------------------
@@ -234,6 +267,120 @@ class McpClient:
raw = resp.read().decode("utf-8") raw = resp.read().decode("utf-8")
return json.loads(raw) return json.loads(raw)
# ------------------------------------------------------------------
# Streamable HTTP transport (MCP spec 2025-03-26)
# ------------------------------------------------------------------
def _init_streamable_http(self) -> bool:
    """Configure the Streamable HTTP transport and run the MCP handshake.

    Reads ``url`` (required) and ``headers`` (optional) from ``self.config``,
    stores them on the instance and then delegates to ``self._handshake()``.

    Returns:
        False when ``url`` is missing or is not an http(s) URL; otherwise
        whatever ``self._handshake()`` returns.
    """
    url = self.config.get("url")
    if not url:
        logger.warning(f"[MCP:{self.name}] streamable-http config missing 'url'")
        return False
    # urllib will also open file:// and ftp:// URLs; this transport only
    # makes sense over HTTP(S), so refuse anything else up front.
    if not str(url).lower().startswith(("http://", "https://")):
        logger.warning(
            f"[MCP:{self.name}] streamable-http 'url' must start with http:// or https://: {url!r}"
        )
        return False
    self._http_url = url

    # Allow user-provided headers (e.g. {"Authorization": "Bearer xxx"})
    extra_headers = self.config.get("headers") or {}
    if isinstance(extra_headers, dict):
        # Drop None values instead of sending the literal string "None".
        self._http_headers = {
            str(k): str(v) for k, v in extra_headers.items() if v is not None
        }
    else:
        logger.warning(
            f"[MCP:{self.name}] streamable-http 'headers' must be a dict, "
            f"got {type(extra_headers).__name__}; ignoring"
        )

    return self._handshake()
def _streamable_http_send(self, message: dict) -> dict:
    """Send one JSON-RPC request over Streamable HTTP and hand back the reply.

    Thin wrapper around ``_streamable_http_post`` that always asks for a
    response; the reply may have arrived as plain JSON or wrapped in SSE.
    """
    reply = self._streamable_http_post(message, expect_response=True)
    return reply
def _streamable_http_post(self, message: dict, expect_response: bool) -> dict:
    """POST a JSON-RPC message over Streamable HTTP.

    Per the spec, the response Content-Type can be either:
      - application/json   -> single JSON-RPC response in body
      - text/event-stream  -> SSE stream; we read until we get a matching response

    Args:
        message: JSON-RPC request or notification to send.
        expect_response: False for notifications; the body is then drained
            and ``{}`` is returned.

    Returns:
        The decoded JSON-RPC response, or ``{}`` when no response is
        expected, the server answers 202, or the body is empty.

    Raises:
        IOError: the server answered with an HTTP error status. The message
            carries the status code and the first 200 characters of the
            body; the original ``HTTPError`` is attached as ``__cause__``.
    """
    body = json.dumps(message).encode("utf-8")
    headers = {
        "Content-Type": "application/json",
        "Accept": "application/json, text/event-stream",
    }
    if self._http_session_id:
        headers["Mcp-Session-Id"] = self._http_session_id
    # User-configured headers are applied last and therefore win on conflict.
    headers.update(self._http_headers)

    req = urllib.request.Request(
        self._http_url,
        data=body,
        method="POST",
        headers=headers,
    )

    try:
        resp = urllib.request.urlopen(req, timeout=30)
    except urllib.error.HTTPError as e:
        # Surface the server-provided error body for easier debugging
        detail = ""
        try:
            try:
                detail = e.read().decode("utf-8", errors="ignore")
            finally:
                # HTTPError wraps the live response; close it so the
                # underlying socket is released promptly.
                e.close()
        except Exception:
            pass
        raise IOError(
            f"[MCP:{self.name}] streamable-http HTTP {e.code}: {detail[:200]}"
        ) from e

    with resp:
        # Capture session id assigned by the server (if any)
        session_id = resp.headers.get("Mcp-Session-Id")
        if session_id and not self._http_session_id:
            self._http_session_id = session_id

        status = resp.status if hasattr(resp, "status") else resp.getcode()

        # Notifications: server may reply with 202 Accepted and no body
        if not expect_response or status == 202:
            try:
                resp.read()
            except Exception:
                pass
            return {}

        content_type = (resp.headers.get("Content-Type") or "").lower()
        expected_id = message.get("id")

        if "text/event-stream" in content_type:
            return self._read_sse_response(resp, expected_id)

        raw = resp.read().decode("utf-8")
        if not raw:
            return {}
        return json.loads(raw)
def _read_sse_response(self, resp, expected_id) -> dict:
    """Read an SSE stream and return the first JSON-RPC response with matching id.

    Args:
        resp: Iterable yielding the stream's raw lines as ``bytes``.
        expected_id: ``id`` of the request being answered; ``None`` accepts
            the first response seen.

    Returns:
        The decoded JSON-RPC response object.

    Raises:
        IOError: the stream ended without a matching response.
    """

    def _matching_response(chunks):
        """Decode one event's data lines; return it only if it is our response."""
        try:
            msg = json.loads("\n".join(chunks))
        except json.JSONDecodeError:
            return None
        # Batches and scalars are valid JSON but not a single response object.
        if not isinstance(msg, dict):
            return None
        # Skip notifications (no id).
        if "id" not in msg:
            return None
        # Skip server-initiated requests: they carry their own "method" and
        # their ids live in the server's namespace, so an id equal to ours
        # is a coincidence, not our reply.
        if "method" in msg:
            return None
        if expected_id is None or msg.get("id") == expected_id:
            return msg
        return None

    data_buf: list = []
    for raw_line in resp:
        line = raw_line.decode("utf-8").rstrip("\n\r")
        if line == "":
            # End of an SSE event, attempt to parse accumulated data
            if data_buf:
                found = _matching_response(data_buf)
                data_buf = []
                if found is not None:
                    return found
            continue
        if line.startswith(":"):
            continue  # SSE comment / keepalive
        if line.startswith("data:"):
            data_buf.append(line[len("data:"):].lstrip())
        # Ignore 'event:' / 'id:' lines; we only care about JSON-RPC payloads

    # Some servers close the stream right after the last data line without
    # the terminating blank line; give that final event a chance too.
    if data_buf:
        found = _matching_response(data_buf)
        if found is not None:
            return found

    raise IOError(f"[MCP:{self.name}] streamable-http SSE stream closed before response")
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# Common JSON-RPC helpers # Common JSON-RPC helpers
# ------------------------------------------------------------------ # ------------------------------------------------------------------
@@ -267,6 +414,8 @@ class McpClient:
return self._stdio_send(message) return self._stdio_send(message)
elif self.transport == "sse": elif self.transport == "sse":
return self._sse_send(message) return self._sse_send(message)
elif self.transport == "streamable-http":
return self._streamable_http_send(message)
else: else:
raise ValueError(f"[MCP:{self.name}] Unsupported transport: {self.transport}") raise ValueError(f"[MCP:{self.name}] Unsupported transport: {self.transport}")
@@ -291,6 +440,11 @@ class McpClient:
pass pass
except Exception: except Exception:
pass # notifications are fire-and-forget pass # notifications are fire-and-forget
elif self.transport == "streamable-http":
try:
self._streamable_http_post(notification, expect_response=False)
except Exception:
pass # notifications are fire-and-forget
def _handshake(self) -> bool: def _handshake(self) -> bool:
"""Perform the MCP initialize / notifications/initialized handshake.""" """Perform the MCP initialize / notifications/initialized handshake."""

View File

@@ -57,34 +57,44 @@ def init_scheduler(agent_bridge) -> bool:
_task_store = TaskStore(store_path) _task_store = TaskStore(store_path)
logger.debug(f"[Scheduler] Task store initialized: {store_path}") logger.debug(f"[Scheduler] Task store initialized: {store_path}")
# Create execute callback # Create execute callback. Returns True on success, False to ask
# the scheduler to retry on the next tick (e.g. channel not yet
# ready right after process start).
def execute_task_callback(task: dict):
    """Run one due task by dispatching on its action type.

    Returns True when the task was handled (or is of an unknown type) and
    False when the scheduler should retry on the next tick: the channel is
    not ready, the handler reported a failure, or an exception was raised.
    """
    try:
        spec = task.get("action", {})
        action_type = spec.get("type")
        channel_type = spec.get("channel_type", "unknown")
        receiver = spec.get("receiver", "")

        # Defer before doing any work if the outbound channel cannot deliver yet.
        if not _is_channel_ready(channel_type, receiver):
            logger.warning(
                f"[Scheduler] Task {task.get('id')}: channel "
                f"'{channel_type}' not ready for receiver={receiver} "
                f"(no inbound msg cached since restart?); deferring"
            )
            return False

        if action_type == "agent_task":
            runner = _execute_agent_task
        elif action_type == "send_message":
            runner = _execute_send_message
        elif action_type == "tool_call":
            runner = _execute_tool_call
        elif action_type == "skill_call":
            runner = _execute_skill_call
        else:
            logger.warning(f"[Scheduler] Unknown action type: {action_type}")
            return True

        return runner(task, agent_bridge)
    except Exception as e:
        logger.error(f"[Scheduler] Error executing task {task.get('id')}: {e}")
        return False
# Create scheduler service # Create scheduler service
_scheduler_service = SchedulerService(_task_store, execute_task_callback) _scheduler_service = SchedulerService(_task_store, execute_task_callback)
_scheduler_service.start() _scheduler_service.start()
logger.debug("[Scheduler] Scheduler service initialized and started") logger.info("[Scheduler] Service initialized and started")
return True return True
except Exception as e: except Exception as e:
@@ -92,6 +102,40 @@ def init_scheduler(agent_bridge) -> bool:
return False return False
def _is_channel_ready(channel_type: str, receiver: str) -> bool:
    """Best-effort readiness probe for outbound channels.

    Returns False when we know the send will drop (e.g. weixin not yet
    logged in, web session has no polling queue), so the scheduler can
    defer instead of consuming the task. Unknown channels return True
    to preserve previous behaviour.

    Args:
        channel_type: Channel key passed to ``create_channel``.
        receiver: Target id looked up in the channel's per-receiver state.

    Returns:
        True when the send should be attempted, False to defer it. Any
        error during the probe is logged and treated as ready.
    """
    if not channel_type or channel_type == "unknown":
        return True
    # A task without a receiver is malformed, not "not ready yet". Let the
    # executor report it and consume the task instead of deferring it on
    # every tick.
    if not receiver:
        return True

    # Channels that keep per-receiver state we can inspect, mapped to the
    # attribute holding that state.
    receiver_state_attr = {
        "weixin": "_context_tokens",
        "web": "session_queues",
    }

    try:
        from channel.channel_factory import create_channel
        channel = create_channel(channel_type)
        if channel is None:
            return False

        attr = receiver_state_attr.get(channel_type)
        if attr is None:
            return True

        known = getattr(channel, attr, None)
        return bool(known) and receiver in known
    except Exception as e:
        logger.warning(f"[Scheduler] Channel readiness check failed for {channel_type}: {e}")
        return True
def get_task_store(): def get_task_store():
"""Get the global task store instance""" """Get the global task store instance"""
return _task_store return _task_store
@@ -145,13 +189,10 @@ def _remember_delivered_output(
) )
def _execute_agent_task(task: dict, agent_bridge): def _execute_agent_task(task: dict, agent_bridge) -> bool:
""" """
Execute an agent_task action - let Agent handle the task Execute an agent_task action - let Agent handle the task.
Returns True on successful delivery, False to retry next tick.
Args:
task: Task dictionary
agent_bridge: AgentBridge instance
""" """
try: try:
action = task.get("action", {}) action = task.get("action", {})
@@ -162,11 +203,11 @@ def _execute_agent_task(task: dict, agent_bridge):
if not task_description: if not task_description:
logger.error(f"[Scheduler] Task {task['id']}: No task_description specified") logger.error(f"[Scheduler] Task {task['id']}: No task_description specified")
return return True # malformed task, don't loop forever
if not receiver: if not receiver:
logger.error(f"[Scheduler] Task {task['id']}: No receiver specified") logger.error(f"[Scheduler] Task {task['id']}: No receiver specified")
return return True
# Check for unsupported channels # Check for unsupported channels
if channel_type == "dingtalk": if channel_type == "dingtalk":
@@ -209,51 +250,47 @@ def _execute_agent_task(task: dict, agent_bridge):
try: try:
# Don't clear history - scheduler tasks use isolated session_id so they won't pollute user conversations # Don't clear history - scheduler tasks use isolated session_id so they won't pollute user conversations
reply = agent_bridge.agent_reply(task_description, context=context, on_event=None, clear_history=False) reply = agent_bridge.agent_reply(task_description, context=context, on_event=None, clear_history=False)
if reply and reply.content: if not (reply and reply.content):
# Send the reply via channel
from channel.channel_factory import create_channel
try:
channel = create_channel(channel_type)
if channel:
# For web channel, register request_id
if channel_type == "web" and hasattr(channel, 'request_to_session'):
request_id = context.get("request_id")
if request_id:
channel.request_to_session[request_id] = receiver
logger.debug(f"[Scheduler] Registered request_id {request_id} -> session {receiver}")
# Send the reply
channel.send(reply, context)
_remember_delivered_output(agent_bridge, task, channel_type, reply.content)
logger.info(f"[Scheduler] Task {task['id']} executed successfully, result sent to {receiver}")
else:
logger.error(f"[Scheduler] Failed to create channel: {channel_type}")
except Exception as e:
logger.error(f"[Scheduler] Failed to send result: {e}")
else:
logger.error(f"[Scheduler] Task {task['id']}: No result from agent execution") logger.error(f"[Scheduler] Task {task['id']}: No result from agent execution")
return True # agent ran but produced nothing; don't loop
from channel.channel_factory import create_channel
channel = create_channel(channel_type)
if not channel:
logger.error(f"[Scheduler] Failed to create channel: {channel_type}")
return False
if channel_type == "web" and hasattr(channel, 'request_to_session'):
request_id = context.get("request_id")
if request_id:
channel.request_to_session[request_id] = receiver
try:
channel.send(reply, context)
except Exception as e:
logger.error(f"[Scheduler] Failed to send result: {e}")
return False
_remember_delivered_output(agent_bridge, task, channel_type, reply.content)
logger.info(f"[Scheduler] Task {task['id']} executed successfully, result sent to {receiver}")
return True
except Exception as e: except Exception as e:
logger.error(f"[Scheduler] Failed to execute task via Agent: {e}") logger.error(f"[Scheduler] Failed to execute task via Agent: {e}")
import traceback import traceback
logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}") logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
return False
except Exception as e: except Exception as e:
logger.error(f"[Scheduler] Error in _execute_agent_task: {e}") logger.error(f"[Scheduler] Error in _execute_agent_task: {e}")
import traceback import traceback
logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}") logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
return False
def _execute_send_message(task: dict, agent_bridge): def _execute_send_message(task: dict, agent_bridge) -> bool:
""" """Execute a send_message action. Returns True/False for delivery."""
Execute a send_message action
Args:
task: Task dictionary
agent_bridge: AgentBridge instance
"""
try: try:
action = task.get("action", {}) action = task.get("action", {})
content = action.get("content", "") content = action.get("content", "")
@@ -263,7 +300,7 @@ def _execute_send_message(task: dict, agent_bridge):
if not receiver: if not receiver:
logger.error(f"[Scheduler] Task {task['id']}: No receiver specified") logger.error(f"[Scheduler] Task {task['id']}: No receiver specified")
return return True
# Create context for sending message # Create context for sending message
context = Context(ContextType.TEXT, content) context = Context(ContextType.TEXT, content)
@@ -308,169 +345,135 @@ def _execute_send_message(task: dict, agent_bridge):
# Get channel and send # Get channel and send
from channel.channel_factory import create_channel from channel.channel_factory import create_channel
channel = create_channel(channel_type)
if not channel:
logger.error(f"[Scheduler] Failed to create channel: {channel_type}")
return False
if channel_type == "web" and hasattr(channel, 'request_to_session'):
channel.request_to_session[request_id] = receiver
try: try:
channel = create_channel(channel_type) channel.send(reply, context)
if channel:
# For web channel, register the request_id to session mapping
if channel_type == "web" and hasattr(channel, 'request_to_session'):
channel.request_to_session[request_id] = receiver
logger.debug(f"[Scheduler] Registered request_id {request_id} -> session {receiver}")
channel.send(reply, context)
_remember_delivered_output(agent_bridge, task, channel_type, content)
logger.info(f"[Scheduler] Task {task['id']} executed: sent message to {receiver}")
else:
logger.error(f"[Scheduler] Failed to create channel: {channel_type}")
except Exception as e: except Exception as e:
logger.error(f"[Scheduler] Failed to send message: {e}") logger.error(f"[Scheduler] Failed to send message: {e}")
import traceback return False
logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
_remember_delivered_output(agent_bridge, task, channel_type, content)
logger.info(f"[Scheduler] Task {task['id']} executed: sent message to {receiver}")
return True
except Exception as e: except Exception as e:
logger.error(f"[Scheduler] Error in _execute_send_message: {e}") logger.error(f"[Scheduler] Error in _execute_send_message: {e}")
import traceback import traceback
logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}") logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
return False
def _execute_tool_call(task: dict, agent_bridge) -> bool:
    """Execute a tool_call action and send the tool's result to the receiver.

    Args:
        task: Task dictionary; ``task["action"]`` holds the tool name,
            parameters, receiver and channel type.
        agent_bridge: AgentBridge instance, passed through to
            ``_remember_delivered_output``.

    Returns:
        True when the result was delivered, or when the task is malformed
        (no tool name, no receiver, unknown tool) and retrying cannot help.
        False when the channel could not be created, the send failed or an
        unexpected error occurred, so the scheduler retries next tick.
    """
    try:
        action = task.get("action", {})
        tool_name = action.get("call_name") or action.get("tool_name")
        tool_params = action.get("call_params") or action.get("tool_params", {})
        result_prefix = action.get("result_prefix", "")
        receiver = action.get("receiver")
        # Sibling _execute_skill_call reads "isgroup"; accept both spellings
        # so a group flag stored under either key is honoured.
        is_group = action.get("is_group", action.get("isgroup", False))
        channel_type = action.get("channel_type", "unknown")

        if not tool_name:
            logger.error(f"[Scheduler] Task {task['id']}: No tool_name specified")
            return True

        if not receiver:
            logger.error(f"[Scheduler] Task {task['id']}: No receiver specified")
            return True

        from agent.tools.tool_manager import ToolManager
        tool = ToolManager().create_tool(tool_name)

        if not tool:
            logger.error(f"[Scheduler] Task {task['id']}: Tool '{tool_name}' not found")
            return True

        logger.info(f"[Scheduler] Task {task['id']}: Executing tool '{tool_name}' with params {tool_params}")
        result = tool.execute(tool_params)
        content = result.result if hasattr(result, 'result') else str(result)

        if result_prefix:
            content = f"{result_prefix}\n\n{content}"

        context = Context(ContextType.TEXT, content)
        context["receiver"] = receiver
        context["isgroup"] = is_group
        context["session_id"] = receiver

        # Channel-specific context fields.
        request_id = None
        if channel_type == "web":
            import uuid
            request_id = f"scheduler_{task['id']}_{uuid.uuid4().hex[:8]}"
            context["request_id"] = request_id
        elif channel_type == "feishu":
            context["receive_id_type"] = "chat_id" if is_group else "open_id"
            context["msg"] = None
        elif channel_type == "wecom_bot":
            context["msg"] = None

        reply = Reply(ReplyType.TEXT, content)

        from channel.channel_factory import create_channel
        channel = create_channel(channel_type)
        if not channel:
            logger.error(f"[Scheduler] Failed to create channel: {channel_type}")
            return False

        # Web channel: register the generated request id against the session.
        if channel_type == "web" and request_id and hasattr(channel, 'request_to_session'):
            channel.request_to_session[request_id] = receiver

        try:
            channel.send(reply, context)
        except Exception as e:
            logger.error(f"[Scheduler] Failed to send tool result: {e}")
            return False

        _remember_delivered_output(agent_bridge, task, channel_type, content)
        logger.info(f"[Scheduler] Task {task['id']} executed: sent tool result to {receiver}")
        return True

    except Exception as e:
        logger.error(f"[Scheduler] Error in _execute_tool_call: {e}")
        # Log the traceback as the sibling executors do.
        import traceback
        logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
        return False
def _execute_skill_call(task: dict, agent_bridge): def _execute_skill_call(task: dict, agent_bridge) -> bool:
""" """Execute a skill_call action by asking Agent to run the skill.
Execute a skill_call action by asking Agent to run the skill Returns True/False for delivery."""
Args:
task: Task dictionary
agent_bridge: AgentBridge instance
"""
try: try:
action = task.get("action", {}) action = task.get("action", {})
# Support both old and new field names
skill_name = action.get("call_name") or action.get("skill_name") skill_name = action.get("call_name") or action.get("skill_name")
skill_params = action.get("call_params") or action.get("skill_params", {}) skill_params = action.get("call_params") or action.get("skill_params", {})
result_prefix = action.get("result_prefix", "") result_prefix = action.get("result_prefix", "")
receiver = action.get("receiver") receiver = action.get("receiver")
is_group = action.get("isgroup", False) is_group = action.get("isgroup", False)
channel_type = action.get("channel_type", "unknown") channel_type = action.get("channel_type", "unknown")
if not skill_name: if not skill_name:
logger.error(f"[Scheduler] Task {task['id']}: No skill_name specified") logger.error(f"[Scheduler] Task {task['id']}: No skill_name specified")
return return True
if not receiver: if not receiver:
logger.error(f"[Scheduler] Task {task['id']}: No receiver specified") logger.error(f"[Scheduler] Task {task['id']}: No receiver specified")
return return True
logger.info(f"[Scheduler] Task {task['id']}: Executing skill '{skill_name}' with params {skill_params}") logger.info(f"[Scheduler] Task {task['id']}: Executing skill '{skill_name}' with params {skill_params}")
# Create a unique session_id for this scheduled task to avoid polluting user's conversation
# Format: scheduler_<receiver>_<task_id> to ensure isolation
scheduler_session_id = f"scheduler_{receiver}_{task['id']}" scheduler_session_id = f"scheduler_{receiver}_{task['id']}"
# Build a natural language query for the Agent to execute the skill
# Format: "Use skill-name to do something with params"
param_str = ", ".join([f"{k}={v}" for k, v in skill_params.items()]) param_str = ", ".join([f"{k}={v}" for k, v in skill_params.items()])
query = f"Use {skill_name} skill" query = f"Use {skill_name} skill"
if param_str: if param_str:
query += f" with {param_str}" query += f" with {param_str}"
# Create context for Agent
context = Context(ContextType.TEXT, query) context = Context(ContextType.TEXT, query)
context["receiver"] = receiver context["receiver"] = receiver
context["isgroup"] = is_group context["isgroup"] = is_group
context["session_id"] = scheduler_session_id context["session_id"] = scheduler_session_id
# Channel-specific setup
if channel_type == "web": if channel_type == "web":
import uuid import uuid
request_id = f"scheduler_{task['id']}_{uuid.uuid4().hex[:8]}" request_id = f"scheduler_{task['id']}_{uuid.uuid4().hex[:8]}"
@@ -481,49 +484,48 @@ def _execute_skill_call(task: dict, agent_bridge):
elif channel_type == "wecom_bot": elif channel_type == "wecom_bot":
context["msg"] = None context["msg"] = None
# Use Agent to execute the skill
try: try:
# Don't clear history - scheduler tasks use isolated session_id so they won't pollute user conversations
reply = agent_bridge.agent_reply(query, context=context, on_event=None, clear_history=False) reply = agent_bridge.agent_reply(query, context=context, on_event=None, clear_history=False)
if reply and reply.content:
content = reply.content
# Add prefix if specified
if result_prefix:
content = f"{result_prefix}\n\n{content}"
# Send the result via channel
from channel.channel_factory import create_channel
try:
channel = create_channel(channel_type)
if channel:
# For web channel, register request_id
if channel_type == "web" and hasattr(channel, 'request_to_session'):
req_id = context.get("request_id")
if req_id:
channel.request_to_session[req_id] = receiver
logger.debug(f"[Scheduler] Registered request_id {req_id} -> session {receiver}")
channel.send(Reply(ReplyType.TEXT, content), context)
_remember_delivered_output(agent_bridge, task, channel_type, content)
except Exception as e:
logger.error(f"[Scheduler] Failed to send skill result: {e}")
logger.info(f"[Scheduler] Task {task['id']} executed: skill result sent to {receiver}")
else:
logger.error(f"[Scheduler] Task {task['id']}: No result from skill execution")
except Exception as e: except Exception as e:
logger.error(f"[Scheduler] Failed to execute skill via Agent: {e}") logger.error(f"[Scheduler] Failed to execute skill via Agent: {e}")
import traceback import traceback
logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}") logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
return False
if not (reply and reply.content):
logger.error(f"[Scheduler] Task {task['id']}: No result from skill execution")
return True
content = reply.content
if result_prefix:
content = f"{result_prefix}\n\n{content}"
from channel.channel_factory import create_channel
channel = create_channel(channel_type)
if not channel:
logger.error(f"[Scheduler] Failed to create channel: {channel_type}")
return False
if channel_type == "web" and hasattr(channel, 'request_to_session'):
req_id = context.get("request_id")
if req_id:
channel.request_to_session[req_id] = receiver
try:
channel.send(Reply(ReplyType.TEXT, content), context)
except Exception as e:
logger.error(f"[Scheduler] Failed to send skill result: {e}")
return False
_remember_delivered_output(agent_bridge, task, channel_type, content)
logger.info(f"[Scheduler] Task {task['id']} executed: skill result sent to {receiver}")
return True
except Exception as e: except Exception as e:
logger.error(f"[Scheduler] Error in _execute_skill_call: {e}") logger.error(f"[Scheduler] Error in _execute_skill_call: {e}")
import traceback import traceback
logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}") logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
return False
def attach_scheduler_to_tool(tool, context: Context = None): def attach_scheduler_to_tool(tool, context: Context = None):

View File

@@ -52,7 +52,6 @@ class SchedulerService:
self.running = True self.running = True
self.thread = threading.Thread(target=self._run_loop, daemon=True) self.thread = threading.Thread(target=self._run_loop, daemon=True)
self.thread.start() self.thread.start()
logger.debug("[Scheduler] Service started")
def stop(self): def stop(self):
"""Stop the scheduler service""" """Stop the scheduler service"""
@@ -67,7 +66,7 @@ class SchedulerService:
def _run_loop(self): def _run_loop(self):
"""Main scheduler loop""" """Main scheduler loop"""
logger.debug("[Scheduler] Scheduler loop started") logger.info("[Scheduler] Scheduler loop started")
while self.running: while self.running:
try: try:
@@ -84,12 +83,18 @@ class SchedulerService:
for task in tasks: for task in tasks:
try: try:
# Check if task is due
if self._is_task_due(task, now): if self._is_task_due(task, now):
logger.info(f"[Scheduler] Executing task: {task['id']} - {task['name']}") logger.info(f"[Scheduler] Executing task: {task['id']} - {task['name']}")
self._execute_task(task) ok = self._execute_task(task)
if not ok:
# Update next run time # Leave next_run_at as-is so the next loop retries.
# Cron tasks within the catch-up window will keep
# firing; beyond it _is_task_due will reschedule.
logger.warning(
f"[Scheduler] Task {task['id']} delivery failed, will retry next tick"
)
continue
next_run = self._calculate_next_run(task, now) next_run = self._calculate_next_run(task, now)
if next_run: if next_run:
self.task_store.update_task(task['id'], { self.task_store.update_task(task['id'], {
@@ -97,7 +102,6 @@ class SchedulerService:
"last_run_at": now.isoformat() "last_run_at": now.isoformat()
}) })
else: else:
# One-time task completed, remove it
self.task_store.delete_task(task['id']) self.task_store.delete_task(task['id'])
logger.info(f"[Scheduler] One-time task completed and removed: {task['id']}") logger.info(f"[Scheduler] One-time task completed and removed: {task['id']}")
except Exception as e: except Exception as e:
@@ -128,30 +132,35 @@ class SchedulerService:
try: try:
next_run = _parse_naive_local(next_run_str) next_run = _parse_naive_local(next_run_str)
# Check if task is overdue (e.g., service restart)
if next_run < now: if next_run < now:
time_diff = (now - next_run).total_seconds() time_diff = (now - next_run).total_seconds()
schedule = task.get("schedule", {})
# If overdue by more than 5 minutes, skip this run and schedule next schedule_type = schedule.get("type")
if time_diff > 300: # 5 minutes
logger.warning(f"[Scheduler] Task {task['id']} is overdue by {int(time_diff)}s, skipping and scheduling next run") # Catch-up window: fire if we're within 10 minutes of the
# scheduled tick. Beyond that we'd rather skip than push a
# For one-time tasks, remove them directly # stale daily report to the user.
schedule = task.get("schedule", {}) if time_diff <= 600:
if schedule.get("type") == "once": return True
self.task_store.delete_task(task['id'])
logger.info(f"[Scheduler] One-time task {task['id']} expired, removed") logger.warning(
return False f"[Scheduler] Task {task['id']} is overdue by {int(time_diff)}s, "
f"skipping and scheduling next run"
# For recurring tasks, calculate next run from now )
next_next_run = self._calculate_next_run(task, now)
if next_next_run: if schedule_type == "once":
self.task_store.update_task(task['id'], { self.task_store.delete_task(task['id'])
"next_run_at": next_next_run.isoformat() logger.info(f"[Scheduler] One-time task {task['id']} expired, removed")
})
logger.info(f"[Scheduler] Rescheduled task {task['id']} to {next_next_run}")
return False return False
next_next_run = self._calculate_next_run(task, now)
if next_next_run:
self.task_store.update_task(task['id'], {
"next_run_at": next_next_run.isoformat()
})
logger.info(f"[Scheduler] Rescheduled task {task['id']} to {next_next_run}")
return False
return now >= next_run return now >= next_run
except Exception as e: except Exception as e:
logger.error( logger.error(
@@ -213,20 +222,22 @@ class SchedulerService:
return None return None
def _execute_task(self, task: dict): def _execute_task(self, task: dict) -> bool:
""" """
Execute a task Execute a task.
Args: Returns True if delivery succeeded (caller should advance state),
task: Task dictionary False if it failed (caller should keep next_run_at so the next
loop iteration retries). Callback may return None for legacy
behaviour, treated as success.
""" """
try: try:
# Call the execute callback result = self.execute_callback(task)
self.execute_callback(task) return False if result is False else True
except Exception as e: except Exception as e:
logger.error(f"[Scheduler] Error executing task {task['id']}: {e}") logger.error(f"[Scheduler] Error executing task {task['id']}: {e}")
# Update task with error
self.task_store.update_task(task['id'], { self.task_store.update_task(task['id'], {
"last_error": str(e), "last_error": str(e),
"last_error_at": datetime.now().isoformat() "last_error_at": datetime.now().isoformat()
}) })
return False

View File

@@ -3,7 +3,7 @@ Vision tool - Analyze images using Vision API.
Supports local files (auto base64-encoded) and HTTP URLs. Supports local files (auto base64-encoded) and HTTP URLs.
Provider resolution: Provider resolution:
- tool.vision.model (if set) means "prefer this model first; fall back to - tools.vision.model (if set) means "prefer this model first; fall back to
other configured providers if it fails". The model name is mapped to its other configured providers if it fails". The model name is mapped to its
native provider (e.g. doubao-* → Doubao, kimi-* → Moonshot, gpt-* → native provider (e.g. doubao-* → Doubao, kimi-* → Moonshot, gpt-* →
OpenAI/LinkAI). That provider is tried first, then the standard auto OpenAI/LinkAI). That provider is tried first, then the standard auto
@@ -53,14 +53,15 @@ _DISCOVERABLE_MODELS = [
("ark_api_key", const.DOUBAO, const.DOUBAO_SEED_2_PRO, "Doubao"), ("ark_api_key", const.DOUBAO, const.DOUBAO_SEED_2_PRO, "Doubao"),
("dashscope_api_key", const.QWEN_DASHSCOPE, const.QWEN36_PLUS, "DashScope"), ("dashscope_api_key", const.QWEN_DASHSCOPE, const.QWEN36_PLUS, "DashScope"),
("claude_api_key", const.CLAUDEAPI, const.CLAUDE_4_6_SONNET, "Claude"), ("claude_api_key", const.CLAUDEAPI, const.CLAUDE_4_6_SONNET, "Claude"),
("gemini_api_key", const.GEMINI, const.GEMINI_31_FLASH_LITE_PRE, "Gemini"), ("gemini_api_key", const.GEMINI, const.GEMINI_35_FLASH, "Gemini"),
("qianfan_api_key", const.QIANFAN, const.ERNIE_45_TURBO_VL, "Qianfan"), ("qianfan_api_key", const.QIANFAN, const.ERNIE_45_TURBO_VL, "Qianfan"),
("zhipu_ai_api_key", const.ZHIPU_AI, const.GLM_4_7, "ZhipuAI"), ("zhipu_ai_api_key", const.ZHIPU_AI, const.GLM_4_7, "ZhipuAI"),
("minimax_api_key", const.MiniMax, const.MINIMAX_M2_7, "MiniMax"), ("minimax_api_key", const.MiniMax, const.MINIMAX_M2_7, "MiniMax"),
("mimo_api_key", const.MIMO, const.MIMO_V2_5_PRO, "MiMo"),
] ]
# Model name prefix → discoverable provider display_name. # Model name prefix → discoverable provider display_name.
# Used to auto-route tool.vision.model to its native provider. # Used to auto-route tools.vision.model to its native provider.
# Matched case-insensitively; longest prefix wins. # Matched case-insensitively; longest prefix wins.
_MODEL_PREFIX_TO_PROVIDER = [ _MODEL_PREFIX_TO_PROVIDER = [
("doubao-", "Doubao"), ("doubao-", "Doubao"),
@@ -73,11 +74,29 @@ _MODEL_PREFIX_TO_PROVIDER = [
("glm-", "ZhipuAI"), ("glm-", "ZhipuAI"),
("minimax-", "MiniMax"), ("minimax-", "MiniMax"),
("abab", "MiniMax"), ("abab", "MiniMax"),
("mimo-", "MiMo"),
] ]
# Model prefixes that natively belong to OpenAI / LinkAI (raw HTTP providers). # Model prefixes that natively belong to OpenAI / LinkAI (raw HTTP providers).
_OPENAI_MODEL_PREFIXES = ("gpt-", "o1-", "o3-", "o4-", "chatgpt-") _OPENAI_MODEL_PREFIXES = ("gpt-", "o1-", "o3-", "o4-", "chatgpt-")
# Maps the UI provider id (persisted in tools.vision.provider) to the internal
# display name used in VisionProvider.name. Keep in sync with _DISCOVERABLE_MODELS
# and the openai/linkai branches in _route_by_model_name.
_PROVIDER_ID_TO_DISPLAY = {
"openai": "OpenAI",
"linkai": "LinkAI",
"moonshot": "Moonshot",
"doubao": "Doubao",
"dashscope": "DashScope",
"claudeAPI": "Claude",
"gemini": "Gemini",
"qianfan": "Qianfan",
"zhipu": "ZhipuAI",
"minimax": "MiniMax",
"mimo": "MiMo",
}
@dataclass @dataclass
class VisionProvider: class VisionProvider:
@@ -154,7 +173,7 @@ class Vision(BaseTool):
# Default model is only used as a last-resort placeholder for providers # Default model is only used as a last-resort placeholder for providers
# whose VisionProvider.model_override is None (e.g. raw OpenAI provider # whose VisionProvider.model_override is None (e.g. raw OpenAI provider
# when the user did not configure tool.vision.model). # when the user did not configure tools.vision.model).
return self._call_with_fallback(providers, DEFAULT_MODEL, question, image_content) return self._call_with_fallback(providers, DEFAULT_MODEL, question, image_content)
def _call_with_fallback(self, providers: List[VisionProvider], model: str, def _call_with_fallback(self, providers: List[VisionProvider], model: str,
@@ -193,12 +212,12 @@ class Vision(BaseTool):
""" """
Build an ordered list of providers to try. Build an ordered list of providers to try.
Semantics of `tool.vision.model`: Semantics of `tools.vision.model`:
"Prefer this model first; fall back to other configured providers "Prefer this model first; fall back to other configured providers
if it fails." if it fails."
Order: Order:
1. The provider that natively serves `tool.vision.model` (if any 1. The provider that natively serves `tools.vision.model` (if any
and its API key is configured) — using the user-specified model and its API key is configured) — using the user-specified model
name verbatim. name verbatim.
2. Auto-discovery chain as fallback: 2. Auto-discovery chain as fallback:
@@ -211,13 +230,19 @@ class Vision(BaseTool):
are de-duplicated to avoid retrying the same endpoint twice. are de-duplicated to avoid retrying the same endpoint twice.
""" """
user_model = self._resolve_user_vision_model() user_model = self._resolve_user_vision_model()
user_provider = self._resolve_user_vision_provider()
providers: List[VisionProvider] = [] providers: List[VisionProvider] = []
# Step 1: preferred provider derived from tool.vision.model # Step 1: preferred provider — explicit `tools.vision.provider`
if user_model: # wins so custom model names can still be routed correctly. Falls
# through to model-name prefix inference when provider is unset.
preferred = None
if user_provider and user_model:
preferred = self._route_by_provider_id(user_provider, user_model)
if not preferred and user_model:
preferred = self._route_by_model_name(user_model) preferred = self._route_by_model_name(user_model)
if preferred: if preferred:
providers.extend(preferred) providers.extend(preferred)
# Step 2: auto-discovery chain as fallback # Step 2: auto-discovery chain as fallback
existing = {p.name for p in providers} existing = {p.name for p in providers}
@@ -251,11 +276,11 @@ class Vision(BaseTool):
@staticmethod @staticmethod
def _resolve_user_vision_model() -> Optional[str]: def _resolve_user_vision_model() -> Optional[str]:
"""Read tool.vision.model from config; return None if unset/blank.""" """Read tools.vision.model (singular ``tool`` kept as runtime fallback)."""
tool_conf = conf().get("tool", {}) tools_conf = conf().get("tools") or conf().get("tool") or {}
if not isinstance(tool_conf, dict): if not isinstance(tools_conf, dict):
return None return None
vision_conf = tool_conf.get("vision", {}) vision_conf = tools_conf.get("vision", {})
if not isinstance(vision_conf, dict): if not isinstance(vision_conf, dict):
return None return None
m = vision_conf.get("model") m = vision_conf.get("model")
@@ -263,6 +288,24 @@ class Vision(BaseTool):
return m.strip() return m.strip()
return None return None
@staticmethod
def _resolve_user_vision_provider() -> Optional[str]:
    """Return the vendor id stored under ``tools.vision.provider``.

    The ``tools`` config section is preferred; the singular ``tool``
    section is consulted only when ``tools`` is missing or empty. The
    value lets a user pin a vendor for model names that prefix-based
    inference cannot recognize.

    Returns:
        The stripped provider id, or None when the section is not a
        dict, the value is not a string, or it is blank.
    """
    root = conf().get("tools") or conf().get("tool") or {}
    vision_section = root.get("vision", {}) if isinstance(root, dict) else None
    if not isinstance(vision_section, dict):
        return None
    raw = vision_section.get("provider")
    if not isinstance(raw, str):
        return None
    cleaned = raw.strip()
    # An all-whitespace value counts as "unset".
    return cleaned or None
@staticmethod @staticmethod
def _infer_provider_from_model(model_name: str) -> Optional[str]: def _infer_provider_from_model(model_name: str) -> Optional[str]:
""" """
@@ -279,6 +322,54 @@ class Vision(BaseTool):
return display_name return display_name
return None return None
def _route_by_provider_id(self, provider_id: str, user_model: str) -> Optional[List[VisionProvider]]:
    """Build the preferred provider list from the UI-persisted provider id.

    Args:
        provider_id: Key looked up in ``_PROVIDER_ID_TO_DISPLAY``.
        user_model: Model name passed through verbatim to the provider.

    Returns:
        A single-element list when the id is known and a provider could
        be built; None when the id is unknown, the API key is not
        configured, or the bot cannot be created / lacks ``call_vision``.
        On None the caller falls through to model-name-based routing.
    """
    display_name = _PROVIDER_ID_TO_DISPLAY.get(provider_id)
    if not display_name:
        return None

    # OpenAI / LinkAI are served by raw HTTP providers rather than a bot.
    if provider_id in ("openai", "linkai"):
        builder_name = "_build_openai_provider" if provider_id == "openai" else "_build_linkai_provider"
        built = getattr(self, builder_name)(user_model)
        return [built] if built else None

    # Bot-backed providers: use the first discoverable entry with this name.
    entry = next((row for row in _DISCOVERABLE_MODELS if row[3] == display_name), None)
    if entry is None:
        return None
    config_key, bot_type = entry[0], entry[1]

    api_key = conf().get(config_key, "")
    if not api_key or not api_key.strip():
        logger.warning(f"[Vision] tools.vision.provider='{provider_id}' "
                       f"but '{config_key}' is not configured. Falling back.")
        return None

    try:
        from models.bot_factory import create_bot
        bot = create_bot(bot_type)
        if not hasattr(bot, 'call_vision'):
            logger.warning(f"[Vision] '{display_name}' bot does not implement call_vision.")
            return None
    except Exception as e:
        logger.warning(f"[Vision] Failed to create '{display_name}' bot: {e}")
        return None

    # Credentials stay empty here; the call is routed through the bot.
    routed = VisionProvider(
        name=display_name,
        api_key="",
        api_base="",
        model_override=user_model,
        use_bot=True,
        fallback_bot=bot,
    )
    return [routed]
def _route_by_model_name(self, user_model: str) -> Optional[List[VisionProvider]]: def _route_by_model_name(self, user_model: str) -> Optional[List[VisionProvider]]:
""" """
Try to build a provider list using the user-specified model name. Try to build a provider list using the user-specified model name.
@@ -303,7 +394,7 @@ class Vision(BaseTool):
self._append_provider(providers, lambda: self._build_linkai_provider(user_model)) self._append_provider(providers, lambda: self._build_linkai_provider(user_model))
if providers: if providers:
return providers return providers
logger.warning(f"[Vision] tool.vision.model='{user_model}' looks like an OpenAI " logger.warning(f"[Vision] tools.vision.model='{user_model}' looks like an OpenAI "
f"model but neither OPENAI_API_KEY nor LINKAI_API_KEY is configured.") f"model but neither OPENAI_API_KEY nor LINKAI_API_KEY is configured.")
return None # fall through to auto return None # fall through to auto
@@ -317,7 +408,7 @@ class Vision(BaseTool):
continue continue
api_key = conf().get(config_key, "") api_key = conf().get(config_key, "")
if not api_key or not api_key.strip(): if not api_key or not api_key.strip():
logger.warning(f"[Vision] tool.vision.model='{user_model}' routes to " logger.warning(f"[Vision] tools.vision.model='{user_model}' routes to "
f"'{display_name}' but '{config_key}' is not configured. " f"'{display_name}' but '{config_key}' is not configured. "
f"Falling back to auto-discovery.") f"Falling back to auto-discovery.")
return None # fall through to auto return None # fall through to auto
@@ -452,8 +543,8 @@ class Vision(BaseTool):
if not self._main_bot_supports_vision(bot): if not self._main_bot_supports_vision(bot):
return None return None
# Use the configured main model name; do NOT inject tool.vision.model # Use the configured main model name; do NOT inject tools.vision.model
# here, because by the time we reach this branch the tool.vision.model # here, because by the time we reach this branch the tools.vision.model
# routing has already been attempted (and either matched the main bot # routing has already been attempted (and either matched the main bot
# or failed to find a provider). # or failed to find a provider).
main_model_name = conf().get("model") or None main_model_name = conf().get("model") or None

View File

@@ -1,13 +1,27 @@
""" """Web Search tool. Supports four backends with a unified response format:
Web Search tool - Search the web using Bocha or LinkAI search API. - bocha (https://open.bochaai.com)
Supports two backends with unified response format: - zhipu (https://docs.bigmodel.cn/cn/guide/tools/web-search)
1. Bocha Search (primary, requires BOCHA_API_KEY) - qianfan (https://cloud.baidu.com/doc/qianfan/s/2mh4su4uy)
2. LinkAI Search (fallback, requires LINKAI_API_KEY) - linkai (https://link-ai.tech, fallback)
Provider selection
- strategy 'auto' (default): pick the first configured provider in the
    canonical order [bocha, qianfan, zhipu, linkai]. When the caller passes
an explicit `provider` it overrides the pick; an invalid/unconfigured
one silently falls back to the auto order.
- strategy 'fixed': use the configured provider; if its credential is
missing at call time, silently fall back to auto order (no card hint).
Credentials
- bocha : tools.web_search.bocha_api_key -> env BOCHA_API_KEY
- zhipu : conf.zhipu_ai_api_key -> env ZHIPUAI_API_KEY
- qianfan : conf.qianfan_api_key -> env QIANFAN_API_KEY
- linkai : conf.linkai_api_key -> env LINKAI_API_KEY
""" """
import os
import json import json
from typing import Dict, Any, Optional import os
from typing import Any, Dict, List, Optional
import requests import requests
@@ -16,12 +30,63 @@ from common.log import logger
from config import conf from config import conf
# Default timeout for API requests (seconds)
DEFAULT_TIMEOUT = 30 DEFAULT_TIMEOUT = 30
# Canonical fallback order. Empirically ordered by Chinese real-time
# quality + relevance: bocha (best overall), qianfan (best for hot news),
# zhipu (strong on long-form articles), linkai (cloud aggregator, last
# resort).
PROVIDER_ORDER = ("bocha", "qianfan", "zhipu", "linkai")
PROVIDER_LABELS = {
"bocha": "Bocha",
"zhipu": "Zhipu",
"qianfan": "Baidu Qianfan",
"linkai": "LinkAI",
}
def _tools_web_search_conf() -> dict:
    """Return the ``tools.web_search`` config section.

    Returns an empty dict when ``tools`` or ``tools.web_search`` is
    missing, falsy, or not a dict.
    """
    tools_section = conf().get("tools") or {}
    if isinstance(tools_section, dict):
        section = tools_section.get("web_search") or {}
        if isinstance(section, dict):
            return section
    return {}
def _get_api_key(provider: str) -> str:
    """Look up the API key for ``provider``: config first, then environment.

    ``bocha`` reads its key from the ``tools.web_search`` section; the
    other providers read a top-level config key. When the configured
    value is empty after stripping, the matching environment variable is
    used. Unknown providers yield an empty string.
    """
    if provider == "bocha":
        stored = (_tools_web_search_conf().get("bocha_api_key") or "").strip()
        return stored or os.environ.get("BOCHA_API_KEY", "").strip()

    # (provider, top-level config key, environment variable)
    shared_sources = (
        ("zhipu", "zhipu_ai_api_key", "ZHIPUAI_API_KEY"),
        ("qianfan", "qianfan_api_key", "QIANFAN_API_KEY"),
        ("linkai", "linkai_api_key", "LINKAI_API_KEY"),
    )
    for name, conf_field, env_var in shared_sources:
        if provider == name:
            stored = (conf().get(conf_field) or "").strip()
            return stored or os.environ.get(env_var, "").strip()
    return ""
def configured_providers() -> List[str]:
    """List the providers that have a non-empty API key, in ``PROVIDER_ORDER`` order."""
    ready: List[str] = []
    for candidate in PROVIDER_ORDER:
        if _get_api_key(candidate):
            ready.append(candidate)
    return ready
def _configured_strategy() -> str:
    """Return the normalized ``tools.web_search.strategy`` value.

    The raw value is stripped and lower-cased. A missing, blank
    (including whitespace-only), or non-string value resolves to
    ``"auto"``, so a malformed config entry can neither raise here nor
    produce an empty strategy name.
    """
    raw = _tools_web_search_conf().get("strategy")
    if not isinstance(raw, str):
        return "auto"
    return raw.strip().lower() or "auto"
def _configured_provider() -> str:
    """Return the normalized ``tools.web_search.provider`` value.

    The raw value is stripped and lower-cased. A missing or non-string
    value resolves to ``""`` (no pinned provider) instead of raising on
    ``.strip()``.
    """
    raw = _tools_web_search_conf().get("provider")
    return raw.strip().lower() if isinstance(raw, str) else ""
class WebSearch(BaseTool): class WebSearch(BaseTool):
"""Tool for searching the web using Bocha or LinkAI search API""" """Tool for searching the web across multiple providers."""
name: str = "web_search" name: str = "web_search"
description: str = "Search the web for real-time information. Returns titles, URLs, and snippets." description: str = "Search the web for real-time information. Returns titles, URLs, and snippets."
@@ -55,264 +120,368 @@ class WebSearch(BaseTool):
def __init__(self, config: dict = None): def __init__(self, config: dict = None):
self.config = config or {} self.config = config or {}
self._backend = None # Will be resolved on first execute
@staticmethod @staticmethod
def is_available() -> bool: def is_available() -> bool:
"""Check if web search is available (at least one API key is configured)""" """Tool is offered to the agent when at least one provider has a key."""
return bool(os.environ.get("BOCHA_API_KEY") or os.environ.get("LINKAI_API_KEY")) return bool(configured_providers())
def _resolve_backend(self) -> Optional[str]: @classmethod
""" def get_json_schema(cls) -> dict:
Determine which search backend to use. """Augment the static schema with a `provider` field — only when the
Priority: Bocha > LinkAI user has ≥2 providers configured AND strategy is 'auto'. Otherwise
the backend picks silently and exposing the field would only waste
the agent's tokens."""
schema = {
"name": cls.name,
"description": cls.description,
"parameters": json.loads(json.dumps(cls.params)), # deep copy
}
if _configured_strategy() != "auto":
return schema
available = configured_providers()
if len(available) < 2:
return schema
:return: 'bocha', 'linkai', or None schema["parameters"]["properties"]["provider"] = {
"type": "string",
"enum": available,
"description": "Optional. Specifies the search backend. You may switch between providers when the user wants results from a particular source or from multiple sources.",
}
return schema
# ------------------------------------------------------------------
# Provider resolution
# ------------------------------------------------------------------
def _resolve_provider(self, requested: Optional[str]) -> Optional[str]:
"""Pick a provider for this call.
Priority: caller-supplied (if configured) > fixed strategy (if
configured) > first configured in PROVIDER_ORDER. Silent fallback
when the desired one has no key.
""" """
if os.environ.get("BOCHA_API_KEY"): available = configured_providers()
return "bocha" if not available:
if os.environ.get("LINKAI_API_KEY"): return None
return "linkai"
return None if requested:
req = requested.strip().lower()
if req in available:
return req
logger.warning(f"[WebSearch] requested provider '{requested}' unavailable, falling back")
if _configured_strategy() == "fixed":
pinned = _configured_provider()
if pinned in available:
return pinned
if pinned:
logger.warning(f"[WebSearch] pinned provider '{pinned}' unavailable, falling back to auto")
return available[0]
@staticmethod
def _resolution_reason(requested: Optional[str], chosen: str) -> str:
    """Label why ``chosen`` was selected, for the routing log line.

    Returns:
        ``"caller-requested"`` when the stripped, lower-cased
        ``requested`` equals ``chosen``; ``"fixed-strategy"`` when the
        configured strategy is ``fixed`` and the configured provider
        equals ``chosen``; otherwise ``"auto-fallback"``.
    """
    if requested:
        if requested.strip().lower() == chosen:
            return "caller-requested"
    pinned_wins = _configured_strategy() == "fixed" and _configured_provider() == chosen
    return "fixed-strategy" if pinned_wins else "auto-fallback"
# ------------------------------------------------------------------
# Entry point
# ------------------------------------------------------------------
def execute(self, args: Dict[str, Any]) -> ToolResult: def execute(self, args: Dict[str, Any]) -> ToolResult:
""" query = (args.get("query") or "").strip()
Execute web search
:param args: Search parameters (query, count, freshness, summary)
:return: Search results
"""
query = args.get("query", "").strip()
if not query: if not query:
return ToolResult.fail("Error: 'query' parameter is required") return ToolResult.fail("Error: 'query' parameter is required")
count = args.get("count", 10) count = args.get("count", 10)
freshness = args.get("freshness", "noLimit") freshness = args.get("freshness", "noLimit")
summary = args.get("summary", False) summary = args.get("summary", False)
# Validate count
if not isinstance(count, int) or count < 1 or count > 50: if not isinstance(count, int) or count < 1 or count > 50:
count = 10 count = 10
# Resolve backend requested = args.get("provider")
backend = self._resolve_backend() provider = self._resolve_provider(requested)
if not backend: if not provider:
return ToolResult.fail( return ToolResult.fail(
"Error: No search API key configured. " "Error: No search provider configured. "
"Please set BOCHA_API_KEY or LINKAI_API_KEY using env_config tool.\n" "Configure one of BOCHA_API_KEY / zhipu_ai_api_key / qianfan_api_key / linkai_api_key."
" - Bocha Search: https://open.bocha.cn\n"
" - LinkAI Search: https://link-ai.tech"
) )
# Always log the routing decision so multi-provider deployments can
# tell at a glance which backend served any given query.
available = configured_providers()
reason = self._resolution_reason(requested, provider)
q_preview = query if len(query) <= 60 else (query[:57] + "...")
logger.info(
f"[WebSearch] provider={provider} reason={reason} "
f"available={list(available)} query={q_preview!r} count={count} freshness={freshness}"
)
try: try:
if backend == "bocha": if provider == "bocha":
return self._search_bocha(query, count, freshness, summary) return self._search_bocha(query, count, freshness, summary)
else: if provider == "zhipu":
return self._search_zhipu(query, count, freshness)
if provider == "qianfan":
return self._search_qianfan(query, count, freshness)
if provider == "linkai":
return self._search_linkai(query, count, freshness) return self._search_linkai(query, count, freshness)
return ToolResult.fail(f"Error: Unknown provider '{provider}'")
except requests.Timeout: except requests.Timeout:
return ToolResult.fail(f"Error: Search request timed out after {DEFAULT_TIMEOUT}s") return ToolResult.fail(f"Error: Search request timed out after {DEFAULT_TIMEOUT}s")
except requests.ConnectionError: except requests.ConnectionError:
return ToolResult.fail("Error: Failed to connect to search API") return ToolResult.fail("Error: Failed to connect to search API")
except Exception as e: except Exception as e:
logger.error(f"[WebSearch] Unexpected error: {e}", exc_info=True) logger.error(f"[WebSearch] Unexpected error ({provider}): {e}", exc_info=True)
return ToolResult.fail(f"Error: Search failed - {str(e)}") return ToolResult.fail(f"Error: Search failed - {str(e)}")
# ------------------------------------------------------------------
# Bocha
# ------------------------------------------------------------------
def _search_bocha(self, query: str, count: int, freshness: str, summary: bool) -> ToolResult: def _search_bocha(self, query: str, count: int, freshness: str, summary: bool) -> ToolResult:
""" api_key = _get_api_key("bocha")
Search using Bocha API url = "https://api.bochaai.com/v1/web-search"
:param query: Search query
:param count: Number of results
:param freshness: Time range filter
:param summary: Whether to include summary
:return: Formatted search results
"""
api_key = os.environ.get("BOCHA_API_KEY", "")
url = "https://api.bocha.cn/v1/web-search"
headers = { headers = {
"Authorization": f"Bearer {api_key}", "Authorization": f"Bearer {api_key}",
"Content-Type": "application/json", "Content-Type": "application/json",
"Accept": "application/json" "Accept": "application/json",
} }
payload = {"query": query, "count": count, "freshness": freshness, "summary": summary}
payload = { logger.debug(f"[WebSearch] bocha: query='{query}', count={count}")
"query": query, resp = requests.post(url, headers=headers, json=payload, timeout=DEFAULT_TIMEOUT)
"count": count,
"freshness": freshness,
"summary": summary
}
logger.debug(f"[WebSearch] Bocha search: query='{query}', count={count}") if resp.status_code == 401:
return ToolResult.fail("Error: Invalid bocha API key.")
if resp.status_code == 403:
return ToolResult.fail("Error: bocha API — insufficient balance. Top up at https://open.bochaai.com")
if resp.status_code == 429:
return ToolResult.fail("Error: bocha API rate limit reached.")
if resp.status_code != 200:
return ToolResult.fail(f"Error: bocha API returned HTTP {resp.status_code}")
response = requests.post(url, headers=headers, json=payload, timeout=DEFAULT_TIMEOUT) data = resp.json()
if response.status_code == 401:
return ToolResult.fail("Error: Invalid BOCHA_API_KEY. Please check your API key.")
if response.status_code == 403:
return ToolResult.fail("Error: Bocha API - insufficient balance. Please top up at https://open.bocha.cn")
if response.status_code == 429:
return ToolResult.fail("Error: Bocha API rate limit reached. Please try again later.")
if response.status_code != 200:
return ToolResult.fail(f"Error: Bocha API returned HTTP {response.status_code}")
data = response.json()
# Check API-level error code
api_code = data.get("code") api_code = data.get("code")
if api_code is not None and api_code != 200: if api_code is not None and api_code != 200:
msg = data.get("msg") or "Unknown error" msg = data.get("msg") or "Unknown error"
return ToolResult.fail(f"Error: Bocha API error (code={api_code}): {msg}") return ToolResult.fail(f"Error: bocha API error (code={api_code}): {msg}")
# Extract and format results
return self._format_bocha_results(data, query)
def _format_bocha_results(self, data: dict, query: str) -> ToolResult:
"""
Format Bocha API response into unified result structure
:param data: Raw API response
:param query: Original query
:return: Formatted ToolResult
"""
search_data = data.get("data", {})
web_pages = search_data.get("webPages", {})
pages = web_pages.get("value", [])
if not pages:
return ToolResult.success({
"query": query,
"backend": "bocha",
"total": 0,
"results": [],
"message": "No results found"
})
pages = (data.get("data") or {}).get("webPages", {}).get("value", []) or []
results = [] results = []
for page in pages: for p in pages:
result = { item = {
"title": page.get("name", ""), "title": p.get("name", ""),
"url": page.get("url", ""), "url": p.get("url", ""),
"snippet": page.get("snippet", ""), "snippet": p.get("snippet", ""),
"siteName": page.get("siteName", ""), "siteName": p.get("siteName", ""),
"datePublished": page.get("datePublished") or page.get("dateLastCrawled", ""), "datePublished": p.get("datePublished") or p.get("dateLastCrawled", ""),
} }
# Include summary only if present if p.get("summary"):
if page.get("summary"): item["summary"] = p["summary"]
result["summary"] = page["summary"] results.append(item)
results.append(result) total = (data.get("data") or {}).get("webPages", {}).get("totalEstimatedMatches", len(results))
total = web_pages.get("totalEstimatedMatches", len(results))
return ToolResult.success({ return ToolResult.success({
"query": query, "query": query, "backend": "bocha",
"backend": "bocha", "total": total, "count": len(results), "results": results,
"total": total,
"count": len(results),
"results": results
}) })
def _search_linkai(self, query: str, count: int, freshness: str) -> ToolResult: # ------------------------------------------------------------------
""" # Zhipu
Search using LinkAI plugin API # ------------------------------------------------------------------
:param query: Search query def _search_zhipu(self, query: str, count: int, freshness: str) -> ToolResult:
:param count: Number of results api_key = _get_api_key("zhipu")
:param freshness: Time range filter api_base = (conf().get("zhipu_ai_api_base") or "https://open.bigmodel.cn/api/paas/v4").rstrip("/")
:return: Formatted search results url = f"{api_base}/web_search"
""" headers = {
api_key = os.environ.get("LINKAI_API_KEY", "") "Authorization": f"Bearer {api_key}",
api_base = conf().get("linkai_api_base", "https://api.link-ai.tech") "Content-Type": "application/json",
url = f"{api_base.rstrip('/')}/v1/plugin/execute" }
# Zhipu Web Search expects `search_query` <= 70 chars; truncate
# gracefully so a long agent-supplied query doesn't get rejected.
trimmed_query = (query or "")[:70]
engine = (_tools_web_search_conf().get("zhipu_search_engine") or "search_pro").strip().lower()
if engine not in ("search_std", "search_pro", "search_pro_sogou", "search_pro_quark"):
engine = "search_pro"
payload: Dict[str, Any] = {
"search_engine": engine,
"search_query": trimmed_query,
"search_intent": False,
"count": max(1, min(int(count or 10), 50)),
"search_recency_filter": freshness if freshness in (
"oneDay", "oneWeek", "oneMonth", "oneYear", "noLimit"
) else "noLimit",
}
content_size = (_tools_web_search_conf().get("zhipu_content_size") or "").strip().lower()
if content_size in ("medium", "high"):
payload["content_size"] = content_size
logger.debug(f"[WebSearch] zhipu: query='{trimmed_query}', count={payload['count']}, engine={engine}")
resp = requests.post(url, headers=headers, json=payload, timeout=DEFAULT_TIMEOUT)
if resp.status_code == 401:
return ToolResult.fail("Error: Invalid Zhipu API key.")
if resp.status_code != 200:
return ToolResult.fail(f"Error: Zhipu API returned HTTP {resp.status_code}: {resp.text[:200]}")
data = resp.json()
# Business-level errors (1701/1702/1703 etc.) come back as
# {"error": {"code","message"}} even on HTTP 200.
if isinstance(data, dict) and data.get("error"):
err = data["error"] or {}
return ToolResult.fail(f"Error: Zhipu returned {err.get('code')}: {err.get('message','')}")
items = data.get("search_result") or (data.get("data") or {}).get("search_result") or []
results = []
for it in items:
results.append({
"title": it.get("title", ""),
"url": it.get("link") or it.get("url", ""),
"snippet": it.get("content") or it.get("snippet", ""),
"siteName": it.get("media") or it.get("siteName", ""),
"datePublished": it.get("publish_date") or it.get("datePublished", ""),
})
return ToolResult.success({
"query": query, "backend": "zhipu",
"total": len(results), "count": len(results), "results": results,
})
# ------------------------------------------------------------------
# Qianfan (Baidu)
# ------------------------------------------------------------------
def _search_qianfan(self, query: str, count: int, freshness: str) -> ToolResult:
    """Run a web search against the Qianfan ``ai_search/web_search`` endpoint.

    Args:
        query: Search text, sent as a single user message.
        count: Requested number of results; clamped to 1..50.
        freshness: Shared recency token, translated into a date-range
            filter by ``_qianfan_build_freshness_filter``.

    Returns:
        A successful ToolResult with ``query``, ``backend``, ``total``,
        ``count`` and ``results``, or a failed ToolResult for HTTP 401,
        any other non-200 status, or a response carrying a truthy
        ``code`` field.
    """
    token = _get_api_key("qianfan")
    base_url = (conf().get("qianfan_api_base") or "https://qianfan.baidubce.com/v2").rstrip("/")
    endpoint = f"{base_url}/ai_search/web_search"
    request_headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
        "X-Appbuilder-From": "cow",
    }

    count = max(1, min(int(count or 10), 50))
    body: Dict[str, Any] = {
        "messages": [{"role": "user", "content": query}],
        "search_source": "baidu_search_v2",
        "resource_type_filter": [{"type": "web", "top_k": count}],
    }
    # Freshness is sent as a page_time date range rather than a named
    # recency token; the filter is omitted when no range applies.
    freshness_filter = self._qianfan_build_freshness_filter(freshness)
    if freshness_filter:
        body["search_filter"] = freshness_filter

    logger.debug(f"[WebSearch] qianfan: query='{query}', count={count}, freshness={freshness!r}")
    resp = requests.post(endpoint, headers=request_headers, json=body, timeout=DEFAULT_TIMEOUT)

    status = resp.status_code
    if status == 401:
        return ToolResult.fail("Error: Invalid Qianfan API key.")
    if status != 200:
        return ToolResult.fail(f"Error: Qianfan API returned HTTP {resp.status_code}: {resp.text[:200]}")

    data = resp.json()
    # A truthy "code" in an HTTP 200 body is treated as a business error.
    if isinstance(data, dict) and data.get("code"):
        return ToolResult.fail(f"Error: Qianfan returned {data.get('code')}: {data.get('message','')}")

    results = [
        {
            "title": ref.get("title", ""),
            "url": ref.get("url", ""),
            "snippet": (ref.get("content") or "")[:200],
            "siteName": ref.get("web_anchor") or ref.get("website") or "",
            "datePublished": ref.get("date", ""),
        }
        for ref in (data.get("references") or [])
    ]
    hit_count = len(results)
    return ToolResult.success({
        "query": query, "backend": "qianfan",
        "total": hit_count, "count": hit_count, "results": results,
    })
@staticmethod
def _qianfan_build_freshness_filter(freshness: str) -> Optional[Dict[str, Any]]:
if not freshness or freshness == "noLimit":
return None
delta_days = {"oneDay": 1, "oneWeek": 7, "oneMonth": 30, "oneYear": 365}.get(freshness)
if not delta_days:
return None
from datetime import datetime, timedelta
now = datetime.now()
end_date = (now + timedelta(days=1)).strftime("%Y-%m-%d")
start_date = (now - timedelta(days=delta_days)).strftime("%Y-%m-%d")
return {"range": {"page_time": {"gte": start_date, "lt": end_date}}}
# ------------------------------------------------------------------
# LinkAI (plugin)
# ------------------------------------------------------------------
def _search_linkai(self, query: str, count: int, freshness: str) -> ToolResult:
    """Run a web search through the LinkAI ``web-search`` plugin.

    :param query: Search text.
    :param count: Requested number of results, forwarded unchanged.
    :param freshness: Recency token, forwarded unchanged.
    :return: ``ToolResult.success`` with the unified result structure, or
        ``ToolResult.fail`` on an HTTP error or an unsuccessful response.
    """
    api_key = _get_api_key("linkai")
    api_base = (conf().get("linkai_api_base") or "https://api.link-ai.tech").rstrip("/")
    url = f"{api_base}/v1/plugin/execute"
    from common.utils import get_cloud_headers
    headers = get_cloud_headers(api_key)
    payload = {"code": "web-search", "args": {"query": query, "count": count, "freshness": freshness}}
    logger.debug(f"[WebSearch] linkai: query='{query}', count={count}")

    resp = requests.post(url, headers=headers, json=payload, timeout=DEFAULT_TIMEOUT)
    if resp.status_code == 401:
        return ToolResult.fail("Error: Invalid LinkAI API key.")
    if resp.status_code != 200:
        return ToolResult.fail(f"Error: LinkAI API returned HTTP {resp.status_code}")

    data = resp.json()
    if not data.get("success"):
        msg = data.get("message") or "Unknown error"
        return ToolResult.fail(f"Error: LinkAI search failed: {msg}")

    raw = data.get("data", "")
    # LinkAI may return the search data as a JSON string.
    if isinstance(raw, str):
        try:
            raw = json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            # Plain text: return it as a single result.
            return ToolResult.success({
                "query": query, "backend": "linkai",
                "total": 1, "count": 1, "results": [{"content": raw}],
            })

    # Bing-compatible structure: hits live under webPages.value.
    if isinstance(raw, dict):
        pages = (raw.get("webPages") or {}).get("value", []) or []
        if pages:
            results = []
            for p in pages:
                item = {
                    "title": p.get("name", ""),
                    "url": p.get("url", ""),
                    "snippet": p.get("snippet", ""),
                    "siteName": p.get("siteName", ""),
                    "datePublished": p.get("datePublished") or p.get("dateLastCrawled", ""),
                }
                if p.get("summary"):
                    item["summary"] = p["summary"]
                results.append(item)
            total = (raw.get("webPages") or {}).get("totalEstimatedMatches", len(results))
            return ToolResult.success({
                "query": query, "backend": "linkai",
                "total": total, "count": len(results), "results": results,
            })

    # Fallback: hand back whatever was returned as a single text result.
    return ToolResult.success({
        "query": query, "backend": "linkai",
        "total": 1, "count": 1, "results": [{"content": str(raw)}],
    })

12
app.py
View File

@@ -289,6 +289,16 @@ def _warmup_mcp_tools():
logger.warning(f"[App] MCP warmup failed (non-fatal): {e}") logger.warning(f"[App] MCP warmup failed (non-fatal): {e}")
def _warmup_scheduler():
    """Build the AgentBridge at process boot so the scheduler thread starts
    immediately instead of on the first user message.

    Any failure is logged as a warning and otherwise ignored.
    """
    try:
        from bridge.bridge import Bridge

        bridge = Bridge()
        bridge.get_agent_bridge()
    except Exception as e:
        logger.warning(f"[App] Scheduler warmup failed: {e}")
def _sync_builtin_skills(): def _sync_builtin_skills():
"""Sync builtin skills from project skills/ to workspace skills/ on startup.""" """Sync builtin skills from project skills/ to workspace skills/ on startup."""
import shutil import shutil
@@ -354,6 +364,8 @@ def run():
# latency isn't dominated by npx package downloads. # latency isn't dominated by npx package downloads.
_warmup_mcp_tools() _warmup_mcp_tools()
_warmup_scheduler()
logger.info(f"[App] Starting channels: {channel_names}") logger.info(f"[App] Starting channels: {channel_names}")
_channel_mgr = ChannelManager() _channel_mgr = ChannelManager()

View File

@@ -5,7 +5,7 @@ Agent Bridge - Integrates Agent system with existing COW bridge
import os import os
from typing import Optional, List from typing import Optional, List
from agent.protocol import Agent, LLMModel, LLMRequest from agent.protocol import Agent, LLMModel, LLMRequest, get_cancel_registry
from bridge.agent_event_handler import AgentEventHandler from bridge.agent_event_handler import AgentEventHandler
from bridge.agent_initializer import AgentInitializer from bridge.agent_initializer import AgentInitializer
from bridge.bridge import Bridge from bridge.bridge import Bridge
@@ -285,6 +285,15 @@ class AgentBridge:
# Create helper instances # Create helper instances
self.initializer = AgentInitializer(bridge, self) self.initializer = AgentInitializer(bridge, self)
# Eager-start the scheduler so cron tasks fire without waiting
# for the first user message. init_scheduler is idempotent.
try:
from agent.tools.scheduler.integration import init_scheduler
if init_scheduler(self):
self.scheduler_initialized = True
except Exception as e:
logger.warning(f"[AgentBridge] Eager scheduler init failed: {e}")
def create_agent(self, system_prompt: str, tools: List = None, **kwargs) -> Agent: def create_agent(self, system_prompt: str, tools: List = None, **kwargs) -> Agent:
""" """
Create the super agent with COW integration Create the super agent with COW integration
@@ -390,11 +399,22 @@ class AgentBridge:
""" """
session_id = None session_id = None
agent = None agent = None
request_id = None
cancel_event = None
try: try:
# Extract session_id from context for user isolation # Extract session_id from context for user isolation
if context: if context:
session_id = context.kwargs.get("session_id") or context.get("session_id") session_id = context.kwargs.get("session_id") or context.get("session_id")
request_id = context.kwargs.get("request_id") or context.get("request_id")
# Register a cancel token. Prefer per-turn request_id (web),
# fall back to session_id (IM channels). The Event is polled by
# AgentStreamExecutor at safe checkpoints.
registry = get_cancel_registry()
token_key = request_id or session_id
if token_key:
cancel_event = registry.register(token_key, session_id=session_id)
# Get agent for this session (will auto-initialize if needed) # Get agent for this session (will auto-initialize if needed)
agent = self.get_agent(session_id=session_id) agent = self.get_agent(session_id=session_id)
if not agent: if not agent:
@@ -449,7 +469,8 @@ class AgentBridge:
response = agent.run_stream( response = agent.run_stream(
user_message=query, user_message=query,
on_event=event_handler.handle_event, on_event=event_handler.handle_event,
clear_history=clear_history clear_history=clear_history,
cancel_event=cancel_event,
) )
finally: finally:
# Restore original tools # Restore original tools
@@ -459,6 +480,13 @@ class AgentBridge:
# Log execution summary # Log execution summary
event_handler.log_summary() event_handler.log_summary()
# Release cancel token; keep registry bounded.
if token_key:
try:
registry.unregister(token_key)
except Exception:
pass
# Persist new messages generated during this run # Persist new messages generated during this run
if session_id: if session_id:
channel_type = (context.get("channel_type") or "") if context else "" channel_type = (context.get("channel_type") or "") if context else ""
@@ -512,6 +540,12 @@ class AgentBridge:
logger.info(f"[AgentBridge] Cleared DB for session after error: {session_id}") logger.info(f"[AgentBridge] Cleared DB for session after error: {session_id}")
except Exception as db_err: except Exception as db_err:
logger.warning(f"[AgentBridge] Failed to clear DB after error: {db_err}") logger.warning(f"[AgentBridge] Failed to clear DB after error: {db_err}")
# Release cancel token on error path too (idempotent).
if cancel_event is not None and (request_id or session_id):
try:
get_cancel_registry().unregister(request_id or session_id)
except Exception:
pass
return Reply(ReplyType.ERROR, f"Agent error: {str(e)}") return Reply(ReplyType.ERROR, f"Agent error: {str(e)}")
def _schedule_mcp_hot_reload(self, agent): def _schedule_mcp_hot_reload(self, agent):

View File

@@ -2,44 +2,40 @@
Agent Event Handler - Handles agent events and thinking process output Agent Event Handler - Handles agent events and thinking process output
""" """
from common import const
from common.log import logger from common.log import logger
# Cap intermediate thinking messages on weixin to stay within send quota.
WEIXIN_THINKING_INSTANT_MAX = 7
class AgentEventHandler: class AgentEventHandler:
""" """
Handles agent events and optionally sends intermediate messages to channel Handles agent events and optionally sends intermediate messages to channel
""" """
def __init__(self, context=None, original_callback=None):
    """Initialize the event handler.

    Args:
        context: COW context (used to reach the channel and channel type).
        original_callback: Original event callback to chain after handling.
    """
    self.context = context
    self.original_callback = original_callback

    # Channel used for sending intermediate messages, when available.
    self.channel = None
    if context:
        self.channel = context.kwargs.get("channel") if hasattr(context, "kwargs") else None

    self.current_content = ""
    self.turn_number = 0

    channel_type = ""
    if context and hasattr(context, "kwargs"):
        channel_type = context.kwargs.get("channel_type", "") or ""
    self._is_weixin = channel_type == const.WEIXIN
    # Number of intermediate messages already sent, and the buffer holding
    # those deferred once the weixin instant-send cap is reached.
    self._thinking_sent_count = 0
    self._merged_buf: list[str] = []
def handle_event(self, event): def handle_event(self, event):
"""
Main event handler
Args:
event: Event dict with type and data
"""
event_type = event.get("type") event_type = event.get("type")
data = event.get("data", {}) data = event.get("data", {})
# Dispatch to specific handlers
if event_type == "turn_start": if event_type == "turn_start":
self._handle_turn_start(data) self._handle_turn_start(data)
elif event_type == "message_update": elif event_type == "message_update":
@@ -52,25 +48,23 @@ class AgentEventHandler:
self._handle_tool_execution_start(data) self._handle_tool_execution_start(data)
elif event_type == "tool_execution_end": elif event_type == "tool_execution_end":
self._handle_tool_execution_end(data) self._handle_tool_execution_end(data)
elif event_type == "agent_end":
# Call original callback if provided self._handle_agent_end(data)
if self.original_callback: if self.original_callback:
self.original_callback(event) self.original_callback(event)
def _handle_turn_start(self, data): def _handle_turn_start(self, data):
"""Handle turn start event"""
self.turn_number = data.get("turn", 0) self.turn_number = data.get("turn", 0)
self.current_content = "" self.current_content = ""
def _handle_message_update(self, data): def _handle_message_update(self, data):
"""Handle message update event (streaming content text)"""
delta = data.get("delta", "") delta = data.get("delta", "")
self.current_content += delta self.current_content += delta
def _handle_message_end(self, data): def _handle_message_end(self, data):
"""Handle message end event"""
tool_calls = data.get("tool_calls", []) tool_calls = data.get("tool_calls", [])
if tool_calls: if tool_calls:
if self.current_content.strip(): if self.current_content.strip():
logger.info(f"💭 {self.current_content.strip()[:200]}{'...' if len(self.current_content) > 200 else ''}") logger.info(f"💭 {self.current_content.strip()[:200]}{'...' if len(self.current_content) > 200 else ''}")
@@ -78,35 +72,54 @@ class AgentEventHandler:
else: else:
if self.current_content.strip(): if self.current_content.strip():
logger.debug(f"💬 {self.current_content.strip()[:200]}{'...' if len(self.current_content) > 200 else ''}") logger.debug(f"💬 {self.current_content.strip()[:200]}{'...' if len(self.current_content) > 200 else ''}")
# Drain weixin buffer before final reply leaves chat_channel
self._flush_merged_now()
self.current_content = "" self.current_content = ""
def _handle_agent_end(self, data):
self._flush_merged_now()
def _handle_tool_execution_start(self, data): def _handle_tool_execution_start(self, data):
"""Handle tool execution start event - logged by agent_stream.py"""
pass pass
def _handle_tool_execution_end(self, data): def _handle_tool_execution_end(self, data):
"""Handle tool execution end event - logged by agent_stream.py"""
pass pass
def _send_to_channel(self, message):
    """Try to send an intermediate message to the channel.

    Skipped in SSE mode because thinking text is already streamed via
    on_event. On weixin, only the first WEIXIN_THINKING_INSTANT_MAX
    messages are sent immediately; later ones are buffered for a merged
    flush.
    """
    if self.context and self.context.get("on_event"):
        return
    if not self.channel:
        return

    if not self._is_weixin:
        self._do_send(message)
        return

    if self._thinking_sent_count < WEIXIN_THINKING_INSTANT_MAX:
        self._do_send(message)
        self._thinking_sent_count += 1
        return

    # Cap reached: hold the message until _flush_merged_now runs.
    self._merged_buf.append(message)
def _flush_merged_now(self):
    """Send all buffered intermediate messages as one message.

    Does nothing when the buffer is empty. Buffered messages are joined
    with blank lines, the buffer is reset, and the send counter is bumped.
    """
    pending = self._merged_buf
    if not pending:
        return
    # Swap in a fresh buffer first; ``pending`` keeps the old contents.
    self._merged_buf = []
    count = len(pending)
    merged = "\n\n".join(pending)
    logger.debug(f"[AgentEventHandler] Flushing {count} merged thinking msgs, len={len(merged)}")
    self._do_send(merged)
    self._thinking_sent_count += 1
def _do_send(self, message):
    """Send *message* to the channel as a text reply.

    Best-effort: any failure is logged at debug level and swallowed so an
    intermediate message can never break the agent run.
    """
    try:
        from bridge.reply import Reply, ReplyType
        reply = Reply(ReplyType.TEXT, message)
        self.channel._send(reply, self.context)
    except Exception as e:
        logger.debug(f"[AgentEventHandler] Failed to send to channel: {e}")
def log_summary(self):
    """Log execution summary - intentionally a no-op.

    Real-time logging during execution is sufficient, so no summary is
    emitted here.
    """
    pass

View File

@@ -521,7 +521,7 @@ class AgentInitializer:
if tool_name == "web_search": if tool_name == "web_search":
from agent.tools.web_search.web_search import WebSearch from agent.tools.web_search.web_search import WebSearch
if not WebSearch.is_available(): if not WebSearch.is_available():
logger.debug("[AgentInitializer] WebSearch skipped - no BOCHA_API_KEY or LINKAI_API_KEY") logger.debug("[AgentInitializer] WebSearch skipped - no search provider configured")
continue continue
# Special handling for EnvConfig tool # Special handling for EnvConfig tool

View File

@@ -14,7 +14,9 @@ class Bridge(object):
def __init__(self): def __init__(self):
self.btype = { self.btype = {
"chat": const.OPENAI, "chat": const.OPENAI,
"voice_to_text": conf().get("voice_to_text", "openai"), # Empty `voice_to_text` (the default in new configs) triggers
# the auto-pick below — see _auto_pick_voice_to_text for order.
"voice_to_text": conf().get("voice_to_text") or self._auto_pick_voice_to_text(),
"text_to_voice": conf().get("text_to_voice", "google"), "text_to_voice": conf().get("text_to_voice", "google"),
"translate": conf().get("translate", "baidu"), "translate": conf().get("translate", "baidu"),
} }
@@ -61,6 +63,10 @@ class Bridge(object):
if model_type and model_type.startswith("deepseek"): if model_type and model_type.startswith("deepseek"):
self.btype["chat"] = const.DEEPSEEK self.btype["chat"] = const.DEEPSEEK
# 小米 MiMo 系列模型,全部以 mimo- 开头
if model_type and model_type.startswith("mimo-"):
self.btype["chat"] = const.MIMO
if model_type and isinstance(model_type, str): if model_type and isinstance(model_type, str):
lowered_model_type = model_type.lower() lowered_model_type = model_type.lower()
if lowered_model_type == const.QIANFAN or lowered_model_type.startswith("ernie"): if lowered_model_type == const.QIANFAN or lowered_model_type.startswith("ernie"):
@@ -84,6 +90,46 @@ class Bridge(object):
self.chat_bots = {} self.chat_bots = {}
self._agent_bridge = None self._agent_bridge = None
def refresh_voice(self):
    """Reload the voice providers from config and drop the cached voice bots.

    Re-reads ``voice_to_text`` / ``text_to_voice`` so the next lookup picks
    up the new provider. Used by the web console after the user edits voice
    settings. Does NOT touch the agent_bridge / agent state.
    """
    configured_v2t = conf().get("voice_to_text")
    new_v2t = configured_v2t or self._auto_pick_voice_to_text()
    new_t2v = conf().get("text_to_voice", "google")

    linkai_enabled = conf().get("use_linkai") and conf().get("linkai_api_key")
    if linkai_enabled:
        # With LinkAI enabled, unset or OpenAI-flavoured settings are
        # routed to the LinkAI provider.
        v2t_setting = conf().get("voice_to_text")
        if not v2t_setting or v2t_setting in ["openai"]:
            new_v2t = const.LINKAI
        t2v_setting = conf().get("text_to_voice")
        if not t2v_setting or t2v_setting in ["openai", const.TTS_1, const.TTS_1_HD]:
            new_t2v = const.LINKAI

    self.btype["voice_to_text"] = new_v2t
    self.btype["text_to_voice"] = new_t2v
    # Evict cached bot instances so they are rebuilt on next use.
    for slot in ("voice_to_text", "text_to_voice"):
        self.bots.pop(slot, None)
    logger.info(f"[Bridge] voice refreshed: voice_to_text={new_v2t}, text_to_voice={new_t2v}")
@staticmethod
def _auto_pick_voice_to_text() -> str:
    """Choose an ASR provider from the configured API keys.

    Used when ``voice_to_text`` is unset. Keys are checked in the order
    openai → dashscope → zhipu → linkai, and the first one holding a real
    value wins. Falls back to 'openai' when nothing is configured so the
    original "missing key" error is preserved.
    """
    # Template values that count as "not configured".
    placeholders = ("YOUR API KEY", "YOUR_API_KEY")
    candidates = (
        ("open_ai_api_key", "openai"),
        ("dashscope_api_key", "dashscope"),
        ("zhipu_ai_api_key", "zhipu"),
        ("linkai_api_key", "linkai"),
    )
    for conf_key, provider in candidates:
        value = (conf().get(conf_key) or "").strip()
        if value and value not in placeholders:
            return provider
    return "openai"
# 模型对应的接口 # 模型对应的接口
def get_bot(self, typename): def get_bot(self, typename):
if self.bots.get(typename) is None: if self.bots.get(typename) is None:

View File

@@ -42,6 +42,12 @@ def create_channel(channel_type) -> Channel:
elif channel_type == const.QQ: elif channel_type == const.QQ:
from channel.qq.qq_channel import QQChannel from channel.qq.qq_channel import QQChannel
ch = QQChannel() ch = QQChannel()
elif channel_type == const.TELEGRAM:
from channel.telegram.telegram_channel import TelegramChannel
ch = TelegramChannel()
elif channel_type == const.SLACK:
from channel.slack.slack_channel import SlackChannel
ch = SlackChannel()
elif channel_type in (const.WEIXIN, "wx"): elif channel_type in (const.WEIXIN, "wx"):
from channel.weixin.weixin_channel import WeixinChannel from channel.weixin.weixin_channel import WeixinChannel
ch = WeixinChannel() ch = WeixinChannel()

View File

@@ -171,7 +171,13 @@ class ChatChannel(Channel):
if "desire_rtype" not in context and conf().get("always_reply_voice") and ReplyType.VOICE not in self.NOT_SUPPORT_REPLYTYPE: if "desire_rtype" not in context and conf().get("always_reply_voice") and ReplyType.VOICE not in self.NOT_SUPPORT_REPLYTYPE:
context["desire_rtype"] = ReplyType.VOICE context["desire_rtype"] = ReplyType.VOICE
elif context.type == ContextType.VOICE: elif context.type == ContextType.VOICE:
if "desire_rtype" not in context and conf().get("voice_reply_voice") and ReplyType.VOICE not in self.NOT_SUPPORT_REPLYTYPE: # Voice input replies with voice when either voice_reply_voice
# (mirror voice) or the global always_reply_voice toggle is on.
if (
"desire_rtype" not in context
and (conf().get("voice_reply_voice") or conf().get("always_reply_voice"))
and ReplyType.VOICE not in self.NOT_SUPPORT_REPLYTYPE
):
context["desire_rtype"] = ReplyType.VOICE context["desire_rtype"] = ReplyType.VOICE
return context return context
@@ -264,6 +270,8 @@ class ChatChannel(Channel):
if reply.type == ReplyType.TEXT: if reply.type == ReplyType.TEXT:
reply_text = reply.content reply_text = reply.content
if desire_rtype == ReplyType.VOICE and ReplyType.VOICE not in self.NOT_SUPPORT_REPLYTYPE: if desire_rtype == ReplyType.VOICE and ReplyType.VOICE not in self.NOT_SUPPORT_REPLYTYPE:
# Preserve original text for the "text-then-voice" pattern in _send_reply.
context["voice_reply_text"] = reply.content
reply = super().build_text_to_voice(reply.content) reply = super().build_text_to_voice(reply.content)
return self._decorate_reply(context, reply) return self._decorate_reply(context, reply)
if context.get("isgroup", False): if context.get("isgroup", False):
@@ -311,6 +319,15 @@ class ChatChannel(Channel):
# 短暂延迟后发送图片 # 短暂延迟后发送图片
time.sleep(0.3) time.sleep(0.3)
self._send(reply, context) self._send(reply, context)
# Send text bubble before voice, unless channel already streamed
# the text (feishu) or natively renders STT under the voice (wechatcom).
elif reply.type == ReplyType.VOICE and context.get("voice_reply_text") \
and not context.get("feishu_streamed") \
and context.get("channel_type") not in ("wechatcom_app",):
text_reply = Reply(ReplyType.TEXT, context.get("voice_reply_text"))
self._send(text_reply, context)
time.sleep(0.3)
self._send(reply, context)
else: else:
self._send(reply, context) self._send(reply, context)
@@ -421,8 +438,21 @@ class ChatChannel(Channel):
return func return func
# Chat commands that must bypass the per-session serial queue,
# otherwise /cancel would queue behind the task it tries to cancel.
# Use /cancel (not /stop) to avoid colliding with `cow stop` CLI.
_BYPASS_QUEUE_COMMANDS = ("/cancel",)
def produce(self, context: Context): def produce(self, context: Context):
session_id = context["session_id"] session_id = context["session_id"]
# Fast path: /cancel must not enter the queue.
if context.type == ContextType.TEXT and context.content:
stripped = context.content.strip().lower()
if stripped in self._BYPASS_QUEUE_COMMANDS:
self._handle_cancel_command(context, session_id)
return
with self.lock: with self.lock:
if session_id not in self.sessions: if session_id not in self.sessions:
self.sessions[session_id] = [ self.sessions[session_id] = [
@@ -434,6 +464,29 @@ class ChatChannel(Channel):
else: else:
self.sessions[session_id][0].put(context) self.sessions[session_id][0].put(context)
def _handle_cancel_command(self, context: Context, session_id: str) -> None:
    """Cancel any in-flight agent run for *session_id* and reply inline.

    Runs synchronously on the caller's thread. The reply goes through
    _send_reply so plugins (e.g. logging) still observe it. Any failure is
    logged as a warning and swallowed.
    """
    try:
        from agent.protocol import get_cancel_registry
        from bridge.reply import Reply, ReplyType

        registry = get_cancel_registry()
        cancelled = registry.cancel_session(session_id)
        if cancelled > 0:
            notice = "🛑 已中止"
        else:
            notice = "当前没有可中止的任务。"
        logger.info(
            f"[chat_channel] /cancel fast-path: session={session_id}, cancelled={cancelled}"
        )
        reply = Reply(ReplyType.TEXT, notice)
        self._send_reply(context, reply)
    except Exception as e:
        logger.warning(f"[chat_channel] /cancel fast-path failed: {e}")
# 消费者函数,单独线程,用于从消息队列中取出消息并处理 # 消费者函数,单独线程,用于从消息队列中取出消息并处理
def consume(self): def consume(self):
while True: while True:

View File

@@ -86,6 +86,8 @@ def _check(func):
@singleton @singleton
class DingTalkChanel(ChatChannel, dingtalk_stream.ChatbotHandler): class DingTalkChanel(ChatChannel, dingtalk_stream.ChatbotHandler):
NOT_SUPPORT_REPLYTYPE = []
dingtalk_client_id = conf().get('dingtalk_client_id') dingtalk_client_id = conf().get('dingtalk_client_id')
dingtalk_client_secret = conf().get('dingtalk_client_secret') dingtalk_client_secret = conf().get('dingtalk_client_secret')
@@ -870,6 +872,48 @@ class DingTalkChanel(ChatChannel, dingtalk_stream.ChatbotHandler):
self.reply_text("抱歉,文件上传失败", incoming_message) self.reply_text("抱歉,文件上传失败", incoming_message)
return return
# Native sampleAudio. Upload only accepts ogg/amr, so convert TTS mp3/wav to amr.
elif reply.type == ReplyType.VOICE:
logger.info(f"[DingTalk] Sending voice: {reply.content}")
access_token = self.get_access_token()
if not access_token:
logger.error("[DingTalk] Cannot get access token for voice")
self.reply_text("抱歉语音发送失败无法获取token", incoming_message)
return
voice_path = reply.content
if voice_path.startswith("file://"):
voice_path = voice_path[7:]
amr_path = voice_path
duration_ms = 0
if not voice_path.lower().endswith((".amr", ".ogg")):
try:
from voice.audio_convert import any_to_amr
amr_path = os.path.splitext(voice_path)[0] + ".amr"
duration_ms = int(any_to_amr(voice_path, amr_path) or 0)
except Exception as e:
logger.error(f"[DingTalk] Failed to convert voice to amr: {e}")
self.reply_text("抱歉,语音转码失败", incoming_message)
return
media_id = self.upload_media(amr_path, media_type="voice")
if not media_id:
logger.error("[DingTalk] Failed to upload voice media")
self.reply_text("抱歉,语音上传失败", incoming_message)
return
msg_param = {
"mediaId": media_id,
"duration": str(duration_ms or 1000),
}
success = self._send_file_message(
access_token, incoming_message, "sampleAudio", msg_param, isgroup
)
if not success:
self.reply_text("抱歉,语音发送失败", incoming_message)
return
# 处理文本消息 # 处理文本消息
elif reply.type == ReplyType.TEXT: elif reply.type == ReplyType.TEXT:
logger.info(f"[DingTalk] Sending text message, length={len(reply.content)}") logger.info(f"[DingTalk] Sending text message, length={len(reply.content)}")

View File

@@ -752,6 +752,9 @@ class FeiShuChanel(ChatChannel):
init_in_flight = [False] init_in_flight = [False]
# 一旦初始化失败就长期标记为 disabled本次回复不再尝试任何流式调用 # 一旦初始化失败就长期标记为 disabled本次回复不再尝试任何流式调用
disabled = [False] disabled = [False]
# True after agent_cancelled: agent_end stops rewriting the card
# with stale final_response and just finalizes current content.
cancelled = [False]
lock = threading.Lock() lock = threading.Lock()
# ---- 异步推送队列 ---------------------------------------------------- # ---- 异步推送队列 ----------------------------------------------------
@@ -1076,18 +1079,42 @@ class FeiShuChanel(ChatChannel):
message_id[0] = None message_id[0] = None
sequence[0] = 0 sequence[0] = 0
elif event_type == "agent_cancelled":
# Lock channel into "no-rewrite" mode: the subsequent
# agent_end's final_response is from the last *completed*
# turn (the user already saw it), so rewriting the card
# would duplicate it visually.
with lock:
cancelled[0] = True
elif event_type == "agent_end": elif event_type == "agent_end":
# 最终回复:用 final_response 覆盖当前流式卡片,然后关闭流式模式。 # 最终回复:用 final_response 覆盖当前流式卡片,然后关闭流式模式。
final_response = data.get("final_response", "") final_response = data.get("final_response", "")
if not final_response:
return
final_text = str(final_response)
# 标记 streamed 让 chat_channel 跳过 send() # 标记 streamed 让 chat_channel 跳过 send()
context["feishu_streamed"] = True context["feishu_streamed"] = True
with lock: with lock:
was_cancelled = cancelled[0]
has_card = card_id[0] is not None has_card = card_id[0] is not None
init_busy = init_in_flight[0] init_busy = init_in_flight[0]
pending_text = current_text[0]
if was_cancelled:
# Cancelled path: finalize the in-flight card with
# partial output (or a short marker if empty); drop
# stale final_response to avoid duplicating last turn.
if has_card:
_drain_push_queue()
partial = (pending_text or "").rstrip()
final_text = partial or "_(已中止)_"
_stream_update_text(final_text)
_close_streaming_mode(final_text)
push_queue.put(None)
return
if not final_response:
return
final_text = str(final_response)
# 罕见情况agent_end 触发时还没创建过卡片(极快返回 / 没有 # 罕见情况agent_end 触发时还没创建过卡片(极快返回 / 没有
# message_update主动创建一张承载 final_text。 # message_update主动创建一张承载 final_text。
@@ -1515,10 +1542,16 @@ class FeiShuChanel(ChatChannel):
else: else:
context.type = ContextType.TEXT context.type = ContextType.TEXT
context.content = content.strip() context.content = content.strip()
# Text input opts into voice replies only when the always-on toggle is set.
if "desire_rtype" not in context and conf().get("always_reply_voice"):
context["desire_rtype"] = ReplyType.VOICE
elif context.type == ContextType.VOICE: elif context.type == ContextType.VOICE:
# 2.语音请求 # 2.语音请求: voice input replies with voice if either
if "desire_rtype" not in context and conf().get("voice_reply_voice"): # voice_reply_voice (mirror reply) or always_reply_voice is on.
if "desire_rtype" not in context and (
conf().get("voice_reply_voice") or conf().get("always_reply_voice")
):
context["desire_rtype"] = ReplyType.VOICE context["desire_rtype"] = ReplyType.VOICE
return context return context

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1,506 @@
"""
Slack channel via Bolt for Python (Socket Mode).
Features:
- Direct message & channel chat (text / image / file)
- Channel trigger: @mention or reply in a thread the bot is in (configurable)
- /cancel fast-path matches Web channel behaviour
- Socket Mode: no public IP / callback URL required, works behind NAT
Implementation note:
slack_bolt's SocketModeHandler is blocking and runs its own background
threads. We start it in a dedicated thread so the rest of cow (sync) stays
untouched. Inbound events are dispatched onto cow's existing sync
ChatChannel.produce() pipeline; outbound send() calls the Slack Web API
client directly (it is sync-safe).
"""
import os
import re
import threading
import requests
from bridge.context import Context, ContextType
from bridge.reply import Reply, ReplyType
from channel.chat_channel import ChatChannel, check_prefix
from channel.slack.slack_message import SlackMessage
from common.expired_dict import ExpiredDict
from common.log import logger
from common.singleton import singleton
from config import conf
@singleton
class SlackChannel(ChatChannel):
NOT_SUPPORT_REPLYTYPE = []
def __init__(self):
super().__init__()
self.bot_token = ""
self.app_token = ""
self.bot_user_id = "" # used to strip @mention and ignore self messages
self._app = None
self._handler = None
self._client = None
self._loop_thread = None
# Idempotent dedup; Slack retries event delivery on slow ack
self._received_msgs = ExpiredDict(60 * 60 * 1)
# Disable group whitelist / prefix checks (we handle triggering ourselves
# in _should_reply_in_channel), aligned with telegram / feishu channels.
conf()["group_name_white_list"] = ["ALL_GROUP"]
conf()["single_chat_prefix"] = [""]
# ------------------------------------------------------------------
# Lifecycle
# ------------------------------------------------------------------
def startup(self):
    """Validate config, authenticate, and serve events over Socket Mode.

    Reads ``slack_bot_token`` / ``slack_app_token`` from config, resolves the
    bot's own user id through ``auth_test``, registers the event handlers and
    then blocks until stop() is called (or the connection attempt fails).
    Every failure path is logged and passed to ``report_startup_error``;
    nothing is raised to the caller.
    """
    self.bot_token = conf().get("slack_bot_token", "")
    self.app_token = conf().get("slack_app_token", "")
    if not self.bot_token or not self.app_token:
        err = "[Slack] slack_bot_token and slack_app_token are both required"
        logger.error(err)
        self.report_startup_error(err)
        return
    # Guard against the common mistake of swapping the two tokens:
    # bot token must start with xoxb-, app-level token with xapp-.
    if not self.bot_token.startswith("xoxb-") or not self.app_token.startswith("xapp-"):
        err = (
            "[Slack] token type mismatch: slack_bot_token must start with 'xoxb-' "
            "and slack_app_token must start with 'xapp-' (they look swapped)"
        )
        logger.error(err)
        self.report_startup_error(err)
        return
    try:
        from slack_bolt import App
        from slack_bolt.adapter.socket_mode import SocketModeHandler
    except ImportError:
        err = (
            "[Slack] slack_bolt is not installed. "
            "Run: pip install slack_bolt"
        )
        logger.error(err)
        self.report_startup_error(err)
        return
    try:
        self._app = App(token=self.bot_token)
        self._client = self._app.client
        # Resolve our own bot user id (needed for @mention strip / self-ignore)
        auth = self._client.auth_test()
        self.bot_user_id = auth.get("user_id", "")
        self.name = self.bot_user_id  # ChatChannel uses self.name to strip @-mention
        logger.info(f"[Slack] Bot logged in as user_id={self.bot_user_id}, team={auth.get('team')}")
    except Exception as e:
        err = f"[Slack] auth_test failed: {e}"
        logger.error(err)
        self.report_startup_error(err)
        return
    self._register_handlers()
    self._handler = SocketModeHandler(self._app, self.app_token)
    # A fresh event per startup() call, so a channel started again after
    # stop() does not exit immediately.
    stop_event = threading.Event()
    self._stop_event = stop_event

    def _run():
        try:
            logger.info("[Slack] Starting Socket Mode connection...")
            # connect() returns once the connection is set up, whereas
            # start() blocks the calling thread indefinitely. Connecting
            # first means success is only reported after the connection
            # attempt did not raise, and lets stop() end this thread.
            self._handler.connect()
            self.report_startup_success()
            logger.info("[Slack] ✅ Slack bot ready, listening for events")
            stop_event.wait()
        except Exception as e:
            logger.error(f"[Slack] socket mode crashed: {e}", exc_info=True)
            self.report_startup_error(str(e))
        finally:
            logger.info("[Slack] socket mode exited")

    self._loop_thread = threading.Thread(target=_run, daemon=True, name="slack-socket")
    self._loop_thread.start()
    # Block startup() until the handler thread exits, matching other channels'
    # behaviour (startup is a blocking call).
    self._loop_thread.join()


def _register_handlers(self):
    """Attach the ``app_mention`` and ``message`` listeners to the Bolt app."""
    app = self._app

    # app_mention: bot is @-mentioned in a channel
    @app.event("app_mention")
    def _on_app_mention(event, ack):
        ack()
        self._handle_event(event, is_group=True)

    # message: DMs and channel messages (including thread replies)
    @app.event("message")
    def _on_message(event, ack):
        ack()
        self._handle_message_event(event)


def stop(self):
    """Close the Socket Mode connection and wait (up to 10 s) for the worker thread.

    Safe to call before startup(): a missing stop event, handler or thread is
    simply skipped. Errors while closing are logged, never raised.
    """
    logger.info("[Slack] stop() called")
    # Wake the worker thread that startup() parked on the stop event.
    stop_event = getattr(self, "_stop_event", None)
    if stop_event is not None:
        stop_event.set()
    try:
        if self._handler is not None:
            self._handler.close()
    except Exception as e:
        logger.warning(f"[Slack] handler close error: {e}")
    if self._loop_thread and self._loop_thread.is_alive():
        try:
            self._loop_thread.join(timeout=10)
        except Exception as e:
            # e.g. RuntimeError when stop() runs on the worker thread itself
            logger.debug(f"[Slack] worker join skipped: {e}")
    logger.info("[Slack] stop() completed")
# ------------------------------------------------------------------
# Inbound: slack event -> ChatMessage -> ChatChannel.produce
# ------------------------------------------------------------------
def _handle_message_event(self, event: dict):
    """Filter a raw `message` event and forward it to _handle_event.

    Dropped without processing: events carrying a ``bot_id``, events whose
    subtype is ``bot_message`` / ``message_changed`` / ``message_deleted``,
    events sent by the bot's own user id, and channel/group/mpim messages that
    _should_reply_in_channel rejects. Any exception is logged, not raised.
    """
    try:
        logger.debug(
            f"[Slack] message event: channel_type={event.get('channel_type')}, "
            f"subtype={event.get('subtype')}, user={event.get('user')}, "
            f"ts={event.get('ts')}, thread_ts={event.get('thread_ts')}"
        )
        # Bot traffic (including our own) and edits/deletes are noise.
        skipped_subtypes = ("bot_message", "message_changed", "message_deleted")
        if event.get("bot_id"):
            return
        if event.get("subtype") in skipped_subtypes:
            return
        if event.get("user") == self.bot_user_id:
            return
        # A DM (im) is a single chat; channel / group / mpim count as group chat.
        in_channel = event.get("channel_type", "") in ("channel", "group", "mpim")
        # Explicit @bot is delivered through the app_mention listener; plain
        # channel messages only pass when the trigger configuration allows it.
        if in_channel and not self._should_reply_in_channel(event):
            return
        self._handle_event(event, is_group=in_channel)
    except Exception as err:
        logger.error(f"[Slack] _handle_message_event error: {err}", exc_info=True)
def _handle_event(self, event: dict, is_group: bool):
"""Parse event -> build SlackMessage -> produce().

Steps, in order:
1. Ignore events without a channel id and events already seen
(dedup key is "<channel>:<ts>" in self._received_msgs).
2. Classify the event via _parse_event (this may download a file).
3. In channels, strip the bot's own <@id> mention from text / caption.
4. Media with a caption is merged into a single TEXT query; media without
a caption is only stored in the file cache and the method returns.
5. For TEXT: "/cancel" or "cancel" (stripped, case-insensitive) is handled by
_do_cancel; otherwise any cached files are appended as "[tag: path]" lines
and the cache for this session is cleared.
6. Build the context via _compose_context, attach the Slack routing keys and
hand it to produce().

Any exception is logged with traceback and swallowed.
"""
try:
channel_id = event.get("channel", "")
ts = event.get("ts", "")
if not channel_id:
return
# Idempotent dedup
msg_uid = f"{channel_id}:{ts}"
if self._received_msgs.get(msg_uid):
return
self._received_msgs[msg_uid] = True
# Parse type + download media if needed.
ctype, content, caption = self._parse_event(event)
if ctype is None:
logger.debug(f"[Slack] unsupported message type, skip. event={event}")
return
# Strip <@bot_user_id> mention from channel text
if is_group and self.bot_user_id:
if ctype == ContextType.TEXT and content:
content = self._strip_at_mention(content)
if caption:
caption = self._strip_at_mention(caption)
slack_msg = SlackMessage(
event,
is_group=is_group,
bot_user_id=self.bot_user_id,
ctype=ctype,
content=content,
)
slack_msg.is_at = is_group # every channel message that reaches this point is flagged as addressed to the bot
# Imported here rather than at module top; reason not visible in this file.
from channel.file_cache import get_file_cache
file_cache = get_file_cache()
session_id = self._compute_session_id(event, is_group)
# Media + caption together: treat as a complete query and bypass the cache
if ctype in (ContextType.IMAGE, ContextType.FILE) and caption:
tag = "image" if ctype == ContextType.IMAGE else "file"
merged_text = f"{caption}\n[{tag}: {content}]"
slack_msg.ctype = ContextType.TEXT
slack_msg.content = merged_text
ctype = ContextType.TEXT
logger.info(f"[Slack] Media+caption merged for session {session_id}")
# fallthrough to the TEXT branch below
elif ctype == ContextType.IMAGE:
file_cache.add(session_id, content, file_type="image")
logger.info(f"[Slack] Image cached for session {session_id}, waiting for query...")
return
elif ctype == ContextType.FILE:
file_cache.add(session_id, content, file_type="file")
logger.info(f"[Slack] File cached for session {session_id}: {content}")
return
if ctype == ContextType.TEXT:
# Fast-path: /cancel mirrors Web channel behaviour
# NOTE: this tests the local `content`, not slack_msg.content, so for a
# merged media+caption message `content` is still the downloaded file path.
if (content or "").strip().lower() in ("/cancel", "cancel"):
self._do_cancel(session_id, channel_id, event)
return
cached_files = file_cache.get(session_id)
if cached_files:
refs = []
for fi in cached_files:
ftype = fi["type"]
# Only "image" and "video" keep their own tag; everything else is "file".
tag = ftype if ftype in ("image", "video") else "file"
refs.append(f"[{tag}: {fi['path']}]")
slack_msg.content = (slack_msg.content or "") + "\n" + "\n".join(refs)
file_cache.clear(session_id)
logger.info(f"[Slack] Attached {len(cached_files)} cached file(s) to query")
# Reply in the originating thread when present, else start one on this msg
thread_ts = event.get("thread_ts") or ts
context = self._compose_context(
slack_msg.ctype,
slack_msg.content,
isgroup=is_group,
msg=slack_msg,
# Replies go back into the thread, no manual @mention needed
no_need_at=True,
)
if context:
# These override the session_id / receiver that _compose_context set.
context["session_id"] = session_id
context["receiver"] = channel_id
context["slack_channel"] = channel_id
# DMs are answered outside any thread (thread_ts=None).
context["slack_thread_ts"] = thread_ts if is_group else None
self.produce(context)
logger.debug(f"[Slack] received: type={ctype}, content={str(slack_msg.content)[:80]}")
except Exception as e:
logger.error(f"[Slack] _handle_event error: {e}", exc_info=True)
def _do_cancel(self, session_id: str, channel_id: str, event: dict):
    """Cancel the session's running task directly and post the outcome.

    Bypasses the agent pipeline: asks the cancel registry to cancel
    `session_id`, then posts a short status message into the event's thread
    (or as a thread on the event itself). Errors are logged, not raised.
    """
    try:
        from agent.protocol import get_cancel_registry

        registry = get_cancel_registry()
        cancelled = registry.cancel_session(session_id)
        if cancelled:
            text = "Current task cancelled."
        else:
            text = "No running task to cancel."
        reply_thread = event.get("thread_ts") or event.get("ts")
        self._client.chat_postMessage(channel=channel_id, text=text, thread_ts=reply_thread)
        logger.info(f"[Slack] /cancel session={session_id}, cancelled={cancelled}")
    except Exception as err:
        logger.error(f"[Slack] /cancel error: {err}", exc_info=True)
def _parse_event(self, event: dict):
    """Classify a Slack event as text, image or file.

    Returns a ``(ctype, content, caption)`` tuple:
    - text message: ``(ContextType.TEXT, text, "")``
    - file message: ``(ContextType.IMAGE | ContextType.FILE, local_path, text)``;
      only the first attached file is downloaded, and it counts as an image
      when its mimetype starts with ``image/``
    - anything else (no text and no file, no download URL, or a failed
      download): ``(None, None, "")``
    """
    unsupported = (None, None, "")
    text = (event.get("text") or "").strip()
    attachments = event.get("files") or []

    if not attachments:
        return (ContextType.TEXT, text, "") if text else unsupported

    # Only the first attachment is handled; the message text is its caption.
    first = attachments[0]
    mimetype = (first.get("mimetype") or "").lower()
    url = first.get("url_private_download") or first.get("url_private")
    name = first.get("name") or first.get("id") or "file"
    if not url:
        return unsupported

    path = self._download_file(url, name)
    if not path:
        return unsupported

    kind = ContextType.IMAGE if mimetype.startswith("image/") else ContextType.FILE
    return (kind, path, text)
def _download_file(self, url: str, name: str):
    """Download a Slack private file (requires bot token auth) to local tmp dir.

    Args:
        url: The file's ``url_private`` / ``url_private_download`` value.
        name: Original file name; used, after sanitising, for the local name.

    Returns:
        The local path on success, or ``None`` if anything fails (the error
        is logged, never raised).
    """
    import hashlib

    try:
        headers = {"Authorization": f"Bearer {self.bot_token}"}
        tmp_dir = SlackMessage.get_tmp_dir()
        # Sanitize the name, then prefix it with a short digest of the URL so
        # two uploads that share a file name (e.g. "image.png") do not
        # overwrite each other in the shared tmp dir.
        safe_name = re.sub(r"[^\w.\-]", "_", name)
        url_tag = hashlib.sha256(url.encode("utf-8")).hexdigest()[:10]
        local_path = os.path.join(tmp_dir, f"{url_tag}_{safe_name}")
        # The context manager releases the streamed connection even when
        # raise_for_status() or the file write fails.
        with requests.get(url, headers=headers, timeout=60, stream=True) as resp:
            resp.raise_for_status()
            with open(local_path, "wb") as fp:
                for chunk in resp.iter_content(chunk_size=8192):
                    if chunk:
                        fp.write(chunk)
        logger.debug(f"[Slack] downloaded {name} -> {local_path}")
        return local_path
    except Exception as e:
        logger.error(f"[Slack] download_file failed ({name}): {e}")
        return None
# ------------------------------------------------------------------
# Channel trigger logic
# ------------------------------------------------------------------
def _should_reply_in_channel(self, event: dict) -> bool:
    """Decide whether a plain (non-@mention) channel message gets a reply.

    Driven by the ``slack_group_trigger`` setting (default ``mention_or_reply``):
    - ``all``: always reply
    - ``mention_only``: never reply here
    - anything else: reply only when the event carries a ``thread_ts``, i.e.
      it is a threaded message. Note that this checks only for the presence
      of ``thread_ts``; it does not verify that the bot has posted in that
      thread.
    """
    trigger = conf().get("slack_group_trigger", "mention_or_reply")
    if trigger == "mention_only":
        return False
    return True if trigger == "all" else bool(event.get("thread_ts"))
def _strip_at_mention(self, content: str) -> str:
"""Strip <@BOT_USER_ID> from channel text."""
if not content or not self.bot_user_id:
return content
pattern = re.compile(r"<@" + re.escape(self.bot_user_id) + r">", re.IGNORECASE)
return pattern.sub("", content).strip()
@staticmethod
def _compute_session_id(event: dict, is_group: bool) -> str:
    """Build the session key for an event.

    DMs are keyed per user (``slack_user_<user>``). Channel messages are keyed
    per channel (``slack_channel_<channel>``), with ``_<user>`` appended when
    the ``group_shared_session`` setting (default True) is switched off.
    """
    user_id = event.get("user", "")
    if not is_group:
        return f"slack_user_{user_id}"
    channel_id = event.get("channel", "")
    shared = conf().get("group_shared_session", True)
    per_user_suffix = "" if shared else f"_{user_id}"
    return f"slack_channel_{channel_id}{per_user_suffix}"
# ------------------------------------------------------------------
# Override _compose_context: skip the parent's group whitelist/at checks
# (already handled via _should_reply_in_channel). Same idea as telegram.
# ------------------------------------------------------------------
def _compose_context(self, ctype: ContextType, content, **kwargs):
    """Build the Context for a message without group whitelist / @ checks.

    `kwargs` become the context's kwargs and must include ``msg`` (the chat
    message). Fills in ``channel_type`` and ``origin_ctype`` when absent, sets
    ``session_id`` / ``receiver`` from the message, turns TEXT that starts
    with an ``image_create_prefix`` into IMAGE_CREATE (prefix removed), and
    requests a voice reply according to the ``always_reply_voice`` /
    ``voice_reply_voice`` settings. Always returns the context.
    """
    context = Context(ctype, content)
    context.kwargs = kwargs
    for key, fallback in (("channel_type", self.channel_type), ("origin_ctype", ctype)):
        if key not in context:
            context[key] = fallback

    msg = context["msg"]
    if not msg.is_group:
        session_id = msg.from_user_id
    elif conf().get("group_shared_session", True):
        session_id = msg.other_user_id
    else:
        session_id = f"{msg.from_user_id}:{msg.other_user_id}"
    context["session_id"] = session_id
    context["receiver"] = msg.other_user_id

    if ctype == ContextType.TEXT:
        matched_prefix = check_prefix(content, conf().get("image_create_prefix"))
        if matched_prefix:
            content = content.replace(matched_prefix, "", 1)
        context.type = ContextType.IMAGE_CREATE if matched_prefix else ContextType.TEXT
        context.content = (content or "").strip()
        if "desire_rtype" not in context and conf().get("always_reply_voice"):
            context["desire_rtype"] = ReplyType.VOICE
    elif ctype == ContextType.VOICE:
        if "desire_rtype" not in context and (
            conf().get("voice_reply_voice") or conf().get("always_reply_voice")
        ):
            context["desire_rtype"] = ReplyType.VOICE
    return context
# ------------------------------------------------------------------
# Outbound: ChatChannel.send -> Slack Web API
# ------------------------------------------------------------------
def send(self, reply: Reply, context: Context):
    """Deliver `reply` to the Slack channel / thread recorded in `context`.

    Reads ``slack_channel`` and ``slack_thread_ts`` from the context. The
    reply is dropped with a warning when the Web client is not ready or no
    channel is recorded. Send failures are logged, not raised.
    """
    if self._client is None:
        logger.warning("[Slack] client not ready, drop reply")
        return
    channel_id, thread_ts = context.get("slack_channel"), context.get("slack_thread_ts")
    if not channel_id:
        logger.warning("[Slack] no slack_channel in context, drop reply")
        return
    try:
        self._do_send(reply, channel_id, thread_ts)
    except Exception as err:
        logger.error(f"[Slack] send failed: {err}", exc_info=True)
    else:
        logger.info(f"[Slack] sent reply (type={reply.type}, channel={channel_id})")
def _do_send(self, reply: Reply, channel_id: str, thread_ts):
    """Translate one Reply into the matching Slack Web API call(s).

    - TEXT / INFO / ERROR: posted as messages, split into chunks of at most
      3500 characters; empty text sends nothing
    - IMAGE: `reply.content` is rewound and uploaded as ``image.png``
    - IMAGE_URL: ``file://`` paths are uploaded, other URLs posted as text
    - VOICE / FILE: uploaded, with `reply.text_content` as the comment if set
    - any other type: posted as ``str(content)``

    Exceptions from the Slack client propagate to the caller.
    """
    rtype, content = reply.type, reply.content
    client = self._client

    if rtype in (ReplyType.TEXT, ReplyType.INFO, ReplyType.ERROR):
        text = "" if content is None else str(content)
        if not text:
            return
        # Slack caps a message around 40k chars; split conservatively
        for part in _split_text(text, 3500):
            client.chat_postMessage(channel=channel_id, text=part, thread_ts=thread_ts)
        return

    if rtype == ReplyType.IMAGE:
        # Already a local BytesIO; upload it directly
        content.seek(0)
        client.files_upload_v2(
            channel=channel_id, file=content, filename="image.png", thread_ts=thread_ts,
        )
        return

    if rtype == ReplyType.IMAGE_URL:
        url = str(content)
        if not url.startswith("file://"):
            # Post the URL as text; Slack will unfurl it as an image preview
            client.chat_postMessage(channel=channel_id, text=url, thread_ts=thread_ts)
            return
        client.files_upload_v2(channel=channel_id, file=url[7:], thread_ts=thread_ts)
        return

    if rtype in (ReplyType.VOICE, ReplyType.FILE):
        is_file_url = isinstance(content, str) and content.startswith("file://")
        local = content[7:] if is_file_url else content
        caption = getattr(reply, "text_content", None) or None
        client.files_upload_v2(
            channel=channel_id, file=local, initial_comment=caption, thread_ts=thread_ts,
        )
        return

    # Fallback: send as plain text
    client.chat_postMessage(channel=channel_id, text=str(content), thread_ts=thread_ts)
def _split_text(text: str, limit: int):
"""Split long text preferring line breaks to keep markdown structure intact."""
if len(text) <= limit:
yield text
return
buf = []
size = 0
for line in text.splitlines(keepends=True):
if size + len(line) > limit and buf:
yield "".join(buf)
buf, size = [], 0
# Hard-split single lines that exceed the limit
while len(line) > limit:
yield line[:limit]
line = line[limit:]
buf.append(line)
size += len(line)
if buf:
yield "".join(buf)

View File

@@ -0,0 +1,60 @@
"""
Slack message adapter.
Convert a Slack event payload into cow's unified ChatMessage.
File downloads are NOT performed here; the channel layer downloads files
on demand because it needs the bot token for authenticated download URLs.
"""
import os
from bridge.context import ContextType
from channel.chat_message import ChatMessage
from common.utils import expand_path
from config import conf
class SlackMessage(ChatMessage):
    """Unified ChatMessage view of a single Slack event payload."""

    def __init__(self, event: dict, is_group: bool = False, bot_user_id: str = "",
                 ctype: ContextType = ContextType.TEXT, content: str = ""):
        """Populate the ChatMessage fields from `event`.

        `ctype` / `content` are supplied by the channel layer, which has
        already classified the event. Slack ids double as nicknames because
        the event does not carry display names here.
        """
        super().__init__(event)

        sender = event.get("user", "unknown")
        channel = event.get("channel", "")
        bot_identity = bot_user_id or "slack_bot"

        # Basic fields
        self.msg_id = event.get("client_msg_id") or event.get("ts") or ""
        try:
            # "ts" is converted through float, keeping only the integer part.
            self.create_time = int(float(event.get("ts", 0)))
        except (TypeError, ValueError):
            self.create_time = 0
        self.ctype = ctype
        self.content = content

        # Sender / receiver
        self.from_user_id = self.from_user_nickname = sender
        self.to_user_id = self.to_user_nickname = bot_identity

        self.is_group = is_group
        if not is_group:
            # DM: use channel_id so replies go back to the same DM channel
            self.other_user_id = channel or sender
            self.other_user_nickname = sender
        else:
            # Channel chat: the "other" party is the channel, the actual
            # user is the sender.
            self.other_user_id = self.other_user_nickname = channel
            self.actual_user_id = self.actual_user_nickname = sender

        # Whether the bot was triggered by @-mention (set by channel layer)
        self.is_at = False

    @staticmethod
    def get_tmp_dir() -> str:
        """Return ``<agent_workspace>/tmp`` (default workspace ``~/cow``), creating it if needed."""
        workspace_root = expand_path(conf().get("agent_workspace", "~/cow"))
        download_dir = os.path.join(workspace_root, "tmp")
        os.makedirs(download_dir, exist_ok=True)
        return download_dir

View File

View File

@@ -0,0 +1,719 @@
"""
Telegram channel via Bot API (long polling mode).
Features:
- Single chat & group chat (text / photo / voice / video / document)
- Group trigger: @mention or reply-to-bot (configurable)
- /cancel fast-path matches Web channel behaviour
- Auto-register bot commands menu on startup (mirrors Web slash menu)
- Optional HTTP/SOCKS5 proxy support for restricted networks
Implementation note:
python-telegram-bot is async-first. We run the bot inside a dedicated
thread with its own asyncio loop so the rest of cow (which is sync)
stays untouched. Inbound updates are dispatched onto cow's existing
sync ChatChannel.produce() pipeline; outbound send() schedules
coroutines back onto that loop via asyncio.run_coroutine_threadsafe.
"""
import asyncio
import os
import re
import threading
from bridge.context import Context, ContextType
from bridge.reply import Reply, ReplyType
from channel.chat_channel import ChatChannel, check_prefix
from channel.telegram.telegram_message import TelegramMessage
from common.expired_dict import ExpiredDict
from common.log import logger
from common.singleton import singleton
from config import conf
# Bot command menu, aligned with Web slash commands.
# Top-level commands only; sub-commands are entered with a space (e.g. "/skill list").
# Each entry is a (command name, description) pair; _async_main turns them into
# BotCommand objects and registers them when `telegram_register_commands` is enabled.
TELEGRAM_BOT_COMMANDS = [
("help", "Show command help"),
("status", "Show running status"),
("context", "View/clear conversation context (sub: clear)"),
("skill", "Manage skills (list/search/install/...)"),
("memory", "Manage memory (sub: dream)"),
("knowledge", "Manage knowledge base (list/on/off)"),
("config", "Show current config"),
("cancel", "Cancel running agent task"),
("logs", "Show recent logs"),
("version", "Show version"),
]
@singleton
class TelegramChannel(ChatChannel):
NOT_SUPPORT_REPLYTYPE = []
def __init__(self):
    """Create the channel with no token, bot or event loop yet.

    The token, bot username, Application, asyncio loop and its thread are all
    populated later by startup() / _async_main().
    """
    super().__init__()
    # Credentials / identity. bot_username is used for @-mention matching.
    self.bot_token = self.bot_username = ""
    # Runtime objects, created in startup() / _async_main().
    self._bot = self._application = self._loop = self._loop_thread = None
    # Set by stop() to end the polling loop in _async_main.
    self._stop_event = threading.Event()
    # Seen-message cache for idempotent dedup (TG occasionally redelivers the
    # same update on flaky networks). 3600 == 60 * 60 * 1; presumably a TTL
    # in seconds.
    self._received_msgs = ExpiredDict(3600)
    # Triggering is decided in _should_reply_in_group, so the shared group
    # whitelist / single-chat prefix checks are opened up here, aligned with
    # the feishu / wecom_bot channels.
    settings = conf()
    settings["group_name_white_list"] = ["ALL_GROUP"]
    settings["single_chat_prefix"] = [""]
# ------------------------------------------------------------------
# Lifecycle
# ------------------------------------------------------------------
def startup(self):
    """Start the bot on a dedicated asyncio thread and block until it exits.

    Requires the ``telegram_token`` setting and the python-telegram-bot
    package; a missing token or package is logged and reported through
    ``report_startup_error`` and the method returns. The rest of the work
    happens in _async_main, run on a new event loop in its own daemon thread.
    """
    def _fail(message):
        logger.error(message)
        self.report_startup_error(message)

    self.bot_token = conf().get("telegram_token", "")
    if not self.bot_token:
        _fail("[Telegram] telegram_token is required")
        return

    try:
        from telegram.ext import Application, CommandHandler, MessageHandler, filters
    except ImportError:
        _fail("[Telegram] python-telegram-bot is not installed. Run: pip install python-telegram-bot")
        return

    # Run the asyncio event loop in a dedicated thread so the sync cow body
    # is untouched.
    loop = asyncio.new_event_loop()
    self._loop = loop

    def _drive_loop():
        asyncio.set_event_loop(loop)
        try:
            loop.run_until_complete(
                self._async_main(Application, MessageHandler, CommandHandler, filters)
            )
        except Exception as exc:
            logger.error(f"[Telegram] event loop crashed: {exc}", exc_info=True)
            self.report_startup_error(str(exc))
        finally:
            try:
                loop.close()
            except Exception:
                pass
            logger.info("[Telegram] event loop exited")

    worker = threading.Thread(target=_drive_loop, daemon=True, name="telegram-loop")
    self._loop_thread = worker
    worker.start()
    # Block startup() until the loop thread exits, matching other channels'
    # behaviour (startup is a blocking call).
    worker.join()
async def _async_main(self, Application, MessageHandler, CommandHandler, filters):
    """Build Application, register handlers, and run polling.

    The four parameters are the python-telegram-bot classes / module imported
    lazily by startup(). Sequence: configure proxy and timeouts, build the
    application, fetch the bot's own username (a failure is reported via
    ``report_startup_error`` and ends the method), optionally register the
    command menu, add handlers, start long polling, report success, then idle
    until stop() sets the stop event and finally shut the application down.
    """
    builder = Application.builder().token(self.bot_token)
    # Proxy: prefer telegram_proxy config, fall back to HTTPS_PROXY env var
    proxy_url = conf().get("telegram_proxy", "") or os.environ.get("HTTPS_PROXY", "")
    if proxy_url:
        try:
            builder = builder.proxy(proxy_url).get_updates_proxy(proxy_url)
            # Proxy URLs may embed "user:password@"; keep credentials out of
            # the log by masking the userinfo part.
            shown_proxy = re.sub(r"(?<=://)[^/@\s]+@", "***@", proxy_url, count=1)
            logger.info(f"[Telegram] using proxy: {shown_proxy}")
        except Exception as e:
            logger.warning(f"[Telegram] proxy config failed, fallback to direct: {e}")
    # Media uploads (photo/voice/video/document) over a proxy can be slow,
    # bump read/write/connect/pool timeouts.
    builder = (
        builder
        .read_timeout(60)
        .write_timeout(120)
        .connect_timeout(30)
        .pool_timeout(30)
    )
    application = builder.build()
    self._application = application
    self._bot = application.bot
    # Fetch our own username (needed for @-mention matching in groups)
    try:
        me = await self._bot.get_me()
        self.bot_username = me.username or ""
        self.name = self.bot_username  # ChatChannel uses self.name to strip @-mention
        logger.info(f"[Telegram] Bot logged in as @{self.bot_username} (id={me.id})")
    except Exception as e:
        err = f"[Telegram] get_me failed: {e}"
        logger.error(err)
        self.report_startup_error(err)
        return
    # Register the command menu (failure is non-fatal)
    if conf().get("telegram_register_commands", True):
        try:
            from telegram import BotCommand
            cmds = [BotCommand(name, desc) for name, desc in TELEGRAM_BOT_COMMANDS]
            await self._bot.set_my_commands(cmds)
            logger.info(f"[Telegram] Registered {len(cmds)} bot commands")
        except Exception as e:
            logger.warning(f"[Telegram] set_my_commands failed: {e}")
    # Handlers:
    # 1) /cancel uses the fast-path
    application.add_handler(CommandHandler("cancel", self._on_cancel))
    # 2) Normal messages (text + media)
    application.add_handler(MessageHandler(filters.ALL & ~filters.COMMAND, self._on_message))
    # 3) Other slash commands are forwarded as plain text for the agent to handle
    application.add_handler(MessageHandler(filters.COMMAND, self._on_command_passthrough))
    # Start polling. drop_pending_updates avoids replaying backlog after restart.
    # Transient "Server disconnected" / RemoteProtocolError during get_updates
    # are common over proxies/flaky networks; PTB's network loop auto-retries,
    # so we only need to keep the noise down (see _quiet_polling_network_errors).
    self._quiet_polling_network_errors()
    logger.info("[Telegram] Starting long polling...")
    await application.initialize()
    await application.start()
    await application.updater.start_polling(
        drop_pending_updates=True,
        # Long-poll hold time on the server side; smaller value = reconnect more
        # often but each hung connection fails faster.
        timeout=30,
        # Retry forever on transient get_updates network errors instead of giving up.
        bootstrap_retries=-1,
    )
    self.report_startup_success()
    logger.info("[Telegram] ✅ Telegram bot ready, polling for updates")
    # Block until stop()
    try:
        while not self._stop_event.is_set():
            await asyncio.sleep(0.5)
    finally:
        try:
            await application.updater.stop()
            await application.stop()
            await application.shutdown()
        except Exception as e:
            logger.warning(f"[Telegram] shutdown error: {e}")
@staticmethod
def _quiet_polling_network_errors():
    """Silence python-telegram-bot's repeated polling-error log records.

    Installs one shared logging filter on the ``telegram.ext.Updater``,
    ``telegram.ext._updater`` and ``telegram.ext`` loggers. A record whose
    message contains one of the known phrases is dropped and replaced by a
    one-line DEBUG breadcrumb on cow's logger; every other record passes
    through. The original author notes these errors are retried automatically
    by the library, which is why hiding them is considered safe.
    """
    import logging

    noisy_phrases = (
        "Exception happened while polling for updates",
        "Server disconnected without sending a response",
    )

    class _PollingNoiseFilter(logging.Filter):
        def filter(self, record: logging.LogRecord) -> bool:
            try:
                text = record.getMessage()
            except Exception:
                # Formatting failed; let the record through untouched.
                return True
            for phrase in noisy_phrases:
                if phrase in text:
                    # Keep a single-line breadcrumb at DEBUG, drop the traceback.
                    logger.debug(f"[Telegram] transient polling network error (auto-retrying): {text.splitlines()[0]}")
                    return False
            return True

    shared_filter = _PollingNoiseFilter()
    for logger_name in ("telegram.ext.Updater", "telegram.ext._updater", "telegram.ext"):
        logging.getLogger(logger_name).addFilter(shared_filter)
def stop(self):
    """Signal the polling loop to finish and wait up to 10 s for its thread."""
    logger.info("[Telegram] stop() called")
    # _async_main polls this event and shuts the application down once set.
    self._stop_event.set()
    worker = self._loop_thread
    if worker and worker.is_alive():
        try:
            worker.join(timeout=10)
        except Exception:
            pass
    logger.info("[Telegram] stop() completed")
# ------------------------------------------------------------------
# Inbound: telegram update -> ChatMessage -> ChatChannel.produce
# ------------------------------------------------------------------
async def _on_cancel(self, update, _context):
    """Handle /cancel directly: cancel the session's task and reply with the outcome.

    Bypasses the agent pipeline. On failure the error is logged and a
    best-effort failure notice is sent back; a failure to send that notice is
    ignored.
    """
    try:
        from agent.protocol import get_cancel_registry

        session_id = self._compute_session_id(update)
        cancelled = get_cancel_registry().cancel_session(session_id)
        if cancelled:
            text = "Current task cancelled."
        else:
            text = "No running task to cancel."
        await update.effective_message.reply_text(text)
        logger.info(f"[Telegram] /cancel session={session_id}, cancelled={cancelled}")
    except Exception as err:
        logger.error(f"[Telegram] /cancel error: {err}", exc_info=True)
        try:
            await update.effective_message.reply_text(f"⚠️ /cancel failed: {err}")
        except Exception:
            pass
async def _on_command_passthrough(self, update, _context):
    """Route a slash command through the ordinary message path.

    Registered in _async_main for command messages after the dedicated
    /cancel handler; it simply delegates to _on_message.
    """
    await self._on_message(update, _context)
async def _on_message(self, update, _context):
"""Telegram update entry: parse message -> build ChatMessage -> produce().

Steps, in order:
1. Ignore updates without a message or chat, and updates already seen
(dedup key is "<chat id>:<message id>" in self._received_msgs).
2. In groups / supergroups, drop the message unless _should_reply_in_group
accepts it.
3. Classify the message via _parse_message (this may download media).
4. In groups, strip the bot's @mention from text / caption.
5. Image or file with a caption is merged into a single TEXT query; image
or file without a caption is only stored in the file cache and the method
returns.
6. For TEXT, any cached files are appended as "[tag: path]" lines and the
cache for this session is cleared.
7. Build the context via _compose_context, attach the Telegram routing keys
and hand it to produce().

`_context` (the handler context passed by the library) is unused.
Any exception is logged with traceback and swallowed.
"""
try:
message = update.effective_message
chat = update.effective_chat
if not message or not chat:
return
# Idempotent dedup
msg_uid = f"{chat.id}:{message.message_id}"
if self._received_msgs.get(msg_uid):
return
self._received_msgs[msg_uid] = True
is_group = chat.type in ("group", "supergroup")
# Debug log: helpful when group messages are silently dropped
if is_group:
logger.debug(
f"[Telegram] group update received: chat_id={chat.id}, "
f"text={(message.text or message.caption or '')[:40]!r}, "
f"reply_to_bot={bool(message.reply_to_message and message.reply_to_message.from_user and message.reply_to_message.from_user.username == self.bot_username)}"
)
# Group trigger gate (silently drop if not triggered)
if is_group and not self._should_reply_in_group(update):
logger.debug(f"[Telegram] group message not triggered (need @{self.bot_username} or reply), skip")
return
# Parse message type + download media if needed.
# Media messages with caption return both the local path and the caption text.
ctype, content, caption = await self._parse_message(message)
if ctype is None:
logger.debug(f"[Telegram] unsupported message type, skip. msg={message}")
return
# Strip @bot mention for group text/caption
if is_group and self.bot_username:
if ctype == ContextType.TEXT and content:
content = self._strip_at_mention(content)
if caption:
caption = self._strip_at_mention(caption)
tg_msg = TelegramMessage(
update,
is_group=is_group,
bot_username=self.bot_username,
ctype=ctype,
content=content,
)
tg_msg.is_at = is_group # every group message that passed the trigger gate is flagged as addressed to the bot
# File cache: standalone media goes into cache, the next text query attaches them
from channel.file_cache import get_file_cache
file_cache = get_file_cache()
session_id = self._compute_session_id(update)
# Media + caption together: treat as a complete query and bypass the cache
if ctype in (ContextType.IMAGE, ContextType.FILE) and caption:
tag = "image" if ctype == ContextType.IMAGE else "file"
merged_text = f"{caption}\n[{tag}: {content}]"
tg_msg.ctype = ContextType.TEXT
tg_msg.content = merged_text
ctype = ContextType.TEXT
logger.info(f"[Telegram] Media+caption merged for session {session_id}")
# fallthrough to the TEXT branch below
elif ctype == ContextType.IMAGE:
file_cache.add(session_id, content, file_type="image")
logger.info(f"[Telegram] Image cached for session {session_id}, waiting for query...")
return
elif ctype == ContextType.FILE:
file_cache.add(session_id, content, file_type="file")
logger.info(f"[Telegram] File cached for session {session_id}: {content}")
return
# Other types (e.g. VOICE) skip the cache handling and go straight to dispatch.
if ctype == ContextType.TEXT:
cached_files = file_cache.get(session_id)
if cached_files:
refs = []
for fi in cached_files:
ftype = fi["type"]
# Only "image" and "video" keep their own tag; everything else is "file".
tag = ftype if ftype in ("image", "video") else "file"
refs.append(f"[{tag}: {fi['path']}]")
tg_msg.content = (tg_msg.content or "") + "\n" + "\n".join(refs)
file_cache.clear(session_id)
logger.info(f"[Telegram] Attached {len(cached_files)} cached file(s) to query")
# Dispatch to cow main pipeline (reuses ChatChannel._compose_context routing)
context = self._compose_context(
tg_msg.ctype,
tg_msg.content,
isgroup=is_group,
msg=tg_msg,
)
if context:
# These override whatever session_id / receiver _compose_context set.
context["session_id"] = session_id
context["receiver"] = str(chat.id)
context["telegram_chat_id"] = chat.id
# Only group messages record the message id to reply to; private chats store None.
context["telegram_reply_to_msg_id"] = message.message_id if is_group else None
self.produce(context)
logger.debug(f"[Telegram] received: type={ctype}, content={str(tg_msg.content)[:80]}")
except Exception as e:
logger.error(f"[Telegram] _on_message error: {e}", exc_info=True)
async def _parse_message(self, message):
    """Classify a Telegram message and download its media when present.

    Returns a ``(ctype, content, caption)`` tuple, checked in this order:
    - photo: ``(IMAGE, path, caption)``, using the last entry of ``message.photo``
    - voice / audio: ``(VOICE, path, caption)``
    - video / video note: ``(FILE, path, caption)``
    - document: ``IMAGE`` when its mime type starts with ``image/``, else ``FILE``
    - text: ``(TEXT, stripped text, "")``
    - anything else, or a failed download: ``(None, None, "")``
    """
    unsupported = (None, None, "")
    caption = (message.caption or "").strip()

    if message.photo:
        path = await self._download_file(message.photo[-1].file_id, suffix=".jpg")
        return (ContextType.IMAGE, path, caption) if path else unsupported

    if message.voice or message.audio:
        audio_obj = message.voice or message.audio
        if message.voice:
            suffix = ".ogg"
        elif getattr(audio_obj, "mime_type", ""):
            # Use the mime subtype as the extension, e.g. "audio/mpeg" -> ".mpeg"
            suffix = "." + audio_obj.mime_type.split("/")[-1]
        else:
            suffix = ".mp3"
        path = await self._download_file(audio_obj.file_id, suffix=suffix)
        return (ContextType.VOICE, path, caption) if path else unsupported

    if message.video or message.video_note:
        video_obj = message.video or message.video_note
        path = await self._download_file(video_obj.file_id, suffix=".mp4")
        return (ContextType.FILE, path, caption) if path else unsupported

    if message.document:
        doc = message.document
        has_extension = bool(doc.file_name and "." in doc.file_name)
        ext = "." + doc.file_name.rsplit(".", 1)[-1] if has_extension else ""
        path = await self._download_file(doc.file_id, suffix=ext, original_name=doc.file_name)
        if not path:
            return unsupported
        # Image-typed documents (user picked "send as file") are treated as images
        is_image = (doc.mime_type or "").lower().startswith("image/")
        return (ContextType.IMAGE if is_image else ContextType.FILE, path, caption)

    if message.text:
        return (ContextType.TEXT, message.text.strip(), "")

    return unsupported
async def _download_file(self, file_id: str, suffix: str = "", original_name: str = ""):
    """Download a Telegram file into the local tmp dir.

    Args:
        file_id: Telegram file identifier passed to ``bot.get_file``.
        suffix: Extension (including the dot) used when no usable
            ``original_name`` is available.
        original_name: File name reported by the sender, if any. When usable
            it is kept (prefixed with ``file_id``) and ``suffix`` is ignored.

    Returns:
        The local path of the downloaded file, or None on any failure
        (the error is logged, never raised).
    """
    try:
        f = await self._bot.get_file(file_id)
        tmp_dir = TelegramMessage.get_tmp_dir()
        # original_name is sender-controlled: keep only its final path
        # component (treating "\" as a separator too) so it can neither
        # escape tmp_dir nor reference a sub-directory that does not exist.
        clean_name = os.path.basename((original_name or "").replace("\\", "/")).strip()
        if clean_name:
            # Prefix with file_id to avoid name collisions
            safe_name = f"{file_id}_{clean_name}"
        else:
            safe_name = f"{file_id}{suffix or ''}"
        local_path = os.path.join(tmp_dir, safe_name)
        await f.download_to_drive(custom_path=local_path)
        logger.debug(f"[Telegram] downloaded file_id={file_id} -> {local_path}")
        return local_path
    except Exception as e:
        logger.error(f"[Telegram] download_file failed (file_id={file_id}): {e}")
        return None
# ------------------------------------------------------------------
# Group trigger logic
# ------------------------------------------------------------------
def _should_reply_in_group(self, update) -> bool:
    """Tell whether a group message should trigger a reply.

    Driven by the ``telegram_group_trigger`` setting (default
    ``"mention_or_reply"``):
    - ``"all"``: every message triggers a reply
    - ``"mention_or_reply"``: an @mention of the bot, or a reply to one of
      the bot's own messages
    - any other value: an @mention of the bot only
    """
    trigger = conf().get("telegram_group_trigger", "mention_or_reply")
    if trigger == "all":
        return True

    msg = update.effective_message
    if not msg:
        return False

    # An explicit @mention counts in every remaining mode.
    if self.bot_username and self._is_mentioned(msg, self.bot_username):
        return True

    if trigger != "mention_or_reply":
        return False

    # Replying to a message authored by the bot also counts.
    replied = msg.reply_to_message
    return bool(
        replied
        and replied.from_user
        and replied.from_user.username == self.bot_username
    )
@staticmethod
def _is_mentioned(message, bot_username: str) -> bool:
"""Check whether entities/caption_entities contain a @mention of the bot."""
bot_at = "@" + bot_username.lower()
text = (message.text or message.caption or "").lower()
if bot_at in text:
return True
# Also check entities strictly to support text_mention (no-username @)
for ent in (message.entities or []) + (message.caption_entities or []):
if ent.type == "mention":
src = message.text or message.caption or ""
if src[ent.offset: ent.offset + ent.length].lower() == bot_at:
return True
return False
def _strip_at_mention(self, content: str) -> str:
"""Strip @bot_username from group text (case-insensitive)."""
if not content or not self.bot_username:
return content
pattern = re.compile(r"@" + re.escape(self.bot_username), re.IGNORECASE)
return pattern.sub("", content).strip()
@staticmethod
def _compute_session_id(update) -> str:
chat = update.effective_chat
user = update.effective_user
is_group = chat.type in ("group", "supergroup")
if is_group:
if conf().get("group_shared_session", True):
return f"tg_group_{chat.id}"
return f"tg_group_{chat.id}_{user.id}"
return f"tg_user_{user.id}"
# ------------------------------------------------------------------
# Override _compose_context: skip the parent's group whitelist/at checks
# (already handled in _on_message via _should_reply_in_group). Same idea
# as the feishu channel.
# ------------------------------------------------------------------
def _compose_context(self, ctype: ContextType, content, **kwargs):
    """Build the Context for an inbound message without the parent's group checks.

    Fills in ``channel_type`` / ``origin_ctype`` when absent, derives
    ``session_id`` and ``receiver`` from the ChatMessage stored under
    ``msg``, turns text starting with an image-creation prefix into
    IMAGE_CREATE, and requests a voice reply when the voice-reply settings
    ask for one.
    """
    ctx = Context(ctype, content)
    ctx.kwargs = kwargs
    if "channel_type" not in ctx:
        ctx["channel_type"] = self.channel_type
    if "origin_ctype" not in ctx:
        ctx["origin_ctype"] = ctype

    msg = ctx["msg"]
    if not msg.is_group:
        ctx["session_id"] = msg.from_user_id
    elif conf().get("group_shared_session", True):
        ctx["session_id"] = msg.other_user_id
    else:
        ctx["session_id"] = f"{msg.from_user_id}:{msg.other_user_id}"
    ctx["receiver"] = msg.other_user_id

    if ctype == ContextType.TEXT:
        prefix = check_prefix(content, conf().get("image_create_prefix"))
        if prefix:
            # Drop the trigger prefix so only the prompt remains.
            content = content.replace(prefix, "", 1)
        ctx.type = ContextType.IMAGE_CREATE if prefix else ContextType.TEXT
        ctx.content = (content or "").strip()
        if "desire_rtype" not in ctx and conf().get("always_reply_voice"):
            ctx["desire_rtype"] = ReplyType.VOICE
    elif ctype == ContextType.VOICE:
        wants_voice = "desire_rtype" not in ctx and (
            conf().get("voice_reply_voice") or conf().get("always_reply_voice")
        )
        if wants_voice:
            ctx["desire_rtype"] = ReplyType.VOICE
    return ctx
# ------------------------------------------------------------------
# Outbound: ChatChannel.send -> Telegram API
# ------------------------------------------------------------------
def send(self, reply: Reply, context: Context):
    """Deliver a reply from the synchronous caller via the bot's event loop.

    The reply is dropped (with a warning) when the bot is not ready or the
    context carries no ``telegram_chat_id``. Otherwise ``_async_send`` is
    scheduled on the loop thread and awaited for up to 180 seconds; any
    failure is logged rather than raised.
    """
    loop, bot = self._loop, self._bot
    if loop is None or bot is None:
        logger.warning("[Telegram] bot not ready, drop reply")
        return

    chat_id = context.get("telegram_chat_id")
    reply_to = context.get("telegram_reply_to_msg_id")
    if chat_id is None:
        logger.warning("[Telegram] no telegram_chat_id in context, drop reply")
        return

    pending = self._async_send(reply, chat_id, reply_to)
    try:
        # Media uploads through a proxy can be slow; let PTB's own timeouts win
        asyncio.run_coroutine_threadsafe(pending, loop).result(timeout=180)
    except Exception as e:
        logger.error(f"[Telegram] send failed: {e}")
# Number of retries for transient network errors (proxy hiccups etc.)
_SEND_RETRIES = 2
_SEND_RETRY_BACKOFF = 2.0  # seconds


async def _send_with_retry(self, send_fn, *, label: str):
    """Await ``send_fn()`` and retry it on transient Telegram network errors.

    Args:
        send_fn: Zero-argument callable returning a fresh awaitable per call.
        label: Short name of the API call, used in the warning log.

    Returns:
        Whatever ``send_fn()`` resolves to.

    Raises:
        The last NetworkError / TimedOut once ``_SEND_RETRIES`` retries are
        used up. Other exceptions propagate immediately.
    """
    from telegram.error import NetworkError, TimedOut

    total = self._SEND_RETRIES + 1
    failure = None
    for n in range(1, total + 1):
        try:
            return await send_fn()
        except (NetworkError, TimedOut) as e:
            failure = e
        if n >= total:
            break
        # Linear backoff: wait grows with the attempt number.
        delay = self._SEND_RETRY_BACKOFF * n
        logger.warning(
            f"[Telegram] {label} transient error (attempt {n}/"
            f"{total}): {failure}; retry in {delay}s"
        )
        await asyncio.sleep(delay)
    raise failure
async def _async_send(self, reply: Reply, chat_id, reply_to_msg_id):
    """Send one reply to a Telegram chat, picking the API call from ``reply.type``.

    Dispatch:
    - TEXT / INFO / ERROR: ``send_message``, split into chunks of at most
      4000 characters; empty text sends nothing
    - IMAGE: ``send_photo`` with the in-memory stream in ``reply.content``
    - IMAGE_URL: ``send_photo`` with a local file (``file://`` prefix) or
      with the URL itself
    - VOICE: ``send_voice`` with a local file
    - FILE: ``send_video`` for common video extensions, otherwise
      ``send_document``; ``reply.text_content`` (if set) becomes the caption
    - anything else: ``send_message`` with ``str(content)``

    Every call goes through ``_send_with_retry``. Each retried callable
    prepares its own payload (rewinds the stream / reopens the file) so a
    retry never uploads from an already-consumed stream. Errors are logged
    here and never propagate to the caller.
    """
    try:
        rtype = reply.type
        content = reply.content
        # Arguments shared by every outbound call. allow_sending_without_reply
        # avoids failing the whole send if the replied-to message was deleted.
        common = dict(
            chat_id=chat_id,
            reply_to_message_id=reply_to_msg_id,
            allow_sending_without_reply=True,
        )

        if rtype in (ReplyType.TEXT, ReplyType.INFO, ReplyType.ERROR):
            # Telegram caps a single text message at 4096 chars; auto-split
            text = str(content) if content is not None else ""
            if not text:
                return
            for chunk in _split_text(text, 4000):
                await self._send_with_retry(
                    # Bind the chunk as a default to avoid late binding.
                    lambda c=chunk: self._bot.send_message(text=c, **common),
                    label="send_message",
                )

        elif rtype == ReplyType.IMAGE:
            # Already a local BytesIO; send it directly
            async def _send_image():
                # Rewind on every attempt: a failed upload may have left
                # the stream at EOF, which would make the retry send nothing.
                content.seek(0)
                return await self._bot.send_photo(photo=content, **common)

            await self._send_with_retry(_send_image, label="send_photo")

        elif rtype == ReplyType.IMAGE_URL:
            url = str(content)
            if url.startswith("file://"):
                local = url[7:]

                # Open inside the callable so each retry gets a fresh stream
                async def _send_local_photo():
                    with open(local, "rb") as f:
                        return await self._bot.send_photo(photo=f, **common)

                await self._send_with_retry(_send_local_photo, label="send_photo(file)")
            else:
                await self._send_with_retry(
                    lambda: self._bot.send_photo(photo=url, **common),
                    label="send_photo(url)",
                )

        elif rtype == ReplyType.VOICE:
            local = content[7:] if isinstance(content, str) and content.startswith("file://") else content

            async def _send_voice():
                with open(local, "rb") as f:
                    return await self._bot.send_voice(voice=f, **common)

            await self._send_with_retry(_send_voice, label="send_voice")

        elif rtype == ReplyType.FILE:
            # Videos go through send_video, everything else through send_document
            local = content[7:] if isinstance(content, str) and content.startswith("file://") else content
            # File replies may carry an accompanying text caption
            caption = getattr(reply, "text_content", None) or None
            is_video = isinstance(local, str) and local.lower().endswith(
                (".mp4", ".mov", ".avi", ".mkv", ".webm")
            )

            async def _send_file():
                with open(local, "rb") as f:
                    if is_video:
                        return await self._bot.send_video(video=f, caption=caption, **common)
                    return await self._bot.send_document(document=f, caption=caption, **common)

            await self._send_with_retry(_send_file, label="send_video" if is_video else "send_document")

        else:
            # Fallback: send as plain text
            await self._send_with_retry(
                lambda: self._bot.send_message(text=str(content), **common),
                label="send_message(fallback)",
            )

        logger.info(f"[Telegram] sent reply (type={rtype}, chat_id={chat_id})")
    except Exception as e:
        logger.error(f"[Telegram] _async_send error: {e}", exc_info=True)
def _split_text(text: str, limit: int):
"""Split long text preferring line breaks to keep markdown structure intact."""
if len(text) <= limit:
yield text
return
buf = []
size = 0
for line in text.splitlines(keepends=True):
if size + len(line) > limit and buf:
yield "".join(buf)
buf, size = [], 0
# Hard-split single lines that exceed the limit
while len(line) > limit:
yield line[:limit]
line = line[limit:]
buf.append(line)
size += len(line)
if buf:
yield "".join(buf)

View File

@@ -0,0 +1,62 @@
"""
Telegram message adapter.
Convert a python-telegram-bot Update into cow's unified ChatMessage.
File downloads are NOT performed here; the channel layer triggers
bot.get_file() on demand because it requires the async event loop.
"""
import os
from bridge.context import ContextType
from channel.chat_message import ChatMessage
from common.utils import expand_path
from config import conf
class TelegramMessage(ChatMessage):
    """Unified ChatMessage view of a python-telegram-bot Update."""

    def __init__(self, update, is_group: bool = False, bot_username: str = "",
                 ctype: ContextType = ContextType.TEXT, content: str = ""):
        """Populate the ChatMessage fields from *update*.

        Args:
            update: The Telegram Update being wrapped.
            is_group: True when the message comes from a group chat.
            bot_username: Bot username, used as the receiving side's id/nick.
            ctype: Context type already determined by the channel layer.
            content: Text, or local file path for media messages.
        """
        super().__init__(update)
        message = update.effective_message
        chat = update.effective_chat
        user = update.effective_user

        # Basic fields
        self.msg_id = str(message.message_id) if message else ""
        has_date = message and message.date
        self.create_time = int(message.date.timestamp()) if has_date else 0
        self.ctype = ctype
        self.content = content

        # Sender: fall back to "unknown" when the update carries no user
        if user:
            sender_id = str(user.id)
            sender_nick = user.full_name or user.username
        else:
            sender_id = "unknown"
            sender_nick = "unknown"
        self.from_user_id = sender_id
        self.from_user_nickname = sender_nick or sender_id

        # Receiver is always the bot itself
        bot_name = bot_username or "telegram_bot"
        self.to_user_id = bot_name
        self.to_user_nickname = bot_name

        self.is_group = is_group
        if not is_group:
            self.other_user_id = sender_id
            self.other_user_nickname = self.from_user_nickname
        else:
            # Group: other_user_id = group_id, actual_user_id = sender id
            group_id = str(chat.id)
            self.other_user_id = group_id
            self.other_user_nickname = chat.title or group_id
            self.actual_user_id = sender_id
            self.actual_user_nickname = self.from_user_nickname

        # Whether the bot was triggered by @-mention or reply (set by channel layer)
        self.is_at = False

    @staticmethod
    def get_tmp_dir() -> str:
        """Return the download directory ``<agent_workspace>/tmp``, creating it if needed."""
        root = expand_path(conf().get("agent_workspace", "~/cow"))
        target = os.path.join(root, "tmp")
        os.makedirs(target, exist_ok=True)
        return target

View File

@@ -137,6 +137,11 @@
<i class="fas fa-sliders item-icon text-xs w-5 text-center"></i> <i class="fas fa-sliders item-icon text-xs w-5 text-center"></i>
<span data-i18n="menu_config">配置</span> <span data-i18n="menu_config">配置</span>
</a> </a>
<a class="sidebar-item flex items-center gap-3 px-3 py-2 rounded-lg cursor-pointer transition-all duration-150 hover:bg-white/5 hover:text-neutral-200 text-[14px]"
data-view="models">
<i class="fas fa-microchip item-icon text-xs w-5 text-center"></i>
<span data-i18n="menu_models">模型</span>
</a>
<a class="sidebar-item flex items-center gap-3 px-3 py-2 rounded-lg cursor-pointer transition-all duration-150 hover:bg-white/5 hover:text-neutral-200 text-[14px]" <a class="sidebar-item flex items-center gap-3 px-3 py-2 rounded-lg cursor-pointer transition-all duration-150 hover:bg-white/5 hover:text-neutral-200 text-[14px]"
data-view="skills"> data-view="skills">
<i class="fas fa-bolt item-icon text-xs w-5 text-center"></i> <i class="fas fa-bolt item-icon text-xs w-5 text-center"></i>
@@ -417,21 +422,30 @@
</button> </button>
</div> </div>
<div id="slash-menu" class="slash-menu hidden"></div> <div id="slash-menu" class="slash-menu hidden"></div>
<textarea id="chat-input" <div class="flex-1 min-w-0 relative flex items-center">
class="flex-1 min-w-0 px-4 py-[10px] rounded-xl border border-slate-200 dark:border-slate-600 <textarea id="chat-input"
bg-slate-50 dark:bg-white/5 text-slate-800 dark:text-slate-100 class="w-full pl-4 pr-11 py-[10px] rounded-xl border border-slate-200 dark:border-slate-600
placeholder:text-slate-400 dark:placeholder:text-slate-500 bg-slate-50 dark:bg-white/5 text-slate-800 dark:text-slate-100
focus:outline-none focus:ring-0 focus:border-primary-600 placeholder:text-slate-400 dark:placeholder:text-slate-500
text-sm leading-relaxed" focus:outline-none focus:ring-0 focus:border-primary-600
rows="1" text-sm leading-relaxed"
data-i18n-placeholder="input_placeholder" rows="1"
placeholder="输入消息,或输入 / 使用指令"></textarea> data-i18n-placeholder="input_placeholder"
placeholder="输入消息,或输入 / 使用指令"></textarea>
<button id="mic-btn" type="button"
class="absolute right-2 top-1/2 -translate-y-1/2 w-8 h-8 flex items-center justify-center rounded-lg
text-slate-400 hover:text-primary-500 hover:bg-primary-50 dark:hover:bg-primary-900/20
cursor-pointer transition-colors duration-150"
data-i18n-title="mic_idle_title" title="点击录音 / 再按一次结束">
<i class="fas fa-microphone text-sm"></i>
</button>
</div>
<button id="send-btn" <button id="send-btn"
class="flex-shrink-0 w-10 h-10 flex items-center justify-center rounded-lg class="flex-shrink-0 w-10 h-10 flex items-center justify-center rounded-lg
bg-primary-400 text-white hover:bg-primary-500 bg-primary-400 text-white hover:bg-primary-500
disabled:bg-slate-300 dark:disabled:bg-slate-600 disabled:bg-slate-300 dark:disabled:bg-slate-600
disabled:cursor-not-allowed cursor-pointer transition-colors duration-150" disabled:cursor-not-allowed cursor-pointer transition-colors duration-150"
disabled onclick="sendMessage()"> disabled>
<i class="fas fa-paper-plane text-sm"></i> <i class="fas fa-paper-plane text-sm"></i>
</button> </button>
</div> </div>
@@ -460,6 +474,11 @@
<i class="fas fa-microchip text-primary-500 text-sm"></i> <i class="fas fa-microchip text-primary-500 text-sm"></i>
</div> </div>
<h3 class="font-semibold text-slate-800 dark:text-slate-100" data-i18n="config_model">模型配置</h3> <h3 class="font-semibold text-slate-800 dark:text-slate-100" data-i18n="config_model">模型配置</h3>
<a class="ml-auto text-xs text-slate-500 dark:text-slate-400 hover:text-primary-500 dark:hover:text-primary-400 cursor-pointer transition-colors flex items-center gap-1"
onclick="navigateTo('models')">
<span data-i18n="config_model_advanced">高级配置</span>
<i class="fas fa-arrow-right text-[10px]"></i>
</a>
</div> </div>
<div class="space-y-5"> <div class="space-y-5">
<!-- Provider --> <!-- Provider -->
@@ -850,6 +869,41 @@
</div> </div>
</div> </div>
<!-- ====================================================== -->
<!-- VIEW: Models -->
<!-- ====================================================== -->
<div id="view-models" class="view">
<!-- Tailwind JIT safelist: capability-card icon colors are
emitted from JS template strings. Listing them here
(display:none) guarantees the CDN-side compiler picks
them up regardless of render timing. -->
<div class="hidden bg-blue-50 dark:bg-blue-900/30 text-blue-500
bg-orange-50 dark:bg-orange-900/30 text-orange-500
bg-purple-50 dark:bg-purple-900/30 text-purple-500
bg-amber-50 dark:bg-amber-900/30 text-amber-500
bg-primary-50 dark:bg-primary-900/30 text-primary-500"></div>
<div class="flex-1 overflow-y-auto p-6">
<div class="max-w-4xl mx-auto">
<div class="flex items-center justify-between mb-6">
<div>
<h2 class="text-xl font-bold text-slate-800 dark:text-slate-100" data-i18n="models_title">模型管理</h2>
<p class="text-sm text-slate-500 dark:text-slate-400 mt-1" data-i18n="models_desc">统一管理对话、视觉、语音、向量、图像、搜索能力</p>
</div>
<button id="models-add-vendor-btn" onclick="openVendorModal('')"
class="flex items-center gap-2 px-4 py-2 rounded-lg bg-primary-500 hover:bg-primary-600
text-white text-sm font-medium cursor-pointer transition-colors duration-150">
<i class="fas fa-plus text-xs"></i>
<span data-i18n="models_add_vendor">添加厂商</span>
</button>
</div>
<div id="models-loading" class="flex items-center gap-2 py-12 justify-center text-slate-400 dark:text-slate-500 text-sm">
<i class="fas fa-spinner fa-spin text-xs"></i><span>Loading...</span>
</div>
<div id="models-content" class="grid gap-6 hidden"></div>
</div>
</div>
</div>
<!-- ====================================================== --> <!-- ====================================================== -->
<!-- VIEW: Channels --> <!-- VIEW: Channels -->
<!-- ====================================================== --> <!-- ====================================================== -->
@@ -959,7 +1013,7 @@
</div><!-- /app --> </div><!-- /app -->
<!-- Confirm Dialog --> <!-- Confirm Dialog -->
<div id="confirm-dialog-overlay" class="fixed inset-0 bg-black/50 z-[100] hidden flex items-center justify-center"> <div id="confirm-dialog-overlay" class="fixed inset-0 bg-black/50 z-[200] hidden flex items-center justify-center">
<div class="bg-white dark:bg-[#1A1A1A] rounded-2xl border border-slate-200 dark:border-white/10 shadow-xl <div class="bg-white dark:bg-[#1A1A1A] rounded-2xl border border-slate-200 dark:border-white/10 shadow-xl
w-full max-w-sm mx-4 overflow-hidden"> w-full max-w-sm mx-4 overflow-hidden">
<div class="p-6"> <div class="p-6">
@@ -984,6 +1038,77 @@
</div> </div>
</div> </div>
<!-- Vendor Credentials Modal -->
<div id="vendor-modal-overlay" class="fixed inset-0 bg-black/50 z-[100] hidden flex items-center justify-center">
<div class="bg-white dark:bg-[#1A1A1A] rounded-2xl border border-slate-200 dark:border-white/10 shadow-xl
w-full max-w-md mx-4">
<div class="p-6">
<div class="flex items-center gap-3 mb-5">
<div class="w-10 h-10 rounded-xl bg-primary-50 dark:bg-primary-900/20 flex items-center justify-center flex-shrink-0">
<i class="fas fa-key text-primary-500"></i>
</div>
<div class="min-w-0 flex-1">
<h3 id="vendor-modal-title" class="font-semibold text-slate-800 dark:text-slate-100 text-base"></h3>
<p id="vendor-modal-subtitle" class="text-xs text-slate-500 dark:text-slate-400 mt-0.5 font-mono"></p>
</div>
</div>
<!-- Provider selector (only visible when adding via top button) -->
<div id="vendor-modal-picker-wrap" class="mb-4 hidden">
<label class="block text-sm font-medium text-slate-600 dark:text-slate-400 mb-1.5" data-i18n="models_provider">厂商</label>
<div id="vendor-modal-picker" class="cfg-dropdown" tabindex="0">
<div class="cfg-dropdown-selected">
<span class="cfg-dropdown-text">--</span>
<i class="fas fa-chevron-down cfg-dropdown-arrow"></i>
</div>
<div class="cfg-dropdown-menu"></div>
</div>
</div>
<div class="space-y-4">
<div>
<label class="block text-sm font-medium text-slate-600 dark:text-slate-400 mb-1.5">API Key</label>
<input id="vendor-modal-key" type="text" autocomplete="off" data-1p-ignore data-lpignore="true"
class="w-full px-3 py-2 rounded-lg border border-slate-200 dark:border-slate-600
bg-slate-50 dark:bg-white/5 text-sm text-slate-800 dark:text-slate-100
focus:outline-none focus:border-primary-500 font-mono transition-colors"
placeholder="sk-...">
</div>
<div id="vendor-modal-base-wrap">
<label class="block text-sm font-medium text-slate-600 dark:text-slate-400 mb-1.5">API Base</label>
<input id="vendor-modal-base" type="text"
class="w-full px-3 py-2 rounded-lg border border-slate-200 dark:border-slate-600
bg-slate-50 dark:bg-white/5 text-sm text-slate-800 dark:text-slate-100
focus:outline-none focus:border-primary-500 font-mono transition-colors"
placeholder="https://...../v1">
<p id="vendor-modal-base-hint" class="mt-1.5 text-xs text-slate-400 dark:text-slate-500 hidden">
<i class="fas fa-info-circle mr-1"></i><span data-i18n="models_base_default_hint">留空将使用官方默认地址</span>
</p>
</div>
</div>
</div>
<div class="flex items-center justify-between gap-3 px-6 py-4 border-t border-slate-100 dark:border-white/5 rounded-b-2xl">
<button id="vendor-modal-clear"
class="px-3 py-2 rounded-lg text-xs
text-red-500 dark:text-red-400 hover:bg-red-50 dark:hover:bg-red-900/20
cursor-pointer transition-colors duration-150 hidden"
data-i18n="models_clear_credential">清除凭据</button>
<span id="vendor-modal-status"
class="flex-1 text-xs text-primary-500 opacity-0 transition-opacity duration-300 text-center"></span>
<button id="vendor-modal-cancel"
class="px-4 py-2 rounded-lg border border-slate-200 dark:border-white/10
text-slate-600 dark:text-slate-300 text-sm font-medium
hover:bg-slate-50 dark:hover:bg-white/5
cursor-pointer transition-colors duration-150"
data-i18n="cancel">取消</button>
<button id="vendor-modal-save"
class="px-4 py-2 rounded-lg bg-primary-500 hover:bg-primary-600 text-white text-sm font-medium
cursor-pointer transition-colors duration-150 disabled:opacity-50 disabled:cursor-not-allowed"
data-i18n="save">保存</button>
</div>
</div>
</div>
<script defer src="assets/js/console.js"></script> <script defer src="assets/js/console.js"></script>
</body> </body>
</html> </html>

View File

@@ -725,6 +725,58 @@
background: rgba(74, 190, 110, 0.15); background: rgba(74, 190, 110, 0.15);
color: #74E9A4; color: #74E9A4;
} }
/* When an item carries a hint (e.g. brand alias next to a technical model
id), label/hint are split into two spans so the hint sits on the right in
a dim, smaller weight. Without a hint the row stays a plain text node and
uses the default ellipsis behaviour, so no layout regressions for old call
sites. */
.cfg-dropdown-label {
flex: 1 1 auto;
min-width: 0;
overflow: hidden;
text-overflow: ellipsis;
}
.cfg-dropdown-hint {
flex-shrink: 0;
margin-left: auto;
padding-left: 12px;
color: #94a3b8;
font-size: 12px;
font-weight: 400;
}
.dark .cfg-dropdown-hint {
color: #64748b;
}
.cfg-dropdown-item.active .cfg-dropdown-hint {
/* Tint the hint toward the brand colour on the active row so it doesn't
fight with the highlighted label tone. */
color: rgba(34, 133, 71, 0.65);
}
.dark .cfg-dropdown-item.active .cfg-dropdown-hint {
color: rgba(116, 233, 164, 0.6);
}
/* The active row gets a trailing brand-green checkmark via a Font Awesome
pseudo-element so every dropdown (chat / vision / image / asr / tts / etc.)
surfaces "this is what's currently selected" without per-call JS plumbing.
When a hint is present, the ✓ sits to its right with a small gap; without
a hint, margin-left:auto pushes the ✓ flush against the right edge. */
.cfg-dropdown-item.active::after {
content: '\f00c'; /* FontAwesome check glyph */
font-family: 'Font Awesome 6 Free', 'Font Awesome 5 Free', 'FontAwesome';
font-weight: 900;
margin-left: auto;
padding-left: 12px;
color: #4abe6e;
font-size: 11px;
flex-shrink: 0;
}
.cfg-dropdown-item.active:has(.cfg-dropdown-hint)::after {
/* When hint occupies the auto-margin slot, the ✓ no longer benefits
from `margin-left: auto`; replace it with a small fixed gap so the
✓ trails the hint cleanly. */
margin-left: 0;
padding-left: 10px;
}
/* API Key masking via CSS (avoids browser password prompts) */ /* API Key masking via CSS (avoids browser password prompts) */
.cfg-key-masked { .cfg-key-masked {
@@ -732,6 +784,77 @@
text-security: disc; text-security: disc;
} }
/* Provider logo image — vendors flagged as `provider-logo-invert-dark`
ship a black wordmark that disappears on the dark canvas; we invert their
luminance only in dark mode so the brand stays recognizable without
touching multi-color marks like Google/MiniMax. */
.provider-logo-img {
object-fit: contain;
object-position: center;
}
.dark .provider-logo-invert-dark {
filter: invert(1) brightness(1.15);
}
/* Models page — provider dropdown rows.
Configured rows look like ordinary picker entries; the .active row's
trailing brand-green ✓ already announces "this is what's selected"
(handled globally by .cfg-dropdown-item.active::after above).
Unconfigured rows are visually subdued and carry a trailing gear icon
as a "click to set up" affordance. */
.cap-provider-label {
flex: 1 1 auto;
overflow: hidden;
text-overflow: ellipsis;
}
.cap-provider-gear {
margin-left: auto;
padding-left: 12px;
color: #94a3b8;
font-size: 11px;
flex-shrink: 0;
}
.cap-provider-item.cap-provider-unconfigured {
color: #94a3b8;
}
.dark .cap-provider-item.cap-provider-unconfigured {
color: #64748b;
}
.cap-provider-item.cap-provider-unconfigured:hover {
color: #475569;
}
.dark .cap-provider-item.cap-provider-unconfigured:hover {
color: #cbd5e1;
}
.cap-provider-item.cap-provider-unconfigured:hover .cap-provider-gear {
color: #475569;
}
.dark .cap-provider-item.cap-provider-unconfigured:hover .cap-provider-gear {
color: #cbd5e1;
}
/* If the active row ever lands on an unconfigured vendor (defensive — the
click handler normally diverts to the modal), suppress the global ✓ so
the gear remains the sole trailing icon and the row keeps reading as
"needs setup" rather than "already selected". */
.cap-provider-item.cap-provider-unconfigured.active::after {
content: none;
}
/* "Add vendor" modal picker — each configured row carries a static
brand-green ✓ via decorateVendorModalPicker so users can see what's set
up at a glance. The active row's global ✓ is suppressed here to avoid
showing two checks side by side on configured + selected rows. */
.vendor-picker-item.active::after {
content: none;
}
.vendor-picker-configured-mark {
margin-left: auto;
padding-left: 12px;
color: #4abe6e;
font-size: 11px;
flex-shrink: 0;
}
/* Chat Input */ /* Chat Input */
#chat-input { #chat-input {
resize: none; height: 42px; max-height: 180px; resize: none; height: 42px; max-height: 180px;
@@ -1171,3 +1294,108 @@
overflow: hidden; overflow: hidden;
min-height: 2.5em; /* ~2 lines at text-sm leading-relaxed */ min-height: 2.5em; /* ~2 lines at text-sm leading-relaxed */
} }
/* --------------------------------------------------------------------
* Voice pill — compact custom audio player used by mic uploads and TTS
* replies. Replaces the bulky native <audio controls> with a play/pause
* icon + thin progress bar + duration counter so it blends into chat
* bubbles without the chrome-grey browser default look.
* ------------------------------------------------------------------ */
.voice-pill {
display: inline-flex;
align-items: center;
gap: 8px;
padding: 6px 10px;
border-radius: 999px;
background: rgba(15, 23, 42, 0.05);
color: rgb(71, 85, 105);
font-size: 12px;
line-height: 1;
max-width: 240px;
user-select: none;
cursor: default;
}
.dark .voice-pill {
background: rgba(255, 255, 255, 0.08);
color: rgb(203, 213, 225);
}
.voice-pill[data-loading="1"] {
opacity: 0.65;
}
.voice-pill-btn {
width: 22px;
height: 22px;
border-radius: 999px;
display: inline-flex;
align-items: center;
justify-content: center;
background: var(--color-primary-500, #2563eb);
color: #fff;
flex-shrink: 0;
cursor: pointer;
transition: transform 0.1s ease;
}
.voice-pill-btn:hover { transform: scale(1.05); }
.voice-pill-btn i { font-size: 9px; margin-left: 1px; }
.voice-pill-btn[data-state="play"] i { margin-left: 2px; }
.voice-pill-btn[data-state="pause"] i { margin-left: 0; }
.voice-pill-track {
flex: 1;
height: 3px;
border-radius: 999px;
background: rgba(100, 116, 139, 0.25);
overflow: hidden;
min-width: 70px;
}
.dark .voice-pill-track {
background: rgba(148, 163, 184, 0.25);
}
.voice-pill-fill {
height: 100%;
width: 0%;
background: var(--color-primary-500, #2563eb);
border-radius: inherit;
transition: width 0.1s linear;
}
.voice-pill-time {
font-variant-numeric: tabular-nums;
font-size: 11px;
color: inherit;
opacity: 0.75;
flex-shrink: 0;
min-width: 28px;
text-align: right;
}
.voice-pill audio { display: none; }
/* Send button toggles into a Stop button while an SSE stream is in flight.
Match the look of the disabled send button (light grey block + white
glyph) so it reads as the same visual element, just paused/idle from
sending perspective and clickable to stop. */
#send-btn.send-btn-cancel {
background-color: rgb(203 213 225) !important; /* slate-300, == disabled send-btn */
color: white !important;
}
#send-btn.send-btn-cancel:hover {
background-color: rgb(148 163 184) !important; /* slate-400 */
}
#send-btn.send-btn-cancel:disabled {
background-color: rgb(226 232 240) !important; /* slate-200, while stop is in flight */
color: white !important;
cursor: progress;
}
.dark #send-btn.send-btn-cancel {
background-color: rgb(71 85 105) !important; /* slate-600, == dark disabled send-btn */
color: white !important;
}
.dark #send-btn.send-btn-cancel:hover {
background-color: rgb(100 116 139) !important; /* slate-500 */
}
.dark #send-btn.send-btn-cancel:disabled {
background-color: rgb(51 65 85) !important; /* slate-700 */
color: rgb(203 213 225) !important;
}
.agent-cancelled-tag {
font-style: italic;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1 @@
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251656961" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="18432" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M252.8 652.8l167.893333-94.293333 2.773334-8.106667-2.773334-4.48h-8.106666l-28.16-1.706667-96-2.56-83.2-3.413333-80.64-4.266667-20.266667-4.266666L85.333333 504.746667l1.92-12.586667 17.066667-11.52 24.32 2.133333 53.973333 3.626667 81.066667 5.546667 58.666667 3.413333 87.04 9.173333h13.866666l1.92-5.546666-4.693333-3.413334-3.626667-3.413333-83.84-56.746667-90.666666-60.16-47.573334-34.56-25.813333-17.493333-13.013333-16.426667-5.546667-35.84 23.253333-25.813333 31.36 2.133333 7.893334 2.133334 31.786666 24.32 67.84 52.48L401.066667 391.466667l13.013333 10.88 5.12-3.626667 0.64-2.56-5.76-9.813333-48.213333-87.04L314.453333 210.773333l-22.826666-36.693333-5.973334-21.973333a107.861333 107.861333 0 0 1-3.626666-26.026667l26.666666-36.053333L323.413333 85.333333l35.413334 4.693334 14.933333 13.013333 21.973333 50.346667 35.626667 79.36 55.253333 107.733333 16.213334 32 8.746666 29.653333 3.2 9.173334h5.546667v-5.12l4.48-60.8 8.32-74.453334 8.106667-96 2.773333-27.093333 13.44-32.426667 26.666667-17.493333 20.693333 10.026667 17.066667 24.32-2.346667 15.786666-10.24 65.92-19.84 103.253334-13.013333 69.12h7.466666l8.746667-8.746667 34.986667-46.506667 58.666666-73.386666 26.026667-29.226667 30.293333-32.213333 19.413334-15.36h36.693333l27.093333 40.106666-12.16 41.386667-37.76 48-31.36 40.533333-45.013333 60.586667-28.16 48.426667 2.56 3.84 6.613333-0.64 101.546667-21.546667 54.826667-10.026667 65.493333-11.306666 29.653333 13.866666 3.2 14.08-11.733333 28.8-69.973333 17.28-82.133334 16.426667-122.24 29.013333-1.493333 1.066667 1.706667 2.133333 55.04 5.12 23.466666 1.28h57.6l107.306667 7.893334 28.16 18.56 16.853333 22.613333-2.773333 
17.28-43.306667 21.973333-58.24-13.866666-136.106666-32.426667-46.72-11.733333h-6.4v3.84l38.826666 37.973333 71.253334 64.426667 89.173333 82.986666 4.48 20.48-11.52 16.213334-12.16-1.706667-78.506667-58.88-30.293333-26.666667-68.48-57.6h-4.48v5.973334l15.786667 23.04 83.413333 125.226666 4.266667 38.4-5.973334 12.586667-21.546666 7.466667-23.68-4.266667-48.853334-68.48-50.346666-77.226667-40.533334-69.12-4.906666 2.773334-23.893334 258.133333-11.306666 13.226667-26.026667 10.026666-21.546667-16.426666-11.52-26.666667 11.52-52.48 13.866667-68.48 11.306667-54.4 10.24-67.626667 5.973333-22.4-0.426667-1.493333-4.906666 0.64-50.986667 69.973333-77.653333 104.746667-61.44 65.706667-14.72 5.76-25.386667-13.226667 2.346667-23.466667 14.293333-20.906666 84.906667-107.946667 51.2-66.986667 33.066666-38.613333v-5.546667h-2.133333l-225.493333 146.56-40.106667 5.12-17.28-16.213333 2.133333-26.666667 8.106667-8.746666 67.84-46.72h-0.213333l0.853333 0.853333z" fill="#D97757" p-id="18433"></path></svg>

After

Width:  |  Height:  |  Size: 2.9 KiB

View File

@@ -0,0 +1,10 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="200" height="200" fill="none" stroke="#475569" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round">
<!-- Horizontal slider tracks -->
<line x1="4" y1="7" x2="20" y2="7"/>
<line x1="4" y1="12" x2="20" y2="12"/>
<line x1="4" y1="17" x2="20" y2="17"/>
<!-- Knobs (filled circles) -->
<circle cx="9" cy="7" r="2.2" fill="#475569" stroke="none"/>
<circle cx="15" cy="12" r="2.2" fill="#475569" stroke="none"/>
<circle cx="7" cy="17" r="2.2" fill="#475569" stroke="none"/>
</svg>

After

Width:  |  Height:  |  Size: 573 B

View File

@@ -0,0 +1 @@
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251621200" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="17444" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M1019.364785 620.816931L891.797142 397.807295 946.450846 293.15069a29.097778 29.097778 0 0 0 6.399732-36.393472l-70.184053-126.586684a30.078737 30.078737 0 0 0-24.574968-13.652427H597.4945L539.171949 14.549389a27.348852 27.348852 0 0 0-20.906122-14.549389H380.628607a29.139776 29.139776 0 0 0-24.616967 14.549389v5.545767L225.797108 243.062793H100.919352a29.182775 29.182775 0 0 0-25.513928 13.653427L3.428446 384.11187a32.766624 32.766624 0 0 0 0 29.182775L132.831012 638.096205 74.508461 740.064923a32.766624 32.766624 0 0 0 0 29.05478l66.514207 116.561105a29.905744 29.905744 0 0 0 25.513929 14.505391H427.132654l62.845361 109.222414A30.078737 30.078737 0 0 0 512.762058 1024H660.382859a29.139776 29.139776 0 0 0 24.574968-14.549389l128.463606-224.843558h114.76818a31.91366 31.91366 0 0 0 24.660965-15.444352l66.471208-117.414069a28.158818 28.158818 0 0 0 0-30.9747l0.042999 0.042999z m-161.273228 14.591387L791.57735 512.490479 518.265827 993.964261l-74.748861-122.87484h-273.268525l65.618244-119.205994h139.386147L101.856313 272.244568h143.055993L380.671605 30.121735l68.34913 119.247993-70.184053 122.87484H925.501726l-69.202094 121.936879 137.594222 241.183873H858.134555z" fill="#605BEC" p-id="17445"></path><path d="M499.962596 699.320634l174.371677-274.719464H324.694955z" fill="#605BEC" p-id="17446"></path></svg>

After

Width:  |  Height:  |  Size: 1.6 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 5.1 KiB

View File

@@ -0,0 +1 @@
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779261485522" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="5381" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M958.976 439.808C804.864 336.896 642.56 321.536 642.56 321.536s8.192 235.008-10.752 306.176c-0.512 9.728-11.776 75.264-43.008 157.696-10.752 28.16-24.064 55.296-39.424 81.408-40.96 74.24-89.6 127.488-89.6 127.488 119.808-48.64 205.312-92.672 309.76-175.616 122.88-96.768 229.376-254.464 189.44-378.88z" fill="#37E1BE" p-id="5382"></path><path d="M329.728 395.776c158.208-100.864 308.736-78.848 312.32-74.752 0.512 0.512 1.024 0.512 1.024 0.512 0-14.336-6.656-60.928-13.312-106.496-11.776-60.928-22.528-124.928-23.04-133.632-170.496-139.264-356.864-78.336-448 25.6-61.44 70.144-103.424 169.984-102.4 224.256V762.88c0.512-12.8 1.536-20.48 2.048-20.48 17.92-197.12 271.36-346.624 271.36-346.624z" fill="#A569FF" p-id="5383"></path><path d="M792.064 272.384c-41.984-43.52-87.552-88.576-122.368-125.44-33.28-34.816-59.392-60.928-62.976-65.536 0.512 8.704 11.264 72.704 23.04 133.632 6.656 45.568 12.8 92.672 13.312 106.496 0 0 162.304 15.36 316.416 118.272-0.512 0-83.456-80.384-167.424-167.424zM549.888 866.816c-2.56 1.024-198.656 107.008-292.352-30.72-20.992-30.72-31.744-68.096-33.28-106.496-3.072-74.752 5.12-227.84 105.472-333.824 0 0-253.44 149.504-270.848 346.624-0.512 0.512-2.048 8.192-2.048 20.48-1.024 32.768 4.608 98.304 43.008 155.136 52.224 78.336 193.024 138.752 328.192 85.504l33.28-9.728c-1.024 0.512 47.616-52.224 88.576-126.976z" fill="#1E37FC" p-id="5384"></path></svg>

After

Width:  |  Height:  |  Size: 1.7 KiB

View File

@@ -0,0 +1 @@
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251750646" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="29551" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M214.101333 512c0-32.512 5.546667-63.701333 15.36-92.928L57.173333 290.218667A491.861333 491.861333 0 0 0 4.693333 512c0 79.701333 18.858667 154.88 52.394667 221.610667l172.202667-129.066667A290.56 290.56 0 0 1 214.101333 512" fill="#FBBC05" p-id="29552"></path><path d="M516.693333 216.192c72.106667 0 137.258667 25.002667 188.458667 65.962667L854.101333 136.533333C763.349333 59.178667 646.997333 11.392 516.693333 11.392c-202.325333 0-376.234667 113.28-459.52 278.826667l172.373334 128.853333c39.68-118.016 152.832-202.88 287.146666-202.88" fill="#EA4335" p-id="29553"></path><path d="M516.693333 807.808c-134.357333 0-247.509333-84.864-287.232-202.88l-172.288 128.853333c83.242667 165.546667 257.152 278.826667 459.52 278.826667 124.842667 0 244.053333-43.392 333.568-124.757333l-163.584-123.818667c-46.122667 28.458667-104.234667 43.776-170.026666 43.776" fill="#34A853" p-id="29554"></path><path d="M1005.397333 512c0-29.568-4.693333-61.44-11.648-91.008H516.650667V614.4h274.602666c-13.696 65.962667-51.072 116.650667-104.533333 149.632l163.541333 123.818667c93.994667-85.418667 155.136-212.650667 155.136-375.850667" fill="#4285F4" p-id="29555"></path></svg>

After

Width:  |  Height:  |  Size: 1.5 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 11 KiB

View File

@@ -0,0 +1 @@
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251514432" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="11888" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M415.392 475.808v329.984c-22.304 111.744-170.56 82.944-171.2 1.92-0.672-101.824 0-202.976 0-304.064v-117.184c0-14.656-3.2-26.24-16-35.392-24.96-18.72-54.944 3.264-55.584 30.208-1.408 36.16-0.704 71.616-1.408 107.264 0 28.16 0 55.52 0.64 83.648-18.368 123.776-168.32 103.232-171.808 0.704V487.04c0-28.032 54.944-34.624 52.256 7.36-1.792 20.8-0.64 42.272-1.344 62.912-0.64 36.8 55.648 61.6 68.896 1.408 0.64-49.632 0.64-99.264 0.64-149.344 0-62.752 17.824-113.856 84.352-118.624 28.8-2.56 47.968 9.504 66.336 30.304 7.04 7.36 23.68 30.72 24.32 56.16 0 23.456 0.64 46.752 0.64 70.464 0 46.72-0.64 93.76-0.64 140.48 0 30.304 0.64 60.256 0.64 89.856 0 37.536 0 75.552-0.64 113.152-0.64 48.864 58.816 48.16 68.352-0.768 0-57.632 0.64-114.56 0.64-172.192 0-141.984-0.64-283.968-0.64-425.856 0-14.72-2.048-55.584 5.76-70.464 41.504-101.12 167.392-56.96 168.544 26.72 2.432 171.52 0 344.896 0.64 516.8 0 59.616-48.416 46.816-51.104 23.488 0-178.88 0-358.4 0.64-537.024-2.368-44.832-68.832-38.72-72.672-6.592-1.28 36.864-0.64 74.4-1.28 111.232v219.008h0.64l0.448 0.256h-0.064z" fill="#D4367A" p-id="11889"></path><path d="M610.016 473.184v242.336V143.648c21.632-112.512 169.824-83.264 170.464-2.176 0.704 101.12 0 202.912 0.704 304 0 38.784 0 77.728-0.64 116.544 0 15.36 3.776 26.176 16.64 36.032 24.32 18.24 54.24-3.2 55.584-30.592 1.344-35.488 0.64-70.976 0.64-107.328V376.96c18.56-123.776 168.128-103.232 171.264-0.704v310.592c0 28.16-54.304 34.848-51.872-7.296 1.472-21.44 0-267.104 0.768-288.64 1.28-36.16-55.712-61.664-68.928-0.768v148.576c0 63.68-17.856 113.92-84.96 119.36-63.264 1.504-88.704-42.24-90.752-86.432V271.328c0-38.24 0-75.552 0.64-113.088 
0.64-48.864-58.784-48.864-68.896 0.704V831.36c0 14.592 2.048 55.52-5.184 70.432-41.44 101.056-168 56.864-169.152-26.752v-79.616c3.136-53.6 48.416-40.864 50.464-18.176v94.464c2.432 44.928 68.928 39.488 72.064 6.656 1.344-36.896 1.344-73.728 1.344-111.296v-293.824h-0.192v-0.064z" fill="#ED6D48" p-id="11890"></path></svg>

After

Width:  |  Height:  |  Size: 2.2 KiB

View File

@@ -0,0 +1 @@
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251592968" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="16416" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M117.9648 684.6464l342.30272 93.57312v75.34592l209.7152 58.5728A428.99456 428.99456 0 0 1 512 942.08c-176.128 0-327.53664-105.8816-394.0352-257.4336zM83.29216 477.42976l407.30624 112.64-9.6256 37.00736-6.0416 35.0208 383.3856 104.96a432.5376 432.5376 0 0 1-65.10592 70.32832l-688.18944-185.9584A429.4656 429.4656 0 0 1 81.92 512c0-11.63264 0.47104-23.1424 1.37216-34.54976z m57.344-182.4768l429.07648 114.21696a279.94112 279.94112 0 0 0-23.06048 35.55328 201.17504 201.17504 0 0 0-14.70464 34.93888l403.08736 110.26432a426.8032 426.8032 0 0 1-23.552 81.7152L86.54848 448.7168a427.25376 427.25376 0 0 1 54.0672-153.76384z m158.47424-156.75392l404.23424 108.31872a190.2592 190.2592 0 0 0-32.80896 24.90368c-9.13408 8.8064-19.8656 21.4016-32.1536 37.74464l285.24544 77.78304c9.216 30.45376 15.03232 61.8496 17.32608 93.5936L156.61056 269.68064a432.27136 432.27136 0 0 1 142.49984-131.4816zM512 81.92c142.90944 0 269.55776 69.71392 347.7504 176.98816L337.26464 118.90688A428.50304 428.50304 0 0 1 512 81.92z" fill="#000000" p-id="16417"></path></svg>

After

Width:  |  Height:  |  Size: 1.3 KiB

View File

@@ -0,0 +1 @@
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251225589" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="9015" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M881.664 431.488a218.88 218.88 0 0 0-18.176-177.088A218.624 218.624 0 0 0 628.992 149.76c-40.576-45.824-100.288-71.424-162.176-71.424a219.136 219.136 0 0 0-208 150.4 215.68 215.68 0 0 0-144 104.512 218.944 218.944 0 0 0 26.688 254.912 218.752 218.752 0 0 0 19.2 177.152 217.088 217.088 0 0 0 234.624 104.512 219.136 219.136 0 0 0 162.112 72.512 219.136 219.136 0 0 0 208-150.4 215.68 215.68 0 0 0 144-104.512 219.008 219.008 0 0 0-27.712-256z m-324.288 454.4a158.08 158.08 0 0 1-103.424-37.376c1.088-1.088 4.288-2.176 5.376-3.2l171.712-99.2a28.16 28.16 0 0 0 13.824-24.512V479.488l72.576 41.6c1.024 0 1.024 1.024 1.024 2.112v200.512a160.512 160.512 0 0 1-161.088 162.112z m-347.712-148.288c-19.2-33.088-25.6-71.488-19.2-108.8 1.088 1.024 3.2 2.176 5.376 3.2l171.712 99.2a25.984 25.984 0 0 0 27.712 0l210.112-121.6v84.224c0 1.152 0 2.176-1.024 2.176L430.464 796.16c-76.8 44.8-176 18.176-220.8-58.624z m-44.736-375.424c19.2-32.64 48.896-57.856 84.224-71.488v204.8c0 9.6 5.376 19.2 13.888 24.512l210.176 121.6-72.576 41.6c-1.024 0-2.112 1.088-2.112 0L224.64 582.912a160.448 160.448 0 0 1-59.776-220.8h0.064z m597.312 138.688l-210.112-121.6 72.512-41.6c1.088 0 2.176-1.088 2.176 0l173.824 100.224a161.088 161.088 0 0 1-25.6 291.2V525.44a26.304 26.304 0 0 0-12.8-24.512z m71.488-108.8a23.232 23.232 0 0 0-5.312-3.2L656.64 289.536a26.048 26.048 0 0 0-27.712 0l-210.176 121.6V326.912c0-1.088 0-2.176 1.088-2.176l173.824-100.224a161.152 161.152 0 0 1 220.8 59.712c19.2 32 25.6 70.4 19.2 107.776z m-454.4 149.248l-72.64-41.6c-1.024 0-1.024-1.088-1.024-2.176V297.088A162.048 162.048 0 0 1 467.84 135.04a158.08 158.08 0 0 1 103.424 37.312 22.848 22.848 0 0 1-5.312 3.2L394.24 
274.688a28.16 28.16 0 0 0-13.888 24.512v242.112h-1.088z m39.424-85.312l93.824-54.4 93.888 54.4v107.712l-93.888 54.4-93.824-54.4V456z" fill="#000000" p-id="9016"></path></svg>

After

Width:  |  Height:  |  Size: 2.1 KiB

View File

@@ -0,0 +1 @@
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251568791" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="14450" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M96.20121136 636.3124965c-0.1472897-113.41305959-0.29457937-226.8261192-0.29457937-340.23917879 0-14.87625845 7.65906378-26.51214381 20.4732666-34.02391789 45.51251353-26.65943349 91.02502705-53.31886698 136.83211997-79.53643141 71.1409192-40.94653321 142.42912809-81.59848704 213.71733698-122.39773055 7.36448439-4.12411126 14.58167909-8.3955122 21.50429441-13.2560719 19.44223878-13.40336159 39.03176725-16.05457598 60.09419263-3.53495252 27.39588193 16.34915535 54.93905355 32.25644163 82.48222516 48.16372793 88.0792333 50.96223197 176.30575629 101.77717426 264.38498958 152.59211653 9.86840908 5.74429781 19.88410785 11.19401627 29.60522725 17.0856038 14.13981003 8.54280189 21.50429441 21.06242535 21.50429443 37.70616007 0 147.73155685 0.29457937 295.46311371-0.1472897 443.19467057 0 15.46541722-7.2171947 28.57419943-21.7988738 36.96971163-34.7603663 20.17868721-70.55176044 38.88447758-104.57567833 59.94690293-48.90017634 30.19438599-100.00969801 56.11737105-148.76258466 86.60633642-29.01606849 18.11663161-59.50503387 34.02391789-89.11026112 50.96223197-13.10878221 7.51177407-26.07027474 15.17083783-39.03176726 22.9771913-13.84523065 8.3955122-27.83775099 8.83738127-41.97756102 0.73644843-56.41195043-32.55102101-112.82390085-65.10204201-169.38314098-97.653063-61.86166887-35.64410444-123.72333775-71.1409192-185.4377169-106.78502365-11.19401627-6.48074626-22.24074286-12.81420285-32.99289009-19.88410785-11.48859565-7.65906378-17.08560379-19.14765941-17.08560378-32.69831069-0.1472897-34.7603663 0.1472897-69.52073264 0.29457938-104.28109895 1.62018657-0.58915875 1.62018657-1.62018657-0.29457938-2.65121438z 
m356.58833414-225.500512c2.20934532-1.76747625 4.41869063-3.68224221 6.77532565-5.15513907 68.93157389-39.62092601 137.86314777-79.24185204 206.94201135-118.86277807 2.79850407-1.62018657 6.48074626-1.62018657 6.62803594-6.18616688 0.1472897-4.8605597-4.12411126-4.71327001-6.77532564-6.18616688-40.65195383-23.56635005-81.59848704-46.83812071-122.10315117-70.84633984-16.79102442-10.01569877-32.84560039-8.54280189-48.45830728 0.58915876-45.9543826 26.51214381-91.46689612 53.61344636-137.27398903 80.42016953-31.96186226 18.70579035-64.21830387 37.11700133-96.32745581 55.67550198-18.41121097 10.60485751-27.54317163 25.33382629-27.24859225 47.72185885 0.88373813 89.55213018 0.58915875 179.10426036 0.14728969 268.65639053-0.1472897 20.17868721 9.27925033 33.58204881 25.33382629 43.15587853 31.3727035 18.70579035 63.18727606 37.11700133 95.14913832 54.93905355 10.89943689 6.03887719 21.06242535 13.99252034 35.79139414 18.41121096V505.51925374c6.48074626 19.58952848 18.55850066 34.02391789 36.67513226 44.6287754 27.83775099 16.20186565 63.18727606 12.51962347 86.31175705-10.45756784 26.95401286-26.65943349 28.72148912-62.89269668 12.81420282-90.14128893-16.34915535-28.42690974-43.59774757-37.55887038-74.38129233-38.73718787z m82.48222517 429.64401928c14.28709972-3.82953187 25.92298506-13.99252034 38.88447758-21.35700473 40.94653321-23.27177067 81.30390766-47.72185885 122.54502023-70.55176046 26.95401286-15.02354815 52.87699792-31.66728287 80.71474891-45.21793415 16.79102442-8.10093283 29.60522723-22.53532223 29.60522726-43.4504579 0.1472897-92.939793 0.29457937-185.73229631 0.14728969-278.6720893 0-11.19401627-5.15513907-13.99252034-13.84523067-7.06990501-26.51214381 20.76784598-57.29568854 34.46578693-86.16446735 51.25681135-54.49718448 31.81457257-109.14165865 63.33456576-163.78613282 95.00184862-8.54280189 4.8605597-11.78317502 10.45756784-11.63588535 20.47326662 0.29457937 96.18016613 0.1472897 192.50762194 0.1472897 288.68778806-0.29457937 3.5349525-1.47289687 
7.65906378 3.38766282 10.8994369z" fill="#066AF3" p-id="14451"></path><path d="M96.20121136 636.3124965c1.91476594 1.03102783 1.91476594 2.06205563 0 3.09308345v-3.09308345z" fill="#4372E0" p-id="14452"></path><path d="M391.3697457 505.37196405c-5.44971845-44.33419602 13.84523065-74.08671296 61.4197998-94.55997955 30.93083443 1.17831749 58.03213699 10.31027814 74.38129233 38.5898982 15.75999659 27.39588193 14.13981003 63.48185543-12.81420282 90.14128893-23.27177067 22.97719129-58.47400606 26.65943349-86.31175705 10.45756783-18.11663161-10.60485751-30.34167568-25.03924691-36.67513226-44.62877541z" fill="#002A9A" p-id="14453"></path></svg>

After

Width:  |  Height:  |  Size: 4.5 KiB

View File

@@ -0,0 +1 @@
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251419020" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="10062" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M520.063496 0v77.563152c0 269.231173-144.758953 414.054122-434.212862 434.340854L86.106618 511.968002H76.827198V255.984001l443.236298-255.984001z" fill="#5B55F6" p-id="10063"></path><path d="M520.063496 1023.936004v-77.563152c0-269.231173-144.758953-414.054122-434.212862-434.340854L86.042622 511.968002H76.827198v255.984001l443.236298 255.984001z" fill="#376AF3" p-id="10064"></path><path d="M520.063496 0v77.563152c0 269.231173 144.758953 414.054122 434.276858 434.340854L954.08437 511.968002h9.215424V255.984001L520.063496 0z" fill="#5B55F6" p-id="10065"></path><path d="M520.063496 1023.936004v-77.563152c0-269.231173 144.758953-414.054122 434.276858-434.340854L954.08437 511.968002h9.27942v255.984001l-443.236298 255.984001z" fill="#376AF3" p-id="10066"></path></svg>

After

Width:  |  Height:  |  Size: 1.1 KiB

File diff suppressed because it is too large Load Diff

View File

@@ -103,14 +103,21 @@ class Query:
task_running = True task_running = True
waiting_until = request_time + 4 waiting_until = request_time + 4
while time.time() < waiting_until: while time.time() < waiting_until:
if from_user in channel.running: if from_user not in channel.running:
time.sleep(0.1)
else:
task_running = False task_running = False
break break
# Task still running, but if it has already produced cached
# segments (e.g. multi-turn thinking output), return them now
# instead of forcing the user to wait for the whole task. The
# remaining segments are fetched by the user's next message.
if channel.cache_dict.get(from_user):
break
time.sleep(0.1)
reply_text = "" reply_text = ""
if task_running: # Only fall back to retry / "thinking" hint when the task is still
# running AND there is nothing cached to send yet.
if task_running and not channel.cache_dict.get(from_user):
if request_cnt < 3: if request_cnt < 3:
# waiting for timeout (the POST request will be closed by Wechat official server) # waiting for timeout (the POST request will be closed by Wechat official server)
time.sleep(2) time.sleep(2)
@@ -131,8 +138,22 @@ class Query:
# Only one request can access to the cached data # Only one request can access to the cached data
try: try:
(reply_type, reply_content) = channel.cache_dict[from_user].pop(0) # WeChat passive reply allows only a single reply per request.
if not channel.cache_dict[from_user]: # If popping the message makes the list empty, delete the user entry from cache # To avoid forcing the user to send an extra message for every
# segment of multi-turn agent output, drain all consecutive
# cached text segments at once and merge them into one reply.
# Media (voice/image) can only be returned one at a time, so it
# stops the merge and is returned on its own.
cached = channel.cache_dict[from_user]
if cached[0][0] == "text":
reply_type = "text"
merged_parts = []
while cached and cached[0][0] == "text":
merged_parts.append(cached.pop(0)[1])
reply_content = "\n\n".join(merged_parts)
else:
(reply_type, reply_content) = cached.pop(0)
if not channel.cache_dict[from_user]: # If draining empties the list, delete the user entry from cache
del channel.cache_dict[from_user] del channel.cache_dict[from_user]
except IndexError: except IndexError:
return "success" return "success"

View File

@@ -134,10 +134,16 @@ class WechatMPChannel(ChatChannel):
elif reply.type == ReplyType.IMAGE_URL: # 从网络下载图片 elif reply.type == ReplyType.IMAGE_URL: # 从网络下载图片
img_url = reply.content img_url = reply.content
pic_res = requests.get(img_url, stream=True)
image_storage = io.BytesIO() image_storage = io.BytesIO()
for block in pic_res.iter_content(1024): if img_url.startswith("file://") or os.path.isfile(img_url):
image_storage.write(block) # Local file produced by the agent (e.g. a generated image)
local_path = img_url[len("file://"):] if img_url.startswith("file://") else img_url
with open(local_path, "rb") as f:
image_storage.write(f.read())
else:
pic_res = requests.get(img_url, stream=True)
for block in pic_res.iter_content(1024):
image_storage.write(block)
image_storage.seek(0) image_storage.seek(0)
image_type = imghdr.what(image_storage) image_type = imghdr.what(image_storage)
filename = receiver + "-" + str(context["msg"].msg_id) + "." + image_type filename = receiver + "-" + str(context["msg"].msg_id) + "." + image_type
@@ -258,10 +264,16 @@ class WechatMPChannel(ChatChannel):
logger.info("[wechatmp] Do send voice to {}".format(receiver)) logger.info("[wechatmp] Do send voice to {}".format(receiver))
elif reply.type == ReplyType.IMAGE_URL: # 从网络下载图片 elif reply.type == ReplyType.IMAGE_URL: # 从网络下载图片
img_url = reply.content img_url = reply.content
pic_res = requests.get(img_url, stream=True)
image_storage = io.BytesIO() image_storage = io.BytesIO()
for block in pic_res.iter_content(1024): if img_url.startswith("file://") or os.path.isfile(img_url):
image_storage.write(block) # Local file produced by the agent (e.g. a generated image)
local_path = img_url[len("file://"):] if img_url.startswith("file://") else img_url
with open(local_path, "rb") as f:
image_storage.write(f.read())
else:
pic_res = requests.get(img_url, stream=True)
for block in pic_res.iter_content(1024):
image_storage.write(block)
image_storage.seek(0) image_storage.seek(0)
image_type = imghdr.what(image_storage) image_type = imghdr.what(image_storage)
filename = receiver + "-" + str(context["msg"].msg_id) + "." + image_type filename = receiver + "-" + str(context["msg"].msg_id) + "." + image_type

View File

@@ -81,6 +81,8 @@ def _loads_wecom_ws_json(raw):
@singleton @singleton
class WecomBotChannel(ChatChannel): class WecomBotChannel(ChatChannel):
NOT_SUPPORT_REPLYTYPE = []
def __init__(self): def __init__(self):
super().__init__() super().__init__()
self.bot_id = "" self.bot_id = ""
@@ -438,6 +440,17 @@ class WecomBotChannel(ChatChannel):
state["current"] = "" state["current"] = ""
_push_stream(state, force=True) _push_stream(state, force=True)
elif event_type == "agent_cancelled":
# Flush partial output and strip trailing "---" separator
# left over from previous turn, to avoid a dangling divider.
if state["current"]:
state["committed"] += state["current"]
state["current"] = ""
state["committed"] = state["committed"].rstrip()
if state["committed"].endswith("---"):
state["committed"] = state["committed"][:-3].rstrip()
_push_stream(state, force=True)
return on_event return on_event
# ------------------------------------------------------------------ # ------------------------------------------------------------------
@@ -472,6 +485,8 @@ class WecomBotChannel(ChatChannel):
else: else:
context.type = ContextType.TEXT context.type = ContextType.TEXT
context.content = content.strip() context.content = content.strip()
if "desire_rtype" not in context and conf().get("always_reply_voice"):
context["desire_rtype"] = ReplyType.VOICE
return context return context
@@ -498,6 +513,8 @@ class WecomBotChannel(ChatChannel):
self._send_file(reply.content, receiver, is_group, req_id) self._send_file(reply.content, receiver, is_group, req_id)
elif reply.type == ReplyType.VIDEO or reply.type == ReplyType.VIDEO_URL: elif reply.type == ReplyType.VIDEO or reply.type == ReplyType.VIDEO_URL:
self._send_file(reply.content, receiver, is_group, req_id, media_type="video") self._send_file(reply.content, receiver, is_group, req_id, media_type="video")
elif reply.type == ReplyType.VOICE:
self._send_voice(reply.content, receiver, is_group, req_id)
else: else:
logger.warning(f"[WecomBot] Unsupported reply type: {reply.type}, falling back to text") logger.warning(f"[WecomBot] Unsupported reply type: {reply.type}, falling back to text")
self._send_text(str(reply.content), receiver, is_group, req_id) self._send_text(str(reply.content), receiver, is_group, req_id)
@@ -730,6 +747,65 @@ class WecomBotChannel(ChatChannel):
}, },
}) })
def _send_voice(self, voice_path: str, receiver: str, is_group: bool, req_id: str = None):
    """Send a native voice reply; WeCom voice media must be AMR.

    The voice source is resolved to a local file, converted to AMR when
    needed, uploaded as ``voice`` media and then sent over the websocket.
    Every failure is logged and the method returns without raising.

    Args:
        voice_path: Local path, ``file://`` URI, or ``http(s)://`` URL of
            the audio to send.
        receiver: Chat id used when sending proactively (no ``req_id``).
        is_group: Selects ``chat_type`` 2 (group) or 1 (single) for
            proactive sends.
        req_id: When truthy, the voice is sent as a response to that
            request (``aibot_respond_msg``); otherwise it is pushed with
            ``aibot_send_msg`` and a freshly generated request id.
    """
    # Function-scope imports keep this block self-contained, like the
    # lazy ``any_to_amr`` import below.
    import tempfile
    from urllib.parse import urlparse

    local_path = voice_path
    if local_path.startswith("file://"):
        local_path = local_path[7:]

    # Files created by this call that must not outlive it.
    temp_files = []
    try:
        if local_path.startswith(("http://", "https://")):
            try:
                resp = requests.get(local_path, timeout=60)
                resp.raise_for_status()
                # Take the extension from the URL *path* only, so a query
                # string such as "?sig=..." never ends up in the filename.
                ext = os.path.splitext(urlparse(local_path).path)[1] or ".mp3"
                # mkstemp picks the platform temp dir and a unique name
                # instead of a hard-coded "/tmp/..." path.
                fd, tmp_path = tempfile.mkstemp(prefix="wecom_voice_", suffix=ext)
                temp_files.append(tmp_path)
                with os.fdopen(fd, "wb") as f:
                    f.write(resp.content)
                local_path = tmp_path
            except Exception as e:
                logger.error(f"[WecomBot] Failed to download voice for sending: {e}")
                return

        if not os.path.exists(local_path):
            logger.error(f"[WecomBot] Voice file not found: {local_path}")
            return

        amr_path = local_path
        if not local_path.lower().endswith(".amr"):
            amr_path = os.path.splitext(local_path)[0] + ".amr"
            # Only an AMR derived from our own download is scheduled for
            # removal; one written next to a caller-supplied file is left
            # in place, exactly as before.
            if temp_files:
                temp_files.append(amr_path)
            try:
                from voice.audio_convert import any_to_amr
                any_to_amr(local_path, amr_path)
            except Exception as e:
                logger.error(f"[WecomBot] Failed to convert voice to amr: {e}")
                return

        # NOTE(review): assumes _upload_media has finished reading the file
        # by the time it returns the media id — confirm, since the temp
        # files are deleted right after this method finishes.
        media_id = self._upload_media(amr_path, "voice")
        if not media_id:
            logger.error("[WecomBot] Failed to upload voice media")
            return

        voice_body = {"msgtype": "voice", "voice": {"media_id": media_id}}
        if req_id:
            # Passive reply bound to the inbound request.
            self._ws_send({
                "cmd": "aibot_respond_msg",
                "headers": {"req_id": req_id},
                "body": voice_body,
            })
        else:
            # Proactive push addressed by chat id.
            self._ws_send({
                "cmd": "aibot_send_msg",
                "headers": {"req_id": self._gen_req_id()},
                "body": {
                    "chatid": receiver,
                    "chat_type": 2 if is_group else 1,
                    **voice_body,
                },
            })
    finally:
        # Best-effort cleanup; a file that was never created is not an error.
        for path in temp_files:
            try:
                os.remove(path)
            except FileNotFoundError:
                pass
            except OSError as e:
                logger.warning(f"[WecomBot] Failed to remove temp voice file {path}: {e}")
def _active_send_markdown(self, content: str, receiver: str, is_group: bool): def _active_send_markdown(self, content: str, receiver: str, is_group: bool):
"""Proactively send markdown message (for scheduled tasks, no req_id).""" """Proactively send markdown message (for scheduled tasks, no req_id)."""
self._ws_send({ self._ws_send({

View File

@@ -47,19 +47,24 @@ def _load_credentials(cred_path: str) -> dict:
def _save_credentials(cred_path: str, data: dict): def _save_credentials(cred_path: str, data: dict):
"""Save credentials to JSON file.""" """Atomically save credentials to JSON file (tmp + rename)."""
os.makedirs(os.path.dirname(cred_path), exist_ok=True) os.makedirs(os.path.dirname(cred_path), exist_ok=True)
with open(cred_path, "w") as f: tmp_path = f"{cred_path}.tmp"
with open(tmp_path, "w") as f:
json.dump(data, f, indent=2) json.dump(data, f, indent=2)
try: try:
os.chmod(cred_path, 0o600) os.chmod(tmp_path, 0o600)
except Exception: except Exception:
pass pass
os.replace(tmp_path, cred_path)
@singleton @singleton
class WeixinChannel(ChatChannel): class WeixinChannel(ChatChannel):
# ilink bot protocol has no outbound voice item; deliver TTS as a file.
NOT_SUPPORT_REPLYTYPE = []
LOGIN_STATUS_IDLE = "idle" LOGIN_STATUS_IDLE = "idle"
LOGIN_STATUS_WAITING = "waiting_scan" LOGIN_STATUS_WAITING = "waiting_scan"
LOGIN_STATUS_SCANNED = "scanned" LOGIN_STATUS_SCANNED = "scanned"
@@ -70,7 +75,10 @@ class WeixinChannel(ChatChannel):
self.api = None self.api = None
self._stop_event = threading.Event() self._stop_event = threading.Event()
self._poll_thread = None self._poll_thread = None
self._context_tokens = {} # user_id -> context_token # user_id -> context_token. Guarded by _context_tokens_lock for any
# mutation that races with disk persistence.
self._context_tokens = {}
self._context_tokens_lock = threading.Lock()
self._received_msgs = ExpiredDict(60 * 60 * 7.1) self._received_msgs = ExpiredDict(60 * 60 * 7.1)
self._get_updates_buf = "" self._get_updates_buf = ""
self._credentials_path = "" self._credentials_path = ""
@@ -92,12 +100,19 @@ class WeixinChannel(ChatChannel):
conf().get("weixin_credentials_path", "~/.weixin_cow_credentials.json") conf().get("weixin_credentials_path", "~/.weixin_cow_credentials.json")
) )
# Always load credentials so we can restore context_tokens even when
# the bot token itself comes from config.
creds = _load_credentials(self._credentials_path)
if not token: if not token:
creds = _load_credentials(self._credentials_path)
token = creds.get("token", "") token = creds.get("token", "")
if creds.get("base_url"): if creds.get("base_url"):
base_url = creds["base_url"] base_url = creds["base_url"]
# Restore persisted context_tokens so scheduler can deliver pushes
# immediately after restart, without waiting for the user to ping
# the bot first.
self._restore_context_tokens_from_creds(creds)
if not token: if not token:
token, base_url = self._login_with_retry(base_url) token, base_url = self._login_with_retry(base_url)
if not token: if not token:
@@ -137,11 +152,16 @@ class WeixinChannel(ChatChannel):
def _relogin(self) -> bool: def _relogin(self) -> bool:
"""Re-login after session expiry. Returns True on success.""" """Re-login after session expiry. Returns True on success."""
base_url = self.api.base_url if self.api else DEFAULT_BASE_URL base_url = self.api.base_url if self.api else DEFAULT_BASE_URL
if os.path.exists(self._credentials_path): # Clearing the whole credentials file is intentional: the new login
try: # will issue a fresh `token` and persisted context_tokens belong to
os.remove(self._credentials_path) # the previous bot identity, so they must not survive.
except Exception: with self._context_tokens_lock:
pass self._context_tokens.clear()
if os.path.exists(self._credentials_path):
try:
os.remove(self._credentials_path)
except Exception:
pass
self.login_status = self.LOGIN_STATUS_WAITING self.login_status = self.LOGIN_STATUS_WAITING
result = self._qr_login(base_url) result = self._qr_login(base_url)
if not result: if not result:
@@ -153,9 +173,62 @@ class WeixinChannel(ChatChannel):
cdn_base_url=self.api.cdn_base_url if self.api else CDN_BASE_URL, cdn_base_url=self.api.cdn_base_url if self.api else CDN_BASE_URL,
) )
self.login_status = self.LOGIN_STATUS_OK self.login_status = self.LOGIN_STATUS_OK
self._context_tokens.clear()
return True return True
# ── Context token persistence ──────────────────────────────────────
# ilink requires every outbound send to echo the context_token from the
# user's latest inbound message. We mirror the in-memory map into the
# credentials JSON so scheduled pushes survive process restarts.
# All mutation + disk IO is serialized via _context_tokens_lock so that
# concurrent updates can never lose each other's writes.
def _restore_context_tokens_from_creds(self, creds: dict) -> None:
    """Seed the in-memory context_token map from a loaded credentials dict.

    Reads ``creds["context_tokens"]`` and copies every entry whose key is a
    string and whose value is a non-empty string into
    ``self._context_tokens``. Anything else (non-dict ``creds``, missing or
    non-dict ``context_tokens``, malformed entries) is silently ignored.

    Args:
        creds: Parsed credentials JSON; tolerated to be any type.
    """
    persisted = creds.get("context_tokens") if isinstance(creds, dict) else None
    if not isinstance(persisted, dict):
        return

    # Validate outside the lock; only the actual map mutation is guarded.
    valid = {
        uid: tok
        for uid, tok in persisted.items()
        if isinstance(uid, str) and isinstance(tok, str) and tok
    }
    with self._context_tokens_lock:
        self._context_tokens.update(valid)

    restored = len(valid)
    if restored:
        logger.info(f"[Weixin] Restored {restored} context_tokens from credentials")
def _persist_context_tokens_locked(self) -> None:
    """Write the current token map into the credentials file.

    The caller must already hold ``_context_tokens_lock``; this method does
    not acquire it. The credentials file is re-read first so that its other
    fields are kept, and only the ``context_tokens`` entry is replaced.
    Persistence is best-effort: any failure is logged as a warning and
    swallowed. Does nothing when no credentials path is configured.
    """
    path = self._credentials_path
    if not path:
        return
    try:
        stored = _load_credentials(path) or {}
        # Snapshot copy so the saved payload is detached from the live map.
        stored["context_tokens"] = dict(self._context_tokens)
        _save_credentials(path, stored)
    except Exception as e:
        logger.warning(f"[Weixin] Failed to persist context_tokens: {e}")
def _update_context_token(self, user_id: str, token: str) -> None:
    """Record the latest context_token for a user.

    Empty ``user_id`` or ``token`` is ignored. The map is only written, and
    the credentials file only flushed, when the token differs from the one
    already cached for that user.

    Args:
        user_id: Sender id the token belongs to.
        token: context_token taken from the user's inbound message.
    """
    if not (user_id and token):
        return
    with self._context_tokens_lock:
        if self._context_tokens.get(user_id) != token:
            self._context_tokens[user_id] = token
            self._persist_context_tokens_locked()
def _invalidate_context_token(self, user_id: str) -> None:
    """Forget the cached context_token of one user and persist the change.

    Used after a send was rejected with -14. A missing or empty ``user_id``,
    or a user without a cached token, is a no-op (nothing is logged or
    written to disk).

    Args:
        user_id: Receiver whose cached token should be dropped.
    """
    if not user_id:
        return
    with self._context_tokens_lock:
        try:
            del self._context_tokens[user_id]
        except KeyError:
            # Nothing cached for this user; skip the log line and the flush.
            return
        logger.info(f"[Weixin] Invalidated stale context_token for {user_id}")
        self._persist_context_tokens_locked()
# ── QR Login ─────────────────────────────────────────────────────── # ── QR Login ───────────────────────────────────────────────────────
@staticmethod @staticmethod
@@ -388,7 +461,7 @@ class WeixinChannel(ChatChannel):
context_token = raw_msg.get("context_token", "") context_token = raw_msg.get("context_token", "")
if context_token and from_user: if context_token and from_user:
self._context_tokens[from_user] = context_token self._update_context_token(from_user, context_token)
cdn_base_url = self.api.cdn_base_url if self.api else CDN_BASE_URL cdn_base_url = self.api.cdn_base_url if self.api else CDN_BASE_URL
try: try:
@@ -464,6 +537,14 @@ class WeixinChannel(ChatChannel):
else: else:
context.type = ContextType.TEXT context.type = ContextType.TEXT
context.content = content.strip() context.content = content.strip()
if "desire_rtype" not in context and conf().get("always_reply_voice"):
context["desire_rtype"] = ReplyType.VOICE
elif ctype == ContextType.VOICE:
if "desire_rtype" not in context and (
conf().get("voice_reply_voice") or conf().get("always_reply_voice")
):
context["desire_rtype"] = ReplyType.VOICE
return context return context
@@ -486,6 +567,9 @@ class WeixinChannel(ChatChannel):
self._send_file(reply.content, receiver, context_token) self._send_file(reply.content, receiver, context_token)
elif reply.type in (ReplyType.VIDEO, ReplyType.VIDEO_URL): elif reply.type in (ReplyType.VIDEO, ReplyType.VIDEO_URL):
self._send_video(reply.content, receiver, context_token) self._send_video(reply.content, receiver, context_token)
elif reply.type == ReplyType.VOICE:
# ilink has no outbound voice item; deliver TTS as a file attachment.
self._send_file(reply.content, receiver, context_token)
else: else:
logger.warning(f"[Weixin] Unsupported reply type: {reply.type}, fallback to text") logger.warning(f"[Weixin] Unsupported reply type: {reply.type}, fallback to text")
self._send_text(str(reply.content), receiver, context_token) self._send_text(str(reply.content), receiver, context_token)
@@ -496,10 +580,30 @@ class WeixinChannel(ChatChannel):
return msg.context_token return msg.context_token
return self._context_tokens.get(receiver, "") return self._context_tokens.get(receiver, "")
def _check_send_response(self, resp, receiver: str) -> None:
    """Drop the receiver's cached context_token when a send reports -14.

    ilink signals an expired session (and therefore an unusable cached
    context_token) with ``ret`` or ``errcode`` equal to -14. In that case
    the per-user token is invalidated so later pushes do not keep retrying
    with it. Responses that are not dicts, or carry any other code, are
    ignored.

    Args:
        resp: Return value of a send-API call.
        receiver: User id the message was sent to.
    """
    if not isinstance(resp, dict):
        return
    codes = (resp.get("ret"), resp.get("errcode"))
    if not any(code == -14 for code in codes):
        return
    logger.warning(
        f"[Weixin] Send returned -14 (session expired) for "
        f"receiver={receiver}; dropping cached context_token"
    )
    self._invalidate_context_token(receiver)
def _send_text(self, text: str, receiver: str, context_token: str): def _send_text(self, text: str, receiver: str, context_token: str):
if len(text) <= TEXT_CHUNK_LIMIT: if len(text) <= TEXT_CHUNK_LIMIT:
try: try:
self.api.send_text(receiver, text, context_token) resp = self.api.send_text(receiver, text, context_token)
self._check_send_response(resp, receiver)
logger.debug(f"[Weixin] Text sent to {receiver}, len={len(text)}") logger.debug(f"[Weixin] Text sent to {receiver}, len={len(text)}")
except Exception as e: except Exception as e:
logger.error(f"[Weixin] Failed to send text: {e}") logger.error(f"[Weixin] Failed to send text: {e}")
@@ -508,7 +612,8 @@ class WeixinChannel(ChatChannel):
chunks = self._split_text(text, TEXT_CHUNK_LIMIT) chunks = self._split_text(text, TEXT_CHUNK_LIMIT)
for i, chunk in enumerate(chunks): for i, chunk in enumerate(chunks):
try: try:
self.api.send_text(receiver, chunk, context_token) resp = self.api.send_text(receiver, chunk, context_token)
self._check_send_response(resp, receiver)
logger.debug(f"[Weixin] Text chunk {i+1}/{len(chunks)} sent to {receiver}, len={len(chunk)}") logger.debug(f"[Weixin] Text chunk {i+1}/{len(chunks)} sent to {receiver}, len={len(chunk)}")
except Exception as e: except Exception as e:
logger.error(f"[Weixin] Failed to send text chunk {i+1}/{len(chunks)}: {e}") logger.error(f"[Weixin] Failed to send text chunk {i+1}/{len(chunks)}: {e}")
@@ -542,13 +647,14 @@ class WeixinChannel(ChatChannel):
return return
try: try:
result = upload_media_to_cdn(self.api, local_path, receiver, media_type=1) result = upload_media_to_cdn(self.api, local_path, receiver, media_type=1)
self.api.send_image_item( resp = self.api.send_image_item(
to=receiver, to=receiver,
context_token=context_token, context_token=context_token,
encrypt_query_param=result["encrypt_query_param"], encrypt_query_param=result["encrypt_query_param"],
aes_key_b64=result["aes_key_b64"], aes_key_b64=result["aes_key_b64"],
ciphertext_size=result["ciphertext_size"], ciphertext_size=result["ciphertext_size"],
) )
self._check_send_response(resp, receiver)
logger.info(f"[Weixin] Image sent to {receiver}") logger.info(f"[Weixin] Image sent to {receiver}")
except Exception as e: except Exception as e:
logger.error(f"[Weixin] Image send failed: {e}") logger.error(f"[Weixin] Image send failed: {e}")
@@ -561,7 +667,7 @@ class WeixinChannel(ChatChannel):
return return
try: try:
result = upload_media_to_cdn(self.api, local_path, receiver, media_type=3) result = upload_media_to_cdn(self.api, local_path, receiver, media_type=3)
self.api.send_file_item( resp = self.api.send_file_item(
to=receiver, to=receiver,
context_token=context_token, context_token=context_token,
encrypt_query_param=result["encrypt_query_param"], encrypt_query_param=result["encrypt_query_param"],
@@ -569,6 +675,7 @@ class WeixinChannel(ChatChannel):
file_name=os.path.basename(local_path), file_name=os.path.basename(local_path),
file_size=result["raw_size"], file_size=result["raw_size"],
) )
self._check_send_response(resp, receiver)
logger.info(f"[Weixin] File sent to {receiver}") logger.info(f"[Weixin] File sent to {receiver}")
except Exception as e: except Exception as e:
logger.error(f"[Weixin] File send failed: {e}") logger.error(f"[Weixin] File send failed: {e}")
@@ -581,13 +688,14 @@ class WeixinChannel(ChatChannel):
return return
try: try:
result = upload_media_to_cdn(self.api, local_path, receiver, media_type=2) result = upload_media_to_cdn(self.api, local_path, receiver, media_type=2)
self.api.send_video_item( resp = self.api.send_video_item(
to=receiver, to=receiver,
context_token=context_token, context_token=context_token,
encrypt_query_param=result["encrypt_query_param"], encrypt_query_param=result["encrypt_query_param"],
aes_key_b64=result["aes_key_b64"], aes_key_b64=result["aes_key_b64"],
ciphertext_size=result["ciphertext_size"], ciphertext_size=result["ciphertext_size"],
) )
self._check_send_response(resp, receiver)
logger.info(f"[Weixin] Video sent to {receiver}") logger.info(f"[Weixin] Video sent to {receiver}")
except Exception as e: except Exception as e:
logger.error(f"[Weixin] Video send failed: {e}") logger.error(f"[Weixin] Video send failed: {e}")

View File

@@ -1 +1 @@
2.0.8 2.0.9

View File

@@ -15,6 +15,7 @@ ZHIPU_AI = "zhipu"
MOONSHOT = "moonshot" MOONSHOT = "moonshot"
MiniMax = "minimax" MiniMax = "minimax"
DEEPSEEK = "deepseek" DEEPSEEK = "deepseek"
MIMO = "mimo" # 小米 MiMo 大模型
CUSTOM = "custom" # custom OpenAI-compatible API, bot_type won't auto-switch on model change CUSTOM = "custom" # custom OpenAI-compatible API, bot_type won't auto-switch on model change
MODELSCOPE = "modelscope" MODELSCOPE = "modelscope"
@@ -29,8 +30,9 @@ CLAUDE_35_SONNET = "claude-3-5-sonnet-latest" # 带 latest 标签的模型名
CLAUDE_35_SONNET_1022 = "claude-3-5-sonnet-20241022" # 带具体日期的模型名称,会固定为该日期发布的模型 CLAUDE_35_SONNET_1022 = "claude-3-5-sonnet-20241022" # 带具体日期的模型名称,会固定为该日期发布的模型
CLAUDE_35_SONNET_0620 = "claude-3-5-sonnet-20240620" CLAUDE_35_SONNET_0620 = "claude-3-5-sonnet-20240620"
CLAUDE_4_OPUS = "claude-opus-4-0" CLAUDE_4_OPUS = "claude-opus-4-0"
CLAUDE_4_8_OPUS = "claude-opus-4-8" # Claude Opus 4.8 - Agent推荐模型
CLAUDE_4_7_OPUS = "claude-opus-4-7" # Claude Opus 4.7 CLAUDE_4_7_OPUS = "claude-opus-4-7" # Claude Opus 4.7
CLAUDE_4_6_OPUS = "claude-opus-4-6" # Claude Opus 4.6 - Agent推荐模型 CLAUDE_4_6_OPUS = "claude-opus-4-6" # Claude Opus 4.6
CLAUDE_4_SONNET = "claude-sonnet-4-0" # Claude Sonnet 4.0 CLAUDE_4_SONNET = "claude-sonnet-4-0" # Claude Sonnet 4.0
CLAUDE_4_5_SONNET = "claude-sonnet-4-5" # Claude Sonnet 4.5 - Agent推荐模型 CLAUDE_4_5_SONNET = "claude-sonnet-4-5" # Claude Sonnet 4.5 - Agent推荐模型
CLAUDE_4_6_SONNET = "claude-sonnet-4-6" # Claude Sonnet 4.6 - Agent推荐模型 CLAUDE_4_6_SONNET = "claude-sonnet-4-6" # Claude Sonnet 4.6 - Agent推荐模型
@@ -47,6 +49,7 @@ GEMINI_3_FLASH_PRE = "gemini-3-flash-preview" # Gemini 3 Flash Preview - Agent
GEMINI_3_PRO_PRE = "gemini-3-pro-preview" # Gemini 3 Pro Preview GEMINI_3_PRO_PRE = "gemini-3-pro-preview" # Gemini 3 Pro Preview
GEMINI_31_PRO_PRE = "gemini-3.1-pro-preview" # Gemini 3.1 Pro Preview - Agent推荐模型 GEMINI_31_PRO_PRE = "gemini-3.1-pro-preview" # Gemini 3.1 Pro Preview - Agent推荐模型
GEMINI_31_FLASH_LITE_PRE = "gemini-3.1-flash-lite-preview" # Gemini 3.1 Flash Lite Preview - Agent推荐模型 GEMINI_31_FLASH_LITE_PRE = "gemini-3.1-flash-lite-preview" # Gemini 3.1 Flash Lite Preview - Agent推荐模型
GEMINI_35_FLASH = "gemini-3.5-flash" # Gemini 3.5 Flash - Agent推荐模型
# OpenAI # OpenAI
GPT35 = "gpt-3.5-turbo" GPT35 = "gpt-3.5-turbo"
@@ -74,6 +77,7 @@ GPT_5_NANO = "gpt-5-nano"
GPT_54 = "gpt-5.4" # GPT-5.4 - Agent recommended model GPT_54 = "gpt-5.4" # GPT-5.4 - Agent recommended model
GPT_54_MINI = "gpt-5.4-mini" GPT_54_MINI = "gpt-5.4-mini"
GPT_54_NANO = "gpt-5.4-nano" GPT_54_NANO = "gpt-5.4-nano"
GPT_55 = "gpt-5.5" # GPT-5.5 - top-tier (expensive), not default
O1 = "o1-preview" O1 = "o1-preview"
O1_MINI = "o1-mini" O1_MINI = "o1-mini"
WHISPER_1 = "whisper-1" WHISPER_1 = "whisper-1"
@@ -104,10 +108,12 @@ QWEN_LONG = "qwen-long"
QWEN3_MAX = "qwen3-max" # Qwen3 Max - Agent推荐模型 QWEN3_MAX = "qwen3-max" # Qwen3 Max - Agent推荐模型
QWEN35_PLUS = "qwen3.5-plus" # Qwen3.5 Plus - Omni model (MultiModalConversation) QWEN35_PLUS = "qwen3.5-plus" # Qwen3.5 Plus - Omni model (MultiModalConversation)
QWEN36_PLUS = "qwen3.6-plus" # Qwen3.6 Plus - Omni model (MultiModalConversation) QWEN36_PLUS = "qwen3.6-plus" # Qwen3.6 Plus - Omni model (MultiModalConversation)
QWEN37_MAX = "qwen3.7-max" # Qwen3.7 Max - Agent推荐模型
QWQ_PLUS = "qwq-plus" QWQ_PLUS = "qwq-plus"
# MiniMax # MiniMax
MINIMAX_M2_7 = "MiniMax-M2.7" # MiniMax M2.7 - Latest MINIMAX_M2_7 = "MiniMax-M2.7" # MiniMax M2.7 - Latest
MINIMAX_TEXT_01 = "MiniMax-Text-01" # MiniMax 多模态 (vision)
MINIMAX_M2_7_HIGHSPEED = "MiniMax-M2.7-highspeed" # MiniMax M2.7 highspeed MINIMAX_M2_7_HIGHSPEED = "MiniMax-M2.7-highspeed" # MiniMax M2.7 highspeed
MINIMAX_M2_5 = "MiniMax-M2.5" # MiniMax M2.5 MINIMAX_M2_5 = "MiniMax-M2.5" # MiniMax M2.5
MINIMAX_M2_1 = "MiniMax-M2.1" # MiniMax M2.1 MINIMAX_M2_1 = "MiniMax-M2.1" # MiniMax M2.1
@@ -119,6 +125,7 @@ MINIMAX_ABAB6_5 = "abab6.5-chat" # MiniMax abab6.5
GLM_5_1 = "glm-5.1" # 智谱 GLM-5.1 - Agent recommended model (default) GLM_5_1 = "glm-5.1" # 智谱 GLM-5.1 - Agent recommended model (default)
GLM_5_TURBO = "glm-5-turbo" # 智谱 GLM-5-Turbo GLM_5_TURBO = "glm-5-turbo" # 智谱 GLM-5-Turbo
GLM_5 = "glm-5" # 智谱 GLM-5 GLM_5 = "glm-5" # 智谱 GLM-5
GLM_5V_TURBO = "glm-5v-turbo" # 智谱多模态 (vision)
GLM_4 = "glm-4" GLM_4 = "glm-4"
GLM_4_PLUS = "glm-4-plus" GLM_4_PLUS = "glm-4-plus"
GLM_4_flash = "glm-4-flash" GLM_4_flash = "glm-4-flash"
@@ -135,6 +142,13 @@ KIMI_K2 = "kimi-k2"
KIMI_K2_5 = "kimi-k2.5" KIMI_K2_5 = "kimi-k2.5"
KIMI_K2_6 = "kimi-k2.6" # Kimi K2.6 - Agent recommended model (default) KIMI_K2_6 = "kimi-k2.6" # Kimi K2.6 - Agent recommended model (default)
# 小米 MiMo
MIMO_V2_5_PRO = "mimo-v2.5-pro" # MiMo V2.5 Pro - 旗舰,长上下文(默认推荐)
MIMO_V2_5 = "mimo-v2.5" # MiMo V2.5 - 多模态(文/图/音/视频)
MIMO_V2_PRO = "mimo-v2-pro" # MiMo V2 Pro
MIMO_V2_OMNI = "mimo-v2-omni" # MiMo V2 Omni - 多模态
MIMO_V2_FLASH = "mimo-v2-flash" # MiMo V2 Flash - 极速版
# Doubao (Volcengine Ark) # Doubao (Volcengine Ark)
DOUBAO = "doubao" DOUBAO = "doubao"
DOUBAO_SEED_2_CODE = "doubao-seed-2-0-code-preview-260215" DOUBAO_SEED_2_CODE = "doubao-seed-2-0-code-preview-260215"
@@ -177,13 +191,16 @@ MODEL_LIST = [
# MiniMax # MiniMax
MiniMax, MINIMAX_M2_7, MINIMAX_M2_7_HIGHSPEED, MINIMAX_M2_5, MINIMAX_M2_1, MINIMAX_M2_1_LIGHTNING, MINIMAX_M2, MINIMAX_ABAB6_5, MiniMax, MINIMAX_M2_7, MINIMAX_M2_7_HIGHSPEED, MINIMAX_M2_5, MINIMAX_M2_1, MINIMAX_M2_1_LIGHTNING, MINIMAX_M2, MINIMAX_ABAB6_5,
# 小米 MiMo
MIMO, MIMO_V2_5_PRO, MIMO_V2_5, MIMO_V2_PRO, MIMO_V2_OMNI, MIMO_V2_FLASH,
# Claude # Claude
CLAUDE3, CLAUDE_4_6_SONNET, CLAUDE_4_7_OPUS, CLAUDE_4_6_OPUS, CLAUDE_4_OPUS, CLAUDE_4_5_SONNET, CLAUDE_4_SONNET, CLAUDE_3_OPUS, CLAUDE_3_OPUS_0229, CLAUDE3, CLAUDE_4_8_OPUS, CLAUDE_4_7_OPUS, CLAUDE_4_6_SONNET, CLAUDE_4_6_OPUS, CLAUDE_4_OPUS, CLAUDE_4_5_SONNET, CLAUDE_4_SONNET, CLAUDE_3_OPUS, CLAUDE_3_OPUS_0229,
CLAUDE_35_SONNET, CLAUDE_35_SONNET_1022, CLAUDE_35_SONNET_0620, CLAUDE_3_SONNET, CLAUDE_3_HAIKU, CLAUDE_35_SONNET, CLAUDE_35_SONNET_1022, CLAUDE_35_SONNET_0620, CLAUDE_3_SONNET, CLAUDE_3_HAIKU,
"claude", "claude-3-haiku", "claude-3-sonnet", "claude-3-opus", "claude-3.5-sonnet", "claude", "claude-3-haiku", "claude-3-sonnet", "claude-3-opus", "claude-3.5-sonnet",
# Gemini # Gemini
GEMINI_31_FLASH_LITE_PRE, GEMINI_31_PRO_PRE, GEMINI_3_PRO_PRE, GEMINI_3_FLASH_PRE, GEMINI_25_PRO_PRE, GEMINI_25_FLASH_PRE, GEMINI_35_FLASH, GEMINI_31_FLASH_LITE_PRE, GEMINI_31_PRO_PRE, GEMINI_3_PRO_PRE, GEMINI_3_FLASH_PRE, GEMINI_25_PRO_PRE, GEMINI_25_FLASH_PRE,
GEMINI_20_FLASH, GEMINI_20_flash_exp, GEMINI_15_PRO, GEMINI_15_flash, GEMINI_PRO, GEMINI, GEMINI_20_FLASH, GEMINI_20_flash_exp, GEMINI_15_PRO, GEMINI_15_flash, GEMINI_PRO, GEMINI,
# OpenAI # OpenAI
@@ -193,7 +210,7 @@ MODEL_LIST = [
GPT_4o, GPT_4O_0806, GPT_4o_MINI, GPT_4o, GPT_4O_0806, GPT_4o_MINI,
GPT_41, GPT_41_MINI, GPT_41_NANO, GPT_41, GPT_41_MINI, GPT_41_NANO,
GPT_5, GPT_5_MINI, GPT_5_NANO, GPT_5, GPT_5_MINI, GPT_5_NANO,
GPT_54, GPT_54_MINI, GPT_54_NANO, GPT_54, GPT_55, GPT_54_MINI, GPT_54_NANO,
O1, O1_MINI, O1, O1_MINI,
# GLM (智谱AI) # GLM (智谱AI)
@@ -201,7 +218,7 @@ MODEL_LIST = [
GLM_4_0520, GLM_4_AIR, GLM_4_AIRX, GLM_4_7, GLM_4_0520, GLM_4_AIR, GLM_4_AIRX, GLM_4_7,
# Qwen (通义千问) # Qwen (通义千问)
QWEN36_PLUS, QWEN35_PLUS, QWEN3_MAX, QWEN_MAX, QWEN_PLUS, QWEN_TURBO, QWEN_LONG, QWEN37_MAX, QWEN36_PLUS, QWEN35_PLUS, QWEN3_MAX, QWEN_MAX, QWEN_PLUS, QWEN_TURBO, QWEN_LONG,
# Doubao (豆包) # Doubao (豆包)
DOUBAO, DOUBAO_SEED_2_CODE, DOUBAO_SEED_2_PRO, DOUBAO_SEED_2_LITE, DOUBAO_SEED_2_MINI, DOUBAO, DOUBAO_SEED_2_CODE, DOUBAO_SEED_2_PRO, DOUBAO_SEED_2_LITE, DOUBAO_SEED_2_MINI,
@@ -227,4 +244,6 @@ DINGTALK = "dingtalk"
WECOM_BOT = "wecom_bot" WECOM_BOT = "wecom_bot"
QQ = "qq" QQ = "qq"
WEIXIN = "weixin" WEIXIN = "weixin"
WECHAT_KF = "wechat_kf" # WeCom customer service (微信客服) channel WECHAT_KF = "wechat_kf"
TELEGRAM = "telegram"
SLACK = "slack"

View File

@@ -117,6 +117,18 @@ def expand_path(path: str) -> str:
return expanded return expanded
def is_cloud_deployment() -> bool:
    """Return True when a cloud deployment id is configured.

    The ``CLOUD_DEPLOYMENT_ID`` environment variable is checked first, then
    the ``cloud_deployment_id`` config entry. Any error while importing or
    querying the config is treated as "not a cloud deployment".
    """
    if os.environ.get("CLOUD_DEPLOYMENT_ID"):
        return True
    try:
        # Local import kept inside the try so a failing config lookup
        # degrades to False instead of raising.
        from config import conf
        return bool(conf().get("cloud_deployment_id"))
    except Exception:
        return False
def get_cloud_headers(api_key: str) -> dict: def get_cloud_headers(api_key: str) -> dict:
""" """
Build standard headers for LinkAI API requests, Build standard headers for LinkAI API requests,

View File

@@ -16,8 +16,8 @@
"open_ai_api_base": "https://api.openai.com/v1", "open_ai_api_base": "https://api.openai.com/v1",
"gemini_api_key": "", "gemini_api_key": "",
"gemini_api_base": "https://generativelanguage.googleapis.com", "gemini_api_base": "https://generativelanguage.googleapis.com",
"voice_to_text": "openai", "voice_to_text": "",
"text_to_voice": "openai", "text_to_voice": "",
"voice_reply_voice": false, "voice_reply_voice": false,
"speech_recognition": true, "speech_recognition": true,
"group_speech_recognition": false, "group_speech_recognition": false,

116
config.py
View File

@@ -173,6 +173,15 @@ available_setting = {
# 企微智能机器人配置(长连接模式) # 企微智能机器人配置(长连接模式)
"wecom_bot_id": "", # 企微智能机器人BotID "wecom_bot_id": "", # 企微智能机器人BotID
"wecom_bot_secret": "", # 企微智能机器人长连接Secret "wecom_bot_secret": "", # 企微智能机器人长连接Secret
# Telegram 配置
"telegram_token": "", # 从 @BotFather 申请的 bot token
"telegram_proxy": "", # 可选的 HTTP/SOCKS5 代理,例如 http://127.0.0.1:7890 或 socks5://127.0.0.1:1080留空则走系统环境变量
"telegram_group_trigger": "mention_or_reply", # 群聊触发方式: mention_or_reply(@或回复触发,推荐) | mention_only(仅@) | all(所有消息)
"telegram_register_commands": True, # 启动时是否自动向 BotFather 注册命令菜单(与 web 端 slash 命令一致)
# Slack 配置Socket Mode无需公网 IP
"slack_bot_token": "", # Bot User OAuth Token形如 xoxb-...
"slack_app_token": "", # App-Level Token开启 Socket Mode 后生成),形如 xapp-...
"slack_group_trigger": "mention_or_reply", # 频道触发方式: mention_or_reply(@或线程内回复,推荐) | mention_only(仅@) | all(所有消息)
# 微信配置 # 微信配置
"weixin_token": "", # 微信登录后获取的bot_token留空则启动时自动扫码登录 "weixin_token": "", # 微信登录后获取的bot_token留空则启动时自动扫码登录
"weixin_base_url": "https://ilinkai.weixin.qq.com", # Weixin ilink API base URL "weixin_base_url": "https://ilinkai.weixin.qq.com", # Weixin ilink API base URL
@@ -181,7 +190,7 @@ available_setting = {
# chatgpt指令自定义触发词 # chatgpt指令自定义触发词
"clear_memory_commands": ["#清除记忆"], # 重置会话指令,必须以#开头 "clear_memory_commands": ["#清除记忆"], # 重置会话指令,必须以#开头
# channel配置 # channel配置
"channel_type": "", # 通道类型,支持多渠道同时运行。单个: "feishu",多个: "feishu, dingtalk" 或 ["feishu", "dingtalk"]。可选值: web,feishu,dingtalk,wecom_bot,weixin,wechatmp,wechatmp_service,wechatcom_app,wechat_kf "channel_type": "", # 通道类型,支持多渠道同时运行。单个: "feishu",多个: "feishu, dingtalk" 或 ["feishu", "dingtalk"]。可选值: web,feishu,dingtalk,wecom_bot,weixin,wechatmp,wechatmp_service,wechatcom_app,wechat_kf,telegram,slack
"web_console": True, # 是否自动启动Web控制台默认启动。设为False可禁用 "web_console": True, # 是否自动启动Web控制台默认启动。设为False可禁用
"subscribe_msg": "", # 订阅消息, 支持: wechatmp, wechatmp_service, wechatcom_app "subscribe_msg": "", # 订阅消息, 支持: wechatmp, wechatmp_service, wechatcom_app
"debug": False, # 是否开启debug模式开启后会打印更多日志 "debug": False, # 是否开启debug模式开启后会打印更多日志
@@ -216,10 +225,14 @@ available_setting = {
"Minimax_base_url": "", "Minimax_base_url": "",
"deepseek_api_key": "", "deepseek_api_key": "",
"deepseek_api_base": "https://api.deepseek.com/v1", "deepseek_api_base": "https://api.deepseek.com/v1",
# 小米 MiMo 大模型
"mimo_api_key": "",
"mimo_api_base": "https://api.xiaomimimo.com/v1",
"web_host": "", # Web console bind address; empty means auto "web_host": "", # Web console bind address; empty means auto
"web_port": 9899, "web_port": 9899,
"web_password": "", # Web console password; empty means no authentication required "web_password": "", # Web console password; empty means no authentication required
"web_session_expire_days": 30, # Auth session expiry in days "web_session_expire_days": 30, # Auth session expiry in days
"web_file_serve_root": "~", # Root dir the /api/file endpoint may serve; "/" allows the whole filesystem
"agent": True, # 是否开启Agent模式 "agent": True, # 是否开启Agent模式
"agent_workspace": "~/cow", # agent工作空间路径用于存储skills、memory等 "agent_workspace": "~/cow", # agent工作空间路径用于存储skills、memory等
"agent_max_context_tokens": 50000, # Agent模式下最大上下文tokens "agent_max_context_tokens": 50000, # Agent模式下最大上下文tokens
@@ -337,8 +350,18 @@ def load_config():
config_str = read_file(config_path) config_str = read_file(config_path)
logger.debug("[INIT] config str: {}".format(drag_sensitive(config_str))) logger.debug("[INIT] config str: {}".format(drag_sensitive(config_str)))
# 将json字符串反序列化为dict类型 # 将json字符串反序列化为dict类型
config = Config(json.loads(config_str)) # `object_pairs_hook` lets us catch users who accidentally typed the
# same key twice (e.g. two `"tools"` blocks) — json.loads would
# otherwise silently drop all but the last occurrence.
config = Config(json.loads(config_str, object_pairs_hook=_merge_duplicate_keys))
# Migrate legacy singular keys (`tool`, `skill`) into the canonical
# plural buckets so the rest of the codebase only reads one schema.
# Deep-merge so existing `tools`/`skills` entries are preserved and
# only missing namespaces are filled in from the legacy section.
_merge_legacy_namespace(config, legacy="tool", canonical="tools")
_merge_legacy_namespace(config, legacy="skill", canonical="skills")
# override config with environment variables. # override config with environment variables.
# Some online deployment platforms (e.g. Railway) deploy project from github directly. So you shouldn't put your secrets like api key in a config file, instead use environment variables to override the default config. # Some online deployment platforms (e.g. Railway) deploy project from github directly. So you shouldn't put your secrets like api key in a config file, instead use environment variables to override the default config.
@@ -398,6 +421,8 @@ def load_config():
"minimax_api_base": "MINIMAX_API_BASE", "minimax_api_base": "MINIMAX_API_BASE",
"deepseek_api_key": "DEEPSEEK_API_KEY", "deepseek_api_key": "DEEPSEEK_API_KEY",
"deepseek_api_base": "DEEPSEEK_API_BASE", "deepseek_api_base": "DEEPSEEK_API_BASE",
"mimo_api_key": "MIMO_API_KEY",
"mimo_api_base": "MIMO_API_BASE",
"qianfan_api_key": "QIANFAN_API_KEY", "qianfan_api_key": "QIANFAN_API_KEY",
"qianfan_api_base": "QIANFAN_API_BASE", "qianfan_api_base": "QIANFAN_API_BASE",
"zhipu_ai_api_key": "ZHIPU_AI_API_KEY", "zhipu_ai_api_key": "ZHIPU_AI_API_KEY",
@@ -434,7 +459,7 @@ def load_config():
os.environ[env_key] = str(val) os.environ[env_key] = str(val)
injected += 1 injected += 1
injected += _sync_skill_config_to_env(config.get("skill", {})) injected += _sync_skill_config_to_env(config.get("skills", {}))
if injected: if injected:
logger.info("[INIT] Synced {} config values to environment variables".format(injected)) logger.info("[INIT] Synced {} config values to environment variables".format(injected))
@@ -442,11 +467,90 @@ def load_config():
config.load_user_datas() config.load_user_datas()
def _deep_merge_dicts(base: dict, incoming: dict) -> dict:
    """Merge ``incoming`` into ``base`` in place and return ``base``.

    Where both sides hold a dict under the same key the merge recurses;
    in every other case the value from ``incoming`` replaces (or adds to)
    the entry in ``base``.
    """
    for key, new_val in incoming.items():
        mergeable = (
            key in base
            and isinstance(base[key], dict)
            and isinstance(new_val, dict)
        )
        if not mergeable:
            base[key] = new_val
            continue
        _deep_merge_dicts(base[key], new_val)
    return base
def _merge_duplicate_keys(pairs):
    """``object_pairs_hook`` for ``json.loads`` that merges repeated keys.

    ``json.loads`` calls the hook once for every decoded JSON object, so
    repeated keys are merged at whatever nesting level they occur. For a key
    seen more than once within one object:

    * dict + dict  -> deep-merged, later values win on leaves;
    * list + list  -> concatenated in order;
    * anything else -> the later value replaces the earlier one.

    A warning naming the repeated keys is emitted per affected object.

    Args:
        pairs: Iterable of ``(key, value)`` tuples in document order.

    Returns:
        The resulting dict for that JSON object.
    """
    merged = {}
    repeated = set()
    for key, val in pairs:
        if key not in merged:
            merged[key] = val
            continue
        repeated.add(key)
        earlier = merged[key]
        if isinstance(earlier, dict) and isinstance(val, dict):
            _deep_merge_dicts(earlier, val)
        elif isinstance(earlier, list) and isinstance(val, list):
            earlier.extend(val)
        else:
            merged[key] = val
    if repeated:
        names = sorted(repeated)
        # The logger may not be usable yet at this point of startup; fall
        # back to print so the warning is never lost.
        try:
            logger.warning("[INIT] config.json has duplicate keys (merged): %s", names)
        except Exception:
            print("[INIT] config.json has duplicate keys (merged):", names)
    return merged
def _merge_legacy_namespace(cfg, legacy: str, canonical: str) -> None:
    """Fold a deprecated config section into its canonical counterpart.

    Moves the entries of ``cfg[legacy]`` (e.g. ``tool`` / ``skill``) into
    ``cfg[canonical]`` (``tools`` / ``skills``) and removes the legacy key.
    Canonical entries always win: a legacy entry only fills in names, and
    nested keys, that the canonical section does not define yet. A
    deprecation warning lists the names that were adopted wholesale.

    Args:
        cfg: Mutable config mapping, updated in place.
        legacy: Deprecated top-level key.
        canonical: Key the legacy entries are merged into.
    """
    old_section = cfg.get(legacy)
    if not (isinstance(old_section, dict) and old_section):
        # Nothing usable to migrate; just make sure the legacy key is gone.
        cfg.pop(legacy, None)
        return

    target = cfg.get(canonical)
    if not isinstance(target, dict):
        target = {}

    adopted = []
    for name, old_val in old_section.items():
        if name not in target:
            target[name] = old_val
            adopted.append(name)
            continue

        current = target[name]
        if not (isinstance(current, dict) and isinstance(old_val, dict)):
            # Name collision on non-dict values: keep the canonical one.
            continue

        for sub_key, sub_val in old_val.items():
            if (
                sub_key in current
                and isinstance(current[sub_key], dict)
                and isinstance(sub_val, dict)
            ):
                # Overlay canonical onto the legacy dict so canonical wins
                # on leaves, then store the combined result.
                _deep_merge_dicts(sub_val, current[sub_key])
                current[sub_key] = sub_val
            else:
                current.setdefault(sub_key, sub_val)

    cfg[canonical] = target
    cfg.pop(legacy, None)
    if adopted:
        logger.warning(
            "[INIT] Legacy config key '{}' is deprecated; merged into '{}': {}. "
            "Please rename '{}' to '{}' in your config.json.".format(
                legacy, canonical, adopted, legacy, canonical,
            )
        )
def _sync_skill_config_to_env(skill_section) -> int: def _sync_skill_config_to_env(skill_section) -> int:
"""Flatten skill-namespaced config into environment variables. """Flatten skill-namespaced config into environment variables.
Mapping rule: ``config["skill"][<name>][<key>]`` -> ``SKILL_<NAME>_<KEY>`` Mapping rule: ``config["skills"][<name>][<key>]`` -> ``SKILL_<NAME>_<KEY>``
(e.g. ``skill["image-generation"].model`` -> ``SKILL_IMAGE_GENERATION_MODEL``). (e.g. ``skills["image-generation"].model`` -> ``SKILL_IMAGE_GENERATION_MODEL``).
This lets subprocess-based skill scripts read their own settings without This lets subprocess-based skill scripts read their own settings without
importing project code. Existing env vars are NOT overwritten so the importing project code. Existing env vars are NOT overwritten so the

30
docs/README.md Normal file
View File

@@ -0,0 +1,30 @@
# Documentation
This directory contains the Mintlify documentation site for the project.
## Prerequisites
- Node.js v20.17.0 or higher (LTS recommended)
## Install the CLI (one-time, global)
```bash
npm i -g mint
```
## Run the docs locally
From this `docs/` directory:
```bash
mint dev
```
Then open http://localhost:3000 (or the port Mint reports if 3000 is in use).
> The first run downloads the Mint preview framework (~90 MB) into `~/.mintlify/`.
> Subsequent runs start instantly from the local cache.
## More
- Mintlify docs: https://www.mintlify.com/docs

43
docs/channels/index.mdx Normal file
View File

@@ -0,0 +1,43 @@
---
title: 通道概览
description: CowAgent 支持的通道及能力矩阵
---
CowAgent 支持接入多种聊天通道,启动时通过 `channel_type` 切换。Web 控制台默认开启,可与其他接入通道并行运行。
## 能力矩阵
下表汇总各通道支持的入站消息类型、机器人回复类型与群聊能力,方便按场景选择。
| 通道 | 文本 | 图片 | 文件 | 语音 | 群聊 |
| --- | :-: | :-: | :-: | :-: | :-: |
| [微信](/channels/weixin) | ✅ | ✅ | ✅ | ✅ | |
| [Web 控制台](/channels/web) | ✅ | ✅ | ✅ | ✅ | |
| [飞书](/channels/feishu) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [钉钉](/channels/dingtalk) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [企微智能机器人](/channels/wecom-bot) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [QQ](/channels/qq) | ✅ | ✅ | ✅ | | ✅ |
| [企业微信应用](/channels/wecom) | ✅ | ✅ | ✅ | ✅ | |
| [公众号](/channels/wechatmp) | ✅ | ✅ | | ✅ | |
| [Telegram](/channels/telegram) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [Slack](/channels/slack) | ✅ | ✅ | ✅ | | ✅ |
- **图片 / 文件 / 语音**列表示通道支持收发对应消息类型,具体细节详见各通道文档
- **群聊**列指可识别并响应群消息
<Tip>
每个通道的语音 / 图像能力依赖对应模型厂商的配置,详见 [模型概览](/models)。
</Tip>
## 通道一览
- [Web 控制台](/channels/web) — 内置浏览器对话和管理面板,默认开启
- [微信](/channels/weixin) — 通过个人微信扫码登录
- [飞书](/channels/feishu) — 飞书自建机器人
- [钉钉](/channels/dingtalk) — 钉钉自建机器人
- [企微智能机器人](/channels/wecom-bot) — 企业微信智能机器人
- [QQ](/channels/qq) — QQ 官方机器人开放平台
- [企业微信应用](/channels/wecom) — 企业微信自建应用接入
- [公众号](/channels/wechatmp) — 微信公众号(订阅号 / 服务号)
- [Telegram](/channels/telegram) — 海外 IM,5 分钟接入,无需公网 IP
- [Slack](/channels/slack) — 团队协作 IM,Socket Mode 接入,无需公网 IP

118
docs/channels/slack.mdx Normal file
View File

@@ -0,0 +1,118 @@
---
title: Slack
description: 将 CowAgent 接入 Slack App
---
> 通过 Slack App 的 **Socket Mode** 接入 CowAgent,支持私聊(DM)与频道(@机器人 / 线程内回复触发)。Socket Mode 基于长连接,无需公网 IP 与回调地址,开箱即用。
## 一、接入步骤
### 步骤一:创建 Slack App
1. 打开 [Slack API 应用管理页](https://api.slack.com/apps),点击 **Create New App** → **From scratch**。
2. 填写 **App Name**(如 `CowAgent`),选择要安装的 **Workspace**,点击创建。
### 步骤二:开启 Socket Mode 并获取 App Token
1. 左侧菜单进入 **Settings → Socket Mode**,打开 **Enable Socket Mode**。
2. 系统会提示生成一个 **App-Level Token**,作用域勾选 `connections:write`,生成后保存这串以 `xapp-` 开头的 Token。
<Tip>
Socket Mode 通过 WebSocket 长连接接收事件,无需在公网暴露回调 URL,适合本地或内网部署。
</Tip>
### 步骤三:配置 Bot 权限并安装
1. 进入 **Features → OAuth & Permissions**,在 **Bot Token Scopes** 中点击 **Add an OAuth Scope**,逐项添加以下权限:
```
app_mentions:read
channels:history
chat:write
commands
files:read
files:write
groups:history
im:history
mpim:history
users:read
```
<Note>
`files:read` / `files:write` 用于图片、文件的收发;若仅需文本对话可省略。
</Note>
2. 进入 **Features → Event Subscriptions**,打开 **Enable Events**,在 **Subscribe to bot events** 中点击 **Add Bot User Event** 添加以下事件:
```
app_mention
message.im
message.channels
```
<Note>
如需在私有频道使用,再添加 `message.groups`。
</Note>
3. 进入 **Features → App Home**,在 **Show Tabs** 区域勾选 **Messages Tab**,并勾选下方的 **Allow users to send Slash commands and messages from the messages tab**(允许用户从消息标签页发送消息),否则私聊输入框会被关闭、无法给机器人发消息。
4. 回到 **OAuth & Permissions**,点击 **Install to Workspace** 完成安装,安装后获取以 `xoxb-` 开头的 **Bot User OAuth Token**。
<Tip>
若 Slack 客户端仍提示「向此应用发送消息的功能已关闭」,请确认已完成上一步的 App Home 设置,并刷新或重启 Slack 客户端(必要时把 App 从对话列表移除后重新打开)。
</Tip>
### 步骤四:接入 CowAgent
<Tabs>
<Tab title="Web 控制台(推荐)">
打开 Web 控制台(本地链接:http://127.0.0.1:9899 ),选择 **通道** 菜单,点击 **接入通道**,选择 **Slack**,分别填入 Bot Token(`xoxb-`)和 App Token(`xapp-`),点击接入即可。
</Tab>
<Tab title="配置文件">
在 `config.json` 中添加以下配置后启动:
```json
{
"channel_type": "slack",
"slack_bot_token": "xoxb-xxxxxxxxxxxx",
"slack_app_token": "xapp-xxxxxxxxxxxx",
"slack_group_trigger": "mention_or_reply"
}
```
| 参数 | 说明 | 默认值 |
| --- | --- | --- |
| `slack_bot_token` | Bot User OAuth Token,形如 `xoxb-...` | - |
| `slack_app_token` | App-Level Token(开启 Socket Mode 后生成),形如 `xapp-...` | - |
| `slack_group_trigger` | 频道触发方式:`mention_or_reply`(@或线程内回复)/ `mention_only`(仅@)/ `all`(所有消息) | `mention_or_reply` |
</Tab>
</Tabs>
启动 Cow 后,日志中出现以下输出即表示接入成功:
```
[Slack] Bot logged in as user_id=U0XXXXXXX, team=Txxxxxxxx
[Slack] ✅ Slack bot ready, listening for events
```
## 二、功能说明
| 功能 | 支持情况 |
| --- | --- |
| 私聊(DM) | ✅ |
| 频道(@机器人 / 线程内回复) | ✅ |
| 文本消息 | ✅ 收发 |
| 图片消息 | ✅ 收发 |
| 文件消息 | ✅ 收发(PDF / Word / Excel 等) |
| 线程回复 | ✅ 回复发送至触发消息所在线程 |
<Note>
Slack 通过线程(Thread)组织对话。机器人会把回复发送到触发消息所在的线程,频道内更整洁。
</Note>
## 三、使用
完成接入后:
- **私聊(DM)**:在 Slack 左侧 **Apps** 中找到你的 App,直接发消息对话。
- **频道**:把 App 邀请进频道(`/invite @你的App`),使用 `@你的App 你好` 触发对话;后续在同一线程内直接回复即可继续对话。
发送图片或文件时,可以在附件的输入框中 **添加文字说明**(描述/问题)一并发送,机器人会结合附件回答。也支持先发附件再发问题,两条消息会自动合并提问。

112
docs/channels/telegram.mdx Normal file
View File

@@ -0,0 +1,112 @@
---
title: Telegram
description: 将 CowAgent 接入 Telegram Bot
---
> 通过 Telegram Bot API 接入 CowAgent,支持单聊与群聊(@机器人 / 回复机器人触发),使用 Long Polling 模式,无需公网 IP,开箱即用。
## 一、接入步骤
### 步骤一:通过 BotFather 创建 Bot
1. 在 Telegram 中搜索并打开官方账号 [@BotFather](https://t.me/BotFather)。
2. 发送 `/newbot` 命令,按提示输入:
- **Bot 名称**(显示名,可中文,例如 `My CowAgent Bot`)
- **Bot 用户名**(必须以 `bot` 结尾,例如 `my_cowagent_bot`)
3. 创建成功后,BotFather 会返回一段 **HTTP API Token**(形如 `123456789:ABCdefGhIJKlmNoPQRsTUVwxyZ`),妥善保存。
<Tip>
这个 Token 等同于 Bot 的密码,请勿泄露。如果意外泄漏可向 `@BotFather` 发送 `/revoke` 重置。
</Tip>
### 步骤二:(群聊使用)关闭 Privacy Mode
仅使用单聊可跳过此步。Telegram Bot 默认开启 **Privacy Mode**,群聊中只能收到带 `@bot` 的命令(如 `/start@your_bot`)以及对 bot 消息的 reply,**普通的 `@bot 你好` 文字消息收不到**,会导致群聊无响应。
向 `@BotFather` 发送:
1. `/setprivacy`
2. 选择刚才创建的 bot
3. 选择 `Disable`
<Note>
若设置后群聊仍无响应,可尝试把 Bot 从群里移除并重新拉入。
</Note>
### 步骤三:接入 CowAgent
<Tabs>
<Tab title="Web 控制台(推荐)">
打开 Web 控制台(本地链接:http://127.0.0.1:9899 ),选择 **通道** 菜单,点击 **接入通道**,选择 **Telegram**,填入 Bot Token,点击接入即可。
</Tab>
<Tab title="配置文件">
在 `config.json` 中添加以下配置后启动:
```json
{
"channel_type": "telegram",
"telegram_token": "123456789:ABCdefGhIJKlmNoPQRsTUVwxyZ",
"telegram_group_trigger": "mention_or_reply"
}
```
| 参数 | 说明 | 默认值 |
| --- | --- | --- |
| `telegram_token` | BotFather 返回的 HTTP API Token | - |
| `telegram_group_trigger` | 群聊触发方式:`mention_or_reply`(@或回复机器人)/ `mention_only`(仅@)/ `all`(所有消息) | `mention_or_reply` |
| `telegram_register_commands` | 启动时是否自动向 BotFather 注册命令菜单 | `true` |
| `telegram_proxy` | (可选)代理地址,如 `http://127.0.0.1:7890`、`socks5://127.0.0.1:1080`;运行环境无法直连 `api.telegram.org` 时配置,留空则使用环境变量 `HTTPS_PROXY` | `""` |
</Tab>
</Tabs>
启动 Cow 后,日志中出现以下输出即表示接入成功:
```
[Telegram] Bot logged in as @my_cowagent_bot (id=123456789)
[Telegram] Registered 10 bot commands
[Telegram] ✅ Telegram bot ready, polling for updates
```
## 二、功能说明
| 功能 | 支持情况 |
| --- | --- |
| 单聊 | ✅ |
| 群聊(@机器人 / 回复机器人) | ✅ |
| 文本消息 | ✅ 收发 |
| 图片消息 | ✅ 收发 |
| 语音消息 | ✅ 收发(接收 OGG/Opus,发送 OGG/Opus) |
| 视频消息 | ✅ 收发 |
| 文件消息 | ✅ 收发(PDF / Word / Excel 等) |
| 命令菜单 | ✅ 与 Web 控制台 slash 命令一致 |
### 命令菜单
启动时会自动向 BotFather 注册命令菜单,用户在 Telegram 输入框输入 `/` 会出现下拉提示:
| 命令 | 说明 |
| --- | --- |
| `/help` | 显示命令帮助 |
| `/status` | 查看运行状态 |
| `/context` | 查看对话上下文(`/context clear` 清除) |
| `/skill` | 技能管理(`/skill list`、`/skill install` 等) |
| `/memory` | 记忆管理(`/memory dream`) |
| `/knowledge` | 知识库管理(`/knowledge list` / `on` / `off`) |
| `/config` | 查看当前配置 |
| `/cancel` | 中止当前正在运行的 Agent 任务 |
| `/logs` | 查看最近日志 |
| `/version` | 查看版本 |
<Note>
Telegram 命令菜单只能展示一级命令,子命令通过空格输入即可,例如 `/skill list`、`/context clear`。
</Note>
## 三、使用
完成接入后:
- **单聊**:在 Telegram 中搜索你创建的 Bot 用户名(如 `@my_cowagent_bot`),点击 `Start` 即可开始对话。
- **群聊**:把 Bot 拉进群,使用 `@bot 你好` 或 **回复 Bot 的某条消息** 触发对话。若群聊无响应,请检查 Privacy Mode 是否已按 [步骤二](#步骤二-群聊使用-关闭-privacy-mode) 关闭。
发送图片或文件时,可以直接在附件上方的输入框中 **添加 Caption**(描述/问题)一并发送,机器人会结合附件回答。也支持先发附件再发问题,两条消息会自动合并提问。

View File

@@ -59,9 +59,9 @@ Web 控制台是 CowAgent 的默认通道,启动后会自动运行,通过浏
### 模型管理 ### 模型管理
支持在线管理模型配置,无需手动编辑配置文件: 支持在线管理不同模型厂商的文本、图像、语音、向量模型配置,无需手动编辑配置文件:
<img width="850" src="https://cdn.link-ai.tech/doc/20260227173811.png" /> <img width="850" src="https://cdn.link-ai.tech/doc/20260521212949.png" />
### 技能管理 ### 技能管理

View File

@@ -39,6 +39,14 @@ Mode: agent
Session: 12 messages | 8 skills loaded Session: 12 messages | 8 skills loaded
``` ```
## cancel
中止当前会话正在运行的 Agent 任务。在 Agent 执行长时间任务(例如多轮工具调用、长流式输出)时,可随时发送 `/cancel`,Agent 会在下一次工具执行前停止。Web 端、微信、企业微信、飞书等各通道均可使用。
```text
/cancel
```
## config ## config
查看或修改运行时配置。修改后立即生效,无需重启服务。 查看或修改运行时配置。修改后立即生效,无需重启服务。

View File

@@ -57,6 +57,7 @@ Others:
| --- | --- | | --- | --- |
| `/help` | 显示命令帮助 | | `/help` | 显示命令帮助 |
| `/status` | 查看服务状态和配置 | | `/status` | 查看服务状态和配置 |
| `/cancel` | 中止当前正在运行的 Agent 任务 |
| `/config` | 查看或修改运行时配置 | | `/config` | 查看或修改运行时配置 |
| `/skill` | 管理技能(安装、卸载、启用、禁用等) | | `/skill` | 管理技能(安装、卸载、启用、禁用等) |
| `/memory dream [N]` | 手动触发记忆蒸馏(默认 3 天,最大 30 | | `/memory dream [N]` | 手动触发记忆蒸馏(默认 3 天,最大 30 |
@@ -82,6 +83,7 @@ Others:
| version | ✓ | ✓ | | version | ✓ | ✓ |
| status | ✓ | ✓ | | status | ✓ | ✓ |
| logs | ✓ | ✓ | | logs | ✓ | ✓ |
| cancel | ✗ | ✓ |
| config | ✗ | ✓ | | config | ✗ | ✓ |
| context | — | ✓ | | context | — | ✓ |
| memory (子命令) | ✗ | ✓ | | memory (子命令) | ✗ | ✓ |

View File

@@ -38,6 +38,12 @@
{ {
"language": "zh", "language": "zh",
"default": true, "default": true,
"navbar": {
"links": [
{ "label": "官网", "href": "https://cowagent.ai/?lang=zh" },
{ "label": "GitHub", "href": "https://github.com/zhayujie/CowAgent" }
]
},
"tabs": [ "tabs": [
{ {
"tab": "项目介绍", "tab": "项目介绍",
@@ -82,6 +88,7 @@
"models/doubao", "models/doubao",
"models/kimi", "models/kimi",
"models/qianfan", "models/qianfan",
"models/mimo",
"models/linkai", "models/linkai",
"models/coding-plan", "models/coding-plan",
"models/custom" "models/custom"
@@ -181,6 +188,7 @@
{ {
"group": "接入渠道", "group": "接入渠道",
"pages": [ "pages": [
"channels/index",
"channels/weixin", "channels/weixin",
"channels/web", "channels/web",
"channels/feishu", "channels/feishu",
@@ -189,7 +197,9 @@
"channels/qq", "channels/qq",
"channels/wecom", "channels/wecom",
"channels/wechat-kf", "channels/wechat-kf",
"channels/wechatmp" "channels/wechatmp",
"channels/telegram",
"channels/slack"
] ]
} }
] ]
@@ -216,6 +226,7 @@
"group": "发布记录", "group": "发布记录",
"pages": [ "pages": [
"releases/overview", "releases/overview",
"releases/v2.0.9",
"releases/v2.0.8", "releases/v2.0.8",
"releases/v2.0.7", "releases/v2.0.7",
"releases/v2.0.6", "releases/v2.0.6",
@@ -233,6 +244,12 @@
}, },
{ {
"language": "en", "language": "en",
"navbar": {
"links": [
{ "label": "Website", "href": "https://cowagent.ai/" },
{ "label": "GitHub", "href": "https://github.com/zhayujie/CowAgent" }
]
},
"tabs": [ "tabs": [
{ {
"tab": "Introduction", "tab": "Introduction",
@@ -254,7 +271,8 @@
"group": "Installation", "group": "Installation",
"pages": [ "pages": [
"en/guide/quick-start", "en/guide/quick-start",
"en/guide/manual-install" "en/guide/manual-install",
"en/guide/upgrade"
] ]
} }
] ]
@@ -276,6 +294,7 @@
"en/models/doubao", "en/models/doubao",
"en/models/kimi", "en/models/kimi",
"en/models/qianfan", "en/models/qianfan",
"en/models/mimo",
"en/models/linkai", "en/models/linkai",
"en/models/coding-plan", "en/models/coding-plan",
"en/models/custom" "en/models/custom"
@@ -331,6 +350,7 @@
"pages": [ "pages": [
"en/skills/index", "en/skills/index",
"en/skills/install", "en/skills/install",
"en/skills/create",
"en/skills/hub" "en/skills/hub"
] ]
}, },
@@ -374,6 +394,7 @@
{ {
"group": "Platforms", "group": "Platforms",
"pages": [ "pages": [
"en/channels/index",
"en/channels/weixin", "en/channels/weixin",
"en/channels/web", "en/channels/web",
"en/channels/feishu", "en/channels/feishu",
@@ -382,7 +403,9 @@
"en/channels/qq", "en/channels/qq",
"en/channels/wecom", "en/channels/wecom",
"en/channels/wechat-kf", "en/channels/wechat-kf",
"en/channels/wechatmp" "en/channels/wechatmp",
"en/channels/telegram",
"en/channels/slack"
] ]
} }
] ]
@@ -397,7 +420,7 @@
"en/cli/process", "en/cli/process",
"en/cli/skill", "en/cli/skill",
"en/cli/memory-knowledge", "en/cli/memory-knowledge",
"en/cli/chat" "en/cli/general"
] ]
} }
] ]
@@ -409,6 +432,7 @@
"group": "Release Notes", "group": "Release Notes",
"pages": [ "pages": [
"en/releases/overview", "en/releases/overview",
"en/releases/v2.0.9",
"en/releases/v2.0.8", "en/releases/v2.0.8",
"en/releases/v2.0.7", "en/releases/v2.0.7",
"en/releases/v2.0.6", "en/releases/v2.0.6",
@@ -426,6 +450,12 @@
}, },
{ {
"language": "ja", "language": "ja",
"navbar": {
"links": [
{ "label": "ウェブサイト", "href": "https://cowagent.ai/" },
{ "label": "GitHub", "href": "https://github.com/zhayujie/CowAgent" }
]
},
"tabs": [ "tabs": [
{ {
"tab": "紹介", "tab": "紹介",
@@ -470,6 +500,7 @@
"ja/models/doubao", "ja/models/doubao",
"ja/models/kimi", "ja/models/kimi",
"ja/models/qianfan", "ja/models/qianfan",
"ja/models/mimo",
"ja/models/linkai", "ja/models/linkai",
"ja/models/coding-plan", "ja/models/coding-plan",
"ja/models/custom" "ja/models/custom"
@@ -569,6 +600,7 @@
{ {
"group": "プラットフォーム", "group": "プラットフォーム",
"pages": [ "pages": [
"ja/channels/index",
"ja/channels/weixin", "ja/channels/weixin",
"ja/channels/web", "ja/channels/web",
"ja/channels/feishu", "ja/channels/feishu",
@@ -577,7 +609,9 @@
"ja/channels/qq", "ja/channels/qq",
"ja/channels/wecom", "ja/channels/wecom",
"ja/channels/wechat-kf", "ja/channels/wechat-kf",
"ja/channels/wechatmp" "ja/channels/wechatmp",
"ja/channels/telegram",
"ja/channels/slack"
] ]
} }
] ]
@@ -604,6 +638,7 @@
"group": "リリースノート", "group": "リリースノート",
"pages": [ "pages": [
"ja/releases/overview", "ja/releases/overview",
"ja/releases/v2.0.9",
"ja/releases/v2.0.8", "ja/releases/v2.0.8",
"ja/releases/v2.0.7", "ja/releases/v2.0.7",
"ja/releases/v2.0.6", "ja/releases/v2.0.6",

View File

@@ -1,250 +0,0 @@
<p align="center"><img src="https://github.com/user-attachments/assets/eca9a9ec-8534-4615-9e0f-96c5ac1d10a3" alt="CowAgent" width="550" /></p>
<p align="center">
<a href="https://github.com/zhayujie/CowAgent/releases/latest"><img src="https://img.shields.io/github/v/release/zhayujie/CowAgent" alt="Latest release"></a>
<a href="https://github.com/zhayujie/CowAgent/blob/master/LICENSE"><img src="https://img.shields.io/github/license/zhayujie/CowAgent" alt="License: MIT"></a>
<a href="https://github.com/zhayujie/CowAgent"><img src="https://img.shields.io/github/stars/zhayujie/CowAgent?style=flat-square" alt="Stars"></a> <br/>
[<a href="https://github.com/zhayujie/CowAgent/blob/master/README.md">中文</a>] | [English] | [<a href="https://github.com/zhayujie/CowAgent/blob/master/docs/ja/README.md">日本語</a>]
</p>
**CowAgent** is an AI super assistant powered by LLMs, capable of autonomous task planning, operating computers and external resources, creating and executing Skills, and continuously growing with long-term memory and a personal knowledge base. It supports flexible model switching, handles text, voice, images, and files, and can be integrated into WeChat, Web, Feishu, DingTalk, WeCom Bot, WeCom App, and WeChat Official Account — running 7×24 hours on your personal computer or server.
<p align="center">
<a href="https://cowagent.ai/">🌐 Website</a> &nbsp;·&nbsp;
<a href="https://docs.cowagent.ai/en/intro/index">📖 Docs</a> &nbsp;·&nbsp;
<a href="https://docs.cowagent.ai/en/guide/quick-start">🚀 Quick Start</a> &nbsp;·&nbsp;
<a href="https://skills.cowagent.ai/">🧩 Skill Hub</a> &nbsp;·&nbsp;
<a href="https://link-ai.tech/cowagent/create">☁️ Try Online</a>
</p>
## Introduction
> CowAgent is both an out-of-the-box AI super assistant and a highly extensible Agent framework. You can extend it with new model interfaces, channels, built-in tools, and the Skills system to flexibly implement various customization needs.
-**Autonomous Task Planning**: Understands complex tasks and autonomously plans execution, continuously thinking and invoking tools until goals are achieved.
-**Long-term Memory**: Automatically persists conversation memory to local files and databases, including core memory, daily memory, and Deep Dream distillation, with keyword and vector retrieval support.
-**Personal Knowledge Base**: Automatically organizes structured knowledge with cross-references to build a knowledge graph, with web-based visualization and conversational management.
-**Skills System**: Implements a Skills creation and execution engine, supports installing skills from [Skill Hub](https://skills.cowagent.ai), GitHub, etc., or creating custom Skills through conversation.
-**Tool System**: Built-in tools for file I/O, terminal execution, browser automation, scheduled tasks, messaging, and more — autonomously invoked by the Agent.
-**CLI System**: Provides terminal commands and in-chat commands for process management, skill installation, configuration, and more.
-**Multimodal Messages**: Supports parsing, processing, generating, and sending text, images, voice, files, and other message types.
-**Multiple Model Support**: Supports DeepSeek, MiniMax, Claude, Gemini, OpenAI, GLM, Qwen, Doubao, Kimi, and other mainstream model providers.
-**Multi-platform Deployment**: Runs on local computers or servers, integrable into WeChat, Web, Feishu, DingTalk, WeChat Official Account, and WeCom applications.
## Disclaimer
1. This project follows the [MIT License](/LICENSE) and is intended for technical research and learning. Users must comply with local laws, regulations, policies, and corporate bylaws. Any illegal or rights-infringing use is prohibited.
2. Agent mode consumes more tokens than normal chat mode. Choose models based on effectiveness and cost. Agent has access to the host OS — please deploy in trusted environments.
3. CowAgent focuses on open-source development and does not participate in, authorize, or issue any cryptocurrency.
## Demo
Try online (no deployment needed): [CowAgent](https://link-ai.tech/cowagent/create)
## Changelog
> **2026.04.14:** [v2.0.6](https://github.com/zhayujie/CowAgent/releases/tag/2.0.6) — Knowledge Base, Deep Dream Memory Distillation, Smart Context Compression, Web Console upgrades.
> **2026.04.01:** [v2.0.5](https://github.com/zhayujie/CowAgent/releases/tag/2.0.5) — Cow CLI, Skill Hub open source, Browser tool, WeCom Bot QR scan, and more.
> **2026.02.27:** [v2.0.2](https://github.com/zhayujie/CowAgent/releases/tag/2.0.2) — Web console overhaul (streaming chat, model/skill/memory/channel/scheduler/log management), multi-channel concurrent running, session persistence, new models including Gemini 3.1 Pro / Claude 4.6 Sonnet / Qwen3.5 Plus.
> **2026.02.13:** [v2.0.1](https://github.com/zhayujie/CowAgent/releases/tag/2.0.1) — Built-in Web Search tool, smart context trimming, runtime info dynamic update, Windows compatibility, fixes for scheduler memory loss, Feishu connection issues, and more.
> **2026.02.03:** [v2.0.0](https://github.com/zhayujie/CowAgent/releases/tag/2.0.0) — Full upgrade to AI super assistant with multi-step task planning, long-term memory, built-in tools, Skills framework, new models, and optimized channels.
> **2025.05.23:** [v1.7.6](https://github.com/zhayujie/CowAgent/releases/tag/1.7.6) — Web channel optimization, AgentMesh multi-agent plugin, Baidu TTS, claude-4-sonnet/opus support.
> **2025.04.11:** [v1.7.5](https://github.com/zhayujie/CowAgent/releases/tag/1.7.5) — wechatferry protocol, DeepSeek model, Tencent Cloud voice, ModelScope and Gitee-AI support.
> **2024.12.13:** [v1.7.4](https://github.com/zhayujie/CowAgent/releases/tag/1.7.4) — Gemini 2.0 model, Web channel, memory leak fix.
Full changelog: [Release Notes](https://docs.cowagent.ai/en/releases/overview)
<br/>
## 🚀 Quick Start
The project provides a one-click script for installation, configuration, startup, and management:
**Linux / macOS:**
```bash
bash <(curl -fsSL https://cdn.link-ai.tech/code/cow/run.sh)
```
**Windows (PowerShell):**
```powershell
irm https://cdn.link-ai.tech/code/cow/run.ps1 | iex
```
After running, the Web service starts by default. Access `http://localhost:9899/chat` to chat.
Script usage: [One-click Install](https://docs.cowagent.ai/en/guide/quick-start). After installation, you can also use `cow start`, `cow stop`, and other [CLI commands](https://docs.cowagent.ai/en/cli/index) to manage the service.
### Manual Installation
**1. Clone the project**
```bash
git clone https://github.com/zhayujie/CowAgent
cd CowAgent/
```
**2. Install dependencies**
```bash
pip3 install -r requirements.txt
pip3 install -r requirements-optional.txt # optional but recommended
```
**3. Install Cow CLI (recommended)**
```bash
pip3 install -e .
```
After installation, use `cow` commands to manage the service (start, stop, update, etc.) and skills. See [Command Docs](https://docs.cowagent.ai/en/cli/index).
**4. Install browser (optional)**
If you need the Agent to operate a browser (visit web pages, fill forms, etc.):
```bash
cow install-browser
```
This auto-installs `playwright` and Chromium. See [Browser Tool Docs](https://docs.cowagent.ai/en/tools/browser).
**5. Configure**
```bash
cp config-template.json config.json
```
Fill in your model API key and channel type in `config.json`. See the [configuration docs](https://docs.cowagent.ai/en/guide/manual-install) for details.
**6. Run**
```bash
cow start # recommended, requires Cow CLI
python3 app.py # or run directly
```
For server deployment, use `cow` commands to manage the service:
```bash
cow start # start in background
cow stop # stop service
cow restart # restart service
cow status # check running status
cow logs # view logs
cow update # pull latest code and restart
```
Or use the traditional way:
```bash
nohup python3 app.py & tail -f nohup.out
```
### Docker Deployment
```bash
curl -O https://cdn.link-ai.tech/code/cow/docker-compose.yml
# Edit docker-compose.yml with your config
sudo docker compose up -d
sudo docker logs -f chatgpt-on-wechat
```
<br/>
## Models
Supports mainstream model providers. Recommended models for Agent mode:
| Provider | Recommended Model |
| --- | --- |
| DeepSeek | `deepseek-v4-flash` |
| MiniMax | `MiniMax-M2.7` |
| Claude | `claude-sonnet-4-6` |
| Gemini | `gemini-3.1-pro-preview` |
| OpenAI | `gpt-5.4` |
| GLM | `glm-5.1` |
| Qwen | `qwen3.6-plus` |
| Doubao | `doubao-seed-2-0-code-preview-260215` |
| Kimi | `kimi-k2.6` |
For detailed configuration of each model, see the [Models documentation](https://docs.cowagent.ai/en/models/index).
### Coding Plan
Coding Plan is a monthly subscription package offered by various providers, ideal for high-frequency Agent usage. All providers can be accessed via OpenAI-compatible mode:
```json
{
"bot_type": "openai",
"model": "MODEL_NAME",
"open_ai_api_base": "PROVIDER_CODING_PLAN_API_BASE",
"open_ai_api_key": "YOUR_API_KEY"
}
```
- `bot_type`: Must be `openai`
- `model`: Model name supported by the provider
- `open_ai_api_base`: Provider's Coding Plan API Base (different from standard pay-as-you-go)
- `open_ai_api_key`: Provider's Coding Plan API Key
> Note: Coding Plan API Base and API Key are usually separate from standard pay-as-you-go ones. Please obtain them from each provider's platform.
Supported providers include Alibaba Cloud, MiniMax, Zhipu GLM, Kimi, Volcengine, and more. For detailed configuration of each provider, see the [Coding Plan documentation](https://docs.cowagent.ai/en/models/coding-plan).
<br/>
## Channels
Supports multiple platforms. Set `channel_type` in `config.json` to switch:
| Channel | `channel_type` | Docs |
| --- | --- | --- |
| WeChat | `weixin` | [WeChat Setup](https://docs.cowagent.ai/en/channels/weixin) |
| Web (default) | `web` | [Web Channel](https://docs.cowagent.ai/en/channels/web) |
| Feishu | `feishu` | [Feishu Setup](https://docs.cowagent.ai/en/channels/feishu) |
| DingTalk | `dingtalk` | [DingTalk Setup](https://docs.cowagent.ai/en/channels/dingtalk) |
| WeCom Bot | `wecom_bot` | [WeCom Bot Setup](https://docs.cowagent.ai/en/channels/wecom-bot) |
| WeCom App | `wechatcom_app` | [WeCom Setup](https://docs.cowagent.ai/en/channels/wecom) |
| WeChat MP | `wechatmp` / `wechatmp_service` | [WeChat MP Setup](https://docs.cowagent.ai/en/channels/wechatmp) |
| Terminal | `terminal` | — |
Multiple channels can be enabled simultaneously, separated by commas: `"channel_type": "feishu,dingtalk"`.
<br/>
## Enterprise Services
<a href="https://link-ai.tech" target="_blank"><img width="720" src="https://cdn.link-ai.tech/image/link-ai-intro.jpg"></a>
> [LinkAI](https://link-ai.tech/) is a one-stop AI agent platform for enterprises and developers, integrating multimodal LLMs, knowledge bases, Agent plugins, and workflows. Supports one-click integration with mainstream platforms, SaaS and private deployment.
<br/>
## 🔗 Related Projects
- [Cow Skill Hub](https://github.com/zhayujie/cow-skill-hub): Open skill marketplace for AI Agents — browse, search, install, and publish skills for CowAgent, OpenClaw, Claude Code, and more.
- [bot-on-anything](https://github.com/zhayujie/bot-on-anything): Lightweight and highly extensible LLM application framework supporting Slack, Telegram, Discord, Gmail, and more.
- [AgentMesh](https://github.com/MinimalFuture/AgentMesh): Open-source Multi-Agent framework for complex problem solving through agent team collaboration.
## 🔎 FAQ
FAQs: <https://github.com/zhayujie/CowAgent/wiki/FAQs>
## 🛠️ Contributing
Welcome to add new channels, referring to the [Feishu channel](https://github.com/zhayujie/CowAgent/blob/master/channel/feishu/feishu_channel.py) as an example. Also welcome to contribute new Skills, see the [Skill Creation docs](https://docs.cowagent.ai/en/skills/create), or submit to [Skill Hub](https://skills.cowagent.ai/submit).
## ✉ Contact
Welcome to submit PRs and Issues, and support the project with a 🌟 Star. For questions, check the [FAQ list](https://github.com/zhayujie/CowAgent/wiki/FAQs) or search [Issues](https://github.com/zhayujie/CowAgent/issues).
## 🌟 Contributors
![cow contributors](https://contrib.rocks/image?repo=zhayujie/CowAgent&max=1000)

View File

@@ -15,8 +15,11 @@ description: Integrate CowAgent into Feishu via a custom enterprise app
No need to manually create an app on the Feishu Developer Platform. Start the Cow project, open the web console (default `http://127.0.0.1:9899/`), go to **Channels**, click **Add Channel**, choose **Feishu**, then under the **Scan QR** tab click **One-click Create Feishu App** and scan with the **Feishu App** to complete app creation and connection automatically. No need to manually create an app on the Feishu Developer Platform. Start the Cow project, open the web console (default `http://127.0.0.1:9899/`), go to **Channels**, click **Add Channel**, choose **Feishu**, then under the **Scan QR** tab click **One-click Create Feishu App** and scan with the **Feishu App** to complete app creation and connection automatically.
<img src="https://cdn.link-ai.tech/doc/20260505181126.png" width="800"/>
<Note> <Note>
The created app comes with all required permissions (messaging, card read/write, group events, etc.) and event subscriptions pre-configured. Currently only the Feishu mainland version is supported (Lark international not yet supported). 1. Requires `lark-oapi` ≥ 1.5.5.
2. The created app comes with all required permissions (messaging, card read/write, group events, etc.) and event subscriptions pre-configured — no manual setup on the developer console needed. Currently only the Feishu mainland version is supported (Lark international not yet supported).
</Note> </Note>
When starting from CLI without `feishu_app_id` configured, the QR code is also printed to the terminal. When starting from CLI without `feishu_app_id` configured, the QR code is also printed to the terminal.

View File

@@ -0,0 +1,43 @@
---
title: Channels Overview
description: Channels supported by CowAgent and their capability matrix
---
CowAgent supports multiple chat channels. Switch between them at startup via `channel_type`. The Web Console is enabled by default and can run in parallel with other channels.
## Capability Matrix
The table below summarizes the inbound message types, bot reply types, and group chat capabilities supported by each channel, making it easy to choose by scenario.
| Channel | Text | Image | File | Voice | Group Chat |
| --- | :-: | :-: | :-: | :-: | :-: |
| [WeChat](/en/channels/weixin) | ✅ | ✅ | ✅ | ✅ | |
| [Web Console](/en/channels/web) | ✅ | ✅ | ✅ | ✅ | |
| [Feishu](/en/channels/feishu) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [DingTalk](/en/channels/dingtalk) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [WeCom Bot](/en/channels/wecom-bot) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [QQ](/en/channels/qq) | ✅ | ✅ | ✅ | | ✅ |
| [WeCom App](/en/channels/wecom) | ✅ | ✅ | ✅ | ✅ | |
| [Official Account](/en/channels/wechatmp) | ✅ | ✅ | | ✅ | |
| [Telegram](/en/channels/telegram) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [Slack](/en/channels/slack) | ✅ | ✅ | ✅ | | ✅ |
- The **Image / File / Voice** columns indicate that the channel can send and receive the corresponding message types; see each channel's docs for details
- The **Group Chat** column indicates the ability to recognize and respond to group messages
<Tip>
The voice / image capabilities of each channel depend on the configuration of the corresponding model provider. See [Models Overview](/en/models/index) for details.
</Tip>
## Channel List
- [Web Console](/en/channels/web) — built-in browser-based chat and management panel, enabled by default
- [WeChat](/en/channels/weixin) — log in via personal WeChat QR scan
- [Feishu](/en/channels/feishu) — Feishu custom bot
- [DingTalk](/en/channels/dingtalk) — DingTalk custom bot
- [WeCom Bot](/en/channels/wecom-bot) — WeCom AI Bot via WebSocket long connection
- [QQ](/en/channels/qq) — QQ Official Bot open platform
- [WeCom App](/en/channels/wecom) — WeCom custom app integration
- [Official Account](/en/channels/wechatmp) — WeChat Official Account (subscription / service)
- [Telegram](/en/channels/telegram) — global IM, 5-minute setup, no public IP needed
- [Slack](/en/channels/slack) — team collaboration IM, Socket Mode integration, no public IP needed

118
docs/en/channels/slack.mdx Normal file
View File

@@ -0,0 +1,118 @@
---
title: Slack
description: Integrate CowAgent with a Slack App
---
> Integrate CowAgent into Slack via a Slack App in **Socket Mode**. Supports direct messages (DM) and channels (triggered by @mention or replying within a thread). Socket Mode uses a persistent WebSocket connection — no public IP or callback URL required, works out of the box.
## 1. Setup
### Step 1: Create a Slack App
1. Open the [Slack API apps page](https://api.slack.com/apps), click **Create New App** → **From scratch**.
2. Enter an **App Name** (e.g. `CowAgent`), pick the **Workspace** to install into, and create it.
### Step 2: Enable Socket Mode and get the App Token
1. In the left sidebar go to **Settings → Socket Mode** and turn on **Enable Socket Mode**.
2. You will be prompted to generate an **App-Level Token** with the `connections:write` scope. Save this token starting with `xapp-`.
<Tip>
Socket Mode receives events over a WebSocket connection, so you don't need to expose a public callback URL — ideal for local or intranet deployments.
</Tip>
### Step 3: Configure bot scopes and install
1. Go to **Features → OAuth & Permissions**, click **Add an OAuth Scope** under **Bot Token Scopes**, and add the following scopes one by one:
```
app_mentions:read
channels:history
chat:write
commands
files:read
files:write
groups:history
im:history
mpim:history
users:read
```
<Note>
`files:read` / `files:write` are used for sending/receiving images and files; omit them if you only need text conversations.
</Note>
2. Go to **Features → Event Subscriptions**, turn on **Enable Events**, and under **Subscribe to bot events** click **Add Bot User Event** to add:
```
app_mention
message.im
message.channels
```
<Note>
Add `message.groups` if you need to use the bot in private channels.
</Note>
3. Go to **Features → App Home**, enable **Messages Tab** under **Show Tabs**, and check **Allow users to send Slash commands and messages from the messages tab**. Otherwise the DM input box is disabled and users cannot message the bot.
4. Back in **OAuth & Permissions**, click **Install to Workspace**. After installing, copy the **Bot User OAuth Token** starting with `xoxb-`.
<Tip>
If the Slack client still shows "Sending messages to this app has been turned off", make sure you completed the App Home step above, then refresh or restart the Slack client (remove the app from your conversations and reopen it if needed).
</Tip>
### Step 4: Connect to CowAgent
<Tabs>
<Tab title="Web Console (Recommended)">
Open the Web Console (default `http://127.0.0.1:9899`), go to **Channels**, click **Add Channel**, choose **Slack**, paste the Bot Token (`xoxb-`) and App Token (`xapp-`), and click connect.
</Tab>
<Tab title="Config File">
Add the following to `config.json` and start Cow:
```json
{
"channel_type": "slack",
"slack_bot_token": "xoxb-xxxxxxxxxxxx",
"slack_app_token": "xapp-xxxxxxxxxxxx",
"slack_group_trigger": "mention_or_reply"
}
```
| Key | Description | Default |
| --- | --- | --- |
| `slack_bot_token` | Bot User OAuth Token, like `xoxb-...` | - |
| `slack_app_token` | App-Level Token (generated after enabling Socket Mode), like `xapp-...` | - |
| `slack_group_trigger` | Channel trigger: `mention_or_reply` (@ or reply in thread) / `mention_only` (@ only) / `all` (all messages) | `mention_or_reply` |
</Tab>
</Tabs>
The integration is ready when you see logs like:
```
[Slack] Bot logged in as user_id=U0XXXXXXX, team=Txxxxxxxx
[Slack] ✅ Slack bot ready, listening for events
```
## 2. Capabilities
| Feature | Support |
| --- | --- |
| Direct message (DM) | ✅ |
| Channel (@bot / reply in thread) | ✅ |
| Text messages | ✅ send / receive |
| Image messages | ✅ send / receive |
| File messages | ✅ send / receive (PDF / Word / Excel, etc.) |
| Thread replies | ✅ replies are posted to the thread of the triggering message |
<Note>
Slack organizes conversations into threads. The bot posts replies into the thread of the triggering message, keeping channels tidy.
</Note>
## 3. Usage
Once connected:
- **Direct message (DM)**: find your App under **Apps** in the Slack sidebar and message it directly.
- **Channel**: invite the App into a channel (`/invite @your-app`), then trigger it with `@your-app hello`; continue the conversation by replying within the same thread.
When sending an image or file, you can **add a text caption** (description / question) in the attachment input — the bot will answer based on both. Sending an attachment first and then a follow-up question also works; the two messages are merged automatically.

View File

@@ -0,0 +1,111 @@
---
title: Telegram
description: Integrate CowAgent with Telegram via the Bot API
---
> Integrate CowAgent into Telegram via the official Bot API. Supports private chat and group chat (triggered by @mention or replying to the bot). Uses Long Polling — no public IP required, works out of the box.
## 1. Setup
### Step 1: Create a Bot via BotFather
1. Open the official account [@BotFather](https://t.me/BotFather) in Telegram.
2. Send `/newbot` and follow the prompts:
- **Bot name** (display name, e.g. `My CowAgent Bot`)
- **Bot username** (must end with `bot`, e.g. `my_cowagent_bot`)
3. Once created, BotFather returns an **HTTP API Token** (e.g. `123456789:ABCdefGhIJKlmNoPQRsTUVwxyZ`). Keep it safe.
<Tip>
The token is the password of your bot — never share it. If it leaks, send `/revoke` to `@BotFather` to reset it.
</Tip>
### Step 2: (Group chat only) Disable Privacy Mode
Skip this step if you only use private chat. Telegram bots run in **Privacy Mode** by default — in groups they can only see commands suffixed with `@bot` (e.g. `/start@your_bot`) and replies to bot messages; **plain `@bot hello` text messages are not delivered**, so the bot will appear unresponsive in groups.
Send the following to `@BotFather`:
1. `/setprivacy`
2. Pick the bot you just created
3. Choose `Disable`
<Note>
If the bot is still silent in groups after this, try removing it from the group and adding it back.
</Note>
### Step 3: Connect to CowAgent
<Tabs>
<Tab title="Web Console (Recommended)">
Open the Web Console (default `http://127.0.0.1:9899`), go to **Channels**, click **Add Channel**, choose **Telegram**, paste the Bot Token, and click connect.
</Tab>
<Tab title="Config File">
Add the following to `config.json` and start Cow:
```json
{
"channel_type": "telegram",
"telegram_token": "123456789:ABCdefGhIJKlmNoPQRsTUVwxyZ",
"telegram_group_trigger": "mention_or_reply"
}
```
| Key | Description | Default |
| --- | --- | --- |
| `telegram_token` | HTTP API Token returned by BotFather | - |
| `telegram_group_trigger` | Group trigger: `mention_or_reply` (@ or reply) / `mention_only` (@ only) / `all` (all messages) | `mention_or_reply` |
| `telegram_register_commands` | Whether to register the bot's command menu with Telegram (via the Bot API) on startup | `true` |
</Tab>
</Tabs>
The integration is ready when you see logs like:
```
[Telegram] Bot logged in as @my_cowagent_bot (id=123456789)
[Telegram] Registered 10 bot commands
[Telegram] ✅ Telegram bot ready, polling for updates
```
## 2. Capabilities
| Feature | Support |
| --- | --- |
| Private chat | ✅ |
| Group chat (@bot / reply to bot) | ✅ |
| Text messages | ✅ send / receive |
| Image messages | ✅ send / receive |
| Voice messages | ✅ send / receive (OGG/Opus) |
| Video messages | ✅ send / receive |
| File messages | ✅ send / receive (PDF / Word / Excel, etc.) |
| Command menu | ✅ aligned with Web Console slash commands |
### Command Menu
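On startup, the channel registers its command menu with Telegram via the Bot API (unless `telegram_register_commands` is set to `false`). Typing `/` in Telegram then shows a dropdown with the following commands: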
| Command | Description |
| --- | --- |
| `/help` | Show command help |
| `/status` | View runtime status |
| `/context` | View conversation context (`/context clear` to clear) |
| `/skill` | Skill management (`/skill list`, `/skill install`, ...) |
| `/memory` | Memory management (`/memory dream`) |
| `/knowledge` | Knowledge base (`/knowledge list` / `on` / `off`) |
| `/config` | View current config |
| `/cancel` | Cancel the running Agent task |
| `/logs` | View recent logs |
| `/version` | Show version |
<Note>
Telegram's command menu only displays top-level commands; subcommands are entered with a space, e.g. `/skill list`, `/context clear`.
</Note>
## 3. Usage
Once connected:
- **Private chat**: search for your bot username (e.g. `@my_cowagent_bot`) in Telegram, click `Start` and chat away.
- **Group chat**: add the bot to a group, then trigger it with `@bot hello` or by **replying to one of the bot's messages**. If the bot doesn't respond in groups, double-check Privacy Mode in [Step 2](#step-2-group-chat-only-disable-privacy-mode).
When sending an image or file, you can **add a caption** (description / question) directly in the attachment input — the bot will answer based on both. Sending an attachment first and then a follow-up question also works; the two messages are merged automatically.

View File

@@ -1,23 +1,32 @@
--- ---
title: Web Console title: Web Console
description: Use CowAgent through the web console description: Use CowAgent through the Web Console
--- ---
The Web Console is CowAgent's default channel. It starts automatically after launch, allowing you to chat with the Agent through a browser and manage models, skills, memory, channels, and other configurations online. The Web Console is CowAgent's default channel. It runs automatically once started, letting you chat with the Agent in a browser and manage models, skills, memory, channels, and other configuration online.
## Configuration ## Configuration
```json ```json
{ {
"channel_type": "web", "channel_type": "web",
"web_port": 9899 "web_host": "0.0.0.0",
"web_port": 9899,
"web_password": "",
"enable_thinking": false
} }
``` ```
| Parameter | Description | Default | | Parameter | Description | Default |
| --- | --- | --- | | --- | --- | --- |
| `channel_type` | Set to `web` | `web` | | `channel_type` | Set to `web` | `web` |
| `web_host` | Web service listen address. When left empty, the service listens on `127.0.0.1` (local access only); set it to `0.0.0.0` to allow access from other machines, and configure `web_password` when you do | `""` |
| `web_port` | Web service listen port | `9899` | | `web_port` | Web service listen port | `9899` |
| `web_password` | Access password. Leave empty to disable password protection; recommended when listening on `0.0.0.0` | `""` |
| `web_session_expire_days` | Login session validity in days | `30` |
| `enable_thinking` | Whether to enable deep thinking mode | `false` |
Once a password is configured, you must enter it to log in when accessing the console. The login session is kept for 30 days by default (configurable via `web_session_expire_days`), so restarting the service during that period does not require logging in again. The password can also be changed online from the "Configuration" page in the console.
## Access URL ## Access URL
@@ -34,13 +43,13 @@ After starting the project, visit:
### Chat Interface ### Chat Interface
Supports streaming output with real-time display of the Agent's reasoning process and tool calls, providing intuitive observation of the Agent's decision-making: Supports streaming output with real-time display of the Agent's reasoning process and tool calls, providing intuitive observation of the Agent's decision-making. Deep thinking can be toggled via configuration or the "Agent Configuration" switch in the console.
<img width="850" src="https://cdn.link-ai.tech/doc/20260227180120.png" /> <img width="850" src="https://cdn.link-ai.tech/doc/20260227180120.png" />
#### Multi-Session Management #### Multi-Session Management
The chat interface supports multi-session management. All session records are persistently stored in a SQLite database: The chat interface supports multi-session management. All session records are persistently stored in the database:
- **Session List**: Click the history icon on the left to expand/collapse the session list panel, with scroll-to-load support for all historical sessions - **Session List**: Click the history icon on the left to expand/collapse the session list panel, with scroll-to-load support for all historical sessions
- **AI-Generated Titles**: After the first exchange in a new session, the model is automatically called to generate a short summary title - **AI-Generated Titles**: After the first exchange in a new session, the model is automatically called to generate a short summary title
@@ -50,9 +59,9 @@ The chat interface supports multi-session management. All session records are pe
### Model Management ### Model Management
Manage model configurations online without manually editing config files: Manage text, image, voice, and embedding model configurations for different providers online — no need to edit config files manually:
<img width="850" src="https://cdn.link-ai.tech/doc/20260227173811.png" /> <img width="850" src="https://cdn.link-ai.tech/doc/20260521212949.png" />
### Skill Management ### Skill Management
@@ -80,6 +89,6 @@ View and manage scheduled tasks online, including one-time tasks, fixed interval
### Logs ### Logs
View Agent runtime logs in real-time for monitoring and troubleshooting: View Agent runtime logs in real time for monitoring and troubleshooting:
<img width="850" src="https://cdn.link-ai.tech/doc/20260227173514.png" /> <img width="850" src="https://cdn.link-ai.tech/doc/20260227173514.png" />

View File

@@ -3,71 +3,88 @@ title: WeCom Bot
description: Connect CowAgent to WeCom AI Bot (WebSocket long connection) description: Connect CowAgent to WeCom AI Bot (WebSocket long connection)
--- ---
Connect CowAgent via WeCom AI Bot, supporting both direct messages and group chats. No public IP required — uses WebSocket long connection with Markdown rendering and streaming output. > Connect CowAgent via WeCom AI Bot, supporting both internal direct messages and group chats. No public IP required — uses a WebSocket long connection, with Markdown rendering and streaming output.
<Note> <Note>
WeCom Bot and WeCom App are two different integration methods. WeCom Bot uses WebSocket long connection, requiring no public IP or domain, making it easier to set up. WeCom Bot and WeCom App are two different integration methods. WeCom Bot uses a WebSocket long connection and requires no public IP or domain, making setup much simpler.
</Note> </Note>
## 1. Create an AI Bot ## 1. Connection methods
### Option A: One-click QR scan (recommended)
No need to create the bot ahead of time. Start CowAgent and open the Web console (local URL: http://127.0.0.1:9899/), go to the **Channels** tab, click **Connect Channel**, choose **WeCom Bot**, switch to **QR scan** mode, and scan the QR code with **WeCom** — bot creation and connection complete automatically.
<img src="https://cdn.link-ai.tech/doc/20260401121213.png" width="800"/>
<Note>
After a successful scan, you can further configure the bot (name, avatar, visibility scope, etc.) in **WeCom Workbench → AI Bot**.
</Note>
### Option B: Manual creation
Create the AI Bot in WeCom and obtain the Bot ID and Secret, then connect via the Web console or config file.
**Step 1: Create the AI Bot**
1. Open the WeCom client, go to **Workbench**, and click **AI Bot**: 1. Open the WeCom client, go to **Workbench**, and click **AI Bot**:
<img src="https://cdn.link-ai.tech/doc/20260316180959.png" width="800"/> <img src="https://cdn.link-ai.tech/doc/20260316180959.png" width="800"/>
2. Click **Create Bot****Manual Creation**: 2. Click **Create Bot → Manual Creation**:
<img src="https://cdn.link-ai.tech/doc/20260316181118.png" width="600"/> <img src="https://cdn.link-ai.tech/doc/20260316181118.png" width="800"/>
3. Scroll to the bottom of the right panel and select **API Mode**: 3. Scroll to the bottom of the right panel and select **API Mode**:
<img src="https://cdn.link-ai.tech/doc/20260316181215.png" width="600"/> <img src="https://cdn.link-ai.tech/doc/20260316181215.png" width="800"/>
4. Set the bot name, avatar, and visibility scope. Select **Long Connection** mode, note down the **Bot ID** and **Secret**, then click Save. 4. Set the bot name, avatar, and visibility scope. Choose **Long Connection** mode, save the **Bot ID** and **Secret**, then click Save.
## 2. Configuration **Step 2: Connect to CowAgent**
### Option A: Web Console <Tabs>
<Tab title="Web Console">
Open the Web console, go to the **Channels** tab, click **Connect Channel**, choose **WeCom Bot**, switch to **Manual** mode, enter the Bot ID and Secret, and click Connect.
Start the program and open the Web console (local access: http://127.0.0.1:9899). Go to the **Channels** tab, click **Connect Channel**, select **WeCom Bot**, fill in the Bot ID and Secret from the previous step, and click Connect. <img src="https://cdn.link-ai.tech/doc/20260316181711.png" width="800"/>
</Tab>
<Tab title="Config File">
Add the following to `config.json`, then start CowAgent:
<img src="https://cdn.link-ai.tech/doc/20260316181711.png" width="600"/> ```json
{
"channel_type": "wecom_bot",
"wecom_bot_id": "YOUR_BOT_ID",
"wecom_bot_secret": "YOUR_SECRET"
}
```
### Option B: Config File | Parameter | Description |
| --- | --- |
| `wecom_bot_id` | Bot ID of the AI Bot |
| `wecom_bot_secret` | Secret of the AI Bot |
</Tab>
</Tabs>
Add the following to your `config.json`: The log line `[WecomBot] Subscribe success` confirms the connection is established.
```json ## 2. Supported features
{
"channel_type": "wecom_bot",
"wecom_bot_id": "YOUR_BOT_ID",
"wecom_bot_secret": "YOUR_SECRET"
}
```
| Parameter | Description |
| --- | --- |
| `wecom_bot_id` | Bot ID of the AI Bot |
| `wecom_bot_secret` | Secret for the AI Bot |
After configuration, start the program. The log message `[WecomBot] Subscribe success` indicates a successful connection.
## 3. Supported Features
| Feature | Status | | Feature | Status |
| --- | --- | | --- | --- |
| Direct Messages | ✅ | | Direct chat | ✅ |
| Group Chat (@bot) | ✅ | | Group chat (@bot) | ✅ |
| Text Messages | ✅ Send & Receive | | Text messages | ✅ Send / Receive |
| Image Messages | ✅ Send & Receive | | Image messages | ✅ Send / Receive |
| File Messages | ✅ Send & Receive | | File messages | ✅ Send / Receive |
| Streaming Reply | ✅ | | Streaming replies | ✅ |
| Scheduled Push | ✅ | | Scheduled push messages | ✅ |
## 4. Usage ## 3. Usage
Search for the bot name in WeCom to start a direct conversation. Search for the bot's name inside WeCom to start a direct chat.
To use in group chats, add the bot to a group and @mention it to send messages. To use the bot in an internal group chat, add it to the group and @-mention it.
<img src="https://cdn.link-ai.tech/doc/20260316182902.png" width="800"/> <img src="https://cdn.link-ai.tech/doc/20260316182902.png" width="800"/>

View File

@@ -1,19 +1,21 @@
--- ---
title: WeChat title: WeChat
description: Connect CowAgent to personal WeChat description: Connect CowAgent to personal WeChat (via the official API)
--- ---
> Connect CowAgent to your personal WeChat. Simply scan a QR code to log in no public IP required. Supports text, image, voice, file, and video messages. > Connect CowAgent to your personal WeChat — scan to log in, no public IP required. Supports text, image, voice, file, and video messages in 1-on-1 chats. Backed by WeChat's official API; safe to use. After connecting, a bot assistant is added to your conversation list without affecting normal account usage.
## 1. Configuration ## 1. Setup and run
### Option A: Web Console ### Option A: Web console
Start the program and open the Web console (local access: http://127.0.0.1:9899). Go to the **Channels** tab, click **Connect Channel**, select **WeChat**, and follow the prompts to scan the QR code. Start CowAgent and open the Web console (local URL: http://127.0.0.1:9899/). Go to the **Channels** tab, click **Connect Channel**, select **WeChat**, and follow the prompts to scan the QR code and log in.
### Option B: Config File <img src="https://cdn.link-ai.tech/doc/20260322195114.png" width="800" />
Set `channel_type` to `weixin` in your `config.json`: ### Option B: Config file
Set `channel_type` to `weixin` in `config.json`:
```json ```json
{ {
@@ -21,52 +23,49 @@ Set `channel_type` to `weixin` in your `config.json`:
} }
``` ```
After starting the program, a QR code will be displayed in the terminal. Scan it with WeChat and confirm on your phone to complete login. After starting CowAgent, a QR code is displayed in the terminal. Scan it with WeChat to complete login.
<img src="https://cdn.link-ai.tech/doc/20260322195509.png" width="800" />
<Note> <Note>
For backward compatibility, setting `channel_type` to `wx` also activates the WeChat channel. 1. For backward compatibility, setting `channel_type` to `wx` also activates the WeChat channel.
2. The WeChat client must be on version **8.0.69** or higher.
</Note> </Note>
## 2. Parameters ## 2. Usage
| Parameter | Description | Default | Once authorized, the integration completes and you can start chatting. A bot assistant is created in your WeChat conversation list, leaving normal account usage unaffected.
| --- | --- | --- |
| `channel_type` | Set to `weixin` or `wx` | — |
Login credentials are automatically saved to `~/.weixin_cow_credentials.json`. To force a re-login, delete this file and restart. > You can find the bot at any time by searching for **"微信ClawBot"**. You may also rename it, change its avatar, pin it to the top of your conversation list, and so on.
<img src="https://cdn.link-ai.tech/doc/83ae8251d896219fde4803f4205205be.jpg" width="250" />
## 3. Login ## 3. Login
### QR Code Login ### QR code login
On first startup, a QR code is displayed in the terminal (valid for approximately 2 minutes). Scan it with WeChat and confirm on your phone. On first startup, a QR code appears in the terminal (valid for around 2 minutes). Scan it with WeChat and confirm on your phone to log in.
- The QR code automatically refreshes when it expires - The QR code refreshes automatically when it expires
- The `qrcode` dependency is already included in `requirements.txt`, enabling QR code rendering directly in the terminal - The `qrcode` dependency is already included in `requirements.txt`, so the QR code renders directly in the terminal after install
### Credential Persistence ### Credential persistence
After successful login, credentials are saved to `~/.weixin_cow_credentials.json`. Subsequent startups will reuse the saved credentials without requiring a new scan. After a successful login, credentials are saved to `~/.weixin_cow_credentials.json`. Subsequent startups reuse the saved credentials with no need to re-scan.
To force a re-login, delete the credentials file and restart the program. To force a re-login, delete the credentials file and restart.
### Session Expiry ### Session expiry
When the WeChat session expires (errcode -14), the program automatically clears old credentials and initiates a new QR login — no manual intervention required. When the WeChat session expires (errcode `-14`), CowAgent automatically clears old credentials and initiates a new QR login — no manual intervention required.
## 4. Supported Features ## 4. Supported features
| Feature | Status | | Feature | Status |
| --- | --- | | --- | --- |
| Direct Messages | ✅ | | Direct messages | ✅ |
| Text Messages | ✅ Send & Receive | | Text messages | ✅ Send & Receive |
| Image Messages | ✅ Send & Receive | | Image messages | ✅ Send & Receive |
| File Messages | ✅ Send & Receive | | File messages | ✅ Send & Receive |
| Video Messages | ✅ Send & Receive | | Video messages | ✅ Send & Receive |
| Voice Messages | ✅ Receive | | Voice messages | ✅ Receive (built-in speech recognition) |
## 5. Notes
1. Ensure network access to `ilinkai.weixin.qq.com`.
2. Media files (images, files, videos) are transferred via CDN with AES-128-ECB encryption, handled automatically by the program.
3. A stable network connection is recommended to avoid frequent disconnections that would require re-scanning.

View File

@@ -25,6 +25,14 @@ View current session and service status, including process info, model configura
/status /status
``` ```
## cancel
Abort the agent task currently running in this session. When the agent is busy with a long task (e.g. multi-turn tool calls or a long streaming response), send `/cancel` and the agent will stop before the next tool execution. Available across all channels — Web, WeChat, WeCom, Feishu, etc.
```text
/cancel
```
## config ## config
View or modify runtime configuration. Changes take effect immediately without restarting. View or modify runtime configuration. Changes take effect immediately without restarting.

View File

@@ -57,6 +57,7 @@ In the Web console or any connected channel, type `/` to see command suggestions
| --- | --- | | --- | --- |
| `/help` | Show command help | | `/help` | Show command help |
| `/status` | View service status and configuration | | `/status` | View service status and configuration |
| `/cancel` | Abort the currently running agent task |
| `/config` | View or modify runtime configuration | | `/config` | View or modify runtime configuration |
| `/skill` | Manage skills (install, uninstall, enable, disable, etc.) | | `/skill` | Manage skills (install, uninstall, enable, disable, etc.) |
| `/memory dream [N]` | Manually trigger memory distillation (default 3 days, max 30) | | `/memory dream [N]` | Manually trigger memory distillation (default 3 days, max 30) |
@@ -80,6 +81,7 @@ In the Web console or any connected channel, type `/` to see command suggestions
| version | ✓ | ✓ | | version | ✓ | ✓ |
| status | ✓ | ✓ | | status | ✓ | ✓ |
| logs | ✓ | ✓ | | logs | ✓ | ✓ |
| cancel | ✗ | ✓ |
| config | ✗ | ✓ | | config | ✗ | ✓ |
| context | — | ✓ | | context | — | ✓ |
| memory (subcommands) | ✗ | ✓ | | memory (subcommands) | ✗ | ✓ |

View File

@@ -19,6 +19,24 @@ cow skill list
``` ```
</CodeGroup> </CodeGroup>
Example output:
```
📦 Installed skills (3/4)
✅ pptx
Use this skill any time a .pptx file is involved…
Source: cowhub
✅ skill-creator
Create, install, or update skills…
Source: builtin
⏸️ image-vision (disabled)
Image understanding and visual analysis
Source: builtin
```
**Browse the Skill Hub** (view all available skills): **Browse the Skill Hub** (view all available skills):
<CodeGroup> <CodeGroup>

View File

@@ -81,7 +81,7 @@ nohup python3 app.py & tail -f nohup.out
``` ```
<Tip> <Tip>
If deploying on a server, open port `9899` in your firewall or security group to access the Web console. It's recommended to restrict access to specific IPs for security. **Deploying on a server?** By default `web_host` only listens on `127.0.0.1` (local access). Set `web_host` to `0.0.0.0` in `config.json` to make the console reachable from outside, and set `web_password` to protect it. Don't forget to open port `9899` in your firewall or security group — ideally restricted to specific IPs.
</Tip> </Tip>
## Docker Deployment ## Docker Deployment
@@ -113,7 +113,7 @@ sudo docker logs -f chatgpt-on-wechat
``` ```
<Tip> <Tip>
If deploying on a server, open port `9899` in your firewall or security group to access the Web console. It's recommended to restrict access to specific IPs for security. **Running in Docker?** Set `WEB_HOST` to `0.0.0.0` in `docker-compose.yml` so the console is reachable from outside the container, and set `WEB_PASSWORD` to protect it. Make sure port `9899` is mapped to the host and open in your firewall or security group.
</Tip> </Tip>
## Core Configuration ## Core Configuration

View File

@@ -33,6 +33,10 @@ The script automatically performs these steps:
By default, the Web console starts after installation. Access `http://localhost:9899` to begin chatting. By default, the Web console starts after installation. Access `http://localhost:9899` to begin chatting.
<Note>
**Deploying on a server?** By default `web_host` only listens on `127.0.0.1` (local access only). Set `web_host` to `0.0.0.0` in `config.json` to make the console reachable from outside, and set `web_password` to protect it. Don't forget to open port `9899` in your firewall or security group — ideally restricted to specific IPs.
</Note>
## Management Commands ## Management Commands
After installation, use the `cow` command to manage the service: After installation, use the `cow` command to manage the service:

61
docs/en/guide/upgrade.mdx Normal file
View File

@@ -0,0 +1,61 @@
---
title: Upgrade
description: How to upgrade CowAgent
---
## Recommended: One-line upgrade
Use `cow update` to pull the latest code and restart the service in one step:
```bash
cow update
```
The command runs the following automatically:
1. Pull the latest code (`git pull`)
2. Stop the running service
3. Update Python dependencies
4. Reinstall the CLI
5. Start the service
<Note>
If the Cow CLI is not installed, `./run.sh update` performs the same operations.
</Note>
## Manual upgrade
Run the following inside the project root:
```bash
git pull
pip3 install -r requirements.txt
pip3 install -e .
```
Then restart the service:
```bash
# Using Cow CLI (recommended)
cow restart
# Or using run.sh
./run.sh restart
# Or restart manually with nohup
kill $(ps -ef | grep app.py | grep -v grep | awk '{print $2}')
nohup python3 app.py & tail -f nohup.out
```
## Docker upgrade
Run the following in the directory containing `docker-compose.yml`:
```bash
sudo docker compose pull
sudo docker compose up -d
```
<Tip>
Back up `config.json` before upgrading. For Docker deployments, mount the workspace directory as a volume to persist data across upgrades.
</Tip>

View File

@@ -9,7 +9,7 @@ CowAgent 2.0 has evolved from a simple chatbot into a super intelligent assistan
CowAgent's architecture consists of the following core modules: CowAgent's architecture consists of the following core modules:
<img src="https://cdn.link-ai.tech/doc/cow-agent-arch-en.jpg.jpg" alt="CowAgent Architecture" /> <img src="https://cdn.jsdelivr.net/gh/zhayujie/cowagent-assets@main/architecture/en/architecture.jpg" alt="CowAgent Architecture" />
| Module | Description | | Module | Description |
| --- | --- | | --- | --- |
@@ -39,8 +39,8 @@ The Agent workspace is located at `~/cow` by default and stores system prompts,
``` ```
~/cow/ ~/cow/
├── system.md # Agent system prompt ├── SYSTEM.md # Agent system prompt
├── user.md # User profile ├── USER.md # User profile
├── MEMORY.md # Core memory ├── MEMORY.md # Core memory
├── memory/ # Long-term memory storage ├── memory/ # Long-term memory storage
│ └── YYYY-MM-DD.md # Daily memory │ └── YYYY-MM-DD.md # Daily memory
@@ -67,9 +67,10 @@ Configure Agent mode parameters in `config.json`:
{ {
"agent": true, "agent": true,
"agent_workspace": "~/cow", "agent_workspace": "~/cow",
"agent_max_context_tokens": 40000, "agent_max_context_tokens": 50000,
"agent_max_context_turns": 30, "agent_max_context_turns": 20,
"agent_max_steps": 15 "agent_max_steps": 20,
"enable_thinking": false
} }
``` ```
@@ -77,7 +78,9 @@ Configure Agent mode parameters in `config.json`:
| --- | --- | --- | | --- | --- | --- |
| `agent` | Enable Agent mode | `true` | | `agent` | Enable Agent mode | `true` |
| `agent_workspace` | Workspace path | `~/cow` | | `agent_workspace` | Workspace path | `~/cow` |
| `agent_max_context_tokens` | Max context tokens | `40000` | | `agent_max_context_tokens` | Max context tokens | `50000` |
| `agent_max_context_turns` | Max context turns | `30` | | `agent_max_context_turns` | Max context turns | `20` |
| `agent_max_steps` | Max decision steps per task | `15` | | `agent_max_steps` | Max decision steps per task | `20` |
| `enable_thinking` | Enable deep-thinking mode | `false` |
| `knowledge` | Enable personal knowledge base | `true` | | `knowledge` | Enable personal knowledge base | `true` |

View File

@@ -84,7 +84,7 @@ Secrets required by skills are stored in an environment variable file, managed b
The Skills system provides infinite extensibility for the Agent. Each Skill consists of a description file, execution scripts (optional), and resources (optional), describing how to complete specific types of tasks. Skills allow the Agent to follow instructions for complex workflows, invoke tools, or integrate third-party systems. The Skills system provides infinite extensibility for the Agent. Each Skill consists of a description file, execution scripts (optional), and resources (optional), describing how to complete specific types of tasks. Skills allow the Agent to follow instructions for complex workflows, invoke tools, or integrate third-party systems.
- **[Skill Hub](https://skills.cowagent.ai/):** An open skill marketplace featuring official, community, and third-party skills. Install with one command. - [Skill Hub](https://skills.cowagent.ai/): An open skill marketplace featuring official, community, and third-party skills. Install with one command.
- **Built-in skills:** Located in the project's `skills/` directory, including skill creator, image recognition, LinkAI agent, web fetch, and more. Built-in skills are automatically enabled based on dependency conditions (API keys, system commands, etc.). - **Built-in skills:** Located in the project's `skills/` directory, including skill creator, image recognition, LinkAI agent, web fetch, and more. Built-in skills are automatically enabled based on dependency conditions (API keys, system commands, etc.).
- **Custom skills:** Created by users through conversation, stored in the workspace (`~/cow/skills/`), capable of implementing any complex business process or third-party integration. - **Custom skills:** Created by users through conversation, stored in the workspace (`~/cow/skills/`), capable of implementing any complex business process or third-party integration.

View File

@@ -1,53 +1,60 @@
--- ---
title: Introduction title: Introduction
description: CowAgent - AI Super Assistant powered by LLMs description: CowAgent - Open-source super AI assistant and Agent Harness
--- ---
<img src="https://cdn.link-ai.tech/doc/78c5dd674e2c828642ecc0406669fed7.png" alt="CowAgent" width="600px"/> <div align="center">
<img src="https://cdn.link-ai.tech/doc/78c5dd674e2c828642ecc0406669fed7.png" alt="CowAgent" width="450px"/>
</div>
**CowAgent** is an AI super assistant powered by LLMs with autonomous task planning, long-term memory, skills system, multimodal messages, multiple model support, and multi-platform deployment. **CowAgent** is an open-source super AI assistant and Agent Harness. It proactively plans tasks, runs tools and skills, and autonomously grows with memory and knowledge.
CowAgent can proactively think and plan tasks, operate computers and external resources, create and execute Skills, and continuously grow with long-term memory. It supports flexible switching between multiple models, handles text, voice, images, files and other multimodal messages, and can be integrated into WeChat, web, Feishu, DingTalk, WeCom, and WeChat Official Account. It runs 7x24 hours on your personal computer or server. CowAgent is lightweight, easy to deploy, and built to extend. Plug in any major LLM provider, run it across Web and major IM platforms, 24/7 on a personal computer or server.
<Card title="GitHub" icon="github" href="https://github.com/zhayujie/CowAgent"> <CardGroup cols={2}>
github.com/zhayujie/CowAgent <Card title="GitHub" icon="github" href="https://github.com/zhayujie/CowAgent">
</Card> Open-source repository — Star and contribute
</Card>
<Card title="Try Online" icon="cloud" href="https://link-ai.tech/cowagent/create">
No setup required — experience CowAgent instantly
</Card>
</CardGroup>
## Core Capabilities ## Core Capabilities
<CardGroup cols={2}> <CardGroup cols={2}>
<Card title="Autonomous Task Planning" icon="brain" href="/en/intro/architecture"> <Card title="Autonomous Task Planning" icon="brain" href="/en/intro/architecture">
Understands complex tasks and autonomously plans execution, continuously thinking and invoking tools until goals are achieved. Supports accessing file systems, terminals, browsers, schedulers, and other system resources through tools. Decomposes complex tasks and executes them step by step, looping over tools and skills until the goal is reached.
</Card> </Card>
<Card title="Long-term Memory" icon="database" href="/en/memory"> <Card title="Long-term Memory" icon="database" href="/en/memory/index">
Three-tier memory flow (context → daily memory → global memory) with daily Deep Dream distillation, keyword and vector retrieval support. Three-tier architecture (context → daily → core), automatic Deep Dream distillation, hybrid keyword + vector retrieval.
</Card> </Card>
<Card title="Knowledge Base" icon="book" href="/en/knowledge"> <Card title="Personal Knowledge Base" icon="book" href="/en/knowledge/index">
Automatically organizes structured knowledge with knowledge graph visualization, building a continuously growing knowledge network through cross-references. Auto-curates structured knowledge into a Markdown wiki, builds an evolving knowledge graph with visual browsing.
</Card> </Card>
<Card title="Skills System" icon="puzzle-piece" href="/en/skills/index"> <Card title="Skills System" icon="puzzle-piece" href="/en/skills/index">
Implements a Skills creation and execution engine with built-in skills, and supports custom Skills development through natural language conversation. A complete skill creation and execution engine. Install from Skill Hub or generate custom skills via natural-language conversation.
</Card> </Card>
<Card title="Multimodal Messages" icon="image" href="/en/channels/web"> <Card title="Multimodal Messaging" icon="image" href="/en/channels/web">
Supports parsing, processing, generating, and sending text, images, voice, files, and other message types. First-class support for text, images, voice, and files — recognition, generation, and delivery.
</Card> </Card>
<Card title="Tool System" icon="wrench" href="/en/tools/index"> <Card title="Tool System" icon="wrench" href="/en/tools/index">
Built-in tools for file I/O, terminal execution, browser automation, scheduled tasks, messaging, and more. The Agent autonomously invokes tools to accomplish complex tasks. Built-in file I/O, terminal, browser, scheduler, memory retrieval, web search, and more — with native MCP integration.
</Card> </Card>
<Card title="Command System" icon="terminal" href="/en/cli/index"> <Card title="Command System" icon="terminal" href="/en/cli/index">
Provides terminal CLI and in-chat commands for process management, skill installation, configuration, context inspection, and other common operations. Terminal CLI and in-chat commands for process management, skill installation, configuration, and context inspection.
</Card> </Card>
<Card title="Multiple Model Support" icon="microchip" href="/en/models/index"> <Card title="Pluggable Models" icon="microchip" href="/en/models/index">
Supports mainstream model providers including OpenAI, Claude, Gemini, DeepSeek, MiniMax, GLM, Qwen, Kimi, Doubao, and more. Claude, GPT, Gemini, DeepSeek, Qwen, GLM, Kimi, MiniMax, Doubao, and more — swap providers from the Web console with one click.
</Card> </Card>
<Card title="Multi-platform Deployment" icon="server" href="/en/channels/weixin"> <Card title="Multi-channel Integration" icon="server" href="/en/channels/index">
Runs on local computers or servers, integrable into WeChat, web, Feishu, DingTalk, WeChat Official Account, and WeCom applications. A single Agent simultaneously serves Web, WeChat, Feishu, DingTalk, WeCom, QQ, and Official Accounts.
</Card> </Card>
</CardGroup> </CardGroup>
## Quick Experience ## Quick Start
Run the following command in your terminal for one-click install, configuration, and startup: Run one of the commands below to install, configure, and start CowAgent in a single step:
<Tabs> <Tabs>
<Tab title="Linux / macOS"> <Tab title="Linux / macOS">
@@ -62,25 +69,25 @@ Run the following command in your terminal for one-click install, configuration,
</Tab> </Tab>
</Tabs> </Tabs>
By default, the Web service starts after running. Access `http://localhost:9899/chat` to chat in the web interface. Once started, open `http://localhost:9899` to access the **Web console** — the unified place to chat, configure providers, connect channels, and install skills.
<CardGroup cols={2}> <CardGroup cols={2}>
<Card title="Quick Start" icon="rocket" href="/en/guide/quick-start"> <Card title="Quick Start" icon="rocket" href="/en/guide/quick-start">
Complete installation and run guide Complete installation and run guide
</Card> </Card>
<Card title="Architecture" icon="sitemap" href="/en/intro/architecture"> <Card title="Architecture" icon="sitemap" href="/en/intro/architecture">
CowAgent system architecture design CowAgent system architecture
</Card> </Card>
</CardGroup> </CardGroup>
## Disclaimer ## Disclaimer
1. This project follows the [MIT License](https://github.com/zhayujie/CowAgent/blob/master/LICENSE) and is intended for technical research and learning. Users must comply with local laws, regulations, policies, and corporate bylaws. Any illegal or rights-infringing use is prohibited. 1. This project is licensed under the [MIT License](https://github.com/zhayujie/CowAgent/blob/master/LICENSE) and is intended for technical research and learning. You are responsible for complying with applicable laws and regulations in your jurisdiction; the maintainers assume no liability for any consequences arising from use of this project.
2. Agent mode consumes more tokens than normal chat mode. Choose models based on effectiveness and cost. Agent has access to the host operating system — deploy with caution. 2. **Cost & safety:** Agent mode consumes substantially more tokens than plain chat — pick models that balance quality and cost. The Agent has access to your local operating system; deploy only in trusted environments.
3. CowAgent focuses on open-source development and does not participate in, authorize, or issue any cryptocurrency. 3. CowAgent is a pure open-source project and does not participate in, authorize, or issue any cryptocurrency.
## Community ## Community
Add our assistant on WeChat to join the open-source community: Scan the WeChat QR code to join the open-source community group:
<img width="140" src="https://img-1317903499.cos.ap-guangzhou.myqcloud.com/docs/open-community.png" /> <img width="140" src="https://img-1317903499.cos.ap-guangzhou.myqcloud.com/docs/open-community.png" />

View File

@@ -5,6 +5,10 @@ description: CowAgent personal knowledge base — structured knowledge accumulat
The personal knowledge base is the Agent's long-term structured knowledge store, saved in the `knowledge/` directory within the workspace. Unlike memory, which is organized by timeline, the knowledge base organizes content by topic — articles, conversation insights, and learning materials are structured into interlinked Markdown pages, forming a continuously growing knowledge network. The personal knowledge base is the Agent's long-term structured knowledge store, saved in the `knowledge/` directory within the workspace. Unlike memory, which is organized by timeline, the knowledge base organizes content by topic — articles, conversation insights, and learning materials are structured into interlinked Markdown pages, forming a continuously growing knowledge network.
<Frame>
<img src="https://cdn.link-ai.tech/doc/20260413105435.png" width="800" />
</Frame>
## Core Concepts ## Core Concepts
### Knowledge vs Memory ### Knowledge vs Memory
@@ -43,7 +47,7 @@ Knowledge writing is an autonomous Agent behavior, triggered in these scenarios:
Each knowledge page includes cross-reference links to related pages, gradually building a knowledge graph. Each knowledge page includes cross-reference links to related pages, gradually building a knowledge graph.
<Frame> <Frame>
<img src="https://gist.github.com/user-attachments/assets/3ce92f78-1863-4820-8fa8-660c0f2b7f09" alt="Conversational knowledge ingest" /> <img src="https://cdn.link-ai.tech/doc/20260413110104.png" width="800" />
</Frame> </Frame>
## Knowledge Retrieval ## Knowledge Retrieval
@@ -63,11 +67,11 @@ The web console provides a dedicated "Knowledge" module with:
- **Chat integration** — Knowledge document links referenced in Agent replies are clickable for direct navigation - **Chat integration** — Knowledge document links referenced in Agent replies are clickable for direct navigation
<Frame> <Frame>
<img src="https://gist.github.com/user-attachments/assets/b7b9d6be-0ac1-4c65-803b-2c6b36bd59a7" alt="Knowledge document browsing" /> <img src="https://cdn.link-ai.tech/doc/17aad553d3e9e428c52ff9dc31726fda.png" width="800" />
</Frame> </Frame>
<Frame> <Frame>
<img src="https://gist.github.com/user-attachments/assets/44ae68ca-96cc-40b9-ab33-cdbec34c2379" alt="Knowledge graph visualization" /> <img src="https://cdn.link-ai.tech/doc/20260413105402.png" width="800" />
</Frame> </Frame>
## CLI Commands ## CLI Commands

View File

@@ -27,7 +27,7 @@ The Agent automatically persists conversation content to long-term memory throug
- **On context trimming** — When conversation turns or tokens exceed the configured limit, the oldest half of the context is trimmed, and the discarded content is summarized by LLM into key information and written to the daily memory file. The summary is also asynchronously injected into the retained context for conversational continuity - **On context trimming** — When conversation turns or tokens exceed the configured limit, the oldest half of the context is trimmed, and the discarded content is summarized by LLM into key information and written to the daily memory file. The summary is also asynchronously injected into the retained context for conversational continuity
- **Daily scheduled summary** — A full summary is automatically triggered at 23:55 every day, ensuring memory is preserved even on low-activity days (skipped if content hasn't changed) - **Daily scheduled summary** — A full summary is automatically triggered at 23:55 every day, ensuring memory is preserved even on low-activity days (skipped if content hasn't changed)
- **[Deep Dream (memory distillation)](/en/memory/deep-dream)** — Runs automatically after the daily summary, distilling daily memories into MEMORY.md and generating a dream diary - [Deep Dream (memory distillation)](/en/memory/deep-dream) — Runs automatically after the daily summary, distilling daily memories into MEMORY.md and generating a dream diary
- **On API context overflow** — When the model API returns a context overflow error, the current conversation summary is saved as an emergency measure - **On API context overflow** — When the model API returns a context overflow error, the current conversation summary is saved as an emergency measure
All memory writes run asynchronously in a background thread (LLM summarization + file writing), never blocking normal conversation replies. All memory writes run asynchronously in a background thread (LLM summarization + file writing), never blocking normal conversation replies.

View File

@@ -1,17 +1,50 @@
--- ---
title: Claude title: Claude
description: Claude model configuration description: Anthropic Claude model configuration (Text Chat + Image Understanding)
--- ---
Claude is provided by Anthropic and supports both text chat and image understanding. The mainstream Sonnet / Opus models natively support vision, so no separate Vision model needs to be specified.
<Tip>
All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file.
</Tip>
## Text Chat
```json ```json
{ {
"model": "claude-sonnet-4-6", "model": "claude-opus-4-8",
"claude_api_key": "YOUR_API_KEY" "claude_api_key": "YOUR_API_KEY"
} }
``` ```
| Parameter | Description | | Parameter | Description |
| --- | --- | | --- | --- |
| `model` | Options include `claude-sonnet-4-6`, `claude-opus-4-7`, `claude-opus-4-6`, `claude-sonnet-4-5`, `claude-sonnet-4-0`, `claude-3-5-sonnet-latest`, etc. See [official models](https://docs.anthropic.com/en/docs/about-claude/models/overview) | | `model` | Supports `claude-opus-4-8`, `claude-opus-4-7`, `claude-sonnet-4-6`, `claude-opus-4-6`, `claude-sonnet-4-5`, `claude-sonnet-4-0`, `claude-3-5-sonnet-latest`, etc. See [official models](https://docs.anthropic.com/en/docs/about-claude/models/overview) |
| `claude_api_key` | Create at [Claude Console](https://console.anthropic.com/settings/keys) | | `claude_api_key` | Create one in the [Claude Console](https://console.anthropic.com/settings/keys) |
| `claude_api_base` | Optional. Defaults to `https://api.anthropic.com/v1`. Change to use third-party proxy | | `claude_api_base` | Optional, defaults to `https://api.anthropic.com/v1`. Can be changed to a third-party proxy |
### Model Selection
| Model | Use Case |
| --- | --- |
| `claude-opus-4-8` | Default recommended, latest flagship; best for complex reasoning and long-running tasks |
| `claude-opus-4-7` | Previous-generation Opus flagship |
| `claude-sonnet-4-6` | Balances capability and speed at a lower cost |
| `claude-opus-4-6` / `claude-sonnet-4-5` / `claude-sonnet-4-0` | Earlier flagships at a lower price |
## Image Understanding
Once `claude_api_key` is configured, the Agent's Vision tool automatically uses the Claude main model to recognize images, with no extra setup required.
To manually specify a Vision model, set it explicitly in the configuration file:
```json
{
"tools": {
"vision": {
"model": "claude-sonnet-4-6"
}
}
}
```

View File

@@ -77,7 +77,7 @@ Reference: [China Key](https://platform.minimaxi.com/docs/coding-plan/quickstart
--- ---
## Zhipu GLM ## GLM
```json ```json
{ {

View File

@@ -1,26 +1,26 @@
--- ---
title: Custom title: Custom
description: Custom provider for third-party APIs and local models description: Custom vendor configuration for third-party API proxies and local models
--- ---
For models accessed via OpenAI-compatible APIs, such as: For model services accessed via the OpenAI-compatible protocol or locally deployed models, such as:
- **Third-party API proxies**: Use a unified API Base to call multiple models - **Third-party API proxies**: call multiple models through a unified API base
- **Local models**: Models deployed locally via Ollama, vLLM, LocalAI, etc. - **Local models**: models deployed locally with tools like Ollama, vLLM, LocalAI
- **Private deployments**: Self-hosted model services within your organization - **Private deployments**: model services deployed inside an enterprise
<Note> <Note>
Unlike the `openai` provider, switching models under the Custom provider will not auto-switch the provider type. Your custom API address is always preserved. Difference from the `openai` vendor: when a custom vendor is selected, switching models via `/config model` does not automatically switch the vendor type — the custom API address is always used.
</Note> </Note>
## Configuration ## Text Chat
### Third-party API Proxy ### Third-party API proxy
```json ```json
{ {
"bot_type": "custom", "bot_type": "custom",
"model": "deepseek-v4-flash", "model": "",
"custom_api_key": "YOUR_API_KEY", "custom_api_key": "YOUR_API_KEY",
"custom_api_base": "https://{your-proxy.com}/v1" "custom_api_base": "https://{your-proxy.com}/v1"
} }
@@ -29,13 +29,13 @@ For models accessed via OpenAI-compatible APIs, such as:
| Parameter | Description | | Parameter | Description |
| --- | --- | | --- | --- |
| `bot_type` | Must be set to `custom` | | `bot_type` | Must be set to `custom` |
| `model` | Model name, any model supported by your proxy service | | `model` | Model name; any model name supported by the proxy service |
| `custom_api_key` | API key provided by your proxy service | | `custom_api_key` | API key provided by the proxy service |
| `custom_api_base` | API base URL, must be OpenAI-compatible | | `custom_api_base` | API endpoint provided by the proxy service; must be OpenAI-compatible |
### Local Models ### Local models
Local models typically don't require an API key — just set the API base: Local models usually do not require an API key — only the API base needs to be filled in:
```json ```json
{ {
@@ -45,7 +45,7 @@ Local models typically don't require an API key — just set the API base:
} }
``` ```
Common local deployment tools and their default addresses: Common local deployment tools and their default endpoints:
| Tool | Default API Base | | Tool | Default API Base |
| --- | --- | | --- | --- |
@@ -53,9 +53,9 @@ Common local deployment tools and their default addresses:
| [vLLM](https://docs.vllm.ai) | `http://localhost:8000/v1` | | [vLLM](https://docs.vllm.ai) | `http://localhost:8000/v1` |
| [LocalAI](https://localai.io) | `http://localhost:8080/v1` | | [LocalAI](https://localai.io) | `http://localhost:8080/v1` |
## Switching Models ### Switching Models
Under the Custom provider, switching models only changes `model` without affecting `bot_type` or the API address: Switching models under a custom vendor only changes `model` — `bot_type` and the API endpoint remain unchanged:
``` ```
/config model qwen3.5:27b /config model qwen3.5:27b

View File

@@ -1,9 +1,11 @@
--- ---
title: DeepSeek title: DeepSeek
description: DeepSeek model configuration description: DeepSeek model configuration (Text Chat + Thinking Mode)
--- ---
Option 1: Native integration (recommended): DeepSeek is one of the default recommended vendors in Agent mode, focused on cost-effective text chat and task planning.
## Text Chat
```json ```json
{ {
@@ -14,24 +16,24 @@ Option 1: Native integration (recommended):
| Parameter | Description | | Parameter | Description |
| --- | --- | | --- | --- |
| `model` | Supports `deepseek-v4-flash` (default) and `deepseek-v4-pro` | | `model` | Supports `deepseek-v4-flash` (default) and `deepseek-v4-pro` |
| `deepseek_api_key` | Create at [DeepSeek Platform](https://platform.deepseek.com/api_keys) | | `deepseek_api_key` | Create one on the [DeepSeek Platform](https://platform.deepseek.com/api_keys) |
| `deepseek_api_base` | Optional, defaults to `https://api.deepseek.com/v1`. Can be changed to a third-party proxy | | `deepseek_api_base` | Optional, defaults to `https://api.deepseek.com/v1`. Can be changed to a third-party proxy |
## Model Selection ### Model Selection
| Model | Use Case | | Model | Use Case |
| --- | --- | | --- | --- |
| `deepseek-v4-flash` | Default: fast and cost-effective | | `deepseek-v4-flash` | Default recommended; fast and low cost |
| `deepseek-v4-pro` | Stronger on complex tasks | | `deepseek-v4-pro` | Smarter; better for complex tasks |
## Thinking Mode ## Thinking Mode
The V4 series (`deepseek-v4-flash` / `deepseek-v4-pro`) supports an explicit "thinking mode": the model emits a chain-of-thought (`reasoning_content`) before the final answer to improve answer quality. The V4 series (`deepseek-v4-flash` / `deepseek-v4-pro`) supports an explicit "thinking mode": before producing the final answer, the model emits a chain of thought (`reasoning_content`) to improve answer quality.
### Toggle ### Toggle
Controlled by the global `enable_thinking` setting: Controlled by the global `enable_thinking` config, and can also be toggled from the Web Console's configuration page:
```json ```json
{ {
@@ -39,12 +41,12 @@ Controlled by the global `enable_thinking` setting:
} }
``` ```
- `true`: thinking is on across all channels. The Web console renders the reasoning trace; IM channels (WeChat / WeCom / DingTalk / Feishu) don't render it but still benefit from higher answer quality. - `true`: the model thinks before answering across all channels. The Web Console displays the thinking process; IM channels (WeChat / WeCom / DingTalk / Feishu) do not show it but still get better answers.
- `false`: thinking off, faster responses with lower first-token latency. - `false`: thinking is disabled, responses are faster, and time-to-first-token is lower.
### Reasoning Effort ### Reasoning Effort
Under thinking mode, `reasoning_effort` controls how hard the model thinks: Under thinking mode, `reasoning_effort` controls reasoning intensity:
```json ```json
{ {
@@ -55,27 +57,16 @@ Under thinking mode, `reasoning_effort` controls how hard the model thinks:
| Value | Use Case | | Value | Use Case |
| --- | --- | | --- | --- |
| `high` (default) | Day-to-day agent tasks; balanced thinking depth and latency | | `high` (default) | Day-to-day Agent tasks; balanced reasoning and speed |
| `max` | Complex coding, long-horizon planning, strict-constraint tasks. Deeper reasoning at the cost of more output tokens and higher latency | | `max` | Complex coding, long-horizon planning, strictly constrained tasks; deeper reasoning but more time and output tokens |
`reasoning_effort` only takes effect when `enable_thinking` is `true`. It is silently ignored on models that do not support thinking mode. `reasoning_effort` only takes effect when `enable_thinking` is `true`; it is ignored automatically when the model does not support thinking mode.
### Notes ### Behavior Notes
- **Sampling parameters**: under thinking mode, `temperature`, `top_p`, `presence_penalty`, and `frequency_penalty` are silently ignored by the server (no error). CowAgent skips sending them automatically. - **Sampling parameters**: in thinking mode, `temperature`, `top_p`, `presence_penalty`, and `frequency_penalty` are ignored by the server (without errors). CowAgent automatically skips them.
- **Multi-turn tool calls**: once the history contains any tool-call turn, DeepSeek requires `reasoning_content` on every assistant message. CowAgent handles the round-trip automatically, including across mid-session toggles of the thinking switch. - **Multi-turn tool calls**: when the history contains tool calls, DeepSeek requires every assistant message to include `reasoning_content`. CowAgent handles this automatically, so toggling thinking mode across turns will not cause errors.
<Tip> <Tip>
Start with `deepseek-v4-flash`; switch to `deepseek-v4-pro` for harder tasks; enable `enable_thinking` when you want deeper reasoning. `deepseek-v4-flash` is used by default; switch to `deepseek-v4-pro` for complex tasks; enable `enable_thinking` when deep reasoning is needed.
</Tip> </Tip>
Option 2: OpenAI-compatible configuration:
```json
{
"model": "deepseek-v4-flash",
"bot_type": "openai",
"open_ai_api_key": "YOUR_API_KEY",
"open_ai_api_base": "https://api.deepseek.com/v1"
}
```

View File

@@ -1,17 +1,66 @@
--- ---
title: Doubao (ByteDance) title: Doubao
description: Doubao (Volcano Ark) model configuration description: Doubao (Volcengine Ark) model configuration (Text / Image Understanding / Image Generation / Embedding)
--- ---
Doubao (Volcengine Ark) supports text chat, image understanding, image generation (Seedream), and embedding. A single `ark_api_key` enables all capabilities.
<Tip>
All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file.
</Tip>
## Text Chat
```json ```json
{ {
"model": "doubao-seed-2-0-code-preview-260215", "model": "doubao-seed-2-0-pro-260215",
"ark_api_key": "YOUR_API_KEY" "ark_api_key": "YOUR_API_KEY"
} }
``` ```
| Parameter | Description | | Parameter | Description |
| --- | --- | | --- | --- |
| `model` | Options include `doubao-seed-2-0-code-preview-260215`, `doubao-seed-2-0-pro-260215`, `doubao-seed-2-0-lite-260215`, etc. | | `model` | Can be `doubao-seed-2-0-pro-260215`, `doubao-seed-2-0-code-preview-260215`, `doubao-seed-2-0-lite-260215`, etc. |
| `ark_api_key` | Create at [Volcano Ark Console](https://console.volcengine.com/ark/region:ark+cn-beijing/apikey) | | `ark_api_key` | Create one in the [Volcengine Ark Console](https://console.volcengine.com/ark/region:ark+cn-beijing/apikey) |
| `ark_base_url` | Optional. Defaults to `https://ark.cn-beijing.volces.com/api/v3` | | `ark_base_url` | Optional, defaults to `https://ark.cn-beijing.volces.com/api/v3` |
## Image Understanding
Once `ark_api_key` is configured, the Agent's Vision tool automatically uses `doubao-seed-2-0-pro-260215` to recognize images, with no extra setup required.
To manually specify a Vision model:
```json
{
"tools": {
"vision": {
"model": "doubao-seed-2-0-pro-260215"
}
}
}
```
## Image Generation
```json
{
"skills": {
"image-generation": {
"model": "seedream-5.0-lite"
}
}
}
```
Available models: `seedream-5.0-lite`, `seedream-4.5`.
## Embedding
```json
{
"embedding_provider": "doubao",
"embedding_model": "doubao-embedding-vision-251215"
}
```
The default model is `doubao-embedding-vision-251215` (multimodal embedding); the dimension (1024 or 2048) can be set via `embedding_dimensions` in the configuration file. After changing the embedding model or dimension, run `/memory rebuild-index` to rebuild the index.

View File

@@ -1,16 +1,59 @@
--- ---
title: Gemini title: Gemini
description: Google Gemini model configuration description: Google Gemini model configuration (Text Chat + Image Understanding + Image Generation)
--- ---
Google Gemini supports text chat, image understanding, and image generation (Nano Banana series). A single `gemini_api_key` enables all capabilities.
<Tip>
All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file.
</Tip>
## Text Chat
```json ```json
{ {
"model": "gemini-3.1-pro-preview", "model": "gemini-3.5-flash",
"gemini_api_key": "YOUR_API_KEY" "gemini_api_key": "YOUR_API_KEY"
} }
``` ```
| Parameter | Description | | Parameter | Description |
| --- | --- | | --- | --- |
| `model` | Options include `gemini-3.1-flash-lite-preview`, `gemini-3.1-pro-preview`, `gemini-3-flash-preview`, `gemini-3-pro-preview`, etc. See [official docs](https://ai.google.dev/gemini-api/docs/models) | | `model` | Recommended: `gemini-3.5-flash`; also supports `gemini-3.1-pro-preview`, `gemini-3.1-flash-lite-preview`, `gemini-3-flash-preview`, `gemini-3-pro-preview`, etc. See [official docs](https://ai.google.dev/gemini-api/docs/models) |
| `gemini_api_key` | Create at [Google AI Studio](https://aistudio.google.com/app/apikey) | | `gemini_api_key` | Create one in [Google AI Studio](https://aistudio.google.com/app/apikey) |
| `gemini_api_base` | Optional, defaults to `https://generativelanguage.googleapis.com`. Can be changed to a third-party proxy |
## Image Understanding
All Gemini models natively support vision. Once `gemini_api_key` is configured, the Agent's Vision tool automatically uses the main model to recognize images, with no extra setup required.
To manually specify a Vision model:
```json
{
"tools": {
"vision": {
"model": "gemini-3.1-flash-lite-preview"
}
}
}
```
## Image Generation
```json
{
"skills": {
"image-generation": {
"model": "gemini-3.1-flash-image-preview"
}
}
}
```
| Model ID | Alias |
| --- | --- |
| `gemini-3.1-flash-image-preview` | Nano Banana 2 |
| `gemini-3-pro-image-preview` | Nano Banana Pro |
| `gemini-2.5-flash-image` | Nano Banana |

View File

@@ -1,8 +1,16 @@
--- ---
title: GLM (Zhipu AI) title: GLM
description: Zhipu AI GLM model configuration description: Zhipu AI GLM model configuration (Text / Image Understanding / Speech-to-Text / Embedding)
--- ---
Zhipu AI supports text chat, image understanding, speech-to-text (ASR), and embedding. A single `zhipu_ai_api_key` enables all capabilities.
<Tip>
All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file.
</Tip>
## Text Chat
```json ```json
{ {
"model": "glm-5.1", "model": "glm-5.1",
@@ -12,16 +20,37 @@ description: Zhipu AI GLM model configuration
| Parameter | Description | | Parameter | Description |
| --- | --- | | --- | --- |
| `model` | Options include `glm-5.1`, `glm-5-turbo`, `glm-5`, `glm-4.7`, `glm-4-plus`, `glm-4-flash`, `glm-4-air`, etc. See [model codes](https://bigmodel.cn/dev/api/normal-model/glm-4) | | `model` | Can be `glm-5.1`, `glm-5-turbo`, `glm-5`, `glm-4.7`, `glm-4-plus`, `glm-4-flash`, `glm-4-air`, etc. See [model codes](https://bigmodel.cn/dev/api/normal-model/glm-4) |
| `zhipu_ai_api_key` | Create at [Zhipu AI Console](https://www.bigmodel.cn/usercenter/proj-mgmt/apikeys) | | `zhipu_ai_api_key` | Create one in the [Zhipu AI Console](https://www.bigmodel.cn/usercenter/proj-mgmt/apikeys) |
| `zhipu_ai_api_base` | Optional, defaults to `https://open.bigmodel.cn/api/paas/v4` |
OpenAI-compatible configuration is also supported: ## Image Understanding
Zhipu's chat models (`glm-5.1`, `glm-5-turbo`, etc.) do not support vision; vision calls are uniformly routed to `glm-5v-turbo`. Once `zhipu_ai_api_key` is configured, the Agent's Vision tool automatically uses this model, with no need to specify it explicitly in the configuration file.
## Speech-to-Text (ASR)
```json ```json
{ {
"bot_type": "openai", "voice_to_text": "zhipu",
"model": "glm-5.1", "voice_to_text_model": "glm-asr-2512"
"open_ai_api_base": "https://open.bigmodel.cn/api/paas/v4",
"open_ai_api_key": "YOUR_API_KEY"
} }
``` ```
| Parameter | Description |
| --- | --- |
| `voice_to_text` | Set to `zhipu` to enable Zhipu ASR |
| `voice_to_text_model` | Optional, defaults to `glm-asr-2512` |
Credentials are automatically reused from `zhipu_ai_api_key`. Audio files should be smaller than 25MB; oversized files may be rejected by the server.
## Embedding
```json
{
"embedding_provider": "zhipu",
"embedding_model": "embedding-3"
}
```
Available models: `embedding-3`, `embedding-2`. After changing the embedding model, run `/memory rebuild-index` to rebuild the index.

View File

@@ -1,58 +1,38 @@
--- ---
title: Models Overview title: Models Overview
description: Supported models and recommended choices for CowAgent description: Model vendors supported by CowAgent and their capability matrix
--- ---
CowAgent supports mainstream LLMs from domestic and international providers. Model interfaces are implemented in the project's `models/` directory. CowAgent supports a wide range of mainstream large language models. Model interfaces live under the project's `models/` directory. Beyond text chat, several vendors also provide vision understanding, image generation, speech-to-text, text-to-speech, and embeddings — all of which can be invoked on demand in the Agent flow.
<Note> ## Capability Matrix
For Agent mode, the following models are recommended based on quality and cost: deepseek-v4-flash, MiniMax-M2.7, claude-sonnet-4-6, gemini-3.1-pro-preview, glm-5.1, qwen3.6-plus, kimi-k2.6, ernie-5.1
</Note>
## Configuration A snapshot of each vendor's capabilities. "Text" refers to the main chat model; the remaining columns show which Agent capabilities the vendor can power.
Configure the model name and API key in `config.json` according to your chosen model. Each model also supports OpenAI-compatible access by setting `bot_type` to `openai` and configuring `open_ai_api_base` and `open_ai_api_key`. | Vendor | Representative Models | Text | Vision | Image Gen | STT | TTS | Embedding |
| --- | --- | :-: | :-: | :-: | :-: | :-: | :-: |
You can also use the [LinkAI](https://link-ai.tech) platform interface to flexibly switch between multiple models with support for knowledge base, workflows, and other Agent capabilities. | [DeepSeek](/en/models/deepseek) | deepseek-v4-flash / pro | ✅ | | | | | |
| [MiniMax](/en/models/minimax) | MiniMax-M2.7 | ✅ | ✅ | ✅ | | ✅ | |
## Supported Models | [Claude](/en/models/claude) | claude-opus-4-8 | ✅ | ✅ | | | | |
| [Gemini](/en/models/gemini) | gemini-3.5-flash | ✅ | ✅ | ✅ | | | |
<CardGroup cols={2}> | [OpenAI](/en/models/openai) | gpt-5.5, o-series | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
<Card title="DeepSeek" href="/en/models/deepseek"> | [GLM](/en/models/glm) | glm-5.1, glm-5v-turbo | ✅ | ✅ | | ✅ | | ✅ |
deepseek-v4-flash, deepseek-v4-pro, and more | [Qwen](/en/models/qwen) | qwen3.7-max | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
</Card> | [Doubao](/en/models/doubao) | doubao-seed-2.0 series | ✅ | ✅ | ✅ | | | ✅ |
<Card title="Baidu Qianfan / ERNIE" href="/en/models/qianfan"> | [Kimi](/en/models/kimi) | kimi-k2.6 | ✅ | ✅ | | | | |
ernie-5.1, ernie-5.0, ernie-4.5-turbo-128k, and more | [ERNIE](/en/models/qianfan) | ernie-5.1 | ✅ | ✅ | | | | |
</Card> | [MiMo](/en/models/mimo) | mimo-v2.5-pro / v2.5 | ✅ | ✅ | | | ✅ | |
<Card title="MiniMax" href="/en/models/minimax"> | [LinkAI](/en/models/linkai) | 100+ models from multiple vendors | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
MiniMax-M2.7 and other series models | [Custom](/en/models/custom) | Local models / third-party proxies | ✅ | | | | | |
</Card>
<Card title="Claude" href="/en/models/claude">
claude-sonnet-4-6 and more
</Card>
<Card title="Gemini" href="/en/models/gemini">
gemini-3.1-pro-preview and more
</Card>
<Card title="OpenAI" href="/en/models/openai">
gpt-5.4, gpt-4.1, o-series and more
</Card>
<Card title="GLM (Zhipu AI)" href="/en/models/glm">
glm-5.1, glm-5-turbo, glm-5 and other series models
</Card>
<Card title="Qwen (Tongyi Qianwen)" href="/en/models/qwen">
qwen3.6-plus, qwen3-max and more
</Card>
<Card title="Doubao (ByteDance)" href="/en/models/doubao">
doubao-seed series models
</Card>
<Card title="Kimi" href="/en/models/kimi">
kimi-k2.6, kimi-k2.5, kimi-k2 and more
</Card>
<Card title="LinkAI" href="/en/models/linkai">
Unified multi-model interface + knowledge base
</Card>
</CardGroup>
<Tip> <Tip>
For a full list of model names, refer to the project's [`common/const.py`](https://github.com/zhayujie/CowAgent/blob/master/common/const.py) file. Every capability in the Web console (Vision / Image / STT / TTS / Embedding / Web Search) can be configured independently with its own vendor and model — there is no forced binding between them.
</Tip> </Tip>
## How to Configure
**Option 1 (recommended):** Manage models and capabilities online via the [Web console](/en/channels/web), with no need to edit the configuration file:
<img width="900" src="https://cdn.jsdelivr.net/gh/zhayujie/cowagent-assets@main/screenshots/en/web-console-models-config.png" />
**Option 2:** Edit `config.json` manually and fill in the model name and API key for the selected vendor. Every model also supports OpenAI-compatible access — just set `bot_type` to `openai` and configure `open_ai_api_base` and `open_ai_api_key`.

View File

@@ -1,8 +1,16 @@
--- ---
title: Kimi (Moonshot) title: Kimi
description: Kimi (Moonshot) model configuration description: Kimi (Moonshot) model configuration (Text Chat + Image Understanding)
--- ---
Kimi is provided by Moonshot and supports both text chat and image understanding. The `kimi-k2.x` series natively supports vision.
<Tip>
All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file.
</Tip>
## Text Chat
```json ```json
{ {
"model": "kimi-k2.6", "model": "kimi-k2.6",
@@ -12,16 +20,22 @@ description: Kimi (Moonshot) model configuration
| Parameter | Description | | Parameter | Description |
| --- | --- | | --- | --- |
| `model` | Options include `kimi-k2.6`, `kimi-k2.5`, `kimi-k2`, `moonshot-v1-8k`, `moonshot-v1-32k`, `moonshot-v1-128k` | | `model` | Can be `kimi-k2.6`, `kimi-k2.5`, `kimi-k2`, `moonshot-v1-8k`, `moonshot-v1-32k`, `moonshot-v1-128k` |
| `moonshot_api_key` | Create at [Moonshot Console](https://platform.moonshot.cn/console/api-keys) | | `moonshot_api_key` | Create one in the [Moonshot Console](https://platform.moonshot.cn/console/api-keys) |
| `moonshot_base_url` | Optional, defaults to `https://api.moonshot.cn/v1` |
OpenAI-compatible configuration is also supported: ## Image Understanding
Once `moonshot_api_key` is configured, the Agent's Vision tool automatically uses `kimi-k2.6` to recognize images, with no extra setup required.
To manually specify a Vision model:
```json ```json
{ {
"bot_type": "openai", "tools": {
"model": "kimi-k2.6", "vision": {
"open_ai_api_base": "https://api.moonshot.cn/v1", "model": "kimi-k2.6"
"open_ai_api_key": "YOUR_API_KEY" }
}
} }
``` ```

View File

@@ -1,9 +1,15 @@
--- ---
title: LinkAI title: LinkAI
description: Unified access to multiple models via LinkAI platform description: Access text, vision, image, speech, and embedding capabilities through the LinkAI platform
--- ---
The [LinkAI](https://link-ai.tech) platform lets you flexibly switch between OpenAI, Claude, Gemini, DeepSeek, MiniMax, Qwen, Kimi, and other models, with support for knowledge base, workflows, plugins, and other Agent capabilities. A single `linkai_api_key` gives you access to all capabilities of mainstream vendors such as OpenAI, Claude, Gemini, DeepSeek, MiniMax, Qwen, Kimi, and Doubao.
<Tip>
All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file.
</Tip>
## Text Chat
```json ```json
{ {
@@ -14,8 +20,84 @@ The [LinkAI](https://link-ai.tech) platform lets you flexibly switch between Ope
| Parameter | Description | | Parameter | Description |
| --- | --- | | --- | --- |
| `use_linkai` | Set to `true` to enable LinkAI interface | | `use_linkai` | Set to `true` to enable |
| `linkai_api_key` | Create at [LinkAI Console](https://link-ai.tech/console/interface) | | `linkai_api_key` | Create one in the [Console](https://link-ai.tech/console/interface) |
| `model` | Leave empty to use the agent's default model. Can be switched flexibly on the platform. All models in the [model list](https://link-ai.tech/console/models) are supported | | `model` | Can be any code from the [model list](https://link-ai.tech/console/models) |
See the [API documentation](https://docs.link-ai.tech/platform/api) for more details. See [Model Service](https://link-ai.tech/console/models) for more.
## Image Understanding
Once configured, the Agent's Vision tool automatically calls multimodal models via the gateway, with no extra setup required. To manually specify a Vision model:
```json
{
"tools": {
"vision": {
"model": "gpt-5.4-mini"
}
}
}
```
Available models: `gpt-4.1-mini`, `gpt-5.4-mini`, `qwen3.6-plus`, `doubao-seed-2-0-pro-260215`, `kimi-k2.6`, `claude-sonnet-4-6`, `gemini-3.1-flash-lite-preview`, etc.
## Image Generation
```json
{
"skills": {
"image-generation": {
"model": "gpt-image-2"
}
}
}
```
| Model ID | Alias |
| --- | --- |
| `gpt-image-2` | OpenAI |
| `gemini-3.1-flash-image-preview` | Nano Banana 2 |
| `gemini-3-pro-image-preview` | Nano Banana Pro |
| `seedream-5.0-lite` | ByteDance Doubao Seedream |
## Speech-to-Text (ASR)
```json
{
"voice_to_text": "linkai"
}
```
ASR uses Whisper by default; credentials are automatically reused from `linkai_api_key`.
## Text-to-Speech (TTS)
The TTS gateway supports multiple underlying engines. The engine is selected by `text_to_voice_model`, and the available voices change with the engine.
```json
{
"text_to_voice": "linkai",
"text_to_voice_model": "doubao",
"tts_voice_id": "BV001_streaming"
}
```
| `text_to_voice_model` | Engine |
| --- | --- |
| `tts-1` | OpenAI · Multi-language (voices such as `alloy` / `nova` / `echo`) |
| `doubao` | ByteDance Doubao · Rich Chinese voices |
| `baidu` | Baidu · Chinese broadcaster voices |
Voices differ by engine; we recommend selecting them visually in the Web Console under "Model Management → Text-to-Speech".
## Embedding
```json
{
"embedding_provider": "linkai",
"embedding_model": "text-embedding-3-small"
}
```
The default model is `text-embedding-3-small` (OpenAI-compatible). After changing the embedding model, run `/memory rebuild-index` to rebuild the index.

136
docs/en/models/mimo.mdx Normal file
View File

@@ -0,0 +1,136 @@
---
title: MiMo
description: Xiaomi MiMo model configuration (Text Chat + Image Understanding + Text-to-Speech)
---
Xiaomi MiMo is a native omni-modal large model. A single `mimo_api_key` enables text chat, image understanding, and text-to-speech all at once.
<Tip>
All capabilities below can be configured in one place via the "Model Management" page in the Web Console — no need to manually edit the configuration file.
</Tip>
## Text Chat
```json
{
"model": "mimo-v2.5-pro",
"mimo_api_key": "YOUR_API_KEY",
"mimo_api_base": "https://api.xiaomimimo.com/v1"
}
```
| Parameter | Description |
| --- | --- |
| `model` | Default recommendation: `mimo-v2.5-pro`; `mimo-v2.5` is also supported |
| `mimo_api_key` | Create one in the [MiMo Open Platform](https://platform.xiaomimimo.com/console/api-keys) |
| `mimo_api_base` | Optional, defaults to `https://api.xiaomimimo.com/v1` |
### Model Selection
| Model | Use Case |
| --- | --- |
| `mimo-v2.5-pro` | Flagship: native omni-modal + Agent capability, up to 1M tokens context |
| `mimo-v2.5` | General-purpose, native omni-modal (text / image / video / audio) |
## Thinking Mode
The MiMo V2.5 series enables "thinking mode" by default: the model emits `reasoning_content` (chain-of-thought) before the final answer, improving performance on complex tasks.
Use the global `enable_thinking` flag to toggle visibility (also switchable from the Web Console settings):
```json
{
"enable_thinking": true
}
```
## Image Understanding
Once `mimo_api_key` is configured, the Agent's Vision tool can automatically use MiMo's vision models:
- When the main model itself is multimodal (`mimo-v2.5-pro` / `mimo-v2.5`), images are handled directly by the main model with no extra setup.
- When the main model belongs to another vendor, the Vision tool falls back to `mimo-v2.5-pro`.
To force a specific Vision model, set it explicitly in the configuration:
```json
{
"tools": {
"vision": {
"provider": "mimo",
"model": "mimo-v2.5-pro"
}
}
}
```
## Text-to-Speech (TTS)
```json
{
"text_to_voice": "mimo",
"text_to_voice_model": "mimo-v2.5-tts",
"tts_voice_id": "冰糖"
}
```
| Parameter | Description |
| --- | --- |
| `text_to_voice_model` | Currently only `mimo-v2.5-tts` (preset voices + singing mode) |
| `tts_voice_id` | Preset voice name (Chinese voice IDs use the Chinese name directly) |
### Preset Voices
| Voice ID | Description |
| --- | --- |
| `Mia` | English · Female |
| `Chloe` | English · Female |
| `Milo` | English · Male |
| `Dean` | English · Male |
| `冰糖` | Chinese · Female (default) |
| `茉莉` | Chinese · Female |
| `苏打` | Chinese · Male |
| `白桦` | Chinese · Male |
You can also pick a voice visually from the Web Console under "Model Management → Text-to-Speech".
### Style Control
MiMo TTS supports embedding **audio tags** in the synthesis text to control emotion, tone, dialect, persona, and even singing. Tags must appear in the **text that will be synthesized to speech (i.e. the Agent's reply)**, with the overall style tag placed at the very beginning:
```
(style)content-to-synthesize
```
Half-width `()`, full-width `（）`, and square `[]` brackets are all accepted. Both Chinese and English style descriptors work — pick whichever language expresses the timbre most precisely. Common examples:
| Category | Example tags |
| --- | --- |
| Basic emotions | `happy` `sad` `angry` `fear` `surprised` `excited` `aggrieved` `calm` `indifferent` |
| Compound emotions | `wistful` `relieved` `helpless` `guilty` `at ease` `uneasy` `touched` |
| Overall tone | `gentle` `aloof` `lively` `serious` `languid` `playful` `deep` `sharp` `cutting` |
| Voice character | `magnetic` `mellow` `bright` `ethereal` `childlike` `aged` `sweet` `husky` |
| Persona | `squeaky` `mature lady` `young boy` `uncle` `Taiwanese accent` |
| Dialect | `Northeastern` `Sichuan` `Henan` `Cantonese` |
| Role-play | `Sun Wukong` `Lin Daiyu` |
| Singing | `sing` / `singing` |
Examples:
- `(magnetic)The night is deep, and the city is still breathing.`
- `(gentle)Take a breath. You've got this.`
- `(serious)This is the final warning before the system reboots.`
- `(singing)Oh, when the saints go marching in…`
You can also insert fine-grained audio tags at any position in the text to control breathing, laughter, pauses, etc. For example:
```
(nervous, deep breath) Phew… stay calm, stay calm. (faster pace) I've rehearsed this intro fifty times, it'll be fine.
```
See the [MiMo speech synthesis documentation](https://platform.xiaomimimo.com/docs/zh-CN/usage-guide/speech-synthesis-v2.5) for the full tag list.
<Tip>
When CowAgent calls TTS, the Agent's reply text (including any `(...)` tags) is forwarded directly to MiMo for synthesis. Tell the model in its persona / system prompt to "prefix replies with a `(style)` tag to control the tone", and IM channels (WeChat / Feishu / DingTalk / WeCom) will play voice replies with the corresponding emotion, dialect, or even singing.
</Tip>

View File

@@ -1,8 +1,16 @@
--- ---
title: MiniMax title: MiniMax
description: MiniMax model configuration description: MiniMax model configuration (Text / Image Understanding / Image Generation / Text-to-Speech)
--- ---
MiniMax supports text chat, image understanding, image generation, and text-to-speech. A single `minimax_api_key` enables all capabilities.
<Tip>
All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file.
</Tip>
## Text Chat
```json ```json
{ {
"model": "MiniMax-M2.7", "model": "MiniMax-M2.7",
@@ -12,16 +20,52 @@ description: MiniMax model configuration
| Parameter | Description | | Parameter | Description |
| --- | --- | | --- | --- |
| `model` | Options include `MiniMax-M2.7`, `MiniMax-M2.5`, `MiniMax-M2.1`, `MiniMax-M2.1-lightning`, `MiniMax-M2`, etc. | | `model` | Can be `MiniMax-M2.7`, `MiniMax-M2.7-highspeed`, `MiniMax-M2.5`, `MiniMax-M2.1`, `MiniMax-M2.1-lightning`, `MiniMax-M2`, etc. |
| `minimax_api_key` | Create at [MiniMax Console](https://platform.minimaxi.com/user-center/basic-information/interface-key) | | `minimax_api_key` | Create one in the [MiniMax Console](https://platform.minimaxi.com/user-center/basic-information/interface-key) |
OpenAI-compatible configuration is also supported: ## Image Understanding
MiniMax's M2.x chat models do not support vision natively; vision calls are uniformly routed to `MiniMax-Text-01`. Once `minimax_api_key` is configured, the Agent's Vision tool automatically uses this model, with no need to specify it explicitly in the configuration file.
## Image Generation
```json ```json
{ {
"bot_type": "openai", "skills": {
"model": "MiniMax-M2.7", "image-generation": {
"open_ai_api_base": "https://api.minimaxi.com/v1", "model": "image-01"
"open_ai_api_key": "YOUR_API_KEY" }
}
} }
``` ```
Available models: `image-01`.
## Text-to-Speech (TTS)
```json
{
"text_to_voice": "minimax",
"text_to_voice_model": "speech-2.8-hd",
"tts_voice_id": "female-shaonv"
}
```
| Parameter | Description |
| --- | --- |
| `text_to_voice_model` | `speech-2.8-hd` (emotional rendering, natural sound), `speech-2.8-turbo` (ultra-fast), `speech-2.6-hd`, `speech-2.6-turbo` |
| `tts_voice_id` | Voice ID; supports Chinese / Cantonese / English / Japanese / Korean — 70+ voices in total |
Common voice examples:
| Voice ID | Description |
| --- | --- |
| `female-shaonv` | Chinese · Young Girl (Female) |
| `female-yujie` | Chinese · Mature Lady (Female) |
| `female-tianmei` | Chinese · Sweet Female (Female) |
| `male-qn-jingying` | Chinese · Elite Youth (Male) |
| `male-qn-badao` | Chinese · Dominant Youth (Male) |
| `Cantonese_GentleLady` | Cantonese · Gentle Female Voice |
| `English_Graceful_Lady` | English · Graceful Lady |
For the full voice list (70+ voices across Chinese / Cantonese / English / Japanese / Korean), see the [system voice list](https://platform.minimaxi.com/docs/faq/system-voice-id), or select visually in the Web Console under "Model Management → Text-to-Speech".

View File

@@ -1,11 +1,20 @@
--- ---
title: OpenAI title: OpenAI
description: OpenAI model configuration description: OpenAI model configuration (Text / Vision / Image / Speech / Embedding)
--- ---
OpenAI offers the most complete coverage and can simultaneously serve text chat, vision understanding, image generation, speech-to-text (ASR), text-to-speech (TTS), and embedding. A single `open_ai_api_key` lets the Agent use all of these capabilities.
<Tip>
All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file.
</Tip>
## Text Chat
```json ```json
{ {
"model": "gpt-5.4", "model": "gpt-5.5",
"open_ai_api_key": "YOUR_API_KEY", "open_ai_api_key": "YOUR_API_KEY",
"open_ai_api_base": "https://api.openai.com/v1" "open_ai_api_base": "https://api.openai.com/v1"
} }
@@ -13,7 +22,82 @@ description: OpenAI model configuration
| Parameter | Description | | Parameter | Description |
| --- | --- | | --- | --- |
| `model` | Matches the [model parameter](https://platform.openai.com/docs/models) of the OpenAI API. Supports o-series, gpt-5.4, gpt-5 series, gpt-4.1, etc. Recommended for Agent mode: `gpt-5.4` | | `model` | Same as OpenAI's [model parameter](https://platform.openai.com/docs/models); supports `gpt-5.5`, `gpt-5.4`, `gpt-5.4-mini`, `gpt-5.4-nano`, the `gpt-5` series, `gpt-4.1`, the o-series, etc. Agent mode defaults to `gpt-5.5`; use `gpt-5.4` for better cost-efficiency |
| `open_ai_api_key` | Create at [OpenAI Platform](https://platform.openai.com/api-keys) | | `open_ai_api_key` | Create one on the [OpenAI Platform](https://platform.openai.com/api-keys) |
| `open_ai_api_base` | Optional. Change to use third-party proxy | | `open_ai_api_base` | Optional; change it to access a third-party proxy |
| `bot_type` | Not required for official OpenAI models. Set to `openai` when using Claude or other non-OpenAI models via proxy | | `bot_type` | Not required when using OpenAI's official models; set to `openai` when accessing other vendors via the compatible protocol |
## Image Understanding
OpenAI models like `gpt-5.5`, `gpt-5.4`, `gpt-4o`, and `gpt-4.1` natively support vision. Once `open_ai_api_key` is configured, the Agent's Vision tool automatically uses the main model to recognize images. If the main model does not support vision, or you want to specify a Vision model explicitly, set it in the configuration file:
```json
{
"tools": {
"vision": {
"model": "gpt-5.4-mini"
}
}
}
```
Supported Vision models: `gpt-5.5`, `gpt-5.4`, `gpt-5.4-mini`, `gpt-5.4-nano`, `gpt-5`, `gpt-4.1`, `gpt-4.1-mini`, `gpt-4o`.
## Image Generation
Specify the image generation model in the configuration file; the Agent automatically routes image generation skill calls to OpenAI:
```json
{
"skills": {
"image-generation": {
"model": "gpt-image-2"
}
}
}
```
Supported image generation models: `gpt-image-2`, `gpt-image-1`.
## Speech-to-Text (ASR)
```json
{
"voice_to_text": "openai",
"voice_to_text_model": "gpt-4o-mini-transcribe"
}
```
| Parameter | Description |
| --- | --- |
| `voice_to_text` | Set to `openai` to enable OpenAI speech-to-text |
| `voice_to_text_model` | Optional, defaults to `gpt-4o-mini-transcribe`; can also be `gpt-4o-transcribe`, `whisper-1` |
Credentials are automatically reused from `open_ai_api_key`.
## Text-to-Speech (TTS)
```json
{
"text_to_voice": "openai",
"text_to_voice_model": "tts-1",
"tts_voice_id": "alloy"
}
```
| Parameter | Description |
| --- | --- |
| `text_to_voice_model` | `tts-1`, `tts-1-hd`, `gpt-4o-mini-tts` |
| `tts_voice_id` | Voices: `alloy`, `echo`, `fable`, `onyx`, `nova`, `shimmer`, `ash`, `ballad`, `coral`, `sage`, `verse` |
## Embedding
```json
{
"embedding_provider": "openai",
"embedding_model": "text-embedding-3-small"
}
```
Available models: `text-embedding-3-small`, `text-embedding-3-large`, `text-embedding-ada-002`. After changing the embedding model, run `/memory rebuild-index` to rebuild the index.

View File

@@ -1,6 +1,6 @@
--- ---
title: Baidu Qianfan / ERNIE title: ERNIE
description: Baidu Qianfan ERNIE model configuration description: ERNIE model configuration (Baidu Qianfan)
--- ---
Option 1: Native integration (recommended): Option 1: Native integration (recommended):
@@ -40,7 +40,7 @@ To force a specific Vision model, set it explicitly in `config.json`:
```json ```json
{ {
"tool": { "tools": {
"vision": { "vision": {
"model": "ernie-4.5-turbo-vl" "model": "ernie-4.5-turbo-vl"
} }

View File

@@ -1,8 +1,16 @@
--- ---
title: Qwen (Tongyi Qianwen) title: Qwen
description: Tongyi Qianwen model configuration description: Qwen model configuration (Text / Image Understanding / Image Generation / Speech-to-Text / Text-to-Speech / Embedding)
--- ---
Qwen (Alibaba DashScope / Bailian) is one of the most fully-featured vendors. Text, image understanding, image generation, speech-to-text, text-to-speech, and embedding can all be enabled with a single `dashscope_api_key`.
<Tip>
All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file.
</Tip>
## Text Chat
```json ```json
{ {
"model": "qwen3.6-plus", "model": "qwen3.6-plus",
@@ -12,16 +20,93 @@ description: Tongyi Qianwen model configuration
| Parameter | Description | | Parameter | Description |
| --- | --- | | --- | --- |
| `model` | Options include `qwen3.6-plus`, `qwen3.5-plus`, `qwen3-max`, `qwen-max`, `qwen-plus`, `qwen-turbo`, `qwq-plus`, etc. | | `model` | Can be `qwen3.6-plus`, `qwen3.7-max`, `qwen3.5-plus`, `qwen3-max`, `qwen-max`, `qwen-plus`, `qwen-turbo`, `qwq-plus`, etc. |
| `dashscope_api_key` | Create at [Bailian Console](https://bailian.console.aliyun.com/?tab=model#/api-key). See [official docs](https://bailian.console.aliyun.com/?tab=api#/api) | | `dashscope_api_key` | Create one in the [Bailian Console](https://bailian.console.aliyun.com/?tab=model#/api-key); see the [official docs](https://bailian.console.aliyun.com/?tab=api#/api) |
OpenAI-compatible configuration is also supported: ## Image Understanding
Once `dashscope_api_key` is configured, the Agent's Vision tool automatically calls Qwen's vision models to recognize images. Models like `qwen3-max` / `qwen3.5-plus` / `qwen3.6-plus` are already multimodal; if the main model is text-only (e.g. `qwen-turbo`), it automatically falls back to `qwen-vl-max`.
To manually specify a Vision model:
```json ```json
{ {
"bot_type": "openai", "tools": {
"model": "qwen3.6-plus", "vision": {
"open_ai_api_base": "https://dashscope.aliyuncs.com/compatible-mode/v1", "model": "qwen3.6-plus"
"open_ai_api_key": "YOUR_API_KEY" }
}
} }
``` ```
Supported models: `qwen3.6-plus`, `qwen3.5-plus`, `qwen3-max`.
## Image Generation
```json
{
"skills": {
"image-generation": {
"model": "qwen-image-2.0"
}
}
}
```
Available models: `qwen-image-2.0`, `qwen-image-2.0-pro`.
## Speech-to-Text (ASR)
```json
{
"voice_to_text": "dashscope",
"voice_to_text_model": "qwen3-asr-flash"
}
```
| Parameter | Description |
| --- | --- |
| `voice_to_text` | Set to `dashscope` to enable Qwen ASR |
| `voice_to_text_model` | Optional, defaults to `qwen3-asr-flash` |
Credentials are automatically reused from `dashscope_api_key`. A single audio segment should be smaller than 10MB and no longer than 300 seconds.
## Text-to-Speech (TTS)
```json
{
"text_to_voice": "dashscope",
"text_to_voice_model": "qwen3-tts-flash",
"tts_voice_id": "Cherry"
}
```
| Parameter | Description |
| --- | --- |
| `text_to_voice_model` | Optional, defaults to `qwen3-tts-flash`; covers Mandarin, dialects, and major foreign languages |
| `tts_voice_id` | Voice ID; see the common list below |
Common voice examples:
| Voice ID | Description |
| --- | --- |
| `Cherry` | Qianyue · Sunny Female Voice |
| `Serena` | Suyao · Gentle Female Voice |
| `Ethan` | Chenxu · Sunny Male Voice |
| `Chelsie` | Qianxue · Anime Girl |
| `Dylan` | Beijing Dialect · Xiaodong |
| `Rocky` | Cantonese · Aqiang |
| `Sunny` | Sichuan Dialect · Qing'er |
The full voice list (Mandarin / regional dialects / bilingual, etc.) can be selected visually in the Web Console under "Model Management → Text-to-Speech".
## Embedding
```json
{
"embedding_provider": "dashscope",
"embedding_model": "text-embedding-v4"
}
```
The default model is `text-embedding-v4`. After changing the embedding model, run `/memory rebuild-index` to rebuild the index.

View File

@@ -5,12 +5,15 @@ description: CowAgent version history
| Version | Date | Description | | Version | Date | Description |
| --- | --- | --- | | --- | --- | --- |
| [2.0.9](/en/releases/v2.0.9) | 2026.05.22 | Model management console, MCP protocol support, browser persistent login, new models (gpt-5.5, gemini-3.5-flash, qwen3.7-max, etc.), deployment hardening |
| [2.0.8](/en/releases/v2.0.8) | 2026.05.06 | Major Feishu channel upgrade (voice, streaming and Markdown, one-click QR-scan setup), DeepSeek V4 and Baidu models, scheduler tool enhancements |
| [2.0.7](/en/releases/v2.0.7) | 2026.04.22 | Image Generation Skill (6-provider auto-routing), new models (Kimi K2.6, Claude Opus 4.7, GLM 5.1), knowledge base and Web Console improvements | | [2.0.7](/en/releases/v2.0.7) | 2026.04.22 | Image Generation Skill (6-provider auto-routing), new models (Kimi K2.6, Claude Opus 4.7, GLM 5.1), knowledge base and Web Console improvements |
| [2.0.6](/en/releases/v2.0.6) | 2026.04.14 | Knowledge Base, Deep Dream Memory Distillation, Smart Context Compression, Web Console upgrades | | [2.0.6](/en/releases/v2.0.6) | 2026.04.14 | Project rename, Knowledge Base system, Deep Dream Memory Distillation, Smart Context Compression, Web Console multi-session and various improvements |
| [2.0.5](/en/releases/v2.0.5) | 2026.04.01 | Cow CLI, Skill Hub open source, Browser tool, WeCom Bot QR scan, and more | | [2.0.5](/en/releases/v2.0.5) | 2026.04.01 | Cow CLI, Skill Hub open source, Browser tool, WeCom Bot QR scan, and more |
| [2.0.4](/en/releases/v2.0.4) | 2026.03.22 | Personal WeChat channel, new model support, Japanese docs, script refactoring and bug fixes | | [2.0.4](/en/releases/v2.0.4) | 2026.03.22 | Personal WeChat channel, new model support, Japanese docs, script refactoring and bug fixes |
| [2.0.3](/en/releases/v2.0.3) | 2026.03.18 | WeCom Smart Bot and QQ channels, Coding Plan support, multiple new models, Web file processing, memory system upgrade |
| [2.0.2](/en/releases/v2.0.2) | 2026.02.27 | Web Console upgrade, multi-channel concurrency, session persistence | | [2.0.2](/en/releases/v2.0.2) | 2026.02.27 | Web Console upgrade, multi-channel concurrency, session persistence |
| [2.0.1](/en/releases/v2.0.1) | 2026.02.27 | Built-in Web Search tool, smart context management, multiple fixes | | [2.0.1](/en/releases/v2.0.1) | 2026.02.13 | Built-in Web Search tool, smart context management, multiple fixes |
| [2.0.0](/en/releases/v2.0.0) | 2026.02.03 | Full upgrade to AI super assistant | | [2.0.0](/en/releases/v2.0.0) | 2026.02.03 | Full upgrade to AI super assistant |
| 1.7.6 | 2025.05.23 | Web Channel optimization, AgentMesh plugin | | 1.7.6 | 2025.05.23 | Web Channel optimization, AgentMesh plugin |
| 1.7.5 | 2025.04.11 | DeepSeek model | | 1.7.5 | 2025.04.11 | DeepSeek model |
@@ -21,6 +24,8 @@ description: CowAgent version history
| 1.6.9 | 2024.07.19 | gpt-4o-mini, Alibaba voice recognition | | 1.6.9 | 2024.07.19 | gpt-4o-mini, Alibaba voice recognition |
| 1.6.8 | 2024.07.05 | Claude 3.5, Gemini 1.5 Pro | | 1.6.8 | 2024.07.05 | Claude 3.5, Gemini 1.5 Pro |
| 1.6.0 | 2024.04.26 | Kimi integration, gpt-4-turbo upgrade | | 1.6.0 | 2024.04.26 | Kimi integration, gpt-4-turbo upgrade |
| 1.5.8 | 2024.03.26 | GLM-4, Claude-3, edge-tts |
| 1.5.2 | 2023.11.10 | Feishu channel, image recognition chat |
| 1.5.0 | 2023.11.10 | gpt-4-turbo, dall-e-3, tts multimodal | | 1.5.0 | 2023.11.10 | gpt-4-turbo, dall-e-3, tts multimodal |
| 1.0.0 | 2022.12.12 | Project created, first ChatGPT integration | | 1.0.0 | 2022.12.12 | Project created, first ChatGPT integration |

Some files were not shown because too many files have changed in this diff Show More