mirror of
https://github.com/zhayujie/chatgpt-on-wechat.git
synced 2026-06-02 18:17:11 +08:00
Compare commits
35 Commits
feat-multi
...
feat-teleg
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e3eacc77d7 | ||
|
|
d106465419 | ||
|
|
f39380cea7 | ||
|
|
bccce2d7cb | ||
|
|
83cd6ad158 | ||
|
|
116fb27257 | ||
|
|
8d67177a1b | ||
|
|
ad2db1a776 | ||
|
|
2e6d9e0f27 | ||
|
|
e05f85f3ce | ||
|
|
40c48a9a61 | ||
|
|
c9a7525d0b | ||
|
|
fd571ac539 | ||
|
|
c5a3f991c5 | ||
|
|
eb74b73351 | ||
|
|
9b31f45481 | ||
|
|
bc9c1691f5 | ||
|
|
73bf83d2ff | ||
|
|
36e1988fee | ||
|
|
aad6ef635e | ||
|
|
96659cd616 | ||
|
|
c8787b7de4 | ||
|
|
91d427c8f9 | ||
|
|
c8c0573dbd | ||
|
|
29af855ecd | ||
|
|
0a146a245d | ||
|
|
bd85fee7d7 | ||
|
|
571897e2fd | ||
|
|
840dabeccd | ||
|
|
069bffa3e8 | ||
|
|
cc10d230b0 | ||
|
|
ac9d0f18c5 | ||
|
|
09fa624797 | ||
|
|
a01423a196 | ||
|
|
7c35df7a82 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -32,7 +32,6 @@ plugins/banwords/lib/__pycache__
|
||||
!plugins/role
|
||||
!plugins/keyword
|
||||
!plugins/linkai
|
||||
!plugins/agent
|
||||
!plugins/cow_cli
|
||||
client_config.json
|
||||
ref/
|
||||
|
||||
@@ -31,9 +31,13 @@ def detect_index_dim(storage) -> Optional[int]:
|
||||
if not row or not row["embedding"]:
|
||||
return None
|
||||
try:
|
||||
emb = json.loads(row["embedding"])
|
||||
raw = row["embedding"]
|
||||
if isinstance(raw, (bytes, bytearray)):
|
||||
# New BLOB format: 4 bytes per float32
|
||||
return len(raw) // 4
|
||||
emb = json.loads(raw)
|
||||
return len(emb) if isinstance(emb, list) else None
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
except (json.JSONDecodeError, TypeError, Exception):
|
||||
return None
|
||||
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ from datetime import datetime, timedelta
|
||||
from agent.memory.config import MemoryConfig, get_default_memory_config
|
||||
from agent.memory.storage import MemoryStorage, MemoryChunk, SearchResult
|
||||
from agent.memory.chunker import TextChunker
|
||||
from agent.memory.embedding import EmbeddingProvider
|
||||
from agent.memory.embedding import EmbeddingProvider, EmbeddingCache
|
||||
from agent.memory.summarizer import MemoryFlushManager, create_memory_files_if_needed
|
||||
|
||||
|
||||
@@ -61,7 +61,11 @@ class MemoryManager:
|
||||
logger.info(
|
||||
"[MemoryManager] No embedding provider; memory will use keyword search only"
|
||||
)
|
||||
|
||||
|
||||
# Cache for query embeddings (avoids redundant API calls within a session)
|
||||
self._embedding_cache = EmbeddingCache()
|
||||
|
||||
|
||||
# Initialize memory flush manager
|
||||
workspace_dir = self.config.get_workspace()
|
||||
self.flush_manager = MemoryFlushManager(
|
||||
@@ -128,7 +132,14 @@ class MemoryManager:
|
||||
vector_results = []
|
||||
if self.embedding_provider:
|
||||
try:
|
||||
query_embedding = self.embedding_provider.embed_query(query)
|
||||
provider_name = type(self.embedding_provider).__name__
|
||||
model_name = getattr(self.embedding_provider, 'model', '')
|
||||
cached = self._embedding_cache.get(query, provider_name, model_name)
|
||||
if cached is not None:
|
||||
query_embedding = cached
|
||||
else:
|
||||
query_embedding = self.embedding_provider.embed_query(query)
|
||||
self._embedding_cache.put(query, provider_name, model_name, query_embedding)
|
||||
vector_results = self.storage.search_vector(
|
||||
query_embedding=query_embedding,
|
||||
user_id=user_id,
|
||||
|
||||
@@ -5,12 +5,42 @@ Provides vector and keyword search capabilities
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
import re
|
||||
import sqlite3
|
||||
import json
|
||||
import hashlib
|
||||
import threading
|
||||
from typing import List, Dict, Optional, Any
|
||||
from pathlib import Path
|
||||
from dataclasses import dataclass
|
||||
try:
|
||||
import numpy as np
|
||||
_HAS_NUMPY = True
|
||||
except ImportError:
|
||||
_HAS_NUMPY = False
|
||||
np = None # type: ignore[assignment]
|
||||
|
||||
# UPSERT (INSERT … ON CONFLICT DO UPDATE) requires SQLite ≥ 3.24.0 (2018).
|
||||
# Older systems (e.g. CentOS 7 ships SQLite 3.7) fall back to INSERT OR REPLACE,
|
||||
# which risks FTS5 rowid drift on chunk updates (see save_chunk docstring).
|
||||
_HAS_UPSERT = sqlite3.sqlite_version_info >= (3, 24, 0)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CJK character ranges, compiled once at module load.
|
||||
# Covers: CJK Symbols/Punctuation, Japanese kana (hiragana + katakana),
|
||||
# CJK Unified Ideographs + Extension A, Korean syllables (Hangul),
|
||||
# CJK Compatibility Ideographs, and CJK Extension B–F.
|
||||
# ---------------------------------------------------------------------------
|
||||
_CJK_RANGES = (
|
||||
r'\u3000-\u30ff' # CJK Symbols/Punctuation + Japanese kana
|
||||
r'\u3400-\u9fff' # CJK Unified Ideographs (incl. Extension A)
|
||||
r'\uac00-\ud7af' # Korean syllables (Hangul)
|
||||
r'\uf900-\ufaff' # CJK Compatibility Ideographs
|
||||
r'\U00020000-\U0002fa1f' # CJK Extension B–F
|
||||
)
|
||||
_RE_CONTAINS_CJK = re.compile(f'[{_CJK_RANGES}]')
|
||||
_RE_CJK_WORDS = re.compile(f'[{_CJK_RANGES}]+')
|
||||
_RE_TRIGRAM_TOKENS = re.compile(f'[{_CJK_RANGES}]+|[A-Za-z0-9_]+')
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -48,6 +78,10 @@ class MemoryStorage:
|
||||
self.db_path = db_path
|
||||
self.conn: Optional[sqlite3.Connection] = None
|
||||
self.fts5_available = False # Track FTS5 availability
|
||||
# RLock protects concurrent writes from the same process.
|
||||
# SQLite WAL mode handles read/write concurrency at the file level,
|
||||
# but same-process concurrent writes still need a Python-level lock.
|
||||
self._lock = threading.RLock()
|
||||
self._init_db()
|
||||
|
||||
def _check_fts5_support(self) -> bool:
|
||||
@@ -69,6 +103,14 @@ class MemoryStorage:
|
||||
|
||||
# Check FTS5 support
|
||||
self.fts5_available = self._check_fts5_support()
|
||||
if not _HAS_UPSERT:
|
||||
from common.log import logger
|
||||
logger.warning(
|
||||
"[MemoryStorage] SQLite %s < 3.24 — UPSERT unavailable. "
|
||||
"Falling back to INSERT OR REPLACE; FTS5 rowid may drift on "
|
||||
"chunk updates (rebuild index periodically to recover).",
|
||||
sqlite3.sqlite_version,
|
||||
)
|
||||
if not self.fts5_available:
|
||||
from common.log import logger
|
||||
logger.debug("[MemoryStorage] FTS5 not available, using LIKE-based keyword search")
|
||||
@@ -175,6 +217,75 @@ class MemoryStorage:
|
||||
)
|
||||
self._rebuild_fts5_from_chunks()
|
||||
|
||||
# Internal key-value store for persistent flags (e.g. backfill tracking)
|
||||
self.conn.execute("""
|
||||
CREATE TABLE IF NOT EXISTS _meta (
|
||||
key TEXT PRIMARY KEY,
|
||||
value TEXT NOT NULL
|
||||
)
|
||||
""")
|
||||
|
||||
# Create trigram FTS5 table for CJK / mixed-language search
|
||||
self.trigram_fts5_available = False
|
||||
if self.fts5_available:
|
||||
try:
|
||||
self.conn.execute("""
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts_trigram USING fts5(
|
||||
text,
|
||||
id UNINDEXED,
|
||||
user_id UNINDEXED,
|
||||
path UNINDEXED,
|
||||
source UNINDEXED,
|
||||
scope UNINDEXED,
|
||||
content='chunks',
|
||||
content_rowid='rowid',
|
||||
tokenize='trigram case_sensitive 0'
|
||||
)
|
||||
""")
|
||||
self.conn.execute("""
|
||||
CREATE TRIGGER IF NOT EXISTS chunks_trigram_ai
|
||||
AFTER INSERT ON chunks BEGIN
|
||||
INSERT INTO chunks_fts_trigram(rowid, text, id, user_id, path, source, scope)
|
||||
VALUES (new.rowid, new.text, new.id, new.user_id, new.path, new.source, new.scope);
|
||||
END
|
||||
""")
|
||||
self.conn.execute("""
|
||||
CREATE TRIGGER IF NOT EXISTS chunks_trigram_ad
|
||||
AFTER DELETE ON chunks BEGIN
|
||||
DELETE FROM chunks_fts_trigram WHERE rowid = old.rowid;
|
||||
END
|
||||
""")
|
||||
self.conn.execute("""
|
||||
CREATE TRIGGER IF NOT EXISTS chunks_trigram_au
|
||||
AFTER UPDATE ON chunks BEGIN
|
||||
UPDATE chunks_fts_trigram
|
||||
SET text=new.text, id=new.id, user_id=new.user_id,
|
||||
path=new.path, source=new.source, scope=new.scope
|
||||
WHERE rowid = new.rowid;
|
||||
END
|
||||
""")
|
||||
# One-time backfill for existing rows.
|
||||
# NOTE: COUNT(*) on an FTS5 content table always returns 0, so we
|
||||
# use a persistent flag in _meta instead of counting trigram rows.
|
||||
backfill_done = self.conn.execute(
|
||||
"SELECT 1 FROM _meta WHERE key = 'trigram_backfill_done'"
|
||||
).fetchone()
|
||||
chunks_count = self.conn.execute(
|
||||
"SELECT COUNT(*) as c FROM chunks"
|
||||
).fetchone()['c']
|
||||
if chunks_count > 0 and not backfill_done:
|
||||
self.conn.execute(
|
||||
"INSERT INTO chunks_fts_trigram(chunks_fts_trigram) VALUES('rebuild')"
|
||||
)
|
||||
self.conn.execute(
|
||||
"INSERT OR REPLACE INTO _meta(key, value) VALUES('trigram_backfill_done', '1')"
|
||||
)
|
||||
self.trigram_fts5_available = True
|
||||
except Exception:
|
||||
from common.log import logger
|
||||
logger.warning("[MemoryStorage] trigram FTS5 unavailable, CJK search will use LIKE fallback", exc_info=True)
|
||||
self.trigram_fts5_available = False
|
||||
|
||||
# Create files metadata table
|
||||
self.conn.execute("""
|
||||
CREATE TABLE IF NOT EXISTS files (
|
||||
@@ -186,7 +297,7 @@ class MemoryStorage:
|
||||
updated_at INTEGER DEFAULT (strftime('%s', 'now'))
|
||||
)
|
||||
""")
|
||||
|
||||
|
||||
self.conn.commit()
|
||||
|
||||
def _fts5_state_inconsistent(self) -> bool:
|
||||
@@ -299,43 +410,98 @@ class MemoryStorage:
|
||||
self.conn.commit()
|
||||
|
||||
def save_chunk(self, chunk: MemoryChunk):
|
||||
"""Save a memory chunk"""
|
||||
self.conn.execute("""
|
||||
INSERT OR REPLACE INTO chunks
|
||||
(id, user_id, scope, source, path, start_line, end_line, text, embedding, hash, metadata, updated_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, strftime('%s', 'now'))
|
||||
""", (
|
||||
chunk.id,
|
||||
chunk.user_id,
|
||||
chunk.scope,
|
||||
chunk.source,
|
||||
chunk.path,
|
||||
chunk.start_line,
|
||||
chunk.end_line,
|
||||
chunk.text,
|
||||
json.dumps(chunk.embedding) if chunk.embedding else None,
|
||||
"""Save a memory chunk (insert or update by id).
|
||||
|
||||
Uses SQLite UPSERT (INSERT … ON CONFLICT DO UPDATE) instead of
|
||||
INSERT OR REPLACE. INSERT OR REPLACE internally does DELETE+INSERT,
|
||||
which changes the row's rowid. Because both FTS5 tables use
|
||||
content_rowid='rowid', a new rowid would leave the old FTS index
|
||||
entries pointing at a non-existent rowid and trigger
|
||||
"fts5: missing row N from content table" errors.
|
||||
ON CONFLICT DO UPDATE fires the AFTER UPDATE trigger (chunks_au /
|
||||
chunks_trigram_au) and keeps the original rowid intact.
|
||||
"""
|
||||
if _HAS_UPSERT:
|
||||
_SQL = """
|
||||
INSERT INTO chunks
|
||||
(id, user_id, scope, source, path, start_line, end_line,
|
||||
text, embedding, hash, metadata, updated_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, strftime('%s', 'now'))
|
||||
ON CONFLICT(id) DO UPDATE SET
|
||||
user_id = excluded.user_id,
|
||||
scope = excluded.scope,
|
||||
source = excluded.source,
|
||||
path = excluded.path,
|
||||
start_line = excluded.start_line,
|
||||
end_line = excluded.end_line,
|
||||
text = excluded.text,
|
||||
embedding = excluded.embedding,
|
||||
hash = excluded.hash,
|
||||
metadata = excluded.metadata,
|
||||
updated_at = strftime('%s', 'now')
|
||||
"""
|
||||
else:
|
||||
_SQL = """
|
||||
INSERT OR REPLACE INTO chunks
|
||||
(id, user_id, scope, source, path, start_line, end_line,
|
||||
text, embedding, hash, metadata, updated_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, strftime('%s', 'now'))
|
||||
"""
|
||||
params = (
|
||||
chunk.id, chunk.user_id, chunk.scope, chunk.source, chunk.path,
|
||||
chunk.start_line, chunk.end_line, chunk.text,
|
||||
self._encode_embedding(chunk.embedding),
|
||||
chunk.hash,
|
||||
json.dumps(chunk.metadata) if chunk.metadata else None
|
||||
))
|
||||
self.conn.commit()
|
||||
|
||||
json.dumps(chunk.metadata) if chunk.metadata else None,
|
||||
)
|
||||
with self._lock:
|
||||
self.conn.execute(_SQL, params)
|
||||
self.conn.commit()
|
||||
|
||||
def save_chunks_batch(self, chunks: List[MemoryChunk]):
|
||||
"""Save multiple chunks in a batch"""
|
||||
self.conn.executemany("""
|
||||
INSERT OR REPLACE INTO chunks
|
||||
(id, user_id, scope, source, path, start_line, end_line, text, embedding, hash, metadata, updated_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, strftime('%s', 'now'))
|
||||
""", [
|
||||
"""Save multiple chunks in a batch (insert or update by id).
|
||||
|
||||
See save_chunk for why UPSERT is used instead of INSERT OR REPLACE.
|
||||
"""
|
||||
if _HAS_UPSERT:
|
||||
_SQL = """
|
||||
INSERT INTO chunks
|
||||
(id, user_id, scope, source, path, start_line, end_line,
|
||||
text, embedding, hash, metadata, updated_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, strftime('%s', 'now'))
|
||||
ON CONFLICT(id) DO UPDATE SET
|
||||
user_id = excluded.user_id,
|
||||
scope = excluded.scope,
|
||||
source = excluded.source,
|
||||
path = excluded.path,
|
||||
start_line = excluded.start_line,
|
||||
end_line = excluded.end_line,
|
||||
text = excluded.text,
|
||||
embedding = excluded.embedding,
|
||||
hash = excluded.hash,
|
||||
metadata = excluded.metadata,
|
||||
updated_at = strftime('%s', 'now')
|
||||
"""
|
||||
else:
|
||||
_SQL = """
|
||||
INSERT OR REPLACE INTO chunks
|
||||
(id, user_id, scope, source, path, start_line, end_line,
|
||||
text, embedding, hash, metadata, updated_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, strftime('%s', 'now'))
|
||||
"""
|
||||
params_list = [
|
||||
(
|
||||
c.id, c.user_id, c.scope, c.source, c.path,
|
||||
c.start_line, c.end_line, c.text,
|
||||
json.dumps(c.embedding) if c.embedding else None,
|
||||
self._encode_embedding(c.embedding),
|
||||
c.hash,
|
||||
json.dumps(c.metadata) if c.metadata else None
|
||||
json.dumps(c.metadata) if c.metadata else None,
|
||||
)
|
||||
for c in chunks
|
||||
])
|
||||
self.conn.commit()
|
||||
]
|
||||
with self._lock:
|
||||
self.conn.executemany(_SQL, params_list)
|
||||
self.conn.commit()
|
||||
|
||||
def get_chunk(self, chunk_id: str) -> Optional[MemoryChunk]:
|
||||
"""Get a chunk by ID"""
|
||||
@@ -356,21 +522,21 @@ class MemoryStorage:
|
||||
limit: int = 10
|
||||
) -> List[SearchResult]:
|
||||
"""
|
||||
Vector similarity search using in-memory cosine similarity
|
||||
(sqlite-vec can be added later for better performance)
|
||||
Vector similarity search using numpy-vectorized cosine similarity.
|
||||
All embeddings are loaded then scored in a single BLAS matrix-vector
|
||||
multiply, which is ~100x faster than the pure-Python per-row loop.
|
||||
"""
|
||||
if scopes is None:
|
||||
scopes = ["shared"]
|
||||
if user_id:
|
||||
scopes.append("user")
|
||||
|
||||
# Build query
|
||||
|
||||
scope_placeholders = ','.join('?' * len(scopes))
|
||||
params = scopes
|
||||
|
||||
params = list(scopes)
|
||||
|
||||
if user_id:
|
||||
query = f"""
|
||||
SELECT * FROM chunks
|
||||
SELECT * FROM chunks
|
||||
WHERE scope IN ({scope_placeholders})
|
||||
AND (scope = 'shared' OR user_id = ?)
|
||||
AND embedding IS NOT NULL
|
||||
@@ -378,51 +544,95 @@ class MemoryStorage:
|
||||
params.append(user_id)
|
||||
else:
|
||||
query = f"""
|
||||
SELECT * FROM chunks
|
||||
SELECT * FROM chunks
|
||||
WHERE scope IN ({scope_placeholders})
|
||||
AND embedding IS NOT NULL
|
||||
"""
|
||||
|
||||
|
||||
rows = self.conn.execute(query, params).fetchall()
|
||||
if not rows:
|
||||
return []
|
||||
|
||||
# Calculate cosine similarity. We probe the first row's dim to fail
|
||||
# loudly on a query/index dim mismatch — otherwise every doc would
|
||||
# score 0 silently, leaving the user wondering why search broke.
|
||||
results = []
|
||||
query_dim = len(query_embedding)
|
||||
if rows:
|
||||
first = json.loads(rows[0]['embedding'])
|
||||
if isinstance(first, list) and len(first) != query_dim:
|
||||
raise ValueError(
|
||||
f"Embedding dim mismatch: query is {query_dim}-dim but "
|
||||
f"index stores {len(first)}-dim vectors. The configured "
|
||||
f"embedding model differs from the one that built the "
|
||||
f"index — run /memory rebuild-index to re-embed."
|
||||
)
|
||||
|
||||
# Parse embeddings and build a (N, D) matrix in one pass.
|
||||
# New rows store BLOB bytes (np.frombuffer); legacy rows fall back to JSON.
|
||||
# Filter out rows whose embedding dimension differs from the query —
|
||||
# mixing dimensions would cause np.array() to produce an object array
|
||||
# and matrix @ q_vec to raise ValueError.
|
||||
expected_dim = len(query_embedding)
|
||||
valid_rows = []
|
||||
vectors = []
|
||||
for row in rows:
|
||||
embedding = json.loads(row['embedding'])
|
||||
similarity = self._cosine_similarity(query_embedding, embedding)
|
||||
vec = self._decode_embedding(row['embedding'])
|
||||
if not vec:
|
||||
continue
|
||||
if len(vec) != expected_dim:
|
||||
from common.log import logger
|
||||
logger.warning(
|
||||
"[MemoryStorage] Skipping chunk %s: embedding dim %d != query dim %d",
|
||||
row['id'], len(vec), expected_dim
|
||||
)
|
||||
continue
|
||||
valid_rows.append(row)
|
||||
vectors.append(vec)
|
||||
|
||||
if similarity > 0:
|
||||
results.append((similarity, row))
|
||||
|
||||
# Sort by similarity and limit
|
||||
results.sort(key=lambda x: x[0], reverse=True)
|
||||
results = results[:limit]
|
||||
|
||||
return [
|
||||
SearchResult(
|
||||
path=row['path'],
|
||||
start_line=row['start_line'],
|
||||
end_line=row['end_line'],
|
||||
score=score,
|
||||
snippet=self._truncate_text(row['text'], 500),
|
||||
source=row['source'],
|
||||
user_id=row['user_id']
|
||||
)
|
||||
for score, row in results
|
||||
]
|
||||
if not vectors:
|
||||
return []
|
||||
|
||||
if _HAS_NUMPY:
|
||||
matrix = np.array(vectors, dtype=np.float32) # (N, D)
|
||||
q_vec = np.array(query_embedding, dtype=np.float32) # (D,)
|
||||
|
||||
# Vectorized cosine similarity: dot(matrix, q) / (||matrix|| * ||q||)
|
||||
dots = matrix @ q_vec # (N,)
|
||||
row_norms = np.linalg.norm(matrix, axis=1) # (N,)
|
||||
q_norm = float(np.linalg.norm(q_vec))
|
||||
denominators = row_norms * q_norm
|
||||
np.maximum(denominators, 1e-10, out=denominators) # avoid div-by-zero
|
||||
sims = dots / denominators # (N,)
|
||||
|
||||
# Select TopK using argpartition (O(N) average), then sort only those K
|
||||
k = min(limit, len(valid_rows))
|
||||
top_idx = np.argpartition(sims, -k)[-k:]
|
||||
top_idx = top_idx[np.argsort(sims[top_idx])[::-1]]
|
||||
|
||||
return [
|
||||
SearchResult(
|
||||
path=valid_rows[i]['path'],
|
||||
start_line=valid_rows[i]['start_line'],
|
||||
end_line=valid_rows[i]['end_line'],
|
||||
score=float(sims[i]),
|
||||
snippet=self._truncate_text(valid_rows[i]['text'], 500),
|
||||
source=valid_rows[i]['source'],
|
||||
user_id=valid_rows[i]['user_id']
|
||||
)
|
||||
for i in top_idx
|
||||
if sims[i] > 0
|
||||
]
|
||||
else:
|
||||
# Pure-Python cosine similarity fallback (numpy not installed)
|
||||
import math
|
||||
q = query_embedding
|
||||
q_norm = math.sqrt(sum(x * x for x in q)) or 1e-10
|
||||
scored = []
|
||||
for i, vec in enumerate(vectors):
|
||||
dot = sum(a * b for a, b in zip(vec, q))
|
||||
v_norm = math.sqrt(sum(x * x for x in vec)) or 1e-10
|
||||
sim = dot / (v_norm * q_norm)
|
||||
if sim > 0:
|
||||
scored.append((sim, valid_rows[i]))
|
||||
scored.sort(key=lambda x: x[0], reverse=True)
|
||||
return [
|
||||
SearchResult(
|
||||
path=row['path'],
|
||||
start_line=row['start_line'],
|
||||
end_line=row['end_line'],
|
||||
score=sim,
|
||||
snippet=self._truncate_text(row['text'], 500),
|
||||
source=row['source'],
|
||||
user_id=row['user_id']
|
||||
)
|
||||
for sim, row in scored[:limit]
|
||||
]
|
||||
|
||||
def search_keyword(
|
||||
self,
|
||||
@@ -445,12 +655,37 @@ class MemoryStorage:
|
||||
if user_id:
|
||||
scopes.append("user")
|
||||
|
||||
if self.fts5_available:
|
||||
# Step 1: Standard FTS5 (unicode61) — pure ASCII queries only.
|
||||
# Skipped when query contains any CJK characters: unicode61 tokenises CJK
|
||||
# as individual characters without forming meaningful tokens, so it would
|
||||
# match only the ASCII portion of a mixed query (e.g. "Python" from
|
||||
# "Python教程") and silently discard the CJK part. Those queries go
|
||||
# directly to Step 2 (trigram), which handles both ASCII and CJK together.
|
||||
fts1_attempted = False
|
||||
if (self.fts5_available
|
||||
and not MemoryStorage._contains_cjk(query)
|
||||
and MemoryStorage._build_fts_query(query)):
|
||||
fts1_attempted = True
|
||||
fts_results = self._search_fts5(query, user_id, scopes, limit)
|
||||
if fts_results:
|
||||
return fts_results
|
||||
|
||||
return self._search_like(query, user_id, scopes, limit)
|
||||
# Step 2: Trigram FTS5 — CJK/mixed queries, plus fallback when unicode61
|
||||
# returned nothing (trigram indexes all scripts with 3-char sliding windows,
|
||||
# so it can catch terms that unicode61 tokenisation misses).
|
||||
if self.trigram_fts5_available and (
|
||||
MemoryStorage._contains_cjk(query) or fts1_attempted
|
||||
):
|
||||
trigram_results = self._search_fts5_trigram(query, user_id, scopes, limit)
|
||||
if trigram_results:
|
||||
return trigram_results
|
||||
|
||||
# Step 3: LIKE fallback — last resort (FTS5 unavailable, or CJK tokens
|
||||
# shorter than 3 characters that trigram cannot match, e.g. a single-char query).
|
||||
if not self.fts5_available or MemoryStorage._contains_cjk(query):
|
||||
return self._search_like(query, user_id, scopes, limit)
|
||||
|
||||
return []
|
||||
|
||||
def _search_fts5(
|
||||
self,
|
||||
@@ -471,7 +706,7 @@ class MemoryStorage:
|
||||
sql_query = f"""
|
||||
SELECT chunks.*, bm25(chunks_fts) as rank
|
||||
FROM chunks_fts
|
||||
JOIN chunks ON chunks.id = chunks_fts.id
|
||||
JOIN chunks ON chunks.rowid = chunks_fts.rowid
|
||||
WHERE chunks_fts MATCH ?
|
||||
AND chunks.scope IN ({scope_placeholders})
|
||||
AND (chunks.scope = 'shared' OR chunks.user_id = ?)
|
||||
@@ -483,7 +718,7 @@ class MemoryStorage:
|
||||
sql_query = f"""
|
||||
SELECT chunks.*, bm25(chunks_fts) as rank
|
||||
FROM chunks_fts
|
||||
JOIN chunks ON chunks.id = chunks_fts.id
|
||||
JOIN chunks ON chunks.rowid = chunks_fts.rowid
|
||||
WHERE chunks_fts MATCH ?
|
||||
AND chunks.scope IN ({scope_placeholders})
|
||||
ORDER BY rank
|
||||
@@ -505,13 +740,11 @@ class MemoryStorage:
|
||||
)
|
||||
for row in rows
|
||||
]
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
from common.log import logger
|
||||
logger.error(
|
||||
f"[MemoryStorage] FTS5 search failed (caller will fall back to LIKE): {e}"
|
||||
)
|
||||
logger.warning("[MemoryStorage] _search_fts5 failed, returning empty", exc_info=True)
|
||||
return []
|
||||
|
||||
|
||||
def _search_like(
|
||||
self,
|
||||
query: str,
|
||||
@@ -522,12 +755,11 @@ class MemoryStorage:
|
||||
"""LIKE-based search.
|
||||
|
||||
Used as the keyword-search fallback when FTS5 is unavailable, fails,
|
||||
or returns empty. Supports both CJK runs and ASCII word tokens so it
|
||||
can serve as a true safety net for any query.
|
||||
or returns empty. Supports both CJK runs (1+ chars) and ASCII word
|
||||
tokens (3+ chars) so it can serve as a true safety net for any query.
|
||||
"""
|
||||
import re
|
||||
# CJK runs (2+ chars) + ASCII word tokens (3+ chars to avoid noise)
|
||||
cjk_words = re.findall(r'[\u4e00-\u9fff]{2,}', query)
|
||||
# CJK runs (1+ chars, wide Unicode range) + ASCII words (3+ chars to avoid noise)
|
||||
cjk_words = _RE_CJK_WORDS.findall(query)
|
||||
ascii_words = [t for t in re.findall(r'[A-Za-z0-9_]+', query) if len(t) >= 3]
|
||||
words = cjk_words + ascii_words
|
||||
if not words:
|
||||
@@ -565,44 +797,54 @@ class MemoryStorage:
|
||||
|
||||
try:
|
||||
rows = self.conn.execute(sql_query, params).fetchall()
|
||||
return [
|
||||
SearchResult(
|
||||
results = []
|
||||
for row in rows:
|
||||
# Dynamic score: reward chunks that contain more of the query words.
|
||||
# Use all tokens (CJK + ASCII) so pure-ASCII queries are not skipped.
|
||||
# matched_count is always ≥1 because the WHERE clause uses OR, but
|
||||
# guard defensively so unexpected zero-match rows are never surfaced.
|
||||
text_lower = row['text'].lower()
|
||||
matched_count = sum(1 for w in words if w.lower() in text_lower)
|
||||
if matched_count == 0:
|
||||
continue
|
||||
score = min(0.85, 0.3 + 0.15 * matched_count)
|
||||
results.append(SearchResult(
|
||||
path=row['path'],
|
||||
start_line=row['start_line'],
|
||||
end_line=row['end_line'],
|
||||
score=0.5, # Fixed score for LIKE search
|
||||
score=score,
|
||||
snippet=self._truncate_text(row['text'], 500),
|
||||
source=row['source'],
|
||||
user_id=row['user_id']
|
||||
)
|
||||
for row in rows
|
||||
]
|
||||
except Exception as e:
|
||||
))
|
||||
results.sort(key=lambda r: r.score, reverse=True)
|
||||
return results
|
||||
except Exception:
|
||||
from common.log import logger
|
||||
logger.error(f"[MemoryStorage] LIKE search failed: {e}")
|
||||
logger.warning("[MemoryStorage] _search_like failed, returning empty", exc_info=True)
|
||||
return []
|
||||
|
||||
|
||||
def delete_by_path(self, path: str):
|
||||
"""Delete all chunks from a file"""
|
||||
self.conn.execute("""
|
||||
DELETE FROM chunks WHERE path = ?
|
||||
""", (path,))
|
||||
self.conn.commit()
|
||||
|
||||
with self._lock:
|
||||
self.conn.execute("DELETE FROM chunks WHERE path = ?", (path,))
|
||||
self.conn.commit()
|
||||
|
||||
def get_file_hash(self, path: str) -> Optional[str]:
|
||||
"""Get stored file hash"""
|
||||
row = self.conn.execute("""
|
||||
SELECT hash FROM files WHERE path = ?
|
||||
""", (path,)).fetchone()
|
||||
return row['hash'] if row else None
|
||||
|
||||
|
||||
def update_file_metadata(self, path: str, source: str, file_hash: str, mtime: int, size: int):
|
||||
"""Update file metadata"""
|
||||
self.conn.execute("""
|
||||
INSERT OR REPLACE INTO files (path, source, hash, mtime, size, updated_at)
|
||||
VALUES (?, ?, ?, ?, ?, strftime('%s', 'now'))
|
||||
""", (path, source, file_hash, mtime, size))
|
||||
self.conn.commit()
|
||||
with self._lock:
|
||||
self.conn.execute("""
|
||||
INSERT OR REPLACE INTO files (path, source, hash, mtime, size, updated_at)
|
||||
VALUES (?, ?, ?, ?, ?, strftime('%s', 'now'))
|
||||
""", (path, source, file_hash, mtime, size))
|
||||
self.conn.commit()
|
||||
|
||||
def get_stats(self) -> Dict[str, int]:
|
||||
"""Get storage statistics"""
|
||||
@@ -632,7 +874,8 @@ class MemoryStorage:
|
||||
self.conn.close()
|
||||
self.conn = None # Mark as closed
|
||||
except Exception as e:
|
||||
print(f"⚠️ Error closing database connection: {e}")
|
||||
from common.log import logger
|
||||
logger.warning("[MemoryStorage] Error closing database connection: %s", e)
|
||||
|
||||
def __del__(self):
|
||||
"""Destructor to ensure connection is closed"""
|
||||
@@ -642,7 +885,33 @@ class MemoryStorage:
|
||||
pass # Ignore errors during cleanup
|
||||
|
||||
# Helper methods
|
||||
|
||||
|
||||
@staticmethod
|
||||
def _encode_embedding(embedding: Optional[List[float]]) -> Optional[bytes]:
|
||||
"""Encode embedding as float32 BLOB bytes (~6x smaller and faster than JSON).
|
||||
Falls back to struct.pack when numpy is unavailable."""
|
||||
if embedding is None:
|
||||
return None
|
||||
if _HAS_NUMPY:
|
||||
return np.array(embedding, dtype=np.float32).tobytes()
|
||||
import struct
|
||||
return struct.pack(f'{len(embedding)}f', *embedding)
|
||||
|
||||
@staticmethod
|
||||
def _decode_embedding(raw) -> Optional[List[float]]:
|
||||
"""Decode embedding from BLOB bytes or legacy JSON string.
|
||||
Handles both numpy and numpy-free environments."""
|
||||
if raw is None:
|
||||
return None
|
||||
if isinstance(raw, (bytes, bytearray)):
|
||||
if _HAS_NUMPY:
|
||||
return np.frombuffer(raw, dtype=np.float32).tolist()
|
||||
import struct
|
||||
n = len(raw) // 4
|
||||
return list(struct.unpack(f'{n}f', raw))
|
||||
# Legacy JSON format written by older versions
|
||||
return json.loads(raw)
|
||||
|
||||
def _row_to_chunk(self, row) -> MemoryChunk:
|
||||
"""Convert database row to MemoryChunk"""
|
||||
return MemoryChunk(
|
||||
@@ -654,32 +923,89 @@ class MemoryStorage:
|
||||
start_line=row['start_line'],
|
||||
end_line=row['end_line'],
|
||||
text=row['text'],
|
||||
embedding=json.loads(row['embedding']) if row['embedding'] else None,
|
||||
embedding=self._decode_embedding(row['embedding']),
|
||||
hash=row['hash'],
|
||||
metadata=json.loads(row['metadata']) if row['metadata'] else None
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _cosine_similarity(vec1: List[float], vec2: List[float]) -> float:
|
||||
"""Calculate cosine similarity between two vectors"""
|
||||
if len(vec1) != len(vec2):
|
||||
return 0.0
|
||||
|
||||
dot_product = sum(a * b for a, b in zip(vec1, vec2))
|
||||
norm1 = sum(a * a for a in vec1) ** 0.5
|
||||
norm2 = sum(b * b for b in vec2) ** 0.5
|
||||
|
||||
if norm1 == 0 or norm2 == 0:
|
||||
return 0.0
|
||||
|
||||
return dot_product / (norm1 * norm2)
|
||||
def _contains_cjk(text: str) -> bool:
|
||||
"""Check if text contains CJK or related characters (Chinese, Japanese, Korean)."""
|
||||
return bool(_RE_CONTAINS_CJK.search(text))
|
||||
|
||||
@staticmethod
|
||||
def _contains_cjk(text: str) -> bool:
|
||||
"""Check if text contains CJK (Chinese/Japanese/Korean) characters"""
|
||||
import re
|
||||
return bool(re.search(r'[\u4e00-\u9fff]', text))
|
||||
|
||||
def _build_trigram_query(raw_query: str) -> Optional[str]:
|
||||
"""
|
||||
Build FTS5 MATCH query for the trigram tokenizer.
|
||||
Extracts CJK sequences (including single characters) and ASCII words,
|
||||
joining them with AND so all terms must appear in the matched chunk.
|
||||
"""
|
||||
tokens = _RE_TRIGRAM_TOKENS.findall(raw_query)
|
||||
tokens = [t for t in tokens if t]
|
||||
if not tokens:
|
||||
return None
|
||||
# Escape embedded double-quotes (FTS5 uses "" inside quoted phrases)
|
||||
quoted = [f'"{t.replace(chr(34), chr(34)*2)}"' for t in tokens]
|
||||
return ' AND '.join(quoted)
|
||||
|
||||
def _search_fts5_trigram(
|
||||
self,
|
||||
query: str,
|
||||
user_id: Optional[str],
|
||||
scopes: List[str],
|
||||
limit: int
|
||||
) -> List[SearchResult]:
|
||||
"""Trigram FTS5 search — handles CJK and mixed queries with BM25 ranking."""
|
||||
trigram_query = self._build_trigram_query(query)
|
||||
if not trigram_query:
|
||||
return []
|
||||
|
||||
scope_placeholders = ','.join('?' * len(scopes))
|
||||
params = [trigram_query] + list(scopes)
|
||||
|
||||
if user_id:
|
||||
sql = f"""
|
||||
SELECT chunks.*, bm25(chunks_fts_trigram) as rank
|
||||
FROM chunks_fts_trigram
|
||||
JOIN chunks ON chunks.rowid = chunks_fts_trigram.rowid
|
||||
WHERE chunks_fts_trigram MATCH ?
|
||||
AND chunks.scope IN ({scope_placeholders})
|
||||
AND (chunks.scope = 'shared' OR chunks.user_id = ?)
|
||||
ORDER BY rank
|
||||
LIMIT ?
|
||||
"""
|
||||
params.extend([user_id, limit])
|
||||
else:
|
||||
sql = f"""
|
||||
SELECT chunks.*, bm25(chunks_fts_trigram) as rank
|
||||
FROM chunks_fts_trigram
|
||||
JOIN chunks ON chunks.rowid = chunks_fts_trigram.rowid
|
||||
WHERE chunks_fts_trigram MATCH ?
|
||||
AND chunks.scope IN ({scope_placeholders})
|
||||
ORDER BY rank
|
||||
LIMIT ?
|
||||
"""
|
||||
params.append(limit)
|
||||
|
||||
try:
|
||||
rows = self.conn.execute(sql, params).fetchall()
|
||||
return [
|
||||
SearchResult(
|
||||
path=row['path'],
|
||||
start_line=row['start_line'],
|
||||
end_line=row['end_line'],
|
||||
score=self._bm25_rank_to_score(row['rank']),
|
||||
snippet=self._truncate_text(row['text'], 500),
|
||||
source=row['source'],
|
||||
user_id=row['user_id']
|
||||
)
|
||||
for row in rows
|
||||
]
|
||||
except Exception:
|
||||
from common.log import logger
|
||||
logger.warning("[MemoryStorage] _search_fts5_trigram failed, returning empty", exc_info=True)
|
||||
return []
|
||||
|
||||
@staticmethod
|
||||
def _build_fts_query(raw_query: str) -> Optional[str]:
|
||||
"""
|
||||
@@ -688,7 +1014,6 @@ class MemoryStorage:
|
||||
Works best for English and word-based languages.
|
||||
For CJK characters, LIKE search will be used as fallback.
|
||||
"""
|
||||
import re
|
||||
# Extract words (primarily English words and numbers)
|
||||
tokens = re.findall(r'[A-Za-z0-9_]+', raw_query)
|
||||
if not tokens:
|
||||
@@ -701,9 +1026,22 @@ class MemoryStorage:
|
||||
|
||||
@staticmethod
|
||||
def _bm25_rank_to_score(rank: float) -> float:
|
||||
"""Convert BM25 rank to 0-1 score"""
|
||||
normalized = max(0, rank) if rank is not None else 999
|
||||
return 1 / (1 + normalized)
|
||||
"""Convert SQLite BM25 rank to a [0, 1) relevance score.
|
||||
|
||||
SQLite's bm25() returns a non-positive float (0 or negative).
|
||||
More negative = more relevant. max(0, rank) would clip every
|
||||
negative value to 0, making every score 1/(1+0) = 1.0 and
|
||||
destroying all ranking information.
|
||||
|
||||
abs(rank) / (1 + abs(rank)) maps the absolute relevance magnitude
|
||||
to [0, 1): larger |rank| (stronger match) → score closer to 1.
|
||||
"""
|
||||
if rank is None:
|
||||
return 0.0
|
||||
# Add a floor of 0.3 so any FTS5 match always exceeds typical
|
||||
# min_score thresholds (default 0.1). Small-corpus ranks close to
|
||||
# 0 would otherwise produce score≈0 and be filtered out downstream.
|
||||
return 0.3 + 0.69 * (abs(rank) / (1.0 + abs(rank)))
|
||||
|
||||
@staticmethod
|
||||
def _truncate_text(text: str, max_chars: int) -> str:
|
||||
|
||||
@@ -3,6 +3,11 @@ from .agent_stream import AgentStreamExecutor
|
||||
from .task import Task, TaskType, TaskStatus
|
||||
from .result import AgentResult, AgentAction, AgentActionType, ToolResult
|
||||
from .models import LLMModel, LLMRequest, ModelFactory
|
||||
from .cancel import (
|
||||
AgentCancelledError,
|
||||
CancelTokenRegistry,
|
||||
get_cancel_registry,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
'Agent',
|
||||
@@ -16,5 +21,8 @@ __all__ = [
|
||||
'ToolResult',
|
||||
'LLMModel',
|
||||
'LLMRequest',
|
||||
'ModelFactory'
|
||||
]
|
||||
'ModelFactory',
|
||||
'AgentCancelledError',
|
||||
'CancelTokenRegistry',
|
||||
'get_cancel_registry',
|
||||
]
|
||||
|
||||
@@ -365,7 +365,8 @@ class Agent:
|
||||
|
||||
return action
|
||||
|
||||
def run_stream(self, user_message: str, on_event=None, clear_history: bool = False, skill_filter=None) -> str:
|
||||
def run_stream(self, user_message: str, on_event=None, clear_history: bool = False,
|
||||
skill_filter=None, cancel_event=None) -> str:
|
||||
"""
|
||||
Execute single agent task with streaming (based on tool-call)
|
||||
|
||||
@@ -374,6 +375,7 @@ class Agent:
|
||||
- Multi-turn reasoning based on tool-call
|
||||
- Event callbacks
|
||||
- Persistent conversation history across calls
|
||||
- User-initiated cancellation via ``cancel_event``
|
||||
|
||||
Args:
|
||||
user_message: User message
|
||||
@@ -381,6 +383,11 @@ class Agent:
|
||||
event = {"type": str, "timestamp": float, "data": dict}
|
||||
clear_history: If True, clear conversation history before this call (default: False)
|
||||
skill_filter: Optional list of skill names to include in this run
|
||||
cancel_event: Optional threading.Event polled at agent checkpoints.
|
||||
When set, the loop exits at the next safe point, injects a
|
||||
"[Interrupted by user]" assistant note, and returns the
|
||||
partial response. ``messages`` stays in a valid state
|
||||
(tool_use/tool_result pairs preserved).
|
||||
|
||||
Returns:
|
||||
Final response text
|
||||
@@ -424,7 +431,8 @@ class Agent:
|
||||
max_turns=self.max_steps,
|
||||
on_event=on_event,
|
||||
messages=messages_copy, # Pass copied message history
|
||||
max_context_turns=max_context_turns
|
||||
max_context_turns=max_context_turns,
|
||||
cancel_event=cancel_event,
|
||||
)
|
||||
|
||||
# Execute
|
||||
|
||||
@@ -7,11 +7,19 @@ import json
|
||||
import time
|
||||
from typing import List, Dict, Any, Optional, Callable, Tuple
|
||||
|
||||
from agent.protocol.cancel import AgentCancelledError
|
||||
from agent.protocol.models import LLMRequest, LLMModel
|
||||
from agent.protocol.message_utils import sanitize_claude_messages, compress_turn_to_text_only
|
||||
from agent.tools.base_tool import BaseTool, ToolResult
|
||||
from common.log import logger
|
||||
|
||||
# Optional: repair malformed JSON args from non-strict providers (e.g. unescaped quotes in long content).
|
||||
try:
|
||||
from json_repair import repair_json as _repair_json
|
||||
_HAS_JSON_REPAIR = True
|
||||
except ImportError:
|
||||
_HAS_JSON_REPAIR = False
|
||||
|
||||
|
||||
# Maximum number of characters of model "reasoning / thinking" content to persist
|
||||
# in conversation history. The full reasoning is still streamed to the UI in real
|
||||
@@ -44,6 +52,30 @@ def _truncate_reasoning_for_storage(text: str) -> str:
|
||||
return head + _REASONING_TRUNCATE_MARKER.format(omitted=omitted) + tail
|
||||
|
||||
|
||||
def _parse_tool_args(args_str: str, finish_reason: Optional[str]) -> Tuple[dict, Optional[str]]:
|
||||
"""Parse tool args JSON. Returns (args, error_msg); error_msg is None on success.
|
||||
|
||||
On JSONDecodeError: detect truncation first (skip repair, surface max_tokens hint);
|
||||
otherwise try json-repair for escape issues; finally fall back to the raw decoder error.
|
||||
"""
|
||||
if not args_str:
|
||||
return {}, None
|
||||
try:
|
||||
return json.loads(args_str), None
|
||||
except json.JSONDecodeError as e:
|
||||
if finish_reason in ("length", "max_tokens") or not args_str.rstrip().endswith("}"):
|
||||
return {}, "Output truncated (max_tokens reached). Split content into smaller chunks across multiple tool calls."
|
||||
if _HAS_JSON_REPAIR:
|
||||
try:
|
||||
repaired = _repair_json(args_str, return_objects=True)
|
||||
if isinstance(repaired, dict):
|
||||
logger.warning(f"Tool args JSON repaired ({len(args_str)} chars)")
|
||||
return repaired, None
|
||||
except Exception:
|
||||
pass
|
||||
return {}, f"Invalid JSON in tool arguments: {e.msg}"
|
||||
|
||||
|
||||
class AgentStreamExecutor:
|
||||
"""
|
||||
Agent Stream Executor
|
||||
@@ -64,7 +96,8 @@ class AgentStreamExecutor:
|
||||
max_turns: int = 50,
|
||||
on_event: Optional[Callable] = None,
|
||||
messages: Optional[List[Dict]] = None,
|
||||
max_context_turns: int = 30
|
||||
max_context_turns: int = 30,
|
||||
cancel_event=None,
|
||||
):
|
||||
"""
|
||||
Initialize stream executor
|
||||
@@ -78,6 +111,10 @@ class AgentStreamExecutor:
|
||||
on_event: Event callback function
|
||||
messages: Optional existing message history (for persistent conversations)
|
||||
max_context_turns: Maximum number of conversation turns to keep in context
|
||||
cancel_event: Optional threading.Event used to signal user cancel.
|
||||
Checked at every safe point (turn boundary, before tool execution,
|
||||
during LLM streaming). When set, raises AgentCancelledError which
|
||||
run_stream catches to gracefully wind down.
|
||||
"""
|
||||
self.agent = agent
|
||||
self.model = model
|
||||
@@ -87,6 +124,7 @@ class AgentStreamExecutor:
|
||||
self.max_turns = max_turns
|
||||
self.on_event = on_event
|
||||
self.max_context_turns = max_context_turns
|
||||
self.cancel_event = cancel_event
|
||||
|
||||
# Message history - use provided messages or create new list
|
||||
self.messages = messages if messages is not None else []
|
||||
@@ -97,6 +135,73 @@ class AgentStreamExecutor:
|
||||
# Track files to send (populated by read tool)
|
||||
self.files_to_send = [] # List of file metadata dicts
|
||||
|
||||
def _check_cancelled(self) -> None:
|
||||
"""Raise AgentCancelledError if the user requested cancellation.
|
||||
|
||||
Called at safe points (turn start, between tool calls, between LLM
|
||||
chunks). Cheap to call: just an Event.is_set() probe.
|
||||
"""
|
||||
if self.cancel_event is not None and self.cancel_event.is_set():
|
||||
raise AgentCancelledError("agent cancelled by user")
|
||||
|
||||
def _handle_cancelled(self, partial_response: str) -> None:
|
||||
"""Wind down ``self.messages`` after a user-initiated cancel.
|
||||
|
||||
The messages list may be in any of these states when we get here:
|
||||
(a) Last message is an assistant message containing tool_use
|
||||
blocks but the matching tool_result has not been appended yet.
|
||||
(b) Last message is an assistant text-only reply (cancel happened
|
||||
right before the next turn started).
|
||||
(c) Last message is a user tool_result message and we cancelled
|
||||
between turns.
|
||||
|
||||
For (a) we MUST synthesise tool_result blocks, otherwise the next
|
||||
request will fail Claude/OpenAI's strict pairing validation. For
|
||||
(b)/(c) the state is already valid and we just append a small
|
||||
cancellation note so the user/LLM both see the boundary clearly.
|
||||
"""
|
||||
try:
|
||||
# Step 1: close any orphaned tool_use in the trailing assistant
|
||||
# message by injecting matching tool_result blocks.
|
||||
if self.messages and isinstance(self.messages[-1], dict) \
|
||||
and self.messages[-1].get("role") == "assistant":
|
||||
last = self.messages[-1]
|
||||
content = last.get("content")
|
||||
if isinstance(content, list):
|
||||
pending_tool_use_ids = [
|
||||
block.get("id")
|
||||
for block in content
|
||||
if isinstance(block, dict) and block.get("type") == "tool_use"
|
||||
]
|
||||
pending_tool_use_ids = [tid for tid in pending_tool_use_ids if tid]
|
||||
if pending_tool_use_ids:
|
||||
tool_result_blocks = [
|
||||
{
|
||||
"type": "tool_result",
|
||||
"tool_use_id": tid,
|
||||
"content": "Cancelled by user before this tool finished.",
|
||||
"is_error": True,
|
||||
}
|
||||
for tid in pending_tool_use_ids
|
||||
]
|
||||
self.messages.append({
|
||||
"role": "user",
|
||||
"content": tool_result_blocks,
|
||||
})
|
||||
logger.info(
|
||||
f"[Agent] Injected {len(tool_result_blocks)} cancellation "
|
||||
f"tool_result blocks to keep message history valid"
|
||||
)
|
||||
|
||||
# Step 2: append a stable "interrupted" marker so the LLM sees a
|
||||
# clear stop boundary on the next turn.
|
||||
self.messages.append({
|
||||
"role": "assistant",
|
||||
"content": [{"type": "text", "text": "_(Cancelled by user)_"}],
|
||||
})
|
||||
except Exception as e:
|
||||
logger.warning(f"[Agent] _handle_cancelled cleanup failed: {e}")
|
||||
|
||||
def _emit_event(self, event_type: str, data: dict = None):
|
||||
"""Emit event"""
|
||||
if self.on_event:
|
||||
@@ -270,8 +375,13 @@ class AgentStreamExecutor:
|
||||
final_response = ""
|
||||
turn = 0
|
||||
|
||||
cancelled = False
|
||||
try:
|
||||
while turn < self.max_turns:
|
||||
# Check at the very top of every turn so a cancel arriving
|
||||
# between turns short-circuits cleanly.
|
||||
self._check_cancelled()
|
||||
|
||||
turn += 1
|
||||
logger.info(f"[Agent] 第 {turn} 轮")
|
||||
self._emit_event("turn_start", {"turn": turn})
|
||||
@@ -375,6 +485,8 @@ class AgentStreamExecutor:
|
||||
|
||||
try:
|
||||
for tool_call in tool_calls:
|
||||
# Honour cancel between tool invocations within the same turn
|
||||
self._check_cancelled()
|
||||
result = self._execute_tool(tool_call)
|
||||
tool_results.append(result)
|
||||
|
||||
@@ -557,6 +669,15 @@ class AgentStreamExecutor:
|
||||
self.messages.pop(prompt_insert_idx)
|
||||
logger.debug("[Agent] Removed injected max-steps prompt from message history")
|
||||
|
||||
except AgentCancelledError:
|
||||
# User-initiated stop: wind down message history cleanly so the
|
||||
# next turn is unaffected; channels emit a "cancelled" UI event.
|
||||
cancelled = True
|
||||
logger.info(f"[Agent] 🛑 已被用户中止 (第 {turn} 轮)")
|
||||
self._handle_cancelled(final_response)
|
||||
if not final_response or not final_response.strip():
|
||||
final_response = "_(Cancelled)_"
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Agent执行错误: {e}")
|
||||
self._emit_event("error", {"error": str(e)})
|
||||
@@ -564,8 +685,11 @@ class AgentStreamExecutor:
|
||||
|
||||
finally:
|
||||
final_response = final_response.strip() if final_response else final_response
|
||||
logger.info(f"[Agent] 🏁 完成 ({turn}轮)")
|
||||
self._emit_event("agent_end", {"final_response": final_response})
|
||||
if cancelled:
|
||||
# Emit before agent_end so channels can mark UI as cancelled
|
||||
self._emit_event("agent_cancelled", {"final_response": final_response})
|
||||
logger.info(f"[Agent] 🏁 完成 ({turn}轮)" + (" [cancelled]" if cancelled else ""))
|
||||
self._emit_event("agent_end", {"final_response": final_response, "cancelled": cancelled})
|
||||
|
||||
return final_response
|
||||
|
||||
@@ -644,7 +768,32 @@ class AgentStreamExecutor:
|
||||
try:
|
||||
stream = self.model.call_stream(request)
|
||||
|
||||
# Probe cancel every N chunks to bound reaction time without
|
||||
# checking on every token.
|
||||
_cancel_probe_counter = 0
|
||||
_CANCEL_PROBE_EVERY = 8
|
||||
|
||||
for chunk in stream:
|
||||
_cancel_probe_counter += 1
|
||||
if _cancel_probe_counter >= _CANCEL_PROBE_EVERY:
|
||||
_cancel_probe_counter = 0
|
||||
if self.cancel_event is not None and self.cancel_event.is_set():
|
||||
# Persist partial text only; tool_use args may be
|
||||
# truncated mid-stream and would fail validation.
|
||||
logger.info("[Agent] cancel detected mid-stream, aborting LLM call")
|
||||
if full_content:
|
||||
partial_msg = {
|
||||
"role": "assistant",
|
||||
"content": [{"type": "text", "text": full_content}],
|
||||
}
|
||||
self.messages.append(partial_msg)
|
||||
self._emit_event("message_end", {
|
||||
"content": full_content,
|
||||
"tool_calls": [],
|
||||
"cancelled": True,
|
||||
})
|
||||
raise AgentCancelledError("cancelled during LLM streaming")
|
||||
|
||||
# Check for errors
|
||||
if isinstance(chunk, dict) and chunk.get("error"):
|
||||
# Extract error message from nested structure
|
||||
@@ -738,6 +887,10 @@ class AgentStreamExecutor:
|
||||
elif isinstance(choice, dict) and choice.get("_gemini_raw_parts"):
|
||||
gemini_raw_parts = choice["_gemini_raw_parts"]
|
||||
|
||||
except AgentCancelledError:
|
||||
# Must propagate untouched; never treat as a retryable error.
|
||||
raise
|
||||
|
||||
except Exception as e:
|
||||
error_str = str(e)
|
||||
error_str_lower = error_str.lower()
|
||||
@@ -851,26 +1004,17 @@ class AgentStreamExecutor:
|
||||
import uuid
|
||||
tool_id = f"call_{uuid.uuid4().hex[:24]}"
|
||||
|
||||
try:
|
||||
# Safely get arguments, handle None case
|
||||
args_str = tc.get("arguments") or ""
|
||||
arguments = json.loads(args_str) if args_str else {}
|
||||
except json.JSONDecodeError as e:
|
||||
# Handle None or invalid arguments safely
|
||||
args_str = tc.get('arguments') or ""
|
||||
args_preview = args_str[:200] if len(args_str) > 200 else args_str
|
||||
logger.error(f"Failed to parse tool arguments for {tc['name']}")
|
||||
logger.error(f"Arguments length: {len(args_str)} chars")
|
||||
logger.error(f"Arguments preview: {args_preview}...")
|
||||
logger.error(f"JSON decode error: {e}")
|
||||
|
||||
# Return a clear error message to the LLM instead of empty dict
|
||||
# This helps the LLM understand what went wrong
|
||||
args_str = tc.get("arguments") or ""
|
||||
arguments, parse_err = _parse_tool_args(args_str, stop_reason)
|
||||
if parse_err:
|
||||
logger.error(
|
||||
f"Tool args parse failed for {tc['name']} ({len(args_str)} chars): {parse_err}"
|
||||
)
|
||||
tool_calls.append({
|
||||
"id": tool_id,
|
||||
"name": tc["name"],
|
||||
"arguments": {},
|
||||
"_parse_error": f"Invalid JSON in tool arguments: {args_preview}... Error: {str(e)}. Tip: For large content, consider splitting into smaller chunks or using a different approach."
|
||||
"_parse_error": parse_err,
|
||||
})
|
||||
continue
|
||||
|
||||
@@ -958,14 +1102,11 @@ class AgentStreamExecutor:
|
||||
tool_id = tool_call["id"]
|
||||
arguments = tool_call["arguments"]
|
||||
|
||||
# Check if there was a JSON parse error
|
||||
if "_parse_error" in tool_call:
|
||||
parse_error = tool_call["_parse_error"]
|
||||
logger.error(f"Skipping tool execution due to parse error: {parse_error}")
|
||||
result = {
|
||||
"status": "error",
|
||||
"result": f"Failed to parse tool arguments. {parse_error}. Please ensure your tool call uses valid JSON format with all required parameters.",
|
||||
"execution_time": 0
|
||||
"result": tool_call["_parse_error"],
|
||||
"execution_time": 0,
|
||||
}
|
||||
self._record_tool_result(tool_name, arguments, False)
|
||||
return result
|
||||
|
||||
121
agent/protocol/cancel.py
Normal file
121
agent/protocol/cancel.py
Normal file
@@ -0,0 +1,121 @@
|
||||
"""
|
||||
Cancel token registry for aborting in-flight agent runs.
|
||||
|
||||
A user cancel (web Cancel button, /cancel command) sets a threading.Event
|
||||
that the agent loop polls at safe checkpoints. Tokens are keyed by
|
||||
request_id (preferred) and tracked under session_id as a fallback. Entries
|
||||
are released after the run completes to keep the registry bounded.
|
||||
|
||||
No project deps — importable from any layer without circular imports.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
from typing import Dict, Optional
|
||||
|
||||
|
||||
class AgentCancelledError(Exception):
|
||||
"""Raised inside the agent loop when a stop has been requested.
|
||||
|
||||
The agent stream executor catches this, injects a "[Interrupted]" note
|
||||
into the message history (preserving tool_use/tool_result integrity)
|
||||
and returns a partial response to the caller.
|
||||
"""
|
||||
|
||||
|
||||
class _CancelEntry:
|
||||
__slots__ = ("event", "session_id")
|
||||
|
||||
def __init__(self, session_id: Optional[str]):
|
||||
self.event = threading.Event()
|
||||
self.session_id = session_id
|
||||
|
||||
|
||||
class CancelTokenRegistry:
|
||||
"""In-process registry mapping request_id -> cancel Event.
|
||||
|
||||
Thread-safe. Singleton via module-level ``_registry``.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self._lock = threading.Lock()
|
||||
self._by_request: Dict[str, _CancelEntry] = {}
|
||||
# session_id -> set of request_ids currently in flight (usually 1).
|
||||
self._by_session: Dict[str, set] = {}
|
||||
|
||||
def register(self, request_id: str, session_id: Optional[str] = None) -> threading.Event:
|
||||
"""Create (or return existing) cancel event for a request.
|
||||
|
||||
Returns the threading.Event the caller should poll via ``is_set()``.
|
||||
"""
|
||||
if not request_id:
|
||||
return threading.Event()
|
||||
with self._lock:
|
||||
entry = self._by_request.get(request_id)
|
||||
if entry is None:
|
||||
entry = _CancelEntry(session_id)
|
||||
self._by_request[request_id] = entry
|
||||
if session_id:
|
||||
self._by_session.setdefault(session_id, set()).add(request_id)
|
||||
return entry.event
|
||||
|
||||
def get_event(self, request_id: str) -> Optional[threading.Event]:
|
||||
if not request_id:
|
||||
return None
|
||||
with self._lock:
|
||||
entry = self._by_request.get(request_id)
|
||||
return entry.event if entry else None
|
||||
|
||||
def cancel_request(self, request_id: str) -> bool:
|
||||
"""Trigger cancel for a specific request. Returns True when matched."""
|
||||
if not request_id:
|
||||
return False
|
||||
with self._lock:
|
||||
entry = self._by_request.get(request_id)
|
||||
if entry is None:
|
||||
return False
|
||||
entry.event.set()
|
||||
return True
|
||||
|
||||
def cancel_session(self, session_id: str) -> int:
|
||||
"""Trigger cancel for every in-flight request of a session.
|
||||
|
||||
Returns the number of requests cancelled (0 when nothing was running).
|
||||
"""
|
||||
if not session_id:
|
||||
return 0
|
||||
with self._lock:
|
||||
request_ids = list(self._by_session.get(session_id, ()))
|
||||
entries = [self._by_request[r] for r in request_ids if r in self._by_request]
|
||||
for entry in entries:
|
||||
entry.event.set()
|
||||
return len(entries)
|
||||
|
||||
def unregister(self, request_id: str) -> None:
|
||||
"""Remove an entry once the agent run is done. Safe to call twice."""
|
||||
if not request_id:
|
||||
return
|
||||
with self._lock:
|
||||
entry = self._by_request.pop(request_id, None)
|
||||
if entry and entry.session_id:
|
||||
bucket = self._by_session.get(entry.session_id)
|
||||
if bucket is not None:
|
||||
bucket.discard(request_id)
|
||||
if not bucket:
|
||||
self._by_session.pop(entry.session_id, None)
|
||||
|
||||
def has_active(self, session_id: str) -> bool:
|
||||
if not session_id:
|
||||
return False
|
||||
with self._lock:
|
||||
bucket = self._by_session.get(session_id)
|
||||
return bool(bucket)
|
||||
|
||||
|
||||
_registry = CancelTokenRegistry()
|
||||
|
||||
|
||||
def get_cancel_registry() -> CancelTokenRegistry:
|
||||
"""Module-level accessor for the singleton registry."""
|
||||
return _registry
|
||||
@@ -145,7 +145,8 @@ class BrowserTool(BaseTool):
|
||||
url = args.get("url", "").strip()
|
||||
if not url:
|
||||
return ToolResult.fail("Error: 'url' is required for navigate action")
|
||||
if not url.startswith(("http://", "https://")):
|
||||
# Only auto-prepend https:// for bare hosts; preserve file://, about:, data:, etc.
|
||||
if "://" not in url and not url.startswith(("about:", "data:")):
|
||||
url = "https://" + url
|
||||
timeout = args.get("timeout", 30000)
|
||||
service = self._get_service()
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
"""
|
||||
MCP (Model Context Protocol) client module.
|
||||
|
||||
Implements JSON-RPC 2.0 over stdio and SSE transports without any external
|
||||
MCP SDK dependency.
|
||||
Implements JSON-RPC 2.0 over stdio, SSE and Streamable HTTP transports
|
||||
without any external MCP SDK dependency.
|
||||
"""
|
||||
|
||||
import json
|
||||
@@ -17,18 +17,29 @@ from typing import Optional
|
||||
from common.log import logger
|
||||
|
||||
|
||||
# Aliases accepted for the Streamable HTTP transport type
|
||||
_STREAMABLE_HTTP_ALIASES = {"streamable-http", "streamable_http", "streamablehttp", "http"}
|
||||
|
||||
|
||||
class McpClient:
|
||||
"""Single MCP Server client supporting stdio and SSE transports."""
|
||||
"""Single MCP Server client supporting stdio, SSE and Streamable HTTP transports."""
|
||||
|
||||
def __init__(self, config: dict):
|
||||
"""
|
||||
config examples:
|
||||
stdio: {"name": "filesystem", "type": "stdio", "command": "npx", "args": [...]}
|
||||
SSE: {"name": "my-api", "type": "sse", "url": "http://localhost:8000/sse"}
|
||||
stdio: {"name": "filesystem", "type": "stdio", "command": "npx", "args": [...]}
|
||||
SSE: {"name": "my-api", "type": "sse", "url": "http://localhost:8000/sse"}
|
||||
streamable-http: {"name": "pubmed", "type": "streamable-http", "url": "https://x/mcp"}
|
||||
"""
|
||||
self.config = config
|
||||
self.name: str = config.get("name", "unknown")
|
||||
self.transport: str = config.get("type", "stdio")
|
||||
raw_transport: str = config.get("type", "stdio")
|
||||
# Normalize streamable-http aliases to a single internal key
|
||||
self.transport: str = (
|
||||
"streamable-http"
|
||||
if raw_transport.lower() in _STREAMABLE_HTTP_ALIASES
|
||||
else raw_transport
|
||||
)
|
||||
|
||||
# stdio state
|
||||
self._proc: Optional[subprocess.Popen] = None
|
||||
@@ -37,6 +48,11 @@ class McpClient:
|
||||
self._sse_url: Optional[str] = None
|
||||
self._post_url: Optional[str] = None # endpoint for sending messages (resolved from SSE)
|
||||
|
||||
# Streamable HTTP state
|
||||
self._http_url: Optional[str] = None
|
||||
self._http_headers: dict = {} # extra headers from user config (e.g. Authorization)
|
||||
self._http_session_id: Optional[str] = None # Mcp-Session-Id assigned by the server
|
||||
|
||||
# Shared state
|
||||
self._next_id = 1
|
||||
self._id_lock = threading.Lock()
|
||||
@@ -54,6 +70,8 @@ class McpClient:
|
||||
return self._init_stdio()
|
||||
elif self.transport == "sse":
|
||||
return self._init_sse()
|
||||
elif self.transport == "streamable-http":
|
||||
return self._init_streamable_http()
|
||||
else:
|
||||
logger.warning(f"[MCP:{self.name}] Unknown transport type: {self.transport!r}")
|
||||
return False
|
||||
@@ -109,6 +127,21 @@ class McpClient:
|
||||
pass
|
||||
self._proc = None
|
||||
logger.debug(f"[MCP:{self.name}] stdio process terminated")
|
||||
|
||||
# Best-effort streamable-http session termination
|
||||
if self.transport == "streamable-http" and self._http_session_id and self._http_url:
|
||||
try:
|
||||
req = urllib.request.Request(
|
||||
self._http_url,
|
||||
method="DELETE",
|
||||
headers={"Mcp-Session-Id": self._http_session_id, **self._http_headers},
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=5):
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
self._http_session_id = None
|
||||
|
||||
self._initialized = False
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
@@ -234,6 +267,120 @@ class McpClient:
|
||||
raw = resp.read().decode("utf-8")
|
||||
return json.loads(raw)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Streamable HTTP transport (MCP spec 2025-03-26)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _init_streamable_http(self) -> bool:
|
||||
url = self.config.get("url")
|
||||
if not url:
|
||||
logger.warning(f"[MCP:{self.name}] streamable-http config missing 'url'")
|
||||
return False
|
||||
|
||||
self._http_url = url
|
||||
# Allow user-provided headers (e.g. {"Authorization": "Bearer xxx"})
|
||||
extra_headers = self.config.get("headers") or {}
|
||||
if isinstance(extra_headers, dict):
|
||||
self._http_headers = {str(k): str(v) for k, v in extra_headers.items()}
|
||||
|
||||
return self._handshake()
|
||||
|
||||
def _streamable_http_send(self, message: dict) -> dict:
|
||||
"""POST a JSON-RPC request and return the response (JSON or SSE-wrapped)."""
|
||||
return self._streamable_http_post(message, expect_response=True)
|
||||
|
||||
def _streamable_http_post(self, message: dict, expect_response: bool) -> dict:
|
||||
"""
|
||||
POST a JSON-RPC message over Streamable HTTP.
|
||||
|
||||
Per the spec, the response Content-Type can be either:
|
||||
- application/json -> single JSON-RPC response in body
|
||||
- text/event-stream -> SSE stream; we read until we get a matching response
|
||||
"""
|
||||
body = json.dumps(message).encode("utf-8")
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
"Accept": "application/json, text/event-stream",
|
||||
}
|
||||
if self._http_session_id:
|
||||
headers["Mcp-Session-Id"] = self._http_session_id
|
||||
headers.update(self._http_headers)
|
||||
|
||||
req = urllib.request.Request(
|
||||
self._http_url,
|
||||
data=body,
|
||||
method="POST",
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
try:
|
||||
resp = urllib.request.urlopen(req, timeout=30)
|
||||
except urllib.error.HTTPError as e:
|
||||
# Surface the server-provided error body for easier debugging
|
||||
detail = ""
|
||||
try:
|
||||
detail = e.read().decode("utf-8", errors="ignore")
|
||||
except Exception:
|
||||
pass
|
||||
raise IOError(
|
||||
f"[MCP:{self.name}] streamable-http HTTP {e.code}: {detail[:200]}"
|
||||
)
|
||||
|
||||
with resp:
|
||||
# Capture session id assigned by the server (if any)
|
||||
session_id = resp.headers.get("Mcp-Session-Id")
|
||||
if session_id and not self._http_session_id:
|
||||
self._http_session_id = session_id
|
||||
|
||||
status = resp.status if hasattr(resp, "status") else resp.getcode()
|
||||
|
||||
# Notifications: server may reply with 202 Accepted and no body
|
||||
if not expect_response or status == 202:
|
||||
try:
|
||||
resp.read()
|
||||
except Exception:
|
||||
pass
|
||||
return {}
|
||||
|
||||
content_type = (resp.headers.get("Content-Type") or "").lower()
|
||||
expected_id = message.get("id")
|
||||
|
||||
if "text/event-stream" in content_type:
|
||||
return self._read_sse_response(resp, expected_id)
|
||||
|
||||
raw = resp.read().decode("utf-8")
|
||||
if not raw:
|
||||
return {}
|
||||
return json.loads(raw)
|
||||
|
||||
def _read_sse_response(self, resp, expected_id) -> dict:
|
||||
"""Read an SSE stream and return the first JSON-RPC response with matching id."""
|
||||
data_buf: list = []
|
||||
for raw_line in resp:
|
||||
line = raw_line.decode("utf-8").rstrip("\n\r")
|
||||
if line == "":
|
||||
# End of an SSE event, attempt to parse accumulated data
|
||||
if data_buf:
|
||||
payload = "\n".join(data_buf)
|
||||
data_buf = []
|
||||
try:
|
||||
msg = json.loads(payload)
|
||||
except json.JSONDecodeError:
|
||||
continue
|
||||
# Skip notifications / mismatched ids
|
||||
if "id" not in msg:
|
||||
continue
|
||||
if expected_id is None or msg.get("id") == expected_id:
|
||||
return msg
|
||||
continue
|
||||
if line.startswith(":"):
|
||||
continue # SSE comment / keepalive
|
||||
if line.startswith("data:"):
|
||||
data_buf.append(line[len("data:"):].lstrip())
|
||||
# Ignore 'event:' / 'id:' lines; we only care about JSON-RPC payloads
|
||||
|
||||
raise IOError(f"[MCP:{self.name}] streamable-http SSE stream closed before response")
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Common JSON-RPC helpers
|
||||
# ------------------------------------------------------------------
|
||||
@@ -267,6 +414,8 @@ class McpClient:
|
||||
return self._stdio_send(message)
|
||||
elif self.transport == "sse":
|
||||
return self._sse_send(message)
|
||||
elif self.transport == "streamable-http":
|
||||
return self._streamable_http_send(message)
|
||||
else:
|
||||
raise ValueError(f"[MCP:{self.name}] Unsupported transport: {self.transport}")
|
||||
|
||||
@@ -291,6 +440,11 @@ class McpClient:
|
||||
pass
|
||||
except Exception:
|
||||
pass # notifications are fire-and-forget
|
||||
elif self.transport == "streamable-http":
|
||||
try:
|
||||
self._streamable_http_post(notification, expect_response=False)
|
||||
except Exception:
|
||||
pass # notifications are fire-and-forget
|
||||
|
||||
def _handshake(self) -> bool:
|
||||
"""Perform the MCP initialize / notifications/initialized handshake."""
|
||||
|
||||
@@ -57,34 +57,44 @@ def init_scheduler(agent_bridge) -> bool:
|
||||
_task_store = TaskStore(store_path)
|
||||
logger.debug(f"[Scheduler] Task store initialized: {store_path}")
|
||||
|
||||
# Create execute callback
|
||||
# Create execute callback. Returns True on success, False to ask
|
||||
# the scheduler to retry on the next tick (e.g. channel not yet
|
||||
# ready right after process start).
|
||||
def execute_task_callback(task: dict):
|
||||
"""Callback to execute a scheduled task"""
|
||||
try:
|
||||
action = task.get("action", {})
|
||||
action_type = action.get("type")
|
||||
channel_type = action.get("channel_type", "unknown")
|
||||
receiver = action.get("receiver", "")
|
||||
|
||||
if not _is_channel_ready(channel_type, receiver):
|
||||
logger.warning(
|
||||
f"[Scheduler] Task {task.get('id')}: channel "
|
||||
f"'{channel_type}' not ready for receiver={receiver} "
|
||||
f"(no inbound msg cached since restart?); deferring"
|
||||
)
|
||||
return False
|
||||
|
||||
if action_type == "agent_task":
|
||||
_execute_agent_task(task, agent_bridge)
|
||||
return _execute_agent_task(task, agent_bridge)
|
||||
elif action_type == "send_message":
|
||||
# Legacy support for old tasks
|
||||
_execute_send_message(task, agent_bridge)
|
||||
return _execute_send_message(task, agent_bridge)
|
||||
elif action_type == "tool_call":
|
||||
# Legacy support for old tasks
|
||||
_execute_tool_call(task, agent_bridge)
|
||||
return _execute_tool_call(task, agent_bridge)
|
||||
elif action_type == "skill_call":
|
||||
# Legacy support for old tasks
|
||||
_execute_skill_call(task, agent_bridge)
|
||||
return _execute_skill_call(task, agent_bridge)
|
||||
else:
|
||||
logger.warning(f"[Scheduler] Unknown action type: {action_type}")
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"[Scheduler] Error executing task {task.get('id')}: {e}")
|
||||
return False
|
||||
|
||||
# Create scheduler service
|
||||
_scheduler_service = SchedulerService(_task_store, execute_task_callback)
|
||||
_scheduler_service.start()
|
||||
|
||||
logger.debug("[Scheduler] Scheduler service initialized and started")
|
||||
logger.info("[Scheduler] Service initialized and started")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
@@ -92,6 +102,40 @@ def init_scheduler(agent_bridge) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def _is_channel_ready(channel_type: str, receiver: str) -> bool:
|
||||
"""Best-effort readiness probe for outbound channels.
|
||||
|
||||
Returns False when we know the send will drop (e.g. weixin not yet
|
||||
logged in, web session has no polling queue), so the scheduler can
|
||||
defer instead of consuming the task. Unknown channels return True
|
||||
to preserve previous behaviour.
|
||||
"""
|
||||
if not channel_type or channel_type == "unknown":
|
||||
return True
|
||||
try:
|
||||
from channel.channel_factory import create_channel
|
||||
channel = create_channel(channel_type)
|
||||
if channel is None:
|
||||
return False
|
||||
|
||||
if channel_type == "weixin":
|
||||
tokens = getattr(channel, "_context_tokens", None)
|
||||
if not tokens or receiver not in tokens:
|
||||
return False
|
||||
return True
|
||||
|
||||
if channel_type == "web":
|
||||
queues = getattr(channel, "session_queues", None)
|
||||
if not queues or receiver not in queues:
|
||||
return False
|
||||
return True
|
||||
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.warning(f"[Scheduler] Channel readiness check failed for {channel_type}: {e}")
|
||||
return True
|
||||
|
||||
|
||||
def get_task_store():
|
||||
"""Get the global task store instance"""
|
||||
return _task_store
|
||||
@@ -145,13 +189,10 @@ def _remember_delivered_output(
|
||||
)
|
||||
|
||||
|
||||
def _execute_agent_task(task: dict, agent_bridge):
|
||||
def _execute_agent_task(task: dict, agent_bridge) -> bool:
|
||||
"""
|
||||
Execute an agent_task action - let Agent handle the task
|
||||
|
||||
Args:
|
||||
task: Task dictionary
|
||||
agent_bridge: AgentBridge instance
|
||||
Execute an agent_task action - let Agent handle the task.
|
||||
Returns True on successful delivery, False to retry next tick.
|
||||
"""
|
||||
try:
|
||||
action = task.get("action", {})
|
||||
@@ -162,11 +203,11 @@ def _execute_agent_task(task: dict, agent_bridge):
|
||||
|
||||
if not task_description:
|
||||
logger.error(f"[Scheduler] Task {task['id']}: No task_description specified")
|
||||
return
|
||||
return True # malformed task, don't loop forever
|
||||
|
||||
if not receiver:
|
||||
logger.error(f"[Scheduler] Task {task['id']}: No receiver specified")
|
||||
return
|
||||
return True
|
||||
|
||||
# Check for unsupported channels
|
||||
if channel_type == "dingtalk":
|
||||
@@ -209,51 +250,47 @@ def _execute_agent_task(task: dict, agent_bridge):
|
||||
try:
|
||||
# Don't clear history - scheduler tasks use isolated session_id so they won't pollute user conversations
|
||||
reply = agent_bridge.agent_reply(task_description, context=context, on_event=None, clear_history=False)
|
||||
|
||||
if reply and reply.content:
|
||||
# Send the reply via channel
|
||||
from channel.channel_factory import create_channel
|
||||
|
||||
try:
|
||||
channel = create_channel(channel_type)
|
||||
if channel:
|
||||
# For web channel, register request_id
|
||||
if channel_type == "web" and hasattr(channel, 'request_to_session'):
|
||||
request_id = context.get("request_id")
|
||||
if request_id:
|
||||
channel.request_to_session[request_id] = receiver
|
||||
logger.debug(f"[Scheduler] Registered request_id {request_id} -> session {receiver}")
|
||||
|
||||
# Send the reply
|
||||
channel.send(reply, context)
|
||||
_remember_delivered_output(agent_bridge, task, channel_type, reply.content)
|
||||
logger.info(f"[Scheduler] Task {task['id']} executed successfully, result sent to {receiver}")
|
||||
else:
|
||||
logger.error(f"[Scheduler] Failed to create channel: {channel_type}")
|
||||
except Exception as e:
|
||||
logger.error(f"[Scheduler] Failed to send result: {e}")
|
||||
else:
|
||||
|
||||
if not (reply and reply.content):
|
||||
logger.error(f"[Scheduler] Task {task['id']}: No result from agent execution")
|
||||
|
||||
return True # agent ran but produced nothing; don't loop
|
||||
|
||||
from channel.channel_factory import create_channel
|
||||
channel = create_channel(channel_type)
|
||||
if not channel:
|
||||
logger.error(f"[Scheduler] Failed to create channel: {channel_type}")
|
||||
return False
|
||||
|
||||
if channel_type == "web" and hasattr(channel, 'request_to_session'):
|
||||
request_id = context.get("request_id")
|
||||
if request_id:
|
||||
channel.request_to_session[request_id] = receiver
|
||||
|
||||
try:
|
||||
channel.send(reply, context)
|
||||
except Exception as e:
|
||||
logger.error(f"[Scheduler] Failed to send result: {e}")
|
||||
return False
|
||||
|
||||
_remember_delivered_output(agent_bridge, task, channel_type, reply.content)
|
||||
logger.info(f"[Scheduler] Task {task['id']} executed successfully, result sent to {receiver}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[Scheduler] Failed to execute task via Agent: {e}")
|
||||
import traceback
|
||||
logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
|
||||
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[Scheduler] Error in _execute_agent_task: {e}")
|
||||
import traceback
|
||||
logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
|
||||
return False
|
||||
|
||||
|
||||
def _execute_send_message(task: dict, agent_bridge):
|
||||
"""
|
||||
Execute a send_message action
|
||||
|
||||
Args:
|
||||
task: Task dictionary
|
||||
agent_bridge: AgentBridge instance
|
||||
"""
|
||||
def _execute_send_message(task: dict, agent_bridge) -> bool:
|
||||
"""Execute a send_message action. Returns True/False for delivery."""
|
||||
try:
|
||||
action = task.get("action", {})
|
||||
content = action.get("content", "")
|
||||
@@ -263,7 +300,7 @@ def _execute_send_message(task: dict, agent_bridge):
|
||||
|
||||
if not receiver:
|
||||
logger.error(f"[Scheduler] Task {task['id']}: No receiver specified")
|
||||
return
|
||||
return True
|
||||
|
||||
# Create context for sending message
|
||||
context = Context(ContextType.TEXT, content)
|
||||
@@ -308,169 +345,135 @@ def _execute_send_message(task: dict, agent_bridge):
|
||||
# Get channel and send
|
||||
from channel.channel_factory import create_channel
|
||||
|
||||
channel = create_channel(channel_type)
|
||||
if not channel:
|
||||
logger.error(f"[Scheduler] Failed to create channel: {channel_type}")
|
||||
return False
|
||||
|
||||
if channel_type == "web" and hasattr(channel, 'request_to_session'):
|
||||
channel.request_to_session[request_id] = receiver
|
||||
|
||||
try:
|
||||
channel = create_channel(channel_type)
|
||||
if channel:
|
||||
# For web channel, register the request_id to session mapping
|
||||
if channel_type == "web" and hasattr(channel, 'request_to_session'):
|
||||
channel.request_to_session[request_id] = receiver
|
||||
logger.debug(f"[Scheduler] Registered request_id {request_id} -> session {receiver}")
|
||||
|
||||
channel.send(reply, context)
|
||||
_remember_delivered_output(agent_bridge, task, channel_type, content)
|
||||
logger.info(f"[Scheduler] Task {task['id']} executed: sent message to {receiver}")
|
||||
else:
|
||||
logger.error(f"[Scheduler] Failed to create channel: {channel_type}")
|
||||
channel.send(reply, context)
|
||||
except Exception as e:
|
||||
logger.error(f"[Scheduler] Failed to send message: {e}")
|
||||
import traceback
|
||||
logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
|
||||
|
||||
return False
|
||||
|
||||
_remember_delivered_output(agent_bridge, task, channel_type, content)
|
||||
logger.info(f"[Scheduler] Task {task['id']} executed: sent message to {receiver}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[Scheduler] Error in _execute_send_message: {e}")
|
||||
import traceback
|
||||
logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
|
||||
return False
|
||||
|
||||
|
||||
def _execute_tool_call(task: dict, agent_bridge):
|
||||
"""
|
||||
Execute a tool_call action
|
||||
|
||||
Args:
|
||||
task: Task dictionary
|
||||
agent_bridge: AgentBridge instance
|
||||
"""
|
||||
def _execute_tool_call(task: dict, agent_bridge) -> bool:
|
||||
"""Execute a tool_call action. Returns True/False for delivery."""
|
||||
try:
|
||||
action = task.get("action", {})
|
||||
# Support both old and new field names
|
||||
tool_name = action.get("call_name") or action.get("tool_name")
|
||||
tool_params = action.get("call_params") or action.get("tool_params", {})
|
||||
result_prefix = action.get("result_prefix", "")
|
||||
receiver = action.get("receiver")
|
||||
is_group = action.get("is_group", False)
|
||||
channel_type = action.get("channel_type", "unknown")
|
||||
|
||||
|
||||
if not tool_name:
|
||||
logger.error(f"[Scheduler] Task {task['id']}: No tool_name specified")
|
||||
return
|
||||
|
||||
return True
|
||||
if not receiver:
|
||||
logger.error(f"[Scheduler] Task {task['id']}: No receiver specified")
|
||||
return
|
||||
|
||||
# Get tool manager and create tool instance
|
||||
return True
|
||||
|
||||
from agent.tools.tool_manager import ToolManager
|
||||
tool_manager = ToolManager()
|
||||
tool = tool_manager.create_tool(tool_name)
|
||||
|
||||
tool = ToolManager().create_tool(tool_name)
|
||||
if not tool:
|
||||
logger.error(f"[Scheduler] Task {task['id']}: Tool '{tool_name}' not found")
|
||||
return
|
||||
|
||||
# Execute tool
|
||||
return True
|
||||
|
||||
logger.info(f"[Scheduler] Task {task['id']}: Executing tool '{tool_name}' with params {tool_params}")
|
||||
result = tool.execute(tool_params)
|
||||
|
||||
# Get result content
|
||||
if hasattr(result, 'result'):
|
||||
content = result.result
|
||||
else:
|
||||
content = str(result)
|
||||
|
||||
# Add prefix if specified
|
||||
content = result.result if hasattr(result, 'result') else str(result)
|
||||
if result_prefix:
|
||||
content = f"{result_prefix}\n\n{content}"
|
||||
|
||||
# Send result as message
|
||||
|
||||
context = Context(ContextType.TEXT, content)
|
||||
context["receiver"] = receiver
|
||||
context["isgroup"] = is_group
|
||||
context["session_id"] = receiver
|
||||
|
||||
# Channel-specific context setup
|
||||
|
||||
request_id = None
|
||||
if channel_type == "web":
|
||||
# Web channel needs request_id
|
||||
import uuid
|
||||
request_id = f"scheduler_{task['id']}_{uuid.uuid4().hex[:8]}"
|
||||
context["request_id"] = request_id
|
||||
logger.debug(f"[Scheduler] Generated request_id for web channel: {request_id}")
|
||||
elif channel_type == "feishu":
|
||||
context["receive_id_type"] = "chat_id" if is_group else "open_id"
|
||||
context["msg"] = None
|
||||
logger.debug(f"[Scheduler] Feishu: receive_id_type={context['receive_id_type']}, is_group={is_group}, receiver={receiver}")
|
||||
elif channel_type == "wecom_bot":
|
||||
context["msg"] = None
|
||||
|
||||
reply = Reply(ReplyType.TEXT, content)
|
||||
|
||||
# Get channel and send
|
||||
from channel.channel_factory import create_channel
|
||||
channel = create_channel(channel_type)
|
||||
if not channel:
|
||||
logger.error(f"[Scheduler] Failed to create channel: {channel_type}")
|
||||
return False
|
||||
|
||||
if channel_type == "web" and request_id and hasattr(channel, 'request_to_session'):
|
||||
channel.request_to_session[request_id] = receiver
|
||||
|
||||
try:
|
||||
channel = create_channel(channel_type)
|
||||
if channel:
|
||||
if channel_type == "web" and hasattr(channel, 'request_to_session'):
|
||||
channel.request_to_session[request_id] = receiver
|
||||
logger.debug(f"[Scheduler] Registered request_id {request_id} -> session {receiver}")
|
||||
|
||||
channel.send(reply, context)
|
||||
_remember_delivered_output(agent_bridge, task, channel_type, content)
|
||||
logger.info(f"[Scheduler] Task {task['id']} executed: sent tool result to {receiver}")
|
||||
else:
|
||||
logger.error(f"[Scheduler] Failed to create channel: {channel_type}")
|
||||
channel.send(reply, context)
|
||||
except Exception as e:
|
||||
logger.error(f"[Scheduler] Failed to send tool result: {e}")
|
||||
return False
|
||||
|
||||
_remember_delivered_output(agent_bridge, task, channel_type, content)
|
||||
logger.info(f"[Scheduler] Task {task['id']} executed: sent tool result to {receiver}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[Scheduler] Error in _execute_tool_call: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def _execute_skill_call(task: dict, agent_bridge):
|
||||
"""
|
||||
Execute a skill_call action by asking Agent to run the skill
|
||||
|
||||
Args:
|
||||
task: Task dictionary
|
||||
agent_bridge: AgentBridge instance
|
||||
"""
|
||||
def _execute_skill_call(task: dict, agent_bridge) -> bool:
|
||||
"""Execute a skill_call action by asking Agent to run the skill.
|
||||
Returns True/False for delivery."""
|
||||
try:
|
||||
action = task.get("action", {})
|
||||
# Support both old and new field names
|
||||
skill_name = action.get("call_name") or action.get("skill_name")
|
||||
skill_params = action.get("call_params") or action.get("skill_params", {})
|
||||
result_prefix = action.get("result_prefix", "")
|
||||
receiver = action.get("receiver")
|
||||
is_group = action.get("isgroup", False)
|
||||
channel_type = action.get("channel_type", "unknown")
|
||||
|
||||
|
||||
if not skill_name:
|
||||
logger.error(f"[Scheduler] Task {task['id']}: No skill_name specified")
|
||||
return
|
||||
|
||||
return True
|
||||
if not receiver:
|
||||
logger.error(f"[Scheduler] Task {task['id']}: No receiver specified")
|
||||
return
|
||||
|
||||
return True
|
||||
|
||||
logger.info(f"[Scheduler] Task {task['id']}: Executing skill '{skill_name}' with params {skill_params}")
|
||||
|
||||
# Create a unique session_id for this scheduled task to avoid polluting user's conversation
|
||||
# Format: scheduler_<receiver>_<task_id> to ensure isolation
|
||||
|
||||
scheduler_session_id = f"scheduler_{receiver}_{task['id']}"
|
||||
|
||||
# Build a natural language query for the Agent to execute the skill
|
||||
# Format: "Use skill-name to do something with params"
|
||||
param_str = ", ".join([f"{k}={v}" for k, v in skill_params.items()])
|
||||
query = f"Use {skill_name} skill"
|
||||
if param_str:
|
||||
query += f" with {param_str}"
|
||||
|
||||
# Create context for Agent
|
||||
|
||||
context = Context(ContextType.TEXT, query)
|
||||
context["receiver"] = receiver
|
||||
context["isgroup"] = is_group
|
||||
context["session_id"] = scheduler_session_id
|
||||
|
||||
# Channel-specific setup
|
||||
|
||||
if channel_type == "web":
|
||||
import uuid
|
||||
request_id = f"scheduler_{task['id']}_{uuid.uuid4().hex[:8]}"
|
||||
@@ -481,49 +484,48 @@ def _execute_skill_call(task: dict, agent_bridge):
|
||||
elif channel_type == "wecom_bot":
|
||||
context["msg"] = None
|
||||
|
||||
# Use Agent to execute the skill
|
||||
try:
|
||||
# Don't clear history - scheduler tasks use isolated session_id so they won't pollute user conversations
|
||||
reply = agent_bridge.agent_reply(query, context=context, on_event=None, clear_history=False)
|
||||
|
||||
if reply and reply.content:
|
||||
content = reply.content
|
||||
|
||||
# Add prefix if specified
|
||||
if result_prefix:
|
||||
content = f"{result_prefix}\n\n{content}"
|
||||
|
||||
# Send the result via channel
|
||||
from channel.channel_factory import create_channel
|
||||
|
||||
try:
|
||||
channel = create_channel(channel_type)
|
||||
if channel:
|
||||
# For web channel, register request_id
|
||||
if channel_type == "web" and hasattr(channel, 'request_to_session'):
|
||||
req_id = context.get("request_id")
|
||||
if req_id:
|
||||
channel.request_to_session[req_id] = receiver
|
||||
logger.debug(f"[Scheduler] Registered request_id {req_id} -> session {receiver}")
|
||||
|
||||
channel.send(Reply(ReplyType.TEXT, content), context)
|
||||
_remember_delivered_output(agent_bridge, task, channel_type, content)
|
||||
except Exception as e:
|
||||
logger.error(f"[Scheduler] Failed to send skill result: {e}")
|
||||
|
||||
logger.info(f"[Scheduler] Task {task['id']} executed: skill result sent to {receiver}")
|
||||
else:
|
||||
logger.error(f"[Scheduler] Task {task['id']}: No result from skill execution")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[Scheduler] Failed to execute skill via Agent: {e}")
|
||||
import traceback
|
||||
logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
|
||||
|
||||
return False
|
||||
|
||||
if not (reply and reply.content):
|
||||
logger.error(f"[Scheduler] Task {task['id']}: No result from skill execution")
|
||||
return True
|
||||
|
||||
content = reply.content
|
||||
if result_prefix:
|
||||
content = f"{result_prefix}\n\n{content}"
|
||||
|
||||
from channel.channel_factory import create_channel
|
||||
channel = create_channel(channel_type)
|
||||
if not channel:
|
||||
logger.error(f"[Scheduler] Failed to create channel: {channel_type}")
|
||||
return False
|
||||
|
||||
if channel_type == "web" and hasattr(channel, 'request_to_session'):
|
||||
req_id = context.get("request_id")
|
||||
if req_id:
|
||||
channel.request_to_session[req_id] = receiver
|
||||
|
||||
try:
|
||||
channel.send(Reply(ReplyType.TEXT, content), context)
|
||||
except Exception as e:
|
||||
logger.error(f"[Scheduler] Failed to send skill result: {e}")
|
||||
return False
|
||||
|
||||
_remember_delivered_output(agent_bridge, task, channel_type, content)
|
||||
logger.info(f"[Scheduler] Task {task['id']} executed: skill result sent to {receiver}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[Scheduler] Error in _execute_skill_call: {e}")
|
||||
import traceback
|
||||
logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
|
||||
return False
|
||||
|
||||
|
||||
def attach_scheduler_to_tool(tool, context: Context = None):
|
||||
|
||||
@@ -10,6 +10,19 @@ from croniter import croniter
|
||||
from common.log import logger
|
||||
|
||||
|
||||
def _parse_naive_local(iso_str: str) -> datetime:
|
||||
"""Parse an ISO datetime and coerce it to tz-naive local time.
|
||||
|
||||
The scheduler uses ``datetime.now()`` (tz-naive) for all comparisons,
|
||||
so any persisted timestamp must be normalized to the same flavor —
|
||||
otherwise comparing naive vs aware raises TypeError.
|
||||
"""
|
||||
dt = datetime.fromisoformat(iso_str)
|
||||
if dt.tzinfo is not None:
|
||||
dt = dt.astimezone().replace(tzinfo=None)
|
||||
return dt
|
||||
|
||||
|
||||
class SchedulerService:
|
||||
"""
|
||||
Background service that executes scheduled tasks
|
||||
@@ -39,7 +52,6 @@ class SchedulerService:
|
||||
self.running = True
|
||||
self.thread = threading.Thread(target=self._run_loop, daemon=True)
|
||||
self.thread.start()
|
||||
logger.debug("[Scheduler] Service started")
|
||||
|
||||
def stop(self):
|
||||
"""Stop the scheduler service"""
|
||||
@@ -54,7 +66,7 @@ class SchedulerService:
|
||||
|
||||
def _run_loop(self):
|
||||
"""Main scheduler loop"""
|
||||
logger.debug("[Scheduler] Scheduler loop started")
|
||||
logger.info("[Scheduler] Scheduler loop started")
|
||||
|
||||
while self.running:
|
||||
try:
|
||||
@@ -71,12 +83,18 @@ class SchedulerService:
|
||||
|
||||
for task in tasks:
|
||||
try:
|
||||
# Check if task is due
|
||||
if self._is_task_due(task, now):
|
||||
logger.info(f"[Scheduler] Executing task: {task['id']} - {task['name']}")
|
||||
self._execute_task(task)
|
||||
|
||||
# Update next run time
|
||||
ok = self._execute_task(task)
|
||||
if not ok:
|
||||
# Leave next_run_at as-is so the next loop retries.
|
||||
# Cron tasks within the catch-up window will keep
|
||||
# firing; beyond it _is_task_due will reschedule.
|
||||
logger.warning(
|
||||
f"[Scheduler] Task {task['id']} delivery failed, will retry next tick"
|
||||
)
|
||||
continue
|
||||
|
||||
next_run = self._calculate_next_run(task, now)
|
||||
if next_run:
|
||||
self.task_store.update_task(task['id'], {
|
||||
@@ -84,7 +102,6 @@ class SchedulerService:
|
||||
"last_run_at": now.isoformat()
|
||||
})
|
||||
else:
|
||||
# One-time task completed, remove it
|
||||
self.task_store.delete_task(task['id'])
|
||||
logger.info(f"[Scheduler] One-time task completed and removed: {task['id']}")
|
||||
except Exception as e:
|
||||
@@ -113,34 +130,43 @@ class SchedulerService:
|
||||
return False
|
||||
|
||||
try:
|
||||
next_run = datetime.fromisoformat(next_run_str)
|
||||
|
||||
# Check if task is overdue (e.g., service restart)
|
||||
next_run = _parse_naive_local(next_run_str)
|
||||
|
||||
if next_run < now:
|
||||
time_diff = (now - next_run).total_seconds()
|
||||
|
||||
# If overdue by more than 5 minutes, skip this run and schedule next
|
||||
if time_diff > 300: # 5 minutes
|
||||
logger.warning(f"[Scheduler] Task {task['id']} is overdue by {int(time_diff)}s, skipping and scheduling next run")
|
||||
|
||||
# For one-time tasks, remove them directly
|
||||
schedule = task.get("schedule", {})
|
||||
if schedule.get("type") == "once":
|
||||
self.task_store.delete_task(task['id'])
|
||||
logger.info(f"[Scheduler] One-time task {task['id']} expired, removed")
|
||||
return False
|
||||
|
||||
# For recurring tasks, calculate next run from now
|
||||
next_next_run = self._calculate_next_run(task, now)
|
||||
if next_next_run:
|
||||
self.task_store.update_task(task['id'], {
|
||||
"next_run_at": next_next_run.isoformat()
|
||||
})
|
||||
logger.info(f"[Scheduler] Rescheduled task {task['id']} to {next_next_run}")
|
||||
schedule = task.get("schedule", {})
|
||||
schedule_type = schedule.get("type")
|
||||
|
||||
# Catch-up window: fire if we're within 10 minutes of the
|
||||
# scheduled tick. Beyond that we'd rather skip than push a
|
||||
# stale daily report to the user.
|
||||
if time_diff <= 600:
|
||||
return True
|
||||
|
||||
logger.warning(
|
||||
f"[Scheduler] Task {task['id']} is overdue by {int(time_diff)}s, "
|
||||
f"skipping and scheduling next run"
|
||||
)
|
||||
|
||||
if schedule_type == "once":
|
||||
self.task_store.delete_task(task['id'])
|
||||
logger.info(f"[Scheduler] One-time task {task['id']} expired, removed")
|
||||
return False
|
||||
|
||||
|
||||
next_next_run = self._calculate_next_run(task, now)
|
||||
if next_next_run:
|
||||
self.task_store.update_task(task['id'], {
|
||||
"next_run_at": next_next_run.isoformat()
|
||||
})
|
||||
logger.info(f"[Scheduler] Rescheduled task {task['id']} to {next_next_run}")
|
||||
return False
|
||||
|
||||
return now >= next_run
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"[Scheduler] Failed to evaluate due-state for task "
|
||||
f"{task.get('id')} (next_run_at={next_run_str!r}): {e}"
|
||||
)
|
||||
return False
|
||||
|
||||
def _calculate_next_run(self, task: dict, from_time: datetime) -> Optional[datetime]:
|
||||
@@ -184,30 +210,34 @@ class SchedulerService:
|
||||
return None
|
||||
|
||||
try:
|
||||
run_at = datetime.fromisoformat(run_at_str)
|
||||
# Only return if in the future
|
||||
run_at = _parse_naive_local(run_at_str)
|
||||
if run_at > from_time:
|
||||
return run_at
|
||||
except Exception:
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"[Scheduler] Failed to parse once-task run_at "
|
||||
f"{run_at_str!r}: {e}"
|
||||
)
|
||||
return None
|
||||
|
||||
return None
|
||||
|
||||
def _execute_task(self, task: dict):
|
||||
def _execute_task(self, task: dict) -> bool:
|
||||
"""
|
||||
Execute a task
|
||||
|
||||
Args:
|
||||
task: Task dictionary
|
||||
Execute a task.
|
||||
|
||||
Returns True if delivery succeeded (caller should advance state),
|
||||
False if it failed (caller should keep next_run_at so the next
|
||||
loop iteration retries). Callback may return None for legacy
|
||||
behaviour, treated as success.
|
||||
"""
|
||||
try:
|
||||
# Call the execute callback
|
||||
self.execute_callback(task)
|
||||
result = self.execute_callback(task)
|
||||
return False if result is False else True
|
||||
except Exception as e:
|
||||
logger.error(f"[Scheduler] Error executing task {task['id']}: {e}")
|
||||
# Update task with error
|
||||
self.task_store.update_task(task['id'], {
|
||||
"last_error": str(e),
|
||||
"last_error_at": datetime.now().isoformat()
|
||||
})
|
||||
return False
|
||||
|
||||
@@ -364,9 +364,12 @@ class SchedulerTool(BaseTool):
|
||||
logger.error(f"[SchedulerTool] Invalid relative time format: {schedule_value}")
|
||||
return None
|
||||
else:
|
||||
# Absolute time in ISO format
|
||||
datetime.fromisoformat(schedule_value)
|
||||
return {"type": "once", "run_at": schedule_value}
|
||||
# Absolute ISO time. Normalize to tz-naive local so it
|
||||
# stays comparable with the scheduler's datetime.now().
|
||||
parsed = datetime.fromisoformat(schedule_value)
|
||||
if parsed.tzinfo is not None:
|
||||
parsed = parsed.astimezone().replace(tzinfo=None)
|
||||
return {"type": "once", "run_at": parsed.isoformat()}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[SchedulerTool] Invalid schedule: {e}")
|
||||
|
||||
@@ -30,7 +30,7 @@ from common import const
|
||||
from common.log import logger
|
||||
from config import conf
|
||||
|
||||
DEFAULT_MODEL = const.GPT_55
|
||||
DEFAULT_MODEL = const.GPT_41_MINI
|
||||
DEFAULT_TIMEOUT = 60
|
||||
MAX_TOKENS = 1000
|
||||
COMPRESS_THRESHOLD = 1_048_576 # 1 MB
|
||||
@@ -57,6 +57,7 @@ _DISCOVERABLE_MODELS = [
|
||||
("qianfan_api_key", const.QIANFAN, const.ERNIE_45_TURBO_VL, "Qianfan"),
|
||||
("zhipu_ai_api_key", const.ZHIPU_AI, const.GLM_4_7, "ZhipuAI"),
|
||||
("minimax_api_key", const.MiniMax, const.MINIMAX_M2_7, "MiniMax"),
|
||||
("mimo_api_key", const.MIMO, const.MIMO_V2_5_PRO, "MiMo"),
|
||||
]
|
||||
|
||||
# Model name prefix → discoverable provider display_name.
|
||||
@@ -73,11 +74,29 @@ _MODEL_PREFIX_TO_PROVIDER = [
|
||||
("glm-", "ZhipuAI"),
|
||||
("minimax-", "MiniMax"),
|
||||
("abab", "MiniMax"),
|
||||
("mimo-", "MiMo"),
|
||||
]
|
||||
|
||||
# Model prefixes that natively belong to OpenAI / LinkAI (raw HTTP providers).
|
||||
_OPENAI_MODEL_PREFIXES = ("gpt-", "o1-", "o3-", "o4-", "chatgpt-")
|
||||
|
||||
# Maps the UI provider id (persisted in tools.vision.provider) to the internal
|
||||
# display name used in VisionProvider.name. Keep in sync with _DISCOVERABLE_MODELS
|
||||
# and the openai/linkai branches in _route_by_model_name.
|
||||
_PROVIDER_ID_TO_DISPLAY = {
|
||||
"openai": "OpenAI",
|
||||
"linkai": "LinkAI",
|
||||
"moonshot": "Moonshot",
|
||||
"doubao": "Doubao",
|
||||
"dashscope": "DashScope",
|
||||
"claudeAPI": "Claude",
|
||||
"gemini": "Gemini",
|
||||
"qianfan": "Qianfan",
|
||||
"zhipu": "ZhipuAI",
|
||||
"minimax": "MiniMax",
|
||||
"mimo": "MiMo",
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class VisionProvider:
|
||||
@@ -211,13 +230,19 @@ class Vision(BaseTool):
|
||||
are de-duplicated to avoid retrying the same endpoint twice.
|
||||
"""
|
||||
user_model = self._resolve_user_vision_model()
|
||||
user_provider = self._resolve_user_vision_provider()
|
||||
providers: List[VisionProvider] = []
|
||||
|
||||
# Step 1: preferred provider derived from tools.vision.model
|
||||
if user_model:
|
||||
# Step 1: preferred provider — explicit `tools.vision.provider`
|
||||
# wins so custom model names can still be routed correctly. Falls
|
||||
# through to model-name prefix inference when provider is unset.
|
||||
preferred = None
|
||||
if user_provider and user_model:
|
||||
preferred = self._route_by_provider_id(user_provider, user_model)
|
||||
if not preferred and user_model:
|
||||
preferred = self._route_by_model_name(user_model)
|
||||
if preferred:
|
||||
providers.extend(preferred)
|
||||
if preferred:
|
||||
providers.extend(preferred)
|
||||
|
||||
# Step 2: auto-discovery chain as fallback
|
||||
existing = {p.name for p in providers}
|
||||
@@ -263,6 +288,24 @@ class Vision(BaseTool):
|
||||
return m.strip()
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _resolve_user_vision_provider() -> Optional[str]:
|
||||
"""Read tools.vision.provider — the UI-persisted vendor id.
|
||||
|
||||
Lets users pin a vendor for custom model names that prefix-inference
|
||||
can't recognize. Returns None when unset/blank.
|
||||
"""
|
||||
tools_conf = conf().get("tools") or conf().get("tool") or {}
|
||||
if not isinstance(tools_conf, dict):
|
||||
return None
|
||||
vision_conf = tools_conf.get("vision", {})
|
||||
if not isinstance(vision_conf, dict):
|
||||
return None
|
||||
p = vision_conf.get("provider")
|
||||
if isinstance(p, str) and p.strip():
|
||||
return p.strip()
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _infer_provider_from_model(model_name: str) -> Optional[str]:
|
||||
"""
|
||||
@@ -279,6 +322,54 @@ class Vision(BaseTool):
|
||||
return display_name
|
||||
return None
|
||||
|
||||
def _route_by_provider_id(self, provider_id: str, user_model: str) -> Optional[List[VisionProvider]]:
|
||||
"""Route by the UI-persisted provider id.
|
||||
|
||||
Returns:
|
||||
- [provider] : provider id is known and its key is configured.
|
||||
- None : unknown provider id, or the bot can't be created.
|
||||
Caller falls through to model-name-based routing.
|
||||
"""
|
||||
display_name = _PROVIDER_ID_TO_DISPLAY.get(provider_id)
|
||||
if not display_name:
|
||||
return None
|
||||
|
||||
# OpenAI / LinkAI use raw HTTP providers, not the discoverable bot path.
|
||||
if provider_id == "openai":
|
||||
p = self._build_openai_provider(user_model)
|
||||
return [p] if p else None
|
||||
if provider_id == "linkai":
|
||||
p = self._build_linkai_provider(user_model)
|
||||
return [p] if p else None
|
||||
|
||||
# Discoverable bot-backed providers.
|
||||
for config_key, bot_type, _default_model, name in _DISCOVERABLE_MODELS:
|
||||
if name != display_name:
|
||||
continue
|
||||
api_key = conf().get(config_key, "")
|
||||
if not api_key or not api_key.strip():
|
||||
logger.warning(f"[Vision] tools.vision.provider='{provider_id}' "
|
||||
f"but '{config_key}' is not configured. Falling back.")
|
||||
return None
|
||||
try:
|
||||
from models.bot_factory import create_bot
|
||||
bot = create_bot(bot_type)
|
||||
if not hasattr(bot, 'call_vision'):
|
||||
logger.warning(f"[Vision] '{display_name}' bot does not implement call_vision.")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.warning(f"[Vision] Failed to create '{display_name}' bot: {e}")
|
||||
return None
|
||||
return [VisionProvider(
|
||||
name=display_name,
|
||||
api_key="",
|
||||
api_base="",
|
||||
model_override=user_model,
|
||||
use_bot=True,
|
||||
fallback_bot=bot,
|
||||
)]
|
||||
return None
|
||||
|
||||
def _route_by_model_name(self, user_model: str) -> Optional[List[VisionProvider]]:
|
||||
"""
|
||||
Try to build a provider list using the user-specified model name.
|
||||
|
||||
12
app.py
12
app.py
@@ -288,6 +288,16 @@ def _warmup_mcp_tools():
|
||||
logger.warning(f"[App] MCP warmup failed (non-fatal): {e}")
|
||||
|
||||
|
||||
def _warmup_scheduler():
|
||||
"""Eager-init AgentBridge so the scheduler thread starts at process
|
||||
boot rather than waiting for the first user message."""
|
||||
try:
|
||||
from bridge.bridge import Bridge
|
||||
Bridge().get_agent_bridge()
|
||||
except Exception as e:
|
||||
logger.warning(f"[App] Scheduler warmup failed: {e}")
|
||||
|
||||
|
||||
def _sync_builtin_skills():
|
||||
"""Sync builtin skills from project skills/ to workspace skills/ on startup."""
|
||||
import shutil
|
||||
@@ -353,6 +363,8 @@ def run():
|
||||
# latency isn't dominated by npx package downloads.
|
||||
_warmup_mcp_tools()
|
||||
|
||||
_warmup_scheduler()
|
||||
|
||||
logger.info(f"[App] Starting channels: {channel_names}")
|
||||
|
||||
_channel_mgr = ChannelManager()
|
||||
|
||||
@@ -5,7 +5,7 @@ Agent Bridge - Integrates Agent system with existing COW bridge
|
||||
import os
|
||||
from typing import Optional, List
|
||||
|
||||
from agent.protocol import Agent, LLMModel, LLMRequest
|
||||
from agent.protocol import Agent, LLMModel, LLMRequest, get_cancel_registry
|
||||
from bridge.agent_event_handler import AgentEventHandler
|
||||
from bridge.agent_initializer import AgentInitializer
|
||||
from bridge.bridge import Bridge
|
||||
@@ -285,6 +285,15 @@ class AgentBridge:
|
||||
|
||||
# Create helper instances
|
||||
self.initializer = AgentInitializer(bridge, self)
|
||||
|
||||
# Eager-start the scheduler so cron tasks fire without waiting
|
||||
# for the first user message. init_scheduler is idempotent.
|
||||
try:
|
||||
from agent.tools.scheduler.integration import init_scheduler
|
||||
if init_scheduler(self):
|
||||
self.scheduler_initialized = True
|
||||
except Exception as e:
|
||||
logger.warning(f"[AgentBridge] Eager scheduler init failed: {e}")
|
||||
def create_agent(self, system_prompt: str, tools: List = None, **kwargs) -> Agent:
|
||||
"""
|
||||
Create the super agent with COW integration
|
||||
@@ -390,11 +399,22 @@ class AgentBridge:
|
||||
"""
|
||||
session_id = None
|
||||
agent = None
|
||||
request_id = None
|
||||
cancel_event = None
|
||||
try:
|
||||
# Extract session_id from context for user isolation
|
||||
if context:
|
||||
session_id = context.kwargs.get("session_id") or context.get("session_id")
|
||||
|
||||
request_id = context.kwargs.get("request_id") or context.get("request_id")
|
||||
|
||||
# Register a cancel token. Prefer per-turn request_id (web),
|
||||
# fall back to session_id (IM channels). The Event is polled by
|
||||
# AgentStreamExecutor at safe checkpoints.
|
||||
registry = get_cancel_registry()
|
||||
token_key = request_id or session_id
|
||||
if token_key:
|
||||
cancel_event = registry.register(token_key, session_id=session_id)
|
||||
|
||||
# Get agent for this session (will auto-initialize if needed)
|
||||
agent = self.get_agent(session_id=session_id)
|
||||
if not agent:
|
||||
@@ -449,7 +469,8 @@ class AgentBridge:
|
||||
response = agent.run_stream(
|
||||
user_message=query,
|
||||
on_event=event_handler.handle_event,
|
||||
clear_history=clear_history
|
||||
clear_history=clear_history,
|
||||
cancel_event=cancel_event,
|
||||
)
|
||||
finally:
|
||||
# Restore original tools
|
||||
@@ -459,6 +480,13 @@ class AgentBridge:
|
||||
# Log execution summary
|
||||
event_handler.log_summary()
|
||||
|
||||
# Release cancel token; keep registry bounded.
|
||||
if token_key:
|
||||
try:
|
||||
registry.unregister(token_key)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Persist new messages generated during this run
|
||||
if session_id:
|
||||
channel_type = (context.get("channel_type") or "") if context else ""
|
||||
@@ -512,6 +540,12 @@ class AgentBridge:
|
||||
logger.info(f"[AgentBridge] Cleared DB for session after error: {session_id}")
|
||||
except Exception as db_err:
|
||||
logger.warning(f"[AgentBridge] Failed to clear DB after error: {db_err}")
|
||||
# Release cancel token on error path too (idempotent).
|
||||
if cancel_event is not None and (request_id or session_id):
|
||||
try:
|
||||
get_cancel_registry().unregister(request_id or session_id)
|
||||
except Exception:
|
||||
pass
|
||||
return Reply(ReplyType.ERROR, f"Agent error: {str(e)}")
|
||||
|
||||
def _schedule_mcp_hot_reload(self, agent):
|
||||
|
||||
@@ -2,44 +2,40 @@
|
||||
Agent Event Handler - Handles agent events and thinking process output
|
||||
"""
|
||||
|
||||
from common import const
|
||||
from common.log import logger
|
||||
|
||||
# Cap intermediate thinking messages on weixin to stay within send quota.
|
||||
WEIXIN_THINKING_INSTANT_MAX = 7
|
||||
|
||||
|
||||
class AgentEventHandler:
|
||||
"""
|
||||
Handles agent events and optionally sends intermediate messages to channel
|
||||
"""
|
||||
|
||||
|
||||
def __init__(self, context=None, original_callback=None):
|
||||
"""
|
||||
Initialize event handler
|
||||
|
||||
Args:
|
||||
context: COW context (for accessing channel)
|
||||
original_callback: Original event callback to chain
|
||||
"""
|
||||
self.context = context
|
||||
self.original_callback = original_callback
|
||||
|
||||
# Get channel for sending intermediate messages
|
||||
|
||||
self.channel = None
|
||||
if context:
|
||||
self.channel = context.kwargs.get("channel") if hasattr(context, "kwargs") else None
|
||||
|
||||
|
||||
self.current_content = ""
|
||||
self.turn_number = 0
|
||||
|
||||
|
||||
channel_type = ""
|
||||
if context and hasattr(context, "kwargs"):
|
||||
channel_type = context.kwargs.get("channel_type", "") or ""
|
||||
self._is_weixin = channel_type == const.WEIXIN
|
||||
self._thinking_sent_count = 0
|
||||
self._merged_buf: list[str] = []
|
||||
|
||||
def handle_event(self, event):
|
||||
"""
|
||||
Main event handler
|
||||
|
||||
Args:
|
||||
event: Event dict with type and data
|
||||
"""
|
||||
event_type = event.get("type")
|
||||
data = event.get("data", {})
|
||||
|
||||
# Dispatch to specific handlers
|
||||
|
||||
if event_type == "turn_start":
|
||||
self._handle_turn_start(data)
|
||||
elif event_type == "message_update":
|
||||
@@ -52,25 +48,23 @@ class AgentEventHandler:
|
||||
self._handle_tool_execution_start(data)
|
||||
elif event_type == "tool_execution_end":
|
||||
self._handle_tool_execution_end(data)
|
||||
|
||||
# Call original callback if provided
|
||||
elif event_type == "agent_end":
|
||||
self._handle_agent_end(data)
|
||||
|
||||
if self.original_callback:
|
||||
self.original_callback(event)
|
||||
|
||||
|
||||
def _handle_turn_start(self, data):
|
||||
"""Handle turn start event"""
|
||||
self.turn_number = data.get("turn", 0)
|
||||
self.current_content = ""
|
||||
|
||||
|
||||
def _handle_message_update(self, data):
|
||||
"""Handle message update event (streaming content text)"""
|
||||
delta = data.get("delta", "")
|
||||
self.current_content += delta
|
||||
|
||||
|
||||
def _handle_message_end(self, data):
|
||||
"""Handle message end event"""
|
||||
tool_calls = data.get("tool_calls", [])
|
||||
|
||||
|
||||
if tool_calls:
|
||||
if self.current_content.strip():
|
||||
logger.info(f"💭 {self.current_content.strip()[:200]}{'...' if len(self.current_content) > 200 else ''}")
|
||||
@@ -78,35 +72,54 @@ class AgentEventHandler:
|
||||
else:
|
||||
if self.current_content.strip():
|
||||
logger.debug(f"💬 {self.current_content.strip()[:200]}{'...' if len(self.current_content) > 200 else ''}")
|
||||
|
||||
# Drain weixin buffer before final reply leaves chat_channel
|
||||
self._flush_merged_now()
|
||||
|
||||
self.current_content = ""
|
||||
|
||||
|
||||
def _handle_agent_end(self, data):
|
||||
self._flush_merged_now()
|
||||
|
||||
def _handle_tool_execution_start(self, data):
|
||||
"""Handle tool execution start event - logged by agent_stream.py"""
|
||||
pass
|
||||
|
||||
|
||||
def _handle_tool_execution_end(self, data):
|
||||
"""Handle tool execution end event - logged by agent_stream.py"""
|
||||
pass
|
||||
|
||||
|
||||
def _send_to_channel(self, message):
|
||||
"""
|
||||
Try to send intermediate message to channel.
|
||||
Skipped in SSE mode because thinking text is already streamed via on_event.
|
||||
"""
|
||||
if self.context and self.context.get("on_event"):
|
||||
return
|
||||
if not self.channel:
|
||||
return
|
||||
|
||||
if not self._is_weixin:
|
||||
self._do_send(message)
|
||||
return
|
||||
|
||||
if self._thinking_sent_count < WEIXIN_THINKING_INSTANT_MAX:
|
||||
self._do_send(message)
|
||||
self._thinking_sent_count += 1
|
||||
return
|
||||
|
||||
self._merged_buf.append(message)
|
||||
|
||||
def _flush_merged_now(self):
|
||||
if not self._merged_buf:
|
||||
return
|
||||
merged = "\n\n".join(self._merged_buf)
|
||||
count = len(self._merged_buf)
|
||||
self._merged_buf = []
|
||||
logger.debug(f"[AgentEventHandler] Flushing {count} merged thinking msgs, len={len(merged)}")
|
||||
self._do_send(merged)
|
||||
self._thinking_sent_count += 1
|
||||
|
||||
def _do_send(self, message):
|
||||
try:
|
||||
from bridge.reply import Reply, ReplyType
|
||||
reply = Reply(ReplyType.TEXT, message)
|
||||
self.channel._send(reply, self.context)
|
||||
except Exception as e:
|
||||
logger.debug(f"[AgentEventHandler] Failed to send to channel: {e}")
|
||||
|
||||
if self.channel:
|
||||
try:
|
||||
from bridge.reply import Reply, ReplyType
|
||||
reply = Reply(ReplyType.TEXT, message)
|
||||
self.channel._send(reply, self.context)
|
||||
except Exception as e:
|
||||
logger.debug(f"[AgentEventHandler] Failed to send to channel: {e}")
|
||||
|
||||
def log_summary(self):
|
||||
"""Log execution summary - simplified"""
|
||||
# Summary removed as per user request
|
||||
# Real-time logging during execution is sufficient
|
||||
pass
|
||||
|
||||
@@ -63,6 +63,10 @@ class Bridge(object):
|
||||
if model_type and model_type.startswith("deepseek"):
|
||||
self.btype["chat"] = const.DEEPSEEK
|
||||
|
||||
# 小米 MiMo 系列模型,全部以 mimo- 开头
|
||||
if model_type and model_type.startswith("mimo-"):
|
||||
self.btype["chat"] = const.MIMO
|
||||
|
||||
if model_type and isinstance(model_type, str):
|
||||
lowered_model_type = model_type.lower()
|
||||
if lowered_model_type == const.QIANFAN or lowered_model_type.startswith("ernie"):
|
||||
|
||||
@@ -73,7 +73,7 @@ class Channel(object):
|
||||
Build reply content, using agent if enabled in config
|
||||
"""
|
||||
# Check if agent mode is enabled
|
||||
use_agent = conf().get("agent", False)
|
||||
use_agent = conf().get("agent", True)
|
||||
|
||||
if use_agent:
|
||||
try:
|
||||
|
||||
@@ -39,6 +39,9 @@ def create_channel(channel_type) -> Channel:
|
||||
elif channel_type == const.QQ:
|
||||
from channel.qq.qq_channel import QQChannel
|
||||
ch = QQChannel()
|
||||
elif channel_type == const.TELEGRAM:
|
||||
from channel.telegram.telegram_channel import TelegramChannel
|
||||
ch = TelegramChannel()
|
||||
elif channel_type in (const.WEIXIN, "wx"):
|
||||
from channel.weixin.weixin_channel import WeixinChannel
|
||||
ch = WeixinChannel()
|
||||
|
||||
@@ -438,8 +438,21 @@ class ChatChannel(Channel):
|
||||
|
||||
return func
|
||||
|
||||
# Chat commands that must bypass the per-session serial queue,
|
||||
# otherwise /cancel would queue behind the task it tries to cancel.
|
||||
# Use /cancel (not /stop) to avoid colliding with `cow stop` CLI.
|
||||
_BYPASS_QUEUE_COMMANDS = ("/cancel",)
|
||||
|
||||
def produce(self, context: Context):
|
||||
session_id = context["session_id"]
|
||||
|
||||
# Fast path: /cancel must not enter the queue.
|
||||
if context.type == ContextType.TEXT and context.content:
|
||||
stripped = context.content.strip().lower()
|
||||
if stripped in self._BYPASS_QUEUE_COMMANDS:
|
||||
self._handle_cancel_command(context, session_id)
|
||||
return
|
||||
|
||||
with self.lock:
|
||||
if session_id not in self.sessions:
|
||||
self.sessions[session_id] = [
|
||||
@@ -451,6 +464,29 @@ class ChatChannel(Channel):
|
||||
else:
|
||||
self.sessions[session_id][0].put(context)
|
||||
|
||||
def _handle_cancel_command(self, context: Context, session_id: str) -> None:
|
||||
"""Cancel any in-flight agent run for *session_id* and reply inline.
|
||||
|
||||
Runs synchronously on the caller's thread. Reply is sent through
|
||||
_send_reply so plugins (e.g. logging) still observe it.
|
||||
"""
|
||||
try:
|
||||
from agent.protocol import get_cancel_registry
|
||||
from bridge.reply import Reply, ReplyType
|
||||
|
||||
cancelled = get_cancel_registry().cancel_session(session_id)
|
||||
text = (
|
||||
"🛑 已中止"
|
||||
if cancelled > 0
|
||||
else "当前没有可中止的任务。"
|
||||
)
|
||||
logger.info(
|
||||
f"[chat_channel] /cancel fast-path: session={session_id}, cancelled={cancelled}"
|
||||
)
|
||||
self._send_reply(context, Reply(ReplyType.TEXT, text))
|
||||
except Exception as e:
|
||||
logger.warning(f"[chat_channel] /cancel fast-path failed: {e}")
|
||||
|
||||
# 消费者函数,单独线程,用于从消息队列中取出消息并处理
|
||||
def consume(self):
|
||||
while True:
|
||||
|
||||
@@ -752,6 +752,9 @@ class FeiShuChanel(ChatChannel):
|
||||
init_in_flight = [False]
|
||||
# 一旦初始化失败就长期标记为 disabled,本次回复不再尝试任何流式调用
|
||||
disabled = [False]
|
||||
# True after agent_cancelled: agent_end stops rewriting the card
|
||||
# with stale final_response and just finalizes current content.
|
||||
cancelled = [False]
|
||||
lock = threading.Lock()
|
||||
|
||||
# ---- 异步推送队列 ----------------------------------------------------
|
||||
@@ -1076,18 +1079,42 @@ class FeiShuChanel(ChatChannel):
|
||||
message_id[0] = None
|
||||
sequence[0] = 0
|
||||
|
||||
elif event_type == "agent_cancelled":
|
||||
# Lock channel into "no-rewrite" mode: the subsequent
|
||||
# agent_end's final_response is from the last *completed*
|
||||
# turn (the user already saw it), so rewriting the card
|
||||
# would duplicate it visually.
|
||||
with lock:
|
||||
cancelled[0] = True
|
||||
|
||||
elif event_type == "agent_end":
|
||||
# 最终回复:用 final_response 覆盖当前流式卡片,然后关闭流式模式。
|
||||
final_response = data.get("final_response", "")
|
||||
if not final_response:
|
||||
return
|
||||
final_text = str(final_response)
|
||||
# 标记 streamed 让 chat_channel 跳过 send()
|
||||
context["feishu_streamed"] = True
|
||||
|
||||
with lock:
|
||||
was_cancelled = cancelled[0]
|
||||
has_card = card_id[0] is not None
|
||||
init_busy = init_in_flight[0]
|
||||
pending_text = current_text[0]
|
||||
|
||||
if was_cancelled:
|
||||
# Cancelled path: finalize the in-flight card with
|
||||
# partial output (or a short marker if empty); drop
|
||||
# stale final_response to avoid duplicating last turn.
|
||||
if has_card:
|
||||
_drain_push_queue()
|
||||
partial = (pending_text or "").rstrip()
|
||||
final_text = partial or "_(已中止)_"
|
||||
_stream_update_text(final_text)
|
||||
_close_streaming_mode(final_text)
|
||||
push_queue.put(None)
|
||||
return
|
||||
|
||||
if not final_response:
|
||||
return
|
||||
final_text = str(final_response)
|
||||
|
||||
# 罕见情况:agent_end 触发时还没创建过卡片(极快返回 / 没有
|
||||
# message_update),主动创建一张承载 final_text。
|
||||
|
||||
0
channel/telegram/__init__.py
Normal file
0
channel/telegram/__init__.py
Normal file
676
channel/telegram/telegram_channel.py
Normal file
676
channel/telegram/telegram_channel.py
Normal file
@@ -0,0 +1,676 @@
|
||||
"""
|
||||
Telegram channel via Bot API (long polling mode).
|
||||
|
||||
Features:
|
||||
- Single chat & group chat (text / photo / voice / video / document)
|
||||
- Group trigger: @mention or reply-to-bot (configurable)
|
||||
- /cancel fast-path matches Web channel behaviour
|
||||
- Auto-register bot commands menu on startup (mirrors Web slash menu)
|
||||
- Optional HTTP/SOCKS5 proxy support for restricted networks
|
||||
|
||||
Implementation note:
|
||||
python-telegram-bot is async-first. We run the bot inside a dedicated
|
||||
thread with its own asyncio loop so the rest of cow (which is sync)
|
||||
stays untouched. Inbound updates are dispatched onto cow's existing
|
||||
sync ChatChannel.produce() pipeline; outbound send() schedules
|
||||
coroutines back onto that loop via asyncio.run_coroutine_threadsafe.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import re
|
||||
import threading
|
||||
|
||||
from bridge.context import Context, ContextType
|
||||
from bridge.reply import Reply, ReplyType
|
||||
from channel.chat_channel import ChatChannel, check_prefix
|
||||
from channel.telegram.telegram_message import TelegramMessage
|
||||
from common.expired_dict import ExpiredDict
|
||||
from common.log import logger
|
||||
from common.singleton import singleton
|
||||
from config import conf
|
||||
|
||||
# Bot command menu, aligned with Web slash commands.
|
||||
# Top-level commands only; sub-commands are entered with a space (e.g. "/skill list").
|
||||
TELEGRAM_BOT_COMMANDS = [
|
||||
("help", "Show command help"),
|
||||
("status", "Show running status"),
|
||||
("context", "View/clear conversation context (sub: clear)"),
|
||||
("skill", "Manage skills (list/search/install/...)"),
|
||||
("memory", "Manage memory (sub: dream)"),
|
||||
("knowledge", "Manage knowledge base (list/on/off)"),
|
||||
("config", "Show current config"),
|
||||
("cancel", "Cancel running agent task"),
|
||||
("logs", "Show recent logs"),
|
||||
("version", "Show version"),
|
||||
]
|
||||
|
||||
|
||||
@singleton
|
||||
class TelegramChannel(ChatChannel):
|
||||
NOT_SUPPORT_REPLYTYPE = []
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.bot_token = ""
|
||||
self.bot_username = "" # used for @-mention matching
|
||||
self._bot = None
|
||||
self._application = None
|
||||
self._loop = None
|
||||
self._loop_thread = None
|
||||
self._stop_event = threading.Event()
|
||||
# Idempotent dedup; TG occasionally redelivers the same update on flaky networks
|
||||
self._received_msgs = ExpiredDict(60 * 60 * 1)
|
||||
|
||||
# Disable group whitelist / prefix checks (we handle triggering ourselves
|
||||
# in _should_reply_in_group), aligned with feishu / wecom_bot channels.
|
||||
conf()["group_name_white_list"] = ["ALL_GROUP"]
|
||||
conf()["single_chat_prefix"] = [""]
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Lifecycle
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def startup(self):
|
||||
self.bot_token = conf().get("telegram_token", "")
|
||||
if not self.bot_token:
|
||||
err = "[Telegram] telegram_token is required"
|
||||
logger.error(err)
|
||||
self.report_startup_error(err)
|
||||
return
|
||||
|
||||
try:
|
||||
from telegram.ext import (
|
||||
Application,
|
||||
MessageHandler,
|
||||
CommandHandler,
|
||||
filters,
|
||||
)
|
||||
except ImportError:
|
||||
err = (
|
||||
"[Telegram] python-telegram-bot is not installed. "
|
||||
"Run: pip install python-telegram-bot"
|
||||
)
|
||||
logger.error(err)
|
||||
self.report_startup_error(err)
|
||||
return
|
||||
|
||||
# Run the asyncio event loop in a dedicated thread so the sync cow body
|
||||
# is untouched.
|
||||
self._loop = asyncio.new_event_loop()
|
||||
|
||||
def _run_loop():
|
||||
asyncio.set_event_loop(self._loop)
|
||||
try:
|
||||
self._loop.run_until_complete(self._async_main(Application, MessageHandler, CommandHandler, filters))
|
||||
except Exception as e:
|
||||
logger.error(f"[Telegram] event loop crashed: {e}", exc_info=True)
|
||||
self.report_startup_error(str(e))
|
||||
finally:
|
||||
try:
|
||||
self._loop.close()
|
||||
except Exception:
|
||||
pass
|
||||
logger.info("[Telegram] event loop exited")
|
||||
|
||||
self._loop_thread = threading.Thread(target=_run_loop, daemon=True, name="telegram-loop")
|
||||
self._loop_thread.start()
|
||||
# Block startup() until the loop thread exits, matching other channels'
|
||||
# behaviour (startup is a blocking call).
|
||||
self._loop_thread.join()
|
||||
|
||||
async def _async_main(self, Application, MessageHandler, CommandHandler, filters):
|
||||
"""Build Application, register handlers, and run polling."""
|
||||
builder = Application.builder().token(self.bot_token)
|
||||
|
||||
# Proxy: prefer telegram_proxy config, fall back to HTTPS_PROXY env var
|
||||
proxy_url = conf().get("telegram_proxy", "") or os.environ.get("HTTPS_PROXY", "")
|
||||
if proxy_url:
|
||||
try:
|
||||
builder = builder.proxy(proxy_url).get_updates_proxy(proxy_url)
|
||||
logger.info(f"[Telegram] using proxy: {proxy_url}")
|
||||
except Exception as e:
|
||||
logger.warning(f"[Telegram] proxy config failed, fallback to direct: {e}")
|
||||
|
||||
# Media uploads (photo/voice/video/document) over a proxy can be slow,
|
||||
# bump read/write/connect/pool timeouts.
|
||||
builder = (
|
||||
builder
|
||||
.read_timeout(60)
|
||||
.write_timeout(120)
|
||||
.connect_timeout(30)
|
||||
.pool_timeout(30)
|
||||
)
|
||||
|
||||
application = builder.build()
|
||||
self._application = application
|
||||
self._bot = application.bot
|
||||
|
||||
# Fetch our own username (needed for @-mention matching in groups)
|
||||
try:
|
||||
me = await self._bot.get_me()
|
||||
self.bot_username = me.username or ""
|
||||
self.name = self.bot_username # ChatChannel uses self.name to strip @-mention
|
||||
logger.info(f"[Telegram] Bot logged in as @{self.bot_username} (id={me.id})")
|
||||
except Exception as e:
|
||||
err = f"[Telegram] get_me failed: {e}"
|
||||
logger.error(err)
|
||||
self.report_startup_error(err)
|
||||
return
|
||||
|
||||
# Register the command menu (failure is non-fatal)
|
||||
if conf().get("telegram_register_commands", True):
|
||||
try:
|
||||
from telegram import BotCommand
|
||||
cmds = [BotCommand(name, desc) for name, desc in TELEGRAM_BOT_COMMANDS]
|
||||
await self._bot.set_my_commands(cmds)
|
||||
logger.info(f"[Telegram] Registered {len(cmds)} bot commands")
|
||||
except Exception as e:
|
||||
logger.warning(f"[Telegram] set_my_commands failed: {e}")
|
||||
|
||||
# Handlers:
|
||||
# 1) /cancel uses the fast-path
|
||||
application.add_handler(CommandHandler("cancel", self._on_cancel))
|
||||
# 2) Normal messages (text + media)
|
||||
application.add_handler(MessageHandler(filters.ALL & ~filters.COMMAND, self._on_message))
|
||||
# 3) Other slash commands are forwarded as plain text for the agent to handle
|
||||
application.add_handler(MessageHandler(filters.COMMAND, self._on_command_passthrough))
|
||||
|
||||
# Start polling. drop_pending_updates avoids replaying backlog after restart.
|
||||
logger.info("[Telegram] Starting long polling...")
|
||||
await application.initialize()
|
||||
await application.start()
|
||||
await application.updater.start_polling(drop_pending_updates=True)
|
||||
self.report_startup_success()
|
||||
logger.info("[Telegram] ✅ Telegram bot ready, polling for updates")
|
||||
|
||||
# Block until stop()
|
||||
try:
|
||||
while not self._stop_event.is_set():
|
||||
await asyncio.sleep(0.5)
|
||||
finally:
|
||||
try:
|
||||
await application.updater.stop()
|
||||
await application.stop()
|
||||
await application.shutdown()
|
||||
except Exception as e:
|
||||
logger.warning(f"[Telegram] shutdown error: {e}")
|
||||
|
||||
def stop(self):
|
||||
logger.info("[Telegram] stop() called")
|
||||
self._stop_event.set()
|
||||
if self._loop_thread and self._loop_thread.is_alive():
|
||||
try:
|
||||
self._loop_thread.join(timeout=10)
|
||||
except Exception:
|
||||
pass
|
||||
logger.info("[Telegram] stop() completed")
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Inbound: telegram update -> ChatMessage -> ChatChannel.produce
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _on_cancel(self, update, _context):
|
||||
"""Fast-path: /cancel calls cancel_session directly without going through agent."""
|
||||
try:
|
||||
from agent.protocol import get_cancel_registry
|
||||
session_id = self._compute_session_id(update)
|
||||
cancelled = get_cancel_registry().cancel_session(session_id)
|
||||
text = "Current task cancelled." if cancelled else "No running task to cancel."
|
||||
await update.effective_message.reply_text(text)
|
||||
logger.info(f"[Telegram] /cancel session={session_id}, cancelled={cancelled}")
|
||||
except Exception as e:
|
||||
logger.error(f"[Telegram] /cancel error: {e}", exc_info=True)
|
||||
try:
|
||||
await update.effective_message.reply_text(f"⚠️ /cancel failed: {e}")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
async def _on_command_passthrough(self, update, _context):
|
||||
"""All non-/cancel commands fall through to plain message handling."""
|
||||
await self._on_message(update, _context)
|
||||
|
||||
async def _on_message(self, update, _context):
|
||||
"""Telegram update entry: parse message -> build ChatMessage -> produce()."""
|
||||
try:
|
||||
message = update.effective_message
|
||||
chat = update.effective_chat
|
||||
if not message or not chat:
|
||||
return
|
||||
|
||||
# Idempotent dedup
|
||||
msg_uid = f"{chat.id}:{message.message_id}"
|
||||
if self._received_msgs.get(msg_uid):
|
||||
return
|
||||
self._received_msgs[msg_uid] = True
|
||||
|
||||
is_group = chat.type in ("group", "supergroup")
|
||||
|
||||
# Debug log: helpful when group messages are silently dropped
|
||||
if is_group:
|
||||
logger.debug(
|
||||
f"[Telegram] group update received: chat_id={chat.id}, "
|
||||
f"text={(message.text or message.caption or '')[:40]!r}, "
|
||||
f"reply_to_bot={bool(message.reply_to_message and message.reply_to_message.from_user and message.reply_to_message.from_user.username == self.bot_username)}"
|
||||
)
|
||||
|
||||
# Group trigger gate (silently drop if not triggered)
|
||||
if is_group and not self._should_reply_in_group(update):
|
||||
logger.debug(f"[Telegram] group message not triggered (need @{self.bot_username} or reply), skip")
|
||||
return
|
||||
|
||||
# Parse message type + download media if needed.
|
||||
# Media messages with caption return both the local path and the caption text.
|
||||
ctype, content, caption = await self._parse_message(message)
|
||||
if ctype is None:
|
||||
logger.debug(f"[Telegram] unsupported message type, skip. msg={message}")
|
||||
return
|
||||
|
||||
# Strip @bot mention for group text/caption
|
||||
if is_group and self.bot_username:
|
||||
if ctype == ContextType.TEXT and content:
|
||||
content = self._strip_at_mention(content)
|
||||
if caption:
|
||||
caption = self._strip_at_mention(caption)
|
||||
|
||||
tg_msg = TelegramMessage(
|
||||
update,
|
||||
is_group=is_group,
|
||||
bot_username=self.bot_username,
|
||||
ctype=ctype,
|
||||
content=content,
|
||||
)
|
||||
tg_msg.is_at = is_group # If we got here in a group, the bot is mentioned/replied
|
||||
|
||||
# File cache: standalone media goes into cache, the next text query attaches them
|
||||
from channel.file_cache import get_file_cache
|
||||
file_cache = get_file_cache()
|
||||
session_id = self._compute_session_id(update)
|
||||
|
||||
# Media + caption together: treat as a complete query and bypass the cache
|
||||
if ctype in (ContextType.IMAGE, ContextType.FILE) and caption:
|
||||
tag = "image" if ctype == ContextType.IMAGE else "file"
|
||||
merged_text = f"{caption}\n[{tag}: {content}]"
|
||||
tg_msg.ctype = ContextType.TEXT
|
||||
tg_msg.content = merged_text
|
||||
ctype = ContextType.TEXT
|
||||
logger.info(f"[Telegram] Media+caption merged for session {session_id}")
|
||||
# fallthrough to the TEXT branch below
|
||||
|
||||
elif ctype == ContextType.IMAGE:
|
||||
file_cache.add(session_id, content, file_type="image")
|
||||
logger.info(f"[Telegram] Image cached for session {session_id}, waiting for query...")
|
||||
return
|
||||
elif ctype == ContextType.FILE:
|
||||
file_cache.add(session_id, content, file_type="file")
|
||||
logger.info(f"[Telegram] File cached for session {session_id}: {content}")
|
||||
return
|
||||
|
||||
if ctype == ContextType.TEXT:
|
||||
cached_files = file_cache.get(session_id)
|
||||
if cached_files:
|
||||
refs = []
|
||||
for fi in cached_files:
|
||||
ftype = fi["type"]
|
||||
tag = ftype if ftype in ("image", "video") else "file"
|
||||
refs.append(f"[{tag}: {fi['path']}]")
|
||||
tg_msg.content = (tg_msg.content or "") + "\n" + "\n".join(refs)
|
||||
file_cache.clear(session_id)
|
||||
logger.info(f"[Telegram] Attached {len(cached_files)} cached file(s) to query")
|
||||
|
||||
# Dispatch to cow main pipeline (reuses ChatChannel._compose_context routing)
|
||||
context = self._compose_context(
|
||||
tg_msg.ctype,
|
||||
tg_msg.content,
|
||||
isgroup=is_group,
|
||||
msg=tg_msg,
|
||||
)
|
||||
if context:
|
||||
context["session_id"] = session_id
|
||||
context["receiver"] = str(chat.id)
|
||||
context["telegram_chat_id"] = chat.id
|
||||
context["telegram_reply_to_msg_id"] = message.message_id if is_group else None
|
||||
self.produce(context)
|
||||
logger.debug(f"[Telegram] received: type={ctype}, content={str(tg_msg.content)[:80]}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[Telegram] _on_message error: {e}", exc_info=True)
|
||||
|
||||
async def _parse_message(self, message):
|
||||
"""Parse a telegram message and return (ctype, content, caption).
|
||||
|
||||
- content is text for ContextType.TEXT, otherwise the local file path
|
||||
- caption is the optional text accompanying a media message; empty for plain text
|
||||
"""
|
||||
caption = (message.caption or "").strip()
|
||||
|
||||
if message.photo:
|
||||
largest = message.photo[-1]
|
||||
path = await self._download_file(largest.file_id, suffix=".jpg")
|
||||
return (ContextType.IMAGE, path, caption) if path else (None, None, "")
|
||||
|
||||
if message.voice or message.audio:
|
||||
audio_obj = message.voice or message.audio
|
||||
suffix = ".ogg" if message.voice else (
|
||||
"." + (audio_obj.mime_type.split("/")[-1] if getattr(audio_obj, "mime_type", "") else "mp3")
|
||||
)
|
||||
path = await self._download_file(audio_obj.file_id, suffix=suffix)
|
||||
return (ContextType.VOICE, path, caption) if path else (None, None, "")
|
||||
|
||||
if message.video or message.video_note:
|
||||
video_obj = message.video or message.video_note
|
||||
path = await self._download_file(video_obj.file_id, suffix=".mp4")
|
||||
return (ContextType.FILE, path, caption) if path else (None, None, "")
|
||||
|
||||
if message.document:
|
||||
doc = message.document
|
||||
ext = ""
|
||||
if doc.file_name and "." in doc.file_name:
|
||||
ext = "." + doc.file_name.rsplit(".", 1)[-1]
|
||||
path = await self._download_file(doc.file_id, suffix=ext, original_name=doc.file_name)
|
||||
if not path:
|
||||
return (None, None, "")
|
||||
# Image-typed documents (user picked "send as file") are treated as images
|
||||
mime = (doc.mime_type or "").lower()
|
||||
if mime.startswith("image/"):
|
||||
return (ContextType.IMAGE, path, caption)
|
||||
return (ContextType.FILE, path, caption)
|
||||
|
||||
if message.text:
|
||||
return (ContextType.TEXT, message.text.strip(), "")
|
||||
|
||||
return (None, None, "")
|
||||
|
||||
async def _download_file(self, file_id: str, suffix: str = "", original_name: str = ""):
|
||||
"""Download via bot.get_file into the local tmp dir; return path or None on failure."""
|
||||
try:
|
||||
f = await self._bot.get_file(file_id)
|
||||
tmp_dir = TelegramMessage.get_tmp_dir()
|
||||
base = original_name or f"{file_id}{suffix or ''}"
|
||||
# Prefix with file_id to avoid name collisions / weird chars
|
||||
safe_name = f"{file_id}_{base}" if original_name else base
|
||||
local_path = os.path.join(tmp_dir, safe_name)
|
||||
await f.download_to_drive(custom_path=local_path)
|
||||
logger.debug(f"[Telegram] downloaded file_id={file_id} -> {local_path}")
|
||||
return local_path
|
||||
except Exception as e:
|
||||
logger.error(f"[Telegram] download_file failed (file_id={file_id}): {e}")
|
||||
return None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Group trigger logic
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _should_reply_in_group(self, update) -> bool:
|
||||
"""Decide whether to reply to a group message based on configuration."""
|
||||
mode = conf().get("telegram_group_trigger", "mention_or_reply")
|
||||
if mode == "all":
|
||||
return True
|
||||
|
||||
message = update.effective_message
|
||||
if not message:
|
||||
return False
|
||||
|
||||
# 1) Mentioned
|
||||
if self.bot_username and self._is_mentioned(message, self.bot_username):
|
||||
return True
|
||||
|
||||
# 2) Reply to a bot message
|
||||
if mode == "mention_or_reply":
|
||||
reply = message.reply_to_message
|
||||
if reply and reply.from_user and reply.from_user.username == self.bot_username:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def _is_mentioned(message, bot_username: str) -> bool:
|
||||
"""Check whether entities/caption_entities contain a @mention of the bot."""
|
||||
bot_at = "@" + bot_username.lower()
|
||||
text = (message.text or message.caption or "").lower()
|
||||
if bot_at in text:
|
||||
return True
|
||||
# Also check entities strictly to support text_mention (no-username @)
|
||||
for ent in (message.entities or []) + (message.caption_entities or []):
|
||||
if ent.type == "mention":
|
||||
src = message.text or message.caption or ""
|
||||
if src[ent.offset: ent.offset + ent.length].lower() == bot_at:
|
||||
return True
|
||||
return False
|
||||
|
||||
def _strip_at_mention(self, content: str) -> str:
|
||||
"""Strip @bot_username from group text (case-insensitive)."""
|
||||
if not content or not self.bot_username:
|
||||
return content
|
||||
pattern = re.compile(r"@" + re.escape(self.bot_username), re.IGNORECASE)
|
||||
return pattern.sub("", content).strip()
|
||||
|
||||
@staticmethod
|
||||
def _compute_session_id(update) -> str:
|
||||
chat = update.effective_chat
|
||||
user = update.effective_user
|
||||
is_group = chat.type in ("group", "supergroup")
|
||||
if is_group:
|
||||
if conf().get("group_shared_session", True):
|
||||
return f"tg_group_{chat.id}"
|
||||
return f"tg_group_{chat.id}_{user.id}"
|
||||
return f"tg_user_{user.id}"
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Override _compose_context: skip the parent's group whitelist/at checks
|
||||
# (already handled in _on_message via _should_reply_in_group). Same idea
|
||||
# as the feishu channel.
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _compose_context(self, ctype: ContextType, content, **kwargs):
|
||||
context = Context(ctype, content)
|
||||
context.kwargs = kwargs
|
||||
if "channel_type" not in context:
|
||||
context["channel_type"] = self.channel_type
|
||||
if "origin_ctype" not in context:
|
||||
context["origin_ctype"] = ctype
|
||||
|
||||
cmsg = context["msg"]
|
||||
if cmsg.is_group:
|
||||
if conf().get("group_shared_session", True):
|
||||
context["session_id"] = cmsg.other_user_id
|
||||
else:
|
||||
context["session_id"] = f"{cmsg.from_user_id}:{cmsg.other_user_id}"
|
||||
else:
|
||||
context["session_id"] = cmsg.from_user_id
|
||||
context["receiver"] = cmsg.other_user_id
|
||||
|
||||
if ctype == ContextType.TEXT:
|
||||
img_match_prefix = check_prefix(content, conf().get("image_create_prefix"))
|
||||
if img_match_prefix:
|
||||
content = content.replace(img_match_prefix, "", 1)
|
||||
context.type = ContextType.IMAGE_CREATE
|
||||
else:
|
||||
context.type = ContextType.TEXT
|
||||
context.content = (content or "").strip()
|
||||
if "desire_rtype" not in context and conf().get("always_reply_voice"):
|
||||
context["desire_rtype"] = ReplyType.VOICE
|
||||
elif ctype == ContextType.VOICE:
|
||||
if "desire_rtype" not in context and (
|
||||
conf().get("voice_reply_voice") or conf().get("always_reply_voice")
|
||||
):
|
||||
context["desire_rtype"] = ReplyType.VOICE
|
||||
|
||||
return context
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Outbound: ChatChannel.send -> Telegram API
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def send(self, reply: Reply, context: Context):
|
||||
"""Called from cow's sync main thread; we marshal the coroutine onto the loop thread."""
|
||||
if self._loop is None or self._bot is None:
|
||||
logger.warning("[Telegram] bot not ready, drop reply")
|
||||
return
|
||||
|
||||
chat_id = context.get("telegram_chat_id")
|
||||
reply_to = context.get("telegram_reply_to_msg_id")
|
||||
if chat_id is None:
|
||||
logger.warning("[Telegram] no telegram_chat_id in context, drop reply")
|
||||
return
|
||||
|
||||
coro = self._async_send(reply, chat_id, reply_to)
|
||||
try:
|
||||
future = asyncio.run_coroutine_threadsafe(coro, self._loop)
|
||||
# Media uploads through a proxy can be slow; let PTB's own timeouts win
|
||||
future.result(timeout=180)
|
||||
except Exception as e:
|
||||
logger.error(f"[Telegram] send failed: {e}")
|
||||
|
||||
# Number of retries for transient network errors (proxy hiccups etc.)
|
||||
_SEND_RETRIES = 2
|
||||
_SEND_RETRY_BACKOFF = 2.0 # seconds
|
||||
|
||||
async def _send_with_retry(self, send_fn, *, label: str):
|
||||
"""Run a single Telegram API call with retries for transient network errors."""
|
||||
from telegram.error import NetworkError, TimedOut
|
||||
last_err = None
|
||||
for attempt in range(self._SEND_RETRIES + 1):
|
||||
try:
|
||||
return await send_fn()
|
||||
except (NetworkError, TimedOut) as e:
|
||||
last_err = e
|
||||
if attempt >= self._SEND_RETRIES:
|
||||
break
|
||||
wait = self._SEND_RETRY_BACKOFF * (attempt + 1)
|
||||
logger.warning(
|
||||
f"[Telegram] {label} transient error (attempt {attempt + 1}/"
|
||||
f"{self._SEND_RETRIES + 1}): {e}; retry in {wait}s"
|
||||
)
|
||||
await asyncio.sleep(wait)
|
||||
raise last_err
|
||||
|
||||
async def _async_send(self, reply: Reply, chat_id, reply_to_msg_id):
|
||||
try:
|
||||
rtype = reply.type
|
||||
content = reply.content
|
||||
|
||||
if rtype == ReplyType.TEXT or rtype == ReplyType.INFO or rtype == ReplyType.ERROR:
|
||||
# Telegram caps a single text message at 4096 chars; auto-split
|
||||
text = str(content) if content is not None else ""
|
||||
if not text:
|
||||
return
|
||||
for chunk in _split_text(text, 4000):
|
||||
await self._send_with_retry(
|
||||
lambda c=chunk: self._bot.send_message(
|
||||
chat_id=chat_id,
|
||||
text=c,
|
||||
reply_to_message_id=reply_to_msg_id,
|
||||
# Avoid failing the whole send if reply_to was deleted
|
||||
allow_sending_without_reply=True,
|
||||
),
|
||||
label="send_message",
|
||||
)
|
||||
|
||||
elif rtype == ReplyType.IMAGE:
|
||||
# Already a local BytesIO; send it directly
|
||||
content.seek(0)
|
||||
await self._send_with_retry(
|
||||
lambda: self._bot.send_photo(
|
||||
chat_id=chat_id,
|
||||
photo=content,
|
||||
reply_to_message_id=reply_to_msg_id,
|
||||
allow_sending_without_reply=True,
|
||||
),
|
||||
label="send_photo",
|
||||
)
|
||||
|
||||
elif rtype == ReplyType.IMAGE_URL:
|
||||
url = str(content)
|
||||
if url.startswith("file://"):
|
||||
local = url[7:]
|
||||
# Open inside the lambda so each retry gets a fresh stream
|
||||
async def _send_local_photo():
|
||||
with open(local, "rb") as f:
|
||||
return await self._bot.send_photo(
|
||||
chat_id=chat_id, photo=f,
|
||||
reply_to_message_id=reply_to_msg_id,
|
||||
allow_sending_without_reply=True,
|
||||
)
|
||||
await self._send_with_retry(_send_local_photo, label="send_photo(file)")
|
||||
else:
|
||||
await self._send_with_retry(
|
||||
lambda: self._bot.send_photo(
|
||||
chat_id=chat_id, photo=url,
|
||||
reply_to_message_id=reply_to_msg_id,
|
||||
allow_sending_without_reply=True,
|
||||
),
|
||||
label="send_photo(url)",
|
||||
)
|
||||
|
||||
elif rtype == ReplyType.VOICE:
|
||||
local = content[7:] if isinstance(content, str) and content.startswith("file://") else content
|
||||
async def _send_voice():
|
||||
with open(local, "rb") as f:
|
||||
return await self._bot.send_voice(
|
||||
chat_id=chat_id, voice=f,
|
||||
reply_to_message_id=reply_to_msg_id,
|
||||
allow_sending_without_reply=True,
|
||||
)
|
||||
await self._send_with_retry(_send_voice, label="send_voice")
|
||||
|
||||
elif rtype == ReplyType.FILE:
|
||||
# Videos go through send_video, everything else through send_document
|
||||
local = content[7:] if isinstance(content, str) and content.startswith("file://") else content
|
||||
# File replies may carry an accompanying text caption
|
||||
caption = getattr(reply, "text_content", None) or None
|
||||
is_video = isinstance(local, str) and local.lower().endswith(
|
||||
(".mp4", ".mov", ".avi", ".mkv", ".webm")
|
||||
)
|
||||
|
||||
async def _send_file():
|
||||
with open(local, "rb") as f:
|
||||
if is_video:
|
||||
return await self._bot.send_video(
|
||||
chat_id=chat_id, video=f, caption=caption,
|
||||
reply_to_message_id=reply_to_msg_id,
|
||||
allow_sending_without_reply=True,
|
||||
)
|
||||
return await self._bot.send_document(
|
||||
chat_id=chat_id, document=f, caption=caption,
|
||||
reply_to_message_id=reply_to_msg_id,
|
||||
allow_sending_without_reply=True,
|
||||
)
|
||||
await self._send_with_retry(_send_file, label="send_video" if is_video else "send_document")
|
||||
|
||||
else:
|
||||
# Fallback: send as plain text
|
||||
await self._send_with_retry(
|
||||
lambda: self._bot.send_message(
|
||||
chat_id=chat_id, text=str(content),
|
||||
reply_to_message_id=reply_to_msg_id,
|
||||
allow_sending_without_reply=True,
|
||||
),
|
||||
label="send_message(fallback)",
|
||||
)
|
||||
|
||||
logger.info(f"[Telegram] sent reply (type={rtype}, chat_id={chat_id})")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[Telegram] _async_send error: {e}", exc_info=True)
|
||||
|
||||
|
||||
def _split_text(text: str, limit: int):
|
||||
"""Split long text preferring line breaks to keep markdown structure intact."""
|
||||
if len(text) <= limit:
|
||||
yield text
|
||||
return
|
||||
buf = []
|
||||
size = 0
|
||||
for line in text.splitlines(keepends=True):
|
||||
if size + len(line) > limit and buf:
|
||||
yield "".join(buf)
|
||||
buf, size = [], 0
|
||||
# Hard-split single lines that exceed the limit
|
||||
while len(line) > limit:
|
||||
yield line[:limit]
|
||||
line = line[limit:]
|
||||
buf.append(line)
|
||||
size += len(line)
|
||||
if buf:
|
||||
yield "".join(buf)
|
||||
62
channel/telegram/telegram_message.py
Normal file
62
channel/telegram/telegram_message.py
Normal file
@@ -0,0 +1,62 @@
|
||||
"""
|
||||
Telegram message adapter.
|
||||
|
||||
Convert a python-telegram-bot Update into cow's unified ChatMessage.
|
||||
File downloads are NOT performed here; the channel layer triggers
|
||||
bot.get_file() on demand because it requires the async event loop.
|
||||
"""
|
||||
import os
|
||||
|
||||
from bridge.context import ContextType
|
||||
from channel.chat_message import ChatMessage
|
||||
from common.utils import expand_path
|
||||
from config import conf
|
||||
|
||||
|
||||
class TelegramMessage(ChatMessage):
|
||||
"""Wrap a Telegram Update into the unified ChatMessage."""
|
||||
|
||||
def __init__(self, update, is_group: bool = False, bot_username: str = "",
|
||||
ctype: ContextType = ContextType.TEXT, content: str = ""):
|
||||
super().__init__(update)
|
||||
message = update.effective_message
|
||||
chat = update.effective_chat
|
||||
user = update.effective_user
|
||||
|
||||
# Basic fields
|
||||
self.msg_id = str(message.message_id) if message else ""
|
||||
self.create_time = int(message.date.timestamp()) if message and message.date else 0
|
||||
self.ctype = ctype
|
||||
self.content = content
|
||||
|
||||
# Sender / chat info
|
||||
from_user_id = str(user.id) if user else "unknown"
|
||||
from_user_nick = (
|
||||
user.full_name if user and user.full_name else (user.username if user else "unknown")
|
||||
)
|
||||
self.from_user_id = from_user_id
|
||||
self.from_user_nickname = from_user_nick or from_user_id
|
||||
self.to_user_id = bot_username or "telegram_bot"
|
||||
self.to_user_nickname = bot_username or "telegram_bot"
|
||||
|
||||
self.is_group = is_group
|
||||
if is_group:
|
||||
# Group: other_user_id = group_id, actual_user_id = sender id
|
||||
self.other_user_id = str(chat.id)
|
||||
self.other_user_nickname = chat.title or str(chat.id)
|
||||
self.actual_user_id = from_user_id
|
||||
self.actual_user_nickname = self.from_user_nickname
|
||||
else:
|
||||
self.other_user_id = from_user_id
|
||||
self.other_user_nickname = self.from_user_nickname
|
||||
|
||||
# Whether the bot was triggered by @-mention or reply (set by channel layer)
|
||||
self.is_at = False
|
||||
|
||||
@staticmethod
|
||||
def get_tmp_dir() -> str:
|
||||
"""Local download directory, aligned with other channels (agent_workspace/tmp)."""
|
||||
workspace_root = expand_path(conf().get("agent_workspace", "~/cow"))
|
||||
tmp_dir = os.path.join(workspace_root, "tmp")
|
||||
os.makedirs(tmp_dir, exist_ok=True)
|
||||
return tmp_dir
|
||||
@@ -445,7 +445,7 @@
|
||||
bg-primary-400 text-white hover:bg-primary-500
|
||||
disabled:bg-slate-300 dark:disabled:bg-slate-600
|
||||
disabled:cursor-not-allowed cursor-pointer transition-colors duration-150"
|
||||
disabled onclick="sendMessage()">
|
||||
disabled>
|
||||
<i class="fas fa-paper-plane text-sm"></i>
|
||||
</button>
|
||||
</div>
|
||||
|
||||
@@ -1367,3 +1367,35 @@
|
||||
text-align: right;
|
||||
}
|
||||
.voice-pill audio { display: none; }
|
||||
|
||||
/* Send button toggles into a Stop button while an SSE stream is in flight.
|
||||
Match the look of the disabled send button (light grey block + white
|
||||
glyph) so it reads as the same visual element, just paused/idle from
|
||||
sending perspective and clickable to stop. */
|
||||
#send-btn.send-btn-cancel {
|
||||
background-color: rgb(203 213 225) !important; /* slate-300, == disabled send-btn */
|
||||
color: white !important;
|
||||
}
|
||||
#send-btn.send-btn-cancel:hover {
|
||||
background-color: rgb(148 163 184) !important; /* slate-400 */
|
||||
}
|
||||
#send-btn.send-btn-cancel:disabled {
|
||||
background-color: rgb(226 232 240) !important; /* slate-200, while stop is in flight */
|
||||
color: white !important;
|
||||
cursor: progress;
|
||||
}
|
||||
.dark #send-btn.send-btn-cancel {
|
||||
background-color: rgb(71 85 105) !important; /* slate-600, == dark disabled send-btn */
|
||||
color: white !important;
|
||||
}
|
||||
.dark #send-btn.send-btn-cancel:hover {
|
||||
background-color: rgb(100 116 139) !important; /* slate-500 */
|
||||
}
|
||||
.dark #send-btn.send-btn-cancel:disabled {
|
||||
background-color: rgb(51 65 85) !important; /* slate-700 */
|
||||
color: rgb(203 213 225) !important;
|
||||
}
|
||||
|
||||
.agent-cancelled-tag {
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
@@ -18,9 +18,9 @@ const I18N = {
|
||||
menu_memory: '记忆', menu_knowledge: '知识', menu_channels: '通道', menu_tasks: '定时',
|
||||
menu_logs: '日志',
|
||||
models_title: '模型管理',
|
||||
models_desc: '统一管理对话、视觉、语音、向量、图像、搜索能力',
|
||||
models_desc: '统一管理对话、图像、语音、向量、搜索能力',
|
||||
models_section_vendors: '厂商凭据',
|
||||
models_section_vendors_desc: '一处配置,多个能力共享',
|
||||
models_section_vendors_desc: '一处配置,多个模型能力共享',
|
||||
models_section_capabilities: '模型能力',
|
||||
models_add_vendor: '添加厂商',
|
||||
models_provider: '厂商',
|
||||
@@ -33,20 +33,20 @@ const I18N = {
|
||||
models_base_default_hint: '留空将使用官方默认地址',
|
||||
models_base_default: '默认',
|
||||
models_capability_chat: '主模型',
|
||||
models_capability_chat_desc: '驱动对话与 Agent 推理',
|
||||
models_capability_chat_desc: '用于基础对话和 Agent 推理',
|
||||
models_capability_vision: '图像理解',
|
||||
models_capability_vision_desc: '识别图片内容;未指定时自动跟随主模型',
|
||||
models_capability_vision_desc: '识别图片内容,用于图像识别工具',
|
||||
models_capability_image: '图像生成',
|
||||
models_capability_image_desc: '生成图片,可固定厂商或跟随主模型',
|
||||
models_capability_image_desc: '生成图片,用于图像生成技能',
|
||||
models_auto_using: '当前优先使用',
|
||||
models_capability_asr: '语音识别',
|
||||
models_capability_asr_desc: '语音转文字',
|
||||
models_capability_tts: '语音合成',
|
||||
models_capability_tts_desc: '文字转语音',
|
||||
models_capability_embedding: '向量',
|
||||
models_capability_embedding_desc: '记忆与知识的向量化',
|
||||
models_capability_embedding_desc: '用于记忆与知识的向量化检索',
|
||||
models_capability_search: '联网搜索',
|
||||
models_capability_search_desc: '实时网页检索能力,在搜索工具中使用',
|
||||
models_capability_search_desc: '实时网页检索能力,用于搜索工具',
|
||||
models_strategy_auto: '自动',
|
||||
models_search_strategy_label: '策略',
|
||||
models_search_strategy_fixed: '指定',
|
||||
@@ -106,7 +106,7 @@ const I18N = {
|
||||
config_custom_model_hint: '输入自定义模型名称',
|
||||
config_save: '保存', config_saved: '已保存',
|
||||
config_save_error: '保存失败',
|
||||
config_custom_option: '自定义...',
|
||||
config_custom_option: '自定义',
|
||||
config_custom_tip: '接口需遵循 OpenAI API 协议',
|
||||
config_security: '安全设置', config_password: '访问密码',
|
||||
config_password_hint: '留空则不启用密码保护',
|
||||
@@ -192,9 +192,9 @@ const I18N = {
|
||||
menu_memory: 'Memory', menu_knowledge: 'Knowledge', menu_channels: 'Channels', menu_tasks: 'Tasks',
|
||||
menu_logs: 'Logs',
|
||||
models_title: 'Models',
|
||||
models_desc: 'Manage chat, vision, voice, embedding, image and search capabilities in one place',
|
||||
models_desc: 'Manage chat, image, voice, embedding and search capabilities in one place',
|
||||
models_section_vendors: 'Vendor Credentials',
|
||||
models_section_vendors_desc: 'Configured once, shared by every capability',
|
||||
models_section_vendors_desc: 'Configured once, shared by multiple model capabilities',
|
||||
models_section_capabilities: 'Capabilities',
|
||||
models_add_vendor: 'Add Vendor',
|
||||
models_provider: 'Provider',
|
||||
@@ -207,20 +207,20 @@ const I18N = {
|
||||
models_base_default_hint: 'Leave blank to use the official default base URL',
|
||||
models_base_default: 'Default',
|
||||
models_capability_chat: 'Main Model',
|
||||
models_capability_chat_desc: 'Powers chat and agent reasoning',
|
||||
models_capability_chat_desc: 'Used for basic chat and agent reasoning',
|
||||
models_capability_vision: 'Image Understanding',
|
||||
models_capability_vision_desc: 'Reads images; auto-follows main model when unspecified',
|
||||
models_capability_vision_desc: 'Recognizes image content, used by image recognition tools',
|
||||
models_capability_image: 'Image Generation',
|
||||
models_capability_image_desc: 'Generate images; pin a vendor or follow the main model',
|
||||
models_capability_image_desc: 'Generates images, used by image generation skills',
|
||||
models_auto_using: 'Preferred',
|
||||
models_capability_asr: 'Speech Recognition',
|
||||
models_capability_asr_desc: 'Voice to text',
|
||||
models_capability_tts: 'Speech Synthesis',
|
||||
models_capability_tts_desc: 'Text to voice',
|
||||
models_capability_embedding: 'Embedding',
|
||||
models_capability_embedding_desc: 'Vectorizes memory and knowledge',
|
||||
models_capability_embedding_desc: 'Used for vectorized retrieval of memory and knowledge',
|
||||
models_capability_search: 'Web Search',
|
||||
models_capability_search_desc: 'Real-time web retrieval',
|
||||
models_capability_search_desc: 'Real-time web retrieval, used by search tools',
|
||||
models_strategy_auto: 'auto',
|
||||
models_search_strategy_label: 'Strategy',
|
||||
models_search_strategy_fixed: 'Pinned',
|
||||
@@ -280,7 +280,7 @@ const I18N = {
|
||||
config_custom_model_hint: 'Enter custom model name',
|
||||
config_save: 'Save', config_saved: 'Saved',
|
||||
config_save_error: 'Save failed',
|
||||
config_custom_option: 'Custom...',
|
||||
config_custom_option: 'Custom',
|
||||
config_custom_tip: 'API must follow OpenAI protocol.',
|
||||
config_security: 'Security', config_password: 'Password',
|
||||
config_password_hint: 'Leave empty to disable password protection',
|
||||
@@ -367,6 +367,15 @@ function t(key) {
|
||||
return (I18N[currentLang] && I18N[currentLang][key]) || (I18N.en[key]) || key;
|
||||
}
|
||||
|
||||
// Resolve a localized label that may be either a plain string or
|
||||
// a {zh, en} object returned by the backend.
|
||||
function localizedLabel(label) {
|
||||
if (label && typeof label === 'object') {
|
||||
return label[currentLang] || label.en || label.zh || '';
|
||||
}
|
||||
return label || '';
|
||||
}
|
||||
|
||||
function applyI18n() {
|
||||
document.querySelectorAll('[data-i18n]').forEach(el => {
|
||||
el.textContent = t(el.dataset.i18n);
|
||||
@@ -1007,7 +1016,60 @@ const inputHistory = [];
|
||||
let historyIdx = -1;
|
||||
let historySavedDraft = '';
|
||||
|
||||
// While an SSE stream is in flight, the send button morphs into a cancel
|
||||
// button. Only one in-flight request is supported at a time.
|
||||
let activeRequestId = null;
|
||||
let sendBtnMode = 'send'; // 'send' | 'cancel'
|
||||
|
||||
function setSendBtnCancelMode(requestId) {
|
||||
activeRequestId = requestId;
|
||||
sendBtnMode = 'cancel';
|
||||
sendBtn.disabled = false;
|
||||
sendBtn.classList.add('send-btn-cancel');
|
||||
sendBtn.title = (currentLang === 'zh' ? '中止' : 'Cancel');
|
||||
sendBtn.innerHTML = '<i class="fas fa-stop text-sm"></i>';
|
||||
}
|
||||
|
||||
function resetSendBtnSendMode() {
|
||||
activeRequestId = null;
|
||||
sendBtnMode = 'send';
|
||||
sendBtn.classList.remove('send-btn-cancel');
|
||||
sendBtn.title = '';
|
||||
sendBtn.innerHTML = '<i class="fas fa-paper-plane text-sm"></i>';
|
||||
updateSendBtnState();
|
||||
}
|
||||
|
||||
function requestCancel() {
|
||||
const reqId = activeRequestId;
|
||||
if (!reqId) return;
|
||||
fetch('/cancel', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ request_id: reqId, session_id: sessionId, lang: currentLang }),
|
||||
}).catch(err => {
|
||||
console.warn('[cancel] request failed', err);
|
||||
});
|
||||
// Optimistic UI lock so the click visibly registers before the SSE
|
||||
// "cancelled" event arrives.
|
||||
sendBtn.disabled = true;
|
||||
sendBtn.title = (currentLang === 'zh' ? '已中止' : 'Cancelled');
|
||||
}
|
||||
|
||||
// Button click is the only path to Cancel. Pressing Enter still calls
|
||||
// sendMessage() so users can submit "/cancel" as a regular slash command.
|
||||
sendBtn.addEventListener('click', () => {
|
||||
if (sendBtnMode === 'cancel') {
|
||||
requestCancel();
|
||||
} else {
|
||||
sendMessage();
|
||||
}
|
||||
});
|
||||
|
||||
function updateSendBtnState() {
|
||||
if (sendBtnMode === 'cancel') {
|
||||
// Don't downgrade a Cancel button on input edits.
|
||||
return;
|
||||
}
|
||||
sendBtn.disabled = uploadingCount > 0 || (!chatInput.value.trim() && pendingAttachments.length === 0);
|
||||
}
|
||||
|
||||
@@ -1255,6 +1317,7 @@ const SLASH_COMMANDS = [
|
||||
{ cmd: '/knowledge on', desc: '开启知识库' },
|
||||
{ cmd: '/knowledge off', desc: '关闭知识库' },
|
||||
{ cmd: '/config', desc: '查看当前配置' },
|
||||
{ cmd: '/cancel', desc: '中止当前正在运行的 Agent 任务' },
|
||||
{ cmd: '/logs', desc: '查看最近日志' },
|
||||
{ cmd: '/version', desc: '查看版本' },
|
||||
];
|
||||
@@ -1525,6 +1588,7 @@ function sendVoiceMessage(text, audioUrl) {
|
||||
stream: true,
|
||||
timestamp: timestamp.toISOString(),
|
||||
is_voice: true,
|
||||
lang: currentLang,
|
||||
};
|
||||
|
||||
const MAX_RETRIES = 2;
|
||||
@@ -1538,7 +1602,12 @@ function sendVoiceMessage(text, audioUrl) {
|
||||
.then(r => r.json())
|
||||
.then(data => {
|
||||
if (data.status === 'success') {
|
||||
if (data.stream) {
|
||||
if (data.inline_reply) {
|
||||
// Synchronous fast-path reply (e.g. /cancel); skip SSE.
|
||||
loadingEl.remove();
|
||||
addBotMessage(data.inline_reply, new Date());
|
||||
} else if (data.stream) {
|
||||
setSendBtnCancelMode(data.request_id);
|
||||
startSSE(data.request_id, loadingEl, timestamp, titleInfo);
|
||||
} else {
|
||||
loadingContainers[data.request_id] = loadingEl;
|
||||
@@ -1546,6 +1615,7 @@ function sendVoiceMessage(text, audioUrl) {
|
||||
} else {
|
||||
loadingEl.remove();
|
||||
addBotMessage(t('error_send'), new Date());
|
||||
resetSendBtnSendMode();
|
||||
}
|
||||
})
|
||||
.catch(err => {
|
||||
@@ -1582,6 +1652,10 @@ function addUserVoiceMessage(audioUrl, caption, timestamp) {
|
||||
}
|
||||
|
||||
function sendMessage() {
|
||||
// Do NOT branch on sendBtnMode here: Enter should always send (so
|
||||
// typing "/cancel" submits normally). Cancel is wired only to the
|
||||
// send button's pointer click — see send-btn listener above.
|
||||
|
||||
const text = chatInput.value.trim();
|
||||
if (!text && pendingAttachments.length === 0) return;
|
||||
|
||||
@@ -1610,7 +1684,7 @@ function sendMessage() {
|
||||
renderAttachmentPreview();
|
||||
sendBtn.disabled = true;
|
||||
|
||||
const body = { session_id: sessionId, message: text, stream: true, timestamp: timestamp.toISOString() };
|
||||
const body = { session_id: sessionId, message: text, stream: true, timestamp: timestamp.toISOString(), lang: currentLang };
|
||||
if (attachments.length > 0) {
|
||||
body.attachments = attachments.map(a => ({
|
||||
file_path: a.file_path,
|
||||
@@ -1632,7 +1706,13 @@ function sendMessage() {
|
||||
.then(r => r.json())
|
||||
.then(data => {
|
||||
if (data.status === 'success') {
|
||||
if (data.stream) {
|
||||
if (data.inline_reply) {
|
||||
// Channel handled synchronously (e.g. /cancel fast-path);
|
||||
// render as a bot bubble and skip SSE entirely.
|
||||
loadingEl.remove();
|
||||
addBotMessage(data.inline_reply, new Date());
|
||||
} else if (data.stream) {
|
||||
setSendBtnCancelMode(data.request_id);
|
||||
startSSE(data.request_id, loadingEl, timestamp, titleInfo);
|
||||
} else {
|
||||
loadingContainers[data.request_id] = loadingEl;
|
||||
@@ -1640,12 +1720,14 @@ function sendMessage() {
|
||||
} else {
|
||||
loadingEl.remove();
|
||||
addBotMessage(t('error_send'), new Date());
|
||||
resetSendBtnSendMode();
|
||||
}
|
||||
})
|
||||
.catch(err => {
|
||||
if (err.name === 'AbortError') {
|
||||
loadingEl.remove();
|
||||
addBotMessage(t('error_timeout'), new Date());
|
||||
resetSendBtnSendMode();
|
||||
return;
|
||||
}
|
||||
if (attempt < MAX_RETRIES) {
|
||||
@@ -1655,6 +1737,7 @@ function sendMessage() {
|
||||
}
|
||||
loadingEl.remove();
|
||||
addBotMessage(t('error_send'), new Date());
|
||||
resetSendBtnSendMode();
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1910,14 +1993,33 @@ function startSSE(requestId, loadingEl, timestamp, titleInfo) {
|
||||
stepsEl.appendChild(wrap);
|
||||
scrollChatToBottom();
|
||||
|
||||
} else if (item.type === 'cancelled') {
|
||||
// Agent acknowledged the stop; mark the bubble. A trailing
|
||||
// "done" still arrives with the partial answer.
|
||||
ensureBotEl();
|
||||
if (currentReasoningEl) {
|
||||
finalizeThinking(currentReasoningEl, reasoningStartTime, reasoningText);
|
||||
currentReasoningEl = null;
|
||||
reasoningText = '';
|
||||
}
|
||||
if (!botEl.querySelector('.agent-cancelled-tag')) {
|
||||
const tag = document.createElement('div');
|
||||
tag.className = 'agent-cancelled-tag text-xs text-amber-600 dark:text-amber-400 mt-1';
|
||||
tag.textContent = (currentLang === 'zh') ? '已中止' : 'Cancelled';
|
||||
stepsEl.appendChild(tag);
|
||||
}
|
||||
resetSendBtnSendMode();
|
||||
|
||||
} else if (item.type === 'done') {
|
||||
// Don't close the stream yet: the backend keeps it open
|
||||
// for a short tail to deliver async attachments such as
|
||||
// TTS audio (`voice_attach`). It will close the stream on
|
||||
// its own via onerror once the tail expires.
|
||||
done = true;
|
||||
resetSendBtnSendMode();
|
||||
|
||||
const finalText = item.content || accumulatedText;
|
||||
const finalTextRaw = item.content || accumulatedText;
|
||||
const finalText = localizeCancelMarker(finalTextRaw);
|
||||
|
||||
if (!botEl && finalText) {
|
||||
if (loadingEl) { loadingEl.remove(); loadingEl = null; }
|
||||
@@ -1925,7 +2027,7 @@ function startSSE(requestId, loadingEl, timestamp, titleInfo) {
|
||||
} else if (botEl) {
|
||||
contentEl.classList.remove('sse-streaming');
|
||||
if (finalText) contentEl.innerHTML = renderMarkdown(finalText);
|
||||
contentEl.dataset.rawMd = finalText || '';
|
||||
contentEl.dataset.rawMd = finalTextRaw || '';
|
||||
const copyBtn = botEl.querySelector('.copy-msg-btn');
|
||||
if (copyBtn && finalText) copyBtn.style.display = '';
|
||||
applyHighlighting(botEl);
|
||||
@@ -1955,6 +2057,7 @@ function startSSE(requestId, loadingEl, timestamp, titleInfo) {
|
||||
delete activeStreams[requestId];
|
||||
if (loadingEl) { loadingEl.remove(); loadingEl = null; }
|
||||
addBotMessage(t('error_send'), new Date());
|
||||
resetSendBtnSendMode();
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1991,6 +2094,7 @@ function startSSE(requestId, loadingEl, timestamp, titleInfo) {
|
||||
applyHighlighting(botEl);
|
||||
bindChatKnowledgeLinks(botEl);
|
||||
}
|
||||
resetSendBtnSendMode();
|
||||
};
|
||||
}
|
||||
|
||||
@@ -2229,13 +2333,23 @@ function _renderSentFileFromToolResult(step) {
|
||||
`<i class="fas fa-file-download" style="color:#6b7280;"></i> ${escapeHtml(fileName)}</a></div>`;
|
||||
}
|
||||
|
||||
// Cosmetic translator for cancel markers persisted in history.
|
||||
// History keeps the English canonical form for the LLM; only display is localized.
|
||||
function localizeCancelMarker(text) {
|
||||
if (!text) return text;
|
||||
if (currentLang !== 'zh') return text;
|
||||
return text
|
||||
.replace(/_\(Cancelled by user\)_/g, '_(用户已中止)_')
|
||||
.replace(/_\(Cancelled\)_/g, '_(已中止)_');
|
||||
}
|
||||
|
||||
function createBotMessageEl(content, timestamp, requestId, msg) {
|
||||
const el = document.createElement('div');
|
||||
el.className = 'flex gap-3 px-4 sm:px-6 py-3';
|
||||
if (requestId) el.dataset.requestId = requestId;
|
||||
|
||||
let stepsHtml = '';
|
||||
let displayContent = content;
|
||||
let displayContent = localizeCancelMarker(content);
|
||||
|
||||
if (msg && msg.steps && msg.steps.length > 0) {
|
||||
// New format: ordered steps with interleaved content
|
||||
@@ -3164,7 +3278,7 @@ function initConfigView(data) {
|
||||
configCurrentModel = data.model || '';
|
||||
|
||||
const providerEl = document.getElementById('cfg-provider');
|
||||
const providerOpts = Object.entries(configProviders).map(([pid, p]) => ({ value: pid, label: p.label }));
|
||||
const providerOpts = Object.entries(configProviders).map(([pid, p]) => ({ value: pid, label: localizedLabel(p.label) }));
|
||||
|
||||
// if use_linkai is enabled, always select linkai as the provider
|
||||
// Otherwise prefer bot_type from config, fall back to model-based detection
|
||||
@@ -3914,7 +4028,7 @@ function renderVendorChip(p) {
|
||||
bg-slate-50 dark:bg-white/5 hover:border-primary-300 dark:hover:border-primary-500/50
|
||||
cursor-pointer transition-colors duration-150 text-left">
|
||||
${renderProviderLogo(p, 28)}
|
||||
<span class="flex-1 min-w-0 text-sm font-medium text-slate-800 dark:text-slate-100 truncate">${escapeHtml(p.label)}</span>
|
||||
<span class="flex-1 min-w-0 text-sm font-medium text-slate-800 dark:text-slate-100 truncate">${escapeHtml(localizedLabel(p.label))}</span>
|
||||
<i class="fas fa-pen-to-square text-[11px] text-slate-400 dark:text-slate-500 group-hover:text-primary-500 transition-colors"></i>
|
||||
</button>`;
|
||||
}
|
||||
@@ -3922,7 +4036,7 @@ function renderVendorChip(p) {
|
||||
// Render a uniformly-styled logo for a provider. Tries an SVG asset first; if
|
||||
// it 404s the <img> swaps itself for a monogram fallback via onerror.
|
||||
function renderProviderLogo(p, sizePx) {
|
||||
const initial = (p.label || p.id || '?').slice(0, 1).toUpperCase();
|
||||
const initial = (localizedLabel(p.label) || p.id || '?').slice(0, 1).toUpperCase();
|
||||
const sz = sizePx || 32;
|
||||
const url = `${MODELS_PROVIDER_LOGO_PATH}/${encodeURIComponent(p.id)}.svg`;
|
||||
const fallbackId = `pl-${p.id}-${Math.random().toString(36).slice(2, 8)}`;
|
||||
@@ -3977,7 +4091,7 @@ function renderCapabilityHeaderTag(def, cap) {
|
||||
function _searchProviderLabel(cap, providerId) {
|
||||
const list = (cap && cap.providers) || [];
|
||||
const hit = list.find(p => p.id === providerId);
|
||||
return hit ? hit.label : providerId;
|
||||
return hit ? localizedLabel(hit.label) : providerId;
|
||||
}
|
||||
|
||||
// Search card body: strategy picker + (when fixed) provider picker + a
|
||||
@@ -4103,7 +4217,7 @@ function _renderSearchSummary(body, cap) {
|
||||
class="inline-flex items-center gap-1 px-2 py-0.5 text-[11px] rounded-md cursor-pointer
|
||||
bg-emerald-50 dark:bg-emerald-900/30 text-emerald-600 dark:text-emerald-400
|
||||
hover:bg-emerald-100 dark:hover:bg-emerald-900/50 transition-colors">
|
||||
<i class="fas fa-check text-[10px]"></i>${escapeHtml(p.label)}
|
||||
<i class="fas fa-check text-[10px]"></i>${escapeHtml(localizedLabel(p.label))}
|
||||
</button>
|
||||
`).join('');
|
||||
host.innerHTML = `
|
||||
@@ -4150,7 +4264,7 @@ function openSearchAddProviderPicker(missingProviders) {
|
||||
class="w-full flex items-center justify-between px-3 py-2.5 rounded-lg cursor-pointer
|
||||
bg-slate-50 dark:bg-white/5 hover:bg-slate-100 dark:hover:bg-white/10
|
||||
text-sm text-slate-700 dark:text-slate-200 transition-colors">
|
||||
<span>${escapeHtml(p.label)}</span>
|
||||
<span>${escapeHtml(localizedLabel(p.label))}</span>
|
||||
<i class="fas fa-chevron-right text-[10px] text-slate-400"></i>
|
||||
</button>
|
||||
`).join('');
|
||||
@@ -4607,7 +4721,7 @@ function renderCapabilityHints(def, cap, body, currentProvider) {
|
||||
// id ("linkai") when we know it. Falls back to the id when the
|
||||
// provider isn't in our vendor table (rare).
|
||||
const provMeta = modelsState.providers.find(p => p.id === fbProv);
|
||||
const fbProvLabel = (provMeta && provMeta.label) || fbProv;
|
||||
const fbProvLabel = (provMeta && localizedLabel(provMeta.label)) || fbProv;
|
||||
const fbText = fbModel ? `${fbProvLabel} / ${fbModel}` : fbProvLabel;
|
||||
slot.innerHTML = `
|
||||
<p class="flex items-center gap-1.5 text-xs text-slate-400 dark:text-slate-500 min-w-0">
|
||||
@@ -4639,7 +4753,7 @@ function buildCapabilityProviderOptions(def, cap) {
|
||||
const configured = !tracked || !!meta.configured;
|
||||
return {
|
||||
value: pid,
|
||||
label: (meta && meta.label) || pid,
|
||||
label: (meta && localizedLabel(meta.label)) || pid,
|
||||
_tracked: tracked,
|
||||
_configured: configured,
|
||||
};
|
||||
@@ -4798,7 +4912,7 @@ function rebuildCapabilityModelDropdown(def, providerId, selectedModel, scope) {
|
||||
modelValues.push(entry.value);
|
||||
return { value: entry.value, label: entry.label || entry.value, hint: entry.hint || '' };
|
||||
});
|
||||
opts.push({ value: '__custom__', label: currentLang === 'zh' ? '自定义...' : 'Custom...' });
|
||||
opts.push({ value: '__custom__', label: currentLang === 'zh' ? '自定义' : 'Custom' });
|
||||
|
||||
let initialValue = selectedModel || '';
|
||||
if (initialValue && !modelValues.includes(initialValue)) {
|
||||
@@ -4881,7 +4995,7 @@ function rebuildCapabilityVoiceDropdown(providerId, selectedVoice, scope, modelI
|
||||
hint: desc === code ? '' : code,
|
||||
};
|
||||
});
|
||||
opts.push({ value: '__custom__', label: currentLang === 'zh' ? '自定义...' : 'Custom...' });
|
||||
opts.push({ value: '__custom__', label: currentLang === 'zh' ? '自定义' : 'Custom' });
|
||||
|
||||
// Off-catalog values route through the custom branch.
|
||||
let initial = selectedVoice || '';
|
||||
@@ -5069,7 +5183,7 @@ function openVendorModal(providerId, onSaved) {
|
||||
const pickerEl = document.getElementById('vendor-modal-picker');
|
||||
const pickerOpts = modelsState.providers.map(p => ({
|
||||
value: p.id,
|
||||
label: p.label,
|
||||
label: localizedLabel(p.label),
|
||||
_configured: !!p.configured,
|
||||
}));
|
||||
initDropdown(pickerEl, pickerOpts, defaultId, (val) => fillVendorModalForProvider(val));
|
||||
@@ -5108,7 +5222,7 @@ function openVendorModal(providerId, onSaved) {
|
||||
function fillVendorModalForProvider(providerId) {
|
||||
const meta = modelsState.providers.find(p => p.id === providerId);
|
||||
if (!meta) return;
|
||||
document.getElementById('vendor-modal-title').textContent = meta.label;
|
||||
document.getElementById('vendor-modal-title').textContent = localizedLabel(meta.label);
|
||||
document.getElementById('vendor-modal-subtitle').textContent = meta.id;
|
||||
|
||||
// ----- API Base -----
|
||||
|
||||
@@ -28,8 +28,16 @@ from config import conf
|
||||
IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".svg"}
|
||||
VIDEO_EXTENSIONS = {".mp4", ".webm", ".avi", ".mov", ".mkv"}
|
||||
|
||||
def _get_web_password() -> str:
|
||||
# Coerce to str so non-string values in config.json (e.g. numeric password) won't break comparisons
|
||||
pwd = conf().get("web_password", "")
|
||||
if pwd is None:
|
||||
return ""
|
||||
return str(pwd)
|
||||
|
||||
|
||||
def _is_password_enabled():
|
||||
return bool(conf().get("web_password", ""))
|
||||
return bool(_get_web_password())
|
||||
|
||||
|
||||
def _session_expire_seconds():
|
||||
@@ -40,7 +48,7 @@ def _create_auth_token():
|
||||
"""Create a stateless signed token: ``<timestamp_hex>.<hmac_hex>``."""
|
||||
ts = format(int(time.time()), "x")
|
||||
sig = hmac.new(
|
||||
conf().get("web_password", "").encode(),
|
||||
_get_web_password().encode(),
|
||||
ts.encode(),
|
||||
hashlib.sha256,
|
||||
).hexdigest()
|
||||
@@ -63,7 +71,7 @@ def _verify_auth_token(token):
|
||||
if time.time() - ts > _session_expire_seconds():
|
||||
return False
|
||||
expected = hmac.new(
|
||||
conf().get("web_password", "").encode(),
|
||||
_get_web_password().encode(),
|
||||
ts_hex.encode(),
|
||||
hashlib.sha256,
|
||||
).hexdigest()
|
||||
@@ -85,6 +93,15 @@ def _require_auth():
|
||||
json.dumps({"status": "error", "message": "Unauthorized"}))
|
||||
|
||||
|
||||
# Localized text for /cancel system replies. Web is the only channel that
|
||||
# honors a per-request `lang`; other channels reply in Chinese by default.
|
||||
def _cancel_reply_text(cancelled: int, lang: str) -> str:
|
||||
en = lang.startswith("en")
|
||||
if cancelled > 0:
|
||||
return "🛑 Cancelled." if en else "🛑 已中止"
|
||||
return "Nothing to cancel." if en else "当前没有可中止的任务。"
|
||||
|
||||
|
||||
def _get_upload_dir() -> str:
|
||||
from common.utils import expand_path
|
||||
ws_root = expand_path(conf().get("agent_workspace", "~/cow"))
|
||||
@@ -429,6 +446,18 @@ class WebChannel(ChatChannel):
|
||||
"timestamp": time.time(),
|
||||
})
|
||||
|
||||
elif event_type == "agent_cancelled":
|
||||
# Push an explicit cancelled SSE event so the frontend
|
||||
# marks the bubble as stopped. A trailing "done" still
|
||||
# arrives with the partial answer.
|
||||
final_response = data.get("final_response", "")
|
||||
q.put({
|
||||
"type": "cancelled",
|
||||
"content": final_response,
|
||||
"request_id": request_id,
|
||||
"timestamp": time.time(),
|
||||
})
|
||||
|
||||
elif event_type == "agent_end":
|
||||
# Safety net: if the agent finishes with an empty final_response,
|
||||
# chat_channel skips _send_reply (because reply.content is empty),
|
||||
@@ -748,6 +777,25 @@ class WebChannel(ChatChannel):
|
||||
# desire_rtype concept used by other channels).
|
||||
is_voice_input = bool(json_data.get('is_voice', False))
|
||||
|
||||
# Fast path for /cancel: bypass the session queue and SSE setup.
|
||||
# Web frontend (stream=true) only listens to SSE, so we return an
|
||||
# inline_reply payload to be rendered synchronously.
|
||||
stripped_prompt = (prompt or "").strip().lower()
|
||||
if stripped_prompt == "/cancel":
|
||||
from agent.protocol import get_cancel_registry
|
||||
cancelled = get_cancel_registry().cancel_session(session_id)
|
||||
lang = (json_data.get('lang') or 'zh').lower()
|
||||
msg_text = _cancel_reply_text(cancelled, lang)
|
||||
logger.info(
|
||||
f"[WebChannel] /cancel fast-path: session={session_id}, cancelled={cancelled}, lang={lang}"
|
||||
)
|
||||
return json.dumps({
|
||||
"status": "success",
|
||||
"request_id": "",
|
||||
"stream": False,
|
||||
"inline_reply": msg_text,
|
||||
})
|
||||
|
||||
# Append file references to the prompt (same format as QQ channel)
|
||||
if attachments:
|
||||
file_refs = []
|
||||
@@ -854,6 +902,11 @@ class WebChannel(ChatChannel):
|
||||
if itype == "done":
|
||||
post_done = True
|
||||
post_deadline = time.time() + POST_DONE_TAIL_SECONDS
|
||||
elif itype == "cancelled":
|
||||
# Close SSE tail quickly after cancel; don't wait for the
|
||||
# full TTS tail since the user already pressed Stop.
|
||||
post_done = True
|
||||
post_deadline = time.time() + 3
|
||||
elif itype == "voice_attach":
|
||||
# WSGI buffers the previous chunk until the next yield;
|
||||
# shrink the tail so the generator wakes up quickly to
|
||||
@@ -864,6 +917,59 @@ class WebChannel(ChatChannel):
|
||||
finally:
|
||||
self.sse_queues.pop(request_id, None)
|
||||
|
||||
def cancel_request(self):
|
||||
"""
|
||||
Cancel an in-flight agent run.
|
||||
|
||||
Body: {"request_id": "...", "session_id": "..."}
|
||||
Either field is sufficient; request_id is preferred when known.
|
||||
Always returns success even when nothing was running, so the
|
||||
client's UX is idempotent.
|
||||
"""
|
||||
try:
|
||||
from agent.protocol import get_cancel_registry
|
||||
|
||||
data = web.data()
|
||||
try:
|
||||
json_data = json.loads(data) if data else {}
|
||||
except Exception:
|
||||
json_data = {}
|
||||
|
||||
request_id = (json_data.get("request_id") or "").strip()
|
||||
session_id = (json_data.get("session_id") or "").strip()
|
||||
lang = (json_data.get("lang") or "zh").lower()
|
||||
|
||||
registry = get_cancel_registry()
|
||||
cancelled = 0
|
||||
|
||||
if request_id:
|
||||
if registry.cancel_request(request_id):
|
||||
cancelled = 1
|
||||
|
||||
if cancelled == 0 and session_id:
|
||||
cancelled = registry.cancel_session(session_id)
|
||||
|
||||
if request_id and request_id in self.sse_queues:
|
||||
self.sse_queues[request_id].put({
|
||||
"type": "cancelled",
|
||||
"content": "Cancelled" if lang.startswith("en") else "已中止",
|
||||
"request_id": request_id,
|
||||
"timestamp": time.time(),
|
||||
})
|
||||
|
||||
logger.info(
|
||||
f"[WebChannel] cancel request: request_id={request_id!r}, "
|
||||
f"session_id={session_id!r}, cancelled={cancelled}"
|
||||
)
|
||||
return json.dumps({
|
||||
"status": "success",
|
||||
"cancelled": cancelled,
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[WebChannel] cancel_request error: {e}")
|
||||
return json.dumps({"status": "error", "message": str(e)})
|
||||
|
||||
def poll_response(self):
|
||||
"""
|
||||
Poll for responses using the session_id.
|
||||
@@ -959,6 +1065,7 @@ class WebChannel(ChatChannel):
|
||||
'/api/voice/tts', 'VoiceTtsHandler',
|
||||
'/poll', 'PollHandler',
|
||||
'/stream', 'StreamHandler',
|
||||
'/cancel', 'CancelHandler',
|
||||
'/chat', 'ChatHandler',
|
||||
'/config', 'ConfigHandler',
|
||||
'/api/models', 'ModelsHandler',
|
||||
@@ -1050,8 +1157,8 @@ class AuthLoginHandler:
|
||||
data = json.loads(web.data())
|
||||
except Exception:
|
||||
return json.dumps({"status": "error", "message": "Invalid request"})
|
||||
password = data.get("password", "")
|
||||
expected = conf().get("web_password", "")
|
||||
password = str(data.get("password", "") or "")
|
||||
expected = _get_web_password()
|
||||
if not hmac.compare_digest(password, expected):
|
||||
logger.warning("[WebChannel] Invalid login attempt")
|
||||
return json.dumps({"status": "error", "message": "Wrong password"})
|
||||
@@ -1232,6 +1339,12 @@ class PollHandler:
|
||||
return WebChannel().poll_response()
|
||||
|
||||
|
||||
class CancelHandler:
|
||||
def POST(self):
|
||||
_require_auth()
|
||||
return WebChannel().cancel_request()
|
||||
|
||||
|
||||
class StreamHandler:
|
||||
def GET(self):
|
||||
_require_auth()
|
||||
@@ -1274,6 +1387,7 @@ class ConfigHandler:
|
||||
const.DOUBAO_SEED_2_PRO, const.DOUBAO_SEED_2_CODE,
|
||||
const.KIMI_K2_6, const.KIMI_K2_5, const.KIMI_K2,
|
||||
const.ERNIE_5_1, const.ERNIE_5, const.ERNIE_X1_1, const.ERNIE_45_TURBO_128K, const.ERNIE_45_TURBO_32K,
|
||||
const.MIMO_V2_5_PRO, const.MIMO_V2_5,
|
||||
]
|
||||
|
||||
# Generic placeholder hints surfaced in the web console. We deliberately
|
||||
@@ -1329,7 +1443,7 @@ class ConfigHandler:
|
||||
"models": [const.GPT_55, const.GPT_54, const.GPT_54_MINI, const.GPT_54_NANO, const.GPT_5, const.GPT_41, const.GPT_4o],
|
||||
}),
|
||||
("zhipu", {
|
||||
"label": "智谱AI",
|
||||
"label": {"zh": "智谱AI", "en": "GLM"},
|
||||
"api_key_field": "zhipu_ai_api_key",
|
||||
"api_base_key": "zhipu_ai_api_base",
|
||||
"api_base_default": "https://open.bigmodel.cn/api/paas/v4",
|
||||
@@ -1337,7 +1451,7 @@ class ConfigHandler:
|
||||
"models": [const.GLM_5_1, const.GLM_5_TURBO, const.GLM_5, const.GLM_4_7],
|
||||
}),
|
||||
("dashscope", {
|
||||
"label": "通义千问",
|
||||
"label": {"zh": "通义千问", "en": "Qwen"},
|
||||
"api_key_field": "dashscope_api_key",
|
||||
"api_base_key": None,
|
||||
"api_base_default": None,
|
||||
@@ -1345,7 +1459,7 @@ class ConfigHandler:
|
||||
"models": [const.QWEN36_PLUS, const.QWEN37_MAX, const.QWEN35_PLUS, const.QWEN3_MAX],
|
||||
}),
|
||||
("doubao", {
|
||||
"label": "豆包",
|
||||
"label": {"zh": "豆包", "en": "Doubao"},
|
||||
"api_key_field": "ark_api_key",
|
||||
"api_base_key": "ark_base_url",
|
||||
"api_base_default": "https://ark.cn-beijing.volces.com/api/v3",
|
||||
@@ -1361,13 +1475,21 @@ class ConfigHandler:
|
||||
"models": [const.KIMI_K2_6, const.KIMI_K2_5, const.KIMI_K2],
|
||||
}),
|
||||
("qianfan", {
|
||||
"label": "百度千帆",
|
||||
"label": {"zh": "百度千帆", "en": "ERNIE"},
|
||||
"api_key_field": "qianfan_api_key",
|
||||
"api_base_key": "qianfan_api_base",
|
||||
"api_base_default": "https://qianfan.baidubce.com/v2",
|
||||
"api_base_placeholder": _PLACEHOLDER_QIANFAN,
|
||||
"models": [const.ERNIE_5_1, const.ERNIE_5, const.ERNIE_X1_1, const.ERNIE_45_TURBO_128K, const.ERNIE_45_TURBO_32K],
|
||||
}),
|
||||
("mimo", {
|
||||
"label": {"zh": "小米 MiMo", "en": "MiMo"},
|
||||
"api_key_field": "mimo_api_key",
|
||||
"api_base_key": "mimo_api_base",
|
||||
"api_base_default": "https://api.xiaomimimo.com/v1",
|
||||
"api_base_placeholder": _PLACEHOLDER_V1,
|
||||
"models": [const.MIMO_V2_5_PRO, const.MIMO_V2_5],
|
||||
}),
|
||||
("linkai", {
|
||||
"label": "LinkAI",
|
||||
"api_key_field": "linkai_api_key",
|
||||
@@ -1377,7 +1499,7 @@ class ConfigHandler:
|
||||
"models": _RECOMMENDED_MODELS,
|
||||
}),
|
||||
("custom", {
|
||||
"label": "自定义",
|
||||
"label": {"zh": "自定义", "en": "Custom"},
|
||||
"api_key_field": "custom_api_key",
|
||||
"api_base_key": "custom_api_base",
|
||||
"api_base_default": "",
|
||||
@@ -1389,10 +1511,10 @@ class ConfigHandler:
|
||||
EDITABLE_KEYS = {
|
||||
"model", "bot_type", "use_linkai",
|
||||
"open_ai_api_base", "deepseek_api_base", "qianfan_api_base", "claude_api_base", "gemini_api_base",
|
||||
"zhipu_ai_api_base", "moonshot_base_url", "ark_base_url", "custom_api_base",
|
||||
"zhipu_ai_api_base", "moonshot_base_url", "ark_base_url", "custom_api_base", "mimo_api_base",
|
||||
"open_ai_api_key", "deepseek_api_key", "qianfan_api_key", "claude_api_key", "gemini_api_key",
|
||||
"zhipu_ai_api_key", "dashscope_api_key", "moonshot_api_key",
|
||||
"ark_api_key", "minimax_api_key", "linkai_api_key", "custom_api_key",
|
||||
"ark_api_key", "minimax_api_key", "linkai_api_key", "custom_api_key", "mimo_api_key",
|
||||
"agent_max_context_tokens", "agent_max_context_turns", "agent_max_steps",
|
||||
"enable_thinking", "web_password",
|
||||
}
|
||||
@@ -1409,7 +1531,7 @@ class ConfigHandler:
|
||||
web.header('Content-Type', 'application/json; charset=utf-8')
|
||||
try:
|
||||
local_config = conf()
|
||||
use_agent = local_config.get("agent", False)
|
||||
use_agent = local_config.get("agent", True)
|
||||
title = "CowAgent" if use_agent else "AI Assistant"
|
||||
|
||||
api_bases = {}
|
||||
@@ -1434,7 +1556,7 @@ class ConfigHandler:
|
||||
"api_key_field": p.get("api_key_field"),
|
||||
}
|
||||
|
||||
raw_pwd = local_config.get("web_password", "")
|
||||
raw_pwd = str(local_config.get("web_password", "") or "")
|
||||
masked_pwd = ("*" * len(raw_pwd)) if raw_pwd else ""
|
||||
|
||||
return json.dumps({
|
||||
@@ -1533,7 +1655,7 @@ class ModelsHandler:
|
||||
# Capability -> provider ids drawn from ConfigHandler.PROVIDER_MODELS.
|
||||
_ASR_PROVIDERS = ["openai", "dashscope", "zhipu", "linkai"]
|
||||
# Web-console white-list. Other vendors stay usable via direct config.
|
||||
_TTS_PROVIDERS = ["openai", "minimax", "dashscope", "linkai"]
|
||||
_TTS_PROVIDERS = ["openai", "minimax", "dashscope", "mimo", "linkai"]
|
||||
|
||||
# TTS engine catalog (speech models, not voice timbres). Entries are
|
||||
# either a bare code or {value, hint?} when a friendly label helps.
|
||||
@@ -1548,6 +1670,10 @@ class ModelsHandler:
|
||||
"dashscope": [
|
||||
{"value": "qwen3-tts-flash", "hint": "覆盖普通话、方言与主流外语"},
|
||||
],
|
||||
# 小米 MiMo TTS 系列,通过 chat completions 接口合成
|
||||
"mimo": [
|
||||
{"value": "mimo-v2.5-tts", "hint": "预置音色 · 支持唱歌模式"},
|
||||
],
|
||||
# Aggregating gateway: a single endpoint multiplexes several
|
||||
# underlying TTS engines, selected via the `model` field.
|
||||
# Each engine exposes its own voice catalog (see _TTS_PROVIDER_VOICES).
|
||||
@@ -1667,6 +1793,18 @@ class ModelsHandler:
|
||||
{"value": "Marcus", "hint": "陕西话 · 秦川"},
|
||||
{"value": "Roy", "hint": "闽南语 · 阿杰"},
|
||||
],
|
||||
# 小米 MiMo 预置音色列表(mimo-v2.5-tts),文档:
|
||||
# https://platform.xiaomimimo.com/docs/zh-CN/usage-guide/speech-synthesis-v2.5
|
||||
"mimo": [
|
||||
{"value": "冰糖", "hint": "中文 · 女声 · 冰糖"},
|
||||
{"value": "茉莉", "hint": "中文 · 女声 · 茉莉"},
|
||||
{"value": "苏打", "hint": "中文 · 男声 · 苏打"},
|
||||
{"value": "白桦", "hint": "中文 · 男声 · 白桦"},
|
||||
{"value": "Mia", "hint": "英文 · 女声 · Mia"},
|
||||
{"value": "Chloe", "hint": "英文 · 女声 · Chloe"},
|
||||
{"value": "Milo", "hint": "英文 · 男声 · Milo"},
|
||||
{"value": "Dean", "hint": "英文 · 男声 · Dean"},
|
||||
],
|
||||
# Aggregating gateway: voices are scoped per engine model. The
|
||||
# frontend picks the correct list based on the selected model so
|
||||
# users don't see incompatible timbres for the active engine.
|
||||
@@ -1803,6 +1941,8 @@ class ModelsHandler:
|
||||
# (see models/minimax/minimax_bot.py::call_vision); the M2.x chat
|
||||
# family is text-only.
|
||||
"minimax": [const.MINIMAX_TEXT_01],
|
||||
# MiMo 原生全模态模型:v2.5-pro / v2.5 支持图像/音频/视频输入
|
||||
"mimo": [const.MIMO_V2_5_PRO, const.MIMO_V2_5],
|
||||
# LinkAI proxies the underlying vendor; surface a curated set of
|
||||
# multimodal models. Order: gpt-4.1-mini → gpt-5.4-mini as the
|
||||
# cross-vendor baselines, then each vendor's recommended default.
|
||||
@@ -1932,6 +2072,7 @@ class ModelsHandler:
|
||||
("qianfan", "qianfan_api_key", const.ERNIE_45_TURBO_VL),
|
||||
("zhipu", "zhipu_ai_api_key", const.GLM_5V_TURBO),
|
||||
("minimax", "minimax_api_key", const.MINIMAX_TEXT_01),
|
||||
("mimo", "mimo_api_key", const.MIMO_V2_5_PRO),
|
||||
]
|
||||
|
||||
@classmethod
|
||||
@@ -2011,12 +2152,17 @@ class ModelsHandler:
|
||||
if not isinstance(vision_conf, dict):
|
||||
vision_conf = {}
|
||||
user_specified = (vision_conf.get("model") or "").strip()
|
||||
explicit_provider = (vision_conf.get("provider") or "").strip()
|
||||
|
||||
# When the user pinned a specific model, infer which vendor card to
|
||||
# highlight by scanning the per-provider model lists. Falls back to
|
||||
# an empty provider so the dropdown stays on "auto" if we can't tell.
|
||||
# Provider resolution priority:
|
||||
# 1. Explicit `tools.vision.provider` (persisted via UI; supports
|
||||
# custom model names that prefix-inference can't recognize).
|
||||
# 2. Scan per-provider model lists by model name.
|
||||
# Empty provider keeps the dropdown on "auto" when we can't tell.
|
||||
inferred_provider = ""
|
||||
if user_specified:
|
||||
if explicit_provider and explicit_provider in cls._VISION_PROVIDER_MODELS:
|
||||
inferred_provider = explicit_provider
|
||||
elif user_specified:
|
||||
for pid, models in cls._VISION_PROVIDER_MODELS.items():
|
||||
if user_specified in models:
|
||||
inferred_provider = pid
|
||||
@@ -2181,11 +2327,17 @@ class ModelsHandler:
|
||||
if not isinstance(img_node, dict):
|
||||
img_node = {}
|
||||
explicit_model = (img_node.get("model") or "").strip()
|
||||
explicit_provider = (img_node.get("provider") or "").strip()
|
||||
|
||||
# Infer the provider card to highlight by scanning per-provider
|
||||
# model lists, including alias values inside {value, hint} entries.
|
||||
# Provider resolution priority:
|
||||
# 1. Explicit `skills.image-generation.provider` (persisted via UI;
|
||||
# supports custom model names that prefix-inference can't catch).
|
||||
# 2. Scan per-provider model catalog by model name.
|
||||
# Empty provider keeps the dropdown on "auto" when we can't tell.
|
||||
inferred_provider = ""
|
||||
if explicit_model:
|
||||
if explicit_provider and explicit_provider in cls._IMAGE_PROVIDER_MODELS:
|
||||
inferred_provider = explicit_provider
|
||||
elif explicit_model:
|
||||
for pid, models in cls._IMAGE_PROVIDER_MODELS.items():
|
||||
for entry in models:
|
||||
val = entry if isinstance(entry, str) else (entry.get("value") or "")
|
||||
@@ -2222,10 +2374,10 @@ class ModelsHandler:
|
||||
_SEARCH_PROVIDERS = ("bocha", "qianfan", "zhipu", "linkai")
|
||||
|
||||
_SEARCH_PROVIDER_LABELS = {
|
||||
"bocha": "博查",
|
||||
"zhipu": "智谱",
|
||||
"qianfan": "百度千帆",
|
||||
"linkai": "LinkAI",
|
||||
"bocha": {"zh": "博查", "en": "Bocha"},
|
||||
"zhipu": {"zh": "智谱", "en": "GLM"},
|
||||
"qianfan": {"zh": "百度千帆", "en": "ERNIE"},
|
||||
"linkai": {"zh": "LinkAI", "en": "LinkAI"},
|
||||
}
|
||||
|
||||
@classmethod
|
||||
@@ -2440,27 +2592,37 @@ class ModelsHandler:
|
||||
return json.dumps({"status": "error", "message": f"capability not editable: {capability}"})
|
||||
|
||||
def _set_image(self, provider_id: str, model: str) -> str:
|
||||
# Source of truth: skills.image-generation.model. provider_id is
|
||||
# informational only; the resolver picks the vendor by model prefix.
|
||||
# Source of truth: skills.image-generation.{provider, model}. The
|
||||
# provider field is persisted so users picking a custom model under
|
||||
# a specific vendor still get routed there — runtime falls back to
|
||||
# model-name prefix inference only when provider is empty.
|
||||
local_config = conf()
|
||||
file_cfg = self._read_file_config()
|
||||
|
||||
self._set_nested_namespace_value(local_config, "skills", "image-generation", "model", model or "")
|
||||
self._set_nested_namespace_value(file_cfg, "skills", "image-generation", "model", model or "")
|
||||
self._set_nested_namespace_value(local_config, "skills", "image-generation", "provider", provider_id or "")
|
||||
self._set_nested_namespace_value(file_cfg, "skills", "image-generation", "provider", provider_id or "")
|
||||
self._drop_legacy_namespace(local_config, "skill", "skills", child="image-generation")
|
||||
self._drop_legacy_namespace(file_cfg, "skill", "skills", child="image-generation")
|
||||
|
||||
self._write_file_config(file_cfg)
|
||||
|
||||
# The skill subprocess reads SKILL_IMAGE_GENERATION_MODEL from env at
|
||||
# startup; mirror the change so live edits apply without restart.
|
||||
env_key = "SKILL_IMAGE_GENERATION_MODEL"
|
||||
# The skill subprocess reads SKILL_IMAGE_GENERATION_{MODEL,PROVIDER}
|
||||
# from env at startup; mirror the change so live edits apply without
|
||||
# restart.
|
||||
model_env = "SKILL_IMAGE_GENERATION_MODEL"
|
||||
provider_env = "SKILL_IMAGE_GENERATION_PROVIDER"
|
||||
if model:
|
||||
os.environ[env_key] = model
|
||||
os.environ[model_env] = model
|
||||
else:
|
||||
os.environ.pop(env_key, None)
|
||||
os.environ.pop(model_env, None)
|
||||
if provider_id:
|
||||
os.environ[provider_env] = provider_id
|
||||
else:
|
||||
os.environ.pop(provider_env, None)
|
||||
|
||||
logger.info(f"[ModelsHandler] image updated: provider_hint={provider_id!r} model={model!r}")
|
||||
logger.info(f"[ModelsHandler] image updated: provider={provider_id!r} model={model!r}")
|
||||
return json.dumps({
|
||||
"status": "success",
|
||||
"provider": provider_id,
|
||||
@@ -2499,18 +2661,22 @@ class ModelsHandler:
|
||||
return json.dumps({"status": "success", "applied": applied})
|
||||
|
||||
def _set_vision(self, provider_id: str, model: str) -> str:
|
||||
# Source of truth: tools.vision.model. provider_id is informational
|
||||
# only; the resolver picks the vendor by model prefix.
|
||||
# Source of truth: tools.vision.{provider, model}. The provider field
|
||||
# is persisted so users picking a custom model under a specific vendor
|
||||
# still get routed there — runtime falls back to model-name prefix
|
||||
# inference only when provider is empty.
|
||||
local_config = conf()
|
||||
file_cfg = self._read_file_config()
|
||||
self._set_nested_namespace_value(file_cfg, "tools", "vision", "model", model)
|
||||
self._set_nested_namespace_value(local_config, "tools", "vision", "model", model)
|
||||
self._set_nested_namespace_value(file_cfg, "tools", "vision", "provider", provider_id or "")
|
||||
self._set_nested_namespace_value(local_config, "tools", "vision", "provider", provider_id or "")
|
||||
self._drop_legacy_namespace(file_cfg, "tool", "tools", child="vision")
|
||||
self._drop_legacy_namespace(local_config, "tool", "tools", child="vision")
|
||||
|
||||
self._write_file_config(file_cfg)
|
||||
logger.info(f"[ModelsHandler] vision model set: {model!r}")
|
||||
return json.dumps({"status": "success", "model": model})
|
||||
logger.info(f"[ModelsHandler] vision updated: provider={provider_id!r} model={model!r}")
|
||||
return json.dumps({"status": "success", "provider": provider_id, "model": model})
|
||||
|
||||
@staticmethod
|
||||
def _set_nested_namespace_value(cfg, top: str, name: str, key: str, value):
|
||||
@@ -2743,6 +2909,14 @@ class ChannelsHandler:
|
||||
{"key": "wechatmp_port", "label": "Port", "type": "number", "default": 8080},
|
||||
],
|
||||
}),
|
||||
("telegram", {
|
||||
"label": {"zh": "Telegram", "en": "Telegram"},
|
||||
"icon": "fa-paper-plane",
|
||||
"color": "sky",
|
||||
"fields": [
|
||||
{"key": "telegram_token", "label": "Bot Token", "type": "secret"},
|
||||
],
|
||||
}),
|
||||
])
|
||||
|
||||
@staticmethod
|
||||
|
||||
@@ -440,6 +440,17 @@ class WecomBotChannel(ChatChannel):
|
||||
state["current"] = ""
|
||||
_push_stream(state, force=True)
|
||||
|
||||
elif event_type == "agent_cancelled":
|
||||
# Flush partial output and strip trailing "---" separator
|
||||
# left over from previous turn, to avoid a dangling divider.
|
||||
if state["current"]:
|
||||
state["committed"] += state["current"]
|
||||
state["current"] = ""
|
||||
state["committed"] = state["committed"].rstrip()
|
||||
if state["committed"].endswith("---"):
|
||||
state["committed"] = state["committed"][:-3].rstrip()
|
||||
_push_stream(state, force=True)
|
||||
|
||||
return on_event
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@@ -47,14 +47,16 @@ def _load_credentials(cred_path: str) -> dict:
|
||||
|
||||
|
||||
def _save_credentials(cred_path: str, data: dict):
|
||||
"""Save credentials to JSON file."""
|
||||
"""Atomically save credentials to JSON file (tmp + rename)."""
|
||||
os.makedirs(os.path.dirname(cred_path), exist_ok=True)
|
||||
with open(cred_path, "w") as f:
|
||||
tmp_path = f"{cred_path}.tmp"
|
||||
with open(tmp_path, "w") as f:
|
||||
json.dump(data, f, indent=2)
|
||||
try:
|
||||
os.chmod(cred_path, 0o600)
|
||||
os.chmod(tmp_path, 0o600)
|
||||
except Exception:
|
||||
pass
|
||||
os.replace(tmp_path, cred_path)
|
||||
|
||||
|
||||
@singleton
|
||||
@@ -73,7 +75,10 @@ class WeixinChannel(ChatChannel):
|
||||
self.api = None
|
||||
self._stop_event = threading.Event()
|
||||
self._poll_thread = None
|
||||
self._context_tokens = {} # user_id -> context_token
|
||||
# user_id -> context_token. Guarded by _context_tokens_lock for any
|
||||
# mutation that races with disk persistence.
|
||||
self._context_tokens = {}
|
||||
self._context_tokens_lock = threading.Lock()
|
||||
self._received_msgs = ExpiredDict(60 * 60 * 7.1)
|
||||
self._get_updates_buf = ""
|
||||
self._credentials_path = ""
|
||||
@@ -95,12 +100,19 @@ class WeixinChannel(ChatChannel):
|
||||
conf().get("weixin_credentials_path", "~/.weixin_cow_credentials.json")
|
||||
)
|
||||
|
||||
# Always load credentials so we can restore context_tokens even when
|
||||
# the bot token itself comes from config.
|
||||
creds = _load_credentials(self._credentials_path)
|
||||
if not token:
|
||||
creds = _load_credentials(self._credentials_path)
|
||||
token = creds.get("token", "")
|
||||
if creds.get("base_url"):
|
||||
base_url = creds["base_url"]
|
||||
|
||||
# Restore persisted context_tokens so scheduler can deliver pushes
|
||||
# immediately after restart, without waiting for the user to ping
|
||||
# the bot first.
|
||||
self._restore_context_tokens_from_creds(creds)
|
||||
|
||||
if not token:
|
||||
token, base_url = self._login_with_retry(base_url)
|
||||
if not token:
|
||||
@@ -140,11 +152,16 @@ class WeixinChannel(ChatChannel):
|
||||
def _relogin(self) -> bool:
|
||||
"""Re-login after session expiry. Returns True on success."""
|
||||
base_url = self.api.base_url if self.api else DEFAULT_BASE_URL
|
||||
if os.path.exists(self._credentials_path):
|
||||
try:
|
||||
os.remove(self._credentials_path)
|
||||
except Exception:
|
||||
pass
|
||||
# Clearing the whole credentials file is intentional: the new login
|
||||
# will issue a fresh `token` and persisted context_tokens belong to
|
||||
# the previous bot identity, so they must not survive.
|
||||
with self._context_tokens_lock:
|
||||
self._context_tokens.clear()
|
||||
if os.path.exists(self._credentials_path):
|
||||
try:
|
||||
os.remove(self._credentials_path)
|
||||
except Exception:
|
||||
pass
|
||||
self.login_status = self.LOGIN_STATUS_WAITING
|
||||
result = self._qr_login(base_url)
|
||||
if not result:
|
||||
@@ -156,9 +173,62 @@ class WeixinChannel(ChatChannel):
|
||||
cdn_base_url=self.api.cdn_base_url if self.api else CDN_BASE_URL,
|
||||
)
|
||||
self.login_status = self.LOGIN_STATUS_OK
|
||||
self._context_tokens.clear()
|
||||
return True
|
||||
|
||||
# ── Context token persistence ──────────────────────────────────────
|
||||
# ilink requires every outbound send to echo the context_token from the
|
||||
# user's latest inbound message. We mirror the in-memory map into the
|
||||
# credentials JSON so scheduled pushes survive process restarts.
|
||||
# All mutation + disk IO is serialized via _context_tokens_lock so that
|
||||
# concurrent updates can never lose each other's writes.
|
||||
|
||||
def _restore_context_tokens_from_creds(self, creds: dict) -> None:
|
||||
if not isinstance(creds, dict):
|
||||
return
|
||||
tokens = creds.get("context_tokens")
|
||||
if not isinstance(tokens, dict):
|
||||
return
|
||||
restored = 0
|
||||
with self._context_tokens_lock:
|
||||
for user_id, token in tokens.items():
|
||||
if isinstance(user_id, str) and isinstance(token, str) and token:
|
||||
self._context_tokens[user_id] = token
|
||||
restored += 1
|
||||
if restored:
|
||||
logger.info(f"[Weixin] Restored {restored} context_tokens from credentials")
|
||||
|
||||
def _persist_context_tokens_locked(self) -> None:
|
||||
"""Flush the token map to disk. Caller must hold _context_tokens_lock."""
|
||||
if not self._credentials_path:
|
||||
return
|
||||
try:
|
||||
creds = _load_credentials(self._credentials_path) or {}
|
||||
creds["context_tokens"] = dict(self._context_tokens)
|
||||
_save_credentials(self._credentials_path, creds)
|
||||
except Exception as e:
|
||||
logger.warning(f"[Weixin] Failed to persist context_tokens: {e}")
|
||||
|
||||
def _update_context_token(self, user_id: str, token: str) -> None:
|
||||
"""Update the in-memory token for a user; flush to disk only on change."""
|
||||
if not user_id or not token:
|
||||
return
|
||||
with self._context_tokens_lock:
|
||||
if self._context_tokens.get(user_id) == token:
|
||||
return
|
||||
self._context_tokens[user_id] = token
|
||||
self._persist_context_tokens_locked()
|
||||
|
||||
def _invalidate_context_token(self, user_id: str) -> None:
|
||||
"""Drop the cached token for a user (used after -14 / send rejection)."""
|
||||
if not user_id:
|
||||
return
|
||||
with self._context_tokens_lock:
|
||||
if user_id not in self._context_tokens:
|
||||
return
|
||||
del self._context_tokens[user_id]
|
||||
logger.info(f"[Weixin] Invalidated stale context_token for {user_id}")
|
||||
self._persist_context_tokens_locked()
|
||||
|
||||
# ── QR Login ───────────────────────────────────────────────────────
|
||||
|
||||
@staticmethod
|
||||
@@ -391,7 +461,7 @@ class WeixinChannel(ChatChannel):
|
||||
context_token = raw_msg.get("context_token", "")
|
||||
|
||||
if context_token and from_user:
|
||||
self._context_tokens[from_user] = context_token
|
||||
self._update_context_token(from_user, context_token)
|
||||
|
||||
cdn_base_url = self.api.cdn_base_url if self.api else CDN_BASE_URL
|
||||
try:
|
||||
@@ -510,10 +580,30 @@ class WeixinChannel(ChatChannel):
|
||||
return msg.context_token
|
||||
return self._context_tokens.get(receiver, "")
|
||||
|
||||
def _check_send_response(self, resp, receiver: str) -> None:
|
||||
"""Inspect a send-API response; drop stale context_token on -14.
|
||||
|
||||
ilink uses ret/errcode = -14 to signal that the session (and any
|
||||
cached context_token) is no longer valid. The plugin keeps running
|
||||
because the bot itself can re-login; we just need to forget the
|
||||
per-user token so the next push won't retry forever.
|
||||
"""
|
||||
if not isinstance(resp, dict):
|
||||
return
|
||||
ret = resp.get("ret")
|
||||
errcode = resp.get("errcode")
|
||||
if ret == -14 or errcode == -14:
|
||||
logger.warning(
|
||||
f"[Weixin] Send returned -14 (session expired) for "
|
||||
f"receiver={receiver}; dropping cached context_token"
|
||||
)
|
||||
self._invalidate_context_token(receiver)
|
||||
|
||||
def _send_text(self, text: str, receiver: str, context_token: str):
|
||||
if len(text) <= TEXT_CHUNK_LIMIT:
|
||||
try:
|
||||
self.api.send_text(receiver, text, context_token)
|
||||
resp = self.api.send_text(receiver, text, context_token)
|
||||
self._check_send_response(resp, receiver)
|
||||
logger.debug(f"[Weixin] Text sent to {receiver}, len={len(text)}")
|
||||
except Exception as e:
|
||||
logger.error(f"[Weixin] Failed to send text: {e}")
|
||||
@@ -522,7 +612,8 @@ class WeixinChannel(ChatChannel):
|
||||
chunks = self._split_text(text, TEXT_CHUNK_LIMIT)
|
||||
for i, chunk in enumerate(chunks):
|
||||
try:
|
||||
self.api.send_text(receiver, chunk, context_token)
|
||||
resp = self.api.send_text(receiver, chunk, context_token)
|
||||
self._check_send_response(resp, receiver)
|
||||
logger.debug(f"[Weixin] Text chunk {i+1}/{len(chunks)} sent to {receiver}, len={len(chunk)}")
|
||||
except Exception as e:
|
||||
logger.error(f"[Weixin] Failed to send text chunk {i+1}/{len(chunks)}: {e}")
|
||||
@@ -556,13 +647,14 @@ class WeixinChannel(ChatChannel):
|
||||
return
|
||||
try:
|
||||
result = upload_media_to_cdn(self.api, local_path, receiver, media_type=1)
|
||||
self.api.send_image_item(
|
||||
resp = self.api.send_image_item(
|
||||
to=receiver,
|
||||
context_token=context_token,
|
||||
encrypt_query_param=result["encrypt_query_param"],
|
||||
aes_key_b64=result["aes_key_b64"],
|
||||
ciphertext_size=result["ciphertext_size"],
|
||||
)
|
||||
self._check_send_response(resp, receiver)
|
||||
logger.info(f"[Weixin] Image sent to {receiver}")
|
||||
except Exception as e:
|
||||
logger.error(f"[Weixin] Image send failed: {e}")
|
||||
@@ -575,7 +667,7 @@ class WeixinChannel(ChatChannel):
|
||||
return
|
||||
try:
|
||||
result = upload_media_to_cdn(self.api, local_path, receiver, media_type=3)
|
||||
self.api.send_file_item(
|
||||
resp = self.api.send_file_item(
|
||||
to=receiver,
|
||||
context_token=context_token,
|
||||
encrypt_query_param=result["encrypt_query_param"],
|
||||
@@ -583,6 +675,7 @@ class WeixinChannel(ChatChannel):
|
||||
file_name=os.path.basename(local_path),
|
||||
file_size=result["raw_size"],
|
||||
)
|
||||
self._check_send_response(resp, receiver)
|
||||
logger.info(f"[Weixin] File sent to {receiver}")
|
||||
except Exception as e:
|
||||
logger.error(f"[Weixin] File send failed: {e}")
|
||||
@@ -595,13 +688,14 @@ class WeixinChannel(ChatChannel):
|
||||
return
|
||||
try:
|
||||
result = upload_media_to_cdn(self.api, local_path, receiver, media_type=2)
|
||||
self.api.send_video_item(
|
||||
resp = self.api.send_video_item(
|
||||
to=receiver,
|
||||
context_token=context_token,
|
||||
encrypt_query_param=result["encrypt_query_param"],
|
||||
aes_key_b64=result["aes_key_b64"],
|
||||
ciphertext_size=result["ciphertext_size"],
|
||||
)
|
||||
self._check_send_response(resp, receiver)
|
||||
logger.info(f"[Weixin] Video sent to {receiver}")
|
||||
except Exception as e:
|
||||
logger.error(f"[Weixin] Video send failed: {e}")
|
||||
|
||||
@@ -269,7 +269,7 @@ def status():
|
||||
channel = ", ".join(channel)
|
||||
click.echo(f" 通道: {channel}")
|
||||
click.echo(f" 模型: {cfg.get('model', 'unknown')}")
|
||||
mode = "Agent" if cfg.get("agent") else "Chat"
|
||||
mode = "Chat" if cfg.get("agent") is False else "Agent"
|
||||
click.echo(f" 模式: {mode}")
|
||||
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@ ZHIPU_AI = "zhipu"
|
||||
MOONSHOT = "moonshot"
|
||||
MiniMax = "minimax"
|
||||
DEEPSEEK = "deepseek"
|
||||
MIMO = "mimo" # 小米 MiMo 大模型
|
||||
CUSTOM = "custom" # custom OpenAI-compatible API, bot_type won't auto-switch on model change
|
||||
MODELSCOPE = "modelscope"
|
||||
|
||||
@@ -140,6 +141,13 @@ KIMI_K2 = "kimi-k2"
|
||||
KIMI_K2_5 = "kimi-k2.5"
|
||||
KIMI_K2_6 = "kimi-k2.6" # Kimi K2.6 - Agent recommended model (default)
|
||||
|
||||
# 小米 MiMo
|
||||
MIMO_V2_5_PRO = "mimo-v2.5-pro" # MiMo V2.5 Pro - 旗舰,长上下文(默认推荐)
|
||||
MIMO_V2_5 = "mimo-v2.5" # MiMo V2.5 - 多模态(文/图/音/视频)
|
||||
MIMO_V2_PRO = "mimo-v2-pro" # MiMo V2 Pro
|
||||
MIMO_V2_OMNI = "mimo-v2-omni" # MiMo V2 Omni - 多模态
|
||||
MIMO_V2_FLASH = "mimo-v2-flash" # MiMo V2 Flash - 极速版
|
||||
|
||||
# Doubao (Volcengine Ark)
|
||||
DOUBAO = "doubao"
|
||||
DOUBAO_SEED_2_CODE = "doubao-seed-2-0-code-preview-260215"
|
||||
@@ -182,6 +190,9 @@ MODEL_LIST = [
|
||||
# MiniMax
|
||||
MiniMax, MINIMAX_M2_7, MINIMAX_M2_7_HIGHSPEED, MINIMAX_M2_5, MINIMAX_M2_1, MINIMAX_M2_1_LIGHTNING, MINIMAX_M2, MINIMAX_ABAB6_5,
|
||||
|
||||
# 小米 MiMo
|
||||
MIMO, MIMO_V2_5_PRO, MIMO_V2_5, MIMO_V2_PRO, MIMO_V2_OMNI, MIMO_V2_FLASH,
|
||||
|
||||
# Claude
|
||||
CLAUDE3, CLAUDE_4_6_SONNET, CLAUDE_4_7_OPUS, CLAUDE_4_6_OPUS, CLAUDE_4_OPUS, CLAUDE_4_5_SONNET, CLAUDE_4_SONNET, CLAUDE_3_OPUS, CLAUDE_3_OPUS_0229,
|
||||
CLAUDE_35_SONNET, CLAUDE_35_SONNET_1022, CLAUDE_35_SONNET_0620, CLAUDE_3_SONNET, CLAUDE_3_HAIKU,
|
||||
@@ -232,3 +243,4 @@ DINGTALK = "dingtalk"
|
||||
WECOM_BOT = "wecom_bot"
|
||||
QQ = "qq"
|
||||
WEIXIN = "weixin"
|
||||
TELEGRAM = "telegram"
|
||||
|
||||
14
config.py
14
config.py
@@ -166,6 +166,11 @@ available_setting = {
|
||||
# 企微智能机器人配置(长连接模式)
|
||||
"wecom_bot_id": "", # 企微智能机器人BotID
|
||||
"wecom_bot_secret": "", # 企微智能机器人长连接Secret
|
||||
# Telegram 配置
|
||||
"telegram_token": "", # 从 @BotFather 申请的 bot token
|
||||
"telegram_proxy": "", # 可选的 HTTP/SOCKS5 代理,例如 http://127.0.0.1:7890 或 socks5://127.0.0.1:1080(留空则走系统环境变量)
|
||||
"telegram_group_trigger": "mention_or_reply", # 群聊触发方式: mention_or_reply(@或回复触发,推荐) | mention_only(仅@) | all(所有消息)
|
||||
"telegram_register_commands": True, # 启动时是否自动向 BotFather 注册命令菜单(与 web 端 slash 命令一致)
|
||||
# 微信配置
|
||||
"weixin_token": "", # 微信登录后获取的bot_token,留空则启动时自动扫码登录
|
||||
"weixin_base_url": "https://ilinkai.weixin.qq.com", # Weixin ilink API base URL
|
||||
@@ -174,7 +179,7 @@ available_setting = {
|
||||
# chatgpt指令自定义触发词
|
||||
"clear_memory_commands": ["#清除记忆"], # 重置会话指令,必须以#开头
|
||||
# channel配置
|
||||
"channel_type": "", # 通道类型,支持多渠道同时运行。单个: "feishu",多个: "feishu, dingtalk" 或 ["feishu", "dingtalk"]。可选值: web,feishu,dingtalk,wecom_bot,weixin,wechatmp,wechatmp_service,wechatcom_app
|
||||
"channel_type": "", # 通道类型,支持多渠道同时运行。单个: "feishu",多个: "feishu, dingtalk" 或 ["feishu", "dingtalk"]。可选值: web,feishu,dingtalk,wecom_bot,weixin,wechatmp,wechatmp_service,wechatcom_app,telegram
|
||||
"web_console": True, # 是否自动启动Web控制台(默认启动)。设为False可禁用
|
||||
"subscribe_msg": "", # 订阅消息, 支持: wechatmp, wechatmp_service, wechatcom_app
|
||||
"debug": False, # 是否开启debug模式,开启后会打印更多日志
|
||||
@@ -209,6 +214,9 @@ available_setting = {
|
||||
"Minimax_base_url": "",
|
||||
"deepseek_api_key": "",
|
||||
"deepseek_api_base": "https://api.deepseek.com/v1",
|
||||
# 小米 MiMo 大模型
|
||||
"mimo_api_key": "",
|
||||
"mimo_api_base": "https://api.xiaomimimo.com/v1",
|
||||
"web_host": "", # Web console bind address; empty means auto
|
||||
"web_port": 9899,
|
||||
"web_password": "", # Web console password; empty means no authentication required
|
||||
@@ -376,7 +384,7 @@ def load_config():
|
||||
logger.info("[INIT] Model: {}".format(config.get("model", "unknown")))
|
||||
|
||||
# Agent模式信息
|
||||
if config.get("agent", False):
|
||||
if config.get("agent", True):
|
||||
workspace = config.get("agent_workspace", "~/cow")
|
||||
logger.info("[INIT] Mode: Agent (workspace: {})".format(workspace))
|
||||
else:
|
||||
@@ -401,6 +409,8 @@ def load_config():
|
||||
"minimax_api_base": "MINIMAX_API_BASE",
|
||||
"deepseek_api_key": "DEEPSEEK_API_KEY",
|
||||
"deepseek_api_base": "DEEPSEEK_API_BASE",
|
||||
"mimo_api_key": "MIMO_API_KEY",
|
||||
"mimo_api_base": "MIMO_API_BASE",
|
||||
"qianfan_api_key": "QIANFAN_API_KEY",
|
||||
"qianfan_api_base": "QIANFAN_API_BASE",
|
||||
"zhipu_ai_api_key": "ZHIPU_AI_API_KEY",
|
||||
|
||||
30
docs/README.md
Normal file
30
docs/README.md
Normal file
@@ -0,0 +1,30 @@
|
||||
# Documentation
|
||||
|
||||
This directory contains the Mintlify documentation site for the project.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Node.js v20.17.0 or higher (LTS recommended)
|
||||
|
||||
## Install the CLI (one-time, global)
|
||||
|
||||
```bash
|
||||
npm i -g mint
|
||||
```
|
||||
|
||||
## Run the docs locally
|
||||
|
||||
From this `docs/` directory:
|
||||
|
||||
```bash
|
||||
mint dev
|
||||
```
|
||||
|
||||
Then open http://localhost:3000 (or the port Mint reports if 3000 is in use).
|
||||
|
||||
> The first run downloads the Mint preview framework (~90 MB) into `~/.mintlify/`.
|
||||
> Subsequent runs start instantly from the local cache.
|
||||
|
||||
## More
|
||||
|
||||
- Mintlify docs: https://www.mintlify.com/docs
|
||||
@@ -19,6 +19,7 @@ CowAgent 支持接入多种聊天通道,启动时通过 `channel_type` 切换
|
||||
| [QQ](/channels/qq) | ✅ | ✅ | ✅ | | ✅ |
|
||||
| [企业微信应用](/channels/wecom) | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [公众号](/channels/wechatmp) | ✅ | ✅ | | ✅ | |
|
||||
| [Telegram](/channels/telegram) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
|
||||
- **图片 / 文件 / 语音**列表示通道支持收发对应消息类型,具体细节详见各通道文档
|
||||
- **群聊**列指可识别并响应群消息
|
||||
@@ -37,3 +38,4 @@ CowAgent 支持接入多种聊天通道,启动时通过 `channel_type` 切换
|
||||
- [QQ](/channels/qq) — QQ 官方机器人开放平台
|
||||
- [企业微信应用](/channels/wecom) — 企业微信自建应用接入
|
||||
- [公众号](/channels/wechatmp) — 微信公众号(订阅号 / 服务号)
|
||||
- [Telegram](/channels/telegram) — 海外 IM,5 分钟接入,无需公网 IP
|
||||
|
||||
112
docs/channels/telegram.mdx
Normal file
112
docs/channels/telegram.mdx
Normal file
@@ -0,0 +1,112 @@
|
||||
---
|
||||
title: Telegram
|
||||
description: 将 CowAgent 接入 Telegram Bot
|
||||
---
|
||||
|
||||
> 通过 Telegram Bot API 接入 CowAgent,支持单聊与群聊(@机器人 / 回复机器人触发),使用 Long Polling 模式无需公网 IP,开箱即用。
|
||||
|
||||
|
||||
## 一、接入步骤
|
||||
|
||||
### 步骤一:通过 BotFather 创建 Bot
|
||||
|
||||
1. 在 Telegram 中搜索并打开官方账号 [@BotFather](https://t.me/BotFather)。
|
||||
2. 发送 `/newbot` 命令,按提示输入:
|
||||
- **Bot 名称**(显示名,可中文,例如 `My CowAgent Bot`)
|
||||
- **Bot 用户名**(必须以 `bot` 结尾,例如 `my_cowagent_bot`)
|
||||
3. 创建成功后,BotFather 会返回一段 **HTTP API Token**(形如 `123456789:ABCdefGhIJKlmNoPQRsTUVwxyZ`),妥善保存。
|
||||
|
||||
<Tip>
|
||||
这个 Token 等同于 Bot 的密码,请勿泄露。如果意外泄漏可向 `@BotFather` 发送 `/revoke` 重置。
|
||||
</Tip>
|
||||
|
||||
### 步骤二:(群聊使用)关闭 Privacy Mode
|
||||
|
||||
仅使用单聊可跳过此步。Telegram Bot 默认开启 **Privacy Mode**,群聊中只能收到带 `@bot` 的命令(如 `/start@your_bot`)以及对 bot 消息的 reply;**普通的 `@bot 你好` 文字消息收不到**,会导致群聊无响应。
|
||||
|
||||
向 `@BotFather` 发送:
|
||||
|
||||
1. `/setprivacy`
|
||||
2. 选择刚才创建的 bot
|
||||
3. 选择 `Disable`
|
||||
|
||||
<Note>
|
||||
若设置后群聊仍无响应,可尝试把 Bot 从群里移除并重新拉入。
|
||||
</Note>
|
||||
|
||||
### 步骤三:接入 CowAgent
|
||||
|
||||
<Tabs>
|
||||
<Tab title="Web 控制台(推荐)">
|
||||
打开 Web 控制台(本地链接:http://127.0.0.1:9899 ),选择 **通道** 菜单,点击 **接入通道**,选择 **Telegram**,填入 Bot Token,点击接入即可。
|
||||
</Tab>
|
||||
<Tab title="配置文件">
|
||||
在 `config.json` 中添加以下配置后启动:
|
||||
|
||||
```json
|
||||
{
|
||||
"channel_type": "telegram",
|
||||
"telegram_token": "123456789:ABCdefGhIJKlmNoPQRsTUVwxyZ",
|
||||
"telegram_group_trigger": "mention_or_reply"
|
||||
}
|
||||
```
|
||||
|
||||
| 参数 | 说明 | 默认值 |
|
||||
| --- | --- | --- |
|
||||
| `telegram_token` | BotFather 返回的 HTTP API Token | - |
|
||||
| `telegram_group_trigger` | 群聊触发方式:`mention_or_reply`(@或回复机器人)/ `mention_only`(仅@) / `all`(所有消息) | `mention_or_reply` |
|
||||
| `telegram_register_commands` | 启动时是否自动向 BotFather 注册命令菜单 | `true` |
|
||||
| `telegram_proxy` | (可选)代理地址,如 `http://127.0.0.1:7890`、`socks5://127.0.0.1:1080`;运行环境无法直连 `api.telegram.org` 时配置,留空则使用环境变量 `HTTPS_PROXY` | `""` |
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
启动 Cow 后,日志中出现以下输出即表示接入成功:
|
||||
|
||||
```
|
||||
[Telegram] Bot logged in as @my_cowagent_bot (id=123456789)
|
||||
[Telegram] Registered 10 bot commands
|
||||
[Telegram] ✅ Telegram bot ready, polling for updates
|
||||
```
|
||||
|
||||
## 二、功能说明
|
||||
|
||||
| 功能 | 支持情况 |
|
||||
| --- | --- |
|
||||
| 单聊 | ✅ |
|
||||
| 群聊(@机器人 / 回复机器人) | ✅ |
|
||||
| 文本消息 | ✅ 收发 |
|
||||
| 图片消息 | ✅ 收发 |
|
||||
| 语音消息 | ✅ 收发(接收 OGG/Opus,发送 OGG/Opus) |
|
||||
| 视频消息 | ✅ 收发 |
|
||||
| 文件消息 | ✅ 收发(PDF / Word / Excel 等) |
|
||||
| 命令菜单 | ✅ 与 Web 控制台 slash 命令一致 |
|
||||
|
||||
### 命令菜单
|
||||
|
||||
启动时会自动向 BotFather 注册命令菜单,用户在 Telegram 输入框输入 `/` 会出现下拉提示:
|
||||
|
||||
| 命令 | 说明 |
|
||||
| --- | --- |
|
||||
| `/help` | 显示命令帮助 |
|
||||
| `/status` | 查看运行状态 |
|
||||
| `/context` | 查看对话上下文(`/context clear` 清除) |
|
||||
| `/skill` | 技能管理(`/skill list`、`/skill install` 等) |
|
||||
| `/memory` | 记忆管理(`/memory dream`) |
|
||||
| `/knowledge` | 知识库管理(`/knowledge list` / `on` / `off`) |
|
||||
| `/config` | 查看当前配置 |
|
||||
| `/cancel` | 中止当前正在运行的 Agent 任务 |
|
||||
| `/logs` | 查看最近日志 |
|
||||
| `/version` | 查看版本 |
|
||||
|
||||
<Note>
|
||||
Telegram 命令菜单只能展示一级命令,子命令通过空格输入即可,例如 `/skill list`、`/context clear`。
|
||||
</Note>
|
||||
|
||||
## 三、使用
|
||||
|
||||
完成接入后:
|
||||
|
||||
- **单聊**:在 Telegram 中搜索你创建的 Bot 用户名(如 `@my_cowagent_bot`),点击 `Start` 即可开始对话。
|
||||
- **群聊**:把 Bot 拉进群,使用 `@bot 你好` 或 **回复 Bot 的某条消息** 触发对话。若群聊无响应,请检查 Privacy Mode 是否已按 [步骤二](#步骤二-群聊使用-关闭-privacy-mode) 关闭。
|
||||
|
||||
发送图片或文件时,可以直接在附件上方的输入框中 **添加 Caption**(描述/问题)一并发送,机器人会结合附件回答。也支持先发附件再发问题,两条消息会自动合并提问。
|
||||
@@ -39,6 +39,14 @@ Mode: agent
|
||||
Session: 12 messages | 8 skills loaded
|
||||
```
|
||||
|
||||
## cancel
|
||||
|
||||
中止当前会话正在运行的 Agent 任务。在 Agent 执行长时间任务(例如多轮工具调用、长流式输出)时,可随时发送 `/cancel`,Agent 会在下一次工具执行前停止。Web 端、微信、企业微信、飞书等各通道均可使用。
|
||||
|
||||
```text
|
||||
/cancel
|
||||
```
|
||||
|
||||
## config
|
||||
|
||||
查看或修改运行时配置。修改后立即生效,无需重启服务。
|
||||
|
||||
@@ -57,6 +57,7 @@ Others:
|
||||
| --- | --- |
|
||||
| `/help` | 显示命令帮助 |
|
||||
| `/status` | 查看服务状态和配置 |
|
||||
| `/cancel` | 中止当前正在运行的 Agent 任务 |
|
||||
| `/config` | 查看或修改运行时配置 |
|
||||
| `/skill` | 管理技能(安装、卸载、启用、禁用等) |
|
||||
| `/memory dream [N]` | 手动触发记忆蒸馏(默认 3 天,最大 30) |
|
||||
@@ -82,6 +83,7 @@ Others:
|
||||
| version | ✓ | ✓ |
|
||||
| status | ✓ | ✓ |
|
||||
| logs | ✓ | ✓ |
|
||||
| cancel | ✗ | ✓ |
|
||||
| config | ✗ | ✓ |
|
||||
| context | — | ✓ |
|
||||
| memory (子命令) | ✗ | ✓ |
|
||||
|
||||
@@ -38,6 +38,12 @@
|
||||
{
|
||||
"language": "zh",
|
||||
"default": true,
|
||||
"navbar": {
|
||||
"links": [
|
||||
{ "label": "官网", "href": "https://cowagent.ai/?lang=zh" },
|
||||
{ "label": "GitHub", "href": "https://github.com/zhayujie/CowAgent" }
|
||||
]
|
||||
},
|
||||
"tabs": [
|
||||
{
|
||||
"tab": "项目介绍",
|
||||
@@ -82,6 +88,7 @@
|
||||
"models/doubao",
|
||||
"models/kimi",
|
||||
"models/qianfan",
|
||||
"models/mimo",
|
||||
"models/linkai",
|
||||
"models/coding-plan",
|
||||
"models/custom"
|
||||
@@ -189,7 +196,8 @@
|
||||
"channels/wecom-bot",
|
||||
"channels/qq",
|
||||
"channels/wecom",
|
||||
"channels/wechatmp"
|
||||
"channels/wechatmp",
|
||||
"channels/telegram"
|
||||
]
|
||||
}
|
||||
]
|
||||
@@ -216,6 +224,7 @@
|
||||
"group": "发布记录",
|
||||
"pages": [
|
||||
"releases/overview",
|
||||
"releases/v2.0.9",
|
||||
"releases/v2.0.8",
|
||||
"releases/v2.0.7",
|
||||
"releases/v2.0.6",
|
||||
@@ -233,6 +242,12 @@
|
||||
},
|
||||
{
|
||||
"language": "en",
|
||||
"navbar": {
|
||||
"links": [
|
||||
{ "label": "Website", "href": "https://cowagent.ai/" },
|
||||
{ "label": "GitHub", "href": "https://github.com/zhayujie/CowAgent" }
|
||||
]
|
||||
},
|
||||
"tabs": [
|
||||
{
|
||||
"tab": "Introduction",
|
||||
@@ -254,7 +269,8 @@
|
||||
"group": "Installation",
|
||||
"pages": [
|
||||
"en/guide/quick-start",
|
||||
"en/guide/manual-install"
|
||||
"en/guide/manual-install",
|
||||
"en/guide/upgrade"
|
||||
]
|
||||
}
|
||||
]
|
||||
@@ -276,6 +292,7 @@
|
||||
"en/models/doubao",
|
||||
"en/models/kimi",
|
||||
"en/models/qianfan",
|
||||
"en/models/mimo",
|
||||
"en/models/linkai",
|
||||
"en/models/coding-plan",
|
||||
"en/models/custom"
|
||||
@@ -331,6 +348,7 @@
|
||||
"pages": [
|
||||
"en/skills/index",
|
||||
"en/skills/install",
|
||||
"en/skills/create",
|
||||
"en/skills/hub"
|
||||
]
|
||||
},
|
||||
@@ -374,6 +392,7 @@
|
||||
{
|
||||
"group": "Platforms",
|
||||
"pages": [
|
||||
"en/channels/index",
|
||||
"en/channels/weixin",
|
||||
"en/channels/web",
|
||||
"en/channels/feishu",
|
||||
@@ -381,7 +400,8 @@
|
||||
"en/channels/wecom-bot",
|
||||
"en/channels/qq",
|
||||
"en/channels/wecom",
|
||||
"en/channels/wechatmp"
|
||||
"en/channels/wechatmp",
|
||||
"en/channels/telegram"
|
||||
]
|
||||
}
|
||||
]
|
||||
@@ -396,7 +416,7 @@
|
||||
"en/cli/process",
|
||||
"en/cli/skill",
|
||||
"en/cli/memory-knowledge",
|
||||
"en/cli/chat"
|
||||
"en/cli/general"
|
||||
]
|
||||
}
|
||||
]
|
||||
@@ -408,6 +428,7 @@
|
||||
"group": "Release Notes",
|
||||
"pages": [
|
||||
"en/releases/overview",
|
||||
"en/releases/v2.0.9",
|
||||
"en/releases/v2.0.8",
|
||||
"en/releases/v2.0.7",
|
||||
"en/releases/v2.0.6",
|
||||
@@ -425,6 +446,12 @@
|
||||
},
|
||||
{
|
||||
"language": "ja",
|
||||
"navbar": {
|
||||
"links": [
|
||||
{ "label": "ウェブサイト", "href": "https://cowagent.ai/" },
|
||||
{ "label": "GitHub", "href": "https://github.com/zhayujie/CowAgent" }
|
||||
]
|
||||
},
|
||||
"tabs": [
|
||||
{
|
||||
"tab": "紹介",
|
||||
@@ -469,6 +496,7 @@
|
||||
"ja/models/doubao",
|
||||
"ja/models/kimi",
|
||||
"ja/models/qianfan",
|
||||
"ja/models/mimo",
|
||||
"ja/models/linkai",
|
||||
"ja/models/coding-plan",
|
||||
"ja/models/custom"
|
||||
@@ -568,6 +596,7 @@
|
||||
{
|
||||
"group": "プラットフォーム",
|
||||
"pages": [
|
||||
"ja/channels/index",
|
||||
"ja/channels/weixin",
|
||||
"ja/channels/web",
|
||||
"ja/channels/feishu",
|
||||
@@ -575,7 +604,8 @@
|
||||
"ja/channels/wecom-bot",
|
||||
"ja/channels/qq",
|
||||
"ja/channels/wecom",
|
||||
"ja/channels/wechatmp"
|
||||
"ja/channels/wechatmp",
|
||||
"ja/channels/telegram"
|
||||
]
|
||||
}
|
||||
]
|
||||
@@ -602,6 +632,7 @@
|
||||
"group": "リリースノート",
|
||||
"pages": [
|
||||
"ja/releases/overview",
|
||||
"ja/releases/v2.0.9",
|
||||
"ja/releases/v2.0.8",
|
||||
"ja/releases/v2.0.7",
|
||||
"ja/releases/v2.0.6",
|
||||
|
||||
@@ -1,250 +0,0 @@
|
||||
<p align="center"><img src="https://github.com/user-attachments/assets/eca9a9ec-8534-4615-9e0f-96c5ac1d10a3" alt="CowAgent" width="550" /></p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://github.com/zhayujie/CowAgent/releases/latest"><img src="https://img.shields.io/github/v/release/zhayujie/CowAgent" alt="Latest release"></a>
|
||||
<a href="https://github.com/zhayujie/CowAgent/blob/master/LICENSE"><img src="https://img.shields.io/github/license/zhayujie/CowAgent" alt="License: MIT"></a>
|
||||
<a href="https://github.com/zhayujie/CowAgent"><img src="https://img.shields.io/github/stars/zhayujie/CowAgent?style=flat-square" alt="Stars"></a> <br/>
|
||||
[<a href="https://github.com/zhayujie/CowAgent/blob/master/README.md">中文</a>] | [English] | [<a href="https://github.com/zhayujie/CowAgent/blob/master/docs/ja/README.md">日本語</a>]
|
||||
</p>
|
||||
|
||||
**CowAgent** is an AI super assistant powered by LLMs, capable of autonomous task planning, operating computers and external resources, creating and executing Skills, and continuously growing with long-term memory and a personal knowledge base. It supports flexible model switching, handles text, voice, images, and files, and can be integrated into WeChat, Web, Feishu, DingTalk, WeCom Bot, WeCom App, and WeChat Official Account — running 7×24 hours on your personal computer or server.
|
||||
|
||||
<p align="center">
|
||||
<a href="https://cowagent.ai/">🌐 Website</a> ·
|
||||
<a href="https://docs.cowagent.ai/en/intro/index">📖 Docs</a> ·
|
||||
<a href="https://docs.cowagent.ai/en/guide/quick-start">🚀 Quick Start</a> ·
|
||||
<a href="https://skills.cowagent.ai/">🧩 Skill Hub</a> ·
|
||||
<a href="https://link-ai.tech/cowagent/create">☁️ Try Online</a>
|
||||
</p>
|
||||
|
||||
## Introduction
|
||||
|
||||
> CowAgent is both an out-of-the-box AI super assistant and a highly extensible Agent framework. You can extend it with new model interfaces, channels, built-in tools, and the Skills system to flexibly implement various customization needs.
|
||||
|
||||
- ✅ **Autonomous Task Planning**: Understands complex tasks and autonomously plans execution, continuously thinking and invoking tools until goals are achieved.
|
||||
- ✅ **Long-term Memory**: Automatically persists conversation memory to local files and databases, including core memory, daily memory, and Deep Dream distillation, with keyword and vector retrieval support.
|
||||
- ✅ **Personal Knowledge Base**: Automatically organizes structured knowledge with cross-references to build a knowledge graph, with web-based visualization and conversational management.
|
||||
- ✅ **Skills System**: Implements a Skills creation and execution engine, supports installing skills from [Skill Hub](https://skills.cowagent.ai), GitHub, etc., or creating custom Skills through conversation.
|
||||
- ✅ **Tool System**: Built-in tools for file I/O, terminal execution, browser automation, scheduled tasks, messaging, and more — autonomously invoked by the Agent.
|
||||
- ✅ **CLI System**: Provides terminal commands and in-chat commands for process management, skill installation, configuration, and more.
|
||||
- ✅ **Multimodal Messages**: Supports parsing, processing, generating, and sending text, images, voice, files, and other message types.
|
||||
- ✅ **Multiple Model Support**: Supports DeepSeek, MiniMax, Claude, Gemini, OpenAI, GLM, Qwen, Doubao, Kimi, and other mainstream model providers.
|
||||
- ✅ **Multi-platform Deployment**: Runs on local computers or servers, integrable into WeChat, Web, Feishu, DingTalk, WeChat Official Account, and WeCom applications.
|
||||
|
||||
## Disclaimer
|
||||
|
||||
1. This project follows the [MIT License](/LICENSE) and is intended for technical research and learning. Users must comply with local laws, regulations, policies, and corporate bylaws. Any illegal or rights-infringing use is prohibited.
|
||||
2. Agent mode consumes more tokens than normal chat mode. Choose models based on effectiveness and cost. Agent has access to the host OS — please deploy in trusted environments.
|
||||
3. CowAgent focuses on open-source development and does not participate in, authorize, or issue any cryptocurrency.
|
||||
|
||||
## Demo
|
||||
|
||||
Try online (no deployment needed): [CowAgent](https://link-ai.tech/cowagent/create)
|
||||
|
||||
## Changelog
|
||||
|
||||
> **2026.04.14:** [v2.0.6](https://github.com/zhayujie/CowAgent/releases/tag/2.0.6) — Knowledge Base, Deep Dream Memory Distillation, Smart Context Compression, Web Console upgrades.
|
||||
|
||||
> **2026.04.01:** [v2.0.5](https://github.com/zhayujie/CowAgent/releases/tag/2.0.5) — Cow CLI, Skill Hub open source, Browser tool, WeCom Bot QR scan, and more.
|
||||
|
||||
> **2026.02.27:** [v2.0.2](https://github.com/zhayujie/CowAgent/releases/tag/2.0.2) — Web console overhaul (streaming chat, model/skill/memory/channel/scheduler/log management), multi-channel concurrent running, session persistence, new models including Gemini 3.1 Pro / Claude 4.6 Sonnet / Qwen3.5 Plus.
|
||||
|
||||
> **2026.02.13:** [v2.0.1](https://github.com/zhayujie/CowAgent/releases/tag/2.0.1) — Built-in Web Search tool, smart context trimming, runtime info dynamic update, Windows compatibility, fixes for scheduler memory loss, Feishu connection issues, and more.
|
||||
|
||||
> **2026.02.03:** [v2.0.0](https://github.com/zhayujie/CowAgent/releases/tag/2.0.0) — Full upgrade to AI super assistant with multi-step task planning, long-term memory, built-in tools, Skills framework, new models, and optimized channels.
|
||||
|
||||
> **2025.05.23:** [v1.7.6](https://github.com/zhayujie/CowAgent/releases/tag/1.7.6) — Web channel optimization, AgentMesh multi-agent plugin, Baidu TTS, claude-4-sonnet/opus support.
|
||||
|
||||
> **2025.04.11:** [v1.7.5](https://github.com/zhayujie/CowAgent/releases/tag/1.7.5) — wechatferry protocol, DeepSeek model, Tencent Cloud voice, ModelScope and Gitee-AI support.
|
||||
|
||||
> **2024.12.13:** [v1.7.4](https://github.com/zhayujie/CowAgent/releases/tag/1.7.4) — Gemini 2.0 model, Web channel, memory leak fix.
|
||||
|
||||
Full changelog: [Release Notes](https://docs.cowagent.ai/en/releases/overview)
|
||||
|
||||
<br/>
|
||||
|
||||
## 🚀 Quick Start
|
||||
|
||||
The project provides a one-click script for installation, configuration, startup, and management:
|
||||
|
||||
**Linux / macOS:**
|
||||
```bash
|
||||
bash <(curl -fsSL https://cdn.link-ai.tech/code/cow/run.sh)
|
||||
```
|
||||
|
||||
**Windows (PowerShell):**
|
||||
```powershell
|
||||
irm https://cdn.link-ai.tech/code/cow/run.ps1 | iex
|
||||
```
|
||||
|
||||
After running, the Web service starts by default. Access `http://localhost:9899/chat` to chat.
|
||||
|
||||
Script usage: [One-click Install](https://docs.cowagent.ai/en/guide/quick-start). After installation, you can also use `cow start`, `cow stop`, and other [CLI commands](https://docs.cowagent.ai/en/cli/index) to manage the service.
|
||||
|
||||
### Manual Installation
|
||||
|
||||
**1. Clone the project**
|
||||
|
||||
```bash
|
||||
git clone https://github.com/zhayujie/CowAgent
|
||||
cd CowAgent/
|
||||
```
|
||||
|
||||
**2. Install dependencies**
|
||||
|
||||
```bash
|
||||
pip3 install -r requirements.txt
|
||||
pip3 install -r requirements-optional.txt # optional but recommended
|
||||
```
|
||||
|
||||
**3. Install Cow CLI (recommended)**
|
||||
|
||||
```bash
|
||||
pip3 install -e .
|
||||
```
|
||||
|
||||
After installation, use `cow` commands to manage the service (start, stop, update, etc.) and skills. See [Command Docs](https://docs.cowagent.ai/en/cli/index).
|
||||
|
||||
**4. Install browser (optional)**
|
||||
|
||||
If you need the Agent to operate a browser (visit web pages, fill forms, etc.):
|
||||
|
||||
```bash
|
||||
cow install-browser
|
||||
```
|
||||
|
||||
This auto-installs `playwright` and Chromium. See [Browser Tool Docs](https://docs.cowagent.ai/en/tools/browser).
|
||||
|
||||
**5. Configure**
|
||||
|
||||
```bash
|
||||
cp config-template.json config.json
|
||||
```
|
||||
|
||||
Fill in your model API key and channel type in `config.json`. See the [configuration docs](https://docs.cowagent.ai/en/guide/manual-install) for details.
|
||||
|
||||
**6. Run**
|
||||
|
||||
```bash
|
||||
cow start # recommended, requires Cow CLI
|
||||
python3 app.py # or run directly
|
||||
```
|
||||
|
||||
For server deployment, use `cow` commands to manage the service:
|
||||
|
||||
```bash
|
||||
cow start # start in background
|
||||
cow stop # stop service
|
||||
cow restart # restart service
|
||||
cow status # check running status
|
||||
cow logs # view logs
|
||||
cow update # pull latest code and restart
|
||||
```
|
||||
|
||||
Or use the traditional way:
|
||||
|
||||
```bash
|
||||
nohup python3 app.py & tail -f nohup.out
|
||||
```
|
||||
|
||||
### Docker Deployment
|
||||
|
||||
```bash
|
||||
curl -O https://cdn.link-ai.tech/code/cow/docker-compose.yml
|
||||
# Edit docker-compose.yml with your config
|
||||
sudo docker compose up -d
|
||||
sudo docker logs -f chatgpt-on-wechat
|
||||
```
|
||||
|
||||
<br/>
|
||||
|
||||
## Models
|
||||
|
||||
Supports mainstream model providers. Recommended models for Agent mode:
|
||||
|
||||
| Provider | Recommended Model |
|
||||
| --- | --- |
|
||||
| DeepSeek | `deepseek-v4-flash` |
|
||||
| MiniMax | `MiniMax-M2.7` |
|
||||
| Claude | `claude-sonnet-4-6` |
|
||||
| Gemini | `gemini-3.1-pro-preview` |
|
||||
| OpenAI | `gpt-5.4` |
|
||||
| GLM | `glm-5.1` |
|
||||
| Qwen | `qwen3.6-plus` |
|
||||
| Doubao | `doubao-seed-2-0-code-preview-260215` |
|
||||
| Kimi | `kimi-k2.6` |
|
||||
|
||||
For detailed configuration of each model, see the [Models documentation](https://docs.cowagent.ai/en/models/index).
|
||||
|
||||
### Coding Plan
|
||||
|
||||
Coding Plan is a monthly subscription package offered by various providers, ideal for high-frequency Agent usage. All providers can be accessed via OpenAI-compatible mode:
|
||||
|
||||
```json
|
||||
{
|
||||
"bot_type": "openai",
|
||||
"model": "MODEL_NAME",
|
||||
"open_ai_api_base": "PROVIDER_CODING_PLAN_API_BASE",
|
||||
"open_ai_api_key": "YOUR_API_KEY"
|
||||
}
|
||||
```
|
||||
|
||||
- `bot_type`: Must be `openai`
|
||||
- `model`: Model name supported by the provider
|
||||
- `open_ai_api_base`: Provider's Coding Plan API Base (different from standard pay-as-you-go)
|
||||
- `open_ai_api_key`: Provider's Coding Plan API Key
|
||||
|
||||
> Note: Coding Plan API Base and API Key are usually separate from standard pay-as-you-go ones. Please obtain them from each provider's platform.
|
||||
|
||||
Supported providers include Alibaba Cloud, MiniMax, Zhipu GLM, Kimi, Volcengine, and more. For detailed configuration of each provider, see the [Coding Plan documentation](https://docs.cowagent.ai/en/models/coding-plan).
|
||||
|
||||
<br/>
|
||||
|
||||
## Channels
|
||||
|
||||
Supports multiple platforms. Set `channel_type` in `config.json` to switch:
|
||||
|
||||
| Channel | `channel_type` | Docs |
|
||||
| --- | --- | --- |
|
||||
| WeChat | `weixin` | [WeChat Setup](https://docs.cowagent.ai/en/channels/weixin) |
|
||||
| Web (default) | `web` | [Web Channel](https://docs.cowagent.ai/en/channels/web) |
|
||||
| Feishu | `feishu` | [Feishu Setup](https://docs.cowagent.ai/en/channels/feishu) |
|
||||
| DingTalk | `dingtalk` | [DingTalk Setup](https://docs.cowagent.ai/en/channels/dingtalk) |
|
||||
| WeCom Bot | `wecom_bot` | [WeCom Bot Setup](https://docs.cowagent.ai/en/channels/wecom-bot) |
|
||||
| WeCom App | `wechatcom_app` | [WeCom Setup](https://docs.cowagent.ai/en/channels/wecom) |
|
||||
| WeChat MP | `wechatmp` / `wechatmp_service` | [WeChat MP Setup](https://docs.cowagent.ai/en/channels/wechatmp) |
|
||||
| Terminal | `terminal` | — |
|
||||
|
||||
Multiple channels can be enabled simultaneously, separated by commas: `"channel_type": "feishu,dingtalk"`.
|
||||
|
||||
<br/>
|
||||
|
||||
## Enterprise Services
|
||||
|
||||
<a href="https://link-ai.tech" target="_blank"><img width="720" src="https://cdn.link-ai.tech/image/link-ai-intro.jpg"></a>
|
||||
|
||||
> [LinkAI](https://link-ai.tech/) is a one-stop AI agent platform for enterprises and developers, integrating multimodal LLMs, knowledge bases, Agent plugins, and workflows. Supports one-click integration with mainstream platforms, SaaS and private deployment.
|
||||
|
||||
<br/>
|
||||
|
||||
## 🔗 Related Projects
|
||||
|
||||
- [Cow Skill Hub](https://github.com/zhayujie/cow-skill-hub): Open skill marketplace for AI Agents — browse, search, install, and publish skills for CowAgent, OpenClaw, Claude Code, and more.
|
||||
- [bot-on-anything](https://github.com/zhayujie/bot-on-anything): Lightweight and highly extensible LLM application framework supporting Slack, Telegram, Discord, Gmail, and more.
|
||||
- [AgentMesh](https://github.com/MinimalFuture/AgentMesh): Open-source Multi-Agent framework for complex problem solving through agent team collaboration.
|
||||
|
||||
## 🔎 FAQ
|
||||
|
||||
FAQs: <https://github.com/zhayujie/CowAgent/wiki/FAQs>
|
||||
|
||||
## 🛠️ Contributing
|
||||
|
||||
Welcome to add new channels, referring to the [Feishu channel](https://github.com/zhayujie/CowAgent/blob/master/channel/feishu/feishu_channel.py) as an example. Also welcome to contribute new Skills, see the [Skill Creation docs](https://docs.cowagent.ai/en/skills/create), or submit to [Skill Hub](https://skills.cowagent.ai/submit).
|
||||
|
||||
## ✉ Contact
|
||||
|
||||
Welcome to submit PRs and Issues, and support the project with a 🌟 Star. For questions, check the [FAQ list](https://github.com/zhayujie/CowAgent/wiki/FAQs) or search [Issues](https://github.com/zhayujie/CowAgent/issues).
|
||||
|
||||
## 🌟 Contributors
|
||||
|
||||

|
||||
@@ -15,8 +15,11 @@ description: Integrate CowAgent into Feishu via a custom enterprise app
|
||||
|
||||
No need to manually create an app on the Feishu Developer Platform. Start the Cow project, open the web console (default `http://127.0.0.1:9899/`), go to **Channels**, click **Add Channel**, choose **Feishu**, then under the **Scan QR** tab click **One-click Create Feishu App** and scan with the **Feishu App** to complete app creation and connection automatically.
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/20260505181126.png" width="800"/>
|
||||
|
||||
<Note>
|
||||
The created app comes with all required permissions (messaging, card read/write, group events, etc.) and event subscriptions pre-configured. Currently only the Feishu mainland version is supported (Lark international not yet supported).
|
||||
1. Requires `lark-oapi` ≥ 1.5.5.
|
||||
2. The created app comes with all required permissions (messaging, card read/write, group events, etc.) and event subscriptions pre-configured — no manual setup on the developer console needed. Currently only the Feishu mainland version is supported (Lark international not yet supported).
|
||||
</Note>
|
||||
|
||||
When starting from CLI without `feishu_app_id` configured, the QR code is also printed to the terminal.
|
||||
|
||||
41
docs/en/channels/index.mdx
Normal file
41
docs/en/channels/index.mdx
Normal file
@@ -0,0 +1,41 @@
|
||||
---
|
||||
title: Channels Overview
|
||||
description: Channels supported by CowAgent and their capability matrix
|
||||
---
|
||||
|
||||
CowAgent supports multiple chat channels. Switch between them at startup via `channel_type`. The Web Console is enabled by default and can run in parallel with other channels.
|
||||
|
||||
## Capability Matrix
|
||||
|
||||
The table below summarizes the inbound message types, bot reply types, and group chat capabilities supported by each channel, making it easy to choose by scenario.
|
||||
|
||||
| Channel | Text | Image | File | Voice | Group Chat |
|
||||
| --- | :-: | :-: | :-: | :-: | :-: |
|
||||
| [WeChat](/en/channels/weixin) | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [Web Console](/en/channels/web) | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [Feishu](/en/channels/feishu) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [DingTalk](/en/channels/dingtalk) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [WeCom Bot](/en/channels/wecom-bot) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [QQ](/en/channels/qq) | ✅ | ✅ | ✅ | | ✅ |
|
||||
| [WeCom App](/en/channels/wecom) | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [Official Account](/en/channels/wechatmp) | ✅ | ✅ | | ✅ | |
|
||||
| [Telegram](/en/channels/telegram) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
|
||||
- The **Image / File / Voice** columns indicate that the channel can send and receive the corresponding message types; see each channel's docs for details
|
||||
- The **Group Chat** column indicates the ability to recognize and respond to group messages
|
||||
|
||||
<Tip>
|
||||
The voice / image capabilities of each channel depend on the configuration of the corresponding model provider. See [Models Overview](/en/models/index) for details.
|
||||
</Tip>
|
||||
|
||||
## Channel List
|
||||
|
||||
- [Web Console](/en/channels/web) — built-in browser-based chat and management panel, enabled by default
|
||||
- [WeChat](/en/channels/weixin) — log in via personal WeChat QR scan
|
||||
- [Feishu](/en/channels/feishu) — Feishu custom bot
|
||||
- [DingTalk](/en/channels/dingtalk) — DingTalk custom bot
|
||||
- [WeCom Bot](/en/channels/wecom-bot) — WeCom AI Bot via WebSocket long connection
|
||||
- [QQ](/en/channels/qq) — QQ Official Bot open platform
|
||||
- [WeCom App](/en/channels/wecom) — WeCom custom app integration
|
||||
- [Official Account](/en/channels/wechatmp) — WeChat Official Account (subscription / service)
|
||||
- [Telegram](/en/channels/telegram) — global IM, 5-minute setup, no public IP needed
|
||||
111
docs/en/channels/telegram.mdx
Normal file
111
docs/en/channels/telegram.mdx
Normal file
@@ -0,0 +1,111 @@
|
||||
---
|
||||
title: Telegram
|
||||
description: Integrate CowAgent with Telegram via the Bot API
|
||||
---
|
||||
|
||||
> Integrate CowAgent into Telegram via the official Bot API. Supports private chat and group chat (triggered by @mention or replying to the bot). Uses Long Polling — no public IP required, works out of the box.
|
||||
|
||||
|
||||
## 1. Setup
|
||||
|
||||
### Step 1: Create a Bot via BotFather
|
||||
|
||||
1. Open the official account [@BotFather](https://t.me/BotFather) in Telegram.
|
||||
2. Send `/newbot` and follow the prompts:
|
||||
- **Bot name** (display name, e.g. `My CowAgent Bot`)
|
||||
- **Bot username** (must end with `bot`, e.g. `my_cowagent_bot`)
|
||||
3. Once created, BotFather returns an **HTTP API Token** (e.g. `123456789:ABCdefGhIJKlmNoPQRsTUVwxyZ`). Keep it safe.
|
||||
|
||||
<Tip>
|
||||
The token is the password of your bot — never share it. If it leaks, send `/revoke` to `@BotFather` to reset it.
|
||||
</Tip>
|
||||
|
||||
### Step 2: (Group chat only) Disable Privacy Mode
|
||||
|
||||
Skip this step if you only use private chat. Telegram bots run in **Privacy Mode** by default — in groups they can only see commands suffixed with `@bot` (e.g. `/start@your_bot`) and replies to bot messages; **plain `@bot hello` text messages are not delivered**, so the bot will appear unresponsive in groups.
|
||||
|
||||
Send the following to `@BotFather`:
|
||||
|
||||
1. `/setprivacy`
|
||||
2. Pick the bot you just created
|
||||
3. Choose `Disable`
|
||||
|
||||
<Note>
|
||||
If the bot is still silent in groups after this, try removing it from the group and adding it back.
|
||||
</Note>
|
||||
|
||||
### Step 3: Connect to CowAgent
|
||||
|
||||
<Tabs>
|
||||
<Tab title="Web Console (Recommended)">
|
||||
Open the Web Console (default `http://127.0.0.1:9899`), go to **Channels**, click **Add Channel**, choose **Telegram**, paste the Bot Token, and click connect.
|
||||
</Tab>
|
||||
<Tab title="Config File">
|
||||
Add the following to `config.json` and start Cow:
|
||||
|
||||
```json
|
||||
{
|
||||
"channel_type": "telegram",
|
||||
"telegram_token": "123456789:ABCdefGhIJKlmNoPQRsTUVwxyZ",
|
||||
"telegram_group_trigger": "mention_or_reply"
|
||||
}
|
||||
```
|
||||
|
||||
| Key | Description | Default |
|
||||
| --- | --- | --- |
|
||||
| `telegram_token` | HTTP API Token returned by BotFather | - |
|
||||
| `telegram_group_trigger` | Group trigger: `mention_or_reply` (@ or reply) / `mention_only` (@ only) / `all` (all messages) | `mention_or_reply` |
|
||||
| `telegram_register_commands` | Whether to register the command menu with BotFather on startup | `true` |
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
The integration is ready when you see logs like:
|
||||
|
||||
```
|
||||
[Telegram] Bot logged in as @my_cowagent_bot (id=123456789)
|
||||
[Telegram] Registered 10 bot commands
|
||||
[Telegram] ✅ Telegram bot ready, polling for updates
|
||||
```
|
||||
|
||||
## 2. Capabilities
|
||||
|
||||
| Feature | Support |
|
||||
| --- | --- |
|
||||
| Private chat | ✅ |
|
||||
| Group chat (@bot / reply to bot) | ✅ |
|
||||
| Text messages | ✅ send / receive |
|
||||
| Image messages | ✅ send / receive |
|
||||
| Voice messages | ✅ send / receive (OGG/Opus) |
|
||||
| Video messages | ✅ send / receive |
|
||||
| File messages | ✅ send / receive (PDF / Word / Excel, etc.) |
|
||||
| Command menu | ✅ aligned with Web Console slash commands |
|
||||
|
||||
### Command Menu
|
||||
|
||||
On startup, the channel registers a command menu with BotFather. Typing `/` in Telegram shows a dropdown:
|
||||
|
||||
| Command | Description |
|
||||
| --- | --- |
|
||||
| `/help` | Show command help |
|
||||
| `/status` | View runtime status |
|
||||
| `/context` | View conversation context (`/context clear` to clear) |
|
||||
| `/skill` | Skill management (`/skill list`, `/skill install`, ...) |
|
||||
| `/memory` | Memory management (`/memory dream`) |
|
||||
| `/knowledge` | Knowledge base (`/knowledge list` / `on` / `off`) |
|
||||
| `/config` | View current config |
|
||||
| `/cancel` | Cancel the running Agent task |
|
||||
| `/logs` | View recent logs |
|
||||
| `/version` | Show version |
|
||||
|
||||
<Note>
|
||||
Telegram's command menu only displays top-level commands; subcommands are entered with a space, e.g. `/skill list`, `/context clear`.
|
||||
</Note>
|
||||
|
||||
## 3. Usage
|
||||
|
||||
Once connected:
|
||||
|
||||
- **Private chat**: search for your bot username (e.g. `@my_cowagent_bot`) in Telegram, click `Start` and chat away.
|
||||
- **Group chat**: add the bot to a group, then trigger it with `@bot hello` or by **replying to one of the bot's messages**. If the bot doesn't respond in groups, double-check Privacy Mode in [Step 2](#step-2-group-chat-only-disable-privacy-mode).
|
||||
|
||||
When sending an image or file, you can **add a caption** (description / question) directly in the attachment input — the bot will answer based on both. Sending an attachment first and then a follow-up question also works; the two messages are merged automatically.
|
||||
@@ -1,23 +1,32 @@
|
||||
---
|
||||
title: Web Console
|
||||
description: Use CowAgent through the web console
|
||||
description: Use CowAgent through the Web Console
|
||||
---
|
||||
|
||||
The Web Console is CowAgent's default channel. It starts automatically after launch, allowing you to chat with the Agent through a browser and manage models, skills, memory, channels, and other configurations online.
|
||||
The Web Console is CowAgent's default channel. It runs automatically once started, letting you chat with the Agent in a browser and manage models, skills, memory, channels, and other configuration online.
|
||||
|
||||
## Configuration
|
||||
|
||||
```json
|
||||
{
|
||||
"channel_type": "web",
|
||||
"web_port": 9899
|
||||
"web_host": "0.0.0.0",
|
||||
"web_port": 9899,
|
||||
"web_password": "",
|
||||
"enable_thinking": false
|
||||
}
|
||||
```
|
||||
|
||||
| Parameter | Description | Default |
|
||||
| --- | --- | --- |
|
||||
| `channel_type` | Set to `web` | `web` |
|
||||
| `web_host` | Web service listen address. Defaults to `127.0.0.1` (local only); set to `0.0.0.0` for public access and configure a password | `""` |
|
||||
| `web_port` | Web service listen port | `9899` |
|
||||
| `web_password` | Access password. Leave empty to disable password protection; recommended when listening on `0.0.0.0` | `""` |
|
||||
| `web_session_expire_days` | Login session validity in days | `30` |
|
||||
| `enable_thinking` | Whether to enable deep thinking mode | `false` |
|
||||
|
||||
Once a password is configured, you must enter it to log in when accessing the console. The login session is kept for 30 days by default, so restarting the service during that period does not require re-login. The password can also be changed online from the "Configuration" page in the console.
|
||||
|
||||
## Access URL
|
||||
|
||||
@@ -34,13 +43,13 @@ After starting the project, visit:
|
||||
|
||||
### Chat Interface
|
||||
|
||||
Supports streaming output with real-time display of the Agent's reasoning process and tool calls, providing intuitive observation of the Agent's decision-making:
|
||||
Supports streaming output with real-time display of the Agent's reasoning process and tool calls, providing intuitive observation of the Agent's decision-making. Deep thinking can be toggled via configuration or the "Agent Configuration" switch in the console.
|
||||
|
||||
<img width="850" src="https://cdn.link-ai.tech/doc/20260227180120.png" />
|
||||
|
||||
#### Multi-Session Management
|
||||
|
||||
The chat interface supports multi-session management. All session records are persistently stored in a SQLite database:
|
||||
The chat interface supports multi-session management. All session records are persistently stored in the database:
|
||||
|
||||
- **Session List**: Click the history icon on the left to expand/collapse the session list panel, with scroll-to-load support for all historical sessions
|
||||
- **AI-Generated Titles**: After the first exchange in a new session, the model is automatically called to generate a short summary title
|
||||
@@ -50,9 +59,9 @@ The chat interface supports multi-session management. All session records are pe
|
||||
|
||||
### Model Management
|
||||
|
||||
Manage model configurations online without manually editing config files:
|
||||
Manage text, image, voice, and embedding model configurations for different providers online — no need to edit config files manually:
|
||||
|
||||
<img width="850" src="https://cdn.link-ai.tech/doc/20260227173811.png" />
|
||||
<img width="850" src="https://cdn.link-ai.tech/doc/20260521212949.png" />
|
||||
|
||||
### Skill Management
|
||||
|
||||
@@ -80,6 +89,6 @@ View and manage scheduled tasks online, including one-time tasks, fixed interval
|
||||
|
||||
### Logs
|
||||
|
||||
View Agent runtime logs in real-time for monitoring and troubleshooting:
|
||||
View Agent runtime logs in real time for monitoring and troubleshooting:
|
||||
|
||||
<img width="850" src="https://cdn.link-ai.tech/doc/20260227173514.png" />
|
||||
|
||||
@@ -3,71 +3,88 @@ title: WeCom Bot
|
||||
description: Connect CowAgent to WeCom AI Bot (WebSocket long connection)
|
||||
---
|
||||
|
||||
Connect CowAgent via WeCom AI Bot, supporting both direct messages and group chats. No public IP required — uses WebSocket long connection with Markdown rendering and streaming output.
|
||||
> Connect CowAgent via WeCom AI Bot, supporting both internal direct messages and group chats. No public IP required — uses a WebSocket long connection, with Markdown rendering and streaming output.
|
||||
|
||||
<Note>
|
||||
WeCom Bot and WeCom App are two different integration methods. WeCom Bot uses WebSocket long connection, requiring no public IP or domain, making it easier to set up.
|
||||
WeCom Bot and WeCom App are two different integration methods. WeCom Bot uses a WebSocket long connection and requires no public IP or domain, making setup much simpler.
|
||||
</Note>
|
||||
|
||||
## 1. Create an AI Bot
|
||||
## 1. Connection methods
|
||||
|
||||
### Option A: One-click QR scan (recommended)
|
||||
|
||||
No need to create the bot ahead of time. Start CowAgent and open the Web console (local URL: http://127.0.0.1:9899/), go to the **Channels** tab, click **Connect Channel**, choose **WeCom Bot**, switch to **QR scan** mode, and scan the QR code with **WeCom** — bot creation and connection complete automatically.
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/20260401121213.png" width="800"/>
|
||||
|
||||
<Note>
|
||||
After a successful scan, you can further configure the bot (name, avatar, visibility scope, etc.) in **WeCom Workbench → AI Bot**.
|
||||
</Note>
|
||||
|
||||
### Option B: Manual creation
|
||||
|
||||
Create the AI Bot in WeCom and obtain the Bot ID and Secret, then connect via the Web console or config file.
|
||||
|
||||
**Step 1: Create the AI Bot**
|
||||
|
||||
1. Open the WeCom client, go to **Workbench**, and click **AI Bot**:
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/20260316180959.png" width="800"/>
|
||||
|
||||
2. Click **Create Bot** → **Manual Creation**:
|
||||
2. Click **Create Bot → Manual Creation**:
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/20260316181118.png" width="600"/>
|
||||
<img src="https://cdn.link-ai.tech/doc/20260316181118.png" width="800"/>
|
||||
|
||||
3. Scroll to the bottom of the right panel and select **API Mode**:
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/20260316181215.png" width="600"/>
|
||||
<img src="https://cdn.link-ai.tech/doc/20260316181215.png" width="800"/>
|
||||
|
||||
4. Set the bot name, avatar, and visibility scope. Select **Long Connection** mode, note down the **Bot ID** and **Secret**, then click Save.
|
||||
4. Set the bot name, avatar, and visibility scope. Choose **Long Connection** mode, save the **Bot ID** and **Secret**, then click Save.
|
||||
|
||||
## 2. Configuration
|
||||
**Step 2: Connect to CowAgent**
|
||||
|
||||
### Option A: Web Console
|
||||
<Tabs>
|
||||
<Tab title="Web Console">
|
||||
Open the Web console, go to the **Channels** tab, click **Connect Channel**, choose **WeCom Bot**, switch to **Manual** mode, enter the Bot ID and Secret, and click Connect.
|
||||
|
||||
Start the program and open the Web console (local access: http://127.0.0.1:9899). Go to the **Channels** tab, click **Connect Channel**, select **WeCom Bot**, fill in the Bot ID and Secret from the previous step, and click Connect.
|
||||
<img src="https://cdn.link-ai.tech/doc/20260316181711.png" width="800"/>
|
||||
</Tab>
|
||||
<Tab title="Config File">
|
||||
Add the following to `config.json`, then start CowAgent:
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/20260316181711.png" width="600"/>
|
||||
```json
|
||||
{
|
||||
"channel_type": "wecom_bot",
|
||||
"wecom_bot_id": "YOUR_BOT_ID",
|
||||
"wecom_bot_secret": "YOUR_SECRET"
|
||||
}
|
||||
```
|
||||
|
||||
### Option B: Config File
|
||||
| Parameter | Description |
|
||||
| --- | --- |
|
||||
| `wecom_bot_id` | Bot ID of the AI Bot |
|
||||
| `wecom_bot_secret` | Secret of the AI Bot |
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
Add the following to your `config.json`:
|
||||
The log line `[WecomBot] Subscribe success` confirms the connection is established.
|
||||
|
||||
```json
|
||||
{
|
||||
"channel_type": "wecom_bot",
|
||||
"wecom_bot_id": "YOUR_BOT_ID",
|
||||
"wecom_bot_secret": "YOUR_SECRET"
|
||||
}
|
||||
```
|
||||
|
||||
| Parameter | Description |
|
||||
| --- | --- |
|
||||
| `wecom_bot_id` | Bot ID of the AI Bot |
|
||||
| `wecom_bot_secret` | Secret for the AI Bot |
|
||||
|
||||
After configuration, start the program. The log message `[WecomBot] Subscribe success` indicates a successful connection.
|
||||
|
||||
## 3. Supported Features
|
||||
## 2. Supported features
|
||||
|
||||
| Feature | Status |
|
||||
| --- | --- |
|
||||
| Direct Messages | ✅ |
|
||||
| Group Chat (@bot) | ✅ |
|
||||
| Text Messages | ✅ Send & Receive |
|
||||
| Image Messages | ✅ Send & Receive |
|
||||
| File Messages | ✅ Send & Receive |
|
||||
| Streaming Reply | ✅ |
|
||||
| Scheduled Push | ✅ |
|
||||
| Direct chat | ✅ |
|
||||
| Group chat (@bot) | ✅ |
|
||||
| Text messages | ✅ Send / Receive |
|
||||
| Image messages | ✅ Send / Receive |
|
||||
| File messages | ✅ Send / Receive |
|
||||
| Streaming replies | ✅ |
|
||||
| Scheduled push messages | ✅ |
|
||||
|
||||
## 4. Usage
|
||||
## 3. Usage
|
||||
|
||||
Search for the bot name in WeCom to start a direct conversation.
|
||||
Search for the bot's name inside WeCom to start a direct chat.
|
||||
|
||||
To use in group chats, add the bot to a group and @mention it to send messages.
|
||||
To use the bot in an internal group chat, add it to the group and @-mention it.
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/20260316182902.png" width="800"/>
|
||||
|
||||
@@ -1,19 +1,21 @@
|
||||
---
|
||||
title: WeChat
|
||||
description: Connect CowAgent to personal WeChat
|
||||
description: Connect CowAgent to personal WeChat (via the official API)
|
||||
---
|
||||
|
||||
> Connect CowAgent to your personal WeChat. Simply scan a QR code to log in — no public IP required. Supports text, image, voice, file, and video messages.
|
||||
> Connect CowAgent to your personal WeChat — scan to log in, no public IP required. Supports text, image, voice, file, and video messages in 1-on-1 chats. Backed by WeChat's official API; safe to use. After connecting, a bot assistant is added to your conversation list without affecting normal account usage.
|
||||
|
||||
## 1. Configuration
|
||||
## 1. Setup and run
|
||||
|
||||
### Option A: Web Console
|
||||
### Option A: Web console
|
||||
|
||||
Start the program and open the Web console (local access: http://127.0.0.1:9899). Go to the **Channels** tab, click **Connect Channel**, select **WeChat**, and follow the prompts to scan the QR code.
|
||||
Start CowAgent and open the Web console (local URL: http://127.0.0.1:9899/). Go to the **Channels** tab, click **Connect Channel**, select **WeChat**, and follow the prompts to scan in.
|
||||
|
||||
### Option B: Config File
|
||||
<img src="https://cdn.link-ai.tech/doc/20260322195114.png" width="800" />
|
||||
|
||||
Set `channel_type` to `weixin` in your `config.json`:
|
||||
### Option B: Config file
|
||||
|
||||
Set `channel_type` to `weixin` in `config.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
@@ -21,52 +23,49 @@ Set `channel_type` to `weixin` in your `config.json`:
|
||||
}
|
||||
```
|
||||
|
||||
After starting the program, a QR code will be displayed in the terminal. Scan it with WeChat and confirm on your phone to complete login.
|
||||
After starting CowAgent, a QR code is displayed in the terminal. Scan it with WeChat to complete login.
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/20260322195509.png" width="800" />
|
||||
|
||||
<Note>
|
||||
For backward compatibility, setting `channel_type` to `wx` also activates the WeChat channel.
|
||||
1. For backward compatibility, setting `channel_type` to `wx` also activates the WeChat channel.
|
||||
2. The WeChat client must be on version **8.0.69** or higher.
|
||||
</Note>
|
||||
|
||||
## 2. Parameters
|
||||
## 2. Usage
|
||||
|
||||
| Parameter | Description | Default |
|
||||
| --- | --- | --- |
|
||||
| `channel_type` | Set to `weixin` or `wx` | — |
|
||||
Once authorized, the integration completes and you can start chatting. A bot assistant is created in your WeChat conversation list, leaving normal account usage unaffected.
|
||||
|
||||
Login credentials are automatically saved to `~/.weixin_cow_credentials.json`. To force a re-login, delete this file and restart.
|
||||
> You can find the bot at any time by searching for **"微信ClawBot"**. You may also rename it, change its avatar, pin it to the top of your conversation list, and so on.
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/83ae8251d896219fde4803f4205205be.jpg" width="250" />
|
||||
|
||||
## 3. Login
|
||||
|
||||
### QR Code Login
|
||||
### QR code login
|
||||
|
||||
On first startup, a QR code is displayed in the terminal (valid for approximately 2 minutes). Scan it with WeChat and confirm on your phone.
|
||||
On first startup, a QR code appears in the terminal (valid for around 2 minutes). Scan it with WeChat and confirm on your phone to log in.
|
||||
|
||||
- The QR code automatically refreshes when it expires
|
||||
- The `qrcode` dependency is already included in `requirements.txt`, enabling QR code rendering directly in the terminal
|
||||
- The QR code refreshes automatically when it expires
|
||||
- The `qrcode` dependency is already included in `requirements.txt`, so the QR code renders directly in the terminal after install
|
||||
|
||||
### Credential Persistence
|
||||
### Credential persistence
|
||||
|
||||
After successful login, credentials are saved to `~/.weixin_cow_credentials.json`. Subsequent startups will reuse the saved credentials without requiring a new scan.
|
||||
After a successful login, credentials are saved to `~/.weixin_cow_credentials.json`. Subsequent startups reuse the saved credentials with no need to re-scan.
|
||||
|
||||
To force a re-login, delete the credentials file and restart the program.
|
||||
To force a re-login, delete the credentials file and restart.
|
||||
|
||||
### Session Expiry
|
||||
### Session expiry
|
||||
|
||||
When the WeChat session expires (errcode -14), the program automatically clears old credentials and initiates a new QR login — no manual intervention required.
|
||||
When the WeChat session expires (errcode `-14`), CowAgent automatically clears old credentials and initiates a new QR login — no manual intervention required.
|
||||
|
||||
## 4. Supported Features
|
||||
## 4. Supported features
|
||||
|
||||
| Feature | Status |
|
||||
| --- | --- |
|
||||
| Direct Messages | ✅ |
|
||||
| Text Messages | ✅ Send & Receive |
|
||||
| Image Messages | ✅ Send & Receive |
|
||||
| File Messages | ✅ Send & Receive |
|
||||
| Video Messages | ✅ Send & Receive |
|
||||
| Voice Messages | ✅ Receive |
|
||||
|
||||
## 5. Notes
|
||||
|
||||
1. Ensure network access to `ilinkai.weixin.qq.com`.
|
||||
2. Media files (images, files, videos) are transferred via CDN with AES-128-ECB encryption, handled automatically by the program.
|
||||
3. A stable network connection is recommended to avoid frequent disconnections that would require re-scanning.
|
||||
| Direct messages | ✅ |
|
||||
| Text messages | ✅ Send & Receive |
|
||||
| Image messages | ✅ Send & Receive |
|
||||
| File messages | ✅ Send & Receive |
|
||||
| Video messages | ✅ Send & Receive |
|
||||
| Voice messages | ✅ Receive (built-in speech recognition) |
|
||||
|
||||
@@ -25,6 +25,14 @@ View current session and service status, including process info, model configura
|
||||
/status
|
||||
```
|
||||
|
||||
## cancel
|
||||
|
||||
Abort the agent task currently running in this session. When the agent is busy with a long task (e.g. multi-turn tool calls or a long streaming response), send `/cancel` and the agent will stop before the next tool execution. Available across all channels — Web, WeChat, WeCom, Feishu, etc.
|
||||
|
||||
```text
|
||||
/cancel
|
||||
```
|
||||
|
||||
## config
|
||||
|
||||
View or modify runtime configuration. Changes take effect immediately without restarting.
|
||||
|
||||
@@ -57,6 +57,7 @@ In the Web console or any connected channel, type `/` to see command suggestions
|
||||
| --- | --- |
|
||||
| `/help` | Show command help |
|
||||
| `/status` | View service status and configuration |
|
||||
| `/cancel` | Abort the currently running agent task |
|
||||
| `/config` | View or modify runtime configuration |
|
||||
| `/skill` | Manage skills (install, uninstall, enable, disable, etc.) |
|
||||
| `/memory dream [N]` | Manually trigger memory distillation (default 3 days, max 30) |
|
||||
@@ -80,6 +81,7 @@ In the Web console or any connected channel, type `/` to see command suggestions
|
||||
| version | ✓ | ✓ |
|
||||
| status | ✓ | ✓ |
|
||||
| logs | ✓ | ✓ |
|
||||
| cancel | ✗ | ✓ |
|
||||
| config | ✗ | ✓ |
|
||||
| context | — | ✓ |
|
||||
| memory (subcommands) | ✗ | ✓ |
|
||||
|
||||
@@ -19,6 +19,24 @@ cow skill list
|
||||
```
|
||||
</CodeGroup>
|
||||
|
||||
Example output:
|
||||
|
||||
```
|
||||
📦 Installed skills (3/4)
|
||||
|
||||
✅ pptx
|
||||
Use this skill any time a .pptx file is involved…
|
||||
Source: cowhub
|
||||
|
||||
✅ skill-creator
|
||||
Create, install, or update skills…
|
||||
Source: builtin
|
||||
|
||||
⏸️ image-vision (disabled)
|
||||
Image understanding and visual analysis
|
||||
Source: builtin
|
||||
```
|
||||
|
||||
**Browse the Skill Hub** (view all available skills):
|
||||
|
||||
<CodeGroup>
|
||||
|
||||
@@ -81,7 +81,7 @@ nohup python3 app.py & tail -f nohup.out
|
||||
```
|
||||
|
||||
<Tip>
|
||||
If deploying on a server, open port `9899` in your firewall or security group to access the Web console. It's recommended to restrict access to specific IPs for security.
|
||||
**Deploying on a server?** By default `web_host` only listens on `127.0.0.1` (local access). Set `web_host` to `0.0.0.0` in `config.json` to make the console reachable from outside, and set `web_password` to protect it. Don't forget to open port `9899` in your firewall or security group — ideally restricted to specific IPs.
|
||||
</Tip>
|
||||
|
||||
## Docker Deployment
|
||||
@@ -113,7 +113,7 @@ sudo docker logs -f chatgpt-on-wechat
|
||||
```
|
||||
|
||||
<Tip>
|
||||
If deploying on a server, open port `9899` in your firewall or security group to access the Web console. It's recommended to restrict access to specific IPs for security.
|
||||
**Running in Docker?** Set `WEB_HOST` to `0.0.0.0` in `docker-compose.yml` so the console is reachable from outside the container, and set `WEB_PASSWORD` to protect it. Make sure port `9899` is mapped to the host and open in your firewall or security group.
|
||||
</Tip>
|
||||
|
||||
## Core Configuration
|
||||
|
||||
@@ -33,6 +33,10 @@ The script automatically performs these steps:
|
||||
|
||||
By default, the Web console starts after installation. Access `http://localhost:9899` to begin chatting.
|
||||
|
||||
<Note>
|
||||
**Deploying on a server?** By default `web_host` only listens on `127.0.0.1` (local access only). Set `web_host` to `0.0.0.0` in `config.json` to make the console reachable from outside, and set `web_password` to protect it. Don't forget to open port `9899` in your firewall or security group — ideally restricted to specific IPs.
|
||||
</Note>
|
||||
|
||||
## Management Commands
|
||||
|
||||
After installation, use the `cow` command to manage the service:
|
||||
|
||||
61
docs/en/guide/upgrade.mdx
Normal file
61
docs/en/guide/upgrade.mdx
Normal file
@@ -0,0 +1,61 @@
|
||||
---
|
||||
title: Upgrade
|
||||
description: How to upgrade CowAgent
|
||||
---
|
||||
|
||||
## Recommended: One-line upgrade
|
||||
|
||||
Use `cow update` to pull the latest code and restart the service in one step:
|
||||
|
||||
```bash
|
||||
cow update
|
||||
```
|
||||
|
||||
The command runs the following automatically:
|
||||
|
||||
1. Pull the latest code (`git pull`)
|
||||
2. Stop the running service
|
||||
3. Update Python dependencies
|
||||
4. Reinstall the CLI
|
||||
5. Start the service
|
||||
|
||||
<Note>
|
||||
If the Cow CLI is not installed, `./run.sh update` performs the same operations.
|
||||
</Note>
|
||||
|
||||
## Manual upgrade
|
||||
|
||||
Run the following inside the project root:
|
||||
|
||||
```bash
|
||||
git pull
|
||||
pip3 install -r requirements.txt
|
||||
pip3 install -e .
|
||||
```
|
||||
|
||||
Then restart the service:
|
||||
|
||||
```bash
|
||||
# Using Cow CLI (recommended)
|
||||
cow restart
|
||||
|
||||
# Or using run.sh
|
||||
./run.sh restart
|
||||
|
||||
# Or restart manually with nohup
|
||||
kill $(ps -ef | grep app.py | grep -v grep | awk '{print $2}')
|
||||
nohup python3 app.py & tail -f nohup.out
|
||||
```
|
||||
|
||||
## Docker upgrade
|
||||
|
||||
Run the following in the directory containing `docker-compose.yml`:
|
||||
|
||||
```bash
|
||||
sudo docker compose pull
|
||||
sudo docker compose up -d
|
||||
```
|
||||
|
||||
<Tip>
|
||||
Back up `config.json` before upgrading. For Docker deployments, mount the workspace directory as a volume to persist data across upgrades.
|
||||
</Tip>
|
||||
@@ -9,7 +9,7 @@ CowAgent 2.0 has evolved from a simple chatbot into a super intelligent assistan
|
||||
|
||||
CowAgent's architecture consists of the following core modules:
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/cow-agent-arch-en.jpg.jpg" alt="CowAgent Architecture" />
|
||||
<img src="https://cdn.jsdelivr.net/gh/zhayujie/cowagent-assets@main/architecture/en/architecture.jpg" alt="CowAgent Architecture" />
|
||||
|
||||
| Module | Description |
|
||||
| --- | --- |
|
||||
@@ -39,8 +39,8 @@ The Agent workspace is located at `~/cow` by default and stores system prompts,
|
||||
|
||||
```
|
||||
~/cow/
|
||||
├── system.md # Agent system prompt
|
||||
├── user.md # User profile
|
||||
├── SYSTEM.md # Agent system prompt
|
||||
├── USER.md # User profile
|
||||
├── MEMORY.md # Core memory
|
||||
├── memory/ # Long-term memory storage
|
||||
│ └── YYYY-MM-DD.md # Daily memory
|
||||
@@ -67,9 +67,10 @@ Configure Agent mode parameters in `config.json`:
|
||||
{
|
||||
"agent": true,
|
||||
"agent_workspace": "~/cow",
|
||||
"agent_max_context_tokens": 40000,
|
||||
"agent_max_context_turns": 30,
|
||||
"agent_max_steps": 15
|
||||
"agent_max_context_tokens": 50000,
|
||||
"agent_max_context_turns": 20,
|
||||
"agent_max_steps": 20,
|
||||
"enable_thinking": false
|
||||
}
|
||||
```
|
||||
|
||||
@@ -77,7 +78,9 @@ Configure Agent mode parameters in `config.json`:
|
||||
| --- | --- | --- |
|
||||
| `agent` | Enable Agent mode | `true` |
|
||||
| `agent_workspace` | Workspace path | `~/cow` |
|
||||
| `agent_max_context_tokens` | Max context tokens | `40000` |
|
||||
| `agent_max_context_turns` | Max context turns | `30` |
|
||||
| `agent_max_steps` | Max decision steps per task | `15` |
|
||||
| `agent_max_context_tokens` | Max context tokens | `50000` |
|
||||
| `agent_max_context_turns` | Max context turns | `20` |
|
||||
| `agent_max_steps` | Max decision steps per task | `20` |
|
||||
| `enable_thinking` | Enable deep-thinking mode | `false` |
|
||||
| `knowledge` | Enable personal knowledge base | `true` |
|
||||
| `knowledge` | Enable personal knowledge base | `true` |
|
||||
|
||||
@@ -84,7 +84,7 @@ Secrets required by skills are stored in an environment variable file, managed b
|
||||
|
||||
The Skills system provides infinite extensibility for the Agent. Each Skill consists of a description file, execution scripts (optional), and resources (optional), describing how to complete specific types of tasks. Skills allow the Agent to follow instructions for complex workflows, invoke tools, or integrate third-party systems.
|
||||
|
||||
- **[Skill Hub](https://skills.cowagent.ai/):** An open skill marketplace featuring official, community, and third-party skills. Install with one command.
|
||||
- [Skill Hub](https://skills.cowagent.ai/): An open skill marketplace featuring official, community, and third-party skills. Install with one command.
|
||||
- **Built-in skills:** Located in the project's `skills/` directory, including skill creator, image recognition, LinkAI agent, web fetch, and more. Built-in skills are automatically enabled based on dependency conditions (API keys, system commands, etc.).
|
||||
- **Custom skills:** Created by users through conversation, stored in the workspace (`~/cow/skills/`), capable of implementing any complex business process or third-party integration.
|
||||
|
||||
|
||||
@@ -1,53 +1,60 @@
|
||||
---
|
||||
title: Introduction
|
||||
description: CowAgent - AI Super Assistant powered by LLMs
|
||||
description: CowAgent - Open-source super AI assistant and Agent Harness
|
||||
---
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/78c5dd674e2c828642ecc0406669fed7.png" alt="CowAgent" width="600px"/>
|
||||
<div align="center">
|
||||
<img src="https://cdn.link-ai.tech/doc/78c5dd674e2c828642ecc0406669fed7.png" alt="CowAgent" width="450px"/>
|
||||
</div>
|
||||
|
||||
**CowAgent** is an AI super assistant powered by LLMs with autonomous task planning, long-term memory, skills system, multimodal messages, multiple model support, and multi-platform deployment.
|
||||
**CowAgent** is an open-source super AI assistant and Agent Harness. It proactively plans tasks, runs tools and skills, and autonomously grows with memory and knowledge.
|
||||
|
||||
CowAgent can proactively think and plan tasks, operate computers and external resources, create and execute Skills, and continuously grow with long-term memory. It supports flexible switching between multiple models, handles text, voice, images, files and other multimodal messages, and can be integrated into WeChat, web, Feishu, DingTalk, WeCom, and WeChat Official Account. It runs 7x24 hours on your personal computer or server.
|
||||
CowAgent is lightweight, easy to deploy, and built to extend. Plug in any major LLM provider, run it across Web and major IM platforms, 24/7 on a personal computer or server.
|
||||
|
||||
<Card title="GitHub" icon="github" href="https://github.com/zhayujie/CowAgent">
|
||||
github.com/zhayujie/CowAgent
|
||||
</Card>
|
||||
<CardGroup cols={2}>
|
||||
<Card title="GitHub" icon="github" href="https://github.com/zhayujie/CowAgent">
|
||||
Open-source repository — Star and contribute
|
||||
</Card>
|
||||
<Card title="Try Online" icon="cloud" href="https://link-ai.tech/cowagent/create">
|
||||
No setup required — experience CowAgent instantly
|
||||
</Card>
|
||||
</CardGroup>
|
||||
|
||||
## Core Capabilities
|
||||
|
||||
<CardGroup cols={2}>
|
||||
<Card title="Autonomous Task Planning" icon="brain" href="/en/intro/architecture">
|
||||
Understands complex tasks and autonomously plans execution, continuously thinking and invoking tools until goals are achieved. Supports accessing file systems, terminals, browsers, schedulers, and other system resources through tools.
|
||||
Decomposes complex tasks and executes them step by step, looping over tools and skills until the goal is reached.
|
||||
</Card>
|
||||
<Card title="Long-term Memory" icon="database" href="/en/memory">
|
||||
Three-tier memory flow (context → daily memory → global memory) with daily Deep Dream distillation, keyword and vector retrieval support.
|
||||
<Card title="Long-term Memory" icon="database" href="/en/memory/index">
|
||||
Three-tier architecture (context → daily → core), automatic Deep Dream distillation, hybrid keyword + vector retrieval.
|
||||
</Card>
|
||||
<Card title="Knowledge Base" icon="book" href="/en/knowledge">
|
||||
Automatically organizes structured knowledge with knowledge graph visualization, building a continuously growing knowledge network through cross-references.
|
||||
<Card title="Personal Knowledge Base" icon="book" href="/en/knowledge/index">
|
||||
Auto-curates structured knowledge into a Markdown wiki, builds an evolving knowledge graph with visual browsing.
|
||||
</Card>
|
||||
<Card title="Skills System" icon="puzzle-piece" href="/en/skills/index">
|
||||
Implements a Skills creation and execution engine with built-in skills, and supports custom Skills development through natural language conversation.
|
||||
A complete skill creation and execution engine. Install from Skill Hub or generate custom skills via natural-language conversation.
|
||||
</Card>
|
||||
<Card title="Multimodal Messages" icon="image" href="/en/channels/web">
|
||||
Supports parsing, processing, generating, and sending text, images, voice, files, and other message types.
|
||||
<Card title="Multimodal Messaging" icon="image" href="/en/channels/web">
|
||||
First-class support for text, images, voice, and files — recognition, generation, and delivery.
|
||||
</Card>
|
||||
<Card title="Tool System" icon="wrench" href="/en/tools/index">
|
||||
Built-in tools for file I/O, terminal execution, browser automation, scheduled tasks, messaging, and more. The Agent autonomously invokes tools to accomplish complex tasks.
|
||||
Built-in file I/O, terminal, browser, scheduler, memory retrieval, web search, and more — with native MCP integration.
|
||||
</Card>
|
||||
<Card title="Command System" icon="terminal" href="/en/cli/index">
|
||||
Provides terminal CLI and in-chat commands for process management, skill installation, configuration, context inspection, and other common operations.
|
||||
Terminal CLI and in-chat commands for process management, skill installation, configuration, and context inspection.
|
||||
</Card>
|
||||
<Card title="Multiple Model Support" icon="microchip" href="/en/models/index">
|
||||
Supports mainstream model providers including OpenAI, Claude, Gemini, DeepSeek, MiniMax, GLM, Qwen, Kimi, Doubao, and more.
|
||||
<Card title="Pluggable Models" icon="microchip" href="/en/models/index">
|
||||
Claude, GPT, Gemini, DeepSeek, Qwen, GLM, Kimi, MiniMax, Doubao, and more — swap providers from the Web console with one click.
|
||||
</Card>
|
||||
<Card title="Multi-platform Deployment" icon="server" href="/en/channels/weixin">
|
||||
Runs on local computers or servers, integrable into WeChat, web, Feishu, DingTalk, WeChat Official Account, and WeCom applications.
|
||||
<Card title="Multi-channel Integration" icon="server" href="/en/channels/index">
|
||||
A single Agent simultaneously serves Web, WeChat, Feishu, DingTalk, WeCom, QQ, and Official Accounts.
|
||||
</Card>
|
||||
</CardGroup>
|
||||
|
||||
## Quick Experience
|
||||
## Quick Start
|
||||
|
||||
Run the following command in your terminal for one-click install, configuration, and startup:
|
||||
Run one of the commands below to install, configure, and start CowAgent in a single step:
|
||||
|
||||
<Tabs>
|
||||
<Tab title="Linux / macOS">
|
||||
@@ -62,25 +69,25 @@ Run the following command in your terminal for one-click install, configuration,
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
By default, the Web service starts after running. Access `http://localhost:9899/chat` to chat in the web interface.
|
||||
Once started, open `http://localhost:9899` to access the **Web console** — the unified place to chat, configure providers, connect channels, and install skills.
|
||||
|
||||
<CardGroup cols={2}>
|
||||
<Card title="Quick Start" icon="rocket" href="/en/guide/quick-start">
|
||||
Complete installation and run guide
|
||||
</Card>
|
||||
<Card title="Architecture" icon="sitemap" href="/en/intro/architecture">
|
||||
CowAgent system architecture design
|
||||
CowAgent system architecture
|
||||
</Card>
|
||||
</CardGroup>
|
||||
|
||||
## Disclaimer
|
||||
|
||||
1. This project follows the [MIT License](https://github.com/zhayujie/CowAgent/blob/master/LICENSE) and is intended for technical research and learning. Users must comply with local laws, regulations, policies, and corporate bylaws. Any illegal or rights-infringing use is prohibited.
|
||||
2. Agent mode consumes more tokens than normal chat mode. Choose models based on effectiveness and cost. Agent has access to the host operating system — deploy with caution.
|
||||
3. CowAgent focuses on open-source development and does not participate in, authorize, or issue any cryptocurrency.
|
||||
1. This project is licensed under the [MIT License](https://github.com/zhayujie/CowAgent/blob/master/LICENSE) and is intended for technical research and learning. You are responsible for complying with applicable laws and regulations in your jurisdiction; the maintainers assume no liability for any consequences arising from use of this project.
|
||||
2. **Cost & safety:** Agent mode consumes substantially more tokens than plain chat — pick models that balance quality and cost. The Agent has access to your local operating system; deploy only in trusted environments.
|
||||
3. CowAgent is a pure open-source project and does not participate in, authorize, or issue any cryptocurrency.
|
||||
|
||||
## Community
|
||||
|
||||
Add our assistant on WeChat to join the open-source community:
|
||||
Scan the WeChat QR code to join the open-source community group:
|
||||
|
||||
<img width="140" src="https://img-1317903499.cos.ap-guangzhou.myqcloud.com/docs/open-community.png" />
|
||||
|
||||
@@ -5,6 +5,10 @@ description: CowAgent personal knowledge base — structured knowledge accumulat
|
||||
|
||||
The personal knowledge base is the Agent's long-term structured knowledge store, saved in the `knowledge/` directory within the workspace. Unlike memory, which is organized by timeline, the knowledge base organizes content by topic — articles, conversation insights, and learning materials are structured into interlinked Markdown pages, forming a continuously growing knowledge network.
|
||||
|
||||
<Frame>
|
||||
<img src="https://cdn.link-ai.tech/doc/20260413105435.png" width="800" />
|
||||
</Frame>
|
||||
|
||||
## Core Concepts
|
||||
|
||||
### Knowledge vs Memory
|
||||
@@ -43,7 +47,7 @@ Knowledge writing is an autonomous Agent behavior, triggered in these scenarios:
|
||||
Each knowledge page includes cross-reference links to related pages, gradually building a knowledge graph.
|
||||
|
||||
<Frame>
|
||||
<img src="https://gist.github.com/user-attachments/assets/3ce92f78-1863-4820-8fa8-660c0f2b7f09" alt="Conversational knowledge ingest" />
|
||||
<img src="https://cdn.link-ai.tech/doc/20260413110104.png" width="800" />
|
||||
</Frame>
|
||||
|
||||
## Knowledge Retrieval
|
||||
@@ -63,11 +67,11 @@ The web console provides a dedicated "Knowledge" module with:
|
||||
- **Chat integration** — Knowledge document links referenced in Agent replies are clickable for direct navigation
|
||||
|
||||
<Frame>
|
||||
<img src="https://gist.github.com/user-attachments/assets/b7b9d6be-0ac1-4c65-803b-2c6b36bd59a7" alt="Knowledge document browsing" />
|
||||
<img src="https://cdn.link-ai.tech/doc/17aad553d3e9e428c52ff9dc31726fda.png" width="800" />
|
||||
</Frame>
|
||||
|
||||
<Frame>
|
||||
<img src="https://gist.github.com/user-attachments/assets/44ae68ca-96cc-40b9-ab33-cdbec34c2379" alt="Knowledge graph visualization" />
|
||||
<img src="https://cdn.link-ai.tech/doc/20260413105402.png" width="800" />
|
||||
</Frame>
|
||||
|
||||
## CLI Commands
|
||||
|
||||
@@ -27,7 +27,7 @@ The Agent automatically persists conversation content to long-term memory throug
|
||||
|
||||
- **On context trimming** — When conversation turns or tokens exceed the configured limit, the oldest half of the context is trimmed, and the discarded content is summarized by LLM into key information and written to the daily memory file. The summary is also asynchronously injected into the retained context for conversational continuity
|
||||
- **Daily scheduled summary** — A full summary is automatically triggered at 23:55 every day, ensuring memory is preserved even on low-activity days (skipped if content hasn't changed)
|
||||
- **[Deep Dream (memory distillation)](/en/memory/deep-dream)** — Runs automatically after the daily summary, distilling daily memories into MEMORY.md and generating a dream diary
|
||||
- [Deep Dream (memory distillation)](/en/memory/deep-dream) — Runs automatically after the daily summary, distilling daily memories into MEMORY.md and generating a dream diary
|
||||
- **On API context overflow** — When the model API returns a context overflow error, the current conversation summary is saved as an emergency measure
|
||||
|
||||
All memory writes run asynchronously in a background thread (LLM summarization + file writing), never blocking normal conversation replies.
|
||||
|
||||
@@ -1,8 +1,16 @@
|
||||
---
|
||||
title: Claude
|
||||
description: Claude model configuration
|
||||
description: Anthropic Claude model configuration (Text Chat + Image Understanding)
|
||||
---
|
||||
|
||||
Claude is provided by Anthropic and supports both text chat and image understanding. The mainstream Sonnet / Opus models natively support vision, so no separate Vision model needs to be specified.
|
||||
|
||||
<Tip>
|
||||
All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file.
|
||||
</Tip>
|
||||
|
||||
## Text Chat
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "claude-sonnet-4-6",
|
||||
@@ -12,6 +20,30 @@ description: Claude model configuration
|
||||
|
||||
| Parameter | Description |
|
||||
| --- | --- |
|
||||
| `model` | Options include `claude-sonnet-4-6`, `claude-opus-4-7`, `claude-opus-4-6`, `claude-sonnet-4-5`, `claude-sonnet-4-0`, `claude-3-5-sonnet-latest`, etc. See [official models](https://docs.anthropic.com/en/docs/about-claude/models/overview) |
|
||||
| `claude_api_key` | Create at [Claude Console](https://console.anthropic.com/settings/keys) |
|
||||
| `claude_api_base` | Optional. Defaults to `https://api.anthropic.com/v1`. Change to use third-party proxy |
|
||||
| `model` | Supports `claude-sonnet-4-6`, `claude-opus-4-7`, `claude-opus-4-6`, `claude-sonnet-4-5`, `claude-sonnet-4-0`, `claude-3-5-sonnet-latest`, etc. See [official models](https://docs.anthropic.com/en/docs/about-claude/models/overview) |
|
||||
| `claude_api_key` | Create one in the [Claude Console](https://console.anthropic.com/settings/keys) |
|
||||
| `claude_api_base` | Optional, defaults to `https://api.anthropic.com/v1`. Can be changed to a third-party proxy |
|
||||
|
||||
### Model Selection
|
||||
|
||||
| Model | Use Case |
|
||||
| --- | --- |
|
||||
| `claude-sonnet-4-6` | Default recommended, balanced cost and speed |
|
||||
| `claude-opus-4-7` | Complex reasoning and long-running tasks; best quality but higher cost |
|
||||
| `claude-sonnet-4-5` / `claude-sonnet-4-0` | Previous-generation flagships at a lower price |
|
||||
|
||||
## Image Understanding
|
||||
|
||||
Once `claude_api_key` is configured, the Agent's Vision tool automatically uses the Claude main model to recognize images, with no extra setup required.
|
||||
|
||||
To manually specify a Vision model, set it explicitly in the configuration file:
|
||||
|
||||
```json
|
||||
{
|
||||
"tools": {
|
||||
"vision": {
|
||||
"model": "claude-sonnet-4-6"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
@@ -77,7 +77,7 @@ Reference: [China Key](https://platform.minimaxi.com/docs/coding-plan/quickstart
|
||||
|
||||
---
|
||||
|
||||
## Zhipu GLM
|
||||
## GLM
|
||||
|
||||
```json
|
||||
{
|
||||
|
||||
@@ -1,26 +1,26 @@
|
||||
---
|
||||
title: Custom
|
||||
description: Custom provider for third-party APIs and local models
|
||||
description: Custom vendor configuration for third-party API proxies and local models
|
||||
---
|
||||
|
||||
For models accessed via OpenAI-compatible APIs, such as:
|
||||
For model services accessed via the OpenAI-compatible protocol or locally deployed models, such as:
|
||||
|
||||
- **Third-party API proxies**: Use a unified API Base to call multiple models
|
||||
- **Local models**: Models deployed locally via Ollama, vLLM, LocalAI, etc.
|
||||
- **Private deployments**: Self-hosted model services within your organization
|
||||
- **Third-party API proxies**: call multiple models through a unified API base
|
||||
- **Local models**: models deployed locally with tools like Ollama, vLLM, LocalAI
|
||||
- **Private deployments**: model services deployed inside an enterprise
|
||||
|
||||
<Note>
|
||||
Unlike the `openai` provider, switching models under the Custom provider will not auto-switch the provider type. Your custom API address is always preserved.
|
||||
Difference from the `openai` vendor: when a custom vendor is selected, switching models via `/config model` does not automatically switch the vendor type — the custom API address is always used.
|
||||
</Note>
|
||||
|
||||
## Configuration
|
||||
## Text Chat
|
||||
|
||||
### Third-party API Proxy
|
||||
### Third-party API proxy
|
||||
|
||||
```json
|
||||
{
|
||||
"bot_type": "custom",
|
||||
"model": "deepseek-v4-flash",
|
||||
"model": "",
|
||||
"custom_api_key": "YOUR_API_KEY",
|
||||
"custom_api_base": "https://{your-proxy.com}/v1"
|
||||
}
|
||||
@@ -29,13 +29,13 @@ For models accessed via OpenAI-compatible APIs, such as:
|
||||
| Parameter | Description |
|
||||
| --- | --- |
|
||||
| `bot_type` | Must be set to `custom` |
|
||||
| `model` | Model name, any model supported by your proxy service |
|
||||
| `custom_api_key` | API key provided by your proxy service |
|
||||
| `custom_api_base` | API base URL, must be OpenAI-compatible |
|
||||
| `model` | Model name; any model name supported by the proxy service |
|
||||
| `custom_api_key` | API key provided by the proxy service |
|
||||
| `custom_api_base` | API endpoint provided by the proxy service; must be OpenAI-compatible |
|
||||
|
||||
### Local Models
|
||||
### Local models
|
||||
|
||||
Local models typically don't require an API key — just set the API base:
|
||||
Local models usually do not require an API key — only the API base needs to be filled in:
|
||||
|
||||
```json
|
||||
{
|
||||
@@ -45,7 +45,7 @@ Local models typically don't require an API key — just set the API base:
|
||||
}
|
||||
```
|
||||
|
||||
Common local deployment tools and their default addresses:
|
||||
Common local deployment tools and their default endpoints:
|
||||
|
||||
| Tool | Default API Base |
|
||||
| --- | --- |
|
||||
@@ -53,9 +53,9 @@ Common local deployment tools and their default addresses:
|
||||
| [vLLM](https://docs.vllm.ai) | `http://localhost:8000/v1` |
|
||||
| [LocalAI](https://localai.io) | `http://localhost:8080/v1` |
|
||||
|
||||
## Switching Models
|
||||
### Switching Models
|
||||
|
||||
Under the Custom provider, switching models only changes `model` without affecting `bot_type` or the API address:
|
||||
Switching models under a custom vendor only changes `model` — `bot_type` and the API endpoint remain unchanged:
|
||||
|
||||
```
|
||||
/config model qwen3.5:27b
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
---
|
||||
title: DeepSeek
|
||||
description: DeepSeek model configuration
|
||||
description: DeepSeek model configuration (Text Chat + Thinking Mode)
|
||||
---
|
||||
|
||||
Option 1: Native integration (recommended):
|
||||
DeepSeek is one of the default recommended vendors in Agent mode, focused on cost-effective text chat and task planning.
|
||||
|
||||
## Text Chat
|
||||
|
||||
```json
|
||||
{
|
||||
@@ -14,24 +16,24 @@ Option 1: Native integration (recommended):
|
||||
|
||||
| Parameter | Description |
|
||||
| --- | --- |
|
||||
| `model` | Supports `deepseek-v4-flash` (default) and `deepseek-v4-pro` |
|
||||
| `deepseek_api_key` | Create at [DeepSeek Platform](https://platform.deepseek.com/api_keys) |
|
||||
| `model` | Supports `deepseek-v4-flash` (Default), `deepseek-v4-pro` |
|
||||
| `deepseek_api_key` | Create one on the [DeepSeek Platform](https://platform.deepseek.com/api_keys) |
|
||||
| `deepseek_api_base` | Optional, defaults to `https://api.deepseek.com/v1`. Can be changed to a third-party proxy |
|
||||
|
||||
## Model Selection
|
||||
### Model Selection
|
||||
|
||||
| Model | Use Case |
|
||||
| --- | --- |
|
||||
| `deepseek-v4-flash` | Default: fast and cost-effective |
|
||||
| `deepseek-v4-pro` | Stronger on complex tasks |
|
||||
| `deepseek-v4-flash` | Default recommended; fast and low cost |
|
||||
| `deepseek-v4-pro` | Smarter; better for complex tasks |
|
||||
|
||||
## Thinking Mode
|
||||
|
||||
The V4 series (`deepseek-v4-flash` / `deepseek-v4-pro`) supports an explicit "thinking mode": the model emits a chain-of-thought (`reasoning_content`) before the final answer to improve answer quality.
|
||||
The V4 series (`deepseek-v4-flash` / `deepseek-v4-pro`) supports an explicit "thinking mode": before producing the final answer, the model emits a chain of thought (`reasoning_content`) to improve answer quality.
|
||||
|
||||
### Toggle
|
||||
|
||||
Controlled by the global `enable_thinking` setting:
|
||||
Controlled by the global `enable_thinking` config, and can also be toggled from the Web Console's configuration page:
|
||||
|
||||
```json
|
||||
{
|
||||
@@ -39,12 +41,12 @@ Controlled by the global `enable_thinking` setting:
|
||||
}
|
||||
```
|
||||
|
||||
- `true`: thinking is on across all channels. The Web console renders the reasoning trace; IM channels (WeChat / WeCom / DingTalk / Feishu) don't render it but still benefit from higher answer quality.
|
||||
- `false`: thinking off, faster responses with lower first-token latency.
|
||||
- `true`: the model thinks before answering across all channels. The Web Console displays the thinking process; IM channels (WeChat / WeCom / DingTalk / Feishu) do not show it but still get better answers.
|
||||
- `false`: thinking is disabled, responses are faster, and time-to-first-token is lower.
|
||||
|
||||
### Reasoning Effort
|
||||
|
||||
Under thinking mode, `reasoning_effort` controls how hard the model thinks:
|
||||
Under thinking mode, `reasoning_effort` controls reasoning intensity:
|
||||
|
||||
```json
|
||||
{
|
||||
@@ -55,27 +57,16 @@ Under thinking mode, `reasoning_effort` controls how hard the model thinks:
|
||||
|
||||
| Value | Use Case |
|
||||
| --- | --- |
|
||||
| `high` (default) | Day-to-day agent tasks; balanced thinking depth and latency |
|
||||
| `max` | Complex coding, long-horizon planning, strict-constraint tasks. Deeper reasoning at the cost of more output tokens and higher latency |
|
||||
| `high` (Default) | Day-to-day Agent tasks; balanced reasoning and speed |
|
||||
| `max` | Complex coding, long-horizon planning, strictly constrained tasks; deeper reasoning but more time and output tokens |
|
||||
|
||||
`reasoning_effort` only takes effect when `enable_thinking` is `true`. It is silently ignored on models that do not support thinking mode.
|
||||
`reasoning_effort` only takes effect when `enable_thinking` is `true`; it is ignored automatically when the model does not support thinking mode.
|
||||
|
||||
### Notes
|
||||
### Behavior Notes
|
||||
|
||||
- **Sampling parameters**: under thinking mode, `temperature`, `top_p`, `presence_penalty`, and `frequency_penalty` are silently ignored by the server (no error). CowAgent skips sending them automatically.
|
||||
- **Multi-turn tool calls**: once the history contains any tool-call turn, DeepSeek requires `reasoning_content` on every assistant message. CowAgent handles the round-trip automatically, including across mid-session toggles of the thinking switch.
|
||||
- **Sampling parameters**: in thinking mode, `temperature`, `top_p`, `presence_penalty`, and `frequency_penalty` are ignored by the server (without errors). CowAgent automatically skips them.
|
||||
- **Multi-turn tool calls**: when the history contains tool calls, DeepSeek requires every assistant message to include `reasoning_content`. CowAgent handles this automatically, so toggling thinking mode across turns will not cause errors.
|
||||
|
||||
<Tip>
|
||||
Start with `deepseek-v4-flash`; switch to `deepseek-v4-pro` for harder tasks; enable `enable_thinking` when you want deeper reasoning.
|
||||
`deepseek-v4-flash` is used by default; switch to `deepseek-v4-pro` for complex tasks; enable `enable_thinking` when deep reasoning is needed.
|
||||
</Tip>
|
||||
|
||||
Option 2: OpenAI-compatible configuration:
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "deepseek-v4-flash",
|
||||
"bot_type": "openai",
|
||||
"open_ai_api_key": "YOUR_API_KEY",
|
||||
"open_ai_api_base": "https://api.deepseek.com/v1"
|
||||
}
|
||||
```
|
||||
|
||||
@@ -1,17 +1,66 @@
|
||||
---
|
||||
title: Doubao (ByteDance)
|
||||
description: Doubao (Volcano Ark) model configuration
|
||||
title: Doubao
|
||||
description: Doubao (Volcengine Ark) model configuration (Text / Image Understanding / Image Generation / Embedding)
|
||||
---
|
||||
|
||||
Doubao (Volcengine Ark) supports text chat, image understanding, image generation (Seedream), and embedding. A single `ark_api_key` enables all capabilities.
|
||||
|
||||
<Tip>
|
||||
All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file.
|
||||
</Tip>
|
||||
|
||||
## Text Chat
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "doubao-seed-2-0-code-preview-260215",
|
||||
"model": "doubao-seed-2-0-pro-260215",
|
||||
"ark_api_key": "YOUR_API_KEY"
|
||||
}
|
||||
```
|
||||
|
||||
| Parameter | Description |
|
||||
| --- | --- |
|
||||
| `model` | Options include `doubao-seed-2-0-code-preview-260215`, `doubao-seed-2-0-pro-260215`, `doubao-seed-2-0-lite-260215`, etc. |
|
||||
| `ark_api_key` | Create at [Volcano Ark Console](https://console.volcengine.com/ark/region:ark+cn-beijing/apikey) |
|
||||
| `ark_base_url` | Optional. Defaults to `https://ark.cn-beijing.volces.com/api/v3` |
|
||||
| `model` | Can be `doubao-seed-2-0-pro-260215`, `doubao-seed-2-0-code-preview-260215`, `doubao-seed-2-0-lite-260215`, etc. |
|
||||
| `ark_api_key` | Create one in the [Volcengine Ark Console](https://console.volcengine.com/ark/region:ark+cn-beijing/apikey) |
|
||||
| `ark_base_url` | Optional, defaults to `https://ark.cn-beijing.volces.com/api/v3` |
|
||||
|
||||
## Image Understanding
|
||||
|
||||
Once `ark_api_key` is configured, the Agent's Vision tool automatically uses `doubao-seed-2-0-pro-260215` to recognize images, with no extra setup required.
|
||||
|
||||
To manually specify a Vision model:
|
||||
|
||||
```json
|
||||
{
|
||||
"tools": {
|
||||
"vision": {
|
||||
"model": "doubao-seed-2-0-pro-260215"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Image Generation
|
||||
|
||||
```json
|
||||
{
|
||||
"skills": {
|
||||
"image-generation": {
|
||||
"model": "seedream-5.0-lite"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Available models: `seedream-5.0-lite`, `seedream-4.5`.
|
||||
|
||||
## Embedding
|
||||
|
||||
```json
|
||||
{
|
||||
"embedding_provider": "doubao",
|
||||
"embedding_model": "doubao-embedding-vision-251215"
|
||||
}
|
||||
```
|
||||
|
||||
The default model is `doubao-embedding-vision-251215` (multimodal embedding); the dimension (1024 or 2048) can be set via `embedding_dimensions` in the configuration file. After changing the embedding, run `/memory rebuild-index` to rebuild the index.
|
||||
|
||||
@@ -1,16 +1,59 @@
|
||||
---
|
||||
title: Gemini
|
||||
description: Google Gemini model configuration
|
||||
description: Google Gemini model configuration (Text Chat + Image Understanding + Image Generation)
|
||||
---
|
||||
|
||||
Google Gemini supports text chat, image understanding, and image generation (Nano Banana series). A single `gemini_api_key` enables all capabilities.
|
||||
|
||||
<Tip>
|
||||
All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file.
|
||||
</Tip>
|
||||
|
||||
## Text Chat
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "gemini-3.1-pro-preview",
|
||||
"model": "gemini-3.5-flash",
|
||||
"gemini_api_key": "YOUR_API_KEY"
|
||||
}
|
||||
```
|
||||
|
||||
| Parameter | Description |
|
||||
| --- | --- |
|
||||
| `model` | Options include `gemini-3.1-flash-lite-preview`, `gemini-3.1-pro-preview`, `gemini-3-flash-preview`, `gemini-3-pro-preview`, etc. See [official docs](https://ai.google.dev/gemini-api/docs/models) |
|
||||
| `gemini_api_key` | Create at [Google AI Studio](https://aistudio.google.com/app/apikey) |
|
||||
| `model` | Recommended: `gemini-3.5-flash`; also supports `gemini-3.1-pro-preview`, `gemini-3.1-flash-lite-preview`, `gemini-3-flash-preview`, `gemini-3-pro-preview`, etc. See [official docs](https://ai.google.dev/gemini-api/docs/models) |
|
||||
| `gemini_api_key` | Create one in [Google AI Studio](https://aistudio.google.com/app/apikey) |
|
||||
| `gemini_api_base` | Optional, defaults to `https://generativelanguage.googleapis.com`. Can be changed to a third-party proxy |
|
||||
|
||||
## Image Understanding
|
||||
|
||||
All Gemini models natively support vision. Once `gemini_api_key` is configured, the Agent's Vision tool automatically uses the main model to recognize images, with no extra setup required.
|
||||
|
||||
To manually specify a Vision model:
|
||||
|
||||
```json
|
||||
{
|
||||
"tools": {
|
||||
"vision": {
|
||||
"model": "gemini-3.1-flash-lite-preview"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Image Generation
|
||||
|
||||
```json
|
||||
{
|
||||
"skills": {
|
||||
"image-generation": {
|
||||
"model": "gemini-3.1-flash-image-preview"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| Model ID | Alias |
|
||||
| --- | --- |
|
||||
| `gemini-3.1-flash-image-preview` | Nano Banana 2 |
|
||||
| `gemini-3-pro-image-preview` | Nano Banana Pro |
|
||||
| `gemini-2.5-flash-image` | Nano Banana |
|
||||
|
||||
@@ -1,8 +1,16 @@
|
||||
---
|
||||
title: GLM (Zhipu AI)
|
||||
description: Zhipu AI GLM model configuration
|
||||
title: GLM
|
||||
description: Zhipu AI GLM model configuration (Text / Image Understanding / Speech-to-Text / Embedding)
|
||||
---
|
||||
|
||||
Zhipu AI supports text chat, image understanding, speech-to-text (ASR), and embedding. A single `zhipu_ai_api_key` enables all capabilities.
|
||||
|
||||
<Tip>
|
||||
All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file.
|
||||
</Tip>
|
||||
|
||||
## Text Chat
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "glm-5.1",
|
||||
@@ -12,16 +20,37 @@ description: Zhipu AI GLM model configuration
|
||||
|
||||
| Parameter | Description |
|
||||
| --- | --- |
|
||||
| `model` | Options include `glm-5.1`, `glm-5-turbo`, `glm-5`, `glm-4.7`, `glm-4-plus`, `glm-4-flash`, `glm-4-air`, etc. See [model codes](https://bigmodel.cn/dev/api/normal-model/glm-4) |
|
||||
| `zhipu_ai_api_key` | Create at [Zhipu AI Console](https://www.bigmodel.cn/usercenter/proj-mgmt/apikeys) |
|
||||
| `model` | Can be `glm-5.1`, `glm-5-turbo`, `glm-5`, `glm-4.7`, `glm-4-plus`, `glm-4-flash`, `glm-4-air`, etc. See [model codes](https://bigmodel.cn/dev/api/normal-model/glm-4) |
|
||||
| `zhipu_ai_api_key` | Create one in the [Zhipu AI Console](https://www.bigmodel.cn/usercenter/proj-mgmt/apikeys) |
|
||||
| `zhipu_ai_api_base` | Optional, defaults to `https://open.bigmodel.cn/api/paas/v4` |
|
||||
|
||||
OpenAI-compatible configuration is also supported:
|
||||
## Image Understanding
|
||||
|
||||
Zhipu's chat models (`glm-5.1`, `glm-5-turbo`, etc.) do not support vision; vision calls are uniformly routed to `glm-5v-turbo`. Once `zhipu_ai_api_key` is configured, the Agent's Vision tool automatically uses this model, with no need to specify it explicitly in the configuration file.
|
||||
|
||||
## Speech-to-Text (ASR)
|
||||
|
||||
```json
|
||||
{
|
||||
"bot_type": "openai",
|
||||
"model": "glm-5.1",
|
||||
"open_ai_api_base": "https://open.bigmodel.cn/api/paas/v4",
|
||||
"open_ai_api_key": "YOUR_API_KEY"
|
||||
"voice_to_text": "zhipu",
|
||||
"voice_to_text_model": "glm-asr-2512"
|
||||
}
|
||||
```
|
||||
|
||||
| Parameter | Description |
|
||||
| --- | --- |
|
||||
| `voice_to_text` | Set to `zhipu` to enable Zhipu ASR |
|
||||
| `voice_to_text_model` | Optional, defaults to `glm-asr-2512` |
|
||||
|
||||
Credentials are automatically reused from `zhipu_ai_api_key`. Audio files should be smaller than 25MB; oversized files may be rejected by the server.
|
||||
|
||||
## Embedding
|
||||
|
||||
```json
|
||||
{
|
||||
"embedding_provider": "zhipu",
|
||||
"embedding_model": "embedding-3"
|
||||
}
|
||||
```
|
||||
|
||||
Available models: `embedding-3`, `embedding-2`. After changing the embedding, run `/memory rebuild-index` to rebuild the index.
|
||||
|
||||
@@ -1,58 +1,38 @@
|
||||
---
|
||||
title: Models Overview
|
||||
description: Supported models and recommended choices for CowAgent
|
||||
description: Model vendors supported by CowAgent and their capability matrix
|
||||
---
|
||||
|
||||
CowAgent supports mainstream LLMs from domestic and international providers. Model interfaces are implemented in the project's `models/` directory.
|
||||
CowAgent supports a wide range of mainstream large language models. Model interfaces live under the project's `models/` directory. Beyond text chat, several vendors also provide vision understanding, image generation, speech-to-text, text-to-speech, and embeddings — all of which can be invoked on demand in the Agent flow.
|
||||
|
||||
<Note>
|
||||
For Agent mode, the following models are recommended based on quality and cost: deepseek-v4-flash, MiniMax-M2.7, claude-sonnet-4-6, gemini-3.1-pro-preview, glm-5.1, qwen3.6-plus, kimi-k2.6, ernie-5.1
|
||||
</Note>
|
||||
## Capability Matrix
|
||||
|
||||
## Configuration
|
||||
A snapshot of each vendor's capabilities. "Text" refers to the main chat model; the remaining columns show which Agent capabilities the vendor can power.
|
||||
|
||||
Configure the model name and API key in `config.json` according to your chosen model. Each model also supports OpenAI-compatible access by setting `bot_type` to `openai` and configuring `open_ai_api_base` and `open_ai_api_key`.
|
||||
|
||||
You can also use the [LinkAI](https://link-ai.tech) platform interface to flexibly switch between multiple models with support for knowledge base, workflows, and other Agent capabilities.
|
||||
|
||||
## Supported Models
|
||||
|
||||
<CardGroup cols={2}>
|
||||
<Card title="DeepSeek" href="/en/models/deepseek">
|
||||
deepseek-v4-flash, deepseek-v4-pro, and more
|
||||
</Card>
|
||||
<Card title="Baidu Qianfan / ERNIE" href="/en/models/qianfan">
|
||||
ernie-5.1, ernie-5.0, ernie-4.5-turbo-128k, and more
|
||||
</Card>
|
||||
<Card title="MiniMax" href="/en/models/minimax">
|
||||
MiniMax-M2.7 and other series models
|
||||
</Card>
|
||||
<Card title="Claude" href="/en/models/claude">
|
||||
claude-sonnet-4-6 and more
|
||||
</Card>
|
||||
<Card title="Gemini" href="/en/models/gemini">
|
||||
gemini-3.1-pro-preview and more
|
||||
</Card>
|
||||
<Card title="OpenAI" href="/en/models/openai">
|
||||
gpt-5.4, gpt-4.1, o-series and more
|
||||
</Card>
|
||||
<Card title="GLM (Zhipu AI)" href="/en/models/glm">
|
||||
glm-5.1, glm-5-turbo, glm-5 and other series models
|
||||
</Card>
|
||||
<Card title="Qwen (Tongyi Qianwen)" href="/en/models/qwen">
|
||||
qwen3.6-plus, qwen3-max and more
|
||||
</Card>
|
||||
<Card title="Doubao (ByteDance)" href="/en/models/doubao">
|
||||
doubao-seed series models
|
||||
</Card>
|
||||
<Card title="Kimi" href="/en/models/kimi">
|
||||
kimi-k2.6, kimi-k2.5, kimi-k2 and more
|
||||
</Card>
|
||||
<Card title="LinkAI" href="/en/models/linkai">
|
||||
Unified multi-model interface + knowledge base
|
||||
</Card>
|
||||
</CardGroup>
|
||||
| Vendor | Representative Models | Text | Vision | Image Gen | STT | TTS | Embedding |
|
||||
| --- | --- | :-: | :-: | :-: | :-: | :-: | :-: |
|
||||
| [DeepSeek](/en/models/deepseek) | deepseek-v4-flash / pro | ✅ | | | | | |
|
||||
| [MiniMax](/en/models/minimax) | MiniMax-M2.7 | ✅ | ✅ | ✅ | | ✅ | |
|
||||
| [Claude](/en/models/claude) | claude-opus-4-7 | ✅ | ✅ | | | | |
|
||||
| [Gemini](/en/models/gemini) | gemini-3.5-flash | ✅ | ✅ | ✅ | | | |
|
||||
| [OpenAI](/en/models/openai) | gpt-5.5, o-series | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [GLM](/en/models/glm) | glm-5.1, glm-5v-turbo | ✅ | ✅ | | ✅ | | ✅ |
|
||||
| [Qwen](/en/models/qwen) | qwen3.7-max | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [Doubao](/en/models/doubao) | doubao-seed-2.0 series | ✅ | ✅ | ✅ | | | ✅ |
|
||||
| [Kimi](/en/models/kimi) | kimi-k2.6 | ✅ | ✅ | | | | |
|
||||
| [ERNIE](/en/models/qianfan) | ernie-5.1 | ✅ | ✅ | | | | |
|
||||
| [MiMo](/en/models/mimo) | mimo-v2.5-pro / v2.5 | ✅ | ✅ | | | ✅ | |
|
||||
| [LinkAI](/en/models/linkai) | 100+ models from multiple vendors | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [Custom](/en/models/custom) | Local models / third-party proxies | ✅ | | | | | |
|
||||
|
||||
<Tip>
|
||||
For a full list of model names, refer to the project's [`common/const.py`](https://github.com/zhayujie/CowAgent/blob/master/common/const.py) file.
|
||||
Every capability in the Web console (Vision / Image / STT / TTS / Embedding / Web Search) can be configured independently with its own vendor and model — there is no forced binding between them.
|
||||
</Tip>
|
||||
|
||||
## How to Configure
|
||||
|
||||
**Option 1 (recommended):** Manage models and capabilities online via the [Web console](/en/channels/web), with no need to edit the configuration file:
|
||||
|
||||
<img width="900" src="https://cdn.jsdelivr.net/gh/zhayujie/cowagent-assets@main/screenshots/en/web-console-models-config.png" />
|
||||
|
||||
**Option 2:** Edit `config.json` manually and fill in the model name and API key for the selected vendor. Every model also supports OpenAI-compatible access — just set `bot_type` to `openai` and configure `open_ai_api_base` and `open_ai_api_key`.
|
||||
|
||||
@@ -1,8 +1,16 @@
|
||||
---
|
||||
title: Kimi (Moonshot)
|
||||
description: Kimi (Moonshot) model configuration
|
||||
title: Kimi
|
||||
description: Kimi (Moonshot) model configuration (Text Chat + Image Understanding)
|
||||
---
|
||||
|
||||
Kimi is provided by Moonshot and supports both text chat and image understanding. The `kimi-k2.x` series natively supports vision.
|
||||
|
||||
<Tip>
|
||||
All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file.
|
||||
</Tip>
|
||||
|
||||
## Text Chat
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "kimi-k2.6",
|
||||
@@ -12,16 +20,22 @@ description: Kimi (Moonshot) model configuration
|
||||
|
||||
| Parameter | Description |
|
||||
| --- | --- |
|
||||
| `model` | Options include `kimi-k2.6`, `kimi-k2.5`, `kimi-k2`, `moonshot-v1-8k`, `moonshot-v1-32k`, `moonshot-v1-128k` |
|
||||
| `moonshot_api_key` | Create at [Moonshot Console](https://platform.moonshot.cn/console/api-keys) |
|
||||
| `model` | Can be `kimi-k2.6`, `kimi-k2.5`, `kimi-k2`, `moonshot-v1-8k`, `moonshot-v1-32k`, `moonshot-v1-128k` |
|
||||
| `moonshot_api_key` | Create one in the [Moonshot Console](https://platform.moonshot.cn/console/api-keys) |
|
||||
| `moonshot_base_url` | Optional, defaults to `https://api.moonshot.cn/v1` |
|
||||
|
||||
OpenAI-compatible configuration is also supported:
|
||||
## Image Understanding
|
||||
|
||||
Once `moonshot_api_key` is configured, the Agent's Vision tool automatically uses `kimi-k2.6` to recognize images, with no extra setup required.
|
||||
|
||||
To manually specify a Vision model:
|
||||
|
||||
```json
|
||||
{
|
||||
"bot_type": "openai",
|
||||
"model": "kimi-k2.6",
|
||||
"open_ai_api_base": "https://api.moonshot.cn/v1",
|
||||
"open_ai_api_key": "YOUR_API_KEY"
|
||||
"tools": {
|
||||
"vision": {
|
||||
"model": "kimi-k2.6"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
@@ -1,9 +1,15 @@
|
||||
---
|
||||
title: LinkAI
|
||||
description: Unified access to multiple models via LinkAI platform
|
||||
description: Access text, vision, image, speech, and embedding capabilities through the LinkAI platform
|
||||
---
|
||||
|
||||
The [LinkAI](https://link-ai.tech) platform lets you flexibly switch between OpenAI, Claude, Gemini, DeepSeek, MiniMax, Qwen, Kimi, and other models, with support for knowledge base, workflows, plugins, and other Agent capabilities.
|
||||
A single `linkai_api_key` gives you access to all capabilities of mainstream vendors such as OpenAI, Claude, Gemini, DeepSeek, MiniMax, Qwen, Kimi, and Doubao.
|
||||
|
||||
<Tip>
|
||||
All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file.
|
||||
</Tip>
|
||||
|
||||
## Text Chat
|
||||
|
||||
```json
|
||||
{
|
||||
@@ -14,8 +20,84 @@ The [LinkAI](https://link-ai.tech) platform lets you flexibly switch between Ope
|
||||
|
||||
| Parameter | Description |
|
||||
| --- | --- |
|
||||
| `use_linkai` | Set to `true` to enable LinkAI interface |
|
||||
| `linkai_api_key` | Create at [LinkAI Console](https://link-ai.tech/console/interface) |
|
||||
| `model` | Leave empty to use the agent's default model. Can be switched flexibly on the platform. All models in the [model list](https://link-ai.tech/console/models) are supported |
|
||||
| `use_linkai` | Set to `true` to enable |
|
||||
| `linkai_api_key` | Create one in the [Console](https://link-ai.tech/console/interface) |
|
||||
| `model` | Can be any code from the [model list](https://link-ai.tech/console/models) |
|
||||
|
||||
See the [API documentation](https://docs.link-ai.tech/platform/api) for more details.
|
||||
See [Model Service](https://link-ai.tech/console/models) for more.
|
||||
|
||||
## Image Understanding
|
||||
|
||||
Once configured, the Agent's Vision tool automatically calls multimodal models via the gateway, with no extra setup required. To manually specify a Vision model:
|
||||
|
||||
```json
|
||||
{
|
||||
"tools": {
|
||||
"vision": {
|
||||
"model": "gpt-5.4-mini"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Available models: `gpt-4.1-mini`, `gpt-5.4-mini`, `qwen3.6-plus`, `doubao-seed-2-0-pro-260215`, `kimi-k2.6`, `claude-sonnet-4-6`, `gemini-3.1-flash-lite-preview`, etc.
|
||||
|
||||
## Image Generation
|
||||
|
||||
```json
|
||||
{
|
||||
"skills": {
|
||||
"image-generation": {
|
||||
"model": "gpt-image-2"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| Model ID | Alias |
|
||||
| --- | --- |
|
||||
| `gpt-image-2` | OpenAI |
|
||||
| `gemini-3.1-flash-image-preview` | Nano Banana 2 |
|
||||
| `gemini-3-pro-image-preview` | Nano Banana Pro |
|
||||
| `seedream-5.0-lite` | ByteDance Doubao Seedream |
|
||||
|
||||
## Speech-to-Text (ASR)
|
||||
|
||||
```json
|
||||
{
|
||||
"voice_to_text": "linkai"
|
||||
}
|
||||
```
|
||||
|
||||
ASR uses Whisper by default; credentials are automatically reused from `linkai_api_key`.
|
||||
|
||||
## Text-to-Speech (TTS)
|
||||
|
||||
The TTS gateway supports multiple underlying engines. The engine is selected by `text_to_voice_model`, and the available voices change with the engine.
|
||||
|
||||
```json
|
||||
{
|
||||
"text_to_voice": "linkai",
|
||||
"text_to_voice_model": "doubao",
|
||||
"tts_voice_id": "BV001_streaming"
|
||||
}
|
||||
```
|
||||
|
||||
| `text_to_voice_model` | Engine |
|
||||
| --- | --- |
|
||||
| `tts-1` | OpenAI · Multi-language (voices like `alloy` / `nova` / `echo`, etc.) |
|
||||
| `doubao` | ByteDance Doubao · Rich Chinese voices |
|
||||
| `baidu` | Baidu · Chinese broadcaster voices |
|
||||
|
||||
Voices differ by engine; we recommend selecting them visually in the Web Console under "Model Management → Text-to-Speech".
|
||||
|
||||
## Embedding
|
||||
|
||||
```json
|
||||
{
|
||||
"embedding_provider": "linkai",
|
||||
"embedding_model": "text-embedding-3-small"
|
||||
}
|
||||
```
|
||||
|
||||
The default model is `text-embedding-3-small` (OpenAI-compatible). After changing the embedding, run `/memory rebuild-index` to rebuild the index.
|
||||
|
||||
136
docs/en/models/mimo.mdx
Normal file
136
docs/en/models/mimo.mdx
Normal file
@@ -0,0 +1,136 @@
|
||||
---
|
||||
title: MiMo
|
||||
description: Xiaomi MiMo model configuration (Text Chat + Image Understanding + Text-to-Speech)
|
||||
---
|
||||
|
||||
Xiaomi MiMo is a native omni-modal large model. A single `mimo_api_key` enables text chat, image understanding, and text-to-speech all at once.
|
||||
|
||||
<Tip>
|
||||
All capabilities below can be configured in one place via the "Model Management" page in the Web Console — no need to manually edit the configuration file.
|
||||
</Tip>
|
||||
|
||||
## Text Chat
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "mimo-v2.5-pro",
|
||||
"mimo_api_key": "YOUR_API_KEY",
|
||||
"mimo_api_base": "https://api.xiaomimimo.com/v1"
|
||||
}
|
||||
```
|
||||
|
||||
| Parameter | Description |
|
||||
| --- | --- |
|
||||
| `model` | Default recommendation: `mimo-v2.5-pro`; `mimo-v2.5` is also supported |
|
||||
| `mimo_api_key` | Create one in the [MiMo Open Platform](https://platform.xiaomimimo.com/console/api-keys) |
|
||||
| `mimo_api_base` | Optional, defaults to `https://api.xiaomimimo.com/v1` |
|
||||
|
||||
### Model Selection
|
||||
|
||||
| Model | Use Case |
|
||||
| --- | --- |
|
||||
| `mimo-v2.5-pro` | Flagship: native omni-modal + Agent capability, up to 1M tokens context |
|
||||
| `mimo-v2.5` | General-purpose, native omni-modal (text / image / video / audio) |
|
||||
|
||||
## Thinking Mode
|
||||
|
||||
The MiMo V2.5 series enables "thinking mode" by default: the model emits `reasoning_content` (chain-of-thought) before the final answer, improving performance on complex tasks.
|
||||
|
||||
Use the global `enable_thinking` flag to toggle visibility (also switchable from the Web Console settings):
|
||||
|
||||
```json
|
||||
{
|
||||
"enable_thinking": true
|
||||
}
|
||||
```
|
||||
|
||||
## Image Understanding
|
||||
|
||||
Once `mimo_api_key` is configured, the Agent's Vision tool can automatically use MiMo's vision models:
|
||||
|
||||
- When the main model itself is multimodal (`mimo-v2.5-pro` / `mimo-v2.5`), images are handled directly by the main model with no extra setup.
|
||||
- When the main model belongs to another vendor, the Vision tool falls back to `mimo-v2.5-pro` in order.
|
||||
|
||||
To force a specific Vision model, set it explicitly in the configuration:
|
||||
|
||||
```json
|
||||
{
|
||||
"tools": {
|
||||
"vision": {
|
||||
"provider": "mimo",
|
||||
"model": "mimo-v2.5-pro"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Text-to-Speech (TTS)
|
||||
|
||||
```json
|
||||
{
|
||||
"text_to_voice": "mimo",
|
||||
"text_to_voice_model": "mimo-v2.5-tts",
|
||||
"tts_voice_id": "冰糖"
|
||||
}
|
||||
```
|
||||
|
||||
| Parameter | Description |
|
||||
| --- | --- |
|
||||
| `text_to_voice_model` | Currently only `mimo-v2.5-tts` (preset voices + singing mode) |
|
||||
| `tts_voice_id` | Preset voice name (Chinese voice IDs use the Chinese name directly) |
|
||||
|
||||
### Preset Voices
|
||||
|
||||
| Voice ID | Description |
|
||||
| --- | --- |
|
||||
| `Mia` | English · Female |
|
||||
| `Chloe` | English · Female |
|
||||
| `Milo` | English · Male |
|
||||
| `Dean` | English · Male |
|
||||
| `冰糖` | Chinese · Female (default) |
|
||||
| `茉莉` | Chinese · Female |
|
||||
| `苏打` | Chinese · Male |
|
||||
| `白桦` | Chinese · Male |
|
||||
|
||||
|
||||
You can also pick a voice visually from the Web Console under "Model Management → Text-to-Speech".
|
||||
|
||||
### Style Control
|
||||
|
||||
MiMo TTS supports embedding **audio tags** in the synthesis text to control emotion, tone, dialect, persona, and even singing. Tags must appear in the **text that will be synthesized to speech (i.e. the Agent's reply)**, with the overall style tag placed at the very beginning:
|
||||
|
||||
```
|
||||
(style)content-to-synthesize
|
||||
```
|
||||
|
||||
Half-width `()`, full-width `()`, and `[]` brackets are all accepted. Both Chinese and English style descriptors work — pick whichever language expresses the timbre most precisely. Common examples:
|
||||
|
||||
| Category | Example tags |
|
||||
| --- | --- |
|
||||
| Basic emotions | `happy` `sad` `angry` `fear` `surprised` `excited` `aggrieved` `calm` `indifferent` |
|
||||
| Compound emotions | `wistful` `relieved` `helpless` `guilty` `at ease` `uneasy` `touched` |
|
||||
| Overall tone | `gentle` `aloof` `lively` `serious` `languid` `playful` `deep` `sharp` `cutting` |
|
||||
| Voice character | `magnetic` `mellow` `bright` `ethereal` `childlike` `aged` `sweet` `husky` |
|
||||
| Persona | `squeaky` `mature lady` `young boy` `uncle` `Taiwanese accent` |
|
||||
| Dialect | `Northeastern` `Sichuan` `Henan` `Cantonese` |
|
||||
| Role-play | `Sun Wukong` `Lin Daiyu` |
|
||||
| Singing | `sing` / `singing` |
|
||||
|
||||
Examples:
|
||||
|
||||
- `(magnetic)The night is deep, and the city is still breathing.`
|
||||
- `(gentle)Take a breath. You've got this.`
|
||||
- `(serious)This is the final warning before the system reboots.`
|
||||
- `(singing)Oh, when the saints go marching in…`
|
||||
|
||||
You can also insert fine-grained audio tags at any position in the text to control breathing, laughter, pauses, etc. For example:
|
||||
|
||||
```
|
||||
(nervous, deep breath) Phew… stay calm, stay calm. (faster pace) I've rehearsed this intro fifty times, it'll be fine.
|
||||
```
|
||||
|
||||
See the [MiMo speech synthesis documentation](https://platform.xiaomimimo.com/docs/zh-CN/usage-guide/speech-synthesis-v2.5) for the full tag list.
|
||||
|
||||
<Tip>
|
||||
When CowAgent calls TTS, the Agent's reply text (including any `(...)` tags) is forwarded directly to MiMo for synthesis. Tell the model in its persona / system prompt to "prefix replies with a `(style)` tag to control the tone", and IM channels (WeChat / Feishu / DingTalk / WeCom) will play voice replies with the corresponding emotion, dialect, or even singing.
|
||||
</Tip>
|
||||
@@ -1,8 +1,16 @@
|
||||
---
|
||||
title: MiniMax
|
||||
description: MiniMax model configuration
|
||||
description: MiniMax model configuration (Text / Image Understanding / Image Generation / Text-to-Speech)
|
||||
---
|
||||
|
||||
MiniMax supports text chat, image understanding, image generation, and text-to-speech. A single `minimax_api_key` enables all capabilities.
|
||||
|
||||
<Tip>
|
||||
All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file.
|
||||
</Tip>
|
||||
|
||||
## Text Chat
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "MiniMax-M2.7",
|
||||
@@ -12,16 +20,52 @@ description: MiniMax model configuration
|
||||
|
||||
| Parameter | Description |
|
||||
| --- | --- |
|
||||
| `model` | Options include `MiniMax-M2.7`, `MiniMax-M2.5`, `MiniMax-M2.1`, `MiniMax-M2.1-lightning`, `MiniMax-M2`, etc. |
|
||||
| `minimax_api_key` | Create at [MiniMax Console](https://platform.minimaxi.com/user-center/basic-information/interface-key) |
|
||||
| `model` | Can be `MiniMax-M2.7`, `MiniMax-M2.7-highspeed`, `MiniMax-M2.5`, `MiniMax-M2.1`, `MiniMax-M2.1-lightning`, `MiniMax-M2`, etc. |
|
||||
| `minimax_api_key` | Create one in the [MiniMax Console](https://platform.minimaxi.com/user-center/basic-information/interface-key) |
|
||||
|
||||
OpenAI-compatible configuration is also supported:
|
||||
## Image Understanding
|
||||
|
||||
MiniMax's M2.x chat models do not support vision natively; vision calls are uniformly routed to `MiniMax-Text-01`. Once `minimax_api_key` is configured, the Agent's Vision tool automatically uses this model, with no need to specify it explicitly in the configuration file.
|
||||
|
||||
## Image Generation
|
||||
|
||||
```json
|
||||
{
|
||||
"bot_type": "openai",
|
||||
"model": "MiniMax-M2.7",
|
||||
"open_ai_api_base": "https://api.minimaxi.com/v1",
|
||||
"open_ai_api_key": "YOUR_API_KEY"
|
||||
"skills": {
|
||||
"image-generation": {
|
||||
"model": "image-01"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Available models: `image-01`.
|
||||
|
||||
## Text-to-Speech (TTS)
|
||||
|
||||
```json
|
||||
{
|
||||
"text_to_voice": "minimax",
|
||||
"text_to_voice_model": "speech-2.8-hd",
|
||||
"tts_voice_id": "female-shaonv"
|
||||
}
|
||||
```
|
||||
|
||||
| Parameter | Description |
|
||||
| --- | --- |
|
||||
| `text_to_voice_model` | `speech-2.8-hd` (emotional rendering, natural sound), `speech-2.8-turbo` (ultra-fast), `speech-2.6-hd`, `speech-2.6-turbo` |
|
||||
| `tts_voice_id` | Voice ID; supports Chinese / Cantonese / English / Japanese / Korean — 70+ voices in total |
|
||||
|
||||
Common voice examples:
|
||||
|
||||
| Voice ID | Description |
|
||||
| --- | --- |
|
||||
| `female-shaonv` | Chinese · Young Girl (Female) |
|
||||
| `female-yujie` | Chinese · Mature Lady (Female) |
|
||||
| `female-tianmei` | Chinese · Sweet Female (Female) |
|
||||
| `male-qn-jingying` | Chinese · Elite Youth (Male) |
|
||||
| `male-qn-badao` | Chinese · Dominant Youth (Male) |
|
||||
| `Cantonese_GentleLady` | Cantonese · Gentle Female Voice |
|
||||
| `English_Graceful_Lady` | English · Graceful Lady |
|
||||
|
||||
For the full voice list (70+ voices across Chinese / Cantonese / English / Japanese / Korean), see the [system voice list](https://platform.minimaxi.com/docs/faq/system-voice-id), or select visually in the Web Console under "Model Management → Text-to-Speech".
|
||||
|
||||
@@ -1,11 +1,20 @@
|
||||
---
|
||||
title: OpenAI
|
||||
description: OpenAI model configuration
|
||||
description: OpenAI model configuration (Text / Vision / Image / Speech / Embedding)
|
||||
---
|
||||
|
||||
OpenAI offers the most complete coverage and can simultaneously serve text chat, vision understanding, image generation, speech-to-text (ASR), text-to-speech (TTS), and embedding. A single `open_ai_api_key` lets the Agent use all of these capabilities.
|
||||
|
||||
<Tip>
|
||||
All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file.
|
||||
</Tip>
|
||||
|
||||
|
||||
## Text Chat
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "gpt-5.4",
|
||||
"model": "gpt-5.5",
|
||||
"open_ai_api_key": "YOUR_API_KEY",
|
||||
"open_ai_api_base": "https://api.openai.com/v1"
|
||||
}
|
||||
@@ -13,7 +22,82 @@ description: OpenAI model configuration
|
||||
|
||||
| Parameter | Description |
|
||||
| --- | --- |
|
||||
| `model` | Matches the [model parameter](https://platform.openai.com/docs/models) of the OpenAI API. Supports o-series, gpt-5.4, gpt-5 series, gpt-4.1, etc. Recommended for Agent mode: `gpt-5.4` |
|
||||
| `open_ai_api_key` | Create at [OpenAI Platform](https://platform.openai.com/api-keys) |
|
||||
| `open_ai_api_base` | Optional. Change to use third-party proxy |
|
||||
| `bot_type` | Not required for official OpenAI models. Set to `openai` when using Claude or other non-OpenAI models via proxy |
|
||||
| `model` | Same as OpenAI's [model parameter](https://platform.openai.com/docs/models); supports `gpt-5.5`, `gpt-5.4`, `gpt-5.4-mini`, `gpt-5.4-nano`, the `gpt-5` series, `gpt-4.1`, the o-series, etc. Agent mode defaults to `gpt-5.5`; use `gpt-5.4` for better cost-efficiency |
|
||||
| `open_ai_api_key` | Create one on the [OpenAI Platform](https://platform.openai.com/api-keys) |
|
||||
| `open_ai_api_base` | Optional; change it to access a third-party proxy |
|
||||
| `bot_type` | Not required when using OpenAI's official models; set to `openai` when accessing other vendors via the compatible protocol |
|
||||
|
||||
## Image Understanding
|
||||
|
||||
OpenAI models like `gpt-5.5`, `gpt-5.4`, `gpt-4o`, and `gpt-4.1` natively support vision. Once `open_ai_api_key` is configured, the Agent's Vision tool automatically uses the main model to recognize images. If the main model does not support vision or you want to specify it explicitly, set it in the configuration file:
|
||||
|
||||
```json
|
||||
{
|
||||
"tools": {
|
||||
"vision": {
|
||||
"model": "gpt-5.4-mini"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Supported Vision models: `gpt-5.5`, `gpt-5.4`, `gpt-5.4-mini`, `gpt-5.4-nano`, `gpt-5`, `gpt-4.1`, `gpt-4.1-mini`, `gpt-4o`.
|
||||
|
||||
## Image Generation
|
||||
|
||||
Specify the image generation model in the configuration file; the Agent automatically routes image generation skill calls to OpenAI:
|
||||
|
||||
```json
|
||||
{
|
||||
"skills": {
|
||||
"image-generation": {
|
||||
"model": "gpt-image-2"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Supported image generation models: `gpt-image-2`, `gpt-image-1`.
|
||||
|
||||
## Speech-to-Text (ASR)
|
||||
|
||||
```json
|
||||
{
|
||||
"voice_to_text": "openai",
|
||||
"voice_to_text_model": "gpt-4o-mini-transcribe"
|
||||
}
|
||||
```
|
||||
|
||||
| Parameter | Description |
|
||||
| --- | --- |
|
||||
| `voice_to_text` | Set to `openai` to enable OpenAI speech-to-text |
|
||||
| `voice_to_text_model` | Optional, defaults to `gpt-4o-mini-transcribe`; can also be `gpt-4o-transcribe`, `whisper-1` |
|
||||
|
||||
Credentials are automatically reused from `open_ai_api_key`.
|
||||
|
||||
## Text-to-Speech (TTS)
|
||||
|
||||
```json
|
||||
{
|
||||
"text_to_voice": "openai",
|
||||
"text_to_voice_model": "tts-1",
|
||||
"tts_voice_id": "alloy"
|
||||
}
|
||||
```
|
||||
|
||||
| Parameter | Description |
|
||||
| --- | --- |
|
||||
| `text_to_voice_model` | `tts-1`, `tts-1-hd`, `gpt-4o-mini-tts` |
|
||||
| `tts_voice_id` | Voices: `alloy`, `echo`, `fable`, `onyx`, `nova`, `shimmer`, `ash`, `ballad`, `coral`, `sage`, `verse` |
|
||||
|
||||
## Embedding
|
||||
|
||||
```json
|
||||
{
|
||||
"embedding_provider": "openai",
|
||||
"embedding_model": "text-embedding-3-small"
|
||||
}
|
||||
```
|
||||
|
||||
Available models: `text-embedding-3-small`, `text-embedding-3-large`, `text-embedding-ada-002`. After changing the embedding, run `/memory rebuild-index` to rebuild the index.
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
title: Baidu Qianfan / ERNIE
|
||||
description: Baidu Qianfan ERNIE model configuration
|
||||
title: ERNIE
|
||||
description: ERNIE model configuration (Baidu Qianfan)
|
||||
---
|
||||
|
||||
Option 1: Native integration (recommended):
|
||||
|
||||
@@ -1,8 +1,16 @@
|
||||
---
|
||||
title: Qwen (Tongyi Qianwen)
|
||||
description: Tongyi Qianwen model configuration
|
||||
title: Qwen
|
||||
description: Qwen model configuration (Text / Image Understanding / Image Generation / Speech-to-Text / Text-to-Speech / Embedding)
|
||||
---
|
||||
|
||||
Qwen (Alibaba DashScope / Bailian) is one of the most fully-featured vendors. Text, image understanding, image generation, speech-to-text, text-to-speech, and embedding can all be enabled with a single `dashscope_api_key`.
|
||||
|
||||
<Tip>
|
||||
All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file.
|
||||
</Tip>
|
||||
|
||||
## Text Chat
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "qwen3.6-plus",
|
||||
@@ -12,16 +20,93 @@ description: Tongyi Qianwen model configuration
|
||||
|
||||
| Parameter | Description |
|
||||
| --- | --- |
|
||||
| `model` | Options include `qwen3.6-plus`, `qwen3.5-plus`, `qwen3-max`, `qwen-max`, `qwen-plus`, `qwen-turbo`, `qwq-plus`, etc. |
|
||||
| `dashscope_api_key` | Create at [Bailian Console](https://bailian.console.aliyun.com/?tab=model#/api-key). See [official docs](https://bailian.console.aliyun.com/?tab=api#/api) |
|
||||
| `model` | Can be `qwen3.6-plus`, `qwen3.7-max`, `qwen3.5-plus`, `qwen3-max`, `qwen-max`, `qwen-plus`, `qwen-turbo`, `qwq-plus`, etc. |
|
||||
| `dashscope_api_key` | Create one in the [Bailian Console](https://bailian.console.aliyun.com/?tab=model#/api-key); see the [official docs](https://bailian.console.aliyun.com/?tab=api#/api) |
|
||||
|
||||
OpenAI-compatible configuration is also supported:
|
||||
## Image Understanding
|
||||
|
||||
Once `dashscope_api_key` is configured, the Agent's Vision tool automatically calls Qwen's vision models to recognize images. Models like `qwen3-max` / `qwen3.5-plus` / `qwen3.6-plus` are already multimodal; if the main model is text-only (e.g. `qwen-turbo`), it automatically falls back to `qwen-vl-max`.
|
||||
|
||||
To manually specify a Vision model:
|
||||
|
||||
```json
|
||||
{
|
||||
"bot_type": "openai",
|
||||
"model": "qwen3.6-plus",
|
||||
"open_ai_api_base": "https://dashscope.aliyuncs.com/compatible-mode/v1",
|
||||
"open_ai_api_key": "YOUR_API_KEY"
|
||||
"tools": {
|
||||
"vision": {
|
||||
"model": "qwen3.6-plus"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Supported models: `qwen3.6-plus`, `qwen3.5-plus`, `qwen3-max`.
|
||||
|
||||
## Image Generation
|
||||
|
||||
```json
|
||||
{
|
||||
"skills": {
|
||||
"image-generation": {
|
||||
"model": "qwen-image-2.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Available models: `qwen-image-2.0`, `qwen-image-2.0-pro`.
|
||||
|
||||
## Speech-to-Text (ASR)
|
||||
|
||||
```json
|
||||
{
|
||||
"voice_to_text": "dashscope",
|
||||
"voice_to_text_model": "qwen3-asr-flash"
|
||||
}
|
||||
```
|
||||
|
||||
| Parameter | Description |
|
||||
| --- | --- |
|
||||
| `voice_to_text` | Set to `dashscope` to enable Qwen ASR |
|
||||
| `voice_to_text_model` | Optional, defaults to `qwen3-asr-flash` |
|
||||
|
||||
Credentials are automatically reused from `dashscope_api_key`. A single audio segment should be smaller than 10MB and no longer than 300 seconds.
|
||||
|
||||
## Text-to-Speech (TTS)
|
||||
|
||||
```json
|
||||
{
|
||||
"text_to_voice": "dashscope",
|
||||
"text_to_voice_model": "qwen3-tts-flash",
|
||||
"tts_voice_id": "Cherry"
|
||||
}
|
||||
```
|
||||
|
||||
| Parameter | Description |
|
||||
| --- | --- |
|
||||
| `text_to_voice_model` | Optional, defaults to `qwen3-tts-flash`; covers Mandarin, dialects, and major foreign languages |
|
||||
| `tts_voice_id` | Voice ID; see the common list below |
|
||||
|
||||
Common voice examples:
|
||||
|
||||
| Voice ID | Description |
|
||||
| --- | --- |
|
||||
| `Cherry` | Qianyue · Sunny Female Voice |
|
||||
| `Serena` | Suyao · Gentle Female Voice |
|
||||
| `Ethan` | Chenxu · Sunny Male Voice |
|
||||
| `Chelsie` | Qianxue · Anime Girl |
|
||||
| `Dylan` | Beijing Dialect · Xiaodong |
|
||||
| `Rocky` | Cantonese · Aqiang |
|
||||
| `Sunny` | Sichuan Dialect · Qing'er |
|
||||
|
||||
The full voice list (Mandarin / regional dialects / bilingual, etc.) can be selected visually in the Web Console under "Model Management → Text-to-Speech".
|
||||
|
||||
## Embedding
|
||||
|
||||
```json
|
||||
{
|
||||
"embedding_provider": "dashscope",
|
||||
"embedding_model": "text-embedding-v4"
|
||||
}
|
||||
```
|
||||
|
||||
The default model is `text-embedding-v4`. After changing the embedding, run `/memory rebuild-index` to rebuild the index.
|
||||
|
||||
@@ -5,12 +5,15 @@ description: CowAgent version history
|
||||
|
||||
| Version | Date | Description |
|
||||
| --- | --- | --- |
|
||||
| [2.0.9](/en/releases/v2.0.9) | 2026.05.22 | Model management console, MCP protocol support, browser persistent login, new models (gpt-5.5, gemini-3.5-flash, qwen3.7-max, etc.), deployment hardening |
|
||||
| [2.0.8](/en/releases/v2.0.8) | 2026.05.06 | Major Feishu channel upgrade (voice, streaming and Markdown, one-click QR-scan setup), DeepSeek V4 and Baidu models, scheduler tool enhancements |
|
||||
| [2.0.7](/en/releases/v2.0.7) | 2026.04.22 | Image Generation Skill (6-provider auto-routing), new models (Kimi K2.6, Claude Opus 4.7, GLM 5.1), knowledge base and Web Console improvements |
|
||||
| [2.0.6](/en/releases/v2.0.6) | 2026.04.14 | Knowledge Base, Deep Dream Memory Distillation, Smart Context Compression, Web Console upgrades |
|
||||
| [2.0.6](/en/releases/v2.0.6) | 2026.04.14 | Project rename, Knowledge Base system, Deep Dream Memory Distillation, Smart Context Compression, Web Console multi-session and various improvements |
|
||||
| [2.0.5](/en/releases/v2.0.5) | 2026.04.01 | Cow CLI, Skill Hub open source, Browser tool, WeCom Bot QR scan, and more |
|
||||
| [2.0.4](/en/releases/v2.0.4) | 2026.03.22 | Personal WeChat channel, new model support, Japanese docs, script refactoring and bug fixes |
|
||||
| [2.0.3](/en/releases/v2.0.3) | 2026.03.18 | WeCom Smart Bot and QQ channels, Coding Plan support, multiple new models, Web file processing, memory system upgrade |
|
||||
| [2.0.2](/en/releases/v2.0.2) | 2026.02.27 | Web Console upgrade, multi-channel concurrency, session persistence |
|
||||
| [2.0.1](/en/releases/v2.0.1) | 2026.02.27 | Built-in Web Search tool, smart context management, multiple fixes |
|
||||
| [2.0.1](/en/releases/v2.0.1) | 2026.02.13 | Built-in Web Search tool, smart context management, multiple fixes |
|
||||
| [2.0.0](/en/releases/v2.0.0) | 2026.02.03 | Full upgrade to AI super assistant |
|
||||
| 1.7.6 | 2025.05.23 | Web Channel optimization, AgentMesh plugin |
|
||||
| 1.7.5 | 2025.04.11 | DeepSeek model |
|
||||
@@ -21,6 +24,8 @@ description: CowAgent version history
|
||||
| 1.6.9 | 2024.07.19 | gpt-4o-mini, Alibaba voice recognition |
|
||||
| 1.6.8 | 2024.07.05 | Claude 3.5, Gemini 1.5 Pro |
|
||||
| 1.6.0 | 2024.04.26 | Kimi integration, gpt-4-turbo upgrade |
|
||||
| 1.5.8 | 2024.03.26 | GLM-4, Claude-3, edge-tts |
|
||||
| 1.5.2 | 2023.11.10 | Feishu channel, image recognition chat |
|
||||
| 1.5.0 | 2023.11.10 | gpt-4-turbo, dall-e-3, tts multimodal |
|
||||
| 1.0.0 | 2022.12.12 | Project created, first ChatGPT integration |
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ Related commits: [30c6d9b](https://github.com/zhayujie/CowAgent/commit/30c6d9b)
|
||||
|
||||
## 💰 Coding Plan Support
|
||||
|
||||
Added integration with vendor Coding Plan (monthly programming subscription) tiers via the unified OpenAI-compatible path. Supported vendors include Aliyun, MiniMax, Zhipu GLM, Kimi, and Volcengine.
|
||||
Added integration with vendor Coding Plan (monthly programming subscription) tiers via the unified OpenAI-compatible path. Supported vendors include Aliyun, MiniMax, GLM, Kimi, and Volcengine.
|
||||
|
||||
See [Coding Plan docs](https://docs.cowagent.ai/en/models/coding-plan) for detailed configuration.
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
title: v2.0.8
|
||||
description: CowAgent 2.0.8 - Major Feishu channel upgrade (voice, streaming typewriter, one-click QR app creation), DeepSeek V4 / Baidu Qianfan ERNIE 5.0 support, scheduler memory enhancements and multiple fixes
|
||||
description: CowAgent 2.0.8 - Major Feishu channel upgrade (voice, streaming typewriter, one-click QR app creation), DeepSeek V4 / ERNIE 5.0 support, scheduler memory enhancements and multiple fixes
|
||||
---
|
||||
|
||||
## 🪶 Major Feishu Channel Upgrade
|
||||
@@ -30,9 +30,9 @@ The voice and streaming building blocks come from a community contribution #2791
|
||||
|
||||
- **DeepSeek V4 series**: Added `deepseek-v4-pro` / `deepseek-v4-flash`, with `deepseek-v4-flash` set as the new default
|
||||
- **Unified thinking-mode toggle**: DeepSeek V4, Qwen3 and other thinking-capable models now share the same `enable_thinking` switch
|
||||
- **Baidu Qianfan / ERNIE first-class integration**: New `qianfan` provider supporting `ernie-5.0` (default recommendation), `ernie-x1.1`, `ernie-4.5-turbo-128k`, `ernie-4.5-turbo-32k`. Dedicated `qianfan_api_key` / `qianfan_api_base` settings keep OpenAI config clean; legacy `wenxin` / `wenxin-4` paths are fully preserved. #2790 Thanks [@jimmyzhuu](https://github.com/jimmyzhuu)
|
||||
- **ERNIE first-class integration**: New `qianfan` provider supporting `ernie-5.0` (default recommendation), `ernie-x1.1`, `ernie-4.5-turbo-128k`, `ernie-4.5-turbo-32k`. Dedicated `qianfan_api_key` / `qianfan_api_base` settings keep OpenAI config clean; legacy `wenxin` / `wenxin-4` paths are fully preserved. #2790 Thanks [@jimmyzhuu](https://github.com/jimmyzhuu)
|
||||
|
||||
Documentation: [Baidu Qianfan / ERNIE](https://docs.cowagent.ai/en/models/qianfan)
|
||||
Documentation: [ERNIE](https://docs.cowagent.ai/en/models/qianfan)
|
||||
|
||||
## 🌐 Translation Provider
|
||||
|
||||
|
||||
65
docs/en/releases/v2.0.9.mdx
Normal file
65
docs/en/releases/v2.0.9.mdx
Normal file
@@ -0,0 +1,65 @@
|
||||
---
|
||||
title: v2.0.9
|
||||
description: CowAgent 2.0.9 - Web Console model management, MCP protocol support, browser persistent login, new models and deployment hardening
|
||||
---
|
||||
|
||||
## 🖥️ Model Management Console
|
||||
|
||||
The Web Console adds a new **Models** page that organizes everything by **provider × capability**, covering chat, image, voice, embedding and search models in one place:
|
||||
|
||||
- **Per-provider configuration**: Each provider's API Key / API Base is configured once at the top, and every capability below picks it up automatically — no more re-entering credentials
|
||||
- **Image models**: Image understanding and image generation can each pick their own provider and model independently; falls back to the main model when unspecified
|
||||
- **Voice models**: ASR (speech-to-text) and TTS (text-to-speech) can be configured independently, with new Qwen and Zhipu ASR/TTS models added
|
||||
- **Embedding models**: Configurable embedding models (used for memory and knowledge-base retrieval), with new support for OpenAI, Tongyi, Doubao, Zhipu and others; run `/memory rebuild-index` after switching to rebuild the index online
|
||||
- **Search capability**: Web search has been upgraded to support Bocha, Baidu, Zhipu and more providers — in auto mode the agent can synthesize results from multiple sources for deeper research
|
||||
|
||||
Documentation: [Models Overview](https://docs.cowagent.ai/en/models)
|
||||
|
||||
<img width="720" alt="20260522113305" src="https://cdn.jsdelivr.net/gh/zhayujie/cowagent-assets@main/screenshots/en/web-console-models-config.png" />
|
||||
|
||||
|
||||
## 🧩 MCP Protocol Support
|
||||
|
||||
Adds support for **MCP (Model Context Protocol)**, expanding from a fixed built-in toolset to an open, pluggable tool ecosystem — any MCP-compatible service can be plugged in directly as an agent tool.
|
||||
|
||||
- Native JSON-RPC implementation, zero extra dependencies, supports both `stdio` and `sse` transports
|
||||
- Compatible with the `mcpServers` configuration style used by Claude Desktop / Cursor, reads `~/cow/mcp.json` by default
|
||||
|
||||
Documentation: [MCP Tools](https://docs.cowagent.ai/en/tools/mcp). Thanks [@yangluxin613](https://github.com/yangluxin613) (#2801)
|
||||
|
||||
## 🌐 Browser Persistent Login
|
||||
|
||||
For sites that require login or have anti-bot protection, the browser tool can now persist a login session for long-term reuse, and supports attaching to your real Chrome browser to bypass fingerprint detection:
|
||||
|
||||
- **Persistent user profile (default)**: Uses `~/.cow/browser_profile` as the browser user data dir by default; once logged in, sessions are reused automatically on subsequent runs
|
||||
- **CDP mode**: Configure `tools.browser.cdp_endpoint` to take over a real Chrome instance with full browser permissions
|
||||
|
||||
Documentation: [Browser Tool](https://docs.cowagent.ai/en/tools/browser). Thanks [@leafmove](https://github.com/leafmove) (#2809)
|
||||
|
||||
## 🤖 New Models and Improvements
|
||||
|
||||
- **New models**: `gpt-5.5`, `gemini-3.5-flash`, `qwen3.7-max`, `ernie-5.1`
|
||||
- **Improvements**: DeepSeek V4 supports the `reasoning_effort` thinking-depth parameter; fixed thinking models like MiMo failing to connect via the OpenAI-compatible protocol
|
||||
|
||||
## 🔒 Deployment & Security
|
||||
|
||||
- **Bind to localhost by default**: The Web Console `web_host` now defaults to `127.0.0.1`; for server deployments, set it to `0.0.0.0` and configure a password manually. Thanks @August829, @yidaozhongqing, @YLChen-007, @icysun
|
||||
- **Fully bundled frontend assets**: All third-party CSS / JS are now served locally — the console works offline and on intranet deployments. Thanks [@gitlayzer](https://github.com/gitlayzer) (#2816)
|
||||
|
||||
## 🛠 UX Improvements & Fixes
|
||||
|
||||
- **TTS rolls out to more channels**: Web Console, Personal WeChat, Feishu, DingTalk and WeCom Smart Bot all support voice replies — see the [Channels Overview](https://docs.cowagent.ai/en/channels)
|
||||
- **Log panel enhancements**: Differentiated highlighting by log level, with level-based filtering. Thanks [@yangluxin613](https://github.com/yangluxin613) (#2807)
|
||||
- **Auto-launch Web Console**: The Web Console now opens automatically on startup. Thanks [@yangluxin613](https://github.com/yangluxin613) (#2804)
|
||||
- **Clean Ctrl+C exit**: No more long `KeyboardInterrupt` stack traces. Thanks [@yangluxin613](https://github.com/yangluxin613) (#2806)
|
||||
- **Folder upload**: Web Console supports directory uploads, with path validation adapted for Windows. Thanks [@TryToMakeUsBetter](https://github.com/TryToMakeUsBetter) (#2814)
|
||||
- Fixed scheduled tasks executing duplicates under certain conditions. Thanks [@CNXudiandian](https://github.com/CNXudiandian) (#2820)
|
||||
- Fixed one-shot scheduled tasks with timezone not firing. Thanks @AethericSpace
|
||||
- Fixed failed tool calls not being displayed after page refresh. Thanks [@a1094174619](https://github.com/a1094174619) (#2822)
|
||||
- Fixed WeCom bot messages with illegal control characters failing to be delivered. Thanks [@Jacques-Zhao](https://github.com/Jacques-Zhao) (#2810)
|
||||
|
||||
## 📦 Upgrade
|
||||
|
||||
Source-code deployments can run `cow update` for a one-click upgrade, or pull the latest code and restart manually. See the [Upgrade Guide](https://docs.cowagent.ai/en/guide/upgrade) for details.
|
||||
|
||||
**Release Date**: 2026.05.22 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.8...2.0.9)
|
||||
65
docs/en/skills/hub.mdx
Normal file
65
docs/en/skills/hub.mdx
Normal file
@@ -0,0 +1,65 @@
|
||||
---
|
||||
title: Skill Hub
|
||||
description: Browse, search, and install AI Agent skills
|
||||
---
|
||||
|
||||
[Cow Skill Hub](https://skills.cowagent.ai/) is an open-source skill marketplace for AI Agents, aggregating official picks, community contributions, and third-party skills from GitHub, ClawHub, and beyond.
|
||||
|
||||
Source code: [github.com/zhayujie/cow-skill-hub](https://github.com/zhayujie/cow-skill-hub)
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/20260401110103.png" width="800" />
|
||||
|
||||
## Features
|
||||
|
||||
- **Browse skills** — filter by category (Featured / Community / Third-party) and tags
|
||||
- **Search skills** — find skills by name or description
|
||||
- **View details** — read the skill manifest, file contents, install command, and required environment variables
|
||||
- **One-click install** — copy the install command and run it in CowAgent
|
||||
|
||||
## Installing a skill
|
||||
|
||||
Run the install command in chat or in your terminal:
|
||||
|
||||
<CodeGroup>
|
||||
```text Chat
|
||||
/skill install <name>
|
||||
```
|
||||
|
||||
```bash Terminal
|
||||
cow skill install <name>
|
||||
```
|
||||
</CodeGroup>
|
||||
|
||||
You can also browse the marketplace directly from chat:
|
||||
|
||||
```text
|
||||
/skill list --remote
|
||||
/skill search <keyword>
|
||||
```
|
||||
|
||||
Beyond the curated list, you can install third-party skills from **GitHub, ClawHub, LinkAI, or any URL** via the CLI. See [Installing skills](/en/skills/install) for details.
|
||||
|
||||
## Contributing a skill
|
||||
|
||||
To submit your own skill:
|
||||
|
||||
1. Visit [skills.cowagent.ai/submit](https://skills.cowagent.ai/submit)
|
||||
2. Sign in with GitHub or Google
|
||||
3. Upload a folder or zip file containing `SKILL.md`
|
||||
4. Skill name, display name, and description are auto-detected — adjust as needed
|
||||
5. Submit for review; skills go live after security and quality checks
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/20260401111904.png" width="800" />
|
||||
|
||||
Skill file layout:
|
||||
|
||||
```
|
||||
your-skill/
|
||||
├── SKILL.md # required, in the root
|
||||
├── scripts/ # optional, runtime scripts
|
||||
└── resources/ # optional, additional assets
|
||||
```
|
||||
|
||||
<Tip>
|
||||
Skills are built around the `SKILL.md` manifest. You can also download `SKILL.md` from a skill's detail page and use it with any Agent that supports custom instructions (OpenClaw, Cursor, Claude Code, and more).
|
||||
</Tip>
|
||||
@@ -1,149 +1,89 @@
|
||||
---
|
||||
title: image-generation - Image Generation
|
||||
title: image-generation
|
||||
description: Text-to-image / image-to-image / multi-image fusion with automatic multi-provider routing and fallback
|
||||
---
|
||||
|
||||
A general-purpose image generation and editing skill supporting six providers: OpenAI, Gemini, Seedream (Volcengine Ark), Qwen (DashScope), MiniMax, and LinkAI. No need to choose a model manually — the script automatically selects a configured provider based on a fixed priority order.
|
||||
A general-purpose image generation and editing skill supporting six providers: OpenAI, Gemini, Seedream (Volcengine Ark), Qwen (DashScope), MiniMax, and LinkAI. Configure any one provider's key to start using it; configure multiple to enable automatic fallback.
|
||||
|
||||
## Model Selection
|
||||
|
||||
`image-generation` uses a "fixed priority + automatic fallback" strategy — just configure your keys and it works:
|
||||
|
||||
1. **Priority order**: `OpenAI → Gemini → Seedream → Qwen → MiniMax → LinkAI`
|
||||
2. **Unconfigured providers are skipped**: only providers with an API key participate
|
||||
3. **Automatic fallback on failure**: on errors like 401, model not enabled, or network issues, the next provider is tried
|
||||
4. **Specified model goes first**: if a specific model name is provided, its provider is promoted to the front
|
||||
|
||||
### Supported Models
|
||||
## Supported Models
|
||||
|
||||
| Provider | Models / Aliases | Notes |
|
||||
| --- | --- | --- |
|
||||
| OpenAI | `gpt-image-2`, `gpt-image-1` | General-purpose, high quality, supports `quality` parameter |
|
||||
| Gemini Nano Banana | `nano-banana-2`, `nano-banana-pro`, `nano-banana` | Corresponds to `gemini-3.1-flash`, `gemini-3-pro`, `gemini-2.5-flash` image variants |
|
||||
| Gemini Nano Banana | `nano-banana-2`, `nano-banana-pro`, `nano-banana` | Corresponds to the image variants of `gemini-3.1-flash`, `gemini-3-pro`, `gemini-2.5-flash` |
|
||||
| Seedream (Volcengine Ark) | `seedream-5.0-lite`, `seedream-4.5` | Native 2K–4K, up to 14 reference images for fusion |
|
||||
| Qwen (DashScope) | `qwen-image-2.0`, `qwen-image-2.0-pro` | Strong with Chinese text rendering and text-image layouts |
|
||||
| MiniMax | `image-01` | Fast and simple image generation |
|
||||
| LinkAI | Any model | Universal proxy, used as fallback |
|
||||
| MiniMax | `image-01` | Fast and simple |
|
||||
| LinkAI | Any model | Universal gateway, used as fallback |
|
||||
|
||||
<Note>
|
||||
By default, the Agent does not pick a model — it uses automatic routing. If you want a specific model, just say so in the conversation, e.g. "use seedream to draw a cat" or "generate a poster with gpt-image-2". You can also pin a default model via the "Custom Configuration" section below.
|
||||
</Note>
|
||||
## Model Selection
|
||||
|
||||
## Custom Configuration
|
||||
By default, "auto routing + automatic fallback" is used:
|
||||
|
||||
### API Key Setup
|
||||
1. Pick the first configured provider in the order `OpenAI → Gemini → Seedream → Qwen → MiniMax → LinkAI`
|
||||
2. On errors such as 401, model not enabled, or network issues, automatically switch to the next provider
|
||||
3. If the user specifies a model in the conversation (e.g. "use seedream to draw a cat"), the corresponding provider is promoted to the front
|
||||
|
||||
You need **at least one** provider key. Configuring multiple providers enables automatic fallback. There are three ways to set up keys:
|
||||
|
||||
#### Option 1: Automatic Reuse of Existing Keys
|
||||
|
||||
If you have already configured model keys in the web console or `config.json` (e.g. `openai_api_key`, `gemini_api_key`, etc.), these keys are **automatically synced** to the corresponding environment variables at startup. In other words, if your chat model works, image generation can use the same key with zero extra configuration.
|
||||
|
||||
#### Option 2: Configure in config.json
|
||||
|
||||
Add the key fields directly to `config.json`:
|
||||
To pin a specific model:
|
||||
|
||||
```json
|
||||
{
|
||||
"openai_api_key": "sk-xxx",
|
||||
"openai_api_base": "https://api.openai.com/v1",
|
||||
"gemini_api_key": "AIza-xxx",
|
||||
"ark_api_key": "xxx",
|
||||
"dashscope_api_key": "sk-xxx",
|
||||
"minimax_api_key": "xxx",
|
||||
"linkai_api_key": "xxx"
|
||||
}
|
||||
```
|
||||
|
||||
A restart is required after changes. Each key also has a corresponding `*_api_base` field for custom endpoints.
|
||||
|
||||
#### Option 3: Configure via Conversation
|
||||
|
||||
Send an API key in the chat and the Agent will save it to `~/cow/.env` using the `env_config` tool — **no restart needed**. For example:
|
||||
|
||||
```
|
||||
Set OPENAI_API_KEY to sk-xxx
|
||||
```
|
||||
|
||||
Or:
|
||||
|
||||
```
|
||||
Configure ARK_API_KEY as xxx
|
||||
```
|
||||
|
||||
### API Key Reference
|
||||
|
||||
| Environment Variable | config.json Field | Provider | Default Base URL |
|
||||
| --- | --- | --- | --- |
|
||||
| `OPENAI_API_KEY` | `openai_api_key` | OpenAI | `https://api.openai.com/v1` |
|
||||
| `GEMINI_API_KEY` | `gemini_api_key` | Gemini | `https://generativelanguage.googleapis.com` |
|
||||
| `ARK_API_KEY` | `ark_api_key` | Volcengine Ark (Seedream) | `https://ark.cn-beijing.volces.com/api/v3` |
|
||||
| `DASHSCOPE_API_KEY` | `dashscope_api_key` | Alibaba DashScope (Qwen) | `https://dashscope.aliyuncs.com` |
|
||||
| `MINIMAX_API_KEY` | `minimax_api_key` | MiniMax | `https://api.minimaxi.com` |
|
||||
| `LINKAI_API_KEY` | `linkai_api_key` | LinkAI | `https://api.link-ai.tech` |
|
||||
|
||||
### Pinning a Default Model
|
||||
|
||||
To force all image generation through a specific provider's model, add this to `config.json`:
|
||||
|
||||
```json
|
||||
"skills": {
|
||||
"image-generation": {
|
||||
"model": "seedream-5.0-lite"
|
||||
"skills": {
|
||||
"image-generation": {
|
||||
"model": "seedream-5.0-lite"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
At startup, this is automatically converted to the environment variable `SKILL_IMAGE_GENERATION_MODEL`, and the script will always use this model's provider for generation.
|
||||
## Configuring API Keys
|
||||
|
||||
<Tip>
|
||||
It is recommended to configure providers from the "Model Management" page in the [Web console](/en/channels/web). Chat model keys configured there are automatically reused by the image generation skill — no need to set them twice. You can also edit the configuration file manually or temporarily set keys in a conversation using the `env_config` tool.
|
||||
</Tip>
|
||||
|
||||
Credentials are shared with the main model providers:
|
||||
|
||||
| Field | Provider |
|
||||
| --- | --- |
|
||||
| `openai_api_key` | OpenAI |
|
||||
| `gemini_api_key` | Gemini |
|
||||
| `ark_api_key` | Volcengine Ark (Seedream) |
|
||||
| `dashscope_api_key` | Alibaba DashScope (Qwen) |
|
||||
| `minimax_api_key` | MiniMax |
|
||||
| `linkai_api_key` | LinkAI |
|
||||
|
||||
|
||||
## Enabling and Disabling
|
||||
|
||||
`image-generation` is a built-in skill that **automatically adjusts its status based on API keys**:
|
||||
The skill automatically adjusts its status based on API keys:
|
||||
|
||||
- **Key configured**: the skill is active — the Agent will invoke it when asked to draw
|
||||
- **Key not configured**: the skill still appears in context (marked as "needs configuration") — the Agent will guide the user to set up a key rather than failing silently
|
||||
- **Key configured**: the Agent calls the skill directly when it receives a drawing request
|
||||
- **Key not configured**: the skill still appears in context (marked as "needs configuration") — the Agent will guide the user to set up a key
|
||||
|
||||
To control it manually:
|
||||
|
||||
```text
|
||||
/skill disable image-generation # Disable (won't be invoked even if keys are present)
|
||||
/skill disable image-generation # Disable
|
||||
/skill enable image-generation # Re-enable
|
||||
```
|
||||
|
||||
In the terminal: `cow skill disable image-generation` / `cow skill enable image-generation`.
|
||||
Equivalent terminal commands: `cow skill disable image-generation` / `cow skill enable image-generation`.
|
||||
|
||||
## Parameters
|
||||
|
||||
| Parameter | Type | Required | Default | Description |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| `prompt` | string | Yes | — | Image description |
|
||||
| `image_url` | string / list | No | null | Input image(s) for editing — local path or URL. Pass multiple for multi-image fusion |
|
||||
| `quality` | string | No | auto | `low` / `medium` / `high` — only some providers support this |
|
||||
| `image_url` | string / list | No | null | Input image for editing — local path or URL; pass a list for multi-image fusion |
|
||||
| `quality` | string | No | auto | `low` / `medium` / `high`, supported only by some providers |
|
||||
| `size` | string | No | auto | `512` / `1K` / `2K` / `3K` / `4K`, or pixel value like `1024x1024` |
|
||||
| `aspect_ratio` | string | No | null | `1:1` / `3:2` / `2:3` / `16:9` / `9:16` / `21:9`; Gemini also supports `1:4` / `4:1` / `1:8` / `8:1` |
|
||||
|
||||
<Warning>
|
||||
**Higher quality and larger size cost more and take longer.**
|
||||
|
||||
- For everyday conversations and quick previews, use the defaults (`auto`) or `quality=low` + `size=1K` — roughly 20 seconds
|
||||
- For posters or when the user explicitly asks for high resolution, use `quality=high` + `size=2K/4K` — may take 1–5 minutes depending on the model
|
||||
**Higher quality and larger size cost more and take longer.** For everyday conversations, use the defaults (`auto`) or `quality=low` + `size=1K` — about 20 seconds per image. For posters or when high resolution is explicitly requested, use `quality=high` + `size=2K/4K` — may take 1–5 minutes.
|
||||
</Warning>
|
||||
|
||||
## Output
|
||||
|
||||
On success:
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "doubao-seedream-5-0-260128",
|
||||
"images": [
|
||||
{"url": "/path/to/output.png"}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
On failure: `{ "error": "..." }`. After an error, **do not retry directly** — it is almost always a configuration issue (wrong key, incorrect API base, model not enabled). Have the user fix the configuration first.
|
||||
|
||||
## Common Use Cases
|
||||
|
||||
- **Text-to-image**: generate illustrations, posters, icons, avatars, storyboards, etc. from a description
|
||||
@@ -151,8 +91,8 @@ On failure: `{ "error": "..." }`. After an error, **do not retry directly** —
|
||||
- **Multi-image fusion**: combine multiple reference images into one (outfit swaps, character group photos, etc.)
|
||||
|
||||
<Note>
|
||||
- Bash timeout should be set to 600 seconds. Each provider has a 300-second HTTP timeout, but the script may try multiple providers sequentially
|
||||
- Bash timeout should be set to 600 seconds: each provider has a 300-second HTTP timeout, and the script may try multiple providers sequentially
|
||||
- Input images are automatically compressed to ≤ 4 MB with the longest edge ≤ 4096 px
|
||||
- Gemini / Seedream / Qwen / MiniMax do not support the `quality` parameter — passing it has no effect
|
||||
- Gemini / Seedream / Qwen / MiniMax do not support the `quality` parameter
|
||||
- Seedream defaults to 2K; `seedream-5.0-lite` supports up to 3K; `seedream-4.5` supports up to 4K
|
||||
</Note>
|
||||
|
||||
@@ -3,11 +3,11 @@ title: Install Skills
|
||||
description: Install skills from multiple sources with a single command
|
||||
---
|
||||
|
||||
CowAgent supports installing skills from **Cow Skill Hub, GitHub, ClawHub**, and any URL with a unified `install` command. Use `/skill install` in chat or `cow skill install` in the terminal.
|
||||
CowAgent supports installing skills from [Cow Skill Hub](https://skills.cowagent.ai/), GitHub, ClawHub, LinkAI, and any URL via a unified `install` command. Use `/skill install` in chat or `cow skill install` in the terminal.
|
||||
|
||||
## From Skill Hub
|
||||
## From the Skill Hub
|
||||
|
||||
Browse the Skill Hub and install:
|
||||
Browse all available skills at [skills.cowagent.ai](https://skills.cowagent.ai/) and install by name:
|
||||
|
||||
```text
|
||||
/skill list --remote
|
||||
@@ -16,7 +16,7 @@ Browse the Skill Hub and install:
|
||||
|
||||
## From GitHub
|
||||
|
||||
Supports batch install from repositories and single skill from subdirectories:
|
||||
Any GitHub-hosted skill can be installed directly. Supports both repository-level batch install and subdirectory-level single install:
|
||||
|
||||
```text
|
||||
/skill install larksuite/cli
|
||||
@@ -25,10 +25,22 @@ Supports batch install from repositories and single skill from subdirectories:
|
||||
|
||||
## From ClawHub
|
||||
|
||||
All [ClawHub](https://clawhub.ai/) skills (40k+) can be installed with a single command:
|
||||
|
||||
```text
|
||||
/skill install clawhub:baidu-search
|
||||
/skill install clawhub:<name>
|
||||
```
|
||||
|
||||
## From LinkAI
|
||||
|
||||
All public resources on [LinkAI](https://link-ai.tech/console) (10k+ apps / workflows / plugins), as well as your own resources (apps, workflows, knowledge bases, databases, plugins), can be installed via:
|
||||
|
||||
```text
|
||||
/skill install linkai:<code>
|
||||
```
|
||||
|
||||
> Every resource created on the LinkAI platform has a unique `code`. Find it on each resource's page in the [console](https://link-ai.tech/console).
|
||||
|
||||
## From URL
|
||||
|
||||
Supports zip archives and SKILL.md file links:
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
title: knowledge-wiki - Knowledge Base
|
||||
title: knowledge-wiki
|
||||
description: Maintain a local structured knowledge base with automatic archiving, categorisation, and cross-referencing
|
||||
---
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
title: skill-creator - Skill Creator
|
||||
title: skill-creator
|
||||
description: Create, install, and update skills — standardises SKILL.md format and directory structure
|
||||
---
|
||||
|
||||
|
||||
@@ -34,7 +34,9 @@ Fully compatible with the MCP community standard, identical to Claude Desktop /
|
||||
| `command` | stdio | Executable to launch the server (e.g. `npx`, `python`, `uvx`) |
|
||||
| `args` | No | Arguments passed to `command` |
|
||||
| `env` | No | Environment variables for the subprocess, commonly used for API keys |
|
||||
| `url` | SSE | SSE endpoint URL (alternative to `command`) |
|
||||
| `url` | SSE / Streamable HTTP | Remote endpoint URL (alternative to `command`) |
|
||||
| `type` | Remote | Remote transport type: `sse` or `streamable-http` (defaults to `sse`) |
|
||||
| `headers` | No | Extra HTTP headers for remote requests (e.g. `Authorization`); Streamable HTTP only |
|
||||
| `disabled` | No | When `true`, this server is skipped — handy for temporary disabling |
|
||||
|
||||
### Full Example
|
||||
@@ -88,7 +90,8 @@ The Agent will:
|
||||
| Transport | Description | Config Field |
|
||||
| --- | --- | --- |
|
||||
| **stdio** | Subprocess communication. The most common option, with the richest community ecosystem. | `command` + `args` |
|
||||
| **SSE** | HTTP Server-Sent Events, suitable for remotely hosted MCP services. | `url` |
|
||||
| **SSE** | HTTP Server-Sent Events. Legacy remote transport. | `url` (default) |
|
||||
| **Streamable HTTP** | New unified remote transport, gradually replacing SSE. | `type: "streamable-http"` + `url` |
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
@@ -106,4 +109,4 @@ You can browse third-party MCP marketplaces and copy a JSON config to use direct
|
||||
- [mcp.so](https://mcp.so) — Global MCP service index
|
||||
- [ModelScope MCP Hub](https://modelscope.cn/mcp) — ModelScope's MCP hub, more reliable from mainland China
|
||||
|
||||
Any MCP server that follows the standard protocol (stdio / SSE) integrates with CowAgent out of the box.
|
||||
Any MCP server that follows the standard protocol (stdio / SSE / Streamable HTTP) integrates with CowAgent out of the box.
|
||||
|
||||
@@ -38,3 +38,43 @@ Create and manage scheduled tasks with natural language:
|
||||
<Frame>
|
||||
<img src="https://cdn.link-ai.tech/doc/20260202195402.png" width="800" />
|
||||
</Frame>
|
||||
|
||||
## Results injected into the conversation
|
||||
|
||||
Scheduled tasks run inside an isolated session (so internal planning and tool calls do not pollute the user's chat), but the **final output** is written back to the user's real session as a message pair. You can directly follow up — e.g. "expand on point 2 from earlier".
|
||||
|
||||
**Default policy**
|
||||
|
||||
- Output of Agent dynamic tasks is injected into the conversation
|
||||
- Fixed-message tasks are not injected by default (configurable)
|
||||
- Each session keeps the most recent **3 pairs** of scheduler messages; older pairs are pruned automatically. Regular user messages are unaffected
|
||||
|
||||
**Configuration**
|
||||
|
||||
| Key | Default | Description |
|
||||
| --- | --- | --- |
|
||||
| `scheduler_inject_to_session` | `true` | Master switch |
|
||||
| `scheduler_inject_max_per_session` | `3` | Max scheduler message pairs kept per session |
|
||||
| `scheduler_inject_send_message` | `false` | Whether to also inject fixed-message tasks |
|
||||
|
||||
```json
|
||||
{
|
||||
"scheduler_inject_to_session": true,
|
||||
"scheduler_inject_max_per_session": 3,
|
||||
"scheduler_inject_send_message": false
|
||||
}
|
||||
```
|
||||
|
||||
## Context inside scheduled task execution
|
||||
|
||||
The isolated session for scheduled tasks retains a few recent runs of conversation history, so you can naturally do "compare with last time" or "continue from previous conclusion". To prevent prompts from growing unbounded for high-frequency tasks (e.g. a 5-minute monitor), history is auto-trimmed:
|
||||
|
||||
```
|
||||
scheduler_keep_turns = max(1, agent_max_context_turns / 5)
|
||||
```
|
||||
|
||||
`agent_max_context_turns` defaults to `20`, so each scheduled run keeps the most recent **4 turns** of history by default. Increase `agent_max_context_turns` if you need longer memory.
|
||||
|
||||
<Note>
|
||||
For group-chat scenarios (Feishu / WeCom group bots / DingTalk, etc.), the user's real `session_id` looks like `user_id:group_id` — different from `receiver`. Scheduler records the correct `session_id` when a task is created. For older `tasks.json` entries missing this field, the runtime falls back to `receiver`, matching legacy behavior.
|
||||
</Note>
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
title: vision - Image Analysis
|
||||
title: vision - Image Understanding
|
||||
description: Analyze image content (recognition, description, OCR, etc.)
|
||||
---
|
||||
|
||||
@@ -9,33 +9,49 @@ Analyze local images or image URLs using Vision API. Supports content descriptio
|
||||
|
||||
The vision tool uses a multi-level auto-selection strategy with automatic fallback — no manual configuration required:
|
||||
|
||||
1. **Main model** — uses the currently configured main model for image recognition (zero extra cost)
|
||||
2. **Other configured models** — auto-discovers other models with configured API keys as alternatives
|
||||
3. **OpenAI** — uses `open_ai_api_key` to call gpt-4.1-mini
|
||||
4. **LinkAI** — uses `linkai_api_key` to call LinkAI vision service
|
||||
|
||||
When `use_linkai=true`, LinkAI is promoted to the highest priority.
|
||||
1. **Main model** — uses the currently configured main model for image recognition (must be a multimodal model)
|
||||
2. **Other configured models** — auto-discovers other multimodal models with configured API keys as alternatives
|
||||
|
||||
If the current provider fails, the tool automatically tries the next one until it succeeds or all fail.
|
||||
|
||||
### Supported Models
|
||||
|
||||
| Vendor | Vision Model | Notes |
|
||||
| Provider | Vision Model | Notes |
|
||||
| --- | --- | --- |
|
||||
| OpenAI / Compatible | Main model | All OpenAI-compatible multimodal models |
|
||||
| Baidu Qianfan | Main model | Multimodal main models (e.g. `ernie-5.1`) handle images directly; falls back to `ernie-4.5-turbo-vl` for text-only main models |
|
||||
| Qwen (DashScope) | Main model | Via MultiModalConversation API |
|
||||
| OpenAI / Compatible | Main model | All OpenAI-protocol-compatible multimodal models |
|
||||
| Qwen (DashScope) | Main model | e.g. qwen3.6-plus, etc. |
|
||||
| Claude | Main model | Anthropic native image format |
|
||||
| Gemini | Main model | inlineData format |
|
||||
| Doubao | Main model | doubao-seed-2-0 series natively supported |
|
||||
| Kimi (Moonshot) | Main model | kimi-k2.6, kimi-k2.5 natively supported |
|
||||
| ZhipuAI | glm-5v-turbo | Always uses dedicated vision model |
|
||||
| MiniMax | MiniMax-Text-01 | Always uses dedicated vision model |
|
||||
| ERNIE | Main model | Defaults to the multimodal main model (e.g. `ernie-5.1`); falls back to `ernie-4.5-turbo-vl` when the main model is not multimodal |
|
||||
| ZhipuAI | glm-5v-turbo | Always uses the dedicated vision model |
|
||||
| MiniMax | MiniMax-Text-01 | Always uses the dedicated vision model |
|
||||
|
||||
<Note>
|
||||
ZhipuAI and MiniMax text models do not support image understanding, so their dedicated vision models are always used automatically.
|
||||
</Note>
|
||||
|
||||
> When `use_linkai=true`, LinkAI's multimodal model is used by default.
|
||||
|
||||
## Custom Configuration
|
||||
|
||||
To specify the model used by Vision, configure it in `config.json`, for example:
|
||||
|
||||
```json
|
||||
{
|
||||
"tools": {
|
||||
"vision": {
|
||||
"model": "gpt-4.1"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The specified model is **used first**, and the tool automatically routes to the corresponding provider based on the model name; on failure, it falls back to other configured providers.
|
||||
|
||||
In most cases no configuration is needed — the tool works automatically as long as the main model supports multimodal input or any vision-capable API key is configured.
|
||||
|
||||
## Parameters
|
||||
|
||||
| Parameter | Type | Required | Description |
|
||||
@@ -45,21 +61,7 @@ If the current provider fails, the tool automatically tries the next one until i
|
||||
|
||||
Supported image formats: jpg, jpeg, png, gif, webp
|
||||
|
||||
## Custom Configuration
|
||||
|
||||
To specify a particular model for the vision tool, add to `config.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"tools": {
|
||||
"vision": {
|
||||
"model": "ernie-4.5-turbo-vl"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
In most cases no configuration is needed. The tool works automatically as long as the main model supports multimodal input or any vision-capable API key is configured.
|
||||
|
||||
## Use Cases
|
||||
|
||||
@@ -69,5 +71,5 @@ In most cases no configuration is needed. The tool works automatically as long a
|
||||
- Analyze screenshots and scanned documents
|
||||
|
||||
<Note>
|
||||
Images larger than 1MB are automatically compressed (max edge 1536px). All images (including remote URLs) are converted to base64 for transmission to ensure compatibility with all model backends.
|
||||
Images larger than 1MB are automatically compressed before upload. All images (including remote URLs) are converted to base64 for transmission to ensure compatibility with all model backends.
|
||||
</Note>
|
||||
|
||||
32
docs/en/tools/web-fetch.mdx
Normal file
32
docs/en/tools/web-fetch.mdx
Normal file
@@ -0,0 +1,32 @@
|
||||
---
|
||||
title: web_fetch - Web Fetch
|
||||
description: Fetch web pages and document content
|
||||
---
|
||||
|
||||
Fetch the content of an HTTP/HTTPS URL. Web pages are extracted as readable text; document files (PDF, Word, Excel, etc.) are downloaded and parsed automatically.
|
||||
|
||||
## Parameters
|
||||
|
||||
| Parameter | Type | Required | Description |
|
||||
| --- | --- | --- | --- |
|
||||
| `url` | string | Yes | HTTP/HTTPS URL (web page or document) |
|
||||
|
||||
## Supported file types
|
||||
|
||||
| Type | Formats |
|
||||
| --- | --- |
|
||||
| PDF | `.pdf` |
|
||||
| Word | `.docx` |
|
||||
| Text | `.txt`, `.md`, `.csv`, `.log` |
|
||||
| Spreadsheet | `.xls`, `.xlsx` |
|
||||
| Presentation | `.ppt`, `.pptx` |
|
||||
|
||||
## Use cases
|
||||
|
||||
- Extract readable text from a web page
|
||||
- Download and parse remote documents
|
||||
- Inspect API response bodies
|
||||
|
||||
<Note>
|
||||
`web_fetch` only retrieves static HTML. For pages that require JavaScript rendering (such as SPAs), use the `browser` tool instead.
|
||||
</Note>
|
||||
@@ -1,32 +1,51 @@
|
||||
---
|
||||
title: web_search - Web Search
|
||||
description: Search the internet for real-time information
|
||||
description: Search the internet for real-time information, with support for multiple search providers
|
||||
---
|
||||
|
||||
Search the internet for real-time information, news, research, and more. Supports two search backends with automatic fallback.
|
||||
Search the internet for real-time information, news, research, and more. Supports four backends — Bocha, ERNIE, GLM, and LinkAI — and works once any one of them is configured.
|
||||
|
||||
## Dependencies
|
||||
<Tip>
|
||||
It is recommended to configure providers and routing strategy visually from the "Model Management → Search" panel in the [Web console](/en/channels/web), without manually editing the configuration file.
|
||||
</Tip>
|
||||
|
||||
Requires at least one search API key (configured via `env_config` tool or workspace `.env` file):
|
||||
## Providers
|
||||
|
||||
| Backend | Environment Variable | Priority | How to Get |
|
||||
| --- | --- | --- | --- |
|
||||
| Bocha Search | `BOCHA_API_KEY` | Primary | [Bocha Open Platform](https://open.bochaai.com/) |
|
||||
| LinkAI Search | `LINKAI_API_KEY` | Fallback | [LinkAI Console](https://link-ai.tech/console/interface) |
|
||||
| Provider | Credential | Apply |
|
||||
| --- | --- | --- |
|
||||
| Bocha | `tools.web_search.bocha_api_key` | [Bocha Open Platform](https://open.bochaai.com/) |
|
||||
| ERNIE | Reuses `qianfan_api_key` | [Qianfan Console](https://cloud.baidu.com/doc/qianfan/s/2mh4su4uy) |
|
||||
| Zhipu | Reuses `zhipu_ai_api_key` | [Zhipu Open Platform](https://docs.bigmodel.cn/cn/guide/tools/web-search) |
|
||||
| LinkAI | Reuses `linkai_api_key` | [LinkAI Console](https://link-ai.tech/console/interface) |
|
||||
|
||||
## Parameters
|
||||
Except for Bocha which requires a dedicated `bocha_api_key`, the other three reuse the corresponding model's API key — configuring the model automatically grants search capability.
|
||||
|
||||
## Routing Strategy
|
||||
|
||||
```json
|
||||
{
|
||||
"tools": {
|
||||
"web_search": {
|
||||
"strategy": "auto",
|
||||
"provider": ""
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
- `auto` (default): the Agent intelligently picks among configured providers and may call multiple providers in a single task to gather more comprehensive results; when none is specified, falls back through `bocha → qianfan → zhipu → linkai`.
|
||||
- `fixed`: always use the provider specified in `provider`; falls back to the auto order if that provider's credentials are missing.
|
||||
|
||||
## Tool Parameters
|
||||
|
||||
| Parameter | Type | Required | Description |
|
||||
| --- | --- | --- | --- |
|
||||
| `query` | string | Yes | Search keywords |
|
||||
| `count` | integer | No | Number of results (1-50, default 10) |
|
||||
| `freshness` | string | No | Time range: `noLimit`, `oneDay`, `oneWeek`, `oneMonth`, `oneYear`, or date range like `2025-01-01..2025-02-01` |
|
||||
| `summary` | boolean | No | Return page summaries (default false) |
|
||||
|
||||
## Use Cases
|
||||
|
||||
When the user asks about latest information, needs fact-checking, or real-time data, the Agent automatically invokes this tool.
|
||||
| `count` | integer | No | Number of results (1–50, default 10) |
|
||||
| `freshness` | string | No | Time range: `noLimit` (default), `oneDay`, `oneWeek`, `oneMonth`, `oneYear`, or date range like `2025-01-01..2025-02-01` |
|
||||
| `summary` | boolean | No | Whether to return page summaries (default false) |
|
||||
| `provider` | string | No | Available when multiple providers are configured under the `auto` strategy; used to switch provider for a single call |
|
||||
|
||||
<Note>
|
||||
If no search API key is configured, this tool will not be loaded.
|
||||
If none of the four credentials are configured, this tool is not registered with the Agent.
|
||||
</Note>
|
||||
|
||||
@@ -97,7 +97,7 @@ nohup python3 app.py & tail -f nohup.out
|
||||
```
|
||||
|
||||
<Tip>
|
||||
如果在服务器上部署,需要在防火墙或安全组中放行 `9899` 端口才能通过浏览器访问 Web 控制台,建议仅对指定IP开放以保证安全。
|
||||
**服务器公网访问 Web 控制台**:默认 `web_host` 仅监听 `127.0.0.1`(本机访问),需公网访问时请在 `config.json` 中将 `web_host` 设为 `0.0.0.0`,同时强烈建议设置 `web_password` 启用鉴权。此外还需在防火墙/安全组中放行 `9899` 端口,建议仅对指定 IP 开放以保证安全。
|
||||
</Tip>
|
||||
|
||||
## Docker 部署
|
||||
@@ -129,7 +129,7 @@ sudo docker logs -f chatgpt-on-wechat
|
||||
```
|
||||
|
||||
<Tip>
|
||||
如果在服务器上部署,需要在防火墙或安全组中放行 `9899` 端口才能通过浏览器访问 Web 控制台,建议仅对指定IP开放以保证安全。
|
||||
**Docker 公网访问 Web 控制台**:在 `docker-compose.yml` 中将 `WEB_HOST` 设为 `0.0.0.0`(容器内默认绑定 `127.0.0.1` 无法从宿主机外访问),同时强烈建议设置 `WEB_PASSWORD` 启用鉴权。此外需确保 `9899` 端口正确映射到宿主机,并在防火墙/安全组放行该端口。
|
||||
</Tip>
|
||||
|
||||
## 核心配置项
|
||||
|
||||
@@ -33,6 +33,10 @@ description: 使用脚本一键安装和管理 CowAgent
|
||||
|
||||
运行后默认启动 Web 控制台,访问 `http://localhost:9899` 开始对话和管理Agent。
|
||||
|
||||
<Note>
|
||||
**服务器部署需要公网访问控制台时**,请在 `config.json` 中将 `web_host` 设为 `0.0.0.0`(默认仅监听 `127.0.0.1` 本机访问),同时强烈建议设置 `web_password` 启用鉴权。然后通过 `http://<server-ip>:9899` 访问,并确保防火墙/安全组放行 `9899` 端口。
|
||||
</Note>
|
||||
|
||||
## 管理命令
|
||||
|
||||
安装完成后,使用 `cow` CLI 管理服务:
|
||||
|
||||
@@ -9,7 +9,7 @@ CowAgent 2.0 从简单的聊天机器人全面升级为超级智能助理,采
|
||||
|
||||
CowAgent 的整体架构由以下核心模块组成:
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/cow-agent-arch-zh.jpg" alt="CowAgent Architecture" />
|
||||
<img src="https://cdn.jsdelivr.net/gh/zhayujie/cowagent-assets@main/architecture/zh/architecture.jpg" alt="CowAgent Architecture" />
|
||||
|
||||
| 模块 | 说明 |
|
||||
| --- | --- |
|
||||
|
||||
@@ -84,7 +84,7 @@ Agent 会在对话中自动将有价值的信息整理为知识页面,维护
|
||||
|
||||
技能系统为 Agent 提供无限的扩展性,每个 Skill 由说明文件、运行脚本(可选)、资源(可选)组成,描述如何完成特定类型的任务。通过 Skill 可以让 Agent 遵循说明完成复杂流程、调用各类工具或对接第三方系统。
|
||||
|
||||
- **[Skill Hub](https://skills.cowagent.ai/):** 开放的技能广场,汇集官方推荐、社区贡献和第三方技能,支持一键安装。
|
||||
- [Skill Hub](https://skills.cowagent.ai/):开放的技能广场,汇集官方推荐、社区贡献和第三方技能,支持一键安装。
|
||||
- **内置技能:** 在项目的 `skills/` 目录下,包含技能创造器、图像识别、LinkAI 智能体、网页抓取等。内置 Skill 根据依赖条件(API Key、系统命令等)自动判断是否启用。
|
||||
- **自定义技能:** 由用户通过对话创建,存放在工作空间中(`~/cow/skills/`),可实现任何复杂的业务流程和第三方系统对接。
|
||||
|
||||
|
||||
@@ -3,7 +3,9 @@ title: 项目介绍
|
||||
description: CowAgent - 基于大模型的超级AI助理
|
||||
---
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/78c5dd674e2c828642ecc0406669fed7.png" alt="CowAgent" width="450px"/>
|
||||
<div align="center">
|
||||
<img src="https://cdn.link-ai.tech/doc/78c5dd674e2c828642ecc0406669fed7.png" alt="CowAgent" width="450px"/>
|
||||
</div>
|
||||
|
||||
**CowAgent** 是基于大模型的超级AI助理,能够主动思考和任务规划、操作计算机和外部资源、创造和执行Skills、拥有长期记忆和知识库并不断成长。
|
||||
|
||||
|
||||
@@ -1,250 +1,256 @@
|
||||
<p align="center"><img src="https://github.com/user-attachments/assets/eca9a9ec-8534-4615-9e0f-96c5ac1d10a3" alt="CowAgent" width="550" /></p>
|
||||
<p align="center"><img src="https://github.com/user-attachments/assets/eca9a9ec-8534-4615-9e0f-96c5ac1d10a3" alt="CowAgent" width="420" /></p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://github.com/zhayujie/CowAgent/releases/latest"><img src="https://img.shields.io/github/v/release/zhayujie/CowAgent" alt="Latest release"></a>
|
||||
<a href="https://github.com/zhayujie/CowAgent/blob/master/LICENSE"><img src="https://img.shields.io/github/license/zhayujie/CowAgent" alt="License: MIT"></a>
|
||||
<a href="https://github.com/zhayujie/CowAgent"><img src="https://img.shields.io/github/stars/zhayujie/CowAgent?style=flat-square" alt="Stars"></a> <br/>
|
||||
[<a href="https://github.com/zhayujie/CowAgent/blob/master/README.md">中文</a>] | [<a href="https://github.com/zhayujie/CowAgent/blob/master/docs/en/README.md">English</a>] | [日本語]
|
||||
[<a href="../../README.md">English</a>] | [<a href="../zh/README.md">中文</a>] | [日本語]
|
||||
</p>
|
||||
|
||||
**CowAgent** はLLMを搭載したAIスーパーアシスタントです。自律的なタスク計画、コンピュータや外部リソースの操作、Skillの作成・実行、長期記憶とパーソナルナレッジベースによる継続的な成長が可能です。柔軟なモデル切り替えに対応し、テキスト・音声・画像・ファイルを処理でき、WeChat、Web、Feishu(飛書)、DingTalk(釘釘)、WeCom Bot(企業微信ボット)、WeComアプリ、WeChat公式アカウントに統合可能で、個人のPCやサーバー上で24時間365日稼働できます。
|
||||
**CowAgent** は、自律的にタスクを計画し、コンピュータや外部リソースを操作し、Skill を作成・実行し、パーソナルナレッジベースと長期記憶でユーザーとともに成長するオープンソースのスーパー AI アシスタントです。エンドツーエンドの Agent Harness のリファレンス実装の一つでもあります。
|
||||
|
||||
CowAgent は軽量でデプロイしやすく、拡張性に優れています。主要な LLM プロバイダーをそのまま組み込み、Web や主要な IM プラットフォーム上で動作。個人 PC やサーバー上で 24 時間 365 日稼働できます。
|
||||
|
||||
<p align="center">
|
||||
<a href="https://cowagent.ai/">🌐 ウェブサイト</a> ·
|
||||
<a href="https://docs.cowagent.ai/en/intro/index">📖 ドキュメント</a> ·
|
||||
<a href="https://docs.cowagent.ai/en/guide/quick-start">🚀 クイックスタート</a> ·
|
||||
<a href="https://docs.cowagent.ai/ja/intro/index">📖 ドキュメント</a> ·
|
||||
<a href="https://docs.cowagent.ai/ja/guide/quick-start">🚀 クイックスタート</a> ·
|
||||
<a href="https://skills.cowagent.ai/">🧩 Skill Hub</a> ·
|
||||
<a href="https://link-ai.tech/cowagent/create">☁️ オンラインで試す</a>
|
||||
</p>
|
||||
|
||||
## はじめに
|
||||
<br/>
|
||||
|
||||
> CowAgentは、すぐに使えるAIスーパーアシスタントであると同時に、高い拡張性を持つAgentフレームワークでもあります。新しいモデルインターフェース、チャネル、組み込みツール、Skillシステムを拡張することで、さまざまなカスタマイズニーズに柔軟に対応できます。
|
||||
## 🌟 主な機能
|
||||
|
||||
- ✅ **自律的タスク計画**: 複雑なタスクを理解し、自律的に実行計画を立て、目標達成までツールを呼び出しながら継続的に思考します。
|
||||
- ✅ **長期記憶**: 会話の記憶をローカルファイルやデータベースに自動的に永続化します。コアメモリ、デイリーメモリ、Deep Dream 蒸留を含み、キーワード検索やベクトル検索に対応しています。
|
||||
- ✅ **パーソナルナレッジベース**: 構造化された知識を自動整理し、相互参照によるナレッジグラフを構築。Web での可視化ブラウジングと対話による管理をサポートします。
|
||||
- ✅ **Skillシステム**: Skillの作成・実行エンジンを実装。[Skill Hub](https://skills.cowagent.ai)、GitHubなどからSkillをインストールでき、会話を通じたカスタムSkill作成もサポートしています。
|
||||
- ✅ **ツールシステム**: ファイル読み書き、ターミナル実行、ブラウザ操作、スケジュールタスク、メッセージ送信などの組み込みツールを提供。Agentが自律的に呼び出して複雑なタスクを完了します。
|
||||
- ✅ **CLIシステム**: ターミナルコマンドとチャットコマンドを提供し、プロセス管理、Skillインストール、設定変更などの操作をサポートします。
|
||||
- ✅ **マルチモーダルメッセージ**: テキスト、画像、音声、ファイルなど、さまざまなメッセージタイプの解析・処理・生成・送信に対応しています。
|
||||
- ✅ **複数モデル対応**: DeepSeek、MiniMax、Claude、Gemini、OpenAI、GLM、Qwen、Doubao、Kimiなど、主要なモデルプロバイダーに対応しています。
|
||||
- ✅ **マルチプラットフォームデプロイ**: ローカルPCやサーバー上で実行でき、WeChat、Web、Feishu、DingTalk、WeChat公式アカウント、WeComアプリケーションに統合可能です。
|
||||
| 機能 | 説明 |
|
||||
| :--- | :--- |
|
||||
| [タスク計画](https://docs.cowagent.ai/ja/intro/architecture) | 複雑なタスクを分解し、目標達成までツールを繰り返し呼び出して段階的に実行 |
|
||||
| [長期記憶](https://docs.cowagent.ai/ja/memory/index) | 三層構造(コンテキスト → デイリー → コア)、Deep Dream による自動蒸留、キーワードとベクトルのハイブリッド検索 |
|
||||
| [ナレッジベース](https://docs.cowagent.ai/ja/knowledge/index) | 構造化された知識を Markdown Wiki として自動整理し、進化し続けるナレッジグラフを可視化ブラウジング |
|
||||
| [Skill](https://docs.cowagent.ai/ja/skills/index) | [Skill Hub](https://skills.cowagent.ai/)、GitHub、ClawHub からワンクリックでインストール;対話によるカスタム Skill 作成にも対応 |
|
||||
| [ツール](https://docs.cowagent.ai/ja/tools/index) | ファイル I/O、ターミナル、ブラウザ、スケジューラ、記憶検索、Web 検索など 10+ の組み込みツール — MCP プロトコルに完全対応 |
|
||||
| [チャネル](https://docs.cowagent.ai/ja/channels/index) | 一つの Agent で Web、WeChat、Feishu、DingTalk、WeCom、QQ、公式アカウント、Telegram を同時にサポート |
|
||||
| マルチモーダル | テキスト・画像・音声・ファイルをフルサポート — 認識・生成・双方向送受信 |
|
||||
| [モデル](https://docs.cowagent.ai/ja/models/index) | Claude、GPT、Gemini、DeepSeek、GLM、Qwen、Kimi、MiniMax、Doubao など、設定 1 行で切り替え可能 |
|
||||
| [デプロイ](https://docs.cowagent.ai/ja/guide/quick-start) | ワンラインインストーラー、統合された Web コンソール、複数のデプロイモード(ローカル / Docker / サーバー) |
|
||||
|
||||
## 免責事項
|
||||
<br/>
|
||||
|
||||
1. 本プロジェクトは [MIT License](/LICENSE) に基づいており、技術研究・学習を目的としています。利用者は現地の法律、規制、ポリシー、企業の社則を遵守する必要があります。違法行為や権利侵害となる利用は禁止されています。
|
||||
2. Agentモードは通常のチャットモードよりも多くのトークンを消費します。効果とコストに基づいてモデルを選択してください。AgentはホストOSにアクセスできるため、信頼できる環境にデプロイしてください。
|
||||
3. CowAgentはオープンソース開発に注力しており、いかなる暗号通貨の発行・参加・承認も行っていません。
|
||||
## 🏗️ アーキテクチャ
|
||||
|
||||
## デモ
|
||||
<img src="https://cdn.jsdelivr.net/gh/zhayujie/cowagent-assets@main/architecture/en/architecture.jpg" alt="CowAgent Architecture" width="750"/>
|
||||
|
||||
オンラインで試す(デプロイ不要): [CowAgent](https://link-ai.tech/cowagent/create)
|
||||
CowAgent は完全な **Agent Harness** です:メッセージは各種**チャネル**から流入し、**Agent Core** が記憶・知識・利用可能なツール/Skill を組み合わせてタスクを計画・判断、**モデル**が応答を生成し、結果は元のチャネルに返されます。各レイヤーは疎結合で、独立して拡張可能です。
|
||||
|
||||
## 更新履歴
|
||||
|
||||
> **2026.04.14:** [v2.0.6](https://github.com/zhayujie/CowAgent/releases/tag/2.0.6) — ナレッジベース、Deep Dream 記憶蒸留、スマートコンテキスト圧縮、Web コンソールアップグレード。
|
||||
|
||||
> **2026.04.01:** [v2.0.5](https://github.com/zhayujie/CowAgent/releases/tag/2.0.5) — Cow CLI、Skill Hubオープンソース化、ブラウザツール、WeCom Botスキャン作成など。
|
||||
|
||||
> **2026.02.27:** [v2.0.2](https://github.com/zhayujie/CowAgent/releases/tag/2.0.2) — Webコンソールの全面刷新(ストリーミングチャット、モデル/Skill/メモリ/チャネル/スケジューラ/ログ管理)、マルチチャネル同時実行、セッション永続化、Gemini 3.1 Pro / Claude 4.6 Sonnet / Qwen3.5 Plusなど新モデル追加。
|
||||
|
||||
> **2026.02.13:** [v2.0.1](https://github.com/zhayujie/CowAgent/releases/tag/2.0.1) — 組み込みWeb検索ツール、スマートコンテキストトリミング、ランタイム情報の動的更新、Windows互換性、スケジューラのメモリ喪失やFeishu接続問題などの修正。
|
||||
|
||||
> **2026.02.03:** [v2.0.0](https://github.com/zhayujie/CowAgent/releases/tag/2.0.0) — マルチステップタスク計画、長期記憶、組み込みツール、Skillフレームワーク、新モデル、チャネル最適化を備えたAIスーパーアシスタントへの全面アップグレード。
|
||||
|
||||
> **2025.05.23:** [v1.7.6](https://github.com/zhayujie/CowAgent/releases/tag/1.7.6) — Webチャネル最適化、AgentMeshマルチエージェントプラグイン、Baidu TTS、claude-4-sonnet/opus対応。
|
||||
|
||||
> **2025.04.11:** [v1.7.5](https://github.com/zhayujie/CowAgent/releases/tag/1.7.5) — wechatferryプロトコル、DeepSeekモデル、Tencent Cloud音声、ModelScope・Gitee-AI対応。
|
||||
|
||||
> **2024.12.13:** [v1.7.4](https://github.com/zhayujie/CowAgent/releases/tag/1.7.4) — Gemini 2.0モデル、Webチャネル、メモリリーク修正。
|
||||
|
||||
全更新履歴: [リリースノート](https://docs.cowagent.ai/en/releases/overview)
|
||||
詳細は [アーキテクチャ](https://docs.cowagent.ai/ja/intro/architecture) を参照してください。
|
||||
|
||||
<br/>
|
||||
|
||||
## 🚀 クイックスタート
|
||||
|
||||
本プロジェクトは、インストール・設定・起動・管理をワンクリックで行えるスクリプトを提供しています:
|
||||
依存関係のインストール、設定、起動を自動で行うワンラインインストーラーを提供しています:
|
||||
|
||||
**Linux / macOS:**
|
||||
|
||||
```bash
|
||||
bash <(curl -fsSL https://cdn.link-ai.tech/code/cow/run.sh)
|
||||
```
|
||||
|
||||
**Windows (PowerShell):**
|
||||
|
||||
```powershell
|
||||
irm https://cdn.link-ai.tech/code/cow/run.ps1 | iex
|
||||
```
|
||||
|
||||
実行後、デフォルトでWebサービスが起動します。`http://localhost:9899/chat` にアクセスしてチャットを開始できます。
|
||||
|
||||
スクリプトの使い方: [ワンクリックインストール](https://docs.cowagent.ai/ja/guide/quick-start)。インストール後は `cow start`、`cow stop` などの [CLI コマンド](https://docs.cowagent.ai/ja/cli/index)でサービスを管理できます。
|
||||
|
||||
### 手動インストール
|
||||
|
||||
**1. プロジェクトのクローン**
|
||||
|
||||
```bash
|
||||
git clone https://github.com/zhayujie/CowAgent
|
||||
cd CowAgent/
|
||||
```
|
||||
|
||||
**2. 依存関係のインストール**
|
||||
|
||||
```bash
|
||||
pip3 install -r requirements.txt
|
||||
pip3 install -r requirements-optional.txt # 任意ですが推奨
|
||||
```
|
||||
|
||||
**3. Cow CLI のインストール(推奨)**
|
||||
|
||||
```bash
|
||||
pip3 install -e .
|
||||
```
|
||||
|
||||
インストール後、`cow` コマンドでサービス管理(起動、停止、更新など)やSkill管理ができます。[コマンドドキュメント](https://docs.cowagent.ai/ja/cli/index)を参照してください。
|
||||
|
||||
**4. ブラウザのインストール(任意)**
|
||||
|
||||
Agentにブラウザ操作(Webページへのアクセス、フォーム入力など)が必要な場合:
|
||||
|
||||
```bash
|
||||
cow install-browser
|
||||
```
|
||||
|
||||
`playwright` と Chromium を自動インストールします。[ブラウザツールドキュメント](https://docs.cowagent.ai/ja/tools/browser)を参照してください。
|
||||
|
||||
**5. 設定**
|
||||
|
||||
```bash
|
||||
cp config-template.json config.json
|
||||
```
|
||||
|
||||
`config.json` にモデルのAPIキーとチャネルタイプを記入してください。詳細は[設定ドキュメント](https://docs.cowagent.ai/en/guide/manual-install)を参照してください。
|
||||
|
||||
**6. 実行**
|
||||
|
||||
```bash
|
||||
cow start # 推奨、Cow CLI が必要
|
||||
python3 app.py # または直接実行
|
||||
```
|
||||
|
||||
サーバーデプロイでは、`cow` コマンドでサービスを管理できます:
|
||||
|
||||
```bash
|
||||
cow start # バックグラウンドで起動
|
||||
cow stop # サービス停止
|
||||
cow restart # サービス再起動
|
||||
cow status # 実行状態を確認
|
||||
cow logs # ログを表示
|
||||
cow update # 最新コードを取得して再起動
|
||||
```
|
||||
|
||||
または従来の方法で実行:
|
||||
|
||||
```bash
|
||||
nohup python3 app.py & tail -f nohup.out
|
||||
```
|
||||
|
||||
### Dockerデプロイ
|
||||
**Docker:**
|
||||
|
||||
```bash
|
||||
curl -O https://cdn.link-ai.tech/code/cow/docker-compose.yml
|
||||
# docker-compose.yml を編集して設定を記入
|
||||
sudo docker compose up -d
|
||||
sudo docker logs -f chatgpt-on-wechat
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
起動後、`http://localhost:9899` にアクセスして **Web コンソール**を開くと、モデル設定・チャネル接続・Skill インストールがすべてここで完結します。
|
||||
|
||||
> サーバーデプロイでコンソールに公開アクセスする場合は、`config.json` の `web_host` を `0.0.0.0` に設定してください(あわせて `web_password` の設定も強く推奨)。その後 `http://<server-ip>:9899` にアクセスし、ファイアウォール/セキュリティグループで `9899` ポートを開放することも忘れずに。
|
||||
|
||||
> 📖 詳細ガイド: [クイックスタート](https://docs.cowagent.ai/ja/guide/quick-start) · [ソースからインストール](https://docs.cowagent.ai/ja/guide/manual-install) · [アップグレード](https://docs.cowagent.ai/ja/guide/upgrade)
|
||||
|
||||
インストール後は、[`cow` CLI](https://docs.cowagent.ai/ja/cli/index) でサービスを管理できます:
|
||||
|
||||
```bash
|
||||
cow start | stop | restart # サービス制御
|
||||
cow status | logs # ステータスとログ
|
||||
cow update # 最新コード取得後に再起動
|
||||
cow skill install <名前> # Skill のインストール
|
||||
cow install-browser # ブラウザツールのインストール
|
||||
```
|
||||
|
||||
<br/>
|
||||
|
||||
## モデル
|
||||
## 🤖 モデル
|
||||
|
||||
主要なモデルプロバイダーに対応しています。Agentモードの推奨モデル:
|
||||
CowAgent は主要な LLM プロバイダーすべてに対応しています。**チャット、画像認識、画像生成、ASR/TTS、埋め込み(Embedding)** の各機能はそれぞれ別のベンダーで設定可能です。
|
||||
|
||||
| プロバイダー | 推奨モデル |
|
||||
| --- | --- |
|
||||
| DeepSeek | `deepseek-v4-flash` |
|
||||
| MiniMax | `MiniMax-M2.7` |
|
||||
| Claude | `claude-sonnet-4-6` |
|
||||
| Gemini | `gemini-3.1-pro-preview` |
|
||||
| OpenAI | `gpt-5.4` |
|
||||
| GLM | `glm-5.1` |
|
||||
| Qwen | `qwen3.6-plus` |
|
||||
| Doubao | `doubao-seed-2-0-code-preview-260215` |
|
||||
| Kimi | `kimi-k2.6` |
|
||||
| プロバイダー | 代表的なモデル | チャット | 画像認識 | 画像生成 | ASR | TTS | Embedding |
|
||||
| --- | --- | :-: | :-: | :-: | :-: | :-: | :-: |
|
||||
| [Claude](https://docs.cowagent.ai/ja/models/claude) | claude-opus-4-7 | ✅ | ✅ | | | | |
|
||||
| [OpenAI](https://docs.cowagent.ai/ja/models/openai) | gpt-5.5、o シリーズ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [Gemini](https://docs.cowagent.ai/ja/models/gemini) | gemini-3.5-flash | ✅ | ✅ | ✅ | | | |
|
||||
| [DeepSeek](https://docs.cowagent.ai/ja/models/deepseek) | deepseek-v4-flash / pro | ✅ | | | | | |
|
||||
| [Qwen](https://docs.cowagent.ai/ja/models/qwen) | qwen3.7-max | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [GLM](https://docs.cowagent.ai/ja/models/glm) | glm-5.1、glm-5v-turbo | ✅ | ✅ | | ✅ | | ✅ |
|
||||
| [Doubao](https://docs.cowagent.ai/ja/models/doubao) | doubao-seed-2.0 シリーズ | ✅ | ✅ | ✅ | | | ✅ |
|
||||
| [Kimi](https://docs.cowagent.ai/ja/models/kimi) | kimi-k2.6 | ✅ | ✅ | | | | |
|
||||
| [MiniMax](https://docs.cowagent.ai/ja/models/minimax) | MiniMax-M2.7 | ✅ | ✅ | ✅ | | ✅ | |
|
||||
| [ERNIE](https://docs.cowagent.ai/ja/models/qianfan) | ernie-5.1 | ✅ | ✅ | | | | |
|
||||
| [MiMo](https://docs.cowagent.ai/ja/models/mimo) | mimo-v2.5-pro / v2.5 | ✅ | ✅ | | | ✅ | |
|
||||
| [LinkAI](https://docs.cowagent.ai/ja/models/linkai) | 1 つの Key で 100+ モデルに接続 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [カスタム](https://docs.cowagent.ai/ja/models/custom) | ローカルモデル / サードパーティプロキシ | ✅ | | | | | |
|
||||
|
||||
各モデルの詳細設定については、[モデルドキュメント](https://docs.cowagent.ai/en/models/index)を参照してください。
|
||||
> Web コンソールでの設定が推奨されており、ファイルを手動編集する必要はありません。手動設定については各プロバイダーのドキュメントおよび [モデル概要](https://docs.cowagent.ai/ja/models/index) を参照してください。
|
||||
|
||||
### Coding Plan
|
||||
<br/>
|
||||
|
||||
Coding Planは各プロバイダーが提供する月額サブスクリプションパッケージで、高頻度のAgent利用に最適です。すべてのプロバイダーはOpenAI互換モードでアクセスできます:
|
||||
## 💬 チャネル
|
||||
|
||||
```json
|
||||
{
|
||||
"bot_type": "openai",
|
||||
"model": "MODEL_NAME",
|
||||
"open_ai_api_base": "PROVIDER_CODING_PLAN_API_BASE",
|
||||
"open_ai_api_key": "YOUR_API_KEY"
|
||||
}
|
||||
一つの Agent インスタンスで複数のチャネルを同時に提供できます。`channel_type` 設定で切り替えるか、複数のチャネルを並列実行できます。
|
||||
|
||||
| チャネル | テキスト | 画像 | ファイル | 音声 | グループ |
|
||||
| --- | :-: | :-: | :-: | :-: | :-: |
|
||||
| [Web コンソール](https://docs.cowagent.ai/ja/channels/web)(デフォルト) | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [WeChat](https://docs.cowagent.ai/ja/channels/weixin) | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [Feishu / Lark](https://docs.cowagent.ai/ja/channels/feishu) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [DingTalk](https://docs.cowagent.ai/ja/channels/dingtalk) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [WeCom Bot](https://docs.cowagent.ai/ja/channels/wecom-bot) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [QQ](https://docs.cowagent.ai/ja/channels/qq) | ✅ | ✅ | ✅ | | ✅ |
|
||||
| [WeCom App](https://docs.cowagent.ai/ja/channels/wecom) | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [WeChat 公式アカウント](https://docs.cowagent.ai/ja/channels/wechatmp) | ✅ | ✅ | | ✅ | |
|
||||
| [Telegram](https://docs.cowagent.ai/ja/channels/telegram) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
|
||||
> Feishu と WeCom Bot は **Web コンソール内で QR コードをスキャンするだけで接続**できます — パブリック IP は不要です。詳細は [チャネル概要](https://docs.cowagent.ai/ja/channels/index) を参照してください。
|
||||
|
||||
<img src="https://cdn.jsdelivr.net/gh/zhayujie/cowagent-assets@main/screenshots/en/web-console-chat.png" alt="CowAgent Web Console" width="800"/>
|
||||
|
||||
*Web コンソールはデフォルトのチャネルであると同時に、Agent の設定・管理を統一的に行う場でもあります。*
|
||||
|
||||
<br/>
|
||||
|
||||
## 🧠 記憶とナレッジベース
|
||||
|
||||
**長期記憶**は三層構造:会話コンテキスト(短期)→ デイリー記憶(中期)→ MEMORY.md(長期)。毎晩の **Deep Dream** が散在する記憶を洗練された長期記憶とナラティブな日記に蒸留します。詳細は [長期記憶](https://docs.cowagent.ai/ja/memory/index) · [Deep Dream](https://docs.cowagent.ai/ja/memory/deep-dream) を参照してください。
|
||||
|
||||
**パーソナルナレッジベース**は時系列の記憶とは異なり、構造化された知識を**トピック単位**で整理します。Agent が会話中に有用な情報を自動でキュレーションし、相互参照とインデックスを維持し、Web コンソールでナレッジグラフを可視化できます。詳細は [パーソナルナレッジベース](https://docs.cowagent.ai/ja/knowledge/index) を参照してください。
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<td width="50%">
|
||||
<img src="https://cdn.jsdelivr.net/gh/zhayujie/cowagent-assets@main/screenshots/en/web-console-memory.png" alt="長期記憶" />
|
||||
<p align="center"><em>長期記憶 · 三層構造 + Deep Dream</em></p>
|
||||
</td>
|
||||
<td width="50%">
|
||||
<img src="https://cdn.jsdelivr.net/gh/zhayujie/cowagent-assets@main/screenshots/en/web-console-knowledge.png" alt="パーソナルナレッジベース" />
|
||||
<p align="center"><em>ナレッジベース · 自動キュレーションされた Markdown Wiki</em></p>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<br/>
|
||||
|
||||
## 🔧 ツールと Skill
|
||||
|
||||
**ツール(Tools)** は Agent がシステムリソースを操作するためのアトミックな機能です。**Skill(Skills)** はマニフェストファイルで定義される高レベルのワークフローで、複数のツールを組み合わせて複雑なタスクを完了します。
|
||||
|
||||
### ツールシステム
|
||||
|
||||
**組み込みツール**には、ファイル I/O(`read` / `write` / `edit` / `ls`)、ターミナル(`bash`)、ファイル送信(`send`)、記憶検索(`memory`)、環境変数(`env_config`)、Web フェッチ(`web_fetch`)、スケジューラ(`scheduler`)、Web 検索(`web_search`)、画像認識(`vision`)、ブラウザ自動化(`browser`)などが含まれます。
|
||||
|
||||
**MCP プロトコル**は [Model Context Protocol](https://modelcontextprotocol.io) のオープンエコシステムを統合します。`mcp.json` を一度設定すれば即利用可能で、stdio / SSE トランスポート、ホットリロード、ノーコード統合をサポートします。
|
||||
|
||||
詳細: [ツール概要](https://docs.cowagent.ai/ja/tools/index) · [MCP 統合](https://docs.cowagent.ai/ja/tools/mcp)。
|
||||
|
||||
### Skill システム
|
||||
|
||||
- **[Skill Hub](https://skills.cowagent.ai/)** — オープン Skill マーケットプレイス:閲覧、検索、ワンクリックインストール
|
||||
- **GitHub / ClawHub / URL など** — 任意のソースからワンクリックでインストール
|
||||
- **対話による作成** — `skill-creator` を使って対話でカスタム Skill を生成;ワークフローやサードパーティ API を再利用可能な Skill に変換
|
||||
|
||||
```bash
|
||||
/skill list # インストール済み Skill の一覧
|
||||
/skill search <キーワード> # マーケットプレイスで検索
|
||||
/skill install <名前> # ワンクリックインストール
|
||||
```
|
||||
|
||||
- `bot_type`: `openai` を指定
|
||||
- `model`: プロバイダーがサポートするモデル名
|
||||
- `open_ai_api_base`: プロバイダーのCoding Plan API Base(標準の従量課金とは異なります)
|
||||
- `open_ai_api_key`: プロバイダーのCoding Plan APIキー
|
||||
|
||||
> 注意:Coding PlanのAPI BaseとAPIキーは、通常の従量課金のものとは別です。各プロバイダーのプラットフォームから取得してください。
|
||||
|
||||
対応プロバイダーには、Alibaba Cloud、MiniMax、Zhipu GLM、Kimi、Volcengineなどがあります。各プロバイダーの詳細設定については、[Coding Planドキュメント](https://docs.cowagent.ai/en/models/coding-plan)を参照してください。
|
||||
詳細: [Skill 概要](https://docs.cowagent.ai/ja/skills/index) · [Skill 作成](https://docs.cowagent.ai/ja/skills/create)。
|
||||
|
||||
<br/>
|
||||
|
||||
## チャネル
|
||||
## 🏷 更新履歴
|
||||
|
||||
複数のプラットフォームに対応しています。`config.json` の `channel_type` を設定して切り替えます:
|
||||
> **2026.05.22:** [v2.0.9](https://github.com/zhayujie/CowAgent/releases/tag/2.0.9) — モデル管理、MCP プロトコル対応、ブラウザセッション永続化、新モデル(gpt-5.5、gemini-3.5-flash、qwen3.7-max)、デプロイのセキュリティ強化。
|
||||
|
||||
| チャネル | `channel_type` | ドキュメント |
|
||||
| --- | --- | --- |
|
||||
| WeChat | `weixin` | [WeChat設定](https://docs.cowagent.ai/ja/channels/weixin) |
|
||||
| Web(デフォルト) | `web` | [Webチャネル](https://docs.cowagent.ai/en/channels/web) |
|
||||
| Feishu(飛書) | `feishu` | [Feishu設定](https://docs.cowagent.ai/en/channels/feishu) |
|
||||
| DingTalk(釘釘) | `dingtalk` | [DingTalk設定](https://docs.cowagent.ai/en/channels/dingtalk) |
|
||||
| WeCom Bot | `wecom_bot` | [WeCom Bot設定](https://docs.cowagent.ai/en/channels/wecom-bot) |
|
||||
| WeComアプリ | `wechatcom_app` | [WeCom設定](https://docs.cowagent.ai/en/channels/wecom) |
|
||||
| WeChat公式アカウント | `wechatmp` / `wechatmp_service` | [WeChat公式アカウント設定](https://docs.cowagent.ai/en/channels/wechatmp) |
|
||||
| ターミナル | `terminal` | — |
|
||||
> **2026.05.06:** [v2.0.8](https://github.com/zhayujie/CowAgent/releases/tag/2.0.8) — Feishu チャネル全面アップグレード(音声、ストリーミング、QR 接続)、DeepSeek V4 と Baidu Qianfan 対応、スケジューラツール強化。
|
||||
|
||||
複数チャネルを同時に有効化できます。カンマ区切りで指定してください:`"channel_type": "feishu,dingtalk"`
|
||||
> **2026.04.22:** [v2.0.7](https://github.com/zhayujie/CowAgent/releases/tag/2.0.7) — 組み込み画像生成(GPT Image 2、Nano Banana)、新モデル(Kimi K2.6、Claude Opus 4.7、GLM 5.1)、ナレッジベースと記憶の強化。
|
||||
|
||||
> **2026.04.14:** [v2.0.6](https://github.com/zhayujie/CowAgent/releases/tag/2.0.6) — ナレッジベース、Deep Dream 記憶蒸留、スマートコンテキスト圧縮、マルチセッション Web コンソール。
|
||||
|
||||
> **2026.04.01:** [v2.0.5](https://github.com/zhayujie/CowAgent/releases/tag/2.0.5) — Cow CLI、Skill Hub オープンソース化、ブラウザツール、WeCom Bot QR 接続。
|
||||
|
||||
> **2026.02.03:** [v2.0.0](https://github.com/zhayujie/CowAgent/releases/tag/2.0.0) — マルチステップタスク計画、長期記憶、Skill フレームワークを備えたスーパー Agent アシスタントへの全面アップグレード。
|
||||
|
||||
完全な履歴: [リリースノート](https://docs.cowagent.ai/ja/releases/overview)
|
||||
|
||||
<br/>
|
||||
|
||||
## エンタープライズサービス
|
||||
## 🤝 コミュニティとサポート
|
||||
|
||||
<a href="https://link-ai.tech" target="_blank"><img width="720" src="https://cdn.link-ai.tech/image/link-ai-intro.jpg"></a>
|
||||
GitHub で [Issue を報告](https://github.com/zhayujie/CowAgent/issues) するか、下記 QR コードをスキャンして WeChat コミュニティに参加してください:
|
||||
|
||||
> [LinkAI](https://link-ai.tech/) は、企業や開発者向けのワンストップAIエージェントプラットフォームです。マルチモーダルLLM、ナレッジベース、Agentプラグイン、ワークフローを統合しています。主要プラットフォームへのワンクリック統合、SaaSおよびプライベートデプロイに対応しています。
|
||||
<img width="130" src="https://img-1317903499.cos.ap-guangzhou.myqcloud.com/docs/open-community.png">
|
||||
|
||||
<br/>
|
||||
|
||||
## 🔗 関連プロジェクト
|
||||
|
||||
- [Cow Skill Hub](https://github.com/zhayujie/cow-skill-hub): AIエージェント向けのオープンSkillマーケットプレイス。CowAgent、OpenClaw、Claude Codeなどで利用可能なSkillの閲覧・検索・インストール・公開が可能。
|
||||
- [bot-on-anything](https://github.com/zhayujie/bot-on-anything): 軽量で高い拡張性を持つLLMアプリケーションフレームワーク。Slack、Telegram、Discord、Gmailなどに対応。
|
||||
- [AgentMesh](https://github.com/MinimalFuture/AgentMesh): エージェントチームの協調による複雑な問題解決のためのオープンソースのマルチエージェントフレームワーク。
|
||||
- **[Cow Skill Hub](https://github.com/zhayujie/cow-skill-hub)** — AI エージェント向けのオープン Skill マーケットプレイス;CowAgent、OpenClaw、Claude Code などに対応
|
||||
- **[bot-on-anything](https://github.com/zhayujie/bot-on-anything)** — 軽量な LLM アプリケーションフレームワーク;Slack、Telegram、Discord、Gmail などに対応
|
||||
- **[AgentMesh](https://github.com/MinimalFuture/AgentMesh)** — チーム協調による複雑な問題解決のためのオープンソースのマルチエージェントフレームワーク
|
||||
|
||||
## 🔎 よくある質問
|
||||
<br/>
|
||||
|
||||
FAQ: <https://github.com/zhayujie/CowAgent/wiki/FAQs>
|
||||
## 🏢 エンタープライズサービス
|
||||
|
||||
## 🛠️ コントリビューション
|
||||
[**LinkAI**](https://link-ai.tech/) は企業や開発者向けのワンストップ AI Agent プラットフォームで、CowAgent にマネージドホスティングとエンタープライズグレードのサポートを提供します:
|
||||
|
||||
新しいチャネルの追加を歓迎します。[Feishuチャネル](https://github.com/zhayujie/CowAgent/blob/master/channel/feishu/feishu_channel.py)を参考にしてください。また、新しいSkillのコントリビューションも歓迎します。[Skill作成ドキュメント](https://docs.cowagent.ai/ja/skills/create)を参照するか、[Skill Hub](https://skills.cowagent.ai/submit)に提出してください。
|
||||
- **🚀 デプロイ不要のホスト型ランタイム** — [CowAgent オンラインアシスタント](https://link-ai.tech/cowagent/create) を 1 分以内に起動、サーバー不要
|
||||
- **🧠 Agent インフラ** — 主要 LLM・ナレッジベース・データベース・Skill・ワークフローへの統一アクセス。CowAgent の機能を拡張する、すぐに使えるビルディングブロック
|
||||
- **🏢 チーム & エンタープライズ機能** — ワークスペース、ロールベースのアクセス制御、監査ログ、本番運用向けプライベートデプロイ
|
||||
|
||||
## ✉ お問い合わせ
|
||||
エンタープライズに関するお問い合わせ:**sales@simple-future.tech** または [QR コードをスキャン](https://cdn.link-ai.tech/consultant.jpg) して WeChat でお問い合わせください。
|
||||
|
||||
PRやIssueの提出を歓迎します。🌟 Starでプロジェクトをサポートしてください。ご質問がある場合は、[FAQリスト](https://github.com/zhayujie/CowAgent/wiki/FAQs)を確認するか、[Issues](https://github.com/zhayujie/CowAgent/issues)を検索してください。
|
||||
<br/>
|
||||
|
||||
## 🛠️ 開発とコントリビューション
|
||||
|
||||
新しいチャネルの追加を歓迎します — [Feishu チャネル](https://github.com/zhayujie/CowAgent/blob/master/channel/feishu/feishu_channel.py) を参考にカスタムチャネルを実装できます。新しい Skill のコントリビューションも [Skill Hub](https://skills.cowagent.ai/submit) で受け付けています。
|
||||
|
||||
⭐ Star でプロジェクトの更新をフォローしてください。PR や Issue の提出も歓迎します。
|
||||
|
||||
## 🌟 コントリビューター
|
||||
|
||||

|
||||
|
||||
<br/>
|
||||
|
||||
## ⚠️ 免責事項
|
||||
|
||||
1. 本プロジェクトは [MIT License](/LICENSE) に基づき、技術研究と学習を目的としています。利用者は所在地の法令・規制を遵守する必要があり、本プロジェクトの利用に起因するいかなる結果についてもメンテナーは責任を負いません。
|
||||
2. **コストと安全性:** Agent モードは通常のチャットよりトークン消費が大幅に多いため、品質とコストのバランスを考慮してモデルを選択してください。Agent はローカル OS にアクセスできるため、信頼できる環境にのみデプロイしてください。
|
||||
3. CowAgent は純粋なオープンソースプロジェクトであり、暗号通貨の発行・参加・承認は一切行いません。
|
||||
|
||||
<br/>
|
||||
|
||||
## 📌 プロジェクト改名のお知らせ
|
||||
|
||||
本プロジェクトは旧名 `chatgpt-on-wechat` から、2026.04.13 に **CowAgent** へ正式に改名されました。元の GitHub URL は自動的にリダイレクトされます。既存ユーザーは `git remote set-url origin https://github.com/zhayujie/CowAgent.git` でローカルのリモートを更新できます。
|
||||
|
||||
41
docs/ja/channels/index.mdx
Normal file
41
docs/ja/channels/index.mdx
Normal file
@@ -0,0 +1,41 @@
|
||||
---
|
||||
title: チャネル一覧
|
||||
description: CowAgent が対応するチャネルと機能マトリクス
|
||||
---
|
||||
|
||||
CowAgent は複数のチャットチャネルへの接続に対応しており、起動時に `channel_type` で切り替えます。Web コンソールはデフォルトで有効で、他の接続チャネルと並行して動作します。
|
||||
|
||||
## 機能マトリクス
|
||||
|
||||
下表は各チャネルが対応する受信メッセージタイプ、ボットの返信タイプ、グループチャット機能をまとめたものです。シーンに合わせて選択してください。
|
||||
|
||||
| チャネル | テキスト | 画像 | ファイル | 音声 | グループチャット |
|
||||
| --- | :-: | :-: | :-: | :-: | :-: |
|
||||
| [WeChat](/ja/channels/weixin) | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [Web コンソール](/ja/channels/web) | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [Feishu](/ja/channels/feishu) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [DingTalk](/ja/channels/dingtalk) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [WeCom スマートボット](/ja/channels/wecom-bot) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [QQ](/ja/channels/qq) | ✅ | ✅ | ✅ | | ✅ |
|
||||
| [WeCom アプリ](/ja/channels/wecom) | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [WeChat 公式アカウント](/ja/channels/wechatmp) | ✅ | ✅ | | ✅ | |
|
||||
| [Telegram](/ja/channels/telegram) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
|
||||
- **画像 / ファイル / 音声**列は対応するメッセージタイプの送受信に対応していることを示します。詳細は各チャネルのドキュメントを参照してください
|
||||
- **グループチャット**列はグループメッセージを認識して応答できることを示します
|
||||
|
||||
<Tip>
|
||||
各チャネルの音声 / 画像機能は、対応するモデルプロバイダーの設定に依存します。詳細は [モデル一覧](/ja/models) を参照してください。
|
||||
</Tip>
|
||||
|
||||
## チャネル一覧
|
||||
|
||||
- [Web コンソール](/ja/channels/web) — 組み込みのブラウザ対話・管理パネル、デフォルトで有効
|
||||
- [WeChat](/ja/channels/weixin) — 個人 WeChat の QR コードログイン
|
||||
- [Feishu](/ja/channels/feishu) — Feishu 自作ボット
|
||||
- [DingTalk](/ja/channels/dingtalk) — DingTalk 自作ボット
|
||||
- [WeCom スマートボット](/ja/channels/wecom-bot) — WeCom スマートボット
|
||||
- [QQ](/ja/channels/qq) — QQ 公式ボットオープンプラットフォーム
|
||||
- [WeCom アプリ](/ja/channels/wecom) — WeCom 自作アプリ接続
|
||||
- [WeChat 公式アカウント](/ja/channels/wechatmp) — WeChat 公式アカウント(購読アカウント / サービスアカウント)
|
||||
- [Telegram](/ja/channels/telegram) — グローバル IM、5 分で接続、公開 IP 不要
|
||||
111
docs/ja/channels/telegram.mdx
Normal file
111
docs/ja/channels/telegram.mdx
Normal file
@@ -0,0 +1,111 @@
|
||||
---
|
||||
title: Telegram
|
||||
description: Telegram Bot API 経由で CowAgent を接続
|
||||
---
|
||||
|
||||
> 公式の Telegram Bot API を通じて CowAgent を接続します。1 対 1 チャットおよびグループチャット(@メンションまたはボットへの返信で起動)に対応。Long Polling 方式のため公開 IP は不要で、すぐに利用できます。
|
||||
|
||||
|
||||
## 1. 接続手順
|
||||
|
||||
### ステップ 1: BotFather で Bot を作成
|
||||
|
||||
1. Telegram で公式アカウント [@BotFather](https://t.me/BotFather) を開きます。
|
||||
2. `/newbot` を送り、案内に従って入力します:
|
||||
- **Bot 名**(表示名、例: `My CowAgent Bot`)
|
||||
- **Bot ユーザー名**(`bot` で終わる必要があります、例: `my_cowagent_bot`)
|
||||
3. 作成完了後、BotFather から **HTTP API Token**(例: `123456789:ABCdefGhIJKlmNoPQRsTUVwxyZ`)が返されます。大切に保管してください。
|
||||
|
||||
<Tip>
|
||||
Token は Bot のパスワードに相当します。漏えいしないよう注意してください。万が一漏れた場合は `@BotFather` に `/revoke` を送って再発行できます。
|
||||
</Tip>
|
||||
|
||||
### ステップ 2:(グループ利用時)Privacy Mode を無効化
|
||||
|
||||
1 対 1 チャットのみ利用する場合はスキップ可能です。Telegram Bot は既定で **Privacy Mode** が有効で、グループ内では `@bot` 接尾辞付きのコマンド(例: `/start@your_bot`)と、Bot メッセージへの返信のみ受信できます。**通常の `@bot こんにちは` のようなテキストメッセージは届きません**。そのままだとグループで反応しないので、必要に応じて以下を設定してください。
|
||||
|
||||
`@BotFather` に対して:
|
||||
|
||||
1. `/setprivacy` を送信
|
||||
2. 作成した Bot を選択
|
||||
3. `Disable` を選択
|
||||
|
||||
<Note>
|
||||
設定後もグループで反応しない場合は、Bot を一度グループから外して再度追加してみてください。
|
||||
</Note>
|
||||
|
||||
### ステップ 3: CowAgent に接続
|
||||
|
||||
<Tabs>
|
||||
<Tab title="Web コンソール(推奨)">
|
||||
Web コンソール(既定 `http://127.0.0.1:9899`)を開き、**チャネル** メニュー → **チャネルを追加** → **Telegram** を選択し、Bot Token を貼り付けて接続をクリックします。
|
||||
</Tab>
|
||||
<Tab title="設定ファイル">
|
||||
`config.json` に以下を追加して Cow を起動します:
|
||||
|
||||
```json
|
||||
{
|
||||
"channel_type": "telegram",
|
||||
"telegram_token": "123456789:ABCdefGhIJKlmNoPQRsTUVwxyZ",
|
||||
"telegram_group_trigger": "mention_or_reply"
|
||||
}
|
||||
```
|
||||
|
||||
| パラメータ | 説明 | 既定値 |
|
||||
| --- | --- | --- |
|
||||
| `telegram_token` | BotFather から発行された HTTP API Token | - |
|
||||
| `telegram_group_trigger` | グループのトリガー方式: `mention_or_reply`(@ または返信)/ `mention_only`(@ のみ)/ `all`(全メッセージ) | `mention_or_reply` |
|
||||
| `telegram_register_commands` | 起動時に BotFather にコマンドメニューを登録するかどうか | `true` |
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
ログに以下のような出力が表示されれば接続成功です:
|
||||
|
||||
```
|
||||
[Telegram] Bot logged in as @my_cowagent_bot (id=123456789)
|
||||
[Telegram] Registered 10 bot commands
|
||||
[Telegram] ✅ Telegram bot ready, polling for updates
|
||||
```
|
||||
|
||||
## 2. 機能
|
||||
|
||||
| 機能 | 対応状況 |
|
||||
| --- | --- |
|
||||
| 1 対 1 チャット | ✅ |
|
||||
| グループチャット(@bot / Bot への返信) | ✅ |
|
||||
| テキストメッセージ | ✅ 送受信 |
|
||||
| 画像メッセージ | ✅ 送受信 |
|
||||
| 音声メッセージ | ✅ 送受信(OGG/Opus) |
|
||||
| 動画メッセージ | ✅ 送受信 |
|
||||
| ファイルメッセージ | ✅ 送受信(PDF / Word / Excel など) |
|
||||
| コマンドメニュー | ✅ Web コンソールの slash コマンドと一致 |
|
||||
|
||||
### コマンドメニュー
|
||||
|
||||
起動時に BotFather へコマンドメニューを自動登録します。Telegram の入力欄で `/` を入力するとサジェストが表示されます:
|
||||
|
||||
| コマンド | 説明 |
|
||||
| --- | --- |
|
||||
| `/help` | コマンドヘルプを表示 |
|
||||
| `/status` | 実行ステータスを確認 |
|
||||
| `/context` | 対話コンテキストを表示(`/context clear` でクリア) |
|
||||
| `/skill` | スキル管理(`/skill list`、`/skill install` など) |
|
||||
| `/memory` | 記憶管理(`/memory dream`) |
|
||||
| `/knowledge` | ナレッジベース管理(`/knowledge list` / `on` / `off`) |
|
||||
| `/config` | 現在の設定を表示 |
|
||||
| `/cancel` | 実行中の Agent タスクを中断 |
|
||||
| `/logs` | 最近のログを表示 |
|
||||
| `/version` | バージョンを表示 |
|
||||
|
||||
<Note>
|
||||
Telegram のコマンドメニューはトップレベルのコマンドのみ表示されます。サブコマンドはスペース区切りで入力します(例: `/skill list`、`/context clear`)。
|
||||
</Note>
|
||||
|
||||
## 3. 使い方
|
||||
|
||||
接続が完了したら:
|
||||
|
||||
- **1 対 1 チャット**: Telegram で Bot のユーザー名(例: `@my_cowagent_bot`)を検索し、`Start` をタップして会話を開始します。
|
||||
- **グループチャット**: Bot をグループに追加し、`@bot こんにちは` または **Bot のメッセージに返信** することで起動します。グループで反応しない場合は [ステップ 2](#ステップ-2-グループ利用時-privacy-mode-を無効化) の Privacy Mode 設定を確認してください。
|
||||
|
||||
画像やファイルを送るときは、添付欄の上の入力欄に **キャプション**(説明・質問)を直接書いて一緒に送信できます。Bot は添付ファイルとキャプションを合わせて回答します。先に添付を送り、その後に質問を送る形でも、2 つのメッセージは自動でまとめて処理されます。
|
||||
@@ -3,56 +3,65 @@ title: Web コンソール
|
||||
description: Web コンソールで CowAgent を使用する
|
||||
---
|
||||
|
||||
Web コンソールは CowAgent のデフォルトチャネルです。起動後に自動的に開始され、ブラウザを通じて Agent とチャットしたり、モデル、Skill、メモリ、チャネルなどの設定をオンラインで管理できます。
|
||||
Web コンソールは CowAgent のデフォルトチャネルです。起動後に自動的に実行され、ブラウザを通じて Agent と対話できるほか、モデル、Skill、メモリ、チャネルなどの設定をオンラインで管理できます。
|
||||
|
||||
## 設定
|
||||
|
||||
```json
|
||||
{
|
||||
"channel_type": "web",
|
||||
"web_port": 9899
|
||||
"web_host": "0.0.0.0",
|
||||
"web_port": 9899,
|
||||
"web_password": "",
|
||||
"enable_thinking": false
|
||||
}
|
||||
```
|
||||
|
||||
| パラメータ | 説明 | デフォルト値 |
|
||||
| --- | --- | --- |
|
||||
| `channel_type` | `web` に設定 | `web` |
|
||||
| `web_host` | Web サービスのリスンアドレス。デフォルトは `127.0.0.1`(ローカルのみ)。公開アクセスが必要な場合は `0.0.0.0` に変更してパスワードを設定してください | `""` |
|
||||
| `web_port` | Web サービスのリスンポート | `9899` |
|
||||
| `web_password` | アクセスパスワード。空欄の場合はパスワード保護が無効。`0.0.0.0` でリスンする場合は設定を推奨 | `""` |
|
||||
| `web_session_expire_days` | ログインセッションの有効日数 | `30` |
|
||||
| `enable_thinking` | 深い思考モードを有効化するか | `false` |
|
||||
|
||||
パスワード設定後、コンソールへアクセスする際にはまずパスワード入力によるログインが必要です。ログイン状態はデフォルトで 30 日間保持され、その間はサービスを再起動しても再ログインは不要です。パスワードはコンソールの「設定」ページからオンラインで変更することもできます。
|
||||
|
||||
## アクセス URL
|
||||
|
||||
プロジェクト起動後、以下にアクセスしてください:
|
||||
|
||||
- ローカル: `http://localhost:9899`
|
||||
- サーバー: `http://<server-ip>:9899`
|
||||
- ローカル実行: `http://localhost:9899`
|
||||
- サーバー実行: `http://<server-ip>:9899`
|
||||
|
||||
<Note>
|
||||
サーバーのファイアウォールとセキュリティグループで該当ポートが許可されていることを確認してください。
|
||||
</Note>
|
||||
|
||||
## 機能
|
||||
## 機能紹介
|
||||
|
||||
### チャット画面
|
||||
|
||||
ストリーミング出力に対応しており、Agent の推論プロセスやツール呼び出しをリアルタイムで表示し、Agent の意思決定を直感的に観察できます:
|
||||
ストリーミング出力に対応しており、Agent の思考プロセス(Reasoning)とツール呼び出しプロセス(Tool Calls)をリアルタイムで表示でき、Agent の意思決定をより直感的に観察できます。深い思考機能は設定またはコンソールの「Agent 設定」スイッチで制御できます。
|
||||
|
||||
<img width="850" src="https://cdn.link-ai.tech/doc/20260227180120.png" />
|
||||
|
||||
#### マルチセッション管理
|
||||
|
||||
チャット画面はマルチセッション管理に対応しています。すべてのセッション記録は SQLite データベースに永続的に保存されます:
|
||||
チャット画面はマルチセッション(Session)管理に対応しています。すべてのセッション記録はデータベースに永続化されます:
|
||||
|
||||
- **セッション一覧**:左側の履歴アイコンをクリックしてセッション一覧パネルを展開/折りたたみでき、スクロールですべての履歴セッションを読み込めます
|
||||
- **AI によるタイトル生成**:新しいセッションの最初のやり取りが完了すると、自動的にモデルを呼び出して短い要約タイトルを生成します
|
||||
- **新規セッション**:セッション一覧上部の「新しい会話」ボタン、または入力エリアの `+` ボタンをクリックして新しいセッションを作成します
|
||||
- **セッション一覧**:左側の履歴セッションアイコンをクリックするとセッション一覧パネルを展開/折りたたみでき、スクロールですべての履歴セッションを読み込めます
|
||||
- **AI によるタイトル生成**:新しいセッションの初回対話完了後、自動的にモデルを呼び出して短いセッション要約タイトルを生成します
|
||||
- **新規セッション**:セッション一覧上部の「新しい会話」ボタンまたは入力エリアの `+` ボタンをクリックして新しいセッションを作成します
|
||||
- **セッション削除**:セッション項目の削除ボタンをクリックし、確認後にそのセッションとすべてのメッセージを完全に削除します
|
||||
- **コンテキストクリア**:入力エリアのクリアボタンをクリックすると、現在のセッションに区切り線が挿入されます。区切り線より上のメッセージは表示されたままですが、モデルのコンテキストには含まれなくなります
|
||||
- **コンテキストクリア**:入力エリアのクリアボタンをクリックすると、現在のセッションに区切り線が挿入されます。区切り線より上のメッセージは表示されたままですが、モデルのコンテキスト入力には含まれなくなります
|
||||
|
||||
### モデル管理
|
||||
|
||||
設定ファイルを手動で編集せずに、オンラインでモデル設定を管理できます:
|
||||
設定ファイルを手動で編集することなく、異なるモデルプロバイダーのテキスト、画像、音声、埋め込みモデル設定をオンラインで管理できます:
|
||||
|
||||
<img width="850" src="https://cdn.link-ai.tech/doc/20260227173811.png" />
|
||||
<img width="850" src="https://cdn.link-ai.tech/doc/20260521212949.png" />
|
||||
|
||||
### Skill 管理
|
||||
|
||||
@@ -68,18 +77,18 @@ Agent のメモリをオンラインで閲覧・管理できます:
|
||||
|
||||
### チャネル管理
|
||||
|
||||
接続中のチャネルをオンラインで管理し、リアルタイムで接続・切断操作を行えます:
|
||||
接続中のチャネルをオンラインで管理でき、リアルタイムでの接続・切断操作に対応しています:
|
||||
|
||||
<img width="850" src="https://cdn.link-ai.tech/doc/20260227173331.png" />
|
||||
|
||||
### スケジュールタスク
|
||||
|
||||
スケジュールタスクをオンラインで閲覧・管理できます。一回限りのタスク、固定間隔、Cron 式に対応しています:
|
||||
スケジュールタスクをオンラインで閲覧・管理できます。一回限りのタスク、固定間隔、Cron 式など複数のスケジューリング方式を可視化管理できます:
|
||||
|
||||
<img width="850" src="https://cdn.link-ai.tech/doc/20260227173704.png" />
|
||||
|
||||
### ログ
|
||||
|
||||
Agent のランタイムログをリアルタイムで確認でき、監視やトラブルシューティングに活用できます:
|
||||
Agent のランタイムログをオンラインでリアルタイムに確認でき、実行状態の監視やトラブルシューティングに便利です:
|
||||
|
||||
<img width="850" src="https://cdn.link-ai.tech/doc/20260227173514.png" />
|
||||
|
||||
@@ -25,6 +25,14 @@ description: ステータスの確認、設定管理、コンテキスト制御
|
||||
/status
|
||||
```
|
||||
|
||||
## cancel
|
||||
|
||||
現在のセッションで実行中の Agent タスクを中止します。Agent が長時間のタスク(マルチターンのツール呼び出しや長いストリーミング応答など)を実行している間、`/cancel` を送信すると、次のツール実行の前に停止します。Web、WeChat、企業微信、Feishu など、すべてのチャネルで利用可能です。
|
||||
|
||||
```text
|
||||
/cancel
|
||||
```
|
||||
|
||||
## config
|
||||
|
||||
実行時設定の表示または変更を行います。変更は即座に反映され、再起動は不要です。
|
||||
|
||||
@@ -57,6 +57,7 @@ Web コンソールや接続されたチャネルの会話で `/` を入力す
|
||||
| --- | --- |
|
||||
| `/help` | コマンドヘルプを表示 |
|
||||
| `/status` | サービスの状態と設定を表示 |
|
||||
| `/cancel` | 実行中の Agent タスクを中止 |
|
||||
| `/config` | 実行時設定の表示・変更 |
|
||||
| `/skill` | スキル管理(インストール、アンインストール、有効化、無効化など) |
|
||||
| `/memory dream [N]` | 記憶蒸留を手動トリガー(デフォルト 3 日、最大 30) |
|
||||
@@ -80,6 +81,7 @@ Web コンソールや接続されたチャネルの会話で `/` を入力す
|
||||
| version | ✓ | ✓ |
|
||||
| status | ✓ | ✓ |
|
||||
| logs | ✓ | ✓ |
|
||||
| cancel | ✗ | ✓ |
|
||||
| config | ✗ | ✓ |
|
||||
| context | — | ✓ |
|
||||
| memory(サブコマンド) | ✗ | ✓ |
|
||||
|
||||
@@ -9,7 +9,7 @@ CowAgent 2.0 は、シンプルなチャットボットから、自律的な思
|
||||
|
||||
CowAgent のアーキテクチャは以下のコアモジュールで構成されています:
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/cow-agent-arch-en.jpg.jpg" alt="CowAgent Architecture" />
|
||||
<img src="https://cdn.jsdelivr.net/gh/zhayujie/cowagent-assets@main/architecture/en/architecture.jpg" alt="CowAgent Architecture" />
|
||||
|
||||
| モジュール | 説明 |
|
||||
| --- | --- |
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user