mirror of
https://github.com/zhayujie/chatgpt-on-wechat.git
synced 2026-06-02 18:17:11 +08:00
Compare commits
29 Commits
2.0.9
...
feat-teleg
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e3eacc77d7 | ||
|
|
d106465419 | ||
|
|
f39380cea7 | ||
|
|
bccce2d7cb | ||
|
|
83cd6ad158 | ||
|
|
116fb27257 | ||
|
|
8d67177a1b | ||
|
|
ad2db1a776 | ||
|
|
2e6d9e0f27 | ||
|
|
e05f85f3ce | ||
|
|
40c48a9a61 | ||
|
|
c9a7525d0b | ||
|
|
fd571ac539 | ||
|
|
c5a3f991c5 | ||
|
|
eb74b73351 | ||
|
|
9b31f45481 | ||
|
|
bc9c1691f5 | ||
|
|
73bf83d2ff | ||
|
|
36e1988fee | ||
|
|
aad6ef635e | ||
|
|
96659cd616 | ||
|
|
c8787b7de4 | ||
|
|
91d427c8f9 | ||
|
|
c8c0573dbd | ||
|
|
29af855ecd | ||
|
|
0a146a245d | ||
|
|
bd85fee7d7 | ||
|
|
571897e2fd | ||
|
|
840dabeccd |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -32,7 +32,6 @@ plugins/banwords/lib/__pycache__
|
||||
!plugins/role
|
||||
!plugins/keyword
|
||||
!plugins/linkai
|
||||
!plugins/agent
|
||||
!plugins/cow_cli
|
||||
client_config.json
|
||||
ref/
|
||||
|
||||
@@ -31,9 +31,13 @@ def detect_index_dim(storage) -> Optional[int]:
|
||||
if not row or not row["embedding"]:
|
||||
return None
|
||||
try:
|
||||
emb = json.loads(row["embedding"])
|
||||
raw = row["embedding"]
|
||||
if isinstance(raw, (bytes, bytearray)):
|
||||
# New BLOB format: 4 bytes per float32
|
||||
return len(raw) // 4
|
||||
emb = json.loads(raw)
|
||||
return len(emb) if isinstance(emb, list) else None
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
except (json.JSONDecodeError, TypeError, Exception):
|
||||
return None
|
||||
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ from datetime import datetime, timedelta
|
||||
from agent.memory.config import MemoryConfig, get_default_memory_config
|
||||
from agent.memory.storage import MemoryStorage, MemoryChunk, SearchResult
|
||||
from agent.memory.chunker import TextChunker
|
||||
from agent.memory.embedding import EmbeddingProvider
|
||||
from agent.memory.embedding import EmbeddingProvider, EmbeddingCache
|
||||
from agent.memory.summarizer import MemoryFlushManager, create_memory_files_if_needed
|
||||
|
||||
|
||||
@@ -61,7 +61,11 @@ class MemoryManager:
|
||||
logger.info(
|
||||
"[MemoryManager] No embedding provider; memory will use keyword search only"
|
||||
)
|
||||
|
||||
|
||||
# Cache for query embeddings (avoids redundant API calls within a session)
|
||||
self._embedding_cache = EmbeddingCache()
|
||||
|
||||
|
||||
# Initialize memory flush manager
|
||||
workspace_dir = self.config.get_workspace()
|
||||
self.flush_manager = MemoryFlushManager(
|
||||
@@ -128,7 +132,14 @@ class MemoryManager:
|
||||
vector_results = []
|
||||
if self.embedding_provider:
|
||||
try:
|
||||
query_embedding = self.embedding_provider.embed_query(query)
|
||||
provider_name = type(self.embedding_provider).__name__
|
||||
model_name = getattr(self.embedding_provider, 'model', '')
|
||||
cached = self._embedding_cache.get(query, provider_name, model_name)
|
||||
if cached is not None:
|
||||
query_embedding = cached
|
||||
else:
|
||||
query_embedding = self.embedding_provider.embed_query(query)
|
||||
self._embedding_cache.put(query, provider_name, model_name, query_embedding)
|
||||
vector_results = self.storage.search_vector(
|
||||
query_embedding=query_embedding,
|
||||
user_id=user_id,
|
||||
|
||||
@@ -5,12 +5,42 @@ Provides vector and keyword search capabilities
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
import re
|
||||
import sqlite3
|
||||
import json
|
||||
import hashlib
|
||||
import threading
|
||||
from typing import List, Dict, Optional, Any
|
||||
from pathlib import Path
|
||||
from dataclasses import dataclass
|
||||
try:
|
||||
import numpy as np
|
||||
_HAS_NUMPY = True
|
||||
except ImportError:
|
||||
_HAS_NUMPY = False
|
||||
np = None # type: ignore[assignment]
|
||||
|
||||
# UPSERT (INSERT … ON CONFLICT DO UPDATE) requires SQLite ≥ 3.24.0 (2018).
|
||||
# Older systems (e.g. CentOS 7 ships SQLite 3.7) fall back to INSERT OR REPLACE,
|
||||
# which risks FTS5 rowid drift on chunk updates (see save_chunk docstring).
|
||||
_HAS_UPSERT = sqlite3.sqlite_version_info >= (3, 24, 0)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CJK character ranges, compiled once at module load.
|
||||
# Covers: CJK Symbols/Punctuation, Japanese kana (hiragana + katakana),
|
||||
# CJK Unified Ideographs + Extension A, Korean syllables (Hangul),
|
||||
# CJK Compatibility Ideographs, and CJK Extension B–F.
|
||||
# ---------------------------------------------------------------------------
|
||||
_CJK_RANGES = (
|
||||
r'\u3000-\u30ff' # CJK Symbols/Punctuation + Japanese kana
|
||||
r'\u3400-\u9fff' # CJK Unified Ideographs (incl. Extension A)
|
||||
r'\uac00-\ud7af' # Korean syllables (Hangul)
|
||||
r'\uf900-\ufaff' # CJK Compatibility Ideographs
|
||||
r'\U00020000-\U0002fa1f' # CJK Extension B–F
|
||||
)
|
||||
_RE_CONTAINS_CJK = re.compile(f'[{_CJK_RANGES}]')
|
||||
_RE_CJK_WORDS = re.compile(f'[{_CJK_RANGES}]+')
|
||||
_RE_TRIGRAM_TOKENS = re.compile(f'[{_CJK_RANGES}]+|[A-Za-z0-9_]+')
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -48,6 +78,10 @@ class MemoryStorage:
|
||||
self.db_path = db_path
|
||||
self.conn: Optional[sqlite3.Connection] = None
|
||||
self.fts5_available = False # Track FTS5 availability
|
||||
# RLock protects concurrent writes from the same process.
|
||||
# SQLite WAL mode handles read/write concurrency at the file level,
|
||||
# but same-process concurrent writes still need a Python-level lock.
|
||||
self._lock = threading.RLock()
|
||||
self._init_db()
|
||||
|
||||
def _check_fts5_support(self) -> bool:
|
||||
@@ -69,6 +103,14 @@ class MemoryStorage:
|
||||
|
||||
# Check FTS5 support
|
||||
self.fts5_available = self._check_fts5_support()
|
||||
if not _HAS_UPSERT:
|
||||
from common.log import logger
|
||||
logger.warning(
|
||||
"[MemoryStorage] SQLite %s < 3.24 — UPSERT unavailable. "
|
||||
"Falling back to INSERT OR REPLACE; FTS5 rowid may drift on "
|
||||
"chunk updates (rebuild index periodically to recover).",
|
||||
sqlite3.sqlite_version,
|
||||
)
|
||||
if not self.fts5_available:
|
||||
from common.log import logger
|
||||
logger.debug("[MemoryStorage] FTS5 not available, using LIKE-based keyword search")
|
||||
@@ -175,6 +217,75 @@ class MemoryStorage:
|
||||
)
|
||||
self._rebuild_fts5_from_chunks()
|
||||
|
||||
# Internal key-value store for persistent flags (e.g. backfill tracking)
|
||||
self.conn.execute("""
|
||||
CREATE TABLE IF NOT EXISTS _meta (
|
||||
key TEXT PRIMARY KEY,
|
||||
value TEXT NOT NULL
|
||||
)
|
||||
""")
|
||||
|
||||
# Create trigram FTS5 table for CJK / mixed-language search
|
||||
self.trigram_fts5_available = False
|
||||
if self.fts5_available:
|
||||
try:
|
||||
self.conn.execute("""
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts_trigram USING fts5(
|
||||
text,
|
||||
id UNINDEXED,
|
||||
user_id UNINDEXED,
|
||||
path UNINDEXED,
|
||||
source UNINDEXED,
|
||||
scope UNINDEXED,
|
||||
content='chunks',
|
||||
content_rowid='rowid',
|
||||
tokenize='trigram case_sensitive 0'
|
||||
)
|
||||
""")
|
||||
self.conn.execute("""
|
||||
CREATE TRIGGER IF NOT EXISTS chunks_trigram_ai
|
||||
AFTER INSERT ON chunks BEGIN
|
||||
INSERT INTO chunks_fts_trigram(rowid, text, id, user_id, path, source, scope)
|
||||
VALUES (new.rowid, new.text, new.id, new.user_id, new.path, new.source, new.scope);
|
||||
END
|
||||
""")
|
||||
self.conn.execute("""
|
||||
CREATE TRIGGER IF NOT EXISTS chunks_trigram_ad
|
||||
AFTER DELETE ON chunks BEGIN
|
||||
DELETE FROM chunks_fts_trigram WHERE rowid = old.rowid;
|
||||
END
|
||||
""")
|
||||
self.conn.execute("""
|
||||
CREATE TRIGGER IF NOT EXISTS chunks_trigram_au
|
||||
AFTER UPDATE ON chunks BEGIN
|
||||
UPDATE chunks_fts_trigram
|
||||
SET text=new.text, id=new.id, user_id=new.user_id,
|
||||
path=new.path, source=new.source, scope=new.scope
|
||||
WHERE rowid = new.rowid;
|
||||
END
|
||||
""")
|
||||
# One-time backfill for existing rows.
|
||||
# NOTE: COUNT(*) on an FTS5 content table always returns 0, so we
|
||||
# use a persistent flag in _meta instead of counting trigram rows.
|
||||
backfill_done = self.conn.execute(
|
||||
"SELECT 1 FROM _meta WHERE key = 'trigram_backfill_done'"
|
||||
).fetchone()
|
||||
chunks_count = self.conn.execute(
|
||||
"SELECT COUNT(*) as c FROM chunks"
|
||||
).fetchone()['c']
|
||||
if chunks_count > 0 and not backfill_done:
|
||||
self.conn.execute(
|
||||
"INSERT INTO chunks_fts_trigram(chunks_fts_trigram) VALUES('rebuild')"
|
||||
)
|
||||
self.conn.execute(
|
||||
"INSERT OR REPLACE INTO _meta(key, value) VALUES('trigram_backfill_done', '1')"
|
||||
)
|
||||
self.trigram_fts5_available = True
|
||||
except Exception:
|
||||
from common.log import logger
|
||||
logger.warning("[MemoryStorage] trigram FTS5 unavailable, CJK search will use LIKE fallback", exc_info=True)
|
||||
self.trigram_fts5_available = False
|
||||
|
||||
# Create files metadata table
|
||||
self.conn.execute("""
|
||||
CREATE TABLE IF NOT EXISTS files (
|
||||
@@ -186,7 +297,7 @@ class MemoryStorage:
|
||||
updated_at INTEGER DEFAULT (strftime('%s', 'now'))
|
||||
)
|
||||
""")
|
||||
|
||||
|
||||
self.conn.commit()
|
||||
|
||||
def _fts5_state_inconsistent(self) -> bool:
|
||||
@@ -299,43 +410,98 @@ class MemoryStorage:
|
||||
self.conn.commit()
|
||||
|
||||
def save_chunk(self, chunk: MemoryChunk):
    """Save a memory chunk, inserting it or updating the row with the same id.

    When the linked SQLite supports UPSERT (``_HAS_UPSERT``), the statement
    is ``INSERT ... ON CONFLICT(id) DO UPDATE``.  Unlike ``INSERT OR
    REPLACE`` (which is a DELETE followed by an INSERT and therefore assigns
    a new rowid), the UPSERT keeps the original rowid, so the FTS5 tables
    declared with ``content_rowid='rowid'`` stay aligned with ``chunks`` and
    the AFTER UPDATE triggers are the ones that fire.

    On older SQLite builds the method falls back to ``INSERT OR REPLACE``.

    Args:
        chunk: The chunk to persist.  Its embedding is stored through
            ``_encode_embedding`` and its metadata as JSON (or NULL when
            the metadata is empty).
    """
    if _HAS_UPSERT:
        sql = """
            INSERT INTO chunks
            (id, user_id, scope, source, path, start_line, end_line,
             text, embedding, hash, metadata, updated_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, strftime('%s', 'now'))
            ON CONFLICT(id) DO UPDATE SET
                user_id = excluded.user_id,
                scope = excluded.scope,
                source = excluded.source,
                path = excluded.path,
                start_line = excluded.start_line,
                end_line = excluded.end_line,
                text = excluded.text,
                embedding = excluded.embedding,
                hash = excluded.hash,
                metadata = excluded.metadata,
                updated_at = strftime('%s', 'now')
        """
    else:
        sql = """
            INSERT OR REPLACE INTO chunks
            (id, user_id, scope, source, path, start_line, end_line,
             text, embedding, hash, metadata, updated_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, strftime('%s', 'now'))
        """
    params = (
        chunk.id, chunk.user_id, chunk.scope, chunk.source, chunk.path,
        chunk.start_line, chunk.end_line, chunk.text,
        self._encode_embedding(chunk.embedding),
        chunk.hash,
        json.dumps(chunk.metadata) if chunk.metadata else None,
    )
    # Single write path: execute and commit together under the instance lock.
    with self._lock:
        self.conn.execute(sql, params)
        self.conn.commit()
|
||||
|
||||
def save_chunks_batch(self, chunks: List[MemoryChunk]):
    """Save multiple chunks in one batch, inserting or updating by id.

    Uses the same statement selection as the single-chunk save: an
    ``INSERT ... ON CONFLICT(id) DO UPDATE`` when ``_HAS_UPSERT`` is true
    (keeps each row's rowid stable for the rowid-keyed FTS5 tables), and
    ``INSERT OR REPLACE`` otherwise.

    Args:
        chunks: Chunks to persist.  Embeddings go through
            ``_encode_embedding``; metadata is stored as JSON or NULL.

    Raises:
        Exception: Whatever ``executemany``/``commit`` raises is re-raised
            after the pending transaction has been rolled back.
    """
    if _HAS_UPSERT:
        sql = """
            INSERT INTO chunks
            (id, user_id, scope, source, path, start_line, end_line,
             text, embedding, hash, metadata, updated_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, strftime('%s', 'now'))
            ON CONFLICT(id) DO UPDATE SET
                user_id = excluded.user_id,
                scope = excluded.scope,
                source = excluded.source,
                path = excluded.path,
                start_line = excluded.start_line,
                end_line = excluded.end_line,
                text = excluded.text,
                embedding = excluded.embedding,
                hash = excluded.hash,
                metadata = excluded.metadata,
                updated_at = strftime('%s', 'now')
        """
    else:
        sql = """
            INSERT OR REPLACE INTO chunks
            (id, user_id, scope, source, path, start_line, end_line,
             text, embedding, hash, metadata, updated_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, strftime('%s', 'now'))
        """
    params_list = [
        (
            c.id, c.user_id, c.scope, c.source, c.path,
            c.start_line, c.end_line, c.text,
            self._encode_embedding(c.embedding),
            c.hash,
            json.dumps(c.metadata) if c.metadata else None,
        )
        for c in chunks
    ]
    with self._lock:
        try:
            self.conn.executemany(sql, params_list)
            self.conn.commit()
        except Exception:
            # Do not leave a half-applied batch sitting in an open
            # transaction where the next unrelated commit would persist it.
            self.conn.rollback()
            raise
|
||||
|
||||
def get_chunk(self, chunk_id: str) -> Optional[MemoryChunk]:
|
||||
"""Get a chunk by ID"""
|
||||
@@ -356,21 +522,21 @@ class MemoryStorage:
|
||||
limit: int = 10
|
||||
) -> List[SearchResult]:
|
||||
"""
|
||||
Vector similarity search using in-memory cosine similarity
|
||||
(sqlite-vec can be added later for better performance)
|
||||
Vector similarity search using numpy-vectorized cosine similarity.
|
||||
All embeddings are loaded then scored in a single BLAS matrix-vector
|
||||
multiply, which is ~100x faster than the pure-Python per-row loop.
|
||||
"""
|
||||
if scopes is None:
|
||||
scopes = ["shared"]
|
||||
if user_id:
|
||||
scopes.append("user")
|
||||
|
||||
# Build query
|
||||
|
||||
scope_placeholders = ','.join('?' * len(scopes))
|
||||
params = scopes
|
||||
|
||||
params = list(scopes)
|
||||
|
||||
if user_id:
|
||||
query = f"""
|
||||
SELECT * FROM chunks
|
||||
SELECT * FROM chunks
|
||||
WHERE scope IN ({scope_placeholders})
|
||||
AND (scope = 'shared' OR user_id = ?)
|
||||
AND embedding IS NOT NULL
|
||||
@@ -378,51 +544,95 @@ class MemoryStorage:
|
||||
params.append(user_id)
|
||||
else:
|
||||
query = f"""
|
||||
SELECT * FROM chunks
|
||||
SELECT * FROM chunks
|
||||
WHERE scope IN ({scope_placeholders})
|
||||
AND embedding IS NOT NULL
|
||||
"""
|
||||
|
||||
|
||||
rows = self.conn.execute(query, params).fetchall()
|
||||
if not rows:
|
||||
return []
|
||||
|
||||
# Calculate cosine similarity. We probe the first row's dim to fail
|
||||
# loudly on a query/index dim mismatch — otherwise every doc would
|
||||
# score 0 silently, leaving the user wondering why search broke.
|
||||
results = []
|
||||
query_dim = len(query_embedding)
|
||||
if rows:
|
||||
first = json.loads(rows[0]['embedding'])
|
||||
if isinstance(first, list) and len(first) != query_dim:
|
||||
raise ValueError(
|
||||
f"Embedding dim mismatch: query is {query_dim}-dim but "
|
||||
f"index stores {len(first)}-dim vectors. The configured "
|
||||
f"embedding model differs from the one that built the "
|
||||
f"index — run /memory rebuild-index to re-embed."
|
||||
)
|
||||
|
||||
# Parse embeddings and build a (N, D) matrix in one pass.
|
||||
# New rows store BLOB bytes (np.frombuffer); legacy rows fall back to JSON.
|
||||
# Filter out rows whose embedding dimension differs from the query —
|
||||
# mixing dimensions would cause np.array() to produce an object array
|
||||
# and matrix @ q_vec to raise ValueError.
|
||||
expected_dim = len(query_embedding)
|
||||
valid_rows = []
|
||||
vectors = []
|
||||
for row in rows:
|
||||
embedding = json.loads(row['embedding'])
|
||||
similarity = self._cosine_similarity(query_embedding, embedding)
|
||||
vec = self._decode_embedding(row['embedding'])
|
||||
if not vec:
|
||||
continue
|
||||
if len(vec) != expected_dim:
|
||||
from common.log import logger
|
||||
logger.warning(
|
||||
"[MemoryStorage] Skipping chunk %s: embedding dim %d != query dim %d",
|
||||
row['id'], len(vec), expected_dim
|
||||
)
|
||||
continue
|
||||
valid_rows.append(row)
|
||||
vectors.append(vec)
|
||||
|
||||
if similarity > 0:
|
||||
results.append((similarity, row))
|
||||
|
||||
# Sort by similarity and limit
|
||||
results.sort(key=lambda x: x[0], reverse=True)
|
||||
results = results[:limit]
|
||||
|
||||
return [
|
||||
SearchResult(
|
||||
path=row['path'],
|
||||
start_line=row['start_line'],
|
||||
end_line=row['end_line'],
|
||||
score=score,
|
||||
snippet=self._truncate_text(row['text'], 500),
|
||||
source=row['source'],
|
||||
user_id=row['user_id']
|
||||
)
|
||||
for score, row in results
|
||||
]
|
||||
if not vectors:
|
||||
return []
|
||||
|
||||
if _HAS_NUMPY:
|
||||
matrix = np.array(vectors, dtype=np.float32) # (N, D)
|
||||
q_vec = np.array(query_embedding, dtype=np.float32) # (D,)
|
||||
|
||||
# Vectorized cosine similarity: dot(matrix, q) / (||matrix|| * ||q||)
|
||||
dots = matrix @ q_vec # (N,)
|
||||
row_norms = np.linalg.norm(matrix, axis=1) # (N,)
|
||||
q_norm = float(np.linalg.norm(q_vec))
|
||||
denominators = row_norms * q_norm
|
||||
np.maximum(denominators, 1e-10, out=denominators) # avoid div-by-zero
|
||||
sims = dots / denominators # (N,)
|
||||
|
||||
# Select TopK using argpartition (O(N) average), then sort only those K
|
||||
k = min(limit, len(valid_rows))
|
||||
top_idx = np.argpartition(sims, -k)[-k:]
|
||||
top_idx = top_idx[np.argsort(sims[top_idx])[::-1]]
|
||||
|
||||
return [
|
||||
SearchResult(
|
||||
path=valid_rows[i]['path'],
|
||||
start_line=valid_rows[i]['start_line'],
|
||||
end_line=valid_rows[i]['end_line'],
|
||||
score=float(sims[i]),
|
||||
snippet=self._truncate_text(valid_rows[i]['text'], 500),
|
||||
source=valid_rows[i]['source'],
|
||||
user_id=valid_rows[i]['user_id']
|
||||
)
|
||||
for i in top_idx
|
||||
if sims[i] > 0
|
||||
]
|
||||
else:
|
||||
# Pure-Python cosine similarity fallback (numpy not installed)
|
||||
import math
|
||||
q = query_embedding
|
||||
q_norm = math.sqrt(sum(x * x for x in q)) or 1e-10
|
||||
scored = []
|
||||
for i, vec in enumerate(vectors):
|
||||
dot = sum(a * b for a, b in zip(vec, q))
|
||||
v_norm = math.sqrt(sum(x * x for x in vec)) or 1e-10
|
||||
sim = dot / (v_norm * q_norm)
|
||||
if sim > 0:
|
||||
scored.append((sim, valid_rows[i]))
|
||||
scored.sort(key=lambda x: x[0], reverse=True)
|
||||
return [
|
||||
SearchResult(
|
||||
path=row['path'],
|
||||
start_line=row['start_line'],
|
||||
end_line=row['end_line'],
|
||||
score=sim,
|
||||
snippet=self._truncate_text(row['text'], 500),
|
||||
source=row['source'],
|
||||
user_id=row['user_id']
|
||||
)
|
||||
for sim, row in scored[:limit]
|
||||
]
|
||||
|
||||
def search_keyword(
|
||||
self,
|
||||
@@ -445,12 +655,37 @@ class MemoryStorage:
|
||||
if user_id:
|
||||
scopes.append("user")
|
||||
|
||||
if self.fts5_available:
|
||||
# Step 1: Standard FTS5 (unicode61) — pure ASCII queries only.
|
||||
# Skipped when query contains any CJK characters: unicode61 tokenises CJK
|
||||
# as individual characters without forming meaningful tokens, so it would
|
||||
# match only the ASCII portion of a mixed query (e.g. "Python" from
|
||||
# "Python教程") and silently discard the CJK part. Those queries go
|
||||
# directly to Step 2 (trigram), which handles both ASCII and CJK together.
|
||||
fts1_attempted = False
|
||||
if (self.fts5_available
|
||||
and not MemoryStorage._contains_cjk(query)
|
||||
and MemoryStorage._build_fts_query(query)):
|
||||
fts1_attempted = True
|
||||
fts_results = self._search_fts5(query, user_id, scopes, limit)
|
||||
if fts_results:
|
||||
return fts_results
|
||||
|
||||
return self._search_like(query, user_id, scopes, limit)
|
||||
# Step 2: Trigram FTS5 — CJK/mixed queries, plus fallback when unicode61
|
||||
# returned nothing (trigram indexes all scripts with 3-char sliding windows,
|
||||
# so it can catch terms that unicode61 tokenisation misses).
|
||||
if self.trigram_fts5_available and (
|
||||
MemoryStorage._contains_cjk(query) or fts1_attempted
|
||||
):
|
||||
trigram_results = self._search_fts5_trigram(query, user_id, scopes, limit)
|
||||
if trigram_results:
|
||||
return trigram_results
|
||||
|
||||
# Step 3: LIKE fallback — last resort (FTS5 unavailable, or CJK tokens
|
||||
# shorter than 3 characters that trigram cannot match, e.g. a single-char query).
|
||||
if not self.fts5_available or MemoryStorage._contains_cjk(query):
|
||||
return self._search_like(query, user_id, scopes, limit)
|
||||
|
||||
return []
|
||||
|
||||
def _search_fts5(
|
||||
self,
|
||||
@@ -471,7 +706,7 @@ class MemoryStorage:
|
||||
sql_query = f"""
|
||||
SELECT chunks.*, bm25(chunks_fts) as rank
|
||||
FROM chunks_fts
|
||||
JOIN chunks ON chunks.id = chunks_fts.id
|
||||
JOIN chunks ON chunks.rowid = chunks_fts.rowid
|
||||
WHERE chunks_fts MATCH ?
|
||||
AND chunks.scope IN ({scope_placeholders})
|
||||
AND (chunks.scope = 'shared' OR chunks.user_id = ?)
|
||||
@@ -483,7 +718,7 @@ class MemoryStorage:
|
||||
sql_query = f"""
|
||||
SELECT chunks.*, bm25(chunks_fts) as rank
|
||||
FROM chunks_fts
|
||||
JOIN chunks ON chunks.id = chunks_fts.id
|
||||
JOIN chunks ON chunks.rowid = chunks_fts.rowid
|
||||
WHERE chunks_fts MATCH ?
|
||||
AND chunks.scope IN ({scope_placeholders})
|
||||
ORDER BY rank
|
||||
@@ -505,13 +740,11 @@ class MemoryStorage:
|
||||
)
|
||||
for row in rows
|
||||
]
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
from common.log import logger
|
||||
logger.error(
|
||||
f"[MemoryStorage] FTS5 search failed (caller will fall back to LIKE): {e}"
|
||||
)
|
||||
logger.warning("[MemoryStorage] _search_fts5 failed, returning empty", exc_info=True)
|
||||
return []
|
||||
|
||||
|
||||
def _search_like(
|
||||
self,
|
||||
query: str,
|
||||
@@ -522,12 +755,11 @@ class MemoryStorage:
|
||||
"""LIKE-based search.
|
||||
|
||||
Used as the keyword-search fallback when FTS5 is unavailable, fails,
|
||||
or returns empty. Supports both CJK runs and ASCII word tokens so it
|
||||
can serve as a true safety net for any query.
|
||||
or returns empty. Supports both CJK runs (1+ chars) and ASCII word
|
||||
tokens (3+ chars) so it can serve as a true safety net for any query.
|
||||
"""
|
||||
import re
|
||||
# CJK runs (2+ chars) + ASCII word tokens (3+ chars to avoid noise)
|
||||
cjk_words = re.findall(r'[\u4e00-\u9fff]{2,}', query)
|
||||
# CJK runs (1+ chars, wide Unicode range) + ASCII words (3+ chars to avoid noise)
|
||||
cjk_words = _RE_CJK_WORDS.findall(query)
|
||||
ascii_words = [t for t in re.findall(r'[A-Za-z0-9_]+', query) if len(t) >= 3]
|
||||
words = cjk_words + ascii_words
|
||||
if not words:
|
||||
@@ -565,44 +797,54 @@ class MemoryStorage:
|
||||
|
||||
try:
|
||||
rows = self.conn.execute(sql_query, params).fetchall()
|
||||
return [
|
||||
SearchResult(
|
||||
results = []
|
||||
for row in rows:
|
||||
# Dynamic score: reward chunks that contain more of the query words.
|
||||
# Use all tokens (CJK + ASCII) so pure-ASCII queries are not skipped.
|
||||
# matched_count is always ≥1 because the WHERE clause uses OR, but
|
||||
# guard defensively so unexpected zero-match rows are never surfaced.
|
||||
text_lower = row['text'].lower()
|
||||
matched_count = sum(1 for w in words if w.lower() in text_lower)
|
||||
if matched_count == 0:
|
||||
continue
|
||||
score = min(0.85, 0.3 + 0.15 * matched_count)
|
||||
results.append(SearchResult(
|
||||
path=row['path'],
|
||||
start_line=row['start_line'],
|
||||
end_line=row['end_line'],
|
||||
score=0.5, # Fixed score for LIKE search
|
||||
score=score,
|
||||
snippet=self._truncate_text(row['text'], 500),
|
||||
source=row['source'],
|
||||
user_id=row['user_id']
|
||||
)
|
||||
for row in rows
|
||||
]
|
||||
except Exception as e:
|
||||
))
|
||||
results.sort(key=lambda r: r.score, reverse=True)
|
||||
return results
|
||||
except Exception:
|
||||
from common.log import logger
|
||||
logger.error(f"[MemoryStorage] LIKE search failed: {e}")
|
||||
logger.warning("[MemoryStorage] _search_like failed, returning empty", exc_info=True)
|
||||
return []
|
||||
|
||||
|
||||
def delete_by_path(self, path: str):
    """Delete every chunk whose ``path`` column equals *path*.

    The delete and its commit run once, together, under the instance lock.

    Args:
        path: Value matched exactly against ``chunks.path``.
    """
    with self._lock:
        self.conn.execute("DELETE FROM chunks WHERE path = ?", (path,))
        self.conn.commit()
|
||||
|
||||
def get_file_hash(self, path: str) -> Optional[str]:
    """Return the hash recorded in ``files`` for *path*, or None if absent."""
    cursor = self.conn.execute("""
        SELECT hash FROM files WHERE path = ?
    """, (path,))
    record = cursor.fetchone()
    if not record:
        return None
    return record['hash']
|
||||
|
||||
|
||||
def update_file_metadata(self, path: str, source: str, file_hash: str, mtime: int, size: int):
    """Insert or replace the ``files`` row describing *path*.

    The write and its commit run once, together, under the instance lock;
    ``updated_at`` is set by SQLite to the current Unix time.

    Args:
        path: Key of the row in ``files``.
        source: Stored in the ``source`` column.
        file_hash: Stored in the ``hash`` column.
        mtime: Stored in the ``mtime`` column.
        size: Stored in the ``size`` column.
    """
    with self._lock:
        self.conn.execute("""
            INSERT OR REPLACE INTO files (path, source, hash, mtime, size, updated_at)
            VALUES (?, ?, ?, ?, ?, strftime('%s', 'now'))
        """, (path, source, file_hash, mtime, size))
        self.conn.commit()
|
||||
|
||||
def get_stats(self) -> Dict[str, int]:
|
||||
"""Get storage statistics"""
|
||||
@@ -632,7 +874,8 @@ class MemoryStorage:
|
||||
self.conn.close()
|
||||
self.conn = None # Mark as closed
|
||||
except Exception as e:
|
||||
print(f"⚠️ Error closing database connection: {e}")
|
||||
from common.log import logger
|
||||
logger.warning("[MemoryStorage] Error closing database connection: %s", e)
|
||||
|
||||
def __del__(self):
|
||||
"""Destructor to ensure connection is closed"""
|
||||
@@ -642,7 +885,33 @@ class MemoryStorage:
|
||||
pass # Ignore errors during cleanup
|
||||
|
||||
# Helper methods
|
||||
|
||||
|
||||
@staticmethod
|
||||
def _encode_embedding(embedding: Optional[List[float]]) -> Optional[bytes]:
|
||||
"""Encode embedding as float32 BLOB bytes (~6x smaller and faster than JSON).
|
||||
Falls back to struct.pack when numpy is unavailable."""
|
||||
if embedding is None:
|
||||
return None
|
||||
if _HAS_NUMPY:
|
||||
return np.array(embedding, dtype=np.float32).tobytes()
|
||||
import struct
|
||||
return struct.pack(f'{len(embedding)}f', *embedding)
|
||||
|
||||
@staticmethod
|
||||
def _decode_embedding(raw) -> Optional[List[float]]:
|
||||
"""Decode embedding from BLOB bytes or legacy JSON string.
|
||||
Handles both numpy and numpy-free environments."""
|
||||
if raw is None:
|
||||
return None
|
||||
if isinstance(raw, (bytes, bytearray)):
|
||||
if _HAS_NUMPY:
|
||||
return np.frombuffer(raw, dtype=np.float32).tolist()
|
||||
import struct
|
||||
n = len(raw) // 4
|
||||
return list(struct.unpack(f'{n}f', raw))
|
||||
# Legacy JSON format written by older versions
|
||||
return json.loads(raw)
|
||||
|
||||
def _row_to_chunk(self, row) -> MemoryChunk:
|
||||
"""Convert database row to MemoryChunk"""
|
||||
return MemoryChunk(
|
||||
@@ -654,32 +923,89 @@ class MemoryStorage:
|
||||
start_line=row['start_line'],
|
||||
end_line=row['end_line'],
|
||||
text=row['text'],
|
||||
embedding=json.loads(row['embedding']) if row['embedding'] else None,
|
||||
embedding=self._decode_embedding(row['embedding']),
|
||||
hash=row['hash'],
|
||||
metadata=json.loads(row['metadata']) if row['metadata'] else None
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _cosine_similarity(vec1: List[float], vec2: List[float]) -> float:
|
||||
"""Calculate cosine similarity between two vectors"""
|
||||
if len(vec1) != len(vec2):
|
||||
return 0.0
|
||||
|
||||
dot_product = sum(a * b for a, b in zip(vec1, vec2))
|
||||
norm1 = sum(a * a for a in vec1) ** 0.5
|
||||
norm2 = sum(b * b for b in vec2) ** 0.5
|
||||
|
||||
if norm1 == 0 or norm2 == 0:
|
||||
return 0.0
|
||||
|
||||
return dot_product / (norm1 * norm2)
|
||||
def _contains_cjk(text: str) -> bool:
|
||||
"""Check if text contains CJK or related characters (Chinese, Japanese, Korean)."""
|
||||
return bool(_RE_CONTAINS_CJK.search(text))
|
||||
|
||||
@staticmethod
|
||||
def _contains_cjk(text: str) -> bool:
|
||||
"""Check if text contains CJK (Chinese/Japanese/Korean) characters"""
|
||||
import re
|
||||
return bool(re.search(r'[\u4e00-\u9fff]', text))
|
||||
|
||||
def _build_trigram_query(raw_query: str) -> Optional[str]:
    """Build the FTS5 MATCH expression used with the trigram table.

    Every run matched by ``_RE_TRIGRAM_TOKENS`` (a CJK run or an ASCII
    word) becomes one double-quoted phrase; the phrases are combined with
    AND so a chunk must contain all of them.

    Returns:
        The MATCH expression, or None when the query yields no tokens.
    """
    terms = [piece for piece in _RE_TRIGRAM_TOKENS.findall(raw_query) if piece]
    if not terms:
        return None
    dq = chr(34)
    # Inside a quoted FTS5 phrase a literal double-quote is written twice.
    return ' AND '.join(dq + term.replace(dq, dq * 2) + dq for term in terms)
|
||||
|
||||
def _search_fts5_trigram(
    self,
    query: str,
    user_id: Optional[str],
    scopes: List[str],
    limit: int
) -> List[SearchResult]:
    """Search ``chunks_fts_trigram`` and return BM25-ranked results.

    Returns an empty list when no MATCH expression can be built from
    *query* or when the query fails (the failure is logged as a warning).
    """
    match_expr = self._build_trigram_query(query)
    if not match_expr:
        return []

    scope_placeholders = ','.join('?' * len(scopes))

    if user_id:
        # User rows are only visible to their owner; shared rows to everyone.
        statement = f"""
            SELECT chunks.*, bm25(chunks_fts_trigram) as rank
            FROM chunks_fts_trigram
            JOIN chunks ON chunks.rowid = chunks_fts_trigram.rowid
            WHERE chunks_fts_trigram MATCH ?
            AND chunks.scope IN ({scope_placeholders})
            AND (chunks.scope = 'shared' OR chunks.user_id = ?)
            ORDER BY rank
            LIMIT ?
        """
        bind = [match_expr, *scopes, user_id, limit]
    else:
        statement = f"""
            SELECT chunks.*, bm25(chunks_fts_trigram) as rank
            FROM chunks_fts_trigram
            JOIN chunks ON chunks.rowid = chunks_fts_trigram.rowid
            WHERE chunks_fts_trigram MATCH ?
            AND chunks.scope IN ({scope_placeholders})
            ORDER BY rank
            LIMIT ?
        """
        bind = [match_expr, *scopes, limit]

    try:
        hits = []
        for rec in self.conn.execute(statement, bind).fetchall():
            hits.append(SearchResult(
                path=rec['path'],
                start_line=rec['start_line'],
                end_line=rec['end_line'],
                score=self._bm25_rank_to_score(rec['rank']),
                snippet=self._truncate_text(rec['text'], 500),
                source=rec['source'],
                user_id=rec['user_id']
            ))
        return hits
    except Exception:
        from common.log import logger
        logger.warning("[MemoryStorage] _search_fts5_trigram failed, returning empty", exc_info=True)
        return []
|
||||
|
||||
@staticmethod
|
||||
def _build_fts_query(raw_query: str) -> Optional[str]:
|
||||
"""
|
||||
@@ -688,7 +1014,6 @@ class MemoryStorage:
|
||||
Works best for English and word-based languages.
|
||||
For CJK characters, LIKE search will be used as fallback.
|
||||
"""
|
||||
import re
|
||||
# Extract words (primarily English words and numbers)
|
||||
tokens = re.findall(r'[A-Za-z0-9_]+', raw_query)
|
||||
if not tokens:
|
||||
@@ -701,9 +1026,22 @@ class MemoryStorage:
|
||||
|
||||
@staticmethod
|
||||
def _bm25_rank_to_score(rank: float) -> float:
|
||||
"""Convert BM25 rank to 0-1 score"""
|
||||
normalized = max(0, rank) if rank is not None else 999
|
||||
return 1 / (1 + normalized)
|
||||
"""Convert SQLite BM25 rank to a [0, 1) relevance score.
|
||||
|
||||
SQLite's bm25() returns a non-positive float (0 or negative).
|
||||
More negative = more relevant. max(0, rank) would clip every
|
||||
negative value to 0, making every score 1/(1+0) = 1.0 and
|
||||
destroying all ranking information.
|
||||
|
||||
abs(rank) / (1 + abs(rank)) maps the absolute relevance magnitude
|
||||
to [0, 1): larger |rank| (stronger match) → score closer to 1.
|
||||
"""
|
||||
if rank is None:
|
||||
return 0.0
|
||||
# Add a floor of 0.3 so any FTS5 match always exceeds typical
|
||||
# min_score thresholds (default 0.1). Small-corpus ranks close to
|
||||
# 0 would otherwise produce score≈0 and be filtered out downstream.
|
||||
return 0.3 + 0.69 * (abs(rank) / (1.0 + abs(rank)))
|
||||
|
||||
@staticmethod
|
||||
def _truncate_text(text: str, max_chars: int) -> str:
|
||||
|
||||
@@ -3,6 +3,11 @@ from .agent_stream import AgentStreamExecutor
|
||||
from .task import Task, TaskType, TaskStatus
|
||||
from .result import AgentResult, AgentAction, AgentActionType, ToolResult
|
||||
from .models import LLMModel, LLMRequest, ModelFactory
|
||||
from .cancel import (
|
||||
AgentCancelledError,
|
||||
CancelTokenRegistry,
|
||||
get_cancel_registry,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
'Agent',
|
||||
@@ -16,5 +21,8 @@ __all__ = [
|
||||
'ToolResult',
|
||||
'LLMModel',
|
||||
'LLMRequest',
|
||||
'ModelFactory'
|
||||
]
|
||||
'ModelFactory',
|
||||
'AgentCancelledError',
|
||||
'CancelTokenRegistry',
|
||||
'get_cancel_registry',
|
||||
]
|
||||
|
||||
@@ -365,7 +365,8 @@ class Agent:
|
||||
|
||||
return action
|
||||
|
||||
def run_stream(self, user_message: str, on_event=None, clear_history: bool = False, skill_filter=None) -> str:
|
||||
def run_stream(self, user_message: str, on_event=None, clear_history: bool = False,
|
||||
skill_filter=None, cancel_event=None) -> str:
|
||||
"""
|
||||
Execute single agent task with streaming (based on tool-call)
|
||||
|
||||
@@ -374,6 +375,7 @@ class Agent:
|
||||
- Multi-turn reasoning based on tool-call
|
||||
- Event callbacks
|
||||
- Persistent conversation history across calls
|
||||
- User-initiated cancellation via ``cancel_event``
|
||||
|
||||
Args:
|
||||
user_message: User message
|
||||
@@ -381,6 +383,11 @@ class Agent:
|
||||
event = {"type": str, "timestamp": float, "data": dict}
|
||||
clear_history: If True, clear conversation history before this call (default: False)
|
||||
skill_filter: Optional list of skill names to include in this run
|
||||
cancel_event: Optional threading.Event polled at agent checkpoints.
|
||||
When set, the loop exits at the next safe point, injects a
|
||||
"[Interrupted by user]" assistant note, and returns the
|
||||
partial response. ``messages`` stays in a valid state
|
||||
(tool_use/tool_result pairs preserved).
|
||||
|
||||
Returns:
|
||||
Final response text
|
||||
@@ -424,7 +431,8 @@ class Agent:
|
||||
max_turns=self.max_steps,
|
||||
on_event=on_event,
|
||||
messages=messages_copy, # Pass copied message history
|
||||
max_context_turns=max_context_turns
|
||||
max_context_turns=max_context_turns,
|
||||
cancel_event=cancel_event,
|
||||
)
|
||||
|
||||
# Execute
|
||||
|
||||
@@ -7,11 +7,19 @@ import json
|
||||
import time
|
||||
from typing import List, Dict, Any, Optional, Callable, Tuple
|
||||
|
||||
from agent.protocol.cancel import AgentCancelledError
|
||||
from agent.protocol.models import LLMRequest, LLMModel
|
||||
from agent.protocol.message_utils import sanitize_claude_messages, compress_turn_to_text_only
|
||||
from agent.tools.base_tool import BaseTool, ToolResult
|
||||
from common.log import logger
|
||||
|
||||
# Optional: repair malformed JSON args from non-strict providers (e.g. unescaped quotes in long content).
|
||||
try:
|
||||
from json_repair import repair_json as _repair_json
|
||||
_HAS_JSON_REPAIR = True
|
||||
except ImportError:
|
||||
_HAS_JSON_REPAIR = False
|
||||
|
||||
|
||||
# Maximum number of characters of model "reasoning / thinking" content to persist
|
||||
# in conversation history. The full reasoning is still streamed to the UI in real
|
||||
@@ -44,6 +52,30 @@ def _truncate_reasoning_for_storage(text: str) -> str:
|
||||
return head + _REASONING_TRUNCATE_MARKER.format(omitted=omitted) + tail
|
||||
|
||||
|
||||
def _parse_tool_args(args_str: str, finish_reason: Optional[str]) -> Tuple[dict, Optional[str]]:
|
||||
"""Parse tool args JSON. Returns (args, error_msg); error_msg is None on success.
|
||||
|
||||
On JSONDecodeError: detect truncation first (skip repair, surface max_tokens hint);
|
||||
otherwise try json-repair for escape issues; finally fall back to the raw decoder error.
|
||||
"""
|
||||
if not args_str:
|
||||
return {}, None
|
||||
try:
|
||||
return json.loads(args_str), None
|
||||
except json.JSONDecodeError as e:
|
||||
if finish_reason in ("length", "max_tokens") or not args_str.rstrip().endswith("}"):
|
||||
return {}, "Output truncated (max_tokens reached). Split content into smaller chunks across multiple tool calls."
|
||||
if _HAS_JSON_REPAIR:
|
||||
try:
|
||||
repaired = _repair_json(args_str, return_objects=True)
|
||||
if isinstance(repaired, dict):
|
||||
logger.warning(f"Tool args JSON repaired ({len(args_str)} chars)")
|
||||
return repaired, None
|
||||
except Exception:
|
||||
pass
|
||||
return {}, f"Invalid JSON in tool arguments: {e.msg}"
|
||||
|
||||
|
||||
class AgentStreamExecutor:
|
||||
"""
|
||||
Agent Stream Executor
|
||||
@@ -64,7 +96,8 @@ class AgentStreamExecutor:
|
||||
max_turns: int = 50,
|
||||
on_event: Optional[Callable] = None,
|
||||
messages: Optional[List[Dict]] = None,
|
||||
max_context_turns: int = 30
|
||||
max_context_turns: int = 30,
|
||||
cancel_event=None,
|
||||
):
|
||||
"""
|
||||
Initialize stream executor
|
||||
@@ -78,6 +111,10 @@ class AgentStreamExecutor:
|
||||
on_event: Event callback function
|
||||
messages: Optional existing message history (for persistent conversations)
|
||||
max_context_turns: Maximum number of conversation turns to keep in context
|
||||
cancel_event: Optional threading.Event used to signal user cancel.
|
||||
Checked at every safe point (turn boundary, before tool execution,
|
||||
during LLM streaming). When set, raises AgentCancelledError which
|
||||
run_stream catches to gracefully wind down.
|
||||
"""
|
||||
self.agent = agent
|
||||
self.model = model
|
||||
@@ -87,6 +124,7 @@ class AgentStreamExecutor:
|
||||
self.max_turns = max_turns
|
||||
self.on_event = on_event
|
||||
self.max_context_turns = max_context_turns
|
||||
self.cancel_event = cancel_event
|
||||
|
||||
# Message history - use provided messages or create new list
|
||||
self.messages = messages if messages is not None else []
|
||||
@@ -97,6 +135,73 @@ class AgentStreamExecutor:
|
||||
# Track files to send (populated by read tool)
|
||||
self.files_to_send = [] # List of file metadata dicts
|
||||
|
||||
def _check_cancelled(self) -> None:
|
||||
"""Raise AgentCancelledError if the user requested cancellation.
|
||||
|
||||
Called at safe points (turn start, between tool calls, between LLM
|
||||
chunks). Cheap to call: just an Event.is_set() probe.
|
||||
"""
|
||||
if self.cancel_event is not None and self.cancel_event.is_set():
|
||||
raise AgentCancelledError("agent cancelled by user")
|
||||
|
||||
def _handle_cancelled(self, partial_response: str) -> None:
|
||||
"""Wind down ``self.messages`` after a user-initiated cancel.
|
||||
|
||||
The messages list may be in any of these states when we get here:
|
||||
(a) Last message is an assistant message containing tool_use
|
||||
blocks but the matching tool_result has not been appended yet.
|
||||
(b) Last message is an assistant text-only reply (cancel happened
|
||||
right before the next turn started).
|
||||
(c) Last message is a user tool_result message and we cancelled
|
||||
between turns.
|
||||
|
||||
For (a) we MUST synthesise tool_result blocks, otherwise the next
|
||||
request will fail Claude/OpenAI's strict pairing validation. For
|
||||
(b)/(c) the state is already valid and we just append a small
|
||||
cancellation note so the user/LLM both see the boundary clearly.
|
||||
"""
|
||||
try:
|
||||
# Step 1: close any orphaned tool_use in the trailing assistant
|
||||
# message by injecting matching tool_result blocks.
|
||||
if self.messages and isinstance(self.messages[-1], dict) \
|
||||
and self.messages[-1].get("role") == "assistant":
|
||||
last = self.messages[-1]
|
||||
content = last.get("content")
|
||||
if isinstance(content, list):
|
||||
pending_tool_use_ids = [
|
||||
block.get("id")
|
||||
for block in content
|
||||
if isinstance(block, dict) and block.get("type") == "tool_use"
|
||||
]
|
||||
pending_tool_use_ids = [tid for tid in pending_tool_use_ids if tid]
|
||||
if pending_tool_use_ids:
|
||||
tool_result_blocks = [
|
||||
{
|
||||
"type": "tool_result",
|
||||
"tool_use_id": tid,
|
||||
"content": "Cancelled by user before this tool finished.",
|
||||
"is_error": True,
|
||||
}
|
||||
for tid in pending_tool_use_ids
|
||||
]
|
||||
self.messages.append({
|
||||
"role": "user",
|
||||
"content": tool_result_blocks,
|
||||
})
|
||||
logger.info(
|
||||
f"[Agent] Injected {len(tool_result_blocks)} cancellation "
|
||||
f"tool_result blocks to keep message history valid"
|
||||
)
|
||||
|
||||
# Step 2: append a stable "interrupted" marker so the LLM sees a
|
||||
# clear stop boundary on the next turn.
|
||||
self.messages.append({
|
||||
"role": "assistant",
|
||||
"content": [{"type": "text", "text": "_(Cancelled by user)_"}],
|
||||
})
|
||||
except Exception as e:
|
||||
logger.warning(f"[Agent] _handle_cancelled cleanup failed: {e}")
|
||||
|
||||
def _emit_event(self, event_type: str, data: dict = None):
|
||||
"""Emit event"""
|
||||
if self.on_event:
|
||||
@@ -270,8 +375,13 @@ class AgentStreamExecutor:
|
||||
final_response = ""
|
||||
turn = 0
|
||||
|
||||
cancelled = False
|
||||
try:
|
||||
while turn < self.max_turns:
|
||||
# Check at the very top of every turn so a cancel arriving
|
||||
# between turns short-circuits cleanly.
|
||||
self._check_cancelled()
|
||||
|
||||
turn += 1
|
||||
logger.info(f"[Agent] 第 {turn} 轮")
|
||||
self._emit_event("turn_start", {"turn": turn})
|
||||
@@ -375,6 +485,8 @@ class AgentStreamExecutor:
|
||||
|
||||
try:
|
||||
for tool_call in tool_calls:
|
||||
# Honour cancel between tool invocations within the same turn
|
||||
self._check_cancelled()
|
||||
result = self._execute_tool(tool_call)
|
||||
tool_results.append(result)
|
||||
|
||||
@@ -557,6 +669,15 @@ class AgentStreamExecutor:
|
||||
self.messages.pop(prompt_insert_idx)
|
||||
logger.debug("[Agent] Removed injected max-steps prompt from message history")
|
||||
|
||||
except AgentCancelledError:
|
||||
# User-initiated stop: wind down message history cleanly so the
|
||||
# next turn is unaffected; channels emit a "cancelled" UI event.
|
||||
cancelled = True
|
||||
logger.info(f"[Agent] 🛑 已被用户中止 (第 {turn} 轮)")
|
||||
self._handle_cancelled(final_response)
|
||||
if not final_response or not final_response.strip():
|
||||
final_response = "_(Cancelled)_"
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Agent执行错误: {e}")
|
||||
self._emit_event("error", {"error": str(e)})
|
||||
@@ -564,8 +685,11 @@ class AgentStreamExecutor:
|
||||
|
||||
finally:
|
||||
final_response = final_response.strip() if final_response else final_response
|
||||
logger.info(f"[Agent] 🏁 完成 ({turn}轮)")
|
||||
self._emit_event("agent_end", {"final_response": final_response})
|
||||
if cancelled:
|
||||
# Emit before agent_end so channels can mark UI as cancelled
|
||||
self._emit_event("agent_cancelled", {"final_response": final_response})
|
||||
logger.info(f"[Agent] 🏁 完成 ({turn}轮)" + (" [cancelled]" if cancelled else ""))
|
||||
self._emit_event("agent_end", {"final_response": final_response, "cancelled": cancelled})
|
||||
|
||||
return final_response
|
||||
|
||||
@@ -644,7 +768,32 @@ class AgentStreamExecutor:
|
||||
try:
|
||||
stream = self.model.call_stream(request)
|
||||
|
||||
# Probe cancel every N chunks to bound reaction time without
|
||||
# checking on every token.
|
||||
_cancel_probe_counter = 0
|
||||
_CANCEL_PROBE_EVERY = 8
|
||||
|
||||
for chunk in stream:
|
||||
_cancel_probe_counter += 1
|
||||
if _cancel_probe_counter >= _CANCEL_PROBE_EVERY:
|
||||
_cancel_probe_counter = 0
|
||||
if self.cancel_event is not None and self.cancel_event.is_set():
|
||||
# Persist partial text only; tool_use args may be
|
||||
# truncated mid-stream and would fail validation.
|
||||
logger.info("[Agent] cancel detected mid-stream, aborting LLM call")
|
||||
if full_content:
|
||||
partial_msg = {
|
||||
"role": "assistant",
|
||||
"content": [{"type": "text", "text": full_content}],
|
||||
}
|
||||
self.messages.append(partial_msg)
|
||||
self._emit_event("message_end", {
|
||||
"content": full_content,
|
||||
"tool_calls": [],
|
||||
"cancelled": True,
|
||||
})
|
||||
raise AgentCancelledError("cancelled during LLM streaming")
|
||||
|
||||
# Check for errors
|
||||
if isinstance(chunk, dict) and chunk.get("error"):
|
||||
# Extract error message from nested structure
|
||||
@@ -738,6 +887,10 @@ class AgentStreamExecutor:
|
||||
elif isinstance(choice, dict) and choice.get("_gemini_raw_parts"):
|
||||
gemini_raw_parts = choice["_gemini_raw_parts"]
|
||||
|
||||
except AgentCancelledError:
|
||||
# Must propagate untouched; never treat as a retryable error.
|
||||
raise
|
||||
|
||||
except Exception as e:
|
||||
error_str = str(e)
|
||||
error_str_lower = error_str.lower()
|
||||
@@ -851,26 +1004,17 @@ class AgentStreamExecutor:
|
||||
import uuid
|
||||
tool_id = f"call_{uuid.uuid4().hex[:24]}"
|
||||
|
||||
try:
|
||||
# Safely get arguments, handle None case
|
||||
args_str = tc.get("arguments") or ""
|
||||
arguments = json.loads(args_str) if args_str else {}
|
||||
except json.JSONDecodeError as e:
|
||||
# Handle None or invalid arguments safely
|
||||
args_str = tc.get('arguments') or ""
|
||||
args_preview = args_str[:200] if len(args_str) > 200 else args_str
|
||||
logger.error(f"Failed to parse tool arguments for {tc['name']}")
|
||||
logger.error(f"Arguments length: {len(args_str)} chars")
|
||||
logger.error(f"Arguments preview: {args_preview}...")
|
||||
logger.error(f"JSON decode error: {e}")
|
||||
|
||||
# Return a clear error message to the LLM instead of empty dict
|
||||
# This helps the LLM understand what went wrong
|
||||
args_str = tc.get("arguments") or ""
|
||||
arguments, parse_err = _parse_tool_args(args_str, stop_reason)
|
||||
if parse_err:
|
||||
logger.error(
|
||||
f"Tool args parse failed for {tc['name']} ({len(args_str)} chars): {parse_err}"
|
||||
)
|
||||
tool_calls.append({
|
||||
"id": tool_id,
|
||||
"name": tc["name"],
|
||||
"arguments": {},
|
||||
"_parse_error": f"Invalid JSON in tool arguments: {args_preview}... Error: {str(e)}. Tip: For large content, consider splitting into smaller chunks or using a different approach."
|
||||
"_parse_error": parse_err,
|
||||
})
|
||||
continue
|
||||
|
||||
@@ -958,14 +1102,11 @@ class AgentStreamExecutor:
|
||||
tool_id = tool_call["id"]
|
||||
arguments = tool_call["arguments"]
|
||||
|
||||
# Check if there was a JSON parse error
|
||||
if "_parse_error" in tool_call:
|
||||
parse_error = tool_call["_parse_error"]
|
||||
logger.error(f"Skipping tool execution due to parse error: {parse_error}")
|
||||
result = {
|
||||
"status": "error",
|
||||
"result": f"Failed to parse tool arguments. {parse_error}. Please ensure your tool call uses valid JSON format with all required parameters.",
|
||||
"execution_time": 0
|
||||
"result": tool_call["_parse_error"],
|
||||
"execution_time": 0,
|
||||
}
|
||||
self._record_tool_result(tool_name, arguments, False)
|
||||
return result
|
||||
|
||||
121
agent/protocol/cancel.py
Normal file
121
agent/protocol/cancel.py
Normal file
@@ -0,0 +1,121 @@
|
||||
"""
|
||||
Cancel token registry for aborting in-flight agent runs.
|
||||
|
||||
A user cancel (web Cancel button, /cancel command) sets a threading.Event
|
||||
that the agent loop polls at safe checkpoints. Tokens are keyed by
|
||||
request_id (preferred) and tracked under session_id as a fallback. Entries
|
||||
are released after the run completes to keep the registry bounded.
|
||||
|
||||
No project deps — importable from any layer without circular imports.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
from typing import Dict, Optional
|
||||
|
||||
|
||||
class AgentCancelledError(Exception):
    """Signal that an in-flight agent run was asked to stop.

    Raised from inside the agent loop at cancellation checkpoints. The
    stream executor is expected to catch it, repair the message history
    (keeping tool_use/tool_result pairs intact), and hand the partial
    response back to the caller.
    """
|
||||
|
||||
|
||||
class _CancelEntry:
|
||||
__slots__ = ("event", "session_id")
|
||||
|
||||
def __init__(self, session_id: Optional[str]):
|
||||
self.event = threading.Event()
|
||||
self.session_id = session_id
|
||||
|
||||
|
||||
class CancelTokenRegistry:
    """In-process map from request_id to its cancel Event.

    Also keeps a session_id -> {request_id} index so every request of a
    session can be cancelled at once. All access to the two dicts goes
    through ``self._lock``.
    """

    def __init__(self):
        self._by_request: Dict[str, _CancelEntry] = {}
        # session_id -> set of request_ids currently in flight (usually 1).
        self._by_session: Dict[str, set] = {}
        self._lock = threading.Lock()

    def register(self, request_id: str, session_id: Optional[str] = None) -> threading.Event:
        """Return the cancel event for ``request_id``, creating it if needed.

        A falsy ``request_id`` gets a fresh event that is not tracked. A new
        entry is indexed under ``session_id`` when one is given; an existing
        entry is returned unchanged.
        """
        if not request_id:
            return threading.Event()
        with self._lock:
            known = self._by_request.get(request_id)
            if known is not None:
                return known.event
            created = _CancelEntry(session_id)
            self._by_request[request_id] = created
            if session_id:
                self._by_session.setdefault(session_id, set()).add(request_id)
            return created.event

    def get_event(self, request_id: str) -> Optional[threading.Event]:
        """Return the event registered for ``request_id``, or None."""
        if not request_id:
            return None
        with self._lock:
            found = self._by_request.get(request_id)
        if found:
            return found.event
        return None

    def cancel_request(self, request_id: str) -> bool:
        """Set the cancel event of one request. Returns True when it exists."""
        if not request_id:
            return False
        with self._lock:
            target = self._by_request.get(request_id)
            if target is None:
                return False
            target.event.set()
            return True

    def cancel_session(self, session_id: str) -> int:
        """Set the cancel event of every request indexed under a session.

        Returns the number of requests signalled (0 when none were found).
        """
        if not session_id:
            return 0
        with self._lock:
            in_flight = [
                self._by_request[rid]
                for rid in list(self._by_session.get(session_id, ()))
                if rid in self._by_request
            ]
        for item in in_flight:
            item.event.set()
        return len(in_flight)

    def unregister(self, request_id: str) -> None:
        """Forget a request and drop its session bucket if it becomes empty.

        Unknown or falsy ids are ignored, so calling this twice is harmless.
        """
        if not request_id:
            return
        with self._lock:
            removed = self._by_request.pop(request_id, None)
            if not removed or not removed.session_id:
                return
            bucket = self._by_session.get(removed.session_id)
            if bucket is None:
                return
            bucket.discard(request_id)
            if not bucket:
                self._by_session.pop(removed.session_id, None)

    def has_active(self, session_id: str) -> bool:
        """Return True when at least one request is indexed under the session."""
        if not session_id:
            return False
        with self._lock:
            return bool(self._by_session.get(session_id))
|
||||
|
||||
|
||||
_registry = CancelTokenRegistry()
|
||||
|
||||
|
||||
def get_cancel_registry() -> CancelTokenRegistry:
    """Return the shared module-level ``CancelTokenRegistry`` instance."""
    shared = _registry
    return shared
|
||||
@@ -145,7 +145,8 @@ class BrowserTool(BaseTool):
|
||||
url = args.get("url", "").strip()
|
||||
if not url:
|
||||
return ToolResult.fail("Error: 'url' is required for navigate action")
|
||||
if not url.startswith(("http://", "https://")):
|
||||
# Only auto-prepend https:// for bare hosts; preserve file://, about:, data:, etc.
|
||||
if "://" not in url and not url.startswith(("about:", "data:")):
|
||||
url = "https://" + url
|
||||
timeout = args.get("timeout", 30000)
|
||||
service = self._get_service()
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
"""
|
||||
MCP (Model Context Protocol) client module.
|
||||
|
||||
Implements JSON-RPC 2.0 over stdio and SSE transports without any external
|
||||
MCP SDK dependency.
|
||||
Implements JSON-RPC 2.0 over stdio, SSE and Streamable HTTP transports
|
||||
without any external MCP SDK dependency.
|
||||
"""
|
||||
|
||||
import json
|
||||
@@ -17,18 +17,29 @@ from typing import Optional
|
||||
from common.log import logger
|
||||
|
||||
|
||||
# Aliases accepted for the Streamable HTTP transport type
|
||||
_STREAMABLE_HTTP_ALIASES = {"streamable-http", "streamable_http", "streamablehttp", "http"}
|
||||
|
||||
|
||||
class McpClient:
|
||||
"""Single MCP Server client supporting stdio and SSE transports."""
|
||||
"""Single MCP Server client supporting stdio, SSE and Streamable HTTP transports."""
|
||||
|
||||
def __init__(self, config: dict):
|
||||
"""
|
||||
config examples:
|
||||
stdio: {"name": "filesystem", "type": "stdio", "command": "npx", "args": [...]}
|
||||
SSE: {"name": "my-api", "type": "sse", "url": "http://localhost:8000/sse"}
|
||||
stdio: {"name": "filesystem", "type": "stdio", "command": "npx", "args": [...]}
|
||||
SSE: {"name": "my-api", "type": "sse", "url": "http://localhost:8000/sse"}
|
||||
streamable-http: {"name": "pubmed", "type": "streamable-http", "url": "https://x/mcp"}
|
||||
"""
|
||||
self.config = config
|
||||
self.name: str = config.get("name", "unknown")
|
||||
self.transport: str = config.get("type", "stdio")
|
||||
raw_transport: str = config.get("type", "stdio")
|
||||
# Normalize streamable-http aliases to a single internal key
|
||||
self.transport: str = (
|
||||
"streamable-http"
|
||||
if raw_transport.lower() in _STREAMABLE_HTTP_ALIASES
|
||||
else raw_transport
|
||||
)
|
||||
|
||||
# stdio state
|
||||
self._proc: Optional[subprocess.Popen] = None
|
||||
@@ -37,6 +48,11 @@ class McpClient:
|
||||
self._sse_url: Optional[str] = None
|
||||
self._post_url: Optional[str] = None # endpoint for sending messages (resolved from SSE)
|
||||
|
||||
# Streamable HTTP state
|
||||
self._http_url: Optional[str] = None
|
||||
self._http_headers: dict = {} # extra headers from user config (e.g. Authorization)
|
||||
self._http_session_id: Optional[str] = None # Mcp-Session-Id assigned by the server
|
||||
|
||||
# Shared state
|
||||
self._next_id = 1
|
||||
self._id_lock = threading.Lock()
|
||||
@@ -54,6 +70,8 @@ class McpClient:
|
||||
return self._init_stdio()
|
||||
elif self.transport == "sse":
|
||||
return self._init_sse()
|
||||
elif self.transport == "streamable-http":
|
||||
return self._init_streamable_http()
|
||||
else:
|
||||
logger.warning(f"[MCP:{self.name}] Unknown transport type: {self.transport!r}")
|
||||
return False
|
||||
@@ -109,6 +127,21 @@ class McpClient:
|
||||
pass
|
||||
self._proc = None
|
||||
logger.debug(f"[MCP:{self.name}] stdio process terminated")
|
||||
|
||||
# Best-effort streamable-http session termination
|
||||
if self.transport == "streamable-http" and self._http_session_id and self._http_url:
|
||||
try:
|
||||
req = urllib.request.Request(
|
||||
self._http_url,
|
||||
method="DELETE",
|
||||
headers={"Mcp-Session-Id": self._http_session_id, **self._http_headers},
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=5):
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
self._http_session_id = None
|
||||
|
||||
self._initialized = False
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
@@ -234,6 +267,120 @@ class McpClient:
|
||||
raw = resp.read().decode("utf-8")
|
||||
return json.loads(raw)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Streamable HTTP transport (MCP spec 2025-03-26)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _init_streamable_http(self) -> bool:
|
||||
url = self.config.get("url")
|
||||
if not url:
|
||||
logger.warning(f"[MCP:{self.name}] streamable-http config missing 'url'")
|
||||
return False
|
||||
|
||||
self._http_url = url
|
||||
# Allow user-provided headers (e.g. {"Authorization": "Bearer xxx"})
|
||||
extra_headers = self.config.get("headers") or {}
|
||||
if isinstance(extra_headers, dict):
|
||||
self._http_headers = {str(k): str(v) for k, v in extra_headers.items()}
|
||||
|
||||
return self._handshake()
|
||||
|
||||
def _streamable_http_send(self, message: dict) -> dict:
|
||||
"""POST a JSON-RPC request and return the response (JSON or SSE-wrapped)."""
|
||||
return self._streamable_http_post(message, expect_response=True)
|
||||
|
||||
def _streamable_http_post(self, message: dict, expect_response: bool) -> dict:
    """POST one JSON-RPC message to the Streamable HTTP endpoint.

    The reply may arrive as ``application/json`` (a single JSON body) or as
    ``text/event-stream`` (read via ``_read_sse_response`` until a response
    with the request's id shows up).

    Args:
        message: JSON-RPC message to serialise and send.
        expect_response: False for notifications; the body is then drained
            and ``{}`` is returned.

    Returns:
        The decoded JSON-RPC response, or ``{}`` for notifications, HTTP 202
        replies, and empty bodies.

    Raises:
        IOError: on an HTTP error status (message includes up to 200 chars
            of the server's error body).
    """
    payload = json.dumps(message).encode("utf-8")

    request_headers = {
        "Content-Type": "application/json",
        "Accept": "application/json, text/event-stream",
    }
    if self._http_session_id:
        request_headers["Mcp-Session-Id"] = self._http_session_id
    # User-configured headers are applied last, so they win on conflicts.
    request_headers.update(self._http_headers)

    http_request = urllib.request.Request(
        self._http_url,
        data=payload,
        method="POST",
        headers=request_headers,
    )

    try:
        response = urllib.request.urlopen(http_request, timeout=30)
    except urllib.error.HTTPError as e:
        # Surface the server-provided error body for easier debugging
        try:
            error_body = e.read().decode("utf-8", errors="ignore")
        except Exception:
            error_body = ""
        raise IOError(
            f"[MCP:{self.name}] streamable-http HTTP {e.code}: {error_body[:200]}"
        )

    with response:
        # Remember the first session id the server hands out (if any).
        assigned_session = response.headers.get("Mcp-Session-Id")
        if assigned_session and not self._http_session_id:
            self._http_session_id = assigned_session

        if hasattr(response, "status"):
            status_code = response.status
        else:
            status_code = response.getcode()

        # Notifications: server may reply with 202 Accepted and no body
        if status_code == 202 or not expect_response:
            try:
                response.read()
            except Exception:
                pass
            return {}

        media_type = (response.headers.get("Content-Type") or "").lower()
        request_id = message.get("id")

        if "text/event-stream" in media_type:
            return self._read_sse_response(response, request_id)

        text = response.read().decode("utf-8")
        return json.loads(text) if text else {}
|
||||
|
||||
def _read_sse_response(self, resp, expected_id) -> dict:
|
||||
"""Read an SSE stream and return the first JSON-RPC response with matching id."""
|
||||
data_buf: list = []
|
||||
for raw_line in resp:
|
||||
line = raw_line.decode("utf-8").rstrip("\n\r")
|
||||
if line == "":
|
||||
# End of an SSE event, attempt to parse accumulated data
|
||||
if data_buf:
|
||||
payload = "\n".join(data_buf)
|
||||
data_buf = []
|
||||
try:
|
||||
msg = json.loads(payload)
|
||||
except json.JSONDecodeError:
|
||||
continue
|
||||
# Skip notifications / mismatched ids
|
||||
if "id" not in msg:
|
||||
continue
|
||||
if expected_id is None or msg.get("id") == expected_id:
|
||||
return msg
|
||||
continue
|
||||
if line.startswith(":"):
|
||||
continue # SSE comment / keepalive
|
||||
if line.startswith("data:"):
|
||||
data_buf.append(line[len("data:"):].lstrip())
|
||||
# Ignore 'event:' / 'id:' lines; we only care about JSON-RPC payloads
|
||||
|
||||
raise IOError(f"[MCP:{self.name}] streamable-http SSE stream closed before response")
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Common JSON-RPC helpers
|
||||
# ------------------------------------------------------------------
|
||||
@@ -267,6 +414,8 @@ class McpClient:
|
||||
return self._stdio_send(message)
|
||||
elif self.transport == "sse":
|
||||
return self._sse_send(message)
|
||||
elif self.transport == "streamable-http":
|
||||
return self._streamable_http_send(message)
|
||||
else:
|
||||
raise ValueError(f"[MCP:{self.name}] Unsupported transport: {self.transport}")
|
||||
|
||||
@@ -291,6 +440,11 @@ class McpClient:
|
||||
pass
|
||||
except Exception:
|
||||
pass # notifications are fire-and-forget
|
||||
elif self.transport == "streamable-http":
|
||||
try:
|
||||
self._streamable_http_post(notification, expect_response=False)
|
||||
except Exception:
|
||||
pass # notifications are fire-and-forget
|
||||
|
||||
def _handshake(self) -> bool:
|
||||
"""Perform the MCP initialize / notifications/initialized handshake."""
|
||||
|
||||
@@ -57,34 +57,44 @@ def init_scheduler(agent_bridge) -> bool:
|
||||
_task_store = TaskStore(store_path)
|
||||
logger.debug(f"[Scheduler] Task store initialized: {store_path}")
|
||||
|
||||
# Create execute callback
|
||||
# Create execute callback. Returns True on success, False to ask
|
||||
# the scheduler to retry on the next tick (e.g. channel not yet
|
||||
# ready right after process start).
|
||||
def execute_task_callback(task: dict):
    """Execute one scheduled task and report whether it was consumed.

    Dispatches on ``task["action"]["type"]`` to the matching ``_execute_*``
    helper and returns that helper's result. Each helper is invoked exactly
    once per call (the earlier un-returned duplicate calls are removed, so a
    task is no longer executed twice).

    Returns:
        False when the target channel is not ready or an exception occurs,
        asking the scheduler to retry on the next tick; True for an unknown
        action type (nothing to retry); otherwise whatever the selected
        ``_execute_*`` helper returns.
    """
    try:
        action = task.get("action", {})
        action_type = action.get("type")
        channel_type = action.get("channel_type", "unknown")
        receiver = action.get("receiver", "")

        # Defer instead of consuming the task when the send would be dropped.
        if not _is_channel_ready(channel_type, receiver):
            logger.warning(
                f"[Scheduler] Task {task.get('id')}: channel "
                f"'{channel_type}' not ready for receiver={receiver} "
                f"(no inbound msg cached since restart?); deferring"
            )
            return False

        if action_type == "agent_task":
            return _execute_agent_task(task, agent_bridge)
        elif action_type == "send_message":
            # Legacy support for old tasks
            return _execute_send_message(task, agent_bridge)
        elif action_type == "tool_call":
            # Legacy support for old tasks
            return _execute_tool_call(task, agent_bridge)
        elif action_type == "skill_call":
            # Legacy support for old tasks
            return _execute_skill_call(task, agent_bridge)
        else:
            logger.warning(f"[Scheduler] Unknown action type: {action_type}")
            return True
    except Exception as e:
        logger.error(f"[Scheduler] Error executing task {task.get('id')}: {e}")
        return False
|
||||
|
||||
# Create scheduler service
|
||||
_scheduler_service = SchedulerService(_task_store, execute_task_callback)
|
||||
_scheduler_service.start()
|
||||
|
||||
logger.debug("[Scheduler] Scheduler service initialized and started")
|
||||
logger.info("[Scheduler] Service initialized and started")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
@@ -92,6 +102,40 @@ def init_scheduler(agent_bridge) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def _is_channel_ready(channel_type: str, receiver: str) -> bool:
|
||||
"""Best-effort readiness probe for outbound channels.
|
||||
|
||||
Returns False when we know the send will drop (e.g. weixin not yet
|
||||
logged in, web session has no polling queue), so the scheduler can
|
||||
defer instead of consuming the task. Unknown channels return True
|
||||
to preserve previous behaviour.
|
||||
"""
|
||||
if not channel_type or channel_type == "unknown":
|
||||
return True
|
||||
try:
|
||||
from channel.channel_factory import create_channel
|
||||
channel = create_channel(channel_type)
|
||||
if channel is None:
|
||||
return False
|
||||
|
||||
if channel_type == "weixin":
|
||||
tokens = getattr(channel, "_context_tokens", None)
|
||||
if not tokens or receiver not in tokens:
|
||||
return False
|
||||
return True
|
||||
|
||||
if channel_type == "web":
|
||||
queues = getattr(channel, "session_queues", None)
|
||||
if not queues or receiver not in queues:
|
||||
return False
|
||||
return True
|
||||
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.warning(f"[Scheduler] Channel readiness check failed for {channel_type}: {e}")
|
||||
return True
|
||||
|
||||
|
||||
def get_task_store():
    """Return the module-level ``_task_store`` object as it currently stands."""
    store = _task_store
    return store
|
||||
@@ -145,13 +189,10 @@ def _remember_delivered_output(
|
||||
)
|
||||
|
||||
|
||||
def _execute_agent_task(task: dict, agent_bridge):
|
||||
def _execute_agent_task(task: dict, agent_bridge) -> bool:
|
||||
"""
|
||||
Execute an agent_task action - let Agent handle the task
|
||||
|
||||
Args:
|
||||
task: Task dictionary
|
||||
agent_bridge: AgentBridge instance
|
||||
Execute an agent_task action - let Agent handle the task.
|
||||
Returns True on successful delivery, False to retry next tick.
|
||||
"""
|
||||
try:
|
||||
action = task.get("action", {})
|
||||
@@ -162,11 +203,11 @@ def _execute_agent_task(task: dict, agent_bridge):
|
||||
|
||||
if not task_description:
|
||||
logger.error(f"[Scheduler] Task {task['id']}: No task_description specified")
|
||||
return
|
||||
return True # malformed task, don't loop forever
|
||||
|
||||
if not receiver:
|
||||
logger.error(f"[Scheduler] Task {task['id']}: No receiver specified")
|
||||
return
|
||||
return True
|
||||
|
||||
# Check for unsupported channels
|
||||
if channel_type == "dingtalk":
|
||||
@@ -209,51 +250,47 @@ def _execute_agent_task(task: dict, agent_bridge):
|
||||
try:
|
||||
# Don't clear history - scheduler tasks use isolated session_id so they won't pollute user conversations
|
||||
reply = agent_bridge.agent_reply(task_description, context=context, on_event=None, clear_history=False)
|
||||
|
||||
if reply and reply.content:
|
||||
# Send the reply via channel
|
||||
from channel.channel_factory import create_channel
|
||||
|
||||
try:
|
||||
channel = create_channel(channel_type)
|
||||
if channel:
|
||||
# For web channel, register request_id
|
||||
if channel_type == "web" and hasattr(channel, 'request_to_session'):
|
||||
request_id = context.get("request_id")
|
||||
if request_id:
|
||||
channel.request_to_session[request_id] = receiver
|
||||
logger.debug(f"[Scheduler] Registered request_id {request_id} -> session {receiver}")
|
||||
|
||||
# Send the reply
|
||||
channel.send(reply, context)
|
||||
_remember_delivered_output(agent_bridge, task, channel_type, reply.content)
|
||||
logger.info(f"[Scheduler] Task {task['id']} executed successfully, result sent to {receiver}")
|
||||
else:
|
||||
logger.error(f"[Scheduler] Failed to create channel: {channel_type}")
|
||||
except Exception as e:
|
||||
logger.error(f"[Scheduler] Failed to send result: {e}")
|
||||
else:
|
||||
|
||||
if not (reply and reply.content):
|
||||
logger.error(f"[Scheduler] Task {task['id']}: No result from agent execution")
|
||||
|
||||
return True # agent ran but produced nothing; don't loop
|
||||
|
||||
from channel.channel_factory import create_channel
|
||||
channel = create_channel(channel_type)
|
||||
if not channel:
|
||||
logger.error(f"[Scheduler] Failed to create channel: {channel_type}")
|
||||
return False
|
||||
|
||||
if channel_type == "web" and hasattr(channel, 'request_to_session'):
|
||||
request_id = context.get("request_id")
|
||||
if request_id:
|
||||
channel.request_to_session[request_id] = receiver
|
||||
|
||||
try:
|
||||
channel.send(reply, context)
|
||||
except Exception as e:
|
||||
logger.error(f"[Scheduler] Failed to send result: {e}")
|
||||
return False
|
||||
|
||||
_remember_delivered_output(agent_bridge, task, channel_type, reply.content)
|
||||
logger.info(f"[Scheduler] Task {task['id']} executed successfully, result sent to {receiver}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[Scheduler] Failed to execute task via Agent: {e}")
|
||||
import traceback
|
||||
logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
|
||||
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[Scheduler] Error in _execute_agent_task: {e}")
|
||||
import traceback
|
||||
logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
|
||||
return False
|
||||
|
||||
|
||||
def _execute_send_message(task: dict, agent_bridge):
|
||||
"""
|
||||
Execute a send_message action
|
||||
|
||||
Args:
|
||||
task: Task dictionary
|
||||
agent_bridge: AgentBridge instance
|
||||
"""
|
||||
def _execute_send_message(task: dict, agent_bridge) -> bool:
|
||||
"""Execute a send_message action. Returns True/False for delivery."""
|
||||
try:
|
||||
action = task.get("action", {})
|
||||
content = action.get("content", "")
|
||||
@@ -263,7 +300,7 @@ def _execute_send_message(task: dict, agent_bridge):
|
||||
|
||||
if not receiver:
|
||||
logger.error(f"[Scheduler] Task {task['id']}: No receiver specified")
|
||||
return
|
||||
return True
|
||||
|
||||
# Create context for sending message
|
||||
context = Context(ContextType.TEXT, content)
|
||||
@@ -308,169 +345,135 @@ def _execute_send_message(task: dict, agent_bridge):
|
||||
# Get channel and send
|
||||
from channel.channel_factory import create_channel
|
||||
|
||||
channel = create_channel(channel_type)
|
||||
if not channel:
|
||||
logger.error(f"[Scheduler] Failed to create channel: {channel_type}")
|
||||
return False
|
||||
|
||||
if channel_type == "web" and hasattr(channel, 'request_to_session'):
|
||||
channel.request_to_session[request_id] = receiver
|
||||
|
||||
try:
|
||||
channel = create_channel(channel_type)
|
||||
if channel:
|
||||
# For web channel, register the request_id to session mapping
|
||||
if channel_type == "web" and hasattr(channel, 'request_to_session'):
|
||||
channel.request_to_session[request_id] = receiver
|
||||
logger.debug(f"[Scheduler] Registered request_id {request_id} -> session {receiver}")
|
||||
|
||||
channel.send(reply, context)
|
||||
_remember_delivered_output(agent_bridge, task, channel_type, content)
|
||||
logger.info(f"[Scheduler] Task {task['id']} executed: sent message to {receiver}")
|
||||
else:
|
||||
logger.error(f"[Scheduler] Failed to create channel: {channel_type}")
|
||||
channel.send(reply, context)
|
||||
except Exception as e:
|
||||
logger.error(f"[Scheduler] Failed to send message: {e}")
|
||||
import traceback
|
||||
logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
|
||||
|
||||
return False
|
||||
|
||||
_remember_delivered_output(agent_bridge, task, channel_type, content)
|
||||
logger.info(f"[Scheduler] Task {task['id']} executed: sent message to {receiver}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[Scheduler] Error in _execute_send_message: {e}")
|
||||
import traceback
|
||||
logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
|
||||
return False
|
||||
|
||||
|
||||
def _execute_tool_call(task: dict, agent_bridge) -> bool:
    """Execute a ``tool_call`` action and send the tool output to the receiver.

    The tool is created and executed once, its result is wrapped in a text
    reply, and that reply is sent once through the task's channel.

    Args:
        task: Task dictionary; ``task["action"]`` carries the tool name and
            parameters (``call_name``/``call_params`` or the older
            ``tool_name``/``tool_params``), ``result_prefix``, ``receiver``,
            ``is_group`` and ``channel_type``.
        agent_bridge: AgentBridge instance, passed on to
            ``_remember_delivered_output``.

    Returns:
        True when the result was sent, and also when the task is malformed
        (no tool name, no receiver, tool not found) so it is not retried.
        False when the channel cannot be created, sending fails, or any
        other exception is raised.
    """
    try:
        action = task.get("action", {})
        # Support both old and new field names
        tool_name = action.get("call_name") or action.get("tool_name")
        tool_params = action.get("call_params") or action.get("tool_params", {})
        result_prefix = action.get("result_prefix", "")
        receiver = action.get("receiver")
        is_group = action.get("is_group", False)
        channel_type = action.get("channel_type", "unknown")

        if not tool_name:
            logger.error(f"[Scheduler] Task {task['id']}: No tool_name specified")
            return True
        if not receiver:
            logger.error(f"[Scheduler] Task {task['id']}: No receiver specified")
            return True

        from agent.tools.tool_manager import ToolManager

        tool = ToolManager().create_tool(tool_name)
        if not tool:
            logger.error(f"[Scheduler] Task {task['id']}: Tool '{tool_name}' not found")
            return True

        logger.info(f"[Scheduler] Task {task['id']}: Executing tool '{tool_name}' with params {tool_params}")
        result = tool.execute(tool_params)

        # Prefer the ``result`` attribute when present, else the string form.
        content = result.result if hasattr(result, 'result') else str(result)
        if result_prefix:
            content = f"{result_prefix}\n\n{content}"

        context = Context(ContextType.TEXT, content)
        context["receiver"] = receiver
        context["isgroup"] = is_group
        context["session_id"] = receiver

        # Channel-specific context setup
        request_id = None
        if channel_type == "web":
            import uuid
            request_id = f"scheduler_{task['id']}_{uuid.uuid4().hex[:8]}"
            context["request_id"] = request_id
            logger.debug(f"[Scheduler] Generated request_id for web channel: {request_id}")
        elif channel_type == "feishu":
            context["receive_id_type"] = "chat_id" if is_group else "open_id"
            context["msg"] = None
            logger.debug(f"[Scheduler] Feishu: receive_id_type={context['receive_id_type']}, is_group={is_group}, receiver={receiver}")
        elif channel_type == "wecom_bot":
            context["msg"] = None

        reply = Reply(ReplyType.TEXT, content)

        from channel.channel_factory import create_channel

        channel = create_channel(channel_type)
        if not channel:
            logger.error(f"[Scheduler] Failed to create channel: {channel_type}")
            return False

        # Record which session the generated request id belongs to before sending.
        if channel_type == "web" and request_id and hasattr(channel, 'request_to_session'):
            channel.request_to_session[request_id] = receiver

        try:
            channel.send(reply, context)
        except Exception as e:
            logger.error(f"[Scheduler] Failed to send tool result: {e}")
            return False

        _remember_delivered_output(agent_bridge, task, channel_type, content)
        logger.info(f"[Scheduler] Task {task['id']} executed: sent tool result to {receiver}")
        return True

    except Exception as e:
        logger.error(f"[Scheduler] Error in _execute_tool_call: {e}")
        return False
|
||||
|
||||
|
||||
def _execute_skill_call(task: dict, agent_bridge):
|
||||
"""
|
||||
Execute a skill_call action by asking Agent to run the skill
|
||||
|
||||
Args:
|
||||
task: Task dictionary
|
||||
agent_bridge: AgentBridge instance
|
||||
"""
|
||||
def _execute_skill_call(task: dict, agent_bridge) -> bool:
|
||||
"""Execute a skill_call action by asking Agent to run the skill.
|
||||
Returns True/False for delivery."""
|
||||
try:
|
||||
action = task.get("action", {})
|
||||
# Support both old and new field names
|
||||
skill_name = action.get("call_name") or action.get("skill_name")
|
||||
skill_params = action.get("call_params") or action.get("skill_params", {})
|
||||
result_prefix = action.get("result_prefix", "")
|
||||
receiver = action.get("receiver")
|
||||
is_group = action.get("isgroup", False)
|
||||
channel_type = action.get("channel_type", "unknown")
|
||||
|
||||
|
||||
if not skill_name:
|
||||
logger.error(f"[Scheduler] Task {task['id']}: No skill_name specified")
|
||||
return
|
||||
|
||||
return True
|
||||
if not receiver:
|
||||
logger.error(f"[Scheduler] Task {task['id']}: No receiver specified")
|
||||
return
|
||||
|
||||
return True
|
||||
|
||||
logger.info(f"[Scheduler] Task {task['id']}: Executing skill '{skill_name}' with params {skill_params}")
|
||||
|
||||
# Create a unique session_id for this scheduled task to avoid polluting user's conversation
|
||||
# Format: scheduler_<receiver>_<task_id> to ensure isolation
|
||||
|
||||
scheduler_session_id = f"scheduler_{receiver}_{task['id']}"
|
||||
|
||||
# Build a natural language query for the Agent to execute the skill
|
||||
# Format: "Use skill-name to do something with params"
|
||||
param_str = ", ".join([f"{k}={v}" for k, v in skill_params.items()])
|
||||
query = f"Use {skill_name} skill"
|
||||
if param_str:
|
||||
query += f" with {param_str}"
|
||||
|
||||
# Create context for Agent
|
||||
|
||||
context = Context(ContextType.TEXT, query)
|
||||
context["receiver"] = receiver
|
||||
context["isgroup"] = is_group
|
||||
context["session_id"] = scheduler_session_id
|
||||
|
||||
# Channel-specific setup
|
||||
|
||||
if channel_type == "web":
|
||||
import uuid
|
||||
request_id = f"scheduler_{task['id']}_{uuid.uuid4().hex[:8]}"
|
||||
@@ -481,49 +484,48 @@ def _execute_skill_call(task: dict, agent_bridge):
|
||||
elif channel_type == "wecom_bot":
|
||||
context["msg"] = None
|
||||
|
||||
# Use Agent to execute the skill
|
||||
try:
|
||||
# Don't clear history - scheduler tasks use isolated session_id so they won't pollute user conversations
|
||||
reply = agent_bridge.agent_reply(query, context=context, on_event=None, clear_history=False)
|
||||
|
||||
if reply and reply.content:
|
||||
content = reply.content
|
||||
|
||||
# Add prefix if specified
|
||||
if result_prefix:
|
||||
content = f"{result_prefix}\n\n{content}"
|
||||
|
||||
# Send the result via channel
|
||||
from channel.channel_factory import create_channel
|
||||
|
||||
try:
|
||||
channel = create_channel(channel_type)
|
||||
if channel:
|
||||
# For web channel, register request_id
|
||||
if channel_type == "web" and hasattr(channel, 'request_to_session'):
|
||||
req_id = context.get("request_id")
|
||||
if req_id:
|
||||
channel.request_to_session[req_id] = receiver
|
||||
logger.debug(f"[Scheduler] Registered request_id {req_id} -> session {receiver}")
|
||||
|
||||
channel.send(Reply(ReplyType.TEXT, content), context)
|
||||
_remember_delivered_output(agent_bridge, task, channel_type, content)
|
||||
except Exception as e:
|
||||
logger.error(f"[Scheduler] Failed to send skill result: {e}")
|
||||
|
||||
logger.info(f"[Scheduler] Task {task['id']} executed: skill result sent to {receiver}")
|
||||
else:
|
||||
logger.error(f"[Scheduler] Task {task['id']}: No result from skill execution")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[Scheduler] Failed to execute skill via Agent: {e}")
|
||||
import traceback
|
||||
logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
|
||||
|
||||
return False
|
||||
|
||||
if not (reply and reply.content):
|
||||
logger.error(f"[Scheduler] Task {task['id']}: No result from skill execution")
|
||||
return True
|
||||
|
||||
content = reply.content
|
||||
if result_prefix:
|
||||
content = f"{result_prefix}\n\n{content}"
|
||||
|
||||
from channel.channel_factory import create_channel
|
||||
channel = create_channel(channel_type)
|
||||
if not channel:
|
||||
logger.error(f"[Scheduler] Failed to create channel: {channel_type}")
|
||||
return False
|
||||
|
||||
if channel_type == "web" and hasattr(channel, 'request_to_session'):
|
||||
req_id = context.get("request_id")
|
||||
if req_id:
|
||||
channel.request_to_session[req_id] = receiver
|
||||
|
||||
try:
|
||||
channel.send(Reply(ReplyType.TEXT, content), context)
|
||||
except Exception as e:
|
||||
logger.error(f"[Scheduler] Failed to send skill result: {e}")
|
||||
return False
|
||||
|
||||
_remember_delivered_output(agent_bridge, task, channel_type, content)
|
||||
logger.info(f"[Scheduler] Task {task['id']} executed: skill result sent to {receiver}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[Scheduler] Error in _execute_skill_call: {e}")
|
||||
import traceback
|
||||
logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
|
||||
return False
|
||||
|
||||
|
||||
def attach_scheduler_to_tool(tool, context: Context = None):
|
||||
|
||||
@@ -52,7 +52,6 @@ class SchedulerService:
|
||||
self.running = True
|
||||
self.thread = threading.Thread(target=self._run_loop, daemon=True)
|
||||
self.thread.start()
|
||||
logger.debug("[Scheduler] Service started")
|
||||
|
||||
def stop(self):
|
||||
"""Stop the scheduler service"""
|
||||
@@ -67,7 +66,7 @@ class SchedulerService:
|
||||
|
||||
def _run_loop(self):
|
||||
"""Main scheduler loop"""
|
||||
logger.debug("[Scheduler] Scheduler loop started")
|
||||
logger.info("[Scheduler] Scheduler loop started")
|
||||
|
||||
while self.running:
|
||||
try:
|
||||
@@ -84,12 +83,18 @@ class SchedulerService:
|
||||
|
||||
for task in tasks:
|
||||
try:
|
||||
# Check if task is due
|
||||
if self._is_task_due(task, now):
|
||||
logger.info(f"[Scheduler] Executing task: {task['id']} - {task['name']}")
|
||||
self._execute_task(task)
|
||||
|
||||
# Update next run time
|
||||
ok = self._execute_task(task)
|
||||
if not ok:
|
||||
# Leave next_run_at as-is so the next loop retries.
|
||||
# Cron tasks within the catch-up window will keep
|
||||
# firing; beyond it _is_task_due will reschedule.
|
||||
logger.warning(
|
||||
f"[Scheduler] Task {task['id']} delivery failed, will retry next tick"
|
||||
)
|
||||
continue
|
||||
|
||||
next_run = self._calculate_next_run(task, now)
|
||||
if next_run:
|
||||
self.task_store.update_task(task['id'], {
|
||||
@@ -97,7 +102,6 @@ class SchedulerService:
|
||||
"last_run_at": now.isoformat()
|
||||
})
|
||||
else:
|
||||
# One-time task completed, remove it
|
||||
self.task_store.delete_task(task['id'])
|
||||
logger.info(f"[Scheduler] One-time task completed and removed: {task['id']}")
|
||||
except Exception as e:
|
||||
@@ -128,30 +132,35 @@ class SchedulerService:
|
||||
try:
|
||||
next_run = _parse_naive_local(next_run_str)
|
||||
|
||||
# Check if task is overdue (e.g., service restart)
|
||||
if next_run < now:
|
||||
time_diff = (now - next_run).total_seconds()
|
||||
|
||||
# If overdue by more than 5 minutes, skip this run and schedule next
|
||||
if time_diff > 300: # 5 minutes
|
||||
logger.warning(f"[Scheduler] Task {task['id']} is overdue by {int(time_diff)}s, skipping and scheduling next run")
|
||||
|
||||
# For one-time tasks, remove them directly
|
||||
schedule = task.get("schedule", {})
|
||||
if schedule.get("type") == "once":
|
||||
self.task_store.delete_task(task['id'])
|
||||
logger.info(f"[Scheduler] One-time task {task['id']} expired, removed")
|
||||
return False
|
||||
|
||||
# For recurring tasks, calculate next run from now
|
||||
next_next_run = self._calculate_next_run(task, now)
|
||||
if next_next_run:
|
||||
self.task_store.update_task(task['id'], {
|
||||
"next_run_at": next_next_run.isoformat()
|
||||
})
|
||||
logger.info(f"[Scheduler] Rescheduled task {task['id']} to {next_next_run}")
|
||||
schedule = task.get("schedule", {})
|
||||
schedule_type = schedule.get("type")
|
||||
|
||||
# Catch-up window: fire if we're within 10 minutes of the
|
||||
# scheduled tick. Beyond that we'd rather skip than push a
|
||||
# stale daily report to the user.
|
||||
if time_diff <= 600:
|
||||
return True
|
||||
|
||||
logger.warning(
|
||||
f"[Scheduler] Task {task['id']} is overdue by {int(time_diff)}s, "
|
||||
f"skipping and scheduling next run"
|
||||
)
|
||||
|
||||
if schedule_type == "once":
|
||||
self.task_store.delete_task(task['id'])
|
||||
logger.info(f"[Scheduler] One-time task {task['id']} expired, removed")
|
||||
return False
|
||||
|
||||
|
||||
next_next_run = self._calculate_next_run(task, now)
|
||||
if next_next_run:
|
||||
self.task_store.update_task(task['id'], {
|
||||
"next_run_at": next_next_run.isoformat()
|
||||
})
|
||||
logger.info(f"[Scheduler] Rescheduled task {task['id']} to {next_next_run}")
|
||||
return False
|
||||
|
||||
return now >= next_run
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
@@ -213,20 +222,22 @@ class SchedulerService:
|
||||
|
||||
return None
|
||||
|
||||
def _execute_task(self, task: dict):
|
||||
def _execute_task(self, task: dict) -> bool:
|
||||
"""
|
||||
Execute a task
|
||||
|
||||
Args:
|
||||
task: Task dictionary
|
||||
Execute a task.
|
||||
|
||||
Returns True if delivery succeeded (caller should advance state),
|
||||
False if it failed (caller should keep next_run_at so the next
|
||||
loop iteration retries). Callback may return None for legacy
|
||||
behaviour, treated as success.
|
||||
"""
|
||||
try:
|
||||
# Call the execute callback
|
||||
self.execute_callback(task)
|
||||
result = self.execute_callback(task)
|
||||
return False if result is False else True
|
||||
except Exception as e:
|
||||
logger.error(f"[Scheduler] Error executing task {task['id']}: {e}")
|
||||
# Update task with error
|
||||
self.task_store.update_task(task['id'], {
|
||||
"last_error": str(e),
|
||||
"last_error_at": datetime.now().isoformat()
|
||||
})
|
||||
return False
|
||||
|
||||
@@ -30,7 +30,7 @@ from common import const
|
||||
from common.log import logger
|
||||
from config import conf
|
||||
|
||||
DEFAULT_MODEL = const.GPT_55
|
||||
DEFAULT_MODEL = const.GPT_41_MINI
|
||||
DEFAULT_TIMEOUT = 60
|
||||
MAX_TOKENS = 1000
|
||||
COMPRESS_THRESHOLD = 1_048_576 # 1 MB
|
||||
@@ -57,6 +57,7 @@ _DISCOVERABLE_MODELS = [
|
||||
("qianfan_api_key", const.QIANFAN, const.ERNIE_45_TURBO_VL, "Qianfan"),
|
||||
("zhipu_ai_api_key", const.ZHIPU_AI, const.GLM_4_7, "ZhipuAI"),
|
||||
("minimax_api_key", const.MiniMax, const.MINIMAX_M2_7, "MiniMax"),
|
||||
("mimo_api_key", const.MIMO, const.MIMO_V2_5_PRO, "MiMo"),
|
||||
]
|
||||
|
||||
# Model name prefix → discoverable provider display_name.
|
||||
@@ -73,11 +74,29 @@ _MODEL_PREFIX_TO_PROVIDER = [
|
||||
("glm-", "ZhipuAI"),
|
||||
("minimax-", "MiniMax"),
|
||||
("abab", "MiniMax"),
|
||||
("mimo-", "MiMo"),
|
||||
]
|
||||
|
||||
# Model prefixes that natively belong to OpenAI / LinkAI (raw HTTP providers).
|
||||
_OPENAI_MODEL_PREFIXES = ("gpt-", "o1-", "o3-", "o4-", "chatgpt-")
|
||||
|
||||
# Maps the UI provider id (persisted in tools.vision.provider) to the internal
|
||||
# display name used in VisionProvider.name. Keep in sync with _DISCOVERABLE_MODELS
|
||||
# and the openai/linkai branches in _route_by_model_name.
|
||||
_PROVIDER_ID_TO_DISPLAY = {
|
||||
"openai": "OpenAI",
|
||||
"linkai": "LinkAI",
|
||||
"moonshot": "Moonshot",
|
||||
"doubao": "Doubao",
|
||||
"dashscope": "DashScope",
|
||||
"claudeAPI": "Claude",
|
||||
"gemini": "Gemini",
|
||||
"qianfan": "Qianfan",
|
||||
"zhipu": "ZhipuAI",
|
||||
"minimax": "MiniMax",
|
||||
"mimo": "MiMo",
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class VisionProvider:
|
||||
@@ -211,13 +230,19 @@ class Vision(BaseTool):
|
||||
are de-duplicated to avoid retrying the same endpoint twice.
|
||||
"""
|
||||
user_model = self._resolve_user_vision_model()
|
||||
user_provider = self._resolve_user_vision_provider()
|
||||
providers: List[VisionProvider] = []
|
||||
|
||||
# Step 1: preferred provider derived from tools.vision.model
|
||||
if user_model:
|
||||
# Step 1: preferred provider — explicit `tools.vision.provider`
|
||||
# wins so custom model names can still be routed correctly. Falls
|
||||
# through to model-name prefix inference when provider is unset.
|
||||
preferred = None
|
||||
if user_provider and user_model:
|
||||
preferred = self._route_by_provider_id(user_provider, user_model)
|
||||
if not preferred and user_model:
|
||||
preferred = self._route_by_model_name(user_model)
|
||||
if preferred:
|
||||
providers.extend(preferred)
|
||||
if preferred:
|
||||
providers.extend(preferred)
|
||||
|
||||
# Step 2: auto-discovery chain as fallback
|
||||
existing = {p.name for p in providers}
|
||||
@@ -263,6 +288,24 @@ class Vision(BaseTool):
|
||||
return m.strip()
|
||||
return None
|
||||
|
||||
@staticmethod
def _resolve_user_vision_provider() -> Optional[str]:
    """Return the vendor id stored under ``tools.vision.provider``.

    The ``tools`` section is read from the config, with ``tool`` as the
    alternate key. This lets users pin a vendor for custom model names
    that prefix inference can't recognize.

    Returns:
        The stripped provider string, or None when a config level is not a
        dict or the value is missing, not a string, or blank.
    """
    node = conf().get("tools") or conf().get("tool") or {}
    if not isinstance(node, dict):
        return None

    node = node.get("vision", {})
    if not isinstance(node, dict):
        return None

    provider = node.get("provider")
    if not isinstance(provider, str):
        return None

    provider = provider.strip()
    return provider or None
|
||||
|
||||
@staticmethod
|
||||
def _infer_provider_from_model(model_name: str) -> Optional[str]:
|
||||
"""
|
||||
@@ -279,6 +322,54 @@ class Vision(BaseTool):
|
||||
return display_name
|
||||
return None
|
||||
|
||||
def _route_by_provider_id(self, provider_id: str, user_model: str) -> Optional[List[VisionProvider]]:
    """Route by the UI-persisted provider id.

    Args:
        provider_id: Vendor id, looked up in ``_PROVIDER_ID_TO_DISPLAY``.
        user_model: Model name to use with the chosen provider.

    Returns:
        - [provider] : provider id is known and its key is configured.
        - None       : unknown provider id, missing key, or the bot can't
                       be created / lacks ``call_vision``. Caller falls
                       through to model-name-based routing.
    """
    display_name = _PROVIDER_ID_TO_DISPLAY.get(provider_id)
    if not display_name:
        return None

    # OpenAI / LinkAI use raw HTTP providers, not the discoverable bot path.
    if provider_id == "openai":
        p = self._build_openai_provider(user_model)
        return [p] if p else None
    if provider_id == "linkai":
        p = self._build_linkai_provider(user_model)
        return [p] if p else None

    # Discoverable bot-backed providers: first entry with a matching display name.
    entry = next((e for e in _DISCOVERABLE_MODELS if e[3] == display_name), None)
    if entry is None:
        return None
    config_key, bot_type, _default_model, _name = entry

    api_key = conf().get(config_key, "")
    # Go through str() so a non-string config value (e.g. a number) is
    # checked for presence instead of raising AttributeError on .strip().
    if not api_key or not str(api_key).strip():
        logger.warning(f"[Vision] tools.vision.provider='{provider_id}' "
                       f"but '{config_key}' is not configured. Falling back.")
        return None

    try:
        from models.bot_factory import create_bot
        bot = create_bot(bot_type)
        if not hasattr(bot, 'call_vision'):
            logger.warning(f"[Vision] '{display_name}' bot does not implement call_vision.")
            return None
    except Exception as e:
        logger.warning(f"[Vision] Failed to create '{display_name}' bot: {e}")
        return None

    # Key and base are left empty here; the call goes through the bot object.
    return [VisionProvider(
        name=display_name,
        api_key="",
        api_base="",
        model_override=user_model,
        use_bot=True,
        fallback_bot=bot,
    )]
|
||||
|
||||
def _route_by_model_name(self, user_model: str) -> Optional[List[VisionProvider]]:
|
||||
"""
|
||||
Try to build a provider list using the user-specified model name.
|
||||
|
||||
12
app.py
12
app.py
@@ -288,6 +288,16 @@ def _warmup_mcp_tools():
|
||||
logger.warning(f"[App] MCP warmup failed (non-fatal): {e}")
|
||||
|
||||
|
||||
def _warmup_scheduler():
    """Request the agent bridge from ``Bridge`` during startup.

    The intent is to have the scheduler thread start at process boot
    rather than on the first user message. Any failure is logged as a
    warning and otherwise ignored.
    """
    try:
        from bridge.bridge import Bridge

        bridge = Bridge()
        bridge.get_agent_bridge()
    except Exception as warmup_error:
        logger.warning(f"[App] Scheduler warmup failed: {warmup_error}")
|
||||
|
||||
|
||||
def _sync_builtin_skills():
|
||||
"""Sync builtin skills from project skills/ to workspace skills/ on startup."""
|
||||
import shutil
|
||||
@@ -353,6 +363,8 @@ def run():
|
||||
# latency isn't dominated by npx package downloads.
|
||||
_warmup_mcp_tools()
|
||||
|
||||
_warmup_scheduler()
|
||||
|
||||
logger.info(f"[App] Starting channels: {channel_names}")
|
||||
|
||||
_channel_mgr = ChannelManager()
|
||||
|
||||
@@ -5,7 +5,7 @@ Agent Bridge - Integrates Agent system with existing COW bridge
|
||||
import os
|
||||
from typing import Optional, List
|
||||
|
||||
from agent.protocol import Agent, LLMModel, LLMRequest
|
||||
from agent.protocol import Agent, LLMModel, LLMRequest, get_cancel_registry
|
||||
from bridge.agent_event_handler import AgentEventHandler
|
||||
from bridge.agent_initializer import AgentInitializer
|
||||
from bridge.bridge import Bridge
|
||||
@@ -285,6 +285,15 @@ class AgentBridge:
|
||||
|
||||
# Create helper instances
|
||||
self.initializer = AgentInitializer(bridge, self)
|
||||
|
||||
# Eager-start the scheduler so cron tasks fire without waiting
|
||||
# for the first user message. init_scheduler is idempotent.
|
||||
try:
|
||||
from agent.tools.scheduler.integration import init_scheduler
|
||||
if init_scheduler(self):
|
||||
self.scheduler_initialized = True
|
||||
except Exception as e:
|
||||
logger.warning(f"[AgentBridge] Eager scheduler init failed: {e}")
|
||||
def create_agent(self, system_prompt: str, tools: List = None, **kwargs) -> Agent:
|
||||
"""
|
||||
Create the super agent with COW integration
|
||||
@@ -390,11 +399,22 @@ class AgentBridge:
|
||||
"""
|
||||
session_id = None
|
||||
agent = None
|
||||
request_id = None
|
||||
cancel_event = None
|
||||
try:
|
||||
# Extract session_id from context for user isolation
|
||||
if context:
|
||||
session_id = context.kwargs.get("session_id") or context.get("session_id")
|
||||
|
||||
request_id = context.kwargs.get("request_id") or context.get("request_id")
|
||||
|
||||
# Register a cancel token. Prefer per-turn request_id (web),
|
||||
# fall back to session_id (IM channels). The Event is polled by
|
||||
# AgentStreamExecutor at safe checkpoints.
|
||||
registry = get_cancel_registry()
|
||||
token_key = request_id or session_id
|
||||
if token_key:
|
||||
cancel_event = registry.register(token_key, session_id=session_id)
|
||||
|
||||
# Get agent for this session (will auto-initialize if needed)
|
||||
agent = self.get_agent(session_id=session_id)
|
||||
if not agent:
|
||||
@@ -449,7 +469,8 @@ class AgentBridge:
|
||||
response = agent.run_stream(
|
||||
user_message=query,
|
||||
on_event=event_handler.handle_event,
|
||||
clear_history=clear_history
|
||||
clear_history=clear_history,
|
||||
cancel_event=cancel_event,
|
||||
)
|
||||
finally:
|
||||
# Restore original tools
|
||||
@@ -459,6 +480,13 @@ class AgentBridge:
|
||||
# Log execution summary
|
||||
event_handler.log_summary()
|
||||
|
||||
# Release cancel token; keep registry bounded.
|
||||
if token_key:
|
||||
try:
|
||||
registry.unregister(token_key)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Persist new messages generated during this run
|
||||
if session_id:
|
||||
channel_type = (context.get("channel_type") or "") if context else ""
|
||||
@@ -512,6 +540,12 @@ class AgentBridge:
|
||||
logger.info(f"[AgentBridge] Cleared DB for session after error: {session_id}")
|
||||
except Exception as db_err:
|
||||
logger.warning(f"[AgentBridge] Failed to clear DB after error: {db_err}")
|
||||
# Release cancel token on error path too (idempotent).
|
||||
if cancel_event is not None and (request_id or session_id):
|
||||
try:
|
||||
get_cancel_registry().unregister(request_id or session_id)
|
||||
except Exception:
|
||||
pass
|
||||
return Reply(ReplyType.ERROR, f"Agent error: {str(e)}")
|
||||
|
||||
def _schedule_mcp_hot_reload(self, agent):
|
||||
|
||||
@@ -2,44 +2,40 @@
|
||||
Agent Event Handler - Handles agent events and thinking process output
|
||||
"""
|
||||
|
||||
from common import const
|
||||
from common.log import logger
|
||||
|
||||
# Cap intermediate thinking messages on weixin to stay within send quota.
# Up to this many thinking messages are sent immediately by
# AgentEventHandler._send_to_channel; later ones are buffered and merged.
WEIXIN_THINKING_INSTANT_MAX = 7
|
||||
|
||||
|
||||
class AgentEventHandler:
|
||||
"""
|
||||
Handles agent events and optionally sends intermediate messages to channel
|
||||
"""
|
||||
|
||||
|
||||
def __init__(self, context=None, original_callback=None):
    """
    Set up the per-run handler state.

    Args:
        context: COW context; when it exposes ``kwargs`` these supply the
            ``channel`` object and the ``channel_type`` string.
        original_callback: Optional event callback to chain after this handler.
    """
    self.context = context
    self.original_callback = original_callback

    # Only a truthy context that exposes ``kwargs`` can carry channel info.
    has_kwargs = bool(context) and hasattr(context, "kwargs")

    # Channel used for sending intermediate messages (None when unavailable).
    self.channel = context.kwargs.get("channel") if has_kwargs else None

    # Text streamed so far in the current turn, and that turn's index.
    self.current_content = ""
    self.turn_number = 0

    channel_type = (context.kwargs.get("channel_type", "") or "") if has_kwargs else ""
    self._is_weixin = channel_type == const.WEIXIN

    # Weixin throttling state: messages already sent, and messages held back.
    self._thinking_sent_count = 0
    self._merged_buf: list[str] = []
|
||||
|
||||
def handle_event(self, event):
|
||||
"""
|
||||
Main event handler
|
||||
|
||||
Args:
|
||||
event: Event dict with type and data
|
||||
"""
|
||||
event_type = event.get("type")
|
||||
data = event.get("data", {})
|
||||
|
||||
# Dispatch to specific handlers
|
||||
|
||||
if event_type == "turn_start":
|
||||
self._handle_turn_start(data)
|
||||
elif event_type == "message_update":
|
||||
@@ -52,25 +48,23 @@ class AgentEventHandler:
|
||||
self._handle_tool_execution_start(data)
|
||||
elif event_type == "tool_execution_end":
|
||||
self._handle_tool_execution_end(data)
|
||||
|
||||
# Call original callback if provided
|
||||
elif event_type == "agent_end":
|
||||
self._handle_agent_end(data)
|
||||
|
||||
if self.original_callback:
|
||||
self.original_callback(event)
|
||||
|
||||
|
||||
def _handle_turn_start(self, data):
|
||||
"""Handle turn start event"""
|
||||
self.turn_number = data.get("turn", 0)
|
||||
self.current_content = ""
|
||||
|
||||
|
||||
def _handle_message_update(self, data):
|
||||
"""Handle message update event (streaming content text)"""
|
||||
delta = data.get("delta", "")
|
||||
self.current_content += delta
|
||||
|
||||
|
||||
def _handle_message_end(self, data):
|
||||
"""Handle message end event"""
|
||||
tool_calls = data.get("tool_calls", [])
|
||||
|
||||
|
||||
if tool_calls:
|
||||
if self.current_content.strip():
|
||||
logger.info(f"💭 {self.current_content.strip()[:200]}{'...' if len(self.current_content) > 200 else ''}")
|
||||
@@ -78,35 +72,54 @@ class AgentEventHandler:
|
||||
else:
|
||||
if self.current_content.strip():
|
||||
logger.debug(f"💬 {self.current_content.strip()[:200]}{'...' if len(self.current_content) > 200 else ''}")
|
||||
|
||||
# Drain weixin buffer before final reply leaves chat_channel
|
||||
self._flush_merged_now()
|
||||
|
||||
self.current_content = ""
|
||||
|
||||
|
||||
def _handle_agent_end(self, data):
    """Handle agent end event: send any thinking messages still held in the merge buffer."""
    self._flush_merged_now()
|
||||
|
||||
def _handle_tool_execution_start(self, data):
    """Handle tool execution start event - logged by agent_stream.py"""
    # Intentionally a no-op here; `data` is ignored.
    pass
|
||||
|
||||
|
||||
def _handle_tool_execution_end(self, data):
    """Handle tool execution end event - logged by agent_stream.py"""
    # Intentionally a no-op here; `data` is ignored.
    pass
|
||||
|
||||
|
||||
def _send_to_channel(self, message):
|
||||
"""
|
||||
Try to send intermediate message to channel.
|
||||
Skipped in SSE mode because thinking text is already streamed via on_event.
|
||||
"""
|
||||
if self.context and self.context.get("on_event"):
|
||||
return
|
||||
if not self.channel:
|
||||
return
|
||||
|
||||
if not self._is_weixin:
|
||||
self._do_send(message)
|
||||
return
|
||||
|
||||
if self._thinking_sent_count < WEIXIN_THINKING_INSTANT_MAX:
|
||||
self._do_send(message)
|
||||
self._thinking_sent_count += 1
|
||||
return
|
||||
|
||||
self._merged_buf.append(message)
|
||||
|
||||
def _flush_merged_now(self):
|
||||
if not self._merged_buf:
|
||||
return
|
||||
merged = "\n\n".join(self._merged_buf)
|
||||
count = len(self._merged_buf)
|
||||
self._merged_buf = []
|
||||
logger.debug(f"[AgentEventHandler] Flushing {count} merged thinking msgs, len={len(merged)}")
|
||||
self._do_send(merged)
|
||||
self._thinking_sent_count += 1
|
||||
|
||||
def _do_send(self, message):
    """Wrap *message* in a TEXT reply and pass it to the channel's ``_send``.

    Any exception (including a failed import) is logged at debug level only.
    """
    try:
        from bridge.reply import Reply, ReplyType

        self.channel._send(Reply(ReplyType.TEXT, message), self.context)
    except Exception as err:
        logger.debug(f"[AgentEventHandler] Failed to send to channel: {err}")
|
||||
|
||||
if self.channel:
|
||||
try:
|
||||
from bridge.reply import Reply, ReplyType
|
||||
reply = Reply(ReplyType.TEXT, message)
|
||||
self.channel._send(reply, self.context)
|
||||
except Exception as e:
|
||||
logger.debug(f"[AgentEventHandler] Failed to send to channel: {e}")
|
||||
|
||||
def log_summary(self):
    """Log execution summary - simplified (currently a no-op)."""
    # Summary removed as per user request
    # Real-time logging during execution is sufficient
    pass
|
||||
|
||||
@@ -63,6 +63,10 @@ class Bridge(object):
|
||||
if model_type and model_type.startswith("deepseek"):
|
||||
self.btype["chat"] = const.DEEPSEEK
|
||||
|
||||
# 小米 MiMo 系列模型,全部以 mimo- 开头
|
||||
if model_type and model_type.startswith("mimo-"):
|
||||
self.btype["chat"] = const.MIMO
|
||||
|
||||
if model_type and isinstance(model_type, str):
|
||||
lowered_model_type = model_type.lower()
|
||||
if lowered_model_type == const.QIANFAN or lowered_model_type.startswith("ernie"):
|
||||
|
||||
@@ -39,6 +39,9 @@ def create_channel(channel_type) -> Channel:
|
||||
elif channel_type == const.QQ:
|
||||
from channel.qq.qq_channel import QQChannel
|
||||
ch = QQChannel()
|
||||
elif channel_type == const.TELEGRAM:
|
||||
from channel.telegram.telegram_channel import TelegramChannel
|
||||
ch = TelegramChannel()
|
||||
elif channel_type in (const.WEIXIN, "wx"):
|
||||
from channel.weixin.weixin_channel import WeixinChannel
|
||||
ch = WeixinChannel()
|
||||
|
||||
@@ -438,8 +438,21 @@ class ChatChannel(Channel):
|
||||
|
||||
return func
|
||||
|
||||
# Chat commands that must bypass the per-session serial queue,
|
||||
# otherwise /cancel would queue behind the task it tries to cancel.
|
||||
# Use /cancel (not /stop) to avoid colliding with `cow stop` CLI.
|
||||
_BYPASS_QUEUE_COMMANDS = ("/cancel",)
|
||||
|
||||
def produce(self, context: Context):
|
||||
session_id = context["session_id"]
|
||||
|
||||
# Fast path: /cancel must not enter the queue.
|
||||
if context.type == ContextType.TEXT and context.content:
|
||||
stripped = context.content.strip().lower()
|
||||
if stripped in self._BYPASS_QUEUE_COMMANDS:
|
||||
self._handle_cancel_command(context, session_id)
|
||||
return
|
||||
|
||||
with self.lock:
|
||||
if session_id not in self.sessions:
|
||||
self.sessions[session_id] = [
|
||||
@@ -451,6 +464,29 @@ class ChatChannel(Channel):
|
||||
else:
|
||||
self.sessions[session_id][0].put(context)
|
||||
|
||||
def _handle_cancel_command(self, context: Context, session_id: str) -> None:
    """Cancel in-flight agent runs for *session_id* and answer inline.

    Executes synchronously in the calling thread. The outcome text is sent
    through ``_send_reply``. Any failure is logged as a warning.
    """
    try:
        from agent.protocol import get_cancel_registry
        from bridge.reply import Reply, ReplyType

        registry = get_cancel_registry()
        cancelled = registry.cancel_session(session_id)
        if cancelled > 0:
            text = "🛑 已中止"
        else:
            text = "当前没有可中止的任务。"
        logger.info(
            f"[chat_channel] /cancel fast-path: session={session_id}, cancelled={cancelled}"
        )
        self._send_reply(context, Reply(ReplyType.TEXT, text))
    except Exception as err:
        logger.warning(f"[chat_channel] /cancel fast-path failed: {err}")
|
||||
|
||||
# 消费者函数,单独线程,用于从消息队列中取出消息并处理
|
||||
def consume(self):
|
||||
while True:
|
||||
|
||||
@@ -752,6 +752,9 @@ class FeiShuChanel(ChatChannel):
|
||||
init_in_flight = [False]
|
||||
# 一旦初始化失败就长期标记为 disabled,本次回复不再尝试任何流式调用
|
||||
disabled = [False]
|
||||
# True after agent_cancelled: agent_end stops rewriting the card
|
||||
# with stale final_response and just finalizes current content.
|
||||
cancelled = [False]
|
||||
lock = threading.Lock()
|
||||
|
||||
# ---- 异步推送队列 ----------------------------------------------------
|
||||
@@ -1076,18 +1079,42 @@ class FeiShuChanel(ChatChannel):
|
||||
message_id[0] = None
|
||||
sequence[0] = 0
|
||||
|
||||
elif event_type == "agent_cancelled":
|
||||
# Lock channel into "no-rewrite" mode: the subsequent
|
||||
# agent_end's final_response is from the last *completed*
|
||||
# turn (the user already saw it), so rewriting the card
|
||||
# would duplicate it visually.
|
||||
with lock:
|
||||
cancelled[0] = True
|
||||
|
||||
elif event_type == "agent_end":
|
||||
# 最终回复:用 final_response 覆盖当前流式卡片,然后关闭流式模式。
|
||||
final_response = data.get("final_response", "")
|
||||
if not final_response:
|
||||
return
|
||||
final_text = str(final_response)
|
||||
# 标记 streamed 让 chat_channel 跳过 send()
|
||||
context["feishu_streamed"] = True
|
||||
|
||||
with lock:
|
||||
was_cancelled = cancelled[0]
|
||||
has_card = card_id[0] is not None
|
||||
init_busy = init_in_flight[0]
|
||||
pending_text = current_text[0]
|
||||
|
||||
if was_cancelled:
|
||||
# Cancelled path: finalize the in-flight card with
|
||||
# partial output (or a short marker if empty); drop
|
||||
# stale final_response to avoid duplicating last turn.
|
||||
if has_card:
|
||||
_drain_push_queue()
|
||||
partial = (pending_text or "").rstrip()
|
||||
final_text = partial or "_(已中止)_"
|
||||
_stream_update_text(final_text)
|
||||
_close_streaming_mode(final_text)
|
||||
push_queue.put(None)
|
||||
return
|
||||
|
||||
if not final_response:
|
||||
return
|
||||
final_text = str(final_response)
|
||||
|
||||
# 罕见情况:agent_end 触发时还没创建过卡片(极快返回 / 没有
|
||||
# message_update),主动创建一张承载 final_text。
|
||||
|
||||
0
channel/telegram/__init__.py
Normal file
0
channel/telegram/__init__.py
Normal file
676
channel/telegram/telegram_channel.py
Normal file
676
channel/telegram/telegram_channel.py
Normal file
@@ -0,0 +1,676 @@
|
||||
"""
|
||||
Telegram channel via Bot API (long polling mode).
|
||||
|
||||
Features:
|
||||
- Single chat & group chat (text / photo / voice / video / document)
|
||||
- Group trigger: @mention or reply-to-bot (configurable)
|
||||
- /cancel fast-path matches Web channel behaviour
|
||||
- Auto-register bot commands menu on startup (mirrors Web slash menu)
|
||||
- Optional HTTP/SOCKS5 proxy support for restricted networks
|
||||
|
||||
Implementation note:
|
||||
python-telegram-bot is async-first. We run the bot inside a dedicated
|
||||
thread with its own asyncio loop so the rest of cow (which is sync)
|
||||
stays untouched. Inbound updates are dispatched onto cow's existing
|
||||
sync ChatChannel.produce() pipeline; outbound send() schedules
|
||||
coroutines back onto that loop via asyncio.run_coroutine_threadsafe.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import re
|
||||
import threading
|
||||
|
||||
from bridge.context import Context, ContextType
|
||||
from bridge.reply import Reply, ReplyType
|
||||
from channel.chat_channel import ChatChannel, check_prefix
|
||||
from channel.telegram.telegram_message import TelegramMessage
|
||||
from common.expired_dict import ExpiredDict
|
||||
from common.log import logger
|
||||
from common.singleton import singleton
|
||||
from config import conf
|
||||
|
||||
# Bot command menu, aligned with Web slash commands.
# Top-level commands only; sub-commands are entered with a space (e.g. "/skill list").
# Each entry is a (command, description) pair; _async_main converts them to
# telegram.BotCommand objects and registers them with set_my_commands.
TELEGRAM_BOT_COMMANDS = [
    ("help", "Show command help"),
    ("status", "Show running status"),
    ("context", "View/clear conversation context (sub: clear)"),
    ("skill", "Manage skills (list/search/install/...)"),
    ("memory", "Manage memory (sub: dream)"),
    ("knowledge", "Manage knowledge base (list/on/off)"),
    ("config", "Show current config"),
    ("cancel", "Cancel running agent task"),
    ("logs", "Show recent logs"),
    ("version", "Show version"),
]
|
||||
|
||||
|
||||
@singleton
|
||||
class TelegramChannel(ChatChannel):
|
||||
NOT_SUPPORT_REPLYTYPE = []
|
||||
|
||||
def __init__(self):
    """Initialise idle bot state and relax the global trigger configuration.

    The token and username start empty and are filled in by ``startup`` and
    ``_async_main``. The global config is overwritten so that the parent
    class's group whitelist / single-chat prefix checks always pass.
    """
    super().__init__()

    self.bot_token = ""
    self.bot_username = ""  # used for @-mention matching

    # PTB objects and the dedicated asyncio loop / thread; created on startup.
    self._bot = self._application = None
    self._loop = self._loop_thread = None
    self._stop_event = threading.Event()

    # Idempotent dedup; TG occasionally redelivers the same update on flaky networks
    self._received_msgs = ExpiredDict(60 * 60 * 1)

    # Disable group whitelist / prefix checks (we handle triggering ourselves
    # in _should_reply_in_group), aligned with feishu / wecom_bot channels.
    conf()["group_name_white_list"] = ["ALL_GROUP"]
    conf()["single_chat_prefix"] = [""]
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Lifecycle
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def startup(self):
    """Validate configuration, then run the bot on a dedicated event-loop thread.

    Reports a startup error and returns early when ``telegram_token`` is
    missing or python-telegram-bot cannot be imported. Otherwise this call
    blocks until the loop thread exits.
    """

    def _abort(err):
        # Log and surface a fatal startup problem.
        logger.error(err)
        self.report_startup_error(err)

    self.bot_token = conf().get("telegram_token", "")
    if not self.bot_token:
        _abort("[Telegram] telegram_token is required")
        return

    try:
        from telegram.ext import (
            Application,
            MessageHandler,
            CommandHandler,
            filters,
        )
    except ImportError:
        _abort(
            "[Telegram] python-telegram-bot is not installed. "
            "Run: pip install python-telegram-bot"
        )
        return

    # The asyncio loop lives in its own thread so the synchronous rest of
    # the program is untouched.
    self._loop = asyncio.new_event_loop()

    def _run_loop():
        asyncio.set_event_loop(self._loop)
        try:
            self._loop.run_until_complete(self._async_main(Application, MessageHandler, CommandHandler, filters))
        except Exception as e:
            logger.error(f"[Telegram] event loop crashed: {e}", exc_info=True)
            self.report_startup_error(str(e))
        finally:
            try:
                self._loop.close()
            except Exception:
                pass
            logger.info("[Telegram] event loop exited")

    worker = threading.Thread(target=_run_loop, daemon=True, name="telegram-loop")
    self._loop_thread = worker
    worker.start()
    # startup() is a blocking call: wait here until the loop thread exits.
    worker.join()
|
||||
|
||||
async def _async_main(self, Application, MessageHandler, CommandHandler, filters):
    """Build the Application, register handlers, and poll until stopped.

    The four arguments are the python-telegram-bot names imported by
    ``startup``. Returns early (after reporting a startup error) when
    ``get_me`` fails; a failure to register the command menu is only logged.
    """
    app_builder = Application.builder().token(self.bot_token)

    # Proxy: prefer telegram_proxy config, fall back to HTTPS_PROXY env var
    proxy_url = conf().get("telegram_proxy", "") or os.environ.get("HTTPS_PROXY", "")
    if proxy_url:
        try:
            app_builder = app_builder.proxy(proxy_url).get_updates_proxy(proxy_url)
            logger.info(f"[Telegram] using proxy: {proxy_url}")
        except Exception as err:
            logger.warning(f"[Telegram] proxy config failed, fallback to direct: {err}")

    # Media uploads (photo/voice/video/document) over a proxy can be slow,
    # so raise the read/write/connect/pool timeouts.
    app_builder = app_builder.read_timeout(60)
    app_builder = app_builder.write_timeout(120)
    app_builder = app_builder.connect_timeout(30)
    app_builder = app_builder.pool_timeout(30)

    application = app_builder.build()
    self._application = application
    self._bot = application.bot

    # Our own username is needed for @-mention matching in groups.
    try:
        me = await self._bot.get_me()
        self.bot_username = me.username or ""
        self.name = self.bot_username  # ChatChannel uses self.name to strip @-mention
        logger.info(f"[Telegram] Bot logged in as @{self.bot_username} (id={me.id})")
    except Exception as err:
        failure = f"[Telegram] get_me failed: {err}"
        logger.error(failure)
        self.report_startup_error(failure)
        return

    # Register the command menu (failure is non-fatal)
    if conf().get("telegram_register_commands", True):
        try:
            from telegram import BotCommand

            cmds = []
            for name, desc in TELEGRAM_BOT_COMMANDS:
                cmds.append(BotCommand(name, desc))
            await self._bot.set_my_commands(cmds)
            logger.info(f"[Telegram] Registered {len(cmds)} bot commands")
        except Exception as err:
            logger.warning(f"[Telegram] set_my_commands failed: {err}")

    # Handler registration order is kept as-is:
    # 1) /cancel uses the fast-path
    # 2) normal messages (text + media)
    # 3) other slash commands are forwarded as plain text for the agent
    handlers = (
        CommandHandler("cancel", self._on_cancel),
        MessageHandler(filters.ALL & ~filters.COMMAND, self._on_message),
        MessageHandler(filters.COMMAND, self._on_command_passthrough),
    )
    for handler in handlers:
        application.add_handler(handler)

    # Start polling. drop_pending_updates avoids replaying backlog after restart.
    logger.info("[Telegram] Starting long polling...")
    await application.initialize()
    await application.start()
    await application.updater.start_polling(drop_pending_updates=True)
    self.report_startup_success()
    logger.info("[Telegram] ✅ Telegram bot ready, polling for updates")

    # Idle until stop() sets the event, then tear everything down.
    try:
        while not self._stop_event.is_set():
            await asyncio.sleep(0.5)
    finally:
        try:
            await application.updater.stop()
            await application.stop()
            await application.shutdown()
        except Exception as err:
            logger.warning(f"[Telegram] shutdown error: {err}")
|
||||
|
||||
def stop(self):
    """Signal the polling loop to exit and wait up to 10 seconds for its thread."""
    logger.info("[Telegram] stop() called")
    self._stop_event.set()
    worker = self._loop_thread
    if worker and worker.is_alive():
        try:
            worker.join(timeout=10)
        except Exception:
            pass
    logger.info("[Telegram] stop() completed")
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Inbound: telegram update -> ChatMessage -> ChatChannel.produce
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _on_cancel(self, update, _context):
    """Fast-path for ``/cancel``: cancel the chat's session directly and reply with the outcome.

    Errors are logged and, on a best-effort basis, reported back to the chat.
    """
    try:
        from agent.protocol import get_cancel_registry

        session_id = self._compute_session_id(update)
        cancelled = get_cancel_registry().cancel_session(session_id)
        if cancelled:
            text = "Current task cancelled."
        else:
            text = "No running task to cancel."
        await update.effective_message.reply_text(text)
        logger.info(f"[Telegram] /cancel session={session_id}, cancelled={cancelled}")
    except Exception as err:
        logger.error(f"[Telegram] /cancel error: {err}", exc_info=True)
        try:
            await update.effective_message.reply_text(f"⚠️ /cancel failed: {err}")
        except Exception:
            # Reporting the failure is best-effort only.
            pass
|
||||
|
||||
async def _on_command_passthrough(self, update, _context):
    """All non-/cancel commands fall through to plain message handling.

    Delegates unchanged to ``_on_message`` with the same arguments.
    """
    await self._on_message(update, _context)
|
||||
|
||||
async def _on_message(self, update, _context):
    """Telegram update entry point: parse the message, build a TelegramMessage, and produce() it.

    Steps: drop duplicates, apply the group trigger gate, parse/download the
    payload, strip the bot mention in groups, then either cache standalone
    media for the next text query or dispatch a context. Media sent with a
    caption is merged into a single text query. All exceptions are logged.
    """
    try:
        message = update.effective_message
        chat = update.effective_chat
        if not (message and chat):
            return

        # Idempotent dedup keyed on chat id + message id.
        msg_uid = f"{chat.id}:{message.message_id}"
        if self._received_msgs.get(msg_uid):
            return
        self._received_msgs[msg_uid] = True

        is_group = chat.type in ("group", "supergroup")

        if is_group:
            # Debug log: helpful when group messages are silently dropped
            logger.debug(
                f"[Telegram] group update received: chat_id={chat.id}, "
                f"text={(message.text or message.caption or '')[:40]!r}, "
                f"reply_to_bot={bool(message.reply_to_message and message.reply_to_message.from_user and message.reply_to_message.from_user.username == self.bot_username)}"
            )
            # Group trigger gate (silently drop if not triggered)
            if not self._should_reply_in_group(update):
                logger.debug(f"[Telegram] group message not triggered (need @{self.bot_username} or reply), skip")
                return

        # Parse message type and download media if needed. Media messages
        # with a caption yield both the local path and the caption text.
        ctype, content, caption = await self._parse_message(message)
        if ctype is None:
            logger.debug(f"[Telegram] unsupported message type, skip. msg={message}")
            return

        # Strip @bot mention for group text/caption
        if is_group and self.bot_username:
            if ctype == ContextType.TEXT and content:
                content = self._strip_at_mention(content)
            if caption:
                caption = self._strip_at_mention(caption)

        tg_msg = TelegramMessage(
            update,
            is_group=is_group,
            bot_username=self.bot_username,
            ctype=ctype,
            content=content,
        )
        # Reaching this point in a group means the trigger gate passed.
        tg_msg.is_at = is_group

        # File cache: standalone media is cached, the next text query attaches it
        from channel.file_cache import get_file_cache

        file_cache = get_file_cache()
        session_id = self._compute_session_id(update)

        if ctype in (ContextType.IMAGE, ContextType.FILE) and caption:
            # Media + caption together: a complete query, bypassing the cache.
            tag = "image" if ctype == ContextType.IMAGE else "file"
            tg_msg.ctype = ContextType.TEXT
            tg_msg.content = f"{caption}\n[{tag}: {content}]"
            ctype = ContextType.TEXT
            logger.info(f"[Telegram] Media+caption merged for session {session_id}")
            # continues into the TEXT handling below
        elif ctype == ContextType.IMAGE:
            file_cache.add(session_id, content, file_type="image")
            logger.info(f"[Telegram] Image cached for session {session_id}, waiting for query...")
            return
        elif ctype == ContextType.FILE:
            file_cache.add(session_id, content, file_type="file")
            logger.info(f"[Telegram] File cached for session {session_id}: {content}")
            return

        if ctype == ContextType.TEXT:
            cached_files = file_cache.get(session_id)
            if cached_files:
                refs = [
                    f"[{fi['type'] if fi['type'] in ('image', 'video') else 'file'}: {fi['path']}]"
                    for fi in cached_files
                ]
                tg_msg.content = (tg_msg.content or "") + "\n" + "\n".join(refs)
                file_cache.clear(session_id)
                logger.info(f"[Telegram] Attached {len(cached_files)} cached file(s) to query")

        # Dispatch to the main pipeline via this channel's _compose_context.
        context = self._compose_context(
            tg_msg.ctype,
            tg_msg.content,
            isgroup=is_group,
            msg=tg_msg,
        )
        if context:
            context["session_id"] = session_id
            context["receiver"] = str(chat.id)
            context["telegram_chat_id"] = chat.id
            context["telegram_reply_to_msg_id"] = message.message_id if is_group else None
            self.produce(context)
            logger.debug(f"[Telegram] received: type={ctype}, content={str(tg_msg.content)[:80]}")

    except Exception as err:
        logger.error(f"[Telegram] _on_message error: {err}", exc_info=True)
|
||||
|
||||
async def _parse_message(self, message):
|
||||
"""Parse a telegram message and return (ctype, content, caption).
|
||||
|
||||
- content is text for ContextType.TEXT, otherwise the local file path
|
||||
- caption is the optional text accompanying a media message; empty for plain text
|
||||
"""
|
||||
caption = (message.caption or "").strip()
|
||||
|
||||
if message.photo:
|
||||
largest = message.photo[-1]
|
||||
path = await self._download_file(largest.file_id, suffix=".jpg")
|
||||
return (ContextType.IMAGE, path, caption) if path else (None, None, "")
|
||||
|
||||
if message.voice or message.audio:
|
||||
audio_obj = message.voice or message.audio
|
||||
suffix = ".ogg" if message.voice else (
|
||||
"." + (audio_obj.mime_type.split("/")[-1] if getattr(audio_obj, "mime_type", "") else "mp3")
|
||||
)
|
||||
path = await self._download_file(audio_obj.file_id, suffix=suffix)
|
||||
return (ContextType.VOICE, path, caption) if path else (None, None, "")
|
||||
|
||||
if message.video or message.video_note:
|
||||
video_obj = message.video or message.video_note
|
||||
path = await self._download_file(video_obj.file_id, suffix=".mp4")
|
||||
return (ContextType.FILE, path, caption) if path else (None, None, "")
|
||||
|
||||
if message.document:
|
||||
doc = message.document
|
||||
ext = ""
|
||||
if doc.file_name and "." in doc.file_name:
|
||||
ext = "." + doc.file_name.rsplit(".", 1)[-1]
|
||||
path = await self._download_file(doc.file_id, suffix=ext, original_name=doc.file_name)
|
||||
if not path:
|
||||
return (None, None, "")
|
||||
# Image-typed documents (user picked "send as file") are treated as images
|
||||
mime = (doc.mime_type or "").lower()
|
||||
if mime.startswith("image/"):
|
||||
return (ContextType.IMAGE, path, caption)
|
||||
return (ContextType.FILE, path, caption)
|
||||
|
||||
if message.text:
|
||||
return (ContextType.TEXT, message.text.strip(), "")
|
||||
|
||||
return (None, None, "")
|
||||
|
||||
async def _download_file(self, file_id: str, suffix: str = "", original_name: str = ""):
    """Download a Telegram file into the local tmp dir.

    Args:
        file_id: Telegram file id passed to ``bot.get_file``.
        suffix: Extension used when no original name is available.
        original_name: File name supplied by the sender, if any.

    Returns:
        The local path on success, or None on any failure (which is logged).
    """
    try:
        f = await self._bot.get_file(file_id)
        tmp_dir = TelegramMessage.get_tmp_dir()
        # The sender controls the document name. Keep only its final path
        # component (both separator styles) so it can never point outside
        # tmp_dir or into a non-existent sub-directory.
        clean_name = os.path.basename((original_name or "").replace("\\", "/"))
        if clean_name:
            # Prefix with file_id to avoid name collisions.
            local_name = f"{file_id}_{clean_name}"
        else:
            local_name = f"{file_id}{suffix or ''}"
        local_path = os.path.join(tmp_dir, local_name)
        await f.download_to_drive(custom_path=local_path)
        logger.debug(f"[Telegram] downloaded file_id={file_id} -> {local_path}")
        return local_path
    except Exception as e:
        logger.error(f"[Telegram] download_file failed (file_id={file_id}): {e}")
        return None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Group trigger logic
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _should_reply_in_group(self, update) -> bool:
    """Decide from ``telegram_group_trigger`` whether a group message gets a reply.

    - ``"all"``: always reply.
    - any mode: reply when the bot is @-mentioned.
    - ``"mention_or_reply"`` (the default): also reply when the message is a
      reply to a message whose sender username equals the bot's.
    """
    mode = conf().get("telegram_group_trigger", "mention_or_reply")
    if mode == "all":
        return True

    message = update.effective_message
    if not message:
        return False

    if self.bot_username and self._is_mentioned(message, self.bot_username):
        return True

    if mode != "mention_or_reply":
        return False

    replied = message.reply_to_message
    return bool(replied and replied.from_user and replied.from_user.username == self.bot_username)
|
||||
|
||||
@staticmethod
|
||||
def _is_mentioned(message, bot_username: str) -> bool:
|
||||
"""Check whether entities/caption_entities contain a @mention of the bot."""
|
||||
bot_at = "@" + bot_username.lower()
|
||||
text = (message.text or message.caption or "").lower()
|
||||
if bot_at in text:
|
||||
return True
|
||||
# Also check entities strictly to support text_mention (no-username @)
|
||||
for ent in (message.entities or []) + (message.caption_entities or []):
|
||||
if ent.type == "mention":
|
||||
src = message.text or message.caption or ""
|
||||
if src[ent.offset: ent.offset + ent.length].lower() == bot_at:
|
||||
return True
|
||||
return False
|
||||
|
||||
def _strip_at_mention(self, content: str) -> str:
|
||||
"""Strip @bot_username from group text (case-insensitive)."""
|
||||
if not content or not self.bot_username:
|
||||
return content
|
||||
pattern = re.compile(r"@" + re.escape(self.bot_username), re.IGNORECASE)
|
||||
return pattern.sub("", content).strip()
|
||||
|
||||
@staticmethod
|
||||
def _compute_session_id(update) -> str:
|
||||
chat = update.effective_chat
|
||||
user = update.effective_user
|
||||
is_group = chat.type in ("group", "supergroup")
|
||||
if is_group:
|
||||
if conf().get("group_shared_session", True):
|
||||
return f"tg_group_{chat.id}"
|
||||
return f"tg_group_{chat.id}_{user.id}"
|
||||
return f"tg_user_{user.id}"
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Override _compose_context: skip the parent's group whitelist/at checks
|
||||
# (already handled in _on_message via _should_reply_in_group). Same idea
|
||||
# as the feishu channel.
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _compose_context(self, ctype: ContextType, content, **kwargs):
    """Build a Context for a message without the parent's group whitelist / at checks.

    Fills in ``channel_type``, ``origin_ctype``, ``session_id`` and
    ``receiver``. For text, an image-creation prefix switches the type to
    IMAGE_CREATE and is removed from the content. ``desire_rtype`` is set to
    VOICE when the voice-reply options in the config ask for it.
    """
    context = Context(ctype, content)
    context.kwargs = kwargs
    if "channel_type" not in context:
        context["channel_type"] = self.channel_type
    if "origin_ctype" not in context:
        context["origin_ctype"] = ctype

    cmsg = context["msg"]
    if not cmsg.is_group:
        context["session_id"] = cmsg.from_user_id
    elif conf().get("group_shared_session", True):
        context["session_id"] = cmsg.other_user_id
    else:
        context["session_id"] = f"{cmsg.from_user_id}:{cmsg.other_user_id}"
    context["receiver"] = cmsg.other_user_id

    if ctype == ContextType.TEXT:
        img_match_prefix = check_prefix(content, conf().get("image_create_prefix"))
        if img_match_prefix:
            # Drop only the first occurrence of the matched prefix.
            content = content.replace(img_match_prefix, "", 1)
            context.type = ContextType.IMAGE_CREATE
        else:
            context.type = ContextType.TEXT
        context.content = (content or "").strip()
        if "desire_rtype" not in context and conf().get("always_reply_voice"):
            context["desire_rtype"] = ReplyType.VOICE
    elif ctype == ContextType.VOICE:
        if "desire_rtype" not in context and (
            conf().get("voice_reply_voice") or conf().get("always_reply_voice")
        ):
            context["desire_rtype"] = ReplyType.VOICE

    return context
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Outbound: ChatChannel.send -> Telegram API
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def send(self, reply: Reply, context: Context):
    """Send ``reply`` to Telegram by scheduling ``_async_send`` on the bot loop.

    Called from cow's sync main thread; the coroutine is marshalled onto the
    loop thread and this method blocks until it finishes or the wait times out.

    Args:
        reply: The reply to deliver.
        context: Must carry ``telegram_chat_id``; ``telegram_reply_to_msg_id``
            is optional and used as the message to reply to.

    Returns:
        None. Failures are logged, never raised.
    """
    # Function-scope import: on Python < 3.11 this is a different class from
    # the builtin TimeoutError, so it has to be named explicitly.
    from concurrent.futures import TimeoutError as FutureTimeoutError

    if self._loop is None or self._bot is None:
        logger.warning("[Telegram] bot not ready, drop reply")
        return

    chat_id = context.get("telegram_chat_id")
    reply_to = context.get("telegram_reply_to_msg_id")
    if chat_id is None:
        logger.warning("[Telegram] no telegram_chat_id in context, drop reply")
        return

    coro = self._async_send(reply, chat_id, reply_to)
    try:
        future = asyncio.run_coroutine_threadsafe(coro, self._loop)
        # Media uploads through a proxy can be slow; let PTB's own timeouts win
        future.result(timeout=180)
    except FutureTimeoutError:
        # str() of this exception is empty, so spell the cause out. The
        # coroutine is not cancelled here, so a slow send may still complete.
        logger.error("[Telegram] send failed: timed out after 180s waiting for the send to finish")
    except Exception as e:
        logger.error(f"[Telegram] send failed: {e}")
|
||||
|
||||
# Number of retries for transient network errors (proxy hiccups etc.)
|
||||
_SEND_RETRIES = 2
|
||||
_SEND_RETRY_BACKOFF = 2.0 # seconds
|
||||
|
||||
async def _send_with_retry(self, send_fn, *, label: str):
    """Run a single Telegram API call with retries for transient network errors.

    Args:
        send_fn: Zero-argument callable returning an awaitable; it is invoked
            afresh on every attempt.
        label: Short name of the call, used only in log messages.

    Returns:
        Whatever ``send_fn`` returns on the first successful attempt.

    Raises:
        BadRequest: Immediately, without retrying.
        NetworkError / TimedOut: The last error once all attempts are used up.
    """
    from telegram.error import BadRequest, NetworkError, TimedOut

    last_err = None
    for attempt in range(self._SEND_RETRIES + 1):
        try:
            return await send_fn()
        except BadRequest:
            # BadRequest derives from NetworkError but means Telegram rejected
            # the request itself; resending the same payload cannot succeed.
            raise
        except (NetworkError, TimedOut) as e:
            last_err = e
            if attempt >= self._SEND_RETRIES:
                break
            # Linear backoff: 1x, 2x, ... the base delay.
            wait = self._SEND_RETRY_BACKOFF * (attempt + 1)
            logger.warning(
                f"[Telegram] {label} transient error (attempt {attempt + 1}/"
                f"{self._SEND_RETRIES + 1}): {e}; retry in {wait}s"
            )
            await asyncio.sleep(wait)
    raise last_err
|
||||
|
||||
async def _async_send(self, reply: Reply, chat_id, reply_to_msg_id):
    """Deliver ``reply`` to ``chat_id`` through the Telegram bot API.

    Dispatches on ``reply.type``: text-like replies are split into chunks,
    images/voice/files are uploaded, and any other type is sent as plain
    text. Every API call goes through ``_send_with_retry``.

    Args:
        reply: Reply whose ``type`` and ``content`` decide what is sent.
        chat_id: Target Telegram chat.
        reply_to_msg_id: Message to reply to (may be None).

    Returns:
        None. Any exception is logged here and not propagated.
    """
    try:
        rtype = reply.type
        content = reply.content

        if rtype in (ReplyType.TEXT, ReplyType.INFO, ReplyType.ERROR):
            # Telegram caps a single text message at 4096 chars; auto-split
            text = str(content) if content is not None else ""
            if not text:
                return
            for chunk in _split_text(text, 4000):
                await self._send_with_retry(
                    # Bind the chunk as a default so each callable keeps its own text.
                    lambda c=chunk: self._bot.send_message(
                        chat_id=chat_id,
                        text=c,
                        reply_to_message_id=reply_to_msg_id,
                        # Avoid failing the whole send if reply_to was deleted
                        allow_sending_without_reply=True,
                    ),
                    label="send_message",
                )

        elif rtype == ReplyType.IMAGE:
            # Already a local BytesIO. Rewind inside the retried callable so
            # every attempt uploads from the start, not from an exhausted stream.
            async def _send_image():
                content.seek(0)
                return await self._bot.send_photo(
                    chat_id=chat_id,
                    photo=content,
                    reply_to_message_id=reply_to_msg_id,
                    allow_sending_without_reply=True,
                )
            await self._send_with_retry(_send_image, label="send_photo")

        elif rtype == ReplyType.IMAGE_URL:
            url = str(content)
            if url.startswith("file://"):
                local = url[7:]

                # Open inside the callable so each retry gets a fresh stream
                async def _send_local_photo():
                    with open(local, "rb") as f:
                        return await self._bot.send_photo(
                            chat_id=chat_id, photo=f,
                            reply_to_message_id=reply_to_msg_id,
                            allow_sending_without_reply=True,
                        )
                await self._send_with_retry(_send_local_photo, label="send_photo(file)")
            else:
                await self._send_with_retry(
                    lambda: self._bot.send_photo(
                        chat_id=chat_id, photo=url,
                        reply_to_message_id=reply_to_msg_id,
                        allow_sending_without_reply=True,
                    ),
                    label="send_photo(url)",
                )

        elif rtype == ReplyType.VOICE:
            local = content[7:] if isinstance(content, str) and content.startswith("file://") else content

            async def _send_voice():
                with open(local, "rb") as f:
                    return await self._bot.send_voice(
                        chat_id=chat_id, voice=f,
                        reply_to_message_id=reply_to_msg_id,
                        allow_sending_without_reply=True,
                    )
            await self._send_with_retry(_send_voice, label="send_voice")

        elif rtype == ReplyType.FILE:
            # Videos go through send_video, everything else through send_document
            local = content[7:] if isinstance(content, str) and content.startswith("file://") else content
            # File replies may carry an accompanying text caption
            caption = getattr(reply, "text_content", None) or None
            is_video = isinstance(local, str) and local.lower().endswith(
                (".mp4", ".mov", ".avi", ".mkv", ".webm")
            )

            async def _send_file():
                with open(local, "rb") as f:
                    if is_video:
                        return await self._bot.send_video(
                            chat_id=chat_id, video=f, caption=caption,
                            reply_to_message_id=reply_to_msg_id,
                            allow_sending_without_reply=True,
                        )
                    return await self._bot.send_document(
                        chat_id=chat_id, document=f, caption=caption,
                        reply_to_message_id=reply_to_msg_id,
                        allow_sending_without_reply=True,
                    )
            await self._send_with_retry(_send_file, label="send_video" if is_video else "send_document")

        else:
            # Fallback: send as plain text
            await self._send_with_retry(
                lambda: self._bot.send_message(
                    chat_id=chat_id, text=str(content),
                    reply_to_message_id=reply_to_msg_id,
                    allow_sending_without_reply=True,
                ),
                label="send_message(fallback)",
            )

        logger.info(f"[Telegram] sent reply (type={rtype}, chat_id={chat_id})")

    except Exception as e:
        logger.error(f"[Telegram] _async_send error: {e}", exc_info=True)
|
||||
|
||||
|
||||
def _split_text(text: str, limit: int):
|
||||
"""Split long text preferring line breaks to keep markdown structure intact."""
|
||||
if len(text) <= limit:
|
||||
yield text
|
||||
return
|
||||
buf = []
|
||||
size = 0
|
||||
for line in text.splitlines(keepends=True):
|
||||
if size + len(line) > limit and buf:
|
||||
yield "".join(buf)
|
||||
buf, size = [], 0
|
||||
# Hard-split single lines that exceed the limit
|
||||
while len(line) > limit:
|
||||
yield line[:limit]
|
||||
line = line[limit:]
|
||||
buf.append(line)
|
||||
size += len(line)
|
||||
if buf:
|
||||
yield "".join(buf)
|
||||
62
channel/telegram/telegram_message.py
Normal file
62
channel/telegram/telegram_message.py
Normal file
@@ -0,0 +1,62 @@
|
||||
"""
|
||||
Telegram message adapter.
|
||||
|
||||
Convert a python-telegram-bot Update into cow's unified ChatMessage.
|
||||
File downloads are NOT performed here; the channel layer triggers
|
||||
bot.get_file() on demand because it requires the async event loop.
|
||||
"""
|
||||
import os
|
||||
|
||||
from bridge.context import ContextType
|
||||
from channel.chat_message import ChatMessage
|
||||
from common.utils import expand_path
|
||||
from config import conf
|
||||
|
||||
|
||||
class TelegramMessage(ChatMessage):
    """Unified ChatMessage view of a python-telegram-bot Update."""

    def __init__(self, update, is_group: bool = False, bot_username: str = "",
                 ctype: ContextType = ContextType.TEXT, content: str = ""):
        """Populate the ChatMessage fields from ``update``.

        Args:
            update: Update exposing ``effective_message``, ``effective_chat``
                and ``effective_user``.
            is_group: Whether the message came from a group chat.
            bot_username: Bot username; "telegram_bot" is used when empty.
            ctype: Context type to record on the message.
            content: Content to record on the message.
        """
        super().__init__(update)
        tg_message = update.effective_message
        tg_chat = update.effective_chat
        tg_user = update.effective_user

        # Basic fields; fall back to empty id / zero time when absent.
        if tg_message:
            self.msg_id = str(tg_message.message_id)
            self.create_time = int(tg_message.date.timestamp()) if tg_message.date else 0
        else:
            self.msg_id = ""
            self.create_time = 0
        self.ctype = ctype
        self.content = content

        # Sender: prefer the full name, then the username, then the id.
        if tg_user:
            sender_id = str(tg_user.id)
            sender_nick = tg_user.full_name or tg_user.username
        else:
            sender_id = "unknown"
            sender_nick = "unknown"
        self.from_user_id = sender_id
        self.from_user_nickname = sender_nick or sender_id

        # Recipient is always the bot itself.
        bot_identity = bot_username or "telegram_bot"
        self.to_user_id = bot_identity
        self.to_user_nickname = bot_identity

        self.is_group = is_group
        if not is_group:
            # Private chat: the counterpart is the sender.
            self.other_user_id = sender_id
            self.other_user_nickname = self.from_user_nickname
        else:
            # Group: other_user_* is the group, actual_user_* the sender.
            group_id = str(tg_chat.id)
            self.other_user_id = group_id
            self.other_user_nickname = tg_chat.title or group_id
            self.actual_user_id = sender_id
            self.actual_user_nickname = self.from_user_nickname

        # Set later by the channel layer when the bot is @-mentioned or replied to.
        self.is_at = False

    @staticmethod
    def get_tmp_dir() -> str:
        """Return the download directory ``<agent_workspace>/tmp``, creating it if needed."""
        workspace = expand_path(conf().get("agent_workspace", "~/cow"))
        download_dir = os.path.join(workspace, "tmp")
        os.makedirs(download_dir, exist_ok=True)
        return download_dir
|
||||
@@ -445,7 +445,7 @@
|
||||
bg-primary-400 text-white hover:bg-primary-500
|
||||
disabled:bg-slate-300 dark:disabled:bg-slate-600
|
||||
disabled:cursor-not-allowed cursor-pointer transition-colors duration-150"
|
||||
disabled onclick="sendMessage()">
|
||||
disabled>
|
||||
<i class="fas fa-paper-plane text-sm"></i>
|
||||
</button>
|
||||
</div>
|
||||
|
||||
@@ -1367,3 +1367,35 @@
|
||||
text-align: right;
|
||||
}
|
||||
.voice-pill audio { display: none; }
|
||||
|
||||
/* Send button toggles into a Stop button while an SSE stream is in flight.
|
||||
Match the look of the disabled send button (light grey block + white
|
||||
glyph) so it reads as the same visual element, just paused/idle from
|
||||
sending perspective and clickable to stop. */
|
||||
#send-btn.send-btn-cancel {
|
||||
background-color: rgb(203 213 225) !important; /* slate-300, == disabled send-btn */
|
||||
color: white !important;
|
||||
}
|
||||
#send-btn.send-btn-cancel:hover {
|
||||
background-color: rgb(148 163 184) !important; /* slate-400 */
|
||||
}
|
||||
#send-btn.send-btn-cancel:disabled {
|
||||
background-color: rgb(226 232 240) !important; /* slate-200, while stop is in flight */
|
||||
color: white !important;
|
||||
cursor: progress;
|
||||
}
|
||||
.dark #send-btn.send-btn-cancel {
|
||||
background-color: rgb(71 85 105) !important; /* slate-600, == dark disabled send-btn */
|
||||
color: white !important;
|
||||
}
|
||||
.dark #send-btn.send-btn-cancel:hover {
|
||||
background-color: rgb(100 116 139) !important; /* slate-500 */
|
||||
}
|
||||
.dark #send-btn.send-btn-cancel:disabled {
|
||||
background-color: rgb(51 65 85) !important; /* slate-700 */
|
||||
color: rgb(203 213 225) !important;
|
||||
}
|
||||
|
||||
.agent-cancelled-tag {
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
@@ -106,7 +106,7 @@ const I18N = {
|
||||
config_custom_model_hint: '输入自定义模型名称',
|
||||
config_save: '保存', config_saved: '已保存',
|
||||
config_save_error: '保存失败',
|
||||
config_custom_option: '自定义...',
|
||||
config_custom_option: '自定义',
|
||||
config_custom_tip: '接口需遵循 OpenAI API 协议',
|
||||
config_security: '安全设置', config_password: '访问密码',
|
||||
config_password_hint: '留空则不启用密码保护',
|
||||
@@ -280,7 +280,7 @@ const I18N = {
|
||||
config_custom_model_hint: 'Enter custom model name',
|
||||
config_save: 'Save', config_saved: 'Saved',
|
||||
config_save_error: 'Save failed',
|
||||
config_custom_option: 'Custom...',
|
||||
config_custom_option: 'Custom',
|
||||
config_custom_tip: 'API must follow OpenAI protocol.',
|
||||
config_security: 'Security', config_password: 'Password',
|
||||
config_password_hint: 'Leave empty to disable password protection',
|
||||
@@ -367,6 +367,15 @@ function t(key) {
|
||||
return (I18N[currentLang] && I18N[currentLang][key]) || (I18N.en[key]) || key;
|
||||
}
|
||||
|
||||
// Resolve a localized label that may be either a plain string or
|
||||
// a {zh, en} object returned by the backend.
|
||||
function localizedLabel(label) {
    // Plain strings (and empty values) pass straight through.
    const isLabelMap = Boolean(label) && typeof label === 'object';
    if (!isLabelMap) {
        return label || '';
    }
    // Object form: current language first, then English, then Chinese.
    return label[currentLang] || label.en || label.zh || '';
}
|
||||
|
||||
function applyI18n() {
|
||||
document.querySelectorAll('[data-i18n]').forEach(el => {
|
||||
el.textContent = t(el.dataset.i18n);
|
||||
@@ -1007,7 +1016,60 @@ const inputHistory = [];
|
||||
let historyIdx = -1;
|
||||
let historySavedDraft = '';
|
||||
|
||||
// While an SSE stream is in flight, the send button morphs into a cancel
|
||||
// button. Only one in-flight request is supported at a time.
|
||||
let activeRequestId = null;
|
||||
let sendBtnMode = 'send'; // 'send' | 'cancel'
|
||||
|
||||
// Turn the send button into an enabled Cancel button bound to `requestId`.
function setSendBtnCancelMode(requestId) {
    const cancelTitle = currentLang === 'zh' ? '中止' : 'Cancel';

    activeRequestId = requestId;
    sendBtnMode = 'cancel';

    sendBtn.disabled = false;
    sendBtn.classList.add('send-btn-cancel');
    sendBtn.title = cancelTitle;
    sendBtn.innerHTML = '<i class="fas fa-stop text-sm"></i>';
}
|
||||
|
||||
// Restore the regular send button and forget the in-flight request.
function resetSendBtnSendMode() {
    sendBtnMode = 'send';
    activeRequestId = null;

    sendBtn.classList.remove('send-btn-cancel');
    sendBtn.title = '';
    sendBtn.innerHTML = '<i class="fas fa-paper-plane text-sm"></i>';

    // Re-derive the disabled state from the current input and attachments.
    updateSendBtnState();
}
|
||||
|
||||
// Ask the backend to cancel the active request; no-op when nothing is in flight.
function requestCancel() {
    if (!activeRequestId) return;

    const payload = {
        request_id: activeRequestId,
        session_id: sessionId,
        lang: currentLang,
    };
    fetch('/cancel', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(payload),
    }).catch(err => {
        console.warn('[cancel] request failed', err);
    });

    // Optimistic UI lock so the click visibly registers before the SSE
    // "cancelled" event arrives.
    sendBtn.disabled = true;
    if (currentLang === 'zh') {
        sendBtn.title = '已中止';
    } else {
        sendBtn.title = 'Cancelled';
    }
}
|
||||
|
||||
// Button click is the only path to Cancel. Pressing Enter still calls
|
||||
// sendMessage() so users can submit "/cancel" as a regular slash command.
|
||||
sendBtn.addEventListener('click', () => {
|
||||
if (sendBtnMode === 'cancel') {
|
||||
requestCancel();
|
||||
} else {
|
||||
sendMessage();
|
||||
}
|
||||
});
|
||||
|
||||
// Enable or disable the send button from the input state.
function updateSendBtnState() {
    // Don't downgrade a Cancel button on input edits.
    if (sendBtnMode !== 'cancel') {
        // Evaluated lazily so the input is only read when no upload is pending.
        const nothingToSend = () => !chatInput.value.trim() && pendingAttachments.length === 0;
        sendBtn.disabled = uploadingCount > 0 || nothingToSend();
    }
}
|
||||
|
||||
@@ -1255,6 +1317,7 @@ const SLASH_COMMANDS = [
|
||||
{ cmd: '/knowledge on', desc: '开启知识库' },
|
||||
{ cmd: '/knowledge off', desc: '关闭知识库' },
|
||||
{ cmd: '/config', desc: '查看当前配置' },
|
||||
{ cmd: '/cancel', desc: '中止当前正在运行的 Agent 任务' },
|
||||
{ cmd: '/logs', desc: '查看最近日志' },
|
||||
{ cmd: '/version', desc: '查看版本' },
|
||||
];
|
||||
@@ -1525,6 +1588,7 @@ function sendVoiceMessage(text, audioUrl) {
|
||||
stream: true,
|
||||
timestamp: timestamp.toISOString(),
|
||||
is_voice: true,
|
||||
lang: currentLang,
|
||||
};
|
||||
|
||||
const MAX_RETRIES = 2;
|
||||
@@ -1538,7 +1602,12 @@ function sendVoiceMessage(text, audioUrl) {
|
||||
.then(r => r.json())
|
||||
.then(data => {
|
||||
if (data.status === 'success') {
|
||||
if (data.stream) {
|
||||
if (data.inline_reply) {
|
||||
// Synchronous fast-path reply (e.g. /cancel); skip SSE.
|
||||
loadingEl.remove();
|
||||
addBotMessage(data.inline_reply, new Date());
|
||||
} else if (data.stream) {
|
||||
setSendBtnCancelMode(data.request_id);
|
||||
startSSE(data.request_id, loadingEl, timestamp, titleInfo);
|
||||
} else {
|
||||
loadingContainers[data.request_id] = loadingEl;
|
||||
@@ -1546,6 +1615,7 @@ function sendVoiceMessage(text, audioUrl) {
|
||||
} else {
|
||||
loadingEl.remove();
|
||||
addBotMessage(t('error_send'), new Date());
|
||||
resetSendBtnSendMode();
|
||||
}
|
||||
})
|
||||
.catch(err => {
|
||||
@@ -1582,6 +1652,10 @@ function addUserVoiceMessage(audioUrl, caption, timestamp) {
|
||||
}
|
||||
|
||||
function sendMessage() {
|
||||
// Do NOT branch on sendBtnMode here: Enter should always send (so
|
||||
// typing "/cancel" submits normally). Cancel is wired only to the
|
||||
// send button's pointer click — see send-btn listener above.
|
||||
|
||||
const text = chatInput.value.trim();
|
||||
if (!text && pendingAttachments.length === 0) return;
|
||||
|
||||
@@ -1610,7 +1684,7 @@ function sendMessage() {
|
||||
renderAttachmentPreview();
|
||||
sendBtn.disabled = true;
|
||||
|
||||
const body = { session_id: sessionId, message: text, stream: true, timestamp: timestamp.toISOString() };
|
||||
const body = { session_id: sessionId, message: text, stream: true, timestamp: timestamp.toISOString(), lang: currentLang };
|
||||
if (attachments.length > 0) {
|
||||
body.attachments = attachments.map(a => ({
|
||||
file_path: a.file_path,
|
||||
@@ -1632,7 +1706,13 @@ function sendMessage() {
|
||||
.then(r => r.json())
|
||||
.then(data => {
|
||||
if (data.status === 'success') {
|
||||
if (data.stream) {
|
||||
if (data.inline_reply) {
|
||||
// Channel handled synchronously (e.g. /cancel fast-path);
|
||||
// render as a bot bubble and skip SSE entirely.
|
||||
loadingEl.remove();
|
||||
addBotMessage(data.inline_reply, new Date());
|
||||
} else if (data.stream) {
|
||||
setSendBtnCancelMode(data.request_id);
|
||||
startSSE(data.request_id, loadingEl, timestamp, titleInfo);
|
||||
} else {
|
||||
loadingContainers[data.request_id] = loadingEl;
|
||||
@@ -1640,12 +1720,14 @@ function sendMessage() {
|
||||
} else {
|
||||
loadingEl.remove();
|
||||
addBotMessage(t('error_send'), new Date());
|
||||
resetSendBtnSendMode();
|
||||
}
|
||||
})
|
||||
.catch(err => {
|
||||
if (err.name === 'AbortError') {
|
||||
loadingEl.remove();
|
||||
addBotMessage(t('error_timeout'), new Date());
|
||||
resetSendBtnSendMode();
|
||||
return;
|
||||
}
|
||||
if (attempt < MAX_RETRIES) {
|
||||
@@ -1655,6 +1737,7 @@ function sendMessage() {
|
||||
}
|
||||
loadingEl.remove();
|
||||
addBotMessage(t('error_send'), new Date());
|
||||
resetSendBtnSendMode();
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1910,14 +1993,33 @@ function startSSE(requestId, loadingEl, timestamp, titleInfo) {
|
||||
stepsEl.appendChild(wrap);
|
||||
scrollChatToBottom();
|
||||
|
||||
} else if (item.type === 'cancelled') {
|
||||
// Agent acknowledged the stop; mark the bubble. A trailing
|
||||
// "done" still arrives with the partial answer.
|
||||
ensureBotEl();
|
||||
if (currentReasoningEl) {
|
||||
finalizeThinking(currentReasoningEl, reasoningStartTime, reasoningText);
|
||||
currentReasoningEl = null;
|
||||
reasoningText = '';
|
||||
}
|
||||
if (!botEl.querySelector('.agent-cancelled-tag')) {
|
||||
const tag = document.createElement('div');
|
||||
tag.className = 'agent-cancelled-tag text-xs text-amber-600 dark:text-amber-400 mt-1';
|
||||
tag.textContent = (currentLang === 'zh') ? '已中止' : 'Cancelled';
|
||||
stepsEl.appendChild(tag);
|
||||
}
|
||||
resetSendBtnSendMode();
|
||||
|
||||
} else if (item.type === 'done') {
|
||||
// Don't close the stream yet: the backend keeps it open
|
||||
// for a short tail to deliver async attachments such as
|
||||
// TTS audio (`voice_attach`). It will close the stream on
|
||||
// its own via onerror once the tail expires.
|
||||
done = true;
|
||||
resetSendBtnSendMode();
|
||||
|
||||
const finalText = item.content || accumulatedText;
|
||||
const finalTextRaw = item.content || accumulatedText;
|
||||
const finalText = localizeCancelMarker(finalTextRaw);
|
||||
|
||||
if (!botEl && finalText) {
|
||||
if (loadingEl) { loadingEl.remove(); loadingEl = null; }
|
||||
@@ -1925,7 +2027,7 @@ function startSSE(requestId, loadingEl, timestamp, titleInfo) {
|
||||
} else if (botEl) {
|
||||
contentEl.classList.remove('sse-streaming');
|
||||
if (finalText) contentEl.innerHTML = renderMarkdown(finalText);
|
||||
contentEl.dataset.rawMd = finalText || '';
|
||||
contentEl.dataset.rawMd = finalTextRaw || '';
|
||||
const copyBtn = botEl.querySelector('.copy-msg-btn');
|
||||
if (copyBtn && finalText) copyBtn.style.display = '';
|
||||
applyHighlighting(botEl);
|
||||
@@ -1955,6 +2057,7 @@ function startSSE(requestId, loadingEl, timestamp, titleInfo) {
|
||||
delete activeStreams[requestId];
|
||||
if (loadingEl) { loadingEl.remove(); loadingEl = null; }
|
||||
addBotMessage(t('error_send'), new Date());
|
||||
resetSendBtnSendMode();
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1991,6 +2094,7 @@ function startSSE(requestId, loadingEl, timestamp, titleInfo) {
|
||||
applyHighlighting(botEl);
|
||||
bindChatKnowledgeLinks(botEl);
|
||||
}
|
||||
resetSendBtnSendMode();
|
||||
};
|
||||
}
|
||||
|
||||
@@ -2229,13 +2333,23 @@ function _renderSentFileFromToolResult(step) {
|
||||
`<i class="fas fa-file-download" style="color:#6b7280;"></i> ${escapeHtml(fileName)}</a></div>`;
|
||||
}
|
||||
|
||||
// Cosmetic translator for cancel markers persisted in history.
|
||||
// History keeps the English canonical form for the LLM; only display is localized.
|
||||
function localizeCancelMarker(text) {
    // Nothing to translate for empty text or any language other than Chinese.
    if (!text || currentLang !== 'zh') {
        return text;
    }
    const withUserMarker = text.replace(/_\(Cancelled by user\)_/g, '_(用户已中止)_');
    return withUserMarker.replace(/_\(Cancelled\)_/g, '_(已中止)_');
}
|
||||
|
||||
function createBotMessageEl(content, timestamp, requestId, msg) {
|
||||
const el = document.createElement('div');
|
||||
el.className = 'flex gap-3 px-4 sm:px-6 py-3';
|
||||
if (requestId) el.dataset.requestId = requestId;
|
||||
|
||||
let stepsHtml = '';
|
||||
let displayContent = content;
|
||||
let displayContent = localizeCancelMarker(content);
|
||||
|
||||
if (msg && msg.steps && msg.steps.length > 0) {
|
||||
// New format: ordered steps with interleaved content
|
||||
@@ -3164,7 +3278,7 @@ function initConfigView(data) {
|
||||
configCurrentModel = data.model || '';
|
||||
|
||||
const providerEl = document.getElementById('cfg-provider');
|
||||
const providerOpts = Object.entries(configProviders).map(([pid, p]) => ({ value: pid, label: p.label }));
|
||||
const providerOpts = Object.entries(configProviders).map(([pid, p]) => ({ value: pid, label: localizedLabel(p.label) }));
|
||||
|
||||
// if use_linkai is enabled, always select linkai as the provider
|
||||
// Otherwise prefer bot_type from config, fall back to model-based detection
|
||||
@@ -3914,7 +4028,7 @@ function renderVendorChip(p) {
|
||||
bg-slate-50 dark:bg-white/5 hover:border-primary-300 dark:hover:border-primary-500/50
|
||||
cursor-pointer transition-colors duration-150 text-left">
|
||||
${renderProviderLogo(p, 28)}
|
||||
<span class="flex-1 min-w-0 text-sm font-medium text-slate-800 dark:text-slate-100 truncate">${escapeHtml(p.label)}</span>
|
||||
<span class="flex-1 min-w-0 text-sm font-medium text-slate-800 dark:text-slate-100 truncate">${escapeHtml(localizedLabel(p.label))}</span>
|
||||
<i class="fas fa-pen-to-square text-[11px] text-slate-400 dark:text-slate-500 group-hover:text-primary-500 transition-colors"></i>
|
||||
</button>`;
|
||||
}
|
||||
@@ -3922,7 +4036,7 @@ function renderVendorChip(p) {
|
||||
// Render a uniformly-styled logo for a provider. Tries an SVG asset first; if
|
||||
// it 404s the <img> swaps itself for a monogram fallback via onerror.
|
||||
function renderProviderLogo(p, sizePx) {
|
||||
const initial = (p.label || p.id || '?').slice(0, 1).toUpperCase();
|
||||
const initial = (localizedLabel(p.label) || p.id || '?').slice(0, 1).toUpperCase();
|
||||
const sz = sizePx || 32;
|
||||
const url = `${MODELS_PROVIDER_LOGO_PATH}/${encodeURIComponent(p.id)}.svg`;
|
||||
const fallbackId = `pl-${p.id}-${Math.random().toString(36).slice(2, 8)}`;
|
||||
@@ -3977,7 +4091,7 @@ function renderCapabilityHeaderTag(def, cap) {
|
||||
function _searchProviderLabel(cap, providerId) {
|
||||
const list = (cap && cap.providers) || [];
|
||||
const hit = list.find(p => p.id === providerId);
|
||||
return hit ? hit.label : providerId;
|
||||
return hit ? localizedLabel(hit.label) : providerId;
|
||||
}
|
||||
|
||||
// Search card body: strategy picker + (when fixed) provider picker + a
|
||||
@@ -4103,7 +4217,7 @@ function _renderSearchSummary(body, cap) {
|
||||
class="inline-flex items-center gap-1 px-2 py-0.5 text-[11px] rounded-md cursor-pointer
|
||||
bg-emerald-50 dark:bg-emerald-900/30 text-emerald-600 dark:text-emerald-400
|
||||
hover:bg-emerald-100 dark:hover:bg-emerald-900/50 transition-colors">
|
||||
<i class="fas fa-check text-[10px]"></i>${escapeHtml(p.label)}
|
||||
<i class="fas fa-check text-[10px]"></i>${escapeHtml(localizedLabel(p.label))}
|
||||
</button>
|
||||
`).join('');
|
||||
host.innerHTML = `
|
||||
@@ -4150,7 +4264,7 @@ function openSearchAddProviderPicker(missingProviders) {
|
||||
class="w-full flex items-center justify-between px-3 py-2.5 rounded-lg cursor-pointer
|
||||
bg-slate-50 dark:bg-white/5 hover:bg-slate-100 dark:hover:bg-white/10
|
||||
text-sm text-slate-700 dark:text-slate-200 transition-colors">
|
||||
<span>${escapeHtml(p.label)}</span>
|
||||
<span>${escapeHtml(localizedLabel(p.label))}</span>
|
||||
<i class="fas fa-chevron-right text-[10px] text-slate-400"></i>
|
||||
</button>
|
||||
`).join('');
|
||||
@@ -4607,7 +4721,7 @@ function renderCapabilityHints(def, cap, body, currentProvider) {
|
||||
// id ("linkai") when we know it. Falls back to the id when the
|
||||
// provider isn't in our vendor table (rare).
|
||||
const provMeta = modelsState.providers.find(p => p.id === fbProv);
|
||||
const fbProvLabel = (provMeta && provMeta.label) || fbProv;
|
||||
const fbProvLabel = (provMeta && localizedLabel(provMeta.label)) || fbProv;
|
||||
const fbText = fbModel ? `${fbProvLabel} / ${fbModel}` : fbProvLabel;
|
||||
slot.innerHTML = `
|
||||
<p class="flex items-center gap-1.5 text-xs text-slate-400 dark:text-slate-500 min-w-0">
|
||||
@@ -4639,7 +4753,7 @@ function buildCapabilityProviderOptions(def, cap) {
|
||||
const configured = !tracked || !!meta.configured;
|
||||
return {
|
||||
value: pid,
|
||||
label: (meta && meta.label) || pid,
|
||||
label: (meta && localizedLabel(meta.label)) || pid,
|
||||
_tracked: tracked,
|
||||
_configured: configured,
|
||||
};
|
||||
@@ -4798,7 +4912,7 @@ function rebuildCapabilityModelDropdown(def, providerId, selectedModel, scope) {
|
||||
modelValues.push(entry.value);
|
||||
return { value: entry.value, label: entry.label || entry.value, hint: entry.hint || '' };
|
||||
});
|
||||
opts.push({ value: '__custom__', label: currentLang === 'zh' ? '自定义...' : 'Custom...' });
|
||||
opts.push({ value: '__custom__', label: currentLang === 'zh' ? '自定义' : 'Custom' });
|
||||
|
||||
let initialValue = selectedModel || '';
|
||||
if (initialValue && !modelValues.includes(initialValue)) {
|
||||
@@ -4881,7 +4995,7 @@ function rebuildCapabilityVoiceDropdown(providerId, selectedVoice, scope, modelI
|
||||
hint: desc === code ? '' : code,
|
||||
};
|
||||
});
|
||||
opts.push({ value: '__custom__', label: currentLang === 'zh' ? '自定义...' : 'Custom...' });
|
||||
opts.push({ value: '__custom__', label: currentLang === 'zh' ? '自定义' : 'Custom' });
|
||||
|
||||
// Off-catalog values route through the custom branch.
|
||||
let initial = selectedVoice || '';
|
||||
@@ -5069,7 +5183,7 @@ function openVendorModal(providerId, onSaved) {
|
||||
const pickerEl = document.getElementById('vendor-modal-picker');
|
||||
const pickerOpts = modelsState.providers.map(p => ({
|
||||
value: p.id,
|
||||
label: p.label,
|
||||
label: localizedLabel(p.label),
|
||||
_configured: !!p.configured,
|
||||
}));
|
||||
initDropdown(pickerEl, pickerOpts, defaultId, (val) => fillVendorModalForProvider(val));
|
||||
@@ -5108,7 +5222,7 @@ function openVendorModal(providerId, onSaved) {
|
||||
function fillVendorModalForProvider(providerId) {
|
||||
const meta = modelsState.providers.find(p => p.id === providerId);
|
||||
if (!meta) return;
|
||||
document.getElementById('vendor-modal-title').textContent = meta.label;
|
||||
document.getElementById('vendor-modal-title').textContent = localizedLabel(meta.label);
|
||||
document.getElementById('vendor-modal-subtitle').textContent = meta.id;
|
||||
|
||||
// ----- API Base -----
|
||||
|
||||
@@ -28,8 +28,16 @@ from config import conf
|
||||
IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".svg"}
|
||||
VIDEO_EXTENSIONS = {".mp4", ".webm", ".avi", ".mov", ".mkv"}
|
||||
|
||||
def _get_web_password() -> str:
    """Return the configured ``web_password`` as a string ("" when unset or None).

    Non-string config values (e.g. a numeric password) are converted with
    ``str`` so later comparisons and ``.encode()`` calls work.
    """
    raw = conf().get("web_password", "")
    return "" if raw is None else str(raw)
|
||||
|
||||
|
||||
def _is_password_enabled():
|
||||
return bool(conf().get("web_password", ""))
|
||||
return bool(_get_web_password())
|
||||
|
||||
|
||||
def _session_expire_seconds():
|
||||
@@ -40,7 +48,7 @@ def _create_auth_token():
|
||||
"""Create a stateless signed token: ``<timestamp_hex>.<hmac_hex>``."""
|
||||
ts = format(int(time.time()), "x")
|
||||
sig = hmac.new(
|
||||
conf().get("web_password", "").encode(),
|
||||
_get_web_password().encode(),
|
||||
ts.encode(),
|
||||
hashlib.sha256,
|
||||
).hexdigest()
|
||||
@@ -63,7 +71,7 @@ def _verify_auth_token(token):
|
||||
if time.time() - ts > _session_expire_seconds():
|
||||
return False
|
||||
expected = hmac.new(
|
||||
conf().get("web_password", "").encode(),
|
||||
_get_web_password().encode(),
|
||||
ts_hex.encode(),
|
||||
hashlib.sha256,
|
||||
).hexdigest()
|
||||
@@ -85,6 +93,15 @@ def _require_auth():
|
||||
json.dumps({"status": "error", "message": "Unauthorized"}))
|
||||
|
||||
|
||||
# Localized text for /cancel system replies. Web is the only channel that
|
||||
# honors a per-request `lang`; other channels reply in Chinese by default.
|
||||
def _cancel_reply_text(cancelled: int, lang: str) -> str:
|
||||
en = lang.startswith("en")
|
||||
if cancelled > 0:
|
||||
return "🛑 Cancelled." if en else "🛑 已中止"
|
||||
return "Nothing to cancel." if en else "当前没有可中止的任务。"
|
||||
|
||||
|
||||
def _get_upload_dir() -> str:
|
||||
from common.utils import expand_path
|
||||
ws_root = expand_path(conf().get("agent_workspace", "~/cow"))
|
||||
@@ -429,6 +446,18 @@ class WebChannel(ChatChannel):
|
||||
"timestamp": time.time(),
|
||||
})
|
||||
|
||||
elif event_type == "agent_cancelled":
|
||||
# Push an explicit cancelled SSE event so the frontend
|
||||
# marks the bubble as stopped. A trailing "done" still
|
||||
# arrives with the partial answer.
|
||||
final_response = data.get("final_response", "")
|
||||
q.put({
|
||||
"type": "cancelled",
|
||||
"content": final_response,
|
||||
"request_id": request_id,
|
||||
"timestamp": time.time(),
|
||||
})
|
||||
|
||||
elif event_type == "agent_end":
|
||||
# Safety net: if the agent finishes with an empty final_response,
|
||||
# chat_channel skips _send_reply (because reply.content is empty),
|
||||
@@ -748,6 +777,25 @@ class WebChannel(ChatChannel):
|
||||
# desire_rtype concept used by other channels).
|
||||
is_voice_input = bool(json_data.get('is_voice', False))
|
||||
|
||||
# Fast path for /cancel: bypass the session queue and SSE setup.
|
||||
# Web frontend (stream=true) only listens to SSE, so we return an
|
||||
# inline_reply payload to be rendered synchronously.
|
||||
stripped_prompt = (prompt or "").strip().lower()
|
||||
if stripped_prompt == "/cancel":
|
||||
from agent.protocol import get_cancel_registry
|
||||
cancelled = get_cancel_registry().cancel_session(session_id)
|
||||
lang = (json_data.get('lang') or 'zh').lower()
|
||||
msg_text = _cancel_reply_text(cancelled, lang)
|
||||
logger.info(
|
||||
f"[WebChannel] /cancel fast-path: session={session_id}, cancelled={cancelled}, lang={lang}"
|
||||
)
|
||||
return json.dumps({
|
||||
"status": "success",
|
||||
"request_id": "",
|
||||
"stream": False,
|
||||
"inline_reply": msg_text,
|
||||
})
|
||||
|
||||
# Append file references to the prompt (same format as QQ channel)
|
||||
if attachments:
|
||||
file_refs = []
|
||||
@@ -854,6 +902,11 @@ class WebChannel(ChatChannel):
|
||||
if itype == "done":
|
||||
post_done = True
|
||||
post_deadline = time.time() + POST_DONE_TAIL_SECONDS
|
||||
elif itype == "cancelled":
|
||||
# Close SSE tail quickly after cancel; don't wait for the
|
||||
# full TTS tail since the user already pressed Stop.
|
||||
post_done = True
|
||||
post_deadline = time.time() + 3
|
||||
elif itype == "voice_attach":
|
||||
# WSGI buffers the previous chunk until the next yield;
|
||||
# shrink the tail so the generator wakes up quickly to
|
||||
@@ -864,6 +917,59 @@ class WebChannel(ChatChannel):
|
||||
finally:
|
||||
self.sse_queues.pop(request_id, None)
|
||||
|
||||
def cancel_request(self):
    """
    Cancel an in-flight agent run.

    Body: {"request_id": "...", "session_id": "...", "lang": "..."}
    Either id is sufficient; request_id is preferred when known. ``lang``
    is optional (defaults to "zh") and only selects the notice text pushed
    to the SSE stream.

    A missing, malformed or non-object body is treated as an empty request,
    so the call still answers with success and ``cancelled == 0``; the
    client's UX stays idempotent.

    Returns:
        JSON string ``{"status": "success", "cancelled": <count>}``, or
        ``{"status": "error", "message": ...}`` if an unexpected exception
        escapes the cancel logic.
    """
    try:
        from agent.protocol import get_cancel_registry

        data = web.data()
        try:
            json_data = json.loads(data) if data else {}
        except Exception:
            json_data = {}
        # Valid JSON that is not an object (list, string, number) carries no
        # ids; treat it like an empty body instead of failing on .get().
        if not isinstance(json_data, dict):
            json_data = {}

        # Coerce to str so numeric ids do not break .strip()/.lower().
        request_id = str(json_data.get("request_id") or "").strip()
        session_id = str(json_data.get("session_id") or "").strip()
        lang = str(json_data.get("lang") or "zh").lower()

        registry = get_cancel_registry()
        cancelled = 0

        if request_id:
            if registry.cancel_request(request_id):
                cancelled = 1

        # Fall back to the session only when the request id matched nothing.
        if cancelled == 0 and session_id:
            cancelled = registry.cancel_session(session_id)

        # Single lookup: the stream generator pops its queue when it finishes,
        # so a separate "in" check followed by indexing could raise KeyError.
        sse_queue = self.sse_queues.get(request_id) if request_id else None
        if sse_queue is not None:
            sse_queue.put({
                "type": "cancelled",
                "content": "Cancelled" if lang.startswith("en") else "已中止",
                "request_id": request_id,
                "timestamp": time.time(),
            })

        logger.info(
            f"[WebChannel] cancel request: request_id={request_id!r}, "
            f"session_id={session_id!r}, cancelled={cancelled}"
        )
        return json.dumps({
            "status": "success",
            "cancelled": cancelled,
        })

    except Exception as e:
        logger.error(f"[WebChannel] cancel_request error: {e}")
        return json.dumps({"status": "error", "message": str(e)})
|
||||
|
||||
def poll_response(self):
|
||||
"""
|
||||
Poll for responses using the session_id.
|
||||
@@ -959,6 +1065,7 @@ class WebChannel(ChatChannel):
|
||||
'/api/voice/tts', 'VoiceTtsHandler',
|
||||
'/poll', 'PollHandler',
|
||||
'/stream', 'StreamHandler',
|
||||
'/cancel', 'CancelHandler',
|
||||
'/chat', 'ChatHandler',
|
||||
'/config', 'ConfigHandler',
|
||||
'/api/models', 'ModelsHandler',
|
||||
@@ -1050,8 +1157,8 @@ class AuthLoginHandler:
|
||||
data = json.loads(web.data())
|
||||
except Exception:
|
||||
return json.dumps({"status": "error", "message": "Invalid request"})
|
||||
password = data.get("password", "")
|
||||
expected = conf().get("web_password", "")
|
||||
password = str(data.get("password", "") or "")
|
||||
expected = _get_web_password()
|
||||
if not hmac.compare_digest(password, expected):
|
||||
logger.warning("[WebChannel] Invalid login attempt")
|
||||
return json.dumps({"status": "error", "message": "Wrong password"})
|
||||
@@ -1232,6 +1339,12 @@ class PollHandler:
|
||||
return WebChannel().poll_response()
|
||||
|
||||
|
||||
class CancelHandler:
    """Handler mapped to the '/cancel' route."""

    def POST(self):
        """Check authentication, then hand the request to WebChannel.cancel_request()."""
        _require_auth()
        channel = WebChannel()
        return channel.cancel_request()
|
||||
|
||||
|
||||
class StreamHandler:
|
||||
def GET(self):
|
||||
_require_auth()
|
||||
@@ -1274,6 +1387,7 @@ class ConfigHandler:
|
||||
const.DOUBAO_SEED_2_PRO, const.DOUBAO_SEED_2_CODE,
|
||||
const.KIMI_K2_6, const.KIMI_K2_5, const.KIMI_K2,
|
||||
const.ERNIE_5_1, const.ERNIE_5, const.ERNIE_X1_1, const.ERNIE_45_TURBO_128K, const.ERNIE_45_TURBO_32K,
|
||||
const.MIMO_V2_5_PRO, const.MIMO_V2_5,
|
||||
]
|
||||
|
||||
# Generic placeholder hints surfaced in the web console. We deliberately
|
||||
@@ -1329,7 +1443,7 @@ class ConfigHandler:
|
||||
"models": [const.GPT_55, const.GPT_54, const.GPT_54_MINI, const.GPT_54_NANO, const.GPT_5, const.GPT_41, const.GPT_4o],
|
||||
}),
|
||||
("zhipu", {
|
||||
"label": "智谱AI",
|
||||
"label": {"zh": "智谱AI", "en": "GLM"},
|
||||
"api_key_field": "zhipu_ai_api_key",
|
||||
"api_base_key": "zhipu_ai_api_base",
|
||||
"api_base_default": "https://open.bigmodel.cn/api/paas/v4",
|
||||
@@ -1337,7 +1451,7 @@ class ConfigHandler:
|
||||
"models": [const.GLM_5_1, const.GLM_5_TURBO, const.GLM_5, const.GLM_4_7],
|
||||
}),
|
||||
("dashscope", {
|
||||
"label": "通义千问",
|
||||
"label": {"zh": "通义千问", "en": "Qwen"},
|
||||
"api_key_field": "dashscope_api_key",
|
||||
"api_base_key": None,
|
||||
"api_base_default": None,
|
||||
@@ -1345,7 +1459,7 @@ class ConfigHandler:
|
||||
"models": [const.QWEN36_PLUS, const.QWEN37_MAX, const.QWEN35_PLUS, const.QWEN3_MAX],
|
||||
}),
|
||||
("doubao", {
|
||||
"label": "豆包",
|
||||
"label": {"zh": "豆包", "en": "Doubao"},
|
||||
"api_key_field": "ark_api_key",
|
||||
"api_base_key": "ark_base_url",
|
||||
"api_base_default": "https://ark.cn-beijing.volces.com/api/v3",
|
||||
@@ -1361,13 +1475,21 @@ class ConfigHandler:
|
||||
"models": [const.KIMI_K2_6, const.KIMI_K2_5, const.KIMI_K2],
|
||||
}),
|
||||
("qianfan", {
|
||||
"label": "百度千帆",
|
||||
"label": {"zh": "百度千帆", "en": "ERNIE"},
|
||||
"api_key_field": "qianfan_api_key",
|
||||
"api_base_key": "qianfan_api_base",
|
||||
"api_base_default": "https://qianfan.baidubce.com/v2",
|
||||
"api_base_placeholder": _PLACEHOLDER_QIANFAN,
|
||||
"models": [const.ERNIE_5_1, const.ERNIE_5, const.ERNIE_X1_1, const.ERNIE_45_TURBO_128K, const.ERNIE_45_TURBO_32K],
|
||||
}),
|
||||
("mimo", {
|
||||
"label": {"zh": "小米 MiMo", "en": "MiMo"},
|
||||
"api_key_field": "mimo_api_key",
|
||||
"api_base_key": "mimo_api_base",
|
||||
"api_base_default": "https://api.xiaomimimo.com/v1",
|
||||
"api_base_placeholder": _PLACEHOLDER_V1,
|
||||
"models": [const.MIMO_V2_5_PRO, const.MIMO_V2_5],
|
||||
}),
|
||||
("linkai", {
|
||||
"label": "LinkAI",
|
||||
"api_key_field": "linkai_api_key",
|
||||
@@ -1377,7 +1499,7 @@ class ConfigHandler:
|
||||
"models": _RECOMMENDED_MODELS,
|
||||
}),
|
||||
("custom", {
|
||||
"label": "自定义",
|
||||
"label": {"zh": "自定义", "en": "Custom"},
|
||||
"api_key_field": "custom_api_key",
|
||||
"api_base_key": "custom_api_base",
|
||||
"api_base_default": "",
|
||||
@@ -1389,10 +1511,10 @@ class ConfigHandler:
|
||||
EDITABLE_KEYS = {
|
||||
"model", "bot_type", "use_linkai",
|
||||
"open_ai_api_base", "deepseek_api_base", "qianfan_api_base", "claude_api_base", "gemini_api_base",
|
||||
"zhipu_ai_api_base", "moonshot_base_url", "ark_base_url", "custom_api_base",
|
||||
"zhipu_ai_api_base", "moonshot_base_url", "ark_base_url", "custom_api_base", "mimo_api_base",
|
||||
"open_ai_api_key", "deepseek_api_key", "qianfan_api_key", "claude_api_key", "gemini_api_key",
|
||||
"zhipu_ai_api_key", "dashscope_api_key", "moonshot_api_key",
|
||||
"ark_api_key", "minimax_api_key", "linkai_api_key", "custom_api_key",
|
||||
"ark_api_key", "minimax_api_key", "linkai_api_key", "custom_api_key", "mimo_api_key",
|
||||
"agent_max_context_tokens", "agent_max_context_turns", "agent_max_steps",
|
||||
"enable_thinking", "web_password",
|
||||
}
|
||||
@@ -1434,7 +1556,7 @@ class ConfigHandler:
|
||||
"api_key_field": p.get("api_key_field"),
|
||||
}
|
||||
|
||||
raw_pwd = local_config.get("web_password", "")
|
||||
raw_pwd = str(local_config.get("web_password", "") or "")
|
||||
masked_pwd = ("*" * len(raw_pwd)) if raw_pwd else ""
|
||||
|
||||
return json.dumps({
|
||||
@@ -1533,7 +1655,7 @@ class ModelsHandler:
|
||||
# Capability -> provider ids drawn from ConfigHandler.PROVIDER_MODELS.
|
||||
_ASR_PROVIDERS = ["openai", "dashscope", "zhipu", "linkai"]
|
||||
# Web-console white-list. Other vendors stay usable via direct config.
|
||||
_TTS_PROVIDERS = ["openai", "minimax", "dashscope", "linkai"]
|
||||
_TTS_PROVIDERS = ["openai", "minimax", "dashscope", "mimo", "linkai"]
|
||||
|
||||
# TTS engine catalog (speech models, not voice timbres). Entries are
|
||||
# either a bare code or {value, hint?} when a friendly label helps.
|
||||
@@ -1548,6 +1670,10 @@ class ModelsHandler:
|
||||
"dashscope": [
|
||||
{"value": "qwen3-tts-flash", "hint": "覆盖普通话、方言与主流外语"},
|
||||
],
|
||||
# 小米 MiMo TTS 系列,通过 chat completions 接口合成
|
||||
"mimo": [
|
||||
{"value": "mimo-v2.5-tts", "hint": "预置音色 · 支持唱歌模式"},
|
||||
],
|
||||
# Aggregating gateway: a single endpoint multiplexes several
|
||||
# underlying TTS engines, selected via the `model` field.
|
||||
# Each engine exposes its own voice catalog (see _TTS_PROVIDER_VOICES).
|
||||
@@ -1667,6 +1793,18 @@ class ModelsHandler:
|
||||
{"value": "Marcus", "hint": "陕西话 · 秦川"},
|
||||
{"value": "Roy", "hint": "闽南语 · 阿杰"},
|
||||
],
|
||||
# 小米 MiMo 预置音色列表(mimo-v2.5-tts),文档:
|
||||
# https://platform.xiaomimimo.com/docs/zh-CN/usage-guide/speech-synthesis-v2.5
|
||||
"mimo": [
|
||||
{"value": "冰糖", "hint": "中文 · 女声 · 冰糖"},
|
||||
{"value": "茉莉", "hint": "中文 · 女声 · 茉莉"},
|
||||
{"value": "苏打", "hint": "中文 · 男声 · 苏打"},
|
||||
{"value": "白桦", "hint": "中文 · 男声 · 白桦"},
|
||||
{"value": "Mia", "hint": "英文 · 女声 · Mia"},
|
||||
{"value": "Chloe", "hint": "英文 · 女声 · Chloe"},
|
||||
{"value": "Milo", "hint": "英文 · 男声 · Milo"},
|
||||
{"value": "Dean", "hint": "英文 · 男声 · Dean"},
|
||||
],
|
||||
# Aggregating gateway: voices are scoped per engine model. The
|
||||
# frontend picks the correct list based on the selected model so
|
||||
# users don't see incompatible timbres for the active engine.
|
||||
@@ -1803,6 +1941,8 @@ class ModelsHandler:
|
||||
# (see models/minimax/minimax_bot.py::call_vision); the M2.x chat
|
||||
# family is text-only.
|
||||
"minimax": [const.MINIMAX_TEXT_01],
|
||||
# MiMo 原生全模态模型:v2.5-pro / v2.5 支持图像/音频/视频输入
|
||||
"mimo": [const.MIMO_V2_5_PRO, const.MIMO_V2_5],
|
||||
# LinkAI proxies the underlying vendor; surface a curated set of
|
||||
# multimodal models. Order: gpt-4.1-mini → gpt-5.4-mini as the
|
||||
# cross-vendor baselines, then each vendor's recommended default.
|
||||
@@ -1932,6 +2072,7 @@ class ModelsHandler:
|
||||
("qianfan", "qianfan_api_key", const.ERNIE_45_TURBO_VL),
|
||||
("zhipu", "zhipu_ai_api_key", const.GLM_5V_TURBO),
|
||||
("minimax", "minimax_api_key", const.MINIMAX_TEXT_01),
|
||||
("mimo", "mimo_api_key", const.MIMO_V2_5_PRO),
|
||||
]
|
||||
|
||||
@classmethod
|
||||
@@ -2011,12 +2152,17 @@ class ModelsHandler:
|
||||
if not isinstance(vision_conf, dict):
|
||||
vision_conf = {}
|
||||
user_specified = (vision_conf.get("model") or "").strip()
|
||||
explicit_provider = (vision_conf.get("provider") or "").strip()
|
||||
|
||||
# When the user pinned a specific model, infer which vendor card to
|
||||
# highlight by scanning the per-provider model lists. Falls back to
|
||||
# an empty provider so the dropdown stays on "auto" if we can't tell.
|
||||
# Provider resolution priority:
|
||||
# 1. Explicit `tools.vision.provider` (persisted via UI; supports
|
||||
# custom model names that prefix-inference can't recognize).
|
||||
# 2. Scan per-provider model lists by model name.
|
||||
# Empty provider keeps the dropdown on "auto" when we can't tell.
|
||||
inferred_provider = ""
|
||||
if user_specified:
|
||||
if explicit_provider and explicit_provider in cls._VISION_PROVIDER_MODELS:
|
||||
inferred_provider = explicit_provider
|
||||
elif user_specified:
|
||||
for pid, models in cls._VISION_PROVIDER_MODELS.items():
|
||||
if user_specified in models:
|
||||
inferred_provider = pid
|
||||
@@ -2181,11 +2327,17 @@ class ModelsHandler:
|
||||
if not isinstance(img_node, dict):
|
||||
img_node = {}
|
||||
explicit_model = (img_node.get("model") or "").strip()
|
||||
explicit_provider = (img_node.get("provider") or "").strip()
|
||||
|
||||
# Infer the provider card to highlight by scanning per-provider
|
||||
# model lists, including alias values inside {value, hint} entries.
|
||||
# Provider resolution priority:
|
||||
# 1. Explicit `skills.image-generation.provider` (persisted via UI;
|
||||
# supports custom model names that prefix-inference can't catch).
|
||||
# 2. Scan per-provider model catalog by model name.
|
||||
# Empty provider keeps the dropdown on "auto" when we can't tell.
|
||||
inferred_provider = ""
|
||||
if explicit_model:
|
||||
if explicit_provider and explicit_provider in cls._IMAGE_PROVIDER_MODELS:
|
||||
inferred_provider = explicit_provider
|
||||
elif explicit_model:
|
||||
for pid, models in cls._IMAGE_PROVIDER_MODELS.items():
|
||||
for entry in models:
|
||||
val = entry if isinstance(entry, str) else (entry.get("value") or "")
|
||||
@@ -2222,10 +2374,10 @@ class ModelsHandler:
|
||||
_SEARCH_PROVIDERS = ("bocha", "qianfan", "zhipu", "linkai")
|
||||
|
||||
_SEARCH_PROVIDER_LABELS = {
|
||||
"bocha": "博查",
|
||||
"zhipu": "智谱",
|
||||
"qianfan": "百度千帆",
|
||||
"linkai": "LinkAI",
|
||||
"bocha": {"zh": "博查", "en": "Bocha"},
|
||||
"zhipu": {"zh": "智谱", "en": "GLM"},
|
||||
"qianfan": {"zh": "百度千帆", "en": "ERNIE"},
|
||||
"linkai": {"zh": "LinkAI", "en": "LinkAI"},
|
||||
}
|
||||
|
||||
@classmethod
|
||||
@@ -2440,27 +2592,37 @@ class ModelsHandler:
|
||||
return json.dumps({"status": "error", "message": f"capability not editable: {capability}"})
|
||||
|
||||
def _set_image(self, provider_id: str, model: str) -> str:
|
||||
# Source of truth: skills.image-generation.model. provider_id is
|
||||
# informational only; the resolver picks the vendor by model prefix.
|
||||
# Source of truth: skills.image-generation.{provider, model}. The
|
||||
# provider field is persisted so users picking a custom model under
|
||||
# a specific vendor still get routed there — runtime falls back to
|
||||
# model-name prefix inference only when provider is empty.
|
||||
local_config = conf()
|
||||
file_cfg = self._read_file_config()
|
||||
|
||||
self._set_nested_namespace_value(local_config, "skills", "image-generation", "model", model or "")
|
||||
self._set_nested_namespace_value(file_cfg, "skills", "image-generation", "model", model or "")
|
||||
self._set_nested_namespace_value(local_config, "skills", "image-generation", "provider", provider_id or "")
|
||||
self._set_nested_namespace_value(file_cfg, "skills", "image-generation", "provider", provider_id or "")
|
||||
self._drop_legacy_namespace(local_config, "skill", "skills", child="image-generation")
|
||||
self._drop_legacy_namespace(file_cfg, "skill", "skills", child="image-generation")
|
||||
|
||||
self._write_file_config(file_cfg)
|
||||
|
||||
# The skill subprocess reads SKILL_IMAGE_GENERATION_MODEL from env at
|
||||
# startup; mirror the change so live edits apply without restart.
|
||||
env_key = "SKILL_IMAGE_GENERATION_MODEL"
|
||||
# The skill subprocess reads SKILL_IMAGE_GENERATION_{MODEL,PROVIDER}
|
||||
# from env at startup; mirror the change so live edits apply without
|
||||
# restart.
|
||||
model_env = "SKILL_IMAGE_GENERATION_MODEL"
|
||||
provider_env = "SKILL_IMAGE_GENERATION_PROVIDER"
|
||||
if model:
|
||||
os.environ[env_key] = model
|
||||
os.environ[model_env] = model
|
||||
else:
|
||||
os.environ.pop(env_key, None)
|
||||
os.environ.pop(model_env, None)
|
||||
if provider_id:
|
||||
os.environ[provider_env] = provider_id
|
||||
else:
|
||||
os.environ.pop(provider_env, None)
|
||||
|
||||
logger.info(f"[ModelsHandler] image updated: provider_hint={provider_id!r} model={model!r}")
|
||||
logger.info(f"[ModelsHandler] image updated: provider={provider_id!r} model={model!r}")
|
||||
return json.dumps({
|
||||
"status": "success",
|
||||
"provider": provider_id,
|
||||
@@ -2499,18 +2661,22 @@ class ModelsHandler:
|
||||
return json.dumps({"status": "success", "applied": applied})
|
||||
|
||||
def _set_vision(self, provider_id: str, model: str) -> str:
    """Store the vision tool's provider and model in both configs.

    Writes ``tools.vision.model`` and ``tools.vision.provider`` into the
    in-memory config and the file config, drops the legacy ``tool``
    namespace entry for ``vision``, and writes the file config back.

    Args:
        provider_id: Vendor id chosen in the UI; a falsy value is stored
            as an empty string.
        model: Model name to store, written as given.

    Returns:
        JSON string with ``status``, ``provider`` and ``model``.
    """
    # Source of truth: tools.vision.{provider, model}. The provider field
    # is persisted so users picking a custom model under a specific vendor
    # still get routed there — runtime falls back to model-name prefix
    # inference only when provider is empty.
    local_config = conf()
    file_cfg = self._read_file_config()
    self._set_nested_namespace_value(file_cfg, "tools", "vision", "model", model)
    self._set_nested_namespace_value(local_config, "tools", "vision", "model", model)
    self._set_nested_namespace_value(file_cfg, "tools", "vision", "provider", provider_id or "")
    self._set_nested_namespace_value(local_config, "tools", "vision", "provider", provider_id or "")
    self._drop_legacy_namespace(file_cfg, "tool", "tools", child="vision")
    self._drop_legacy_namespace(local_config, "tool", "tools", child="vision")

    self._write_file_config(file_cfg)
    logger.info(f"[ModelsHandler] vision updated: provider={provider_id!r} model={model!r}")
    return json.dumps({"status": "success", "provider": provider_id, "model": model})
|
||||
|
||||
@staticmethod
|
||||
def _set_nested_namespace_value(cfg, top: str, name: str, key: str, value):
|
||||
@@ -2743,6 +2909,14 @@ class ChannelsHandler:
|
||||
{"key": "wechatmp_port", "label": "Port", "type": "number", "default": 8080},
|
||||
],
|
||||
}),
|
||||
("telegram", {
|
||||
"label": {"zh": "Telegram", "en": "Telegram"},
|
||||
"icon": "fa-paper-plane",
|
||||
"color": "sky",
|
||||
"fields": [
|
||||
{"key": "telegram_token", "label": "Bot Token", "type": "secret"},
|
||||
],
|
||||
}),
|
||||
])
|
||||
|
||||
@staticmethod
|
||||
|
||||
@@ -440,6 +440,17 @@ class WecomBotChannel(ChatChannel):
|
||||
state["current"] = ""
|
||||
_push_stream(state, force=True)
|
||||
|
||||
elif event_type == "agent_cancelled":
|
||||
# Flush partial output and strip trailing "---" separator
|
||||
# left over from previous turn, to avoid a dangling divider.
|
||||
if state["current"]:
|
||||
state["committed"] += state["current"]
|
||||
state["current"] = ""
|
||||
state["committed"] = state["committed"].rstrip()
|
||||
if state["committed"].endswith("---"):
|
||||
state["committed"] = state["committed"][:-3].rstrip()
|
||||
_push_stream(state, force=True)
|
||||
|
||||
return on_event
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@@ -47,14 +47,16 @@ def _load_credentials(cred_path: str) -> dict:
|
||||
|
||||
|
||||
def _save_credentials(cred_path: str, data: dict):
    """Atomically save credentials to a JSON file (tmp + rename).

    The payload is written to ``<cred_path>.tmp``, restricted to mode 0600
    on a best-effort basis, and then moved over ``cred_path`` with
    ``os.replace`` so readers never observe a half-written file.

    Args:
        cred_path: Destination file. May be a bare filename, in which case
            no directory is created.
        data: JSON-serializable credentials mapping.

    Raises:
        OSError: If the directory or file cannot be written.
        TypeError: If ``data`` is not JSON-serializable.
        ValueError: If ``data`` contains circular references.
    """
    parent_dir = os.path.dirname(cred_path)
    # A bare filename has an empty dirname, which os.makedirs rejects.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    tmp_path = f"{cred_path}.tmp"
    try:
        with open(tmp_path, "w") as f:
            json.dump(data, f, indent=2)
        try:
            os.chmod(tmp_path, 0o600)
        except Exception:
            # Best effort: permission bits may not be supported everywhere.
            pass
        os.replace(tmp_path, cred_path)
    except BaseException:
        # Do not leave a partial temp file behind; the previous credentials
        # file (if any) is still intact because the rename never happened.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
|
||||
|
||||
|
||||
@singleton
|
||||
@@ -73,7 +75,10 @@ class WeixinChannel(ChatChannel):
|
||||
self.api = None
|
||||
self._stop_event = threading.Event()
|
||||
self._poll_thread = None
|
||||
self._context_tokens = {} # user_id -> context_token
|
||||
# user_id -> context_token. Guarded by _context_tokens_lock for any
|
||||
# mutation that races with disk persistence.
|
||||
self._context_tokens = {}
|
||||
self._context_tokens_lock = threading.Lock()
|
||||
self._received_msgs = ExpiredDict(60 * 60 * 7.1)
|
||||
self._get_updates_buf = ""
|
||||
self._credentials_path = ""
|
||||
@@ -95,12 +100,19 @@ class WeixinChannel(ChatChannel):
|
||||
conf().get("weixin_credentials_path", "~/.weixin_cow_credentials.json")
|
||||
)
|
||||
|
||||
# Always load credentials so we can restore context_tokens even when
|
||||
# the bot token itself comes from config.
|
||||
creds = _load_credentials(self._credentials_path)
|
||||
if not token:
|
||||
creds = _load_credentials(self._credentials_path)
|
||||
token = creds.get("token", "")
|
||||
if creds.get("base_url"):
|
||||
base_url = creds["base_url"]
|
||||
|
||||
# Restore persisted context_tokens so scheduler can deliver pushes
|
||||
# immediately after restart, without waiting for the user to ping
|
||||
# the bot first.
|
||||
self._restore_context_tokens_from_creds(creds)
|
||||
|
||||
if not token:
|
||||
token, base_url = self._login_with_retry(base_url)
|
||||
if not token:
|
||||
@@ -140,11 +152,16 @@ class WeixinChannel(ChatChannel):
|
||||
def _relogin(self) -> bool:
|
||||
"""Re-login after session expiry. Returns True on success."""
|
||||
base_url = self.api.base_url if self.api else DEFAULT_BASE_URL
|
||||
if os.path.exists(self._credentials_path):
|
||||
try:
|
||||
os.remove(self._credentials_path)
|
||||
except Exception:
|
||||
pass
|
||||
# Clearing the whole credentials file is intentional: the new login
|
||||
# will issue a fresh `token` and persisted context_tokens belong to
|
||||
# the previous bot identity, so they must not survive.
|
||||
with self._context_tokens_lock:
|
||||
self._context_tokens.clear()
|
||||
if os.path.exists(self._credentials_path):
|
||||
try:
|
||||
os.remove(self._credentials_path)
|
||||
except Exception:
|
||||
pass
|
||||
self.login_status = self.LOGIN_STATUS_WAITING
|
||||
result = self._qr_login(base_url)
|
||||
if not result:
|
||||
@@ -156,9 +173,62 @@ class WeixinChannel(ChatChannel):
|
||||
cdn_base_url=self.api.cdn_base_url if self.api else CDN_BASE_URL,
|
||||
)
|
||||
self.login_status = self.LOGIN_STATUS_OK
|
||||
self._context_tokens.clear()
|
||||
return True
|
||||
|
||||
# ── Context token persistence ──────────────────────────────────────
|
||||
# ilink requires every outbound send to echo the context_token from the
|
||||
# user's latest inbound message. We mirror the in-memory map into the
|
||||
# credentials JSON so scheduled pushes survive process restarts.
|
||||
# All mutation + disk IO is serialized via _context_tokens_lock so that
|
||||
# concurrent updates can never lose each other's writes.
|
||||
|
||||
def _restore_context_tokens_from_creds(self, creds: dict) -> None:
    """Copy persisted context_tokens from a credentials dict into memory.

    Does nothing unless ``creds`` is a dict whose ``"context_tokens"`` entry
    is itself a dict. Only entries with a string key and a non-empty string
    value are copied; the rest are skipped. Logs the number restored when
    at least one entry was copied.
    """
    persisted = creds.get("context_tokens") if isinstance(creds, dict) else None
    if not isinstance(persisted, dict):
        return

    usable = {
        uid: tok
        for uid, tok in persisted.items()
        if isinstance(uid, str) and isinstance(tok, str) and tok
    }
    with self._context_tokens_lock:
        for uid, tok in usable.items():
            self._context_tokens[uid] = tok

    restored = len(usable)
    if restored:
        logger.info(f"[Weixin] Restored {restored} context_tokens from credentials")
|
||||
|
||||
def _persist_context_tokens_locked(self) -> None:
    """Write the in-memory token map into the credentials file.

    Caller must hold _context_tokens_lock. The existing credentials are
    re-read, their ``"context_tokens"`` entry is replaced with a copy of the
    current map, and the result is saved back. Does nothing when no
    credentials path is set; any failure is logged as a warning rather than
    raised.
    """
    if self._credentials_path:
        try:
            stored = _load_credentials(self._credentials_path) or {}
            stored["context_tokens"] = dict(self._context_tokens)
            _save_credentials(self._credentials_path, stored)
        except Exception as e:
            logger.warning(f"[Weixin] Failed to persist context_tokens: {e}")
|
||||
|
||||
def _update_context_token(self, user_id: str, token: str) -> None:
    """Record a user's context token and persist it when the value changed.

    Calls with an empty ``user_id`` or ``token`` are ignored. When the
    stored token already equals ``token`` nothing is written, so disk is
    only touched on an actual change.
    """
    if not (user_id and token):
        return
    with self._context_tokens_lock:
        already_current = self._context_tokens.get(user_id) == token
        if not already_current:
            self._context_tokens[user_id] = token
            self._persist_context_tokens_locked()
|
||||
|
||||
def _invalidate_context_token(self, user_id: str) -> None:
    """Forget the cached token for a user (used after -14 / send rejection).

    Ignores an empty ``user_id`` and users with no cached token. When an
    entry is removed, the removal is logged and the token map is persisted.
    """
    if user_id:
        with self._context_tokens_lock:
            if user_id in self._context_tokens:
                del self._context_tokens[user_id]
                logger.info(f"[Weixin] Invalidated stale context_token for {user_id}")
                self._persist_context_tokens_locked()
|
||||
|
||||
# ── QR Login ───────────────────────────────────────────────────────
|
||||
|
||||
@staticmethod
|
||||
@@ -391,7 +461,7 @@ class WeixinChannel(ChatChannel):
|
||||
context_token = raw_msg.get("context_token", "")
|
||||
|
||||
if context_token and from_user:
|
||||
self._context_tokens[from_user] = context_token
|
||||
self._update_context_token(from_user, context_token)
|
||||
|
||||
cdn_base_url = self.api.cdn_base_url if self.api else CDN_BASE_URL
|
||||
try:
|
||||
@@ -510,10 +580,30 @@ class WeixinChannel(ChatChannel):
|
||||
return msg.context_token
|
||||
return self._context_tokens.get(receiver, "")
|
||||
|
||||
def _check_send_response(self, resp, receiver: str) -> None:
    """Inspect a send-API response and drop a stale context_token on -14.

    ilink reports ret/errcode = -14 when the session (and any cached
    context_token) is no longer valid. The bot itself can re-login, so the
    only action taken here is forgetting the receiver's token, which keeps
    the next push from retrying with it forever. Non-dict responses are
    ignored.
    """
    if isinstance(resp, dict):
        status_codes = (resp.get("ret"), resp.get("errcode"))
        if -14 in status_codes:
            logger.warning(
                f"[Weixin] Send returned -14 (session expired) for "
                f"receiver={receiver}; dropping cached context_token"
            )
            self._invalidate_context_token(receiver)
|
||||
|
||||
def _send_text(self, text: str, receiver: str, context_token: str):
|
||||
if len(text) <= TEXT_CHUNK_LIMIT:
|
||||
try:
|
||||
self.api.send_text(receiver, text, context_token)
|
||||
resp = self.api.send_text(receiver, text, context_token)
|
||||
self._check_send_response(resp, receiver)
|
||||
logger.debug(f"[Weixin] Text sent to {receiver}, len={len(text)}")
|
||||
except Exception as e:
|
||||
logger.error(f"[Weixin] Failed to send text: {e}")
|
||||
@@ -522,7 +612,8 @@ class WeixinChannel(ChatChannel):
|
||||
chunks = self._split_text(text, TEXT_CHUNK_LIMIT)
|
||||
for i, chunk in enumerate(chunks):
|
||||
try:
|
||||
self.api.send_text(receiver, chunk, context_token)
|
||||
resp = self.api.send_text(receiver, chunk, context_token)
|
||||
self._check_send_response(resp, receiver)
|
||||
logger.debug(f"[Weixin] Text chunk {i+1}/{len(chunks)} sent to {receiver}, len={len(chunk)}")
|
||||
except Exception as e:
|
||||
logger.error(f"[Weixin] Failed to send text chunk {i+1}/{len(chunks)}: {e}")
|
||||
@@ -556,13 +647,14 @@ class WeixinChannel(ChatChannel):
|
||||
return
|
||||
try:
|
||||
result = upload_media_to_cdn(self.api, local_path, receiver, media_type=1)
|
||||
self.api.send_image_item(
|
||||
resp = self.api.send_image_item(
|
||||
to=receiver,
|
||||
context_token=context_token,
|
||||
encrypt_query_param=result["encrypt_query_param"],
|
||||
aes_key_b64=result["aes_key_b64"],
|
||||
ciphertext_size=result["ciphertext_size"],
|
||||
)
|
||||
self._check_send_response(resp, receiver)
|
||||
logger.info(f"[Weixin] Image sent to {receiver}")
|
||||
except Exception as e:
|
||||
logger.error(f"[Weixin] Image send failed: {e}")
|
||||
@@ -575,7 +667,7 @@ class WeixinChannel(ChatChannel):
|
||||
return
|
||||
try:
|
||||
result = upload_media_to_cdn(self.api, local_path, receiver, media_type=3)
|
||||
self.api.send_file_item(
|
||||
resp = self.api.send_file_item(
|
||||
to=receiver,
|
||||
context_token=context_token,
|
||||
encrypt_query_param=result["encrypt_query_param"],
|
||||
@@ -583,6 +675,7 @@ class WeixinChannel(ChatChannel):
|
||||
file_name=os.path.basename(local_path),
|
||||
file_size=result["raw_size"],
|
||||
)
|
||||
self._check_send_response(resp, receiver)
|
||||
logger.info(f"[Weixin] File sent to {receiver}")
|
||||
except Exception as e:
|
||||
logger.error(f"[Weixin] File send failed: {e}")
|
||||
@@ -595,13 +688,14 @@ class WeixinChannel(ChatChannel):
|
||||
return
|
||||
try:
|
||||
result = upload_media_to_cdn(self.api, local_path, receiver, media_type=2)
|
||||
self.api.send_video_item(
|
||||
resp = self.api.send_video_item(
|
||||
to=receiver,
|
||||
context_token=context_token,
|
||||
encrypt_query_param=result["encrypt_query_param"],
|
||||
aes_key_b64=result["aes_key_b64"],
|
||||
ciphertext_size=result["ciphertext_size"],
|
||||
)
|
||||
self._check_send_response(resp, receiver)
|
||||
logger.info(f"[Weixin] Video sent to {receiver}")
|
||||
except Exception as e:
|
||||
logger.error(f"[Weixin] Video send failed: {e}")
|
||||
|
||||
@@ -15,6 +15,7 @@ ZHIPU_AI = "zhipu"
|
||||
MOONSHOT = "moonshot"
|
||||
MiniMax = "minimax"
|
||||
DEEPSEEK = "deepseek"
|
||||
MIMO = "mimo" # 小米 MiMo 大模型
|
||||
CUSTOM = "custom" # custom OpenAI-compatible API, bot_type won't auto-switch on model change
|
||||
MODELSCOPE = "modelscope"
|
||||
|
||||
@@ -140,6 +141,13 @@ KIMI_K2 = "kimi-k2"
|
||||
KIMI_K2_5 = "kimi-k2.5"
|
||||
KIMI_K2_6 = "kimi-k2.6" # Kimi K2.6 - Agent recommended model (default)
|
||||
|
||||
# 小米 MiMo
|
||||
MIMO_V2_5_PRO = "mimo-v2.5-pro" # MiMo V2.5 Pro - 旗舰,长上下文(默认推荐)
|
||||
MIMO_V2_5 = "mimo-v2.5" # MiMo V2.5 - 多模态(文/图/音/视频)
|
||||
MIMO_V2_PRO = "mimo-v2-pro" # MiMo V2 Pro
|
||||
MIMO_V2_OMNI = "mimo-v2-omni" # MiMo V2 Omni - 多模态
|
||||
MIMO_V2_FLASH = "mimo-v2-flash" # MiMo V2 Flash - 极速版
|
||||
|
||||
# Doubao (Volcengine Ark)
|
||||
DOUBAO = "doubao"
|
||||
DOUBAO_SEED_2_CODE = "doubao-seed-2-0-code-preview-260215"
|
||||
@@ -182,6 +190,9 @@ MODEL_LIST = [
|
||||
# MiniMax
|
||||
MiniMax, MINIMAX_M2_7, MINIMAX_M2_7_HIGHSPEED, MINIMAX_M2_5, MINIMAX_M2_1, MINIMAX_M2_1_LIGHTNING, MINIMAX_M2, MINIMAX_ABAB6_5,
|
||||
|
||||
# 小米 MiMo
|
||||
MIMO, MIMO_V2_5_PRO, MIMO_V2_5, MIMO_V2_PRO, MIMO_V2_OMNI, MIMO_V2_FLASH,
|
||||
|
||||
# Claude
|
||||
CLAUDE3, CLAUDE_4_6_SONNET, CLAUDE_4_7_OPUS, CLAUDE_4_6_OPUS, CLAUDE_4_OPUS, CLAUDE_4_5_SONNET, CLAUDE_4_SONNET, CLAUDE_3_OPUS, CLAUDE_3_OPUS_0229,
|
||||
CLAUDE_35_SONNET, CLAUDE_35_SONNET_1022, CLAUDE_35_SONNET_0620, CLAUDE_3_SONNET, CLAUDE_3_HAIKU,
|
||||
@@ -232,3 +243,4 @@ DINGTALK = "dingtalk"
|
||||
WECOM_BOT = "wecom_bot"
|
||||
QQ = "qq"
|
||||
WEIXIN = "weixin"
|
||||
TELEGRAM = "telegram"
|
||||
|
||||
12
config.py
12
config.py
@@ -166,6 +166,11 @@ available_setting = {
|
||||
# 企微智能机器人配置(长连接模式)
|
||||
"wecom_bot_id": "", # 企微智能机器人BotID
|
||||
"wecom_bot_secret": "", # 企微智能机器人长连接Secret
|
||||
# Telegram 配置
|
||||
"telegram_token": "", # 从 @BotFather 申请的 bot token
|
||||
"telegram_proxy": "", # 可选的 HTTP/SOCKS5 代理,例如 http://127.0.0.1:7890 或 socks5://127.0.0.1:1080(留空则走系统环境变量)
|
||||
"telegram_group_trigger": "mention_or_reply", # 群聊触发方式: mention_or_reply(@或回复触发,推荐) | mention_only(仅@) | all(所有消息)
|
||||
"telegram_register_commands": True, # 启动时是否自动向 BotFather 注册命令菜单(与 web 端 slash 命令一致)
|
||||
# 微信配置
|
||||
"weixin_token": "", # 微信登录后获取的bot_token,留空则启动时自动扫码登录
|
||||
"weixin_base_url": "https://ilinkai.weixin.qq.com", # Weixin ilink API base URL
|
||||
@@ -174,7 +179,7 @@ available_setting = {
|
||||
# chatgpt指令自定义触发词
|
||||
"clear_memory_commands": ["#清除记忆"], # 重置会话指令,必须以#开头
|
||||
# channel配置
|
||||
"channel_type": "", # 通道类型,支持多渠道同时运行。单个: "feishu",多个: "feishu, dingtalk" 或 ["feishu", "dingtalk"]。可选值: web,feishu,dingtalk,wecom_bot,weixin,wechatmp,wechatmp_service,wechatcom_app
|
||||
"channel_type": "", # 通道类型,支持多渠道同时运行。单个: "feishu",多个: "feishu, dingtalk" 或 ["feishu", "dingtalk"]。可选值: web,feishu,dingtalk,wecom_bot,weixin,wechatmp,wechatmp_service,wechatcom_app,telegram
|
||||
"web_console": True, # 是否自动启动Web控制台(默认启动)。设为False可禁用
|
||||
"subscribe_msg": "", # 订阅消息, 支持: wechatmp, wechatmp_service, wechatcom_app
|
||||
"debug": False, # 是否开启debug模式,开启后会打印更多日志
|
||||
@@ -209,6 +214,9 @@ available_setting = {
|
||||
"Minimax_base_url": "",
|
||||
"deepseek_api_key": "",
|
||||
"deepseek_api_base": "https://api.deepseek.com/v1",
|
||||
# 小米 MiMo 大模型
|
||||
"mimo_api_key": "",
|
||||
"mimo_api_base": "https://api.xiaomimimo.com/v1",
|
||||
"web_host": "", # Web console bind address; empty means auto
|
||||
"web_port": 9899,
|
||||
"web_password": "", # Web console password; empty means no authentication required
|
||||
@@ -401,6 +409,8 @@ def load_config():
|
||||
"minimax_api_base": "MINIMAX_API_BASE",
|
||||
"deepseek_api_key": "DEEPSEEK_API_KEY",
|
||||
"deepseek_api_base": "DEEPSEEK_API_BASE",
|
||||
"mimo_api_key": "MIMO_API_KEY",
|
||||
"mimo_api_base": "MIMO_API_BASE",
|
||||
"qianfan_api_key": "QIANFAN_API_KEY",
|
||||
"qianfan_api_base": "QIANFAN_API_BASE",
|
||||
"zhipu_ai_api_key": "ZHIPU_AI_API_KEY",
|
||||
|
||||
30
docs/README.md
Normal file
30
docs/README.md
Normal file
@@ -0,0 +1,30 @@
|
||||
# Documentation
|
||||
|
||||
This directory contains the Mintlify documentation site for the project.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Node.js v20.17.0 or higher (LTS recommended)
|
||||
|
||||
## Install the CLI (one-time, global)
|
||||
|
||||
```bash
|
||||
npm i -g mint
|
||||
```
|
||||
|
||||
## Run the docs locally
|
||||
|
||||
From this `docs/` directory:
|
||||
|
||||
```bash
|
||||
mint dev
|
||||
```
|
||||
|
||||
Then open http://localhost:3000 (or the port Mint reports if 3000 is in use).
|
||||
|
||||
> The first run downloads the Mint preview framework (~90 MB) into `~/.mintlify/`.
|
||||
> Subsequent runs start instantly from the local cache.
|
||||
|
||||
## More
|
||||
|
||||
- Mintlify docs: https://www.mintlify.com/docs
|
||||
@@ -19,6 +19,7 @@ CowAgent 支持接入多种聊天通道,启动时通过 `channel_type` 切换
|
||||
| [QQ](/channels/qq) | ✅ | ✅ | ✅ | | ✅ |
|
||||
| [企业微信应用](/channels/wecom) | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [公众号](/channels/wechatmp) | ✅ | ✅ | | ✅ | |
|
||||
| [Telegram](/channels/telegram) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
|
||||
- **图片 / 文件 / 语音**列表示通道支持收发对应消息类型,具体细节详见各通道文档
|
||||
- **群聊**列指可识别并响应群消息
|
||||
@@ -37,3 +38,4 @@ CowAgent 支持接入多种聊天通道,启动时通过 `channel_type` 切换
|
||||
- [QQ](/channels/qq) — QQ 官方机器人开放平台
|
||||
- [企业微信应用](/channels/wecom) — 企业微信自建应用接入
|
||||
- [公众号](/channels/wechatmp) — 微信公众号(订阅号 / 服务号)
|
||||
- [Telegram](/channels/telegram) — 海外 IM,5 分钟接入,无需公网 IP
|
||||
|
||||
112
docs/channels/telegram.mdx
Normal file
112
docs/channels/telegram.mdx
Normal file
@@ -0,0 +1,112 @@
|
||||
---
|
||||
title: Telegram
|
||||
description: 将 CowAgent 接入 Telegram Bot
|
||||
---
|
||||
|
||||
> 通过 Telegram Bot API 接入 CowAgent,支持单聊与群聊(@机器人 / 回复机器人触发),使用 Long Polling 模式无需公网 IP,开箱即用。
|
||||
|
||||
|
||||
## 一、接入步骤
|
||||
|
||||
### 步骤一:通过 BotFather 创建 Bot
|
||||
|
||||
1. 在 Telegram 中搜索并打开官方账号 [@BotFather](https://t.me/BotFather)。
|
||||
2. 发送 `/newbot` 命令,按提示输入:
|
||||
- **Bot 名称**(显示名,可中文,例如 `My CowAgent Bot`)
|
||||
- **Bot 用户名**(必须以 `bot` 结尾,例如 `my_cowagent_bot`)
|
||||
3. 创建成功后,BotFather 会返回一段 **HTTP API Token**(形如 `123456789:ABCdefGhIJKlmNoPQRsTUVwxyZ`),妥善保存。
|
||||
|
||||
<Tip>
|
||||
这个 Token 等同于 Bot 的密码,请勿泄露。如果意外泄露,可向 `@BotFather` 发送 `/revoke` 重置。
|
||||
</Tip>
|
||||
|
||||
### 步骤二:(群聊使用)关闭 Privacy Mode
|
||||
|
||||
仅使用单聊可跳过此步。Telegram Bot 默认开启 **Privacy Mode**,群聊中只能收到带 `@bot` 的命令(如 `/start@your_bot`)以及对 bot 消息的 reply;**普通的 `@bot 你好` 文字消息收不到**,会导致群聊无响应。
|
||||
|
||||
向 `@BotFather` 发送:
|
||||
|
||||
1. `/setprivacy`
|
||||
2. 选择刚才创建的 bot
|
||||
3. 选择 `Disable`
|
||||
|
||||
<Note>
|
||||
若设置后群聊仍无响应,可尝试把 Bot 从群里移除并重新拉入。
|
||||
</Note>
|
||||
|
||||
### 步骤三:接入 CowAgent
|
||||
|
||||
<Tabs>
|
||||
<Tab title="Web 控制台(推荐)">
|
||||
打开 Web 控制台(本地链接:http://127.0.0.1:9899 ),选择 **通道** 菜单,点击 **接入通道**,选择 **Telegram**,填入 Bot Token,点击接入即可。
|
||||
</Tab>
|
||||
<Tab title="配置文件">
|
||||
在 `config.json` 中添加以下配置后启动:
|
||||
|
||||
```json
|
||||
{
|
||||
"channel_type": "telegram",
|
||||
"telegram_token": "123456789:ABCdefGhIJKlmNoPQRsTUVwxyZ",
|
||||
"telegram_group_trigger": "mention_or_reply"
|
||||
}
|
||||
```
|
||||
|
||||
| 参数 | 说明 | 默认值 |
|
||||
| --- | --- | --- |
|
||||
| `telegram_token` | BotFather 返回的 HTTP API Token | - |
|
||||
| `telegram_group_trigger` | 群聊触发方式:`mention_or_reply`(@或回复机器人)/ `mention_only`(仅@) / `all`(所有消息) | `mention_or_reply` |
|
||||
| `telegram_register_commands` | 启动时是否自动向 BotFather 注册命令菜单 | `true` |
|
||||
| `telegram_proxy` | (可选)代理地址,如 `http://127.0.0.1:7890`、`socks5://127.0.0.1:1080`;运行环境无法直连 `api.telegram.org` 时配置,留空则使用环境变量 `HTTPS_PROXY` | `""` |
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
启动 Cow 后,日志中出现以下输出即表示接入成功:
|
||||
|
||||
```
|
||||
[Telegram] Bot logged in as @my_cowagent_bot (id=123456789)
|
||||
[Telegram] Registered 10 bot commands
|
||||
[Telegram] ✅ Telegram bot ready, polling for updates
|
||||
```
|
||||
|
||||
## 二、功能说明
|
||||
|
||||
| 功能 | 支持情况 |
|
||||
| --- | --- |
|
||||
| 单聊 | ✅ |
|
||||
| 群聊(@机器人 / 回复机器人) | ✅ |
|
||||
| 文本消息 | ✅ 收发 |
|
||||
| 图片消息 | ✅ 收发 |
|
||||
| 语音消息 | ✅ 收发(接收 OGG/Opus,发送 OGG/Opus) |
|
||||
| 视频消息 | ✅ 收发 |
|
||||
| 文件消息 | ✅ 收发(PDF / Word / Excel 等) |
|
||||
| 命令菜单 | ✅ 与 Web 控制台 slash 命令一致 |
|
||||
|
||||
### 命令菜单
|
||||
|
||||
启动时会自动向 BotFather 注册命令菜单,用户在 Telegram 输入框输入 `/` 会出现下拉提示:
|
||||
|
||||
| 命令 | 说明 |
|
||||
| --- | --- |
|
||||
| `/help` | 显示命令帮助 |
|
||||
| `/status` | 查看运行状态 |
|
||||
| `/context` | 查看对话上下文(`/context clear` 清除) |
|
||||
| `/skill` | 技能管理(`/skill list`、`/skill install` 等) |
|
||||
| `/memory` | 记忆管理(`/memory dream`) |
|
||||
| `/knowledge` | 知识库管理(`/knowledge list` / `on` / `off`) |
|
||||
| `/config` | 查看当前配置 |
|
||||
| `/cancel` | 中止当前正在运行的 Agent 任务 |
|
||||
| `/logs` | 查看最近日志 |
|
||||
| `/version` | 查看版本 |
|
||||
|
||||
<Note>
|
||||
Telegram 命令菜单只能展示一级命令,子命令通过空格输入即可,例如 `/skill list`、`/context clear`。
|
||||
</Note>
|
||||
|
||||
## 三、使用
|
||||
|
||||
完成接入后:
|
||||
|
||||
- **单聊**:在 Telegram 中搜索你创建的 Bot 用户名(如 `@my_cowagent_bot`),点击 `Start` 即可开始对话。
|
||||
- **群聊**:把 Bot 拉进群,使用 `@bot 你好` 或 **回复 Bot 的某条消息** 触发对话。若群聊无响应,请检查 Privacy Mode 是否已按 [步骤二](#步骤二-群聊使用-关闭-privacy-mode) 关闭。
|
||||
|
||||
发送图片或文件时,可以直接在附件上方的输入框中 **添加 Caption**(描述/问题)一并发送,机器人会结合附件回答。也支持先发附件再发问题,两条消息会自动合并提问。
|
||||
@@ -39,6 +39,14 @@ Mode: agent
|
||||
Session: 12 messages | 8 skills loaded
|
||||
```
|
||||
|
||||
## cancel
|
||||
|
||||
中止当前会话正在运行的 Agent 任务。在 Agent 执行长时间任务(例如多轮工具调用、长流式输出)时,可随时发送 `/cancel`,Agent 会在下一次工具执行前停止。Web 端、微信、企业微信、飞书等各通道均可使用。
|
||||
|
||||
```text
|
||||
/cancel
|
||||
```
|
||||
|
||||
## config
|
||||
|
||||
查看或修改运行时配置。修改后立即生效,无需重启服务。
|
||||
|
||||
@@ -57,6 +57,7 @@ Others:
|
||||
| --- | --- |
|
||||
| `/help` | 显示命令帮助 |
|
||||
| `/status` | 查看服务状态和配置 |
|
||||
| `/cancel` | 中止当前正在运行的 Agent 任务 |
|
||||
| `/config` | 查看或修改运行时配置 |
|
||||
| `/skill` | 管理技能(安装、卸载、启用、禁用等) |
|
||||
| `/memory dream [N]` | 手动触发记忆蒸馏(默认 3 天,最大 30) |
|
||||
@@ -82,6 +83,7 @@ Others:
|
||||
| version | ✓ | ✓ |
|
||||
| status | ✓ | ✓ |
|
||||
| logs | ✓ | ✓ |
|
||||
| cancel | ✗ | ✓ |
|
||||
| config | ✗ | ✓ |
|
||||
| context | — | ✓ |
|
||||
| memory (子命令) | ✗ | ✓ |
|
||||
|
||||
@@ -38,6 +38,12 @@
|
||||
{
|
||||
"language": "zh",
|
||||
"default": true,
|
||||
"navbar": {
|
||||
"links": [
|
||||
{ "label": "官网", "href": "https://cowagent.ai/?lang=zh" },
|
||||
{ "label": "GitHub", "href": "https://github.com/zhayujie/CowAgent" }
|
||||
]
|
||||
},
|
||||
"tabs": [
|
||||
{
|
||||
"tab": "项目介绍",
|
||||
@@ -82,6 +88,7 @@
|
||||
"models/doubao",
|
||||
"models/kimi",
|
||||
"models/qianfan",
|
||||
"models/mimo",
|
||||
"models/linkai",
|
||||
"models/coding-plan",
|
||||
"models/custom"
|
||||
@@ -189,7 +196,8 @@
|
||||
"channels/wecom-bot",
|
||||
"channels/qq",
|
||||
"channels/wecom",
|
||||
"channels/wechatmp"
|
||||
"channels/wechatmp",
|
||||
"channels/telegram"
|
||||
]
|
||||
}
|
||||
]
|
||||
@@ -234,6 +242,12 @@
|
||||
},
|
||||
{
|
||||
"language": "en",
|
||||
"navbar": {
|
||||
"links": [
|
||||
{ "label": "Website", "href": "https://cowagent.ai/" },
|
||||
{ "label": "GitHub", "href": "https://github.com/zhayujie/CowAgent" }
|
||||
]
|
||||
},
|
||||
"tabs": [
|
||||
{
|
||||
"tab": "Introduction",
|
||||
@@ -255,7 +269,8 @@
|
||||
"group": "Installation",
|
||||
"pages": [
|
||||
"en/guide/quick-start",
|
||||
"en/guide/manual-install"
|
||||
"en/guide/manual-install",
|
||||
"en/guide/upgrade"
|
||||
]
|
||||
}
|
||||
]
|
||||
@@ -277,6 +292,7 @@
|
||||
"en/models/doubao",
|
||||
"en/models/kimi",
|
||||
"en/models/qianfan",
|
||||
"en/models/mimo",
|
||||
"en/models/linkai",
|
||||
"en/models/coding-plan",
|
||||
"en/models/custom"
|
||||
@@ -332,6 +348,7 @@
|
||||
"pages": [
|
||||
"en/skills/index",
|
||||
"en/skills/install",
|
||||
"en/skills/create",
|
||||
"en/skills/hub"
|
||||
]
|
||||
},
|
||||
@@ -383,7 +400,8 @@
|
||||
"en/channels/wecom-bot",
|
||||
"en/channels/qq",
|
||||
"en/channels/wecom",
|
||||
"en/channels/wechatmp"
|
||||
"en/channels/wechatmp",
|
||||
"en/channels/telegram"
|
||||
]
|
||||
}
|
||||
]
|
||||
@@ -398,7 +416,7 @@
|
||||
"en/cli/process",
|
||||
"en/cli/skill",
|
||||
"en/cli/memory-knowledge",
|
||||
"en/cli/chat"
|
||||
"en/cli/general"
|
||||
]
|
||||
}
|
||||
]
|
||||
@@ -428,6 +446,12 @@
|
||||
},
|
||||
{
|
||||
"language": "ja",
|
||||
"navbar": {
|
||||
"links": [
|
||||
{ "label": "ウェブサイト", "href": "https://cowagent.ai/" },
|
||||
{ "label": "GitHub", "href": "https://github.com/zhayujie/CowAgent" }
|
||||
]
|
||||
},
|
||||
"tabs": [
|
||||
{
|
||||
"tab": "紹介",
|
||||
@@ -472,6 +496,7 @@
|
||||
"ja/models/doubao",
|
||||
"ja/models/kimi",
|
||||
"ja/models/qianfan",
|
||||
"ja/models/mimo",
|
||||
"ja/models/linkai",
|
||||
"ja/models/coding-plan",
|
||||
"ja/models/custom"
|
||||
@@ -579,7 +604,8 @@
|
||||
"ja/channels/wecom-bot",
|
||||
"ja/channels/qq",
|
||||
"ja/channels/wecom",
|
||||
"ja/channels/wechatmp"
|
||||
"ja/channels/wechatmp",
|
||||
"ja/channels/telegram"
|
||||
]
|
||||
}
|
||||
]
|
||||
|
||||
@@ -1,250 +0,0 @@
|
||||
<p align="center"><img src="https://github.com/user-attachments/assets/eca9a9ec-8534-4615-9e0f-96c5ac1d10a3" alt="CowAgent" width="550" /></p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://github.com/zhayujie/CowAgent/releases/latest"><img src="https://img.shields.io/github/v/release/zhayujie/CowAgent" alt="Latest release"></a>
|
||||
<a href="https://github.com/zhayujie/CowAgent/blob/master/LICENSE"><img src="https://img.shields.io/github/license/zhayujie/CowAgent" alt="License: MIT"></a>
|
||||
<a href="https://github.com/zhayujie/CowAgent"><img src="https://img.shields.io/github/stars/zhayujie/CowAgent?style=flat-square" alt="Stars"></a> <br/>
|
||||
[<a href="https://github.com/zhayujie/CowAgent/blob/master/README.md">中文</a>] | [English] | [<a href="https://github.com/zhayujie/CowAgent/blob/master/docs/ja/README.md">日本語</a>]
|
||||
</p>
|
||||
|
||||
**CowAgent** is an AI super assistant powered by LLMs, capable of autonomous task planning, operating computers and external resources, creating and executing Skills, and continuously growing with long-term memory and a personal knowledge base. It supports flexible model switching, handles text, voice, images, and files, and can be integrated into WeChat, Web, Feishu, DingTalk, WeCom Bot, WeCom App, and WeChat Official Account — running 24/7 on your personal computer or server.
|
||||
|
||||
<p align="center">
|
||||
<a href="https://cowagent.ai/">🌐 Website</a> ·
|
||||
<a href="https://docs.cowagent.ai/en/intro/index">📖 Docs</a> ·
|
||||
<a href="https://docs.cowagent.ai/en/guide/quick-start">🚀 Quick Start</a> ·
|
||||
<a href="https://skills.cowagent.ai/">🧩 Skill Hub</a> ·
|
||||
<a href="https://link-ai.tech/cowagent/create">☁️ Try Online</a>
|
||||
</p>
|
||||
|
||||
## Introduction
|
||||
|
||||
> CowAgent is both an out-of-the-box AI super assistant and a highly extensible Agent framework. You can extend it with new model interfaces, channels, built-in tools, and the Skills system to flexibly implement various customization needs.
|
||||
|
||||
- ✅ **Autonomous Task Planning**: Understands complex tasks and autonomously plans execution, continuously thinking and invoking tools until goals are achieved.
|
||||
- ✅ **Long-term Memory**: Automatically persists conversation memory to local files and databases, including core memory, daily memory, and Deep Dream distillation, with keyword and vector retrieval support.
|
||||
- ✅ **Personal Knowledge Base**: Automatically organizes structured knowledge with cross-references to build a knowledge graph, with web-based visualization and conversational management.
|
||||
- ✅ **Skills System**: Implements a Skills creation and execution engine, supports installing skills from [Skill Hub](https://skills.cowagent.ai), GitHub, etc., or creating custom Skills through conversation.
|
||||
- ✅ **Tool System**: Built-in tools for file I/O, terminal execution, browser automation, scheduled tasks, messaging, and more — autonomously invoked by the Agent.
|
||||
- ✅ **CLI System**: Provides terminal commands and in-chat commands for process management, skill installation, configuration, and more.
|
||||
- ✅ **Multimodal Messages**: Supports parsing, processing, generating, and sending text, images, voice, files, and other message types.
|
||||
- ✅ **Multiple Model Support**: Supports DeepSeek, MiniMax, Claude, Gemini, OpenAI, GLM, Qwen, Doubao, Kimi, and other mainstream model providers.
|
||||
- ✅ **Multi-platform Deployment**: Runs on local computers or servers, integrable into WeChat, Web, Feishu, DingTalk, WeChat Official Account, and WeCom applications.
|
||||
|
||||
## Disclaimer
|
||||
|
||||
1. This project follows the [MIT License](/LICENSE) and is intended for technical research and learning. Users must comply with local laws, regulations, policies, and corporate bylaws. Any illegal or rights-infringing use is prohibited.
|
||||
2. Agent mode consumes more tokens than normal chat mode. Choose models based on effectiveness and cost. The Agent has access to the host OS, so deploy it only in trusted environments.
|
||||
3. CowAgent focuses on open-source development and does not participate in, authorize, or issue any cryptocurrency.
|
||||
|
||||
## Demo
|
||||
|
||||
Try online (no deployment needed): [CowAgent](https://link-ai.tech/cowagent/create)
|
||||
|
||||
## Changelog
|
||||
|
||||
> **2026.04.14:** [v2.0.6](https://github.com/zhayujie/CowAgent/releases/tag/2.0.6) — Knowledge Base, Deep Dream Memory Distillation, Smart Context Compression, Web Console upgrades.
|
||||
|
||||
> **2026.04.01:** [v2.0.5](https://github.com/zhayujie/CowAgent/releases/tag/2.0.5) — Cow CLI, Skill Hub open source, Browser tool, WeCom Bot QR scan, and more.
|
||||
|
||||
> **2026.02.27:** [v2.0.2](https://github.com/zhayujie/CowAgent/releases/tag/2.0.2) — Web console overhaul (streaming chat, model/skill/memory/channel/scheduler/log management), multi-channel concurrent running, session persistence, new models including Gemini 3.1 Pro / Claude 4.6 Sonnet / Qwen3.5 Plus.
|
||||
|
||||
> **2026.02.13:** [v2.0.1](https://github.com/zhayujie/CowAgent/releases/tag/2.0.1) — Built-in Web Search tool, smart context trimming, runtime info dynamic update, Windows compatibility, fixes for scheduler memory loss, Feishu connection issues, and more.
|
||||
|
||||
> **2026.02.03:** [v2.0.0](https://github.com/zhayujie/CowAgent/releases/tag/2.0.0) — Full upgrade to AI super assistant with multi-step task planning, long-term memory, built-in tools, Skills framework, new models, and optimized channels.
|
||||
|
||||
> **2025.05.23:** [v1.7.6](https://github.com/zhayujie/CowAgent/releases/tag/1.7.6) — Web channel optimization, AgentMesh multi-agent plugin, Baidu TTS, claude-4-sonnet/opus support.
|
||||
|
||||
> **2025.04.11:** [v1.7.5](https://github.com/zhayujie/CowAgent/releases/tag/1.7.5) — wechatferry protocol, DeepSeek model, Tencent Cloud voice, ModelScope and Gitee-AI support.
|
||||
|
||||
> **2024.12.13:** [v1.7.4](https://github.com/zhayujie/CowAgent/releases/tag/1.7.4) — Gemini 2.0 model, Web channel, memory leak fix.
|
||||
|
||||
Full changelog: [Release Notes](https://docs.cowagent.ai/en/releases/overview)
|
||||
|
||||
<br/>
|
||||
|
||||
## 🚀 Quick Start
|
||||
|
||||
The project provides a one-click script for installation, configuration, startup, and management:
|
||||
|
||||
**Linux / macOS:**
|
||||
```bash
|
||||
bash <(curl -fsSL https://cdn.link-ai.tech/code/cow/run.sh)
|
||||
```
|
||||
|
||||
**Windows (PowerShell):**
|
||||
```powershell
|
||||
irm https://cdn.link-ai.tech/code/cow/run.ps1 | iex
|
||||
```
|
||||
|
||||
After running, the Web service starts by default. Access `http://localhost:9899/chat` to chat.
|
||||
|
||||
Script usage: [One-click Install](https://docs.cowagent.ai/en/guide/quick-start). After installation, you can also use `cow start`, `cow stop`, and other [CLI commands](https://docs.cowagent.ai/en/cli/index) to manage the service.
|
||||
|
||||
### Manual Installation
|
||||
|
||||
**1. Clone the project**
|
||||
|
||||
```bash
|
||||
git clone https://github.com/zhayujie/CowAgent
|
||||
cd CowAgent/
|
||||
```
|
||||
|
||||
**2. Install dependencies**
|
||||
|
||||
```bash
|
||||
pip3 install -r requirements.txt
|
||||
pip3 install -r requirements-optional.txt # optional but recommended
|
||||
```
|
||||
|
||||
**3. Install Cow CLI (recommended)**
|
||||
|
||||
```bash
|
||||
pip3 install -e .
|
||||
```
|
||||
|
||||
After installation, use `cow` commands to manage the service (start, stop, update, etc.) and skills. See [Command Docs](https://docs.cowagent.ai/en/cli/index).
|
||||
|
||||
**4. Install browser (optional)**
|
||||
|
||||
If you need the Agent to operate a browser (visit web pages, fill forms, etc.):
|
||||
|
||||
```bash
|
||||
cow install-browser
|
||||
```
|
||||
|
||||
This auto-installs `playwright` and Chromium. See [Browser Tool Docs](https://docs.cowagent.ai/en/tools/browser).
|
||||
|
||||
**5. Configure**
|
||||
|
||||
```bash
|
||||
cp config-template.json config.json
|
||||
```
|
||||
|
||||
Fill in your model API key and channel type in `config.json`. See the [configuration docs](https://docs.cowagent.ai/en/guide/manual-install) for details.
|
||||
|
||||
**6. Run**
|
||||
|
||||
```bash
|
||||
cow start # recommended, requires Cow CLI
|
||||
python3 app.py # or run directly
|
||||
```
|
||||
|
||||
For server deployment, use `cow` commands to manage the service:
|
||||
|
||||
```bash
|
||||
cow start # start in background
|
||||
cow stop # stop service
|
||||
cow restart # restart service
|
||||
cow status # check running status
|
||||
cow logs # view logs
|
||||
cow update # pull latest code and restart
|
||||
```
|
||||
|
||||
Or use the traditional way:
|
||||
|
||||
```bash
|
||||
nohup python3 app.py & tail -f nohup.out
|
||||
```
|
||||
|
||||
### Docker Deployment
|
||||
|
||||
```bash
|
||||
curl -O https://cdn.link-ai.tech/code/cow/docker-compose.yml
|
||||
# Edit docker-compose.yml with your config
|
||||
sudo docker compose up -d
|
||||
sudo docker logs -f chatgpt-on-wechat
|
||||
```
|
||||
|
||||
<br/>
|
||||
|
||||
## Models
|
||||
|
||||
Supports mainstream model providers. Recommended models for Agent mode:
|
||||
|
||||
| Provider | Recommended Model |
|
||||
| --- | --- |
|
||||
| DeepSeek | `deepseek-v4-flash` |
|
||||
| MiniMax | `MiniMax-M2.7` |
|
||||
| Claude | `claude-sonnet-4-6` |
|
||||
| Gemini | `gemini-3.1-pro-preview` |
|
||||
| OpenAI | `gpt-5.4` |
|
||||
| GLM | `glm-5.1` |
|
||||
| Qwen | `qwen3.6-plus` |
|
||||
| Doubao | `doubao-seed-2-0-code-preview-260215` |
|
||||
| Kimi | `kimi-k2.6` |
|
||||
|
||||
For detailed configuration of each model, see the [Models documentation](https://docs.cowagent.ai/en/models/index).
|
||||
|
||||
### Coding Plan
|
||||
|
||||
Coding Plan is a monthly subscription package offered by various providers, ideal for high-frequency Agent usage. All providers can be accessed via OpenAI-compatible mode:
|
||||
|
||||
```json
|
||||
{
|
||||
"bot_type": "openai",
|
||||
"model": "MODEL_NAME",
|
||||
"open_ai_api_base": "PROVIDER_CODING_PLAN_API_BASE",
|
||||
"open_ai_api_key": "YOUR_API_KEY"
|
||||
}
|
||||
```
|
||||
|
||||
- `bot_type`: Must be `openai`
|
||||
- `model`: Model name supported by the provider
|
||||
- `open_ai_api_base`: Provider's Coding Plan API Base (different from standard pay-as-you-go)
|
||||
- `open_ai_api_key`: Provider's Coding Plan API Key
|
||||
|
||||
> Note: Coding Plan API Base and API Key are usually separate from standard pay-as-you-go ones. Please obtain them from each provider's platform.
|
||||
|
||||
Supported providers include Alibaba Cloud, MiniMax, Zhipu GLM, Kimi, Volcengine, and more. For detailed configuration of each provider, see the [Coding Plan documentation](https://docs.cowagent.ai/en/models/coding-plan).
|
||||
|
||||
<br/>
|
||||
|
||||
## Channels
|
||||
|
||||
Supports multiple platforms. Set `channel_type` in `config.json` to switch:
|
||||
|
||||
| Channel | `channel_type` | Docs |
|
||||
| --- | --- | --- |
|
||||
| WeChat | `weixin` | [WeChat Setup](https://docs.cowagent.ai/en/channels/weixin) |
|
||||
| Web (default) | `web` | [Web Channel](https://docs.cowagent.ai/en/channels/web) |
|
||||
| Feishu | `feishu` | [Feishu Setup](https://docs.cowagent.ai/en/channels/feishu) |
|
||||
| DingTalk | `dingtalk` | [DingTalk Setup](https://docs.cowagent.ai/en/channels/dingtalk) |
|
||||
| WeCom Bot | `wecom_bot` | [WeCom Bot Setup](https://docs.cowagent.ai/en/channels/wecom-bot) |
|
||||
| WeCom App | `wechatcom_app` | [WeCom Setup](https://docs.cowagent.ai/en/channels/wecom) |
|
||||
| WeChat MP | `wechatmp` / `wechatmp_service` | [WeChat MP Setup](https://docs.cowagent.ai/en/channels/wechatmp) |
|
||||
| Terminal | `terminal` | — |
|
||||
|
||||
Multiple channels can be enabled simultaneously, separated by commas: `"channel_type": "feishu,dingtalk"`.
|
||||
|
||||
<br/>
|
||||
|
||||
## Enterprise Services
|
||||
|
||||
<a href="https://link-ai.tech" target="_blank"><img width="720" src="https://cdn.link-ai.tech/image/link-ai-intro.jpg"></a>
|
||||
|
||||
> [LinkAI](https://link-ai.tech/) is a one-stop AI agent platform for enterprises and developers, integrating multimodal LLMs, knowledge bases, Agent plugins, and workflows. Supports one-click integration with mainstream platforms, SaaS and private deployment.
|
||||
|
||||
<br/>
|
||||
|
||||
## 🔗 Related Projects
|
||||
|
||||
- [Cow Skill Hub](https://github.com/zhayujie/cow-skill-hub): Open skill marketplace for AI Agents — browse, search, install, and publish skills for CowAgent, OpenClaw, Claude Code, and more.
|
||||
- [bot-on-anything](https://github.com/zhayujie/bot-on-anything): Lightweight and highly extensible LLM application framework supporting Slack, Telegram, Discord, Gmail, and more.
|
||||
- [AgentMesh](https://github.com/MinimalFuture/AgentMesh): Open-source Multi-Agent framework for complex problem solving through agent team collaboration.
|
||||
|
||||
## 🔎 FAQ
|
||||
|
||||
FAQs: <https://github.com/zhayujie/CowAgent/wiki/FAQs>
|
||||
|
||||
## 🛠️ Contributing
|
||||
|
||||
Contributions of new channels are welcome; see the [Feishu channel](https://github.com/zhayujie/CowAgent/blob/master/channel/feishu/feishu_channel.py) for a reference implementation. You can also contribute new Skills: see the [Skill Creation docs](https://docs.cowagent.ai/en/skills/create), or submit them to the [Skill Hub](https://skills.cowagent.ai/submit).
|
||||
|
||||
## ✉ Contact
|
||||
|
||||
PRs and Issues are welcome, and you can support the project with a 🌟 Star. For questions, check the [FAQ list](https://github.com/zhayujie/CowAgent/wiki/FAQs) or search [Issues](https://github.com/zhayujie/CowAgent/issues).
|
||||
|
||||
## 🌟 Contributors
|
||||
|
||||

|
||||
@@ -15,8 +15,11 @@ description: Integrate CowAgent into Feishu via a custom enterprise app
|
||||
|
||||
No need to manually create an app on the Feishu Developer Platform. Start the Cow project, open the web console (default `http://127.0.0.1:9899/`), go to **Channels**, click **Add Channel**, choose **Feishu**, then under the **Scan QR** tab click **One-click Create Feishu App** and scan with the **Feishu App** to complete app creation and connection automatically.
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/20260505181126.png" width="800"/>
|
||||
|
||||
<Note>
|
||||
The created app comes with all required permissions (messaging, card read/write, group events, etc.) and event subscriptions pre-configured. Currently only the Feishu mainland version is supported (Lark international not yet supported).
|
||||
1. Requires `lark-oapi` ≥ 1.5.5.
|
||||
2. The created app comes with all required permissions (messaging, card read/write, group events, etc.) and event subscriptions pre-configured — no manual setup on the developer console needed. Currently only the Feishu mainland version is supported (Lark international not yet supported).
|
||||
</Note>
|
||||
|
||||
When starting from CLI without `feishu_app_id` configured, the QR code is also printed to the terminal.
|
||||
|
||||
@@ -11,29 +11,31 @@ The table below summarizes the inbound message types, bot reply types, and group
|
||||
|
||||
| Channel | Text | Image | File | Voice | Group Chat |
|
||||
| --- | :-: | :-: | :-: | :-: | :-: |
|
||||
| [WeChat](/channels/weixin) | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [Web Console](/channels/web) | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [Feishu](/channels/feishu) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [DingTalk](/channels/dingtalk) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [WeCom Smart Bot](/channels/wecom-bot) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [QQ](/channels/qq) | ✅ | ✅ | ✅ | | ✅ |
|
||||
| [WeCom App](/channels/wecom) | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [Official Account](/channels/wechatmp) | ✅ | ✅ | | ✅ | |
|
||||
| [WeChat](/en/channels/weixin) | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [Web Console](/en/channels/web) | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [Feishu](/en/channels/feishu) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [DingTalk](/en/channels/dingtalk) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [WeCom Bot](/en/channels/wecom-bot) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [QQ](/en/channels/qq) | ✅ | ✅ | ✅ | | ✅ |
|
||||
| [WeCom App](/en/channels/wecom) | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [Official Account](/en/channels/wechatmp) | ✅ | ✅ | | ✅ | |
|
||||
| [Telegram](/en/channels/telegram) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
|
||||
- The **Image / File / Voice** columns indicate that the channel can send and receive the corresponding message types; see each channel's docs for details
|
||||
- The **Group Chat** column indicates the ability to recognize and respond to group messages
|
||||
|
||||
<Tip>
|
||||
The voice / image capabilities of each channel depend on the configuration of the corresponding model provider. See [Models Overview](/models) for details.
|
||||
The voice / image capabilities of each channel depend on the configuration of the corresponding model provider. See [Models Overview](/en/models/index) for details.
|
||||
</Tip>
|
||||
|
||||
## Channel List
|
||||
|
||||
- [Web Console](/channels/web) — built-in browser-based chat and management panel, enabled by default
|
||||
- [WeChat](/channels/weixin) — log in via personal WeChat QR scan
|
||||
- [Feishu](/channels/feishu) — Feishu custom bot
|
||||
- [DingTalk](/channels/dingtalk) — DingTalk custom bot
|
||||
- [WeCom Smart Bot](/channels/wecom-bot) — WeCom smart robot
|
||||
- [QQ](/channels/qq) — QQ official bot open platform
|
||||
- [WeCom App](/channels/wecom) — WeCom custom app integration
|
||||
- [Official Account](/channels/wechatmp) — WeChat Official Account (subscription / service account)
|
||||
- [Web Console](/en/channels/web) — built-in browser-based chat and management panel, enabled by default
|
||||
- [WeChat](/en/channels/weixin) — log in via personal WeChat QR scan
|
||||
- [Feishu](/en/channels/feishu) — Feishu custom bot
|
||||
- [DingTalk](/en/channels/dingtalk) — DingTalk custom bot
|
||||
- [WeCom Bot](/en/channels/wecom-bot) — WeCom AI Bot via WebSocket long connection
|
||||
- [QQ](/en/channels/qq) — QQ Official Bot open platform
|
||||
- [WeCom App](/en/channels/wecom) — WeCom custom app integration
|
||||
- [Official Account](/en/channels/wechatmp) — WeChat Official Account (subscription / service)
|
||||
- [Telegram](/en/channels/telegram) — global IM, 5-minute setup, no public IP needed
|
||||
|
||||
111
docs/en/channels/telegram.mdx
Normal file
111
docs/en/channels/telegram.mdx
Normal file
@@ -0,0 +1,111 @@
|
||||
---
|
||||
title: Telegram
|
||||
description: Integrate CowAgent with Telegram via the Bot API
|
||||
---
|
||||
|
||||
> Integrate CowAgent into Telegram via the official Bot API. Supports private chat and group chat (triggered by @mention or replying to the bot). Uses Long Polling — no public IP required, works out of the box.
|
||||
|
||||
|
||||
## 1. Setup
|
||||
|
||||
### Step 1: Create a Bot via BotFather
|
||||
|
||||
1. Open the official account [@BotFather](https://t.me/BotFather) in Telegram.
|
||||
2. Send `/newbot` and follow the prompts:
|
||||
- **Bot name** (display name, e.g. `My CowAgent Bot`)
|
||||
- **Bot username** (must end with `bot`, e.g. `my_cowagent_bot`)
|
||||
3. Once created, BotFather returns an **HTTP API Token** (e.g. `123456789:ABCdefGhIJKlmNoPQRsTUVwxyZ`). Keep it safe.
|
||||
|
||||
<Tip>
|
||||
The token is the password of your bot — never share it. If it leaks, send `/revoke` to `@BotFather` to reset it.
|
||||
</Tip>
|
||||
|
||||
### Step 2: (Group chat only) Disable Privacy Mode
|
||||
|
||||
Skip this step if you only use private chat. Telegram bots run in **Privacy Mode** by default — in groups they can only see commands suffixed with `@bot` (e.g. `/start@your_bot`) and replies to bot messages; **plain `@bot hello` text messages are not delivered**, so the bot will appear unresponsive in groups.
|
||||
|
||||
Send the following to `@BotFather`:
|
||||
|
||||
1. `/setprivacy`
|
||||
2. Pick the bot you just created
|
||||
3. Choose `Disable`
|
||||
|
||||
<Note>
|
||||
If the bot is still silent in groups after this, try removing it from the group and adding it back.
|
||||
</Note>
|
||||
|
||||
### Step 3: Connect to CowAgent
|
||||
|
||||
<Tabs>
|
||||
<Tab title="Web Console (Recommended)">
|
||||
Open the Web Console (default `http://127.0.0.1:9899`), go to **Channels**, click **Add Channel**, choose **Telegram**, paste the Bot Token, and click connect.
|
||||
</Tab>
|
||||
<Tab title="Config File">
|
||||
Add the following to `config.json` and start Cow:
|
||||
|
||||
```json
|
||||
{
|
||||
"channel_type": "telegram",
|
||||
"telegram_token": "123456789:ABCdefGhIJKlmNoPQRsTUVwxyZ",
|
||||
"telegram_group_trigger": "mention_or_reply"
|
||||
}
|
||||
```
|
||||
|
||||
| Key | Description | Default |
|
||||
| --- | --- | --- |
|
||||
| `telegram_token` | HTTP API Token returned by BotFather | - |
|
||||
| `telegram_group_trigger` | Group trigger: `mention_or_reply` (@ or reply) / `mention_only` (@ only) / `all` (all messages) | `mention_or_reply` |
|
||||
| `telegram_register_commands` | Whether to register the command menu with BotFather on startup | `true` |
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
The integration is ready when you see logs like:
|
||||
|
||||
```
|
||||
[Telegram] Bot logged in as @my_cowagent_bot (id=123456789)
|
||||
[Telegram] Registered 10 bot commands
|
||||
[Telegram] ✅ Telegram bot ready, polling for updates
|
||||
```
|
||||
|
||||
## 2. Capabilities
|
||||
|
||||
| Feature | Support |
|
||||
| --- | --- |
|
||||
| Private chat | ✅ |
|
||||
| Group chat (@bot / reply to bot) | ✅ |
|
||||
| Text messages | ✅ send / receive |
|
||||
| Image messages | ✅ send / receive |
|
||||
| Voice messages | ✅ send / receive (OGG/Opus) |
|
||||
| Video messages | ✅ send / receive |
|
||||
| File messages | ✅ send / receive (PDF / Word / Excel, etc.) |
|
||||
| Command menu | ✅ aligned with Web Console slash commands |
|
||||
|
||||
### Command Menu
|
||||
|
||||
On startup, the channel registers a command menu with BotFather. Typing `/` in Telegram shows a dropdown:
|
||||
|
||||
| Command | Description |
|
||||
| --- | --- |
|
||||
| `/help` | Show command help |
|
||||
| `/status` | View runtime status |
|
||||
| `/context` | View conversation context (`/context clear` to clear) |
|
||||
| `/skill` | Skill management (`/skill list`, `/skill install`, ...) |
|
||||
| `/memory` | Memory management (`/memory dream`) |
|
||||
| `/knowledge` | Knowledge base (`/knowledge list` / `on` / `off`) |
|
||||
| `/config` | View current config |
|
||||
| `/cancel` | Cancel the running Agent task |
|
||||
| `/logs` | View recent logs |
|
||||
| `/version` | Show version |
|
||||
|
||||
<Note>
|
||||
Telegram's command menu only displays top-level commands; subcommands are entered with a space, e.g. `/skill list`, `/context clear`.
|
||||
</Note>
|
||||
|
||||
## 3. Usage
|
||||
|
||||
Once connected:
|
||||
|
||||
- **Private chat**: search for your bot username (e.g. `@my_cowagent_bot`) in Telegram, click `Start` and chat away.
|
||||
- **Group chat**: add the bot to a group, then trigger it with `@bot hello` or by **replying to one of the bot's messages**. If the bot doesn't respond in groups, double-check Privacy Mode in [Step 2](#step-2-group-chat-only-disable-privacy-mode).
|
||||
|
||||
When sending an image or file, you can **add a caption** (a description or question) in the caption field shown with the attachment — the bot will answer using both the attachment and the caption. Sending an attachment first and then a follow-up question also works; the two messages are merged automatically.
|
||||
@@ -3,71 +3,88 @@ title: WeCom Bot
|
||||
description: Connect CowAgent to WeCom AI Bot (WebSocket long connection)
|
||||
---
|
||||
|
||||
Connect CowAgent via WeCom AI Bot, supporting both direct messages and group chats. No public IP required — uses WebSocket long connection with Markdown rendering and streaming output.
|
||||
> Connect CowAgent via WeCom AI Bot, supporting both internal direct messages and group chats. No public IP required — uses a WebSocket long connection, with Markdown rendering and streaming output.
|
||||
|
||||
<Note>
|
||||
WeCom Bot and WeCom App are two different integration methods. WeCom Bot uses WebSocket long connection, requiring no public IP or domain, making it easier to set up.
|
||||
WeCom Bot and WeCom App are two different integration methods. WeCom Bot uses a WebSocket long connection and requires no public IP or domain, making setup much simpler.
|
||||
</Note>
|
||||
|
||||
## 1. Create an AI Bot
|
||||
## 1. Connection methods
|
||||
|
||||
### Option A: One-click QR scan (recommended)
|
||||
|
||||
No need to create the bot ahead of time. Start CowAgent and open the Web console (local URL: http://127.0.0.1:9899/), go to the **Channels** tab, click **Connect Channel**, choose **WeCom Bot**, switch to **QR scan** mode, and scan the QR code with **WeCom** — bot creation and connection complete automatically.
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/20260401121213.png" width="800"/>
|
||||
|
||||
<Note>
|
||||
After a successful scan, you can further configure the bot (name, avatar, visibility scope, etc.) in **WeCom Workbench → AI Bot**.
|
||||
</Note>
|
||||
|
||||
### Option B: Manual creation
|
||||
|
||||
Create the AI Bot in WeCom and obtain the Bot ID and Secret, then connect via the Web console or config file.
|
||||
|
||||
**Step 1: Create the AI Bot**
|
||||
|
||||
1. Open the WeCom client, go to **Workbench**, and click **AI Bot**:
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/20260316180959.png" width="800"/>
|
||||
|
||||
2. Click **Create Bot** → **Manual Creation**:
|
||||
2. Click **Create Bot → Manual Creation**:
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/20260316181118.png" width="600"/>
|
||||
<img src="https://cdn.link-ai.tech/doc/20260316181118.png" width="800"/>
|
||||
|
||||
3. Scroll to the bottom of the right panel and select **API Mode**:
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/20260316181215.png" width="600"/>
|
||||
<img src="https://cdn.link-ai.tech/doc/20260316181215.png" width="800"/>
|
||||
|
||||
4. Set the bot name, avatar, and visibility scope. Select **Long Connection** mode, note down the **Bot ID** and **Secret**, then click Save.
|
||||
4. Set the bot name, avatar, and visibility scope. Choose **Long Connection** mode, save the **Bot ID** and **Secret**, then click Save.
|
||||
|
||||
## 2. Configuration
|
||||
**Step 2: Connect to CowAgent**
|
||||
|
||||
### Option A: Web Console
|
||||
<Tabs>
|
||||
<Tab title="Web Console">
|
||||
Open the Web console, go to the **Channels** tab, click **Connect Channel**, choose **WeCom Bot**, switch to **Manual** mode, enter the Bot ID and Secret, and click Connect.
|
||||
|
||||
Start the program and open the Web console (local access: http://127.0.0.1:9899). Go to the **Channels** tab, click **Connect Channel**, select **WeCom Bot**, fill in the Bot ID and Secret from the previous step, and click Connect.
|
||||
<img src="https://cdn.link-ai.tech/doc/20260316181711.png" width="800"/>
|
||||
</Tab>
|
||||
<Tab title="Config File">
|
||||
Add the following to `config.json`, then start CowAgent:
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/20260316181711.png" width="600"/>
|
||||
```json
|
||||
{
|
||||
"channel_type": "wecom_bot",
|
||||
"wecom_bot_id": "YOUR_BOT_ID",
|
||||
"wecom_bot_secret": "YOUR_SECRET"
|
||||
}
|
||||
```
|
||||
|
||||
### Option B: Config File
|
||||
| Parameter | Description |
|
||||
| --- | --- |
|
||||
| `wecom_bot_id` | Bot ID of the AI Bot |
|
||||
| `wecom_bot_secret` | Secret of the AI Bot |
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
Add the following to your `config.json`:
|
||||
The log line `[WecomBot] Subscribe success` confirms the connection is established.
|
||||
|
||||
```json
|
||||
{
|
||||
"channel_type": "wecom_bot",
|
||||
"wecom_bot_id": "YOUR_BOT_ID",
|
||||
"wecom_bot_secret": "YOUR_SECRET"
|
||||
}
|
||||
```
|
||||
|
||||
| Parameter | Description |
|
||||
| --- | --- |
|
||||
| `wecom_bot_id` | Bot ID of the AI Bot |
|
||||
| `wecom_bot_secret` | Secret for the AI Bot |
|
||||
|
||||
After configuration, start the program. The log message `[WecomBot] Subscribe success` indicates a successful connection.
|
||||
|
||||
## 3. Supported Features
|
||||
## 2. Supported features
|
||||
|
||||
| Feature | Status |
|
||||
| --- | --- |
|
||||
| Direct Messages | ✅ |
|
||||
| Group Chat (@bot) | ✅ |
|
||||
| Text Messages | ✅ Send & Receive |
|
||||
| Image Messages | ✅ Send & Receive |
|
||||
| File Messages | ✅ Send & Receive |
|
||||
| Streaming Reply | ✅ |
|
||||
| Scheduled Push | ✅ |
|
||||
| Direct chat | ✅ |
|
||||
| Group chat (@bot) | ✅ |
|
||||
| Text messages | ✅ Send / Receive |
|
||||
| Image messages | ✅ Send / Receive |
|
||||
| File messages | ✅ Send / Receive |
|
||||
| Streaming replies | ✅ |
|
||||
| Scheduled push messages | ✅ |
|
||||
|
||||
## 4. Usage
|
||||
## 3. Usage
|
||||
|
||||
Search for the bot name in WeCom to start a direct conversation.
|
||||
Search for the bot's name inside WeCom to start a direct chat.
|
||||
|
||||
To use in group chats, add the bot to a group and @mention it to send messages.
|
||||
To use the bot in an internal group chat, add it to the group and @-mention it.
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/20260316182902.png" width="800"/>
|
||||
|
||||
@@ -1,19 +1,21 @@
|
||||
---
|
||||
title: WeChat
|
||||
description: Connect CowAgent to personal WeChat
|
||||
description: Connect CowAgent to personal WeChat (via the official API)
|
||||
---
|
||||
|
||||
> Connect CowAgent to your personal WeChat. Simply scan a QR code to log in — no public IP required. Supports text, image, voice, file, and video messages.
|
||||
> Connect CowAgent to your personal WeChat — scan to log in, no public IP required. Supports text, image, voice, file, and video messages in 1-on-1 chats. Backed by WeChat's official API; safe to use. After connecting, a bot assistant is added to your conversation list without affecting normal account usage.
|
||||
|
||||
## 1. Configuration
|
||||
## 1. Setup and run
|
||||
|
||||
### Option A: Web Console
|
||||
### Option A: Web console
|
||||
|
||||
Start the program and open the Web console (local access: http://127.0.0.1:9899). Go to the **Channels** tab, click **Connect Channel**, select **WeChat**, and follow the prompts to scan the QR code.
|
||||
Start CowAgent and open the Web console (local URL: http://127.0.0.1:9899/). Go to the **Channels** tab, click **Connect Channel**, select **WeChat**, and follow the prompts to scan the QR code and log in.
|
||||
|
||||
### Option B: Config File
|
||||
<img src="https://cdn.link-ai.tech/doc/20260322195114.png" width="800" />
|
||||
|
||||
Set `channel_type` to `weixin` in your `config.json`:
|
||||
### Option B: Config file
|
||||
|
||||
Set `channel_type` to `weixin` in `config.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
@@ -21,52 +23,49 @@ Set `channel_type` to `weixin` in your `config.json`:
|
||||
}
|
||||
```
|
||||
|
||||
After starting the program, a QR code will be displayed in the terminal. Scan it with WeChat and confirm on your phone to complete login.
|
||||
After starting CowAgent, a QR code is displayed in the terminal. Scan it with WeChat to complete login.
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/20260322195509.png" width="800" />
|
||||
|
||||
<Note>
|
||||
For backward compatibility, setting `channel_type` to `wx` also activates the WeChat channel.
|
||||
1. For backward compatibility, setting `channel_type` to `wx` also activates the WeChat channel.
|
||||
2. The WeChat client must be on version **8.0.69** or higher.
|
||||
</Note>
|
||||
|
||||
## 2. Parameters
|
||||
## 2. Usage
|
||||
|
||||
| Parameter | Description | Default |
|
||||
| --- | --- | --- |
|
||||
| `channel_type` | Set to `weixin` or `wx` | — |
|
||||
Once authorized, the integration completes and you can start chatting. A bot assistant is created in your WeChat conversation list, leaving normal account usage unaffected.
|
||||
|
||||
Login credentials are automatically saved to `~/.weixin_cow_credentials.json`. To force a re-login, delete this file and restart.
|
||||
> You can find the bot at any time by searching for **"微信ClawBot"**. You may also rename it, change its avatar, pin it to the top of your conversation list, and so on.
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/83ae8251d896219fde4803f4205205be.jpg" width="250" />
|
||||
|
||||
## 3. Login
|
||||
|
||||
### QR Code Login
|
||||
### QR code login
|
||||
|
||||
On first startup, a QR code is displayed in the terminal (valid for approximately 2 minutes). Scan it with WeChat and confirm on your phone.
|
||||
On first startup, a QR code appears in the terminal (valid for around 2 minutes). Scan it with WeChat and confirm on your phone to log in.
|
||||
|
||||
- The QR code automatically refreshes when it expires
|
||||
- The `qrcode` dependency is already included in `requirements.txt`, enabling QR code rendering directly in the terminal
|
||||
- The QR code refreshes automatically when it expires
|
||||
- The `qrcode` dependency is already included in `requirements.txt`, so the QR code renders directly in the terminal once the dependencies are installed
|
||||
|
||||
### Credential Persistence
|
||||
### Credential persistence
|
||||
|
||||
After successful login, credentials are saved to `~/.weixin_cow_credentials.json`. Subsequent startups will reuse the saved credentials without requiring a new scan.
|
||||
After a successful login, credentials are saved to `~/.weixin_cow_credentials.json`. Subsequent startups reuse the saved credentials with no need to re-scan.
|
||||
|
||||
To force a re-login, delete the credentials file and restart the program.
|
||||
To force a re-login, delete the credentials file and restart.
|
||||
|
||||
### Session Expiry
|
||||
### Session expiry
|
||||
|
||||
When the WeChat session expires (errcode -14), the program automatically clears old credentials and initiates a new QR login — no manual intervention required.
|
||||
When the WeChat session expires (errcode `-14`), CowAgent automatically clears old credentials and initiates a new QR login — no manual intervention required.
|
||||
|
||||
## 4. Supported Features
|
||||
## 4. Supported features
|
||||
|
||||
| Feature | Status |
|
||||
| --- | --- |
|
||||
| Direct Messages | ✅ |
|
||||
| Text Messages | ✅ Send & Receive |
|
||||
| Image Messages | ✅ Send & Receive |
|
||||
| File Messages | ✅ Send & Receive |
|
||||
| Video Messages | ✅ Send & Receive |
|
||||
| Voice Messages | ✅ Receive |
|
||||
|
||||
## 5. Notes
|
||||
|
||||
1. Ensure network access to `ilinkai.weixin.qq.com`.
|
||||
2. Media files (images, files, videos) are transferred via CDN with AES-128-ECB encryption, handled automatically by the program.
|
||||
3. A stable network connection is recommended to avoid frequent disconnections that would require re-scanning.
|
||||
| Direct messages | ✅ |
|
||||
| Text messages | ✅ Send & Receive |
|
||||
| Image messages | ✅ Send & Receive |
|
||||
| File messages | ✅ Send & Receive |
|
||||
| Video messages | ✅ Send & Receive |
|
||||
| Voice messages | ✅ Receive (built-in speech recognition) |
|
||||
|
||||
@@ -25,6 +25,14 @@ View current session and service status, including process info, model configura
|
||||
/status
|
||||
```
|
||||
|
||||
## cancel
|
||||
|
||||
Abort the agent task currently running in this session. When the agent is busy with a long task (e.g. multi-turn tool calls or a long streaming response), send `/cancel` and the agent will stop before the next tool execution. Available across all channels — Web, WeChat, WeCom, Feishu, etc.
|
||||
|
||||
```text
|
||||
/cancel
|
||||
```
|
||||
|
||||
## config
|
||||
|
||||
View or modify runtime configuration. Changes take effect immediately without restarting.
|
||||
|
||||
@@ -57,6 +57,7 @@ In the Web console or any connected channel, type `/` to see command suggestions
|
||||
| --- | --- |
|
||||
| `/help` | Show command help |
|
||||
| `/status` | View service status and configuration |
|
||||
| `/cancel` | Abort the currently running agent task |
|
||||
| `/config` | View or modify runtime configuration |
|
||||
| `/skill` | Manage skills (install, uninstall, enable, disable, etc.) |
|
||||
| `/memory dream [N]` | Manually trigger memory distillation (default 3 days, max 30) |
|
||||
@@ -80,6 +81,7 @@ In the Web console or any connected channel, type `/` to see command suggestions
|
||||
| version | ✓ | ✓ |
|
||||
| status | ✓ | ✓ |
|
||||
| logs | ✓ | ✓ |
|
||||
| cancel | ✗ | ✓ |
|
||||
| config | ✗ | ✓ |
|
||||
| context | — | ✓ |
|
||||
| memory (subcommands) | ✗ | ✓ |
|
||||
|
||||
@@ -19,6 +19,24 @@ cow skill list
|
||||
```
|
||||
</CodeGroup>
|
||||
|
||||
Example output:
|
||||
|
||||
```
|
||||
📦 Installed skills (3/4)
|
||||
|
||||
✅ pptx
|
||||
Use this skill any time a .pptx file is involved…
|
||||
Source: cowhub
|
||||
|
||||
✅ skill-creator
|
||||
Create, install, or update skills…
|
||||
Source: builtin
|
||||
|
||||
⏸️ image-vision (disabled)
|
||||
Image understanding and visual analysis
|
||||
Source: builtin
|
||||
```
|
||||
|
||||
**Browse the Skill Hub** (view all available skills):
|
||||
|
||||
<CodeGroup>
|
||||
|
||||
@@ -81,7 +81,7 @@ nohup python3 app.py & tail -f nohup.out
|
||||
```
|
||||
|
||||
<Tip>
|
||||
If deploying on a server, open port `9899` in your firewall or security group to access the Web console. It's recommended to restrict access to specific IPs for security.
|
||||
**Deploying on a server?** By default, `web_host` only listens on `127.0.0.1` (local access only). Set `web_host` to `0.0.0.0` in `config.json` to make the console reachable from outside, and set `web_password` to protect it. Don't forget to open port `9899` in your firewall or security group — ideally restricted to specific IPs.
|
||||
</Tip>
|
||||
|
||||
## Docker Deployment
|
||||
@@ -113,7 +113,7 @@ sudo docker logs -f chatgpt-on-wechat
|
||||
```
|
||||
|
||||
<Tip>
|
||||
If deploying on a server, open port `9899` in your firewall or security group to access the Web console. It's recommended to restrict access to specific IPs for security.
|
||||
**Running in Docker?** Set `WEB_HOST` to `0.0.0.0` in `docker-compose.yml` so the console is reachable from outside the container, and set `WEB_PASSWORD` to protect it. Make sure port `9899` is mapped to the host and open in your firewall or security group.
|
||||
</Tip>
|
||||
|
||||
## Core Configuration
|
||||
|
||||
@@ -33,6 +33,10 @@ The script automatically performs these steps:
|
||||
|
||||
By default, the Web console starts after installation. Access `http://localhost:9899` to begin chatting.
|
||||
|
||||
<Note>
|
||||
**Deploying on a server?** By default `web_host` only listens on `127.0.0.1` (local access only). Set `web_host` to `0.0.0.0` in `config.json` to make the console reachable from outside, and set `web_password` to protect it. Don't forget to open port `9899` in your firewall or security group — ideally restricted to specific IPs.
|
||||
</Note>
|
||||
|
||||
## Management Commands
|
||||
|
||||
After installation, use the `cow` command to manage the service:
|
||||
|
||||
61
docs/en/guide/upgrade.mdx
Normal file
61
docs/en/guide/upgrade.mdx
Normal file
@@ -0,0 +1,61 @@
|
||||
---
|
||||
title: Upgrade
|
||||
description: How to upgrade CowAgent
|
||||
---
|
||||
|
||||
## Recommended: One-line upgrade
|
||||
|
||||
Use `cow update` to pull the latest code and restart the service in one step:
|
||||
|
||||
```bash
|
||||
cow update
|
||||
```
|
||||
|
||||
The command runs the following automatically:
|
||||
|
||||
1. Pull the latest code (`git pull`)
|
||||
2. Stop the running service
|
||||
3. Update Python dependencies
|
||||
4. Reinstall the CLI
|
||||
5. Start the service
|
||||
|
||||
<Note>
|
||||
If the Cow CLI is not installed, `./run.sh update` performs the same operations.
|
||||
</Note>
|
||||
|
||||
## Manual upgrade
|
||||
|
||||
Run the following inside the project root:
|
||||
|
||||
```bash
|
||||
git pull
|
||||
pip3 install -r requirements.txt
|
||||
pip3 install -e .
|
||||
```
|
||||
|
||||
Then restart the service:
|
||||
|
||||
```bash
|
||||
# Using Cow CLI (recommended)
|
||||
cow restart
|
||||
|
||||
# Or using run.sh
|
||||
./run.sh restart
|
||||
|
||||
# Or restart manually with nohup
|
||||
kill $(ps -ef | grep app.py | grep -v grep | awk '{print $2}')
|
||||
nohup python3 app.py & tail -f nohup.out
|
||||
```
|
||||
|
||||
## Docker upgrade
|
||||
|
||||
Run the following in the directory containing `docker-compose.yml`:
|
||||
|
||||
```bash
|
||||
sudo docker compose pull
|
||||
sudo docker compose up -d
|
||||
```
|
||||
|
||||
<Tip>
|
||||
Back up `config.json` before upgrading. For Docker deployments, mount the workspace directory as a volume to persist data across upgrades.
|
||||
</Tip>
|
||||
@@ -9,7 +9,7 @@ CowAgent 2.0 has evolved from a simple chatbot into a super intelligent assistan
|
||||
|
||||
CowAgent's architecture consists of the following core modules:
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/cow-agent-arch-en.jpg.jpg" alt="CowAgent Architecture" />
|
||||
<img src="https://cdn.jsdelivr.net/gh/zhayujie/cowagent-assets@main/architecture/en/architecture.jpg" alt="CowAgent Architecture" />
|
||||
|
||||
| Module | Description |
|
||||
| --- | --- |
|
||||
@@ -39,8 +39,8 @@ The Agent workspace is located at `~/cow` by default and stores system prompts,
|
||||
|
||||
```
|
||||
~/cow/
|
||||
├── system.md # Agent system prompt
|
||||
├── user.md # User profile
|
||||
├── SYSTEM.md # Agent system prompt
|
||||
├── USER.md # User profile
|
||||
├── MEMORY.md # Core memory
|
||||
├── memory/ # Long-term memory storage
|
||||
│ └── YYYY-MM-DD.md # Daily memory
|
||||
@@ -67,9 +67,10 @@ Configure Agent mode parameters in `config.json`:
|
||||
{
|
||||
"agent": true,
|
||||
"agent_workspace": "~/cow",
|
||||
"agent_max_context_tokens": 40000,
|
||||
"agent_max_context_turns": 30,
|
||||
"agent_max_steps": 15
|
||||
"agent_max_context_tokens": 50000,
|
||||
"agent_max_context_turns": 20,
|
||||
"agent_max_steps": 20,
|
||||
"enable_thinking": false
|
||||
}
|
||||
```
|
||||
|
||||
@@ -77,7 +78,9 @@ Configure Agent mode parameters in `config.json`:
|
||||
| --- | --- | --- |
|
||||
| `agent` | Enable Agent mode | `true` |
|
||||
| `agent_workspace` | Workspace path | `~/cow` |
|
||||
| `agent_max_context_tokens` | Max context tokens | `40000` |
|
||||
| `agent_max_context_turns` | Max context turns | `30` |
|
||||
| `agent_max_steps` | Max decision steps per task | `15` |
|
||||
| `agent_max_context_tokens` | Max context tokens | `50000` |
|
||||
| `agent_max_context_turns` | Max context turns | `20` |
|
||||
| `agent_max_steps` | Max decision steps per task | `20` |
|
||||
| `enable_thinking` | Enable deep-thinking mode | `false` |
|
||||
| `knowledge` | Enable personal knowledge base | `true` |
|
||||
| `knowledge` | Enable personal knowledge base | `true` |
|
||||
|
||||
@@ -84,7 +84,7 @@ Secrets required by skills are stored in an environment variable file, managed b
|
||||
|
||||
The Skills system provides infinite extensibility for the Agent. Each Skill consists of a description file, execution scripts (optional), and resources (optional), describing how to complete specific types of tasks. Skills allow the Agent to follow instructions for complex workflows, invoke tools, or integrate third-party systems.
|
||||
|
||||
- **[Skill Hub](https://skills.cowagent.ai/):** An open skill marketplace featuring official, community, and third-party skills. Install with one command.
|
||||
- [Skill Hub](https://skills.cowagent.ai/): An open skill marketplace featuring official, community, and third-party skills. Install with one command.
|
||||
- **Built-in skills:** Located in the project's `skills/` directory, including skill creator, image recognition, LinkAI agent, web fetch, and more. Built-in skills are automatically enabled based on dependency conditions (API keys, system commands, etc.).
|
||||
- **Custom skills:** Created by users through conversation, stored in the workspace (`~/cow/skills/`), capable of implementing any complex business process or third-party integration.
|
||||
|
||||
|
||||
@@ -1,53 +1,60 @@
|
||||
---
|
||||
title: Introduction
|
||||
description: CowAgent - AI Super Assistant powered by LLMs
|
||||
description: CowAgent - Open-source super AI assistant and Agent Harness
|
||||
---
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/78c5dd674e2c828642ecc0406669fed7.png" alt="CowAgent" width="600px"/>
|
||||
<div align="center">
|
||||
<img src="https://cdn.link-ai.tech/doc/78c5dd674e2c828642ecc0406669fed7.png" alt="CowAgent" width="450px"/>
|
||||
</div>
|
||||
|
||||
**CowAgent** is an AI super assistant powered by LLMs with autonomous task planning, long-term memory, skills system, multimodal messages, multiple model support, and multi-platform deployment.
|
||||
**CowAgent** is an open-source super AI assistant and Agent Harness. It proactively plans tasks, runs tools and skills, and autonomously grows with memory and knowledge.
|
||||
|
||||
CowAgent can proactively think and plan tasks, operate computers and external resources, create and execute Skills, and continuously grow with long-term memory. It supports flexible switching between multiple models, handles text, voice, images, files and other multimodal messages, and can be integrated into WeChat, web, Feishu, DingTalk, WeCom, and WeChat Official Account. It runs 7x24 hours on your personal computer or server.
|
||||
CowAgent is lightweight, easy to deploy, and built to extend. Plug in any major LLM provider and run it across the Web and major IM platforms, 24/7, on a personal computer or server.
|
||||
|
||||
<Card title="GitHub" icon="github" href="https://github.com/zhayujie/CowAgent">
|
||||
github.com/zhayujie/CowAgent
|
||||
</Card>
|
||||
<CardGroup cols={2}>
|
||||
<Card title="GitHub" icon="github" href="https://github.com/zhayujie/CowAgent">
|
||||
Open-source repository — Star and contribute
|
||||
</Card>
|
||||
<Card title="Try Online" icon="cloud" href="https://link-ai.tech/cowagent/create">
|
||||
No setup required — experience CowAgent instantly
|
||||
</Card>
|
||||
</CardGroup>
|
||||
|
||||
## Core Capabilities
|
||||
|
||||
<CardGroup cols={2}>
|
||||
<Card title="Autonomous Task Planning" icon="brain" href="/en/intro/architecture">
|
||||
Understands complex tasks and autonomously plans execution, continuously thinking and invoking tools until goals are achieved. Supports accessing file systems, terminals, browsers, schedulers, and other system resources through tools.
|
||||
Decomposes complex tasks and executes them step by step, looping over tools and skills until the goal is reached.
|
||||
</Card>
|
||||
<Card title="Long-term Memory" icon="database" href="/en/memory">
|
||||
Three-tier memory flow (context → daily memory → global memory) with daily Deep Dream distillation, keyword and vector retrieval support.
|
||||
<Card title="Long-term Memory" icon="database" href="/en/memory/index">
|
||||
Three-tier architecture (context → daily → core), automatic Deep Dream distillation, hybrid keyword + vector retrieval.
|
||||
</Card>
|
||||
<Card title="Knowledge Base" icon="book" href="/en/knowledge">
|
||||
Automatically organizes structured knowledge with knowledge graph visualization, building a continuously growing knowledge network through cross-references.
|
||||
<Card title="Personal Knowledge Base" icon="book" href="/en/knowledge/index">
|
||||
Auto-curates structured knowledge into a Markdown wiki and builds an evolving knowledge graph with visual browsing.
|
||||
</Card>
|
||||
<Card title="Skills System" icon="puzzle-piece" href="/en/skills/index">
|
||||
Implements a Skills creation and execution engine with built-in skills, and supports custom Skills development through natural language conversation.
|
||||
A complete skill creation and execution engine. Install from Skill Hub or generate custom skills via natural-language conversation.
|
||||
</Card>
|
||||
<Card title="Multimodal Messages" icon="image" href="/en/channels/web">
|
||||
Supports parsing, processing, generating, and sending text, images, voice, files, and other message types.
|
||||
<Card title="Multimodal Messaging" icon="image" href="/en/channels/web">
|
||||
First-class support for text, images, voice, and files — recognition, generation, and delivery.
|
||||
</Card>
|
||||
<Card title="Tool System" icon="wrench" href="/en/tools/index">
|
||||
Built-in tools for file I/O, terminal execution, browser automation, scheduled tasks, messaging, and more. The Agent autonomously invokes tools to accomplish complex tasks.
|
||||
Built-in file I/O, terminal, browser, scheduler, memory retrieval, web search, and more — with native MCP integration.
|
||||
</Card>
|
||||
<Card title="Command System" icon="terminal" href="/en/cli/index">
|
||||
Provides terminal CLI and in-chat commands for process management, skill installation, configuration, context inspection, and other common operations.
|
||||
Terminal CLI and in-chat commands for process management, skill installation, configuration, and context inspection.
|
||||
</Card>
|
||||
<Card title="Multiple Model Support" icon="microchip" href="/en/models/index">
|
||||
Supports mainstream model providers including OpenAI, Claude, Gemini, DeepSeek, MiniMax, GLM, Qwen, Kimi, Doubao, and more.
|
||||
<Card title="Pluggable Models" icon="microchip" href="/en/models/index">
|
||||
Claude, GPT, Gemini, DeepSeek, Qwen, GLM, Kimi, MiniMax, Doubao, and more — swap providers from the Web console with one click.
|
||||
</Card>
|
||||
<Card title="Multi-platform Deployment" icon="server" href="/en/channels/weixin">
|
||||
Runs on local computers or servers, integrable into WeChat, web, Feishu, DingTalk, WeChat Official Account, and WeCom applications.
|
||||
<Card title="Multi-channel Integration" icon="server" href="/en/channels/index">
|
||||
A single Agent simultaneously serves Web, WeChat, Feishu, DingTalk, WeCom, QQ, and Official Accounts.
|
||||
</Card>
|
||||
</CardGroup>
|
||||
|
||||
## Quick Experience
|
||||
## Quick Start
|
||||
|
||||
Run the following command in your terminal for one-click install, configuration, and startup:
|
||||
Run one of the commands below to install, configure, and start CowAgent in a single step:
|
||||
|
||||
<Tabs>
|
||||
<Tab title="Linux / macOS">
|
||||
@@ -62,25 +69,25 @@ Run the following command in your terminal for one-click install, configuration,
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
By default, the Web service starts after running. Access `http://localhost:9899/chat` to chat in the web interface.
|
||||
Once started, open `http://localhost:9899` to access the **Web console** — the unified place to chat, configure providers, connect channels, and install skills.
|
||||
|
||||
<CardGroup cols={2}>
|
||||
<Card title="Quick Start" icon="rocket" href="/en/guide/quick-start">
|
||||
Complete installation and run guide
|
||||
</Card>
|
||||
<Card title="Architecture" icon="sitemap" href="/en/intro/architecture">
|
||||
CowAgent system architecture design
|
||||
CowAgent system architecture
|
||||
</Card>
|
||||
</CardGroup>
|
||||
|
||||
## Disclaimer
|
||||
|
||||
1. This project follows the [MIT License](https://github.com/zhayujie/CowAgent/blob/master/LICENSE) and is intended for technical research and learning. Users must comply with local laws, regulations, policies, and corporate bylaws. Any illegal or rights-infringing use is prohibited.
|
||||
2. Agent mode consumes more tokens than normal chat mode. Choose models based on effectiveness and cost. Agent has access to the host operating system — deploy with caution.
|
||||
3. CowAgent focuses on open-source development and does not participate in, authorize, or issue any cryptocurrency.
|
||||
1. This project is licensed under the [MIT License](https://github.com/zhayujie/CowAgent/blob/master/LICENSE) and is intended for technical research and learning. You are responsible for complying with applicable laws and regulations in your jurisdiction; the maintainers assume no liability for any consequences arising from use of this project.
|
||||
2. **Cost & safety:** Agent mode consumes substantially more tokens than plain chat — pick models that balance quality and cost. The Agent has access to your local operating system; deploy only in trusted environments.
|
||||
3. CowAgent is a pure open-source project and does not participate in, authorize, or issue any cryptocurrency.
|
||||
|
||||
## Community
|
||||
|
||||
Add our assistant on WeChat to join the open-source community:
|
||||
Scan the WeChat QR code to join the open-source community group:
|
||||
|
||||
<img width="140" src="https://img-1317903499.cos.ap-guangzhou.myqcloud.com/docs/open-community.png" />
|
||||
|
||||
@@ -5,6 +5,10 @@ description: CowAgent personal knowledge base — structured knowledge accumulat
|
||||
|
||||
The personal knowledge base is the Agent's long-term structured knowledge store, saved in the `knowledge/` directory within the workspace. Unlike memory, which is organized by timeline, the knowledge base organizes content by topic — articles, conversation insights, and learning materials are structured into interlinked Markdown pages, forming a continuously growing knowledge network.
|
||||
|
||||
<Frame>
|
||||
<img src="https://cdn.link-ai.tech/doc/20260413105435.png" width="800" />
|
||||
</Frame>
|
||||
|
||||
## Core Concepts
|
||||
|
||||
### Knowledge vs Memory
|
||||
@@ -43,7 +47,7 @@ Knowledge writing is an autonomous Agent behavior, triggered in these scenarios:
|
||||
Each knowledge page includes cross-reference links to related pages, gradually building a knowledge graph.
|
||||
|
||||
<Frame>
|
||||
<img src="https://gist.github.com/user-attachments/assets/3ce92f78-1863-4820-8fa8-660c0f2b7f09" alt="Conversational knowledge ingest" />
|
||||
<img src="https://cdn.link-ai.tech/doc/20260413110104.png" width="800" />
|
||||
</Frame>
|
||||
|
||||
## Knowledge Retrieval
|
||||
@@ -63,11 +67,11 @@ The web console provides a dedicated "Knowledge" module with:
|
||||
- **Chat integration** — Knowledge document links referenced in Agent replies are clickable for direct navigation
|
||||
|
||||
<Frame>
|
||||
<img src="https://gist.github.com/user-attachments/assets/b7b9d6be-0ac1-4c65-803b-2c6b36bd59a7" alt="Knowledge document browsing" />
|
||||
<img src="https://cdn.link-ai.tech/doc/17aad553d3e9e428c52ff9dc31726fda.png" width="800" />
|
||||
</Frame>
|
||||
|
||||
<Frame>
|
||||
<img src="https://gist.github.com/user-attachments/assets/44ae68ca-96cc-40b9-ab33-cdbec34c2379" alt="Knowledge graph visualization" />
|
||||
<img src="https://cdn.link-ai.tech/doc/20260413105402.png" width="800" />
|
||||
</Frame>
|
||||
|
||||
## CLI Commands
|
||||
|
||||
@@ -27,7 +27,7 @@ The Agent automatically persists conversation content to long-term memory throug
|
||||
|
||||
- **On context trimming** — When conversation turns or tokens exceed the configured limit, the oldest half of the context is trimmed, and the discarded content is summarized by the LLM into key information and written to the daily memory file. The summary is also asynchronously injected into the retained context for conversational continuity
|
||||
- **Daily scheduled summary** — A full summary is automatically triggered at 23:55 every day, ensuring memory is preserved even on low-activity days (skipped if content hasn't changed)
|
||||
- **[Deep Dream (memory distillation)](/en/memory/deep-dream)** — Runs automatically after the daily summary, distilling daily memories into MEMORY.md and generating a dream diary
|
||||
- [Deep Dream (memory distillation)](/en/memory/deep-dream) — Runs automatically after the daily summary, distilling daily memories into MEMORY.md and generating a dream diary
|
||||
- **On API context overflow** — When the model API returns a context overflow error, the current conversation summary is saved as an emergency measure
|
||||
|
||||
All memory writes run asynchronously in a background thread (LLM summarization + file writing), never blocking normal conversation replies.
|
||||
|
||||
@@ -77,7 +77,7 @@ Reference: [China Key](https://platform.minimaxi.com/docs/coding-plan/quickstart
|
||||
|
||||
---
|
||||
|
||||
## Zhipu GLM
|
||||
## GLM
|
||||
|
||||
```json
|
||||
{
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
title: Zhipu GLM
|
||||
title: GLM
|
||||
description: Zhipu AI GLM model configuration (Text / Image Understanding / Speech-to-Text / Embedding)
|
||||
---
|
||||
|
||||
|
||||
@@ -3,43 +3,36 @@ title: Models Overview
|
||||
description: Model vendors supported by CowAgent and their capability matrix
|
||||
---
|
||||
|
||||
CowAgent supports mainstream large language models from both Chinese and overseas vendors. Model interfaces are implemented under the project's `models/` directory. In addition to text chat, some vendors also provide vision understanding, image generation, speech-to-text, text-to-speech, and embedding capabilities, which can be invoked on demand in the Agent flow.
|
||||
|
||||
<Note>
|
||||
The following models are recommended in Agent mode; choose based on quality and cost: deepseek-v4-flash, MiniMax-M2.7, claude-sonnet-4-6, gemini-3.5-flash, glm-5.1, qwen3.6-plus, kimi-k2.6, ernie-5.1.
|
||||
|
||||
[LinkAI](https://link-ai.tech) is also supported, letting you switch between multiple vendors with a single key while gaining knowledge bases, workflows, and plugins.
|
||||
</Note>
|
||||
|
||||
CowAgent supports a wide range of mainstream large language models. Model interfaces live under the project's `models/` directory. Beyond text chat, several vendors also provide vision understanding, image generation, speech-to-text, text-to-speech, and embeddings — all of which can be invoked on demand in the Agent flow.
|
||||
|
||||
## Capability Matrix
|
||||
|
||||
A snapshot of each vendor's capabilities. "Text" refers to the main chat model; the remaining columns indicate which Agent capabilities the vendor can handle.
|
||||
A snapshot of each vendor's capabilities. "Text" refers to the main chat model; the remaining columns show which Agent capabilities the vendor can power.
|
||||
|
||||
| Vendor | Representative Models | Text | Image Understanding | Image Generation | Speech-to-Text | Text-to-Speech | Embedding |
|
||||
| Vendor | Representative Models | Text | Vision | Image Gen | STT | TTS | Embedding |
|
||||
| --- | --- | :-: | :-: | :-: | :-: | :-: | :-: |
|
||||
| [DeepSeek](/models/deepseek) | deepseek-v4-flash / pro | ✅ | | | | | |
|
||||
| [MiniMax](/models/minimax) | MiniMax-M2.7 | ✅ | ✅ | ✅ | | ✅ | |
|
||||
| [Claude](/models/claude) | claude-opus-4-7 | ✅ | ✅ | | | | |
|
||||
| [Gemini](/models/gemini) | gemini-3.5-flash | ✅ | ✅ | ✅ | | | |
|
||||
| [OpenAI](/models/openai) | gpt-5.5, o-series | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [Zhipu GLM](/models/glm) | glm-5.1, glm-5v-turbo | ✅ | ✅ | | ✅ | | ✅ |
|
||||
| [Tongyi Qwen](/models/qwen) | qwen3.7-max | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [Doubao](/models/doubao) | doubao-seed-2.0 series | ✅ | ✅ | ✅ | | | ✅ |
|
||||
| [Kimi](/models/kimi) | kimi-k2.6 | ✅ | ✅ | | | | |
|
||||
| [Baidu Qianfan](/models/qianfan) | ernie-5.1 | ✅ | ✅ | | | | |
|
||||
| [LinkAI](/models/linkai) | 100+ models from multiple vendors | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [Custom](/models/custom) | Local models / third-party proxies | ✅ | | | | | |
|
||||
| [DeepSeek](/en/models/deepseek) | deepseek-v4-flash / pro | ✅ | | | | | |
|
||||
| [MiniMax](/en/models/minimax) | MiniMax-M2.7 | ✅ | ✅ | ✅ | | ✅ | |
|
||||
| [Claude](/en/models/claude) | claude-opus-4-7 | ✅ | ✅ | | | | |
|
||||
| [Gemini](/en/models/gemini) | gemini-3.5-flash | ✅ | ✅ | ✅ | | | |
|
||||
| [OpenAI](/en/models/openai) | gpt-5.5, o-series | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [GLM](/en/models/glm) | glm-5.1, glm-5v-turbo | ✅ | ✅ | | ✅ | | ✅ |
|
||||
| [Qwen](/en/models/qwen) | qwen3.7-max | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [Doubao](/en/models/doubao) | doubao-seed-2.0 series | ✅ | ✅ | ✅ | | | ✅ |
|
||||
| [Kimi](/en/models/kimi) | kimi-k2.6 | ✅ | ✅ | | | | |
|
||||
| [ERNIE](/en/models/qianfan) | ernie-5.1 | ✅ | ✅ | | | | |
|
||||
| [MiMo](/en/models/mimo) | mimo-v2.5-pro / v2.5 | ✅ | ✅ | | | ✅ | |
|
||||
| [LinkAI](/en/models/linkai) | 100+ models from multiple vendors | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [Custom](/en/models/custom) | Local models / third-party proxies | ✅ | | | | | |
|
||||
|
||||
<Tip>
|
||||
Every capability in the Web Console (Vision / Image / Speech-to-Text / Text-to-Speech / Embedding / Web Search) can be configured independently with its own vendor and model; they are not forced to be bound together.
|
||||
Every capability in the Web console (Vision / Image / STT / TTS / Embedding / Web Search) can be configured independently with its own vendor and model — there is no forced binding between them.
|
||||
</Tip>
|
||||
|
||||
|
||||
## How to Configure
|
||||
|
||||
**Option 1 (recommended):** Manage models and capabilities online via the [Web Console](/channels/web), with no need to edit the configuration file:
|
||||
**Option 1 (recommended):** Manage models and capabilities online via the [Web console](/en/channels/web), with no need to edit the configuration file:
|
||||
|
||||
<img width="900" src="https://cdn.link-ai.tech/doc/20260521212527.png" />
|
||||
<img width="900" src="https://cdn.jsdelivr.net/gh/zhayujie/cowagent-assets@main/screenshots/en/web-console-models-config.png" />
|
||||
|
||||
**Option 2:** Manually edit `config.json` and fill in the model name and API key according to the selected model. Every model also supports OpenAI-compatible access: set `bot_type` to `openai` and configure `open_ai_api_base` and `open_ai_api_key`.
|
||||
**Option 2:** Edit `config.json` manually and fill in the model name and API key for the selected vendor. Every model also supports OpenAI-compatible access — just set `bot_type` to `openai` and configure `open_ai_api_base` and `open_ai_api_key`.
|
||||
|
||||
136
docs/en/models/mimo.mdx
Normal file
136
docs/en/models/mimo.mdx
Normal file
@@ -0,0 +1,136 @@
|
||||
---
|
||||
title: MiMo
|
||||
description: Xiaomi MiMo model configuration (Text Chat + Image Understanding + Text-to-Speech)
|
||||
---
|
||||
|
||||
Xiaomi MiMo is a native omni-modal large model. A single `mimo_api_key` enables text chat, image understanding, and text-to-speech all at once.
|
||||
|
||||
<Tip>
|
||||
All capabilities below can be configured in one place via the "Model Management" page in the Web Console — no need to manually edit the configuration file.
|
||||
</Tip>
|
||||
|
||||
## Text Chat
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "mimo-v2.5-pro",
|
||||
"mimo_api_key": "YOUR_API_KEY",
|
||||
"mimo_api_base": "https://api.xiaomimimo.com/v1"
|
||||
}
|
||||
```
|
||||
|
||||
| Parameter | Description |
|
||||
| --- | --- |
|
||||
| `model` | Default recommendation: `mimo-v2.5-pro`; `mimo-v2.5` is also supported |
|
||||
| `mimo_api_key` | Create one in the [MiMo Open Platform](https://platform.xiaomimimo.com/console/api-keys) |
|
||||
| `mimo_api_base` | Optional, defaults to `https://api.xiaomimimo.com/v1` |
|
||||
|
||||
### Model Selection
|
||||
|
||||
| Model | Use Case |
|
||||
| --- | --- |
|
||||
| `mimo-v2.5-pro` | Flagship: native omni-modal + Agent capability, up to 1M tokens context |
|
||||
| `mimo-v2.5` | General-purpose, native omni-modal (text / image / video / audio) |
|
||||
|
||||
## Thinking Mode
|
||||
|
||||
The MiMo V2.5 series enables "thinking mode" by default: the model emits `reasoning_content` (chain-of-thought) before the final answer, improving performance on complex tasks.
|
||||
|
||||
Use the global `enable_thinking` flag to toggle visibility (also switchable from the Web Console settings):
|
||||
|
||||
```json
|
||||
{
|
||||
"enable_thinking": true
|
||||
}
|
||||
```
|
||||
|
||||
## Image Understanding
|
||||
|
||||
Once `mimo_api_key` is configured, the Agent's Vision tool can automatically use MiMo's vision models:
|
||||
|
||||
- When the main model itself is multimodal (`mimo-v2.5-pro` / `mimo-v2.5`), images are handled directly by the main model with no extra setup.
|
||||
- When the main model belongs to another vendor, the Vision tool falls back to `mimo-v2.5-pro`.
|
||||
|
||||
To force a specific Vision model, set it explicitly in the configuration:
|
||||
|
||||
```json
|
||||
{
|
||||
"tools": {
|
||||
"vision": {
|
||||
"provider": "mimo",
|
||||
"model": "mimo-v2.5-pro"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Text-to-Speech (TTS)
|
||||
|
||||
```json
|
||||
{
|
||||
"text_to_voice": "mimo",
|
||||
"text_to_voice_model": "mimo-v2.5-tts",
|
||||
"tts_voice_id": "冰糖"
|
||||
}
|
||||
```
|
||||
|
||||
| Parameter | Description |
|
||||
| --- | --- |
|
||||
| `text_to_voice_model` | Currently only `mimo-v2.5-tts` (preset voices + singing mode) |
|
||||
| `tts_voice_id` | Preset voice name (Chinese voice IDs use the Chinese name directly) |
|
||||
|
||||
### Preset Voices
|
||||
|
||||
| Voice ID | Description |
|
||||
| --- | --- |
|
||||
| `Mia` | English · Female |
|
||||
| `Chloe` | English · Female |
|
||||
| `Milo` | English · Male |
|
||||
| `Dean` | English · Male |
|
||||
| `冰糖` | Chinese · Female (default) |
|
||||
| `茉莉` | Chinese · Female |
|
||||
| `苏打` | Chinese · Male |
|
||||
| `白桦` | Chinese · Male |
|
||||
|
||||
|
||||
You can also pick a voice visually from the Web Console under "Model Management → Text-to-Speech".
|
||||
|
||||
### Style Control
|
||||
|
||||
MiMo TTS supports embedding **audio tags** in the synthesis text to control emotion, tone, dialect, persona, and even singing. Tags must appear in the **text that will be synthesized to speech (i.e. the Agent's reply)**, with the overall style tag placed at the very beginning:
|
||||
|
||||
```
|
||||
(style)content-to-synthesize
|
||||
```
|
||||
|
||||
Half-width `()`, full-width `()`, and `[]` brackets are all accepted. Both Chinese and English style descriptors work — pick whichever language expresses the timbre most precisely. Common examples:
|
||||
|
||||
| Category | Example tags |
|
||||
| --- | --- |
|
||||
| Basic emotions | `happy` `sad` `angry` `fear` `surprised` `excited` `aggrieved` `calm` `indifferent` |
|
||||
| Compound emotions | `wistful` `relieved` `helpless` `guilty` `at ease` `uneasy` `touched` |
|
||||
| Overall tone | `gentle` `aloof` `lively` `serious` `languid` `playful` `deep` `sharp` `cutting` |
|
||||
| Voice character | `magnetic` `mellow` `bright` `ethereal` `childlike` `aged` `sweet` `husky` |
|
||||
| Persona | `squeaky` `mature lady` `young boy` `uncle` `Taiwanese accent` |
|
||||
| Dialect | `Northeastern` `Sichuan` `Henan` `Cantonese` |
|
||||
| Role-play | `Sun Wukong` `Lin Daiyu` |
|
||||
| Singing | `sing` / `singing` |
|
||||
|
||||
Examples:
|
||||
|
||||
- `(magnetic)The night is deep, and the city is still breathing.`
|
||||
- `(gentle)Take a breath. You've got this.`
|
||||
- `(serious)This is the final warning before the system reboots.`
|
||||
- `(singing)Oh, when the saints go marching in…`
|
||||
|
||||
You can also insert fine-grained audio tags at any position in the text to control breathing, laughter, pauses, etc. For example:
|
||||
|
||||
```
|
||||
(nervous, deep breath) Phew… stay calm, stay calm. (faster pace) I've rehearsed this intro fifty times, it'll be fine.
|
||||
```
|
||||
|
||||
See the [MiMo speech synthesis documentation](https://platform.xiaomimimo.com/docs/zh-CN/usage-guide/speech-synthesis-v2.5) for the full tag list.
|
||||
|
||||
<Tip>
|
||||
When CowAgent calls TTS, the Agent's reply text (including any `(...)` tags) is forwarded directly to MiMo for synthesis. Tell the model in its persona / system prompt to "prefix replies with a `(style)` tag to control the tone", and IM channels (WeChat / Feishu / DingTalk / WeCom) will play voice replies with the corresponding emotion, dialect, or even singing.
|
||||
</Tip>
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
title: Baidu Qianfan / ERNIE
|
||||
description: Baidu Qianfan ERNIE model configuration
|
||||
title: ERNIE
|
||||
description: ERNIE model configuration (Baidu Qianfan)
|
||||
---
|
||||
|
||||
Option 1: Native integration (recommended):
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
---
|
||||
title: Tongyi Qwen
|
||||
description: Tongyi Qwen model configuration (Text / Image Understanding / Image Generation / Speech-to-Text / Text-to-Speech / Embedding)
|
||||
title: Qwen
|
||||
description: Qwen model configuration (Text / Image Understanding / Image Generation / Speech-to-Text / Text-to-Speech / Embedding)
|
||||
---
|
||||
|
||||
Tongyi Qwen (DashScope / Bailian) is one of the most fully-featured vendors in China. Text, image understanding, image generation, speech-to-text, text-to-speech, and embedding can all be enabled with a single `dashscope_api_key`.
|
||||
Qwen (Alibaba DashScope / Bailian) is one of the most fully-featured vendors. Text, image understanding, image generation, speech-to-text, text-to-speech, and embedding can all be enabled with a single `dashscope_api_key`.
|
||||
|
||||
<Tip>
|
||||
All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file.
|
||||
@@ -66,7 +66,7 @@ Available models: `qwen-image-2.0`, `qwen-image-2.0-pro`.
|
||||
|
||||
| Parameter | Description |
|
||||
| --- | --- |
|
||||
| `voice_to_text` | Set to `dashscope` to enable Tongyi Qwen ASR |
|
||||
| `voice_to_text` | Set to `dashscope` to enable Qwen ASR |
|
||||
| `voice_to_text_model` | Optional, defaults to `qwen3-asr-flash` |
|
||||
|
||||
Credentials are automatically reused from `dashscope_api_key`. A single audio segment should be smaller than 10MB and no longer than 300 seconds.
|
||||
|
||||
@@ -34,7 +34,7 @@ Related commits: [30c6d9b](https://github.com/zhayujie/CowAgent/commit/30c6d9b)
|
||||
|
||||
## 💰 Coding Plan Support
|
||||
|
||||
Added integration with vendor Coding Plan (monthly programming subscription) tiers via the unified OpenAI-compatible path. Supported vendors include Aliyun, MiniMax, Zhipu GLM, Kimi, and Volcengine.
|
||||
Added integration with vendor Coding Plan (monthly programming subscription) tiers via the unified OpenAI-compatible path. Supported vendors include Aliyun, MiniMax, GLM, Kimi, and Volcengine.
|
||||
|
||||
See [Coding Plan docs](https://docs.cowagent.ai/en/models/coding-plan) for detailed configuration.
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
title: v2.0.8
|
||||
description: CowAgent 2.0.8 - Major Feishu channel upgrade (voice, streaming typewriter, one-click QR app creation), DeepSeek V4 / Baidu Qianfan ERNIE 5.0 support, scheduler memory enhancements and multiple fixes
|
||||
description: CowAgent 2.0.8 - Major Feishu channel upgrade (voice, streaming typewriter, one-click QR app creation), DeepSeek V4 / ERNIE 5.0 support, scheduler memory enhancements and multiple fixes
|
||||
---
|
||||
|
||||
## 🪶 Major Feishu Channel Upgrade
|
||||
@@ -30,9 +30,9 @@ The voice and streaming building blocks come from a community contribution #2791
|
||||
|
||||
- **DeepSeek V4 series**: Added `deepseek-v4-pro` / `deepseek-v4-flash`, with `deepseek-v4-flash` set as the new default
|
||||
- **Unified thinking-mode toggle**: DeepSeek V4, Qwen3 and other thinking-capable models now share the same `enable_thinking` switch
|
||||
- **Baidu Qianfan / ERNIE first-class integration**: New `qianfan` provider supporting `ernie-5.0` (default recommendation), `ernie-x1.1`, `ernie-4.5-turbo-128k`, `ernie-4.5-turbo-32k`. Dedicated `qianfan_api_key` / `qianfan_api_base` settings keep OpenAI config clean; legacy `wenxin` / `wenxin-4` paths are fully preserved. #2790 Thanks [@jimmyzhuu](https://github.com/jimmyzhuu)
|
||||
- **ERNIE first-class integration**: New `qianfan` provider supporting `ernie-5.0` (default recommendation), `ernie-x1.1`, `ernie-4.5-turbo-128k`, `ernie-4.5-turbo-32k`. Dedicated `qianfan_api_key` / `qianfan_api_base` settings keep OpenAI config clean; legacy `wenxin` / `wenxin-4` paths are fully preserved. #2790 Thanks [@jimmyzhuu](https://github.com/jimmyzhuu)
|
||||
|
||||
Documentation: [Baidu Qianfan / ERNIE](https://docs.cowagent.ai/en/models/qianfan)
|
||||
Documentation: [ERNIE](https://docs.cowagent.ai/en/models/qianfan)
|
||||
|
||||
## 🌐 Translation Provider
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ The Web Console adds a new **Models** page that organizes everything by **provid
|
||||
|
||||
Documentation: [Models Overview](https://docs.cowagent.ai/en/models)
|
||||
|
||||
<img width="720" alt="20260522113305" src="https://cdn.link-ai.tech/doc/20260522113305.png" />
|
||||
<img width="720" alt="20260522113305" src="https://cdn.jsdelivr.net/gh/zhayujie/cowagent-assets@main/screenshots/en/web-console-models-config.png" />
|
||||
|
||||
|
||||
## 🧩 MCP Protocol Support
|
||||
|
||||
65
docs/en/skills/hub.mdx
Normal file
65
docs/en/skills/hub.mdx
Normal file
@@ -0,0 +1,65 @@
|
||||
---
|
||||
title: Skill Hub
|
||||
description: Browse, search, and install AI Agent skills
|
||||
---
|
||||
|
||||
[Cow Skill Hub](https://skills.cowagent.ai/) is an open-source skill marketplace for AI Agents, aggregating official picks, community contributions, and third-party skills from GitHub, ClawHub, and beyond.
|
||||
|
||||
Source code: [github.com/zhayujie/cow-skill-hub](https://github.com/zhayujie/cow-skill-hub)
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/20260401110103.png" width="800" />
|
||||
|
||||
## Features
|
||||
|
||||
- **Browse skills** — filter by category (Featured / Community / Third-party) and tags
|
||||
- **Search skills** — find skills by name or description
|
||||
- **View details** — read the skill manifest, file contents, install command, and required environment variables
|
||||
- **One-click install** — copy the install command and run it in CowAgent
|
||||
|
||||
## Installing a skill
|
||||
|
||||
Run the install command in chat or in your terminal:
|
||||
|
||||
<CodeGroup>
|
||||
```text Chat
|
||||
/skill install <name>
|
||||
```
|
||||
|
||||
```bash Terminal
|
||||
cow skill install <name>
|
||||
```
|
||||
</CodeGroup>
|
||||
|
||||
You can also browse the marketplace directly from chat:
|
||||
|
||||
```text
|
||||
/skill list --remote
|
||||
/skill search <keyword>
|
||||
```
|
||||
|
||||
Beyond the curated list, you can install third-party skills from **GitHub, ClawHub, LinkAI, or any URL** via the CLI. See [Installing skills](/en/skills/install) for details.
|
||||
|
||||
## Contributing a skill
|
||||
|
||||
To submit your own skill:
|
||||
|
||||
1. Visit [skills.cowagent.ai/submit](https://skills.cowagent.ai/submit)
|
||||
2. Sign in with GitHub or Google
|
||||
3. Upload a folder or zip file containing `SKILL.md`
|
||||
4. Skill name, display name, and description are auto-detected — adjust as needed
|
||||
5. Submit for review; skills go live after security and quality checks
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/20260401111904.png" width="800" />
|
||||
|
||||
Skill file layout:
|
||||
|
||||
```
|
||||
your-skill/
|
||||
├── SKILL.md # required, in the root
|
||||
├── scripts/ # optional, runtime scripts
|
||||
└── resources/ # optional, additional assets
|
||||
```
|
||||
|
||||
<Tip>
|
||||
Skills are built around the `SKILL.md` manifest. You can also download `SKILL.md` from a skill's detail page and use it with any Agent that supports custom instructions (OpenClaw, Cursor, Claude Code, and more).
|
||||
</Tip>
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
title: image-generation - Image Generation
|
||||
title: image-generation
|
||||
description: Text-to-image / image-to-image / multi-image fusion with automatic multi-provider routing and fallback
|
||||
---
|
||||
|
||||
@@ -39,7 +39,7 @@ To pin a specific model:
|
||||
## Configuring API Keys
|
||||
|
||||
<Tip>
|
||||
It is recommended to configure providers from the "Model Management" page in the [Web Console](/channels/web). Chat model keys configured there are automatically reused by the image generation skill — no need to set them twice. You can also edit the configuration file manually or temporarily set keys in a conversation using the `env_config` tool.
|
||||
It is recommended to configure providers from the "Model Management" page in the [Web console](/en/channels/web). Chat model keys configured there are automatically reused by the image generation skill — no need to set them twice. You can also edit the configuration file manually or temporarily set keys in a conversation using the `env_config` tool.
|
||||
</Tip>
|
||||
|
||||
Credentials are shared with the main model providers:
|
||||
|
||||
@@ -3,11 +3,11 @@ title: Install Skills
|
||||
description: Install skills from multiple sources with a single command
|
||||
---
|
||||
|
||||
CowAgent supports installing skills from **Cow Skill Hub, GitHub, ClawHub**, and any URL with a unified `install` command. Use `/skill install` in chat or `cow skill install` in the terminal.
|
||||
CowAgent supports installing skills from [Cow Skill Hub](https://skills.cowagent.ai/), GitHub, ClawHub, LinkAI, and any URL via a unified `install` command. Use `/skill install` in chat or `cow skill install` in the terminal.
|
||||
|
||||
## From Skill Hub
|
||||
## From the Skill Hub
|
||||
|
||||
Browse the Skill Hub and install:
|
||||
Browse all available skills at [skills.cowagent.ai](https://skills.cowagent.ai/) and install by name:
|
||||
|
||||
```text
|
||||
/skill list --remote
|
||||
@@ -16,7 +16,7 @@ Browse the Skill Hub and install:
|
||||
|
||||
## From GitHub
|
||||
|
||||
Supports batch install from repositories and single skill from subdirectories:
|
||||
Any GitHub-hosted skill can be installed directly. Supports both repository-level batch install and subdirectory-level single install:
|
||||
|
||||
```text
|
||||
/skill install larksuite/cli
|
||||
@@ -25,10 +25,22 @@ Supports batch install from repositories and single skill from subdirectories:
|
||||
|
||||
## From ClawHub
|
||||
|
||||
All [ClawHub](https://clawhub.ai/) skills (40k+) can be installed with a single command:
|
||||
|
||||
```text
|
||||
/skill install clawhub:baidu-search
|
||||
/skill install clawhub:<name>
|
||||
```
|
||||
|
||||
## From LinkAI
|
||||
|
||||
All public resources on [LinkAI](https://link-ai.tech/console) (10k+ apps / workflows / plugins), as well as your own resources (apps, workflows, knowledge bases, databases, plugins), can be installed via:
|
||||
|
||||
```text
|
||||
/skill install linkai:<code>
|
||||
```
|
||||
|
||||
> Every resource created on the LinkAI platform has a unique `code`. Find it on each resource's page in the [console](https://link-ai.tech/console).
|
||||
|
||||
## From URL
|
||||
|
||||
Supports zip archives and SKILL.md file links:
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
title: knowledge-wiki - Knowledge Base
|
||||
title: knowledge-wiki
|
||||
description: Maintain a local structured knowledge base with automatic archiving, categorisation, and cross-referencing
|
||||
---
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
title: skill-creator - Skill Creator
|
||||
title: skill-creator
|
||||
description: Create, install, and update skills — standardises SKILL.md format and directory structure
|
||||
---
|
||||
|
||||
|
||||
@@ -34,7 +34,9 @@ Fully compatible with the MCP community standard, identical to Claude Desktop /
|
||||
| `command` | stdio | Executable to launch the server (e.g. `npx`, `python`, `uvx`) |
|
||||
| `args` | No | Arguments passed to `command` |
|
||||
| `env` | No | Environment variables for the subprocess, commonly used for API keys |
|
||||
| `url` | SSE | SSE endpoint URL (alternative to `command`) |
|
||||
| `url` | SSE / Streamable HTTP | Remote endpoint URL (alternative to `command`) |
|
||||
| `type` | Remote | Remote transport type: `sse` or `streamable-http` (defaults to `sse`) |
|
||||
| `headers` | No | Extra HTTP headers for remote requests (e.g. `Authorization`); Streamable HTTP only |
|
||||
| `disabled` | No | When `true`, this server is skipped — handy for temporary disabling |
|
||||
|
||||
### Full Example
|
||||
@@ -88,7 +90,8 @@ The Agent will:
|
||||
| Transport | Description | Config Field |
|
||||
| --- | --- | --- |
|
||||
| **stdio** | Subprocess communication. The most common option, with the richest community ecosystem. | `command` + `args` |
|
||||
| **SSE** | HTTP Server-Sent Events, suitable for remotely hosted MCP services. | `url` |
|
||||
| **SSE** | HTTP Server-Sent Events. Legacy remote transport. | `url` (default) |
|
||||
| **Streamable HTTP** | New unified remote transport, gradually replacing SSE. | `type: "streamable-http"` + `url` |
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
@@ -106,4 +109,4 @@ You can browse third-party MCP marketplaces and copy a JSON config to use direct
|
||||
- [mcp.so](https://mcp.so) — Global MCP service index
|
||||
- [ModelScope MCP Hub](https://modelscope.cn/mcp) — ModelScope's MCP hub, more reliable from mainland China
|
||||
|
||||
Any MCP server that follows the standard protocol (stdio / SSE) integrates with CowAgent out of the box.
|
||||
Any MCP server that follows the standard protocol (stdio / SSE / Streamable HTTP) integrates with CowAgent out of the box.
|
||||
|
||||
@@ -38,3 +38,43 @@ Create and manage scheduled tasks with natural language:
|
||||
<Frame>
|
||||
<img src="https://cdn.link-ai.tech/doc/20260202195402.png" width="800" />
|
||||
</Frame>
|
||||
|
||||
## Results injected into the conversation
|
||||
|
||||
Scheduled tasks run inside an isolated session (so internal planning and tool calls do not pollute the user's chat), but the **final output** is written back to the user's real session as a message pair. You can directly follow up — e.g. "expand on point 2 from earlier".
|
||||
|
||||
**Default policy**
|
||||
|
||||
- Output of Agent dynamic tasks is injected into the conversation
|
||||
- Fixed-message tasks are not injected by default (configurable)
|
||||
- Each session keeps the most recent **3 pairs** of scheduler messages; older pairs are pruned automatically. Regular user messages are unaffected
|
||||
|
||||
**Configuration**
|
||||
|
||||
| Key | Default | Description |
|
||||
| --- | --- | --- |
|
||||
| `scheduler_inject_to_session` | `true` | Master switch |
|
||||
| `scheduler_inject_max_per_session` | `3` | Max scheduler message pairs kept per session |
|
||||
| `scheduler_inject_send_message` | `false` | Whether to also inject fixed-message tasks |
|
||||
|
||||
```json
|
||||
{
|
||||
"scheduler_inject_to_session": true,
|
||||
"scheduler_inject_max_per_session": 3,
|
||||
"scheduler_inject_send_message": false
|
||||
}
|
||||
```
|
||||
|
||||
## Context inside scheduled task execution
|
||||
|
||||
The isolated session for scheduled tasks retains a few recent runs of conversation history, so you can naturally ask a task to "compare with last time" or "continue from the previous conclusion". To prevent prompts from growing without bound for high-frequency tasks (e.g. a 5-minute monitor), history is auto-trimmed:
|
||||
|
||||
```
|
||||
scheduler_keep_turns = max(1, agent_max_context_turns / 5)
|
||||
```
|
||||
|
||||
`agent_max_context_turns` defaults to `20`, so each scheduled run keeps the most recent **4 turns** of history by default. Increase `agent_max_context_turns` if you need longer memory.
|
||||
|
||||
<Note>
|
||||
For group-chat scenarios (Feishu / WeCom group bots / DingTalk, etc.), the user's real `session_id` looks like `user_id:group_id` — different from `receiver`. The scheduler records the correct `session_id` when a task is created. For older `tasks.json` entries missing this field, the runtime falls back to `receiver`, matching legacy behavior.
|
||||
</Note>
|
||||
|
||||
@@ -24,7 +24,7 @@ If the current provider fails, the tool automatically tries the next one until i
|
||||
| Gemini | Main model | inlineData format |
|
||||
| Doubao | Main model | doubao-seed-2-0 series natively supported |
|
||||
| Kimi (Moonshot) | Main model | kimi-k2.6, kimi-k2.5 natively supported |
|
||||
| Baidu Qianfan | Main model | Defaults to the multimodal main model (e.g. `ernie-5.1`); falls back to `ernie-4.5-turbo-vl` when the main model is not multimodal |
|
||||
| ERNIE | Main model | Defaults to the multimodal main model (e.g. `ernie-5.1`); falls back to `ernie-4.5-turbo-vl` when the main model is not multimodal |
|
||||
| ZhipuAI | glm-5v-turbo | Always uses the dedicated vision model |
|
||||
| MiniMax | MiniMax-Text-01 | Always uses the dedicated vision model |
|
||||
|
||||
|
||||
32
docs/en/tools/web-fetch.mdx
Normal file
32
docs/en/tools/web-fetch.mdx
Normal file
@@ -0,0 +1,32 @@
|
||||
---
|
||||
title: web_fetch - Web Fetch
|
||||
description: Fetch web pages and document content
|
||||
---
|
||||
|
||||
Fetch the content of an HTTP/HTTPS URL. Web pages are extracted as readable text; document files (PDF, Word, Excel, etc.) are downloaded and parsed automatically.
|
||||
|
||||
## Parameters
|
||||
|
||||
| Parameter | Type | Required | Description |
|
||||
| --- | --- | --- | --- |
|
||||
| `url` | string | Yes | HTTP/HTTPS URL (web page or document) |
|
||||
|
||||
## Supported file types
|
||||
|
||||
| Type | Formats |
|
||||
| --- | --- |
|
||||
| PDF | `.pdf` |
|
||||
| Word | `.docx` |
|
||||
| Text | `.txt`, `.md`, `.csv`, `.log` |
|
||||
| Spreadsheet | `.xls`, `.xlsx` |
|
||||
| Presentation | `.ppt`, `.pptx` |
|
||||
|
||||
## Use cases
|
||||
|
||||
- Extract readable text from a web page
|
||||
- Download and parse remote documents
|
||||
- Inspect API response bodies
|
||||
|
||||
<Note>
|
||||
`web_fetch` does not execute JavaScript — for web pages it only retrieves the static HTML. For pages that require JavaScript rendering (such as SPAs), use the `browser` tool instead.
|
||||
</Note>
|
||||
@@ -3,10 +3,10 @@ title: web_search - Web Search
|
||||
description: Search the internet for real-time information, with support for multiple search providers
|
||||
---
|
||||
|
||||
Search the internet for real-time information, news, research, and more. Supports four backends — Bocha, Baidu Qianfan, Zhipu, and LinkAI — and works once any one of them is configured.
|
||||
Search the internet for real-time information, news, research, and more. Supports four backends — Bocha, ERNIE, GLM, and LinkAI — and works once any one of them is configured.
|
||||
|
||||
<Tip>
|
||||
It is recommended to configure providers and routing strategy visually from the "Model Management → Search" panel in the [Web Console](/channels/web), without manually editing the configuration file.
|
||||
It is recommended to configure providers and routing strategy visually from the "Model Management → Search" panel in the [Web console](/en/channels/web), without manually editing the configuration file.
|
||||
</Tip>
|
||||
|
||||
## Providers
|
||||
@@ -14,7 +14,7 @@ Search the internet for real-time information, news, research, and more. Support
|
||||
| Provider | Credential | Apply |
|
||||
| --- | --- | --- |
|
||||
| Bocha | `tools.web_search.bocha_api_key` | [Bocha Open Platform](https://open.bochaai.com/) |
|
||||
| Baidu Qianfan | Reuses `qianfan_api_key` | [Qianfan Console](https://cloud.baidu.com/doc/qianfan/s/2mh4su4uy) |
|
||||
| ERNIE | Reuses `qianfan_api_key` | [Qianfan Console](https://cloud.baidu.com/doc/qianfan/s/2mh4su4uy) |
|
||||
| GLM | Reuses `zhipu_ai_api_key` | [Zhipu Open Platform](https://docs.bigmodel.cn/cn/guide/tools/web-search) |
|
||||
| LinkAI | Reuses `linkai_api_key` | [LinkAI Console](https://link-ai.tech/console/interface) |
|
||||
|
||||
|
||||
@@ -97,7 +97,7 @@ nohup python3 app.py & tail -f nohup.out
|
||||
```
|
||||
|
||||
<Tip>
|
||||
如果在服务器上部署,需要在防火墙或安全组中放行 `9899` 端口才能通过浏览器访问 Web 控制台,建议仅对指定IP开放以保证安全。
|
||||
**服务器公网访问 Web 控制台**:默认 `web_host` 仅监听 `127.0.0.1`(本机访问),需公网访问时请在 `config.json` 中将 `web_host` 设为 `0.0.0.0`,同时强烈建议设置 `web_password` 启用鉴权。此外还需在防火墙/安全组中放行 `9899` 端口,建议仅对指定 IP 开放以保证安全。
|
||||
</Tip>
|
||||
|
||||
## Docker 部署
|
||||
@@ -129,7 +129,7 @@ sudo docker logs -f chatgpt-on-wechat
|
||||
```
|
||||
|
||||
<Tip>
|
||||
如果在服务器上部署,需要在防火墙或安全组中放行 `9899` 端口才能通过浏览器访问 Web 控制台,建议仅对指定IP开放以保证安全。
|
||||
**Docker 公网访问 Web 控制台**:在 `docker-compose.yml` 中将 `WEB_HOST` 设为 `0.0.0.0`(容器内默认绑定 `127.0.0.1` 无法从宿主机外访问),同时强烈建议设置 `WEB_PASSWORD` 启用鉴权。此外需确保 `9899` 端口正确映射到宿主机,并在防火墙/安全组放行该端口。
|
||||
</Tip>
|
||||
|
||||
## 核心配置项
|
||||
|
||||
@@ -33,6 +33,10 @@ description: 使用脚本一键安装和管理 CowAgent
|
||||
|
||||
运行后默认启动 Web 控制台,访问 `http://localhost:9899` 开始对话和管理 Agent。
|
||||
|
||||
<Note>
|
||||
**服务器部署需要公网访问控制台时**,请在 `config.json` 中将 `web_host` 设为 `0.0.0.0`(默认仅监听 `127.0.0.1` 本机访问),同时强烈建议设置 `web_password` 启用鉴权。然后通过 `http://<server-ip>:9899` 访问,并确保防火墙/安全组放行 `9899` 端口。
|
||||
</Note>
|
||||
|
||||
## 管理命令
|
||||
|
||||
安装完成后,使用 `cow` CLI 管理服务:
|
||||
|
||||
@@ -9,7 +9,7 @@ CowAgent 2.0 从简单的聊天机器人全面升级为超级智能助理,采
|
||||
|
||||
CowAgent 的整体架构由以下核心模块组成:
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/cow-agent-arch-zh.jpg" alt="CowAgent Architecture" />
|
||||
<img src="https://cdn.jsdelivr.net/gh/zhayujie/cowagent-assets@main/architecture/zh/architecture.jpg" alt="CowAgent Architecture" />
|
||||
|
||||
| 模块 | 说明 |
|
||||
| --- | --- |
|
||||
|
||||
@@ -84,7 +84,7 @@ Agent 会在对话中自动将有价值的信息整理为知识页面,维护
|
||||
|
||||
技能系统为 Agent 提供无限的扩展性,每个 Skill 由说明文件、运行脚本(可选)、资源(可选)组成,描述如何完成特定类型的任务。通过 Skill 可以让 Agent 遵循说明完成复杂流程、调用各类工具或对接第三方系统。
|
||||
|
||||
- **[Skill Hub](https://skills.cowagent.ai/):** 开放的技能广场,汇集官方推荐、社区贡献和第三方技能,支持一键安装。
|
||||
- [Skill Hub](https://skills.cowagent.ai/):开放的技能广场,汇集官方推荐、社区贡献和第三方技能,支持一键安装。
|
||||
- **内置技能:** 在项目的 `skills/` 目录下,包含技能创造器、图像识别、LinkAI 智能体、网页抓取等。内置 Skill 根据依赖条件(API Key、系统命令等)自动判断是否启用。
|
||||
- **自定义技能:** 由用户通过对话创建,存放在工作空间中(`~/cow/skills/`),可实现任何复杂的业务流程和第三方系统对接。
|
||||
|
||||
|
||||
@@ -3,7 +3,9 @@ title: 项目介绍
|
||||
description: CowAgent - 基于大模型的超级AI助理
|
||||
---
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/78c5dd674e2c828642ecc0406669fed7.png" alt="CowAgent" width="450px"/>
|
||||
<div align="center">
|
||||
<img src="https://cdn.link-ai.tech/doc/78c5dd674e2c828642ecc0406669fed7.png" alt="CowAgent" width="450px"/>
|
||||
</div>
|
||||
|
||||
**CowAgent** 是基于大模型的超级AI助理,能够主动思考和任务规划、操作计算机和外部资源、创造和执行Skills、拥有长期记忆和知识库并不断成长。
|
||||
|
||||
|
||||
@@ -1,250 +1,256 @@
|
||||
<p align="center"><img src="https://github.com/user-attachments/assets/eca9a9ec-8534-4615-9e0f-96c5ac1d10a3" alt="CowAgent" width="550" /></p>
|
||||
<p align="center"><img src="https://github.com/user-attachments/assets/eca9a9ec-8534-4615-9e0f-96c5ac1d10a3" alt="CowAgent" width="420" /></p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://github.com/zhayujie/CowAgent/releases/latest"><img src="https://img.shields.io/github/v/release/zhayujie/CowAgent" alt="Latest release"></a>
|
||||
<a href="https://github.com/zhayujie/CowAgent/blob/master/LICENSE"><img src="https://img.shields.io/github/license/zhayujie/CowAgent" alt="License: MIT"></a>
|
||||
<a href="https://github.com/zhayujie/CowAgent"><img src="https://img.shields.io/github/stars/zhayujie/CowAgent?style=flat-square" alt="Stars"></a> <br/>
|
||||
[<a href="https://github.com/zhayujie/CowAgent/blob/master/README.md">中文</a>] | [<a href="https://github.com/zhayujie/CowAgent/blob/master/docs/en/README.md">English</a>] | [日本語]
|
||||
[<a href="../../README.md">English</a>] | [<a href="../zh/README.md">中文</a>] | [日本語]
|
||||
</p>
|
||||
|
||||
**CowAgent** はLLMを搭載したAIスーパーアシスタントです。自律的なタスク計画、コンピュータや外部リソースの操作、Skillの作成・実行、長期記憶とパーソナルナレッジベースによる継続的な成長が可能です。柔軟なモデル切り替えに対応し、テキスト・音声・画像・ファイルを処理でき、WeChat、Web、Feishu(飛書)、DingTalk(釘釘)、WeCom Bot(企業微信ボット)、WeComアプリ、WeChat公式アカウントに統合可能で、個人のPCやサーバー上で24時間365日稼働できます。
|
||||
**CowAgent** は、自律的にタスクを計画し、コンピュータや外部リソースを操作し、Skill を作成・実行し、パーソナルナレッジベースと長期記憶でユーザーとともに成長するオープンソースのスーパー AI アシスタントです。エンドツーエンドの Agent Harness のリファレンス実装の一つでもあります。
|
||||
|
||||
CowAgent は軽量でデプロイしやすく、拡張性に優れています。主要な LLM プロバイダーをそのまま組み込み、Web や主要な IM プラットフォーム上で動作。個人 PC やサーバー上で 24 時間 365 日稼働できます。
|
||||
|
||||
<p align="center">
|
||||
<a href="https://cowagent.ai/">🌐 ウェブサイト</a> ·
|
||||
<a href="https://docs.cowagent.ai/en/intro/index">📖 ドキュメント</a> ·
|
||||
<a href="https://docs.cowagent.ai/en/guide/quick-start">🚀 クイックスタート</a> ·
|
||||
<a href="https://docs.cowagent.ai/ja/intro/index">📖 ドキュメント</a> ·
|
||||
<a href="https://docs.cowagent.ai/ja/guide/quick-start">🚀 クイックスタート</a> ·
|
||||
<a href="https://skills.cowagent.ai/">🧩 Skill Hub</a> ·
|
||||
<a href="https://link-ai.tech/cowagent/create">☁️ オンラインで試す</a>
|
||||
</p>
|
||||
|
||||
## はじめに
|
||||
<br/>
|
||||
|
||||
> CowAgentは、すぐに使えるAIスーパーアシスタントであると同時に、高い拡張性を持つAgentフレームワークでもあります。新しいモデルインターフェース、チャネル、組み込みツール、Skillシステムを拡張することで、さまざまなカスタマイズニーズに柔軟に対応できます。
|
||||
## 🌟 主な機能
|
||||
|
||||
- ✅ **自律的タスク計画**: 複雑なタスクを理解し、自律的に実行計画を立て、目標達成までツールを呼び出しながら継続的に思考します。
|
||||
- ✅ **長期記憶**: 会話の記憶をローカルファイルやデータベースに自動的に永続化します。コアメモリ、デイリーメモリ、Deep Dream 蒸留を含み、キーワード検索やベクトル検索に対応しています。
|
||||
- ✅ **パーソナルナレッジベース**: 構造化された知識を自動整理し、相互参照によるナレッジグラフを構築。Web での可視化ブラウジングと対話による管理をサポートします。
|
||||
- ✅ **Skillシステム**: Skillの作成・実行エンジンを実装。[Skill Hub](https://skills.cowagent.ai)、GitHubなどからSkillをインストールでき、会話を通じたカスタムSkill作成もサポートしています。
|
||||
- ✅ **ツールシステム**: ファイル読み書き、ターミナル実行、ブラウザ操作、スケジュールタスク、メッセージ送信などの組み込みツールを提供。Agentが自律的に呼び出して複雑なタスクを完了します。
|
||||
- ✅ **CLIシステム**: ターミナルコマンドとチャットコマンドを提供し、プロセス管理、Skillインストール、設定変更などの操作をサポートします。
|
||||
- ✅ **マルチモーダルメッセージ**: テキスト、画像、音声、ファイルなど、さまざまなメッセージタイプの解析・処理・生成・送信に対応しています。
|
||||
- ✅ **複数モデル対応**: DeepSeek、MiniMax、Claude、Gemini、OpenAI、GLM、Qwen、Doubao、Kimiなど、主要なモデルプロバイダーに対応しています。
|
||||
- ✅ **マルチプラットフォームデプロイ**: ローカルPCやサーバー上で実行でき、WeChat、Web、Feishu、DingTalk、WeChat公式アカウント、WeComアプリケーションに統合可能です。
|
||||
| 機能 | 説明 |
|
||||
| :--- | :--- |
|
||||
| [タスク計画](https://docs.cowagent.ai/ja/intro/architecture) | 複雑なタスクを分解し、目標達成までツールを繰り返し呼び出して段階的に実行 |
|
||||
| [長期記憶](https://docs.cowagent.ai/ja/memory/index) | 三層構造(コンテキスト → デイリー → コア)、Deep Dream による自動蒸留、キーワードとベクトルのハイブリッド検索 |
|
||||
| [ナレッジベース](https://docs.cowagent.ai/ja/knowledge/index) | 構造化された知識を Markdown Wiki として自動整理し、進化し続けるナレッジグラフを可視化ブラウジング |
|
||||
| [Skill](https://docs.cowagent.ai/ja/skills/index) | [Skill Hub](https://skills.cowagent.ai/)、GitHub、ClawHub からワンクリックでインストール;対話によるカスタム Skill 作成にも対応 |
|
||||
| [ツール](https://docs.cowagent.ai/ja/tools/index) | ファイル I/O、ターミナル、ブラウザ、スケジューラ、記憶検索、Web 検索など 10+ の組み込みツール — MCP プロトコルに完全対応 |
|
||||
| [チャネル](https://docs.cowagent.ai/ja/channels/index) | 一つの Agent で Web、WeChat、Feishu、DingTalk、WeCom、QQ、公式アカウント、Telegram を同時にサポート |
|
||||
| マルチモーダル | テキスト・画像・音声・ファイルをフルサポート — 認識・生成・双方向送受信 |
|
||||
| [モデル](https://docs.cowagent.ai/ja/models/index) | Claude、GPT、Gemini、DeepSeek、GLM、Qwen、Kimi、MiniMax、Doubao など、設定 1 行で切り替え可能 |
|
||||
| [デプロイ](https://docs.cowagent.ai/ja/guide/quick-start) | ワンラインインストーラー、統合された Web コンソール、複数のデプロイモード(ローカル / Docker / サーバー) |
|
||||
|
||||
## 免責事項
|
||||
<br/>
|
||||
|
||||
1. 本プロジェクトは [MIT License](/LICENSE) に基づいており、技術研究・学習を目的としています。利用者は現地の法律、規制、ポリシー、企業の社則を遵守する必要があります。違法行為や権利侵害となる利用は禁止されています。
|
||||
2. Agentモードは通常のチャットモードよりも多くのトークンを消費します。効果とコストに基づいてモデルを選択してください。AgentはホストOSにアクセスできるため、信頼できる環境にデプロイしてください。
|
||||
3. CowAgentはオープンソース開発に注力しており、いかなる暗号通貨の発行・参加・承認も行っていません。
|
||||
## 🏗️ アーキテクチャ
|
||||
|
||||
## デモ
|
||||
<img src="https://cdn.jsdelivr.net/gh/zhayujie/cowagent-assets@main/architecture/en/architecture.jpg" alt="CowAgent Architecture" width="750"/>
|
||||
|
||||
オンラインで試す(デプロイ不要): [CowAgent](https://link-ai.tech/cowagent/create)
|
||||
CowAgent は完全な **Agent Harness** です:メッセージは各種**チャネル**から流入し、**Agent Core** が記憶・知識・利用可能なツール/Skill を組み合わせてタスクを計画・判断、**モデル**が応答を生成し、結果は元のチャネルに返されます。各レイヤーは疎結合で、独立して拡張可能です。
|
||||
|
||||
## 更新履歴
|
||||
|
||||
> **2026.04.14:** [v2.0.6](https://github.com/zhayujie/CowAgent/releases/tag/2.0.6) — ナレッジベース、Deep Dream 記憶蒸留、スマートコンテキスト圧縮、Web コンソールアップグレード。
|
||||
|
||||
> **2026.04.01:** [v2.0.5](https://github.com/zhayujie/CowAgent/releases/tag/2.0.5) — Cow CLI、Skill Hubオープンソース化、ブラウザツール、WeCom Botスキャン作成など。
|
||||
|
||||
> **2026.02.27:** [v2.0.2](https://github.com/zhayujie/CowAgent/releases/tag/2.0.2) — Webコンソールの全面刷新(ストリーミングチャット、モデル/Skill/メモリ/チャネル/スケジューラ/ログ管理)、マルチチャネル同時実行、セッション永続化、Gemini 3.1 Pro / Claude 4.6 Sonnet / Qwen3.5 Plusなど新モデル追加。
|
||||
|
||||
> **2026.02.13:** [v2.0.1](https://github.com/zhayujie/CowAgent/releases/tag/2.0.1) — 組み込みWeb検索ツール、スマートコンテキストトリミング、ランタイム情報の動的更新、Windows互換性、スケジューラのメモリ喪失やFeishu接続問題などの修正。
|
||||
|
||||
> **2026.02.03:** [v2.0.0](https://github.com/zhayujie/CowAgent/releases/tag/2.0.0) — マルチステップタスク計画、長期記憶、組み込みツール、Skillフレームワーク、新モデル、チャネル最適化を備えたAIスーパーアシスタントへの全面アップグレード。
|
||||
|
||||
> **2025.05.23:** [v1.7.6](https://github.com/zhayujie/CowAgent/releases/tag/1.7.6) — Webチャネル最適化、AgentMeshマルチエージェントプラグイン、Baidu TTS、claude-4-sonnet/opus対応。
|
||||
|
||||
> **2025.04.11:** [v1.7.5](https://github.com/zhayujie/CowAgent/releases/tag/1.7.5) — wechatferryプロトコル、DeepSeekモデル、Tencent Cloud音声、ModelScope・Gitee-AI対応。
|
||||
|
||||
> **2024.12.13:** [v1.7.4](https://github.com/zhayujie/CowAgent/releases/tag/1.7.4) — Gemini 2.0モデル、Webチャネル、メモリリーク修正。
|
||||
|
||||
全更新履歴: [リリースノート](https://docs.cowagent.ai/en/releases/overview)
|
||||
詳細は [アーキテクチャ](https://docs.cowagent.ai/ja/intro/architecture) を参照してください。
|
||||
|
||||
<br/>
|
||||
|
||||
## 🚀 クイックスタート
|
||||
|
||||
本プロジェクトは、インストール・設定・起動・管理をワンクリックで行えるスクリプトを提供しています:
|
||||
依存関係のインストール、設定、起動を自動で行うワンラインインストーラーを提供しています:
|
||||
|
||||
**Linux / macOS:**
|
||||
|
||||
```bash
|
||||
bash <(curl -fsSL https://cdn.link-ai.tech/code/cow/run.sh)
|
||||
```
|
||||
|
||||
**Windows (PowerShell):**
|
||||
|
||||
```powershell
|
||||
irm https://cdn.link-ai.tech/code/cow/run.ps1 | iex
|
||||
```
|
||||
|
||||
実行後、デフォルトでWebサービスが起動します。`http://localhost:9899/chat` にアクセスしてチャットを開始できます。
|
||||
|
||||
スクリプトの使い方: [ワンクリックインストール](https://docs.cowagent.ai/ja/guide/quick-start)。インストール後は `cow start`、`cow stop` などの [CLI コマンド](https://docs.cowagent.ai/ja/cli/index)でサービスを管理できます。
|
||||
|
||||
### 手動インストール
|
||||
|
||||
**1. プロジェクトのクローン**
|
||||
|
||||
```bash
|
||||
git clone https://github.com/zhayujie/CowAgent
|
||||
cd CowAgent/
|
||||
```
|
||||
|
||||
**2. 依存関係のインストール**
|
||||
|
||||
```bash
|
||||
pip3 install -r requirements.txt
|
||||
pip3 install -r requirements-optional.txt # 任意ですが推奨
|
||||
```
|
||||
|
||||
**3. Cow CLI のインストール(推奨)**
|
||||
|
||||
```bash
|
||||
pip3 install -e .
|
||||
```
|
||||
|
||||
インストール後、`cow` コマンドでサービス管理(起動、停止、更新など)やSkill管理ができます。[コマンドドキュメント](https://docs.cowagent.ai/ja/cli/index)を参照してください。
|
||||
|
||||
**4. ブラウザのインストール(任意)**
|
||||
|
||||
Agentにブラウザ操作(Webページへのアクセス、フォーム入力など)が必要な場合:
|
||||
|
||||
```bash
|
||||
cow install-browser
|
||||
```
|
||||
|
||||
`playwright` と Chromium を自動インストールします。[ブラウザツールドキュメント](https://docs.cowagent.ai/ja/tools/browser)を参照してください。
|
||||
|
||||
**5. 設定**
|
||||
|
||||
```bash
|
||||
cp config-template.json config.json
|
||||
```
|
||||
|
||||
`config.json` にモデルのAPIキーとチャネルタイプを記入してください。詳細は[設定ドキュメント](https://docs.cowagent.ai/en/guide/manual-install)を参照してください。
|
||||
|
||||
**6. 実行**
|
||||
|
||||
```bash
|
||||
cow start # 推奨、Cow CLI が必要
|
||||
python3 app.py # または直接実行
|
||||
```
|
||||
|
||||
サーバーデプロイでは、`cow` コマンドでサービスを管理できます:
|
||||
|
||||
```bash
|
||||
cow start # バックグラウンドで起動
|
||||
cow stop # サービス停止
|
||||
cow restart # サービス再起動
|
||||
cow status # 実行状態を確認
|
||||
cow logs # ログを表示
|
||||
cow update # 最新コードを取得して再起動
|
||||
```
|
||||
|
||||
または従来の方法で実行:
|
||||
|
||||
```bash
|
||||
nohup python3 app.py & tail -f nohup.out
|
||||
```
|
||||
|
||||
### Dockerデプロイ
|
||||
**Docker:**
|
||||
|
||||
```bash
|
||||
curl -O https://cdn.link-ai.tech/code/cow/docker-compose.yml
|
||||
# docker-compose.yml を編集して設定を記入
|
||||
sudo docker compose up -d
|
||||
sudo docker logs -f chatgpt-on-wechat
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
起動後、`http://localhost:9899` にアクセスして **Web コンソール**を開くと、モデル設定・チャネル接続・Skill インストールがすべてここで完結します。
|
||||
|
||||
> サーバーデプロイでコンソールに公開アクセスする場合は、`config.json` の `web_host` を `0.0.0.0` に設定してください(あわせて `web_password` の設定も強く推奨)。その後 `http://<server-ip>:9899` にアクセスし、ファイアウォール/セキュリティグループで `9899` ポートを開放することも忘れずに。
|
||||
|
||||
> 📖 詳細ガイド: [クイックスタート](https://docs.cowagent.ai/ja/guide/quick-start) · [ソースからインストール](https://docs.cowagent.ai/ja/guide/manual-install) · [アップグレード](https://docs.cowagent.ai/ja/guide/upgrade)
|
||||
|
||||
インストール後は、[`cow` CLI](https://docs.cowagent.ai/ja/cli/index) でサービスを管理できます:
|
||||
|
||||
```bash
|
||||
cow start | stop | restart # サービス制御
|
||||
cow status | logs # ステータスとログ
|
||||
cow update # 最新コード取得後に再起動
|
||||
cow skill install <名前> # Skill のインストール
|
||||
cow install-browser # ブラウザツールのインストール
|
||||
```
|
||||
|
||||
<br/>
|
||||
|
||||
## モデル
|
||||
## 🤖 モデル
|
||||
|
||||
主要なモデルプロバイダーに対応しています。Agentモードの推奨モデル:
|
||||
CowAgent は主要な LLM プロバイダーすべてに対応しています。**チャット、画像認識、画像生成、ASR/TTS、埋め込み(Embedding)** の各機能はそれぞれ別のベンダーで設定可能です。
|
||||
|
||||
| プロバイダー | 推奨モデル |
|
||||
| --- | --- |
|
||||
| DeepSeek | `deepseek-v4-flash` |
|
||||
| MiniMax | `MiniMax-M2.7` |
|
||||
| Claude | `claude-sonnet-4-6` |
|
||||
| Gemini | `gemini-3.1-pro-preview` |
|
||||
| OpenAI | `gpt-5.4` |
|
||||
| GLM | `glm-5.1` |
|
||||
| Qwen | `qwen3.6-plus` |
|
||||
| Doubao | `doubao-seed-2-0-code-preview-260215` |
|
||||
| Kimi | `kimi-k2.6` |
|
||||
| プロバイダー | 代表的なモデル | チャット | 画像認識 | 画像生成 | ASR | TTS | Embedding |
|
||||
| --- | --- | :-: | :-: | :-: | :-: | :-: | :-: |
|
||||
| [Claude](https://docs.cowagent.ai/ja/models/claude) | claude-opus-4-7 | ✅ | ✅ | | | | |
|
||||
| [OpenAI](https://docs.cowagent.ai/ja/models/openai) | gpt-5.5、o シリーズ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [Gemini](https://docs.cowagent.ai/ja/models/gemini) | gemini-3.5-flash | ✅ | ✅ | ✅ | | | |
|
||||
| [DeepSeek](https://docs.cowagent.ai/ja/models/deepseek) | deepseek-v4-flash / pro | ✅ | | | | | |
|
||||
| [Qwen](https://docs.cowagent.ai/ja/models/qwen) | qwen3.7-max | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [GLM](https://docs.cowagent.ai/ja/models/glm) | glm-5.1、glm-5v-turbo | ✅ | ✅ | | ✅ | | ✅ |
|
||||
| [Doubao](https://docs.cowagent.ai/ja/models/doubao) | doubao-seed-2.0 シリーズ | ✅ | ✅ | ✅ | | | ✅ |
|
||||
| [Kimi](https://docs.cowagent.ai/ja/models/kimi) | kimi-k2.6 | ✅ | ✅ | | | | |
|
||||
| [MiniMax](https://docs.cowagent.ai/ja/models/minimax) | MiniMax-M2.7 | ✅ | ✅ | ✅ | | ✅ | |
|
||||
| [ERNIE](https://docs.cowagent.ai/ja/models/qianfan) | ernie-5.1 | ✅ | ✅ | | | | |
|
||||
| [MiMo](https://docs.cowagent.ai/ja/models/mimo) | mimo-v2.5-pro / v2.5 | ✅ | ✅ | | | ✅ | |
|
||||
| [LinkAI](https://docs.cowagent.ai/ja/models/linkai) | 1 つの Key で 100+ モデルに接続 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [カスタム](https://docs.cowagent.ai/ja/models/custom) | ローカルモデル / サードパーティプロキシ | ✅ | | | | | |
|
||||
|
||||
各モデルの詳細設定については、[モデルドキュメント](https://docs.cowagent.ai/en/models/index)を参照してください。
|
||||
> Web コンソールでの設定が推奨されており、ファイルを手動編集する必要はありません。手動設定については各プロバイダーのドキュメントおよび [モデル概要](https://docs.cowagent.ai/ja/models/index) を参照してください。
|
||||
|
||||
### Coding Plan
|
||||
<br/>
|
||||
|
||||
Coding Planは各プロバイダーが提供する月額サブスクリプションパッケージで、高頻度のAgent利用に最適です。すべてのプロバイダーはOpenAI互換モードでアクセスできます:
|
||||
## 💬 チャネル
|
||||
|
||||
```json
|
||||
{
|
||||
"bot_type": "openai",
|
||||
"model": "MODEL_NAME",
|
||||
"open_ai_api_base": "PROVIDER_CODING_PLAN_API_BASE",
|
||||
"open_ai_api_key": "YOUR_API_KEY"
|
||||
}
|
||||
一つの Agent インスタンスで複数のチャネルを同時に提供できます。`channel_type` 設定で切り替えるか、複数のチャネルを並列実行できます。
|
||||
|
||||
| チャネル | テキスト | 画像 | ファイル | 音声 | グループ |
|
||||
| --- | :-: | :-: | :-: | :-: | :-: |
|
||||
| [Web コンソール](https://docs.cowagent.ai/ja/channels/web)(デフォルト) | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [WeChat](https://docs.cowagent.ai/ja/channels/weixin) | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [Feishu / Lark](https://docs.cowagent.ai/ja/channels/feishu) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [DingTalk](https://docs.cowagent.ai/ja/channels/dingtalk) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [WeCom Bot](https://docs.cowagent.ai/ja/channels/wecom-bot) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [QQ](https://docs.cowagent.ai/ja/channels/qq) | ✅ | ✅ | ✅ | | ✅ |
|
||||
| [WeCom App](https://docs.cowagent.ai/ja/channels/wecom) | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [WeChat 公式アカウント](https://docs.cowagent.ai/ja/channels/wechatmp) | ✅ | ✅ | | ✅ | |
|
||||
| [Telegram](https://docs.cowagent.ai/ja/channels/telegram) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
|
||||
> Feishu と WeCom Bot は **Web コンソール内で QR コードをスキャンするだけで接続**できます — パブリック IP は不要です。詳細は [チャネル概要](https://docs.cowagent.ai/ja/channels/index) を参照してください。
|
||||
|
||||
<img src="https://cdn.jsdelivr.net/gh/zhayujie/cowagent-assets@main/screenshots/en/web-console-chat.png" alt="CowAgent Web Console" width="800"/>
|
||||
|
||||
*Web コンソールはデフォルトのチャネルであると同時に、Agent の設定・管理を統一的に行う場でもあります。*
|
||||
|
||||
<br/>
|
||||
|
||||
## 🧠 記憶とナレッジベース
|
||||
|
||||
**長期記憶**は三層構造:会話コンテキスト(短期)→ デイリー記憶(中期)→ MEMORY.md(長期)。毎晩の **Deep Dream** が散在する記憶を洗練された長期記憶とナラティブな日記に蒸留します。詳細は [長期記憶](https://docs.cowagent.ai/ja/memory/index) · [Deep Dream](https://docs.cowagent.ai/ja/memory/deep-dream) を参照してください。
|
||||
|
||||
**パーソナルナレッジベース**は時系列の記憶とは異なり、構造化された知識を**トピック単位**で整理します。Agent が会話中に有用な情報を自動でキュレーションし、相互参照とインデックスを維持し、Web コンソールでナレッジグラフを可視化できます。詳細は [パーソナルナレッジベース](https://docs.cowagent.ai/ja/knowledge/index) を参照してください。
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<td width="50%">
|
||||
<img src="https://cdn.jsdelivr.net/gh/zhayujie/cowagent-assets@main/screenshots/en/web-console-memory.png" alt="長期記憶" />
|
||||
<p align="center"><em>長期記憶 · 三層構造 + Deep Dream</em></p>
|
||||
</td>
|
||||
<td width="50%">
|
||||
<img src="https://cdn.jsdelivr.net/gh/zhayujie/cowagent-assets@main/screenshots/en/web-console-knowledge.png" alt="パーソナルナレッジベース" />
|
||||
<p align="center"><em>ナレッジベース · 自動キュレーションされた Markdown Wiki</em></p>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<br/>
|
||||
|
||||
## 🔧 ツールと Skill
|
||||
|
||||
**ツール(Tools)** は Agent がシステムリソースを操作するためのアトミックな機能です。**Skill(Skills)** はマニフェストファイルで定義される高レベルのワークフローで、複数のツールを組み合わせて複雑なタスクを完了します。
|
||||
|
||||
### ツールシステム
|
||||
|
||||
**組み込みツール**には、ファイル I/O(`read` / `write` / `edit` / `ls`)、ターミナル(`bash`)、ファイル送信(`send`)、記憶検索(`memory`)、環境変数(`env_config`)、Web フェッチ(`web_fetch`)、スケジューラ(`scheduler`)、Web 検索(`web_search`)、画像認識(`vision`)、ブラウザ自動化(`browser`)などが含まれます。
|
||||
|
||||
**MCP プロトコル**は [Model Context Protocol](https://modelcontextprotocol.io) のオープンエコシステムを統合します。`mcp.json` を一度設定すれば即利用可能で、stdio / SSE トランスポート、ホットリロード、ノーコード統合をサポートします。
|
||||
|
||||
詳細: [ツール概要](https://docs.cowagent.ai/ja/tools/index) · [MCP 統合](https://docs.cowagent.ai/ja/tools/mcp)。
|
||||
|
||||
### Skill システム
|
||||
|
||||
- **[Skill Hub](https://skills.cowagent.ai/)** — オープン Skill マーケットプレイス:閲覧、検索、ワンクリックインストール
|
||||
- **GitHub / ClawHub / URL など** — 任意のソースからワンクリックでインストール
|
||||
- **対話による作成** — `skill-creator` を使って対話でカスタム Skill を生成;ワークフローやサードパーティ API を再利用可能な Skill に変換
|
||||
|
||||
```bash
|
||||
/skill list # インストール済み Skill の一覧
|
||||
/skill search <キーワード> # マーケットプレイスで検索
|
||||
/skill install <名前> # ワンクリックインストール
|
||||
```
|
||||
|
||||
- `bot_type`: `openai` を指定
|
||||
- `model`: プロバイダーがサポートするモデル名
|
||||
- `open_ai_api_base`: プロバイダーのCoding Plan API Base(標準の従量課金とは異なります)
|
||||
- `open_ai_api_key`: プロバイダーのCoding Plan APIキー
|
||||
|
||||
> 注意:Coding PlanのAPI BaseとAPIキーは、通常の従量課金のものとは別です。各プロバイダーのプラットフォームから取得してください。
|
||||
|
||||
対応プロバイダーには、Alibaba Cloud、MiniMax、Zhipu GLM、Kimi、Volcengineなどがあります。各プロバイダーの詳細設定については、[Coding Planドキュメント](https://docs.cowagent.ai/en/models/coding-plan)を参照してください。
|
||||
詳細: [Skill 概要](https://docs.cowagent.ai/ja/skills/index) · [Skill 作成](https://docs.cowagent.ai/ja/skills/create)。
|
||||
|
||||
<br/>
|
||||
|
||||
## チャネル
|
||||
## 🏷 更新履歴
|
||||
|
||||
複数のプラットフォームに対応しています。`config.json` の `channel_type` を設定して切り替えます:
|
||||
> **2026.05.22:** [v2.0.9](https://github.com/zhayujie/CowAgent/releases/tag/2.0.9) — モデル管理、MCP プロトコル対応、ブラウザセッション永続化、新モデル(gpt-5.5、gemini-3.5-flash、qwen3.7-max)、デプロイのセキュリティ強化。
|
||||
|
||||
| チャネル | `channel_type` | ドキュメント |
|
||||
| --- | --- | --- |
|
||||
| WeChat | `weixin` | [WeChat設定](https://docs.cowagent.ai/ja/channels/weixin) |
|
||||
| Web(デフォルト) | `web` | [Webチャネル](https://docs.cowagent.ai/en/channels/web) |
|
||||
| Feishu(飛書) | `feishu` | [Feishu設定](https://docs.cowagent.ai/en/channels/feishu) |
|
||||
| DingTalk(釘釘) | `dingtalk` | [DingTalk設定](https://docs.cowagent.ai/en/channels/dingtalk) |
|
||||
| WeCom Bot | `wecom_bot` | [WeCom Bot設定](https://docs.cowagent.ai/en/channels/wecom-bot) |
|
||||
| WeComアプリ | `wechatcom_app` | [WeCom設定](https://docs.cowagent.ai/en/channels/wecom) |
|
||||
| WeChat公式アカウント | `wechatmp` / `wechatmp_service` | [WeChat公式アカウント設定](https://docs.cowagent.ai/en/channels/wechatmp) |
|
||||
| ターミナル | `terminal` | — |
|
||||
> **2026.05.06:** [v2.0.8](https://github.com/zhayujie/CowAgent/releases/tag/2.0.8) — Feishu チャネル全面アップグレード(音声、ストリーミング、QR 接続)、DeepSeek V4 と Baidu Qianfan 対応、スケジューラツール強化。
|
||||
|
||||
複数チャネルを同時に有効化できます。カンマ区切りで指定してください:`"channel_type": "feishu,dingtalk"`
|
||||
> **2026.04.22:** [v2.0.7](https://github.com/zhayujie/CowAgent/releases/tag/2.0.7) — 組み込み画像生成(GPT Image 2、Nano Banana)、新モデル(Kimi K2.6、Claude Opus 4.7、GLM 5.1)、ナレッジベースと記憶の強化。
|
||||
|
||||
> **2026.04.14:** [v2.0.6](https://github.com/zhayujie/CowAgent/releases/tag/2.0.6) — ナレッジベース、Deep Dream 記憶蒸留、スマートコンテキスト圧縮、マルチセッション Web コンソール。
|
||||
|
||||
> **2026.04.01:** [v2.0.5](https://github.com/zhayujie/CowAgent/releases/tag/2.0.5) — Cow CLI、Skill Hub オープンソース化、ブラウザツール、WeCom Bot QR 接続。
|
||||
|
||||
> **2026.02.03:** [v2.0.0](https://github.com/zhayujie/CowAgent/releases/tag/2.0.0) — マルチステップタスク計画、長期記憶、Skill フレームワークを備えたスーパー Agent アシスタントへの全面アップグレード。
|
||||
|
||||
完全な履歴: [リリースノート](https://docs.cowagent.ai/ja/releases/overview)
|
||||
|
||||
<br/>
|
||||
|
||||
## エンタープライズサービス
|
||||
## 🤝 コミュニティとサポート
|
||||
|
||||
<a href="https://link-ai.tech" target="_blank"><img width="720" src="https://cdn.link-ai.tech/image/link-ai-intro.jpg"></a>
|
||||
GitHub で [Issue を報告](https://github.com/zhayujie/CowAgent/issues) するか、下記 QR コードをスキャンして WeChat コミュニティに参加してください:
|
||||
|
||||
> [LinkAI](https://link-ai.tech/) は、企業や開発者向けのワンストップAIエージェントプラットフォームです。マルチモーダルLLM、ナレッジベース、Agentプラグイン、ワークフローを統合しています。主要プラットフォームへのワンクリック統合、SaaSおよびプライベートデプロイに対応しています。
|
||||
<img width="130" src="https://img-1317903499.cos.ap-guangzhou.myqcloud.com/docs/open-community.png">
|
||||
|
||||
<br/>
|
||||
|
||||
## 🔗 関連プロジェクト
|
||||
|
||||
- [Cow Skill Hub](https://github.com/zhayujie/cow-skill-hub): AIエージェント向けのオープンSkillマーケットプレイス。CowAgent、OpenClaw、Claude Codeなどで利用可能なSkillの閲覧・検索・インストール・公開が可能。
|
||||
- [bot-on-anything](https://github.com/zhayujie/bot-on-anything): 軽量で高い拡張性を持つLLMアプリケーションフレームワーク。Slack、Telegram、Discord、Gmailなどに対応。
|
||||
- [AgentMesh](https://github.com/MinimalFuture/AgentMesh): エージェントチームの協調による複雑な問題解決のためのオープンソースのマルチエージェントフレームワーク。
|
||||
- **[Cow Skill Hub](https://github.com/zhayujie/cow-skill-hub)** — AI エージェント向けのオープン Skill マーケットプレイス;CowAgent、OpenClaw、Claude Code などに対応
|
||||
- **[bot-on-anything](https://github.com/zhayujie/bot-on-anything)** — 軽量な LLM アプリケーションフレームワーク;Slack、Telegram、Discord、Gmail などに対応
|
||||
- **[AgentMesh](https://github.com/MinimalFuture/AgentMesh)** — チーム協調による複雑な問題解決のためのオープンソースのマルチエージェントフレームワーク
|
||||
|
||||
## 🔎 よくある質問
|
||||
<br/>
|
||||
|
||||
FAQ: <https://github.com/zhayujie/CowAgent/wiki/FAQs>
|
||||
## 🏢 エンタープライズサービス
|
||||
|
||||
## 🛠️ コントリビューション
|
||||
[**LinkAI**](https://link-ai.tech/) は企業や開発者向けのワンストップ AI Agent プラットフォームで、CowAgent にマネージドホスティングとエンタープライズグレードのサポートを提供します:
|
||||
|
||||
新しいチャネルの追加を歓迎します。[Feishuチャネル](https://github.com/zhayujie/CowAgent/blob/master/channel/feishu/feishu_channel.py)を参考にしてください。また、新しいSkillのコントリビューションも歓迎します。[Skill作成ドキュメント](https://docs.cowagent.ai/ja/skills/create)を参照するか、[Skill Hub](https://skills.cowagent.ai/submit)に提出してください。
|
||||
- **🚀 デプロイ不要のホスト型ランタイム** — [CowAgent オンラインアシスタント](https://link-ai.tech/cowagent/create) を 1 分以内に起動、サーバー不要
|
||||
- **🧠 Agent インフラ** — 主要 LLM・ナレッジベース・データベース・Skill・ワークフローへの統一アクセス。CowAgent の機能を拡張する、すぐに使えるビルディングブロック
|
||||
- **🏢 チーム & エンタープライズ機能** — ワークスペース、ロールベースのアクセス制御、監査ログ、本番運用向けプライベートデプロイ
|
||||
|
||||
## ✉ お問い合わせ
|
||||
エンタープライズに関するお問い合わせ:**sales@simple-future.tech** または [QR コードをスキャン](https://cdn.link-ai.tech/consultant.jpg) して WeChat でお問い合わせください。
|
||||
|
||||
PRやIssueの提出を歓迎します。🌟 Starでプロジェクトをサポートしてください。ご質問がある場合は、[FAQリスト](https://github.com/zhayujie/CowAgent/wiki/FAQs)を確認するか、[Issues](https://github.com/zhayujie/CowAgent/issues)を検索してください。
|
||||
<br/>
|
||||
|
||||
## 🛠️ 開発とコントリビューション
|
||||
|
||||
新しいチャネルの追加を歓迎します — [Feishu チャネル](https://github.com/zhayujie/CowAgent/blob/master/channel/feishu/feishu_channel.py) を参考にカスタムチャネルを実装できます。新しい Skill のコントリビューションも [Skill Hub](https://skills.cowagent.ai/submit) で受け付けています。
|
||||
|
||||
⭐ Star でプロジェクトの更新をフォローしてください。PR や Issue の提出も歓迎します。
|
||||
|
||||
## 🌟 コントリビューター
|
||||
|
||||

|
||||
|
||||
<br/>
|
||||
|
||||
## ⚠️ 免責事項
|
||||
|
||||
1. 本プロジェクトは [MIT License](/LICENSE) に基づき、技術研究と学習を目的としています。利用者は所在地の法令・規制を遵守する必要があり、本プロジェクトの利用に起因するいかなる結果についてもメンテナーは責任を負いません。
|
||||
2. **コストと安全性:** Agent モードは通常のチャットよりトークン消費が大幅に多いため、品質とコストのバランスを考慮してモデルを選択してください。Agent はローカル OS にアクセスできるため、信頼できる環境にのみデプロイしてください。
|
||||
3. CowAgent は純粋なオープンソースプロジェクトであり、暗号通貨の発行・参加・承認は一切行いません。
|
||||
|
||||
<br/>
|
||||
|
||||
## 📌 プロジェクト改名のお知らせ
|
||||
|
||||
本プロジェクトは旧名 `chatgpt-on-wechat` から、2026.04.13 に **CowAgent** へ正式に改名されました。元の GitHub URL は自動的にリダイレクトされます。既存ユーザーは `git remote set-url origin https://github.com/zhayujie/CowAgent.git` でローカルのリモートを更新できます。
|
||||
|
||||
@@ -19,6 +19,7 @@ CowAgent は複数のチャットチャネルへの接続に対応しており
|
||||
| [QQ](/ja/channels/qq) | ✅ | ✅ | ✅ | | ✅ |
|
||||
| [WeCom アプリ](/ja/channels/wecom) | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [WeChat 公式アカウント](/ja/channels/wechatmp) | ✅ | ✅ | | ✅ | |
|
||||
| [Telegram](/ja/channels/telegram) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
|
||||
- **画像 / ファイル / 音声**列は対応するメッセージタイプの送受信に対応していることを示します。詳細は各チャネルのドキュメントを参照してください
|
||||
- **グループチャット**列はグループメッセージを認識して応答できることを示します
|
||||
@@ -37,3 +38,4 @@ CowAgent は複数のチャットチャネルへの接続に対応しており
|
||||
- [QQ](/ja/channels/qq) — QQ 公式ボットオープンプラットフォーム
|
||||
- [WeCom アプリ](/ja/channels/wecom) — WeCom 自作アプリ接続
|
||||
- [WeChat 公式アカウント](/ja/channels/wechatmp) — WeChat 公式アカウント(購読アカウント / サービスアカウント)
|
||||
- [Telegram](/ja/channels/telegram) — グローバル IM、5 分で接続、公開 IP 不要
|
||||
|
||||
111
docs/ja/channels/telegram.mdx
Normal file
111
docs/ja/channels/telegram.mdx
Normal file
@@ -0,0 +1,111 @@
|
||||
---
|
||||
title: Telegram
|
||||
description: Telegram Bot API 経由で CowAgent を接続
|
||||
---
|
||||
|
||||
> 公式の Telegram Bot API を通じて CowAgent を接続します。1 対 1 チャットおよびグループチャット(@メンションまたはボットへの返信で起動)に対応。Long Polling 方式のため公開 IP は不要で、すぐに利用できます。
|
||||
|
||||
|
||||
## 1. 接続手順
|
||||
|
||||
### ステップ 1: BotFather で Bot を作成
|
||||
|
||||
1. Telegram で公式アカウント [@BotFather](https://t.me/BotFather) を開きます。
|
||||
2. `/newbot` を送り、案内に従って入力します:
|
||||
- **Bot 名**(表示名、例: `My CowAgent Bot`)
|
||||
- **Bot ユーザー名**(`bot` で終わる必要があります、例: `my_cowagent_bot`)
|
||||
3. 作成完了後、BotFather から **HTTP API Token**(例: `123456789:ABCdefGhIJKlmNoPQRsTUVwxyZ`)が返されます。大切に保管してください。
|
||||
|
||||
<Tip>
|
||||
Token は Bot のパスワードに相当します。漏えいしないよう注意してください。万が一漏れた場合は `@BotFather` に `/revoke` を送って再発行できます。
|
||||
</Tip>
|
||||
|
||||
### ステップ 2:(グループ利用時)Privacy Mode を無効化
|
||||
|
||||
1 対 1 チャットのみ利用する場合はスキップ可能です。Telegram Bot は既定で **Privacy Mode** が有効で、グループ内では `@bot` 接尾辞付きのコマンド(例: `/start@your_bot`)と、Bot メッセージへの返信のみ受信できます。**通常の `@bot こんにちは` のようなテキストメッセージは届きません**。そのままだとグループで反応しないので、必要に応じて以下を設定してください。
|
||||
|
||||
`@BotFather` に対して:
|
||||
|
||||
1. `/setprivacy` を送信
|
||||
2. 作成した Bot を選択
|
||||
3. `Disable` を選択
|
||||
|
||||
<Note>
|
||||
設定後もグループで反応しない場合は、Bot を一度グループから外して再度追加してみてください。
|
||||
</Note>
|
||||
|
||||
### ステップ 3: CowAgent に接続
|
||||
|
||||
<Tabs>
|
||||
<Tab title="Web コンソール(推奨)">
|
||||
Web コンソール(既定 `http://127.0.0.1:9899`)を開き、**チャネル** メニュー → **チャネルを追加** → **Telegram** を選択し、Bot Token を貼り付けて接続をクリックします。
|
||||
</Tab>
|
||||
<Tab title="設定ファイル">
|
||||
`config.json` に以下を追加して Cow を起動します:
|
||||
|
||||
```json
|
||||
{
|
||||
"channel_type": "telegram",
|
||||
"telegram_token": "123456789:ABCdefGhIJKlmNoPQRsTUVwxyZ",
|
||||
"telegram_group_trigger": "mention_or_reply"
|
||||
}
|
||||
```
|
||||
|
||||
| パラメータ | 説明 | 既定値 |
|
||||
| --- | --- | --- |
|
||||
| `telegram_token` | BotFather から発行された HTTP API Token | - |
|
||||
| `telegram_group_trigger` | グループのトリガー方式: `mention_or_reply`(@ または返信)/ `mention_only`(@ のみ)/ `all`(全メッセージ) | `mention_or_reply` |
|
||||
| `telegram_register_commands` | 起動時に BotFather にコマンドメニューを登録するかどうか | `true` |
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
ログに以下のような出力が表示されれば接続成功です:
|
||||
|
||||
```
|
||||
[Telegram] Bot logged in as @my_cowagent_bot (id=123456789)
|
||||
[Telegram] Registered 10 bot commands
|
||||
[Telegram] ✅ Telegram bot ready, polling for updates
|
||||
```
|
||||
|
||||
## 2. 機能
|
||||
|
||||
| 機能 | 対応状況 |
|
||||
| --- | --- |
|
||||
| 1 対 1 チャット | ✅ |
|
||||
| グループチャット(@bot / Bot への返信) | ✅ |
|
||||
| テキストメッセージ | ✅ 送受信 |
|
||||
| 画像メッセージ | ✅ 送受信 |
|
||||
| 音声メッセージ | ✅ 送受信(OGG/Opus) |
|
||||
| 動画メッセージ | ✅ 送受信 |
|
||||
| ファイルメッセージ | ✅ 送受信(PDF / Word / Excel など) |
|
||||
| コマンドメニュー | ✅ Web コンソールのスラッシュコマンドと一致 |
|
||||
|
||||
### コマンドメニュー
|
||||
|
||||
起動時に BotFather へコマンドメニューを自動登録します。Telegram の入力欄で `/` を入力するとサジェストが表示されます:
|
||||
|
||||
| コマンド | 説明 |
|
||||
| --- | --- |
|
||||
| `/help` | コマンドヘルプを表示 |
|
||||
| `/status` | 実行ステータスを確認 |
|
||||
| `/context` | 対話コンテキストを表示(`/context clear` でクリア) |
|
||||
| `/skill` | スキル管理(`/skill list`、`/skill install` など) |
|
||||
| `/memory` | 記憶管理(`/memory dream`) |
|
||||
| `/knowledge` | ナレッジベース管理(`/knowledge list` / `on` / `off`) |
|
||||
| `/config` | 現在の設定を表示 |
|
||||
| `/cancel` | 実行中の Agent タスクを中断 |
|
||||
| `/logs` | 最近のログを表示 |
|
||||
| `/version` | バージョンを表示 |
|
||||
|
||||
<Note>
|
||||
Telegram のコマンドメニューはトップレベルのコマンドのみ表示されます。サブコマンドはスペース区切りで入力します(例: `/skill list`、`/context clear`)。
|
||||
</Note>
|
||||
|
||||
## 3. 使い方
|
||||
|
||||
接続が完了したら:
|
||||
|
||||
- **1 対 1 チャット**: Telegram で Bot のユーザー名(例: `@my_cowagent_bot`)を検索し、`Start` をタップして会話を開始します。
|
||||
- **グループチャット**: Bot をグループに追加し、`@bot こんにちは` または **Bot のメッセージに返信** することで起動します。グループで反応しない場合は [ステップ 2](#ステップ-2-グループ利用時-privacy-mode-を無効化) の Privacy Mode 設定を確認してください。
|
||||
|
||||
画像やファイルを送るときは、添付欄の上の入力欄に **キャプション**(説明・質問)を直接書いて一緒に送信できます。Bot は添付ファイルとキャプションを合わせて回答します。先に添付を送り、その後に質問を送る形でも、2 つのメッセージは自動でまとめて処理されます。
|
||||
@@ -25,6 +25,14 @@ description: ステータスの確認、設定管理、コンテキスト制御
|
||||
/status
|
||||
```
|
||||
|
||||
## cancel
|
||||
|
||||
現在のセッションで実行中の Agent タスクを中止します。Agent が長時間のタスク(マルチターンのツール呼び出しや長いストリーミング応答など)を実行している間、`/cancel` を送信すると、次のツール実行の前に停止します。Web、WeChat、WeCom、Feishu など、すべてのチャネルで利用可能です。
|
||||
|
||||
```text
|
||||
/cancel
|
||||
```
|
||||
|
||||
## config
|
||||
|
||||
実行時設定の表示または変更を行います。変更は即座に反映され、再起動は不要です。
|
||||
|
||||
@@ -57,6 +57,7 @@ Web コンソールや接続されたチャネルの会話で `/` を入力す
|
||||
| --- | --- |
|
||||
| `/help` | コマンドヘルプを表示 |
|
||||
| `/status` | サービスの状態と設定を表示 |
|
||||
| `/cancel` | 実行中の Agent タスクを中止 |
|
||||
| `/config` | 実行時設定の表示・変更 |
|
||||
| `/skill` | スキル管理(インストール、アンインストール、有効化、無効化など) |
|
||||
| `/memory dream [N]` | 記憶蒸留を手動トリガー(デフォルト 3 日、最大 30) |
|
||||
@@ -80,6 +81,7 @@ Web コンソールや接続されたチャネルの会話で `/` を入力す
|
||||
| version | ✓ | ✓ |
|
||||
| status | ✓ | ✓ |
|
||||
| logs | ✓ | ✓ |
|
||||
| cancel | ✗ | ✓ |
|
||||
| config | ✗ | ✓ |
|
||||
| context | — | ✓ |
|
||||
| memory(サブコマンド) | ✗ | ✓ |
|
||||
|
||||
@@ -9,7 +9,7 @@ CowAgent 2.0 は、シンプルなチャットボットから、自律的な思
|
||||
|
||||
CowAgent のアーキテクチャは以下のコアモジュールで構成されています:
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/cow-agent-arch-en.jpg.jpg" alt="CowAgent Architecture" />
|
||||
<img src="https://cdn.jsdelivr.net/gh/zhayujie/cowagent-assets@main/architecture/en/architecture.jpg" alt="CowAgent Architecture" />
|
||||
|
||||
| モジュール | 説明 |
|
||||
| --- | --- |
|
||||
|
||||
@@ -84,7 +84,7 @@ Skill が必要とするシークレットキーは環境変数ファイルに
|
||||
|
||||
Skill システムは Agent に無限の拡張性を提供します。各 Skill は説明ファイル、実行スクリプト(任意)、リソース(任意)で構成され、特定のタイプのタスクを完了する方法を記述します。Skill により Agent は複雑なワークフローの指示に従い、ツールを呼び出し、サードパーティシステムと連携できます。
|
||||
|
||||
- **[Skill Hub](https://skills.cowagent.ai/):** オープンな Skill マーケットプレイス。公式推奨、コミュニティ、サードパーティの Skill を収録。ワンコマンドでインストール可能。
|
||||
- [Skill Hub](https://skills.cowagent.ai/):オープンな Skill マーケットプレイス。公式推奨、コミュニティ、サードパーティの Skill を収録。ワンコマンドでインストール可能。
|
||||
- **組み込み Skill:** プロジェクトの `skills/` ディレクトリにあり、Skill クリエイター、画像認識、LinkAI Agent、Web フェッチなどが含まれます。組み込み Skill は依存条件(API キー、システムコマンドなど)に基づいて自動的に有効化されます。
|
||||
- **カスタム Skill:** ユーザーが会話を通じて作成し、ワークスペース(`~/cow/skills/`)に保存されます。あらゆる複雑なビジネスプロセスやサードパーティ連携を実装できます。
|
||||
|
||||
|
||||
@@ -27,7 +27,7 @@ Agent は以下のメカニズムにより、会話内容を長期記憶に自
|
||||
|
||||
- **コンテキストトリミング時** — 会話ターン数またはトークン数が設定上限を超えた場合、最も古い半分のコンテキストがトリミングされ、LLM によって要約されて日次記憶ファイルに書き込まれます。要約は保持されたコンテキストにも非同期で注入され、会話の連続性を維持します
|
||||
- **毎日のスケジュール要約** — 毎日 23:55 に自動的にフル要約がトリガーされ、アクティビティが少ない日でも記憶が保存されます(内容が変更されていない場合はスキップ)
|
||||
- **[夢境蒸留(Deep Dream)](/ja/memory/deep-dream)** — 毎日の要約完了後に自動実行され、日次記憶を MEMORY.md に蒸留し、夢日記を生成します
|
||||
- [夢境蒸留(Deep Dream)](/ja/memory/deep-dream) — 毎日の要約完了後に自動実行され、日次記憶を MEMORY.md に蒸留し、夢日記を生成します
|
||||
- **API コンテキストオーバーフロー時** — モデル API がコンテキストオーバーフローエラーを返した場合、緊急措置として現在の会話要約が保存されます
|
||||
|
||||
すべての記憶書き込みはバックグラウンドスレッドで非同期に実行され(LLM の要約 + ファイル書き込み)、通常の会話応答をブロックしません。
|
||||
|
||||
135
docs/ja/models/mimo.mdx
Normal file
135
docs/ja/models/mimo.mdx
Normal file
@@ -0,0 +1,135 @@
|
||||
---
|
||||
title: Xiaomi MiMo
|
||||
description: Xiaomi MiMo モデル設定(テキスト対話 + 画像理解 + 音声合成)
|
||||
---
|
||||
|
||||
Xiaomi MiMo はネイティブ全モーダル大規模言語モデルです。1 つの `mimo_api_key` でテキスト対話、画像理解、音声合成を同時に有効化できます。
|
||||
|
||||
<Tip>
|
||||
Web コンソールの「モデル管理」ページから、以下のすべての機能をワンストップで設定でき、設定ファイルを手動で編集する必要はありません。
|
||||
</Tip>
|
||||
|
||||
## テキスト対話
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "mimo-v2.5-pro",
|
||||
"mimo_api_key": "YOUR_API_KEY",
|
||||
"mimo_api_base": "https://api.xiaomimimo.com/v1"
|
||||
}
|
||||
```
|
||||
|
||||
| パラメータ | 説明 |
|
||||
| --- | --- |
|
||||
| `model` | 推奨は `mimo-v2.5-pro`。`mimo-v2.5` も使用可能 |
|
||||
| `mimo_api_key` | [MiMo Open Platform](https://platform.xiaomimimo.com/console/api-keys) で作成 |
|
||||
| `mimo_api_base` | 任意。デフォルトは `https://api.xiaomimimo.com/v1` |
|
||||
|
||||
### モデル選択
|
||||
|
||||
| モデル | ユースケース |
|
||||
| --- | --- |
|
||||
| `mimo-v2.5-pro` | フラッグシップ。ネイティブ全モーダル + Agent 能力、最大 100 万トークンのコンテキスト |
|
||||
| `mimo-v2.5` | 汎用版。ネイティブ全モーダル(テキスト / 画像 / 動画 / 音声) |
|
||||
|
||||
## 思考モード
|
||||
|
||||
MiMo V2.5 シリーズはデフォルトで「思考モード」が有効です。最終回答の前に `reasoning_content`(思考過程)を出力することで、複雑なタスクのパフォーマンスを高めます。
|
||||
|
||||
表示の有無はグローバル設定 `enable_thinking` で切り替え可能です(Web コンソールの設定ページからも変更できます):
|
||||
|
||||
```json
|
||||
{
|
||||
"enable_thinking": true
|
||||
}
|
||||
```
|
||||
|
||||
## 画像理解
|
||||
|
||||
`mimo_api_key` を設定すると、Agent の Vision ツールは自動的に MiMo のビジョンモデルを利用します:
|
||||
|
||||
- メインモデル自体がマルチモーダル(`mimo-v2.5-pro` / `mimo-v2.5`)の場合は、画像はメインモデルが直接処理し、追加設定は不要です。
|
||||
- メインモデルが他社製の場合、Vision ツールは順序に従い `mimo-v2.5-pro` にフォールバックします。
|
||||
|
||||
特定の Vision モデルを強制したい場合は、設定ファイルで明示的に指定してください:
|
||||
|
||||
```json
|
||||
{
|
||||
"tools": {
|
||||
"vision": {
|
||||
"provider": "mimo",
|
||||
"model": "mimo-v2.5-pro"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 音声合成
|
||||
|
||||
```json
|
||||
{
|
||||
"text_to_voice": "mimo",
|
||||
"text_to_voice_model": "mimo-v2.5-tts",
|
||||
"tts_voice_id": "冰糖"
|
||||
}
|
||||
```
|
||||
|
||||
| パラメータ | 説明 |
|
||||
| --- | --- |
|
||||
| `text_to_voice_model` | 現在は `mimo-v2.5-tts` のみ対応(プリセット音色 + 歌唱モード) |
|
||||
| `tts_voice_id` | プリセット音色名(中国語の音色は中国語名がそのまま ID) |
|
||||
|
||||
### プリセット音色
|
||||
|
||||
| 音色 ID | 説明 |
|
||||
| --- | --- |
|
||||
| `冰糖` | 中国語 · 女声(デフォルト) |
|
||||
| `茉莉` | 中国語 · 女声 |
|
||||
| `苏打` | 中国語 · 男声 |
|
||||
| `白桦` | 中国語 · 男声 |
|
||||
| `Mia` | 英語 · 女声 |
|
||||
| `Chloe` | 英語 · 女声 |
|
||||
| `Milo` | 英語 · 男声 |
|
||||
| `Dean` | 英語 · 男声 |
|
||||
|
||||
Web コンソールの「モデル管理 → 音声合成」のドロップダウンから視覚的に選択することもできます。
|
||||
|
||||
### スタイル制御
|
||||
|
||||
MiMo TTS は合成テキスト内に **音声タグ** を埋め込むことで、感情、語調、方言、キャラクター、さらには歌唱まで制御できます。タグは **最終的に音声合成されるテキスト(つまり Agent の返信内容)** に含める必要があり、全体スタイルのタグは先頭に置きます:
|
||||
|
||||
```
|
||||
(スタイル)合成するテキスト
|
||||
```
|
||||
|
||||
半角 `()`、全角 `()`、`[]` の 3 種類の括弧に対応しています。スタイル記述は中国語・英語のどちらでも構いません。最も的確に表現できる言語を選んでください。代表的なスタイル例:
|
||||
|
||||
| 種類 | サンプルタグ |
|
||||
| --- | --- |
|
||||
| 基本感情 | `happy` `sad` `angry` `fear` `surprised` `excited` `aggrieved` `calm` `indifferent` |
|
||||
| 複合感情 | `wistful` `relieved` `helpless` `guilty` `at ease` `uneasy` `touched` |
|
||||
| 全体トーン | `gentle` `aloof` `lively` `serious` `languid` `playful` `deep` `sharp` `cutting` |
|
||||
| 声質 | `magnetic` `mellow` `bright` `ethereal` `childlike` `aged` `sweet` `husky` |
|
||||
| キャラクター調 | `squeaky` `mature lady` `young boy` `uncle` `Taiwanese accent` |
|
||||
| 方言 | `Northeastern` `Sichuan` `Henan` `Cantonese` |
|
||||
| ロールプレイ | `Sun Wukong` `Lin Daiyu` |
|
||||
| 歌唱 | `sing` / `singing` |
|
||||
|
||||
例:
|
||||
|
||||
- `(magnetic)夜が深まり、街はまだ呼吸している。`
|
||||
- `(gentle)深呼吸して。きっと大丈夫。`
|
||||
- `(serious)これがシステム再起動前の最後の警告です。`
|
||||
- `(singing)Twinkle, twinkle, little star, how I wonder what you are…`
|
||||
|
||||
テキストの任意の位置に細かい音声タグを挿入して、呼吸、笑い声、間などを制御することもできます。例:
|
||||
|
||||
```
|
||||
(nervous, deep breath) ふぅ……落ち着いて、落ち着いて。(faster pace) 自己紹介は五十回練習したから大丈夫。
|
||||
```
|
||||
|
||||
タグの完全な一覧は [MiMo 音声合成ドキュメント](https://platform.xiaomimimo.com/docs/zh-CN/usage-guide/speech-synthesis-v2.5) を参照してください。
|
||||
|
||||
<Tip>
|
||||
CowAgent は TTS 呼び出し時、Agent の返信原文(`(...)` タグを含む)をそのまま MiMo に送信します。ペルソナ / システムプロンプトで「返信の冒頭に `(スタイル)` タグを付けて口調を指定する」よう指示すれば、IM チャネル(WeChat / Feishu / DingTalk / WeCom)の音声返信に感情・方言・歌唱などの効果を付与できます。
|
||||
</Tip>
|
||||
@@ -15,7 +15,7 @@ Web コンソールに「モデル」ページを新設。**モデルプロバ
|
||||
|
||||
ドキュメント:[モデル概要](https://docs.cowagent.ai/ja/models)
|
||||
|
||||
<img width="720" alt="20260522113305" src="https://cdn.link-ai.tech/doc/20260522113305.png" />
|
||||
<img width="720" alt="20260522113305" src="https://cdn.jsdelivr.net/gh/zhayujie/cowagent-assets@main/screenshots/en/web-console-models-config.png" />
|
||||
|
||||
|
||||
## 🧩 MCP プロトコル対応
|
||||
|
||||
65
docs/ja/skills/hub.mdx
Normal file
65
docs/ja/skills/hub.mdx
Normal file
@@ -0,0 +1,65 @@
|
||||
---
|
||||
title: スキルハブ
|
||||
description: AI Agent スキルの閲覧、検索、インストール
|
||||
---
|
||||
|
||||
[Cow Skill Hub](https://skills.cowagent.ai/) は、公式推奨・コミュニティ貢献・サードパーティ(GitHub、ClawHub など)のスキルを集約した、オープンソースの AI Agent スキルマーケットプレイスです。
|
||||
|
||||
ソースコード: [github.com/zhayujie/cow-skill-hub](https://github.com/zhayujie/cow-skill-hub)
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/20260401110103.png" width="800" />
|
||||
|
||||
## 機能
|
||||
|
||||
- **スキル閲覧** — カテゴリ(公式推奨 / コミュニティ / サードパーティ)とタグでフィルタ
|
||||
- **スキル検索** — 名前または説明で検索
|
||||
- **詳細表示** — スキルマニフェスト、ファイル内容、インストールコマンド、必要な環境変数を確認
|
||||
- **ワンクリックインストール** — インストールコマンドをコピーして CowAgent で実行
|
||||
|
||||
## スキルのインストール
|
||||
|
||||
チャット内またはターミナルでインストールコマンドを実行:
|
||||
|
||||
<CodeGroup>
|
||||
```text チャット
|
||||
/skill install <name>
|
||||
```
|
||||
|
||||
```bash ターミナル
|
||||
cow skill install <name>
|
||||
```
|
||||
</CodeGroup>
|
||||
|
||||
チャットからスキルハブを直接閲覧することもできます:
|
||||
|
||||
```text
|
||||
/skill list --remote
|
||||
/skill search <キーワード>
|
||||
```
|
||||
|
||||
リスト表示されている厳選スキル以外にも、**GitHub、ClawHub、LinkAI、任意の URL** からサードパーティスキルを CLI 経由でインストールできます。詳しくは [スキルのインストール](/ja/skills/install) を参照してください。
|
||||
|
||||
## スキルの貢献
|
||||
|
||||
ご自身のスキルを投稿するには:
|
||||
|
||||
1. [skills.cowagent.ai/submit](https://skills.cowagent.ai/submit) にアクセス
|
||||
2. GitHub または Google でログイン
|
||||
3. `SKILL.md` を含むフォルダまたは zip ファイルをアップロード
|
||||
4. スキル名・表示名・説明は自動検出されます。必要に応じて編集してください
|
||||
5. 提出後、セキュリティ・品質チェックを経て公開されます
|
||||
|
||||
<img src="https://cdn.link-ai.tech/doc/20260401111904.png" width="800" />
|
||||
|
||||
スキルのファイル構成:
|
||||
|
||||
```
|
||||
your-skill/
|
||||
├── SKILL.md # 必須、ルートに配置
|
||||
├── scripts/ # 任意、実行スクリプト
|
||||
└── resources/ # 任意、その他リソース
|
||||
```
|
||||
|
||||
<Tip>
|
||||
スキルは `SKILL.md` マニフェストを中心に構築されます。スキル詳細ページから `SKILL.md` をダウンロードし、カスタム指示に対応した任意の Agent(OpenClaw、Cursor、Claude Code など)でも利用できます。
|
||||
</Tip>
|
||||
@@ -34,7 +34,9 @@ MCP コミュニティ標準に完全準拠しており、Claude Desktop / Curso
|
||||
| `command` | stdio | サーバーを起動する実行コマンド(`npx`、`python`、`uvx` など) |
|
||||
| `args` | 任意 | `command` に渡す引数 |
|
||||
| `env` | 任意 | サブプロセスの環境変数。API Key などに利用 |
|
||||
| `url` | SSE | SSE エンドポイントの URL(`command` と二者択一) |
|
||||
| `url` | SSE / Streamable HTTP | リモートエンドポイントの URL(`command` と二者択一) |
|
||||
| `type` | リモート | リモートトランスポート種別:`sse` または `streamable-http`(既定は `sse`) |
|
||||
| `headers` | 任意 | リモートリクエストの追加 HTTP ヘッダ(`Authorization` など)。Streamable HTTP のみ |
|
||||
| `disabled` | 任意 | `true` のとき該当サーバーをスキップ。一時的に無効化したいときに便利 |
|
||||
|
||||
### 完全な例
|
||||
@@ -88,7 +90,8 @@ Agent は次のように動作します:
|
||||
| トランスポート | 説明 | 設定フィールド |
|
||||
| --- | --- | --- |
|
||||
| **stdio** | サブプロセス通信。最も一般的で、コミュニティのエコシステムが最も豊富 | `command` + `args` |
|
||||
| **SSE** | HTTP Server-Sent Events。リモートホスト型の MCP サービス向け | `url` |
|
||||
| **SSE** | HTTP Server-Sent Events。従来のリモート用トランスポート | `url`(既定) |
|
||||
| **Streamable HTTP** | 新しい単一エンドポイント方式。SSE を段階的に置き換え | `type: "streamable-http"` + `url` |
|
||||
|
||||
## トラブルシューティング
|
||||
|
||||
@@ -106,4 +109,4 @@ Agent は次のように動作します:
|
||||
- [mcp.so](https://mcp.so) — グローバル MCP サービスインデックス
|
||||
- [ModelScope MCP 広場](https://modelscope.cn/mcp) — 魔搭コミュニティの MCP 広場、中国本土からのアクセスが安定
|
||||
|
||||
MCP 標準プロトコル(stdio / SSE)に準拠していれば、コードを一切変更せずに CowAgent に統合できます。
|
||||
MCP 標準プロトコル(stdio / SSE / Streamable HTTP)に準拠していれば、コードを一切変更せずに CowAgent に統合できます。
|
||||
|
||||
32
docs/ja/tools/web-fetch.mdx
Normal file
32
docs/ja/tools/web-fetch.mdx
Normal file
@@ -0,0 +1,32 @@
|
||||
---
|
||||
title: web_fetch - Web 取得
|
||||
description: Web ページやドキュメントのコンテンツを取得
|
||||
---
|
||||
|
||||
HTTP/HTTPS URL の内容を取得します。Web ページからは可読テキストを抽出し、ドキュメントファイル(PDF、Word、Excel など)は自動でダウンロードして解析します。
|
||||
|
||||
## パラメータ
|
||||
|
||||
| パラメータ | 型 | 必須 | 説明 |
|
||||
| --- | --- | --- | --- |
|
||||
| `url` | string | はい | HTTP/HTTPS URL(Web ページまたはドキュメント) |
|
||||
|
||||
## 対応ファイル形式
|
||||
|
||||
| 種別 | 形式 |
|
||||
| --- | --- |
|
||||
| PDF | `.pdf` |
|
||||
| Word | `.docx` |
|
||||
| テキスト | `.txt`、`.md`、`.csv`、`.log` |
|
||||
| 表計算 | `.xls`、`.xlsx` |
|
||||
| プレゼン | `.ppt`、`.pptx` |
|
||||
|
||||
## ユースケース
|
||||
|
||||
- Web ページの可読テキストを抽出する
|
||||
- リモートドキュメントのダウンロードと解析
|
||||
- API レスポンスの確認
|
||||
|
||||
<Note>
|
||||
`web_fetch` は静的 HTML のみ取得できます。JavaScript レンダリングが必要なページ(SPA など)は `browser` ツールを使用してください。
|
||||
</Note>
|
||||
@@ -27,7 +27,7 @@ Agent 通过以下机制自动将对话内容持久化为长期记忆:
|
||||
|
||||
- **上下文裁剪时** — 当对话轮次或 token 超出配置上限时,裁剪最早一半的上下文,使用 LLM 将被裁剪的内容总结为关键信息写入当天记忆文件,并将摘要异步注入到保留的上下文中,帮助模型保持对话连贯性
|
||||
- **每日定时总结** — 每天 23:55 自动触发一次全量总结,防止低活跃日无记忆留存(内容无变化时自动跳过)
|
||||
- **[梦境蒸馏(Deep Dream)](/memory/deep-dream)** — 每日总结完成后自动执行,将天级记忆蒸馏合并到 MEMORY.md,并生成梦境日记
|
||||
- [梦境蒸馏(Deep Dream)](/memory/deep-dream) — 每日总结完成后自动执行,将天级记忆蒸馏合并到 MEMORY.md,并生成梦境日记
|
||||
- **API 上下文溢出时** — 当模型 API 返回上下文溢出错误时,紧急保存当前对话摘要
|
||||
|
||||
所有记忆写入均在后台异步执行(LLM 总结 + 文件写入),不阻塞正常对话回复。
|
||||
|
||||
@@ -22,6 +22,7 @@ CowAgent 支持国内外主流厂商的大语言模型,模型接口实现在
|
||||
| [豆包 Doubao](/models/doubao) | doubao-seed-2.0 系列 | ✅ | ✅ | ✅ | | | ✅ |
|
||||
| [Kimi](/models/kimi) | kimi-k2.6 | ✅ | ✅ | | | | |
|
||||
| [百度千帆](/models/qianfan) | ernie-5.1 | ✅ | ✅ | | | | |
|
||||
| [小米 MiMo](/models/mimo) | mimo-v2.5-pro / v2.5 | ✅ | ✅ | | | ✅ | |
|
||||
| [LinkAI](/models/linkai) | 多厂商 100+ 模型统一接入 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [自定义](/models/custom) | 本地模型 / 三方代理 | ✅ | | | | | |
|
||||
|
||||
|
||||
135
docs/models/mimo.mdx
Normal file
135
docs/models/mimo.mdx
Normal file
@@ -0,0 +1,135 @@
|
||||
---
|
||||
title: 小米 MiMo
|
||||
description: 小米 MiMo 模型配置(文本对话 + 图像理解 + 语音合成)
|
||||
---
|
||||
|
||||
小米 MiMo 是原生全模态大模型,单 `mimo_api_key` 即可同时启用文本对话、图像理解与语音合成。
|
||||
|
||||
<Tip>
|
||||
通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。
|
||||
</Tip>
|
||||
|
||||
## 文本对话
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "mimo-v2.5-pro",
|
||||
"mimo_api_key": "YOUR_API_KEY",
|
||||
"mimo_api_base": "https://api.xiaomimimo.com/v1"
|
||||
}
|
||||
```
|
||||
|
||||
| 参数 | 说明 |
|
||||
| --- | --- |
|
||||
| `model` | 默认推荐 `mimo-v2.5-pro`,也可使用 `mimo-v2.5` |
|
||||
| `mimo_api_key` | 在 [MiMo 开放平台](https://platform.xiaomimimo.com/console/api-keys) 创建 |
|
||||
| `mimo_api_base` | 可选,默认为 `https://api.xiaomimimo.com/v1` |
|
||||
|
||||
### 模型选择
|
||||
|
||||
| 模型 | 适用场景 |
|
||||
| --- | --- |
|
||||
| `mimo-v2.5-pro` | 旗舰,原生全模态 + Agent 能力,最高 100 万 tokens 上下文 |
|
||||
| `mimo-v2.5` | 综合版,原生全模态(文本 / 图像 / 视频 / 音频) |
|
||||
|
||||
## 思考模式
|
||||
|
||||
MiMo V2.5 系列默认开启「思考模式」:模型在输出最终回答前会先输出 `reasoning_content`(思维链),提升复杂任务表现。
|
||||
|
||||
通过全局配置 `enable_thinking` 控制是否展示(也可在 Web 控制台 - 配置页面切换):
|
||||
|
||||
```json
|
||||
{
|
||||
"enable_thinking": true
|
||||
}
|
||||
```
|
||||
|
||||
## 图像理解
|
||||
|
||||
配置 `mimo_api_key` 后,Agent 的 Vision 工具可以自动使用 MiMo 视觉模型:
|
||||
|
||||
- 当主模型本身是多模态时(`mimo-v2.5-pro` / `mimo-v2.5`),直接由主模型识别图像,无需额外配置
|
||||
- 当主模型是其他厂商时,Vision 工具会根据顺序自动 fallback 到 `mimo-v2.5-pro`
|
||||
|
||||
如需手动指定 Vision 模型,可在配置文件中显式配置:
|
||||
|
||||
```json
|
||||
{
|
||||
"tools": {
|
||||
"vision": {
|
||||
"provider": "mimo",
|
||||
"model": "mimo-v2.5-pro"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 语音合成
|
||||
|
||||
```json
|
||||
{
|
||||
"text_to_voice": "mimo",
|
||||
"text_to_voice_model": "mimo-v2.5-tts",
|
||||
"tts_voice_id": "冰糖"
|
||||
}
|
||||
```
|
||||
|
||||
| 参数 | 说明 |
|
||||
| --- | --- |
|
||||
| `text_to_voice_model` | 当前仅支持 `mimo-v2.5-tts`(预置音色 + 唱歌模式) |
|
||||
| `tts_voice_id` | 预置音色名(中文音色直接使用中文名作为 ID) |
|
||||
|
||||
### 预置音色
|
||||
|
||||
| 音色 ID | 说明 |
|
||||
| --- | --- |
|
||||
| `冰糖` | 中文 · 女声(默认) |
|
||||
| `茉莉` | 中文 · 女声 |
|
||||
| `苏打` | 中文 · 男声 |
|
||||
| `白桦` | 中文 · 男声 |
|
||||
| `Mia` | 英文 · 女声 |
|
||||
| `Chloe` | 英文 · 女声 |
|
||||
| `Milo` | 英文 · 男声 |
|
||||
| `Dean` | 英文 · 男声 |
|
||||
|
||||
也可在 Web 控制台的「模型管理 → 语音合成」下拉框中可视化选择。
|
||||
|
||||
### 风格控制
|
||||
|
||||
MiMo TTS 支持在合成文本中嵌入 **音频标签** 来控制情绪、语调、方言、角色甚至唱歌。标签需出现在 **最终被合成为语音的文本(即 Agent 回复内容)** 中,整体风格标签写在开头:
|
||||
|
||||
```
|
||||
(风格)待合成内容
|
||||
```
|
||||
|
||||
支持半角 `()`、全角 `()` 或 `[]` 三种括号。常见风格示例:
|
||||
|
||||
| 类型 | 示例标签 |
|
||||
| --- | --- |
|
||||
| 基础情绪 | `开心` `悲伤` `愤怒` `恐惧` `惊讶` `兴奋` `委屈` `平静` `冷漠` |
|
||||
| 复合情绪 | `怅然` `欣慰` `无奈` `愧疚` `释然` `忐忑` `动情` |
|
||||
| 整体语调 | `温柔` `高冷` `活泼` `严肃` `慵懒` `俏皮` `深沉` `干练` `凌厉` |
|
||||
| 音色定位 | `磁性` `醇厚` `清亮` `空灵` `稚嫩` `苍老` `甜美` `沙哑` |
|
||||
| 人设腔调 | `夹子音` `御姐音` `正太音` `大叔音` `台湾腔` |
|
||||
| 方言 | `东北话` `四川话` `河南话` `粤语` |
|
||||
| 角色扮演 | `孙悟空` `林黛玉` |
|
||||
| 唱歌 | `唱歌`(等价于 `sing` / `singing`) |
|
||||
|
||||
示例:
|
||||
|
||||
- (磁性)夜已经深了,城市还在呼吸。
|
||||
- (东北话)哎呀妈呀,这天儿也忒冷了吧!
|
||||
- (粤语)呢个真係好正啊!
|
||||
- (唱歌)原谅我这一生不羁放纵爱自由…
|
||||
|
||||
也可以在文本任意位置插入细粒度音频标签来控制呼吸、笑声、停顿等,例如:
|
||||
|
||||
```
|
||||
(紧张,深呼吸)呼……冷静,冷静。(语速加快)自我介绍我背了五十遍了,应该没问题。
|
||||
```
|
||||
|
||||
完整标签列表参见 [MiMo 语音合成文档](https://platform.xiaomimimo.com/docs/zh-CN/usage-guide/speech-synthesis-v2.5)。
|
||||
|
||||
<Tip>
|
||||
CowAgent 在调用 TTS 时会将 Agent 的回复原文(含 `(...)` 标签)直接送入 MiMo 合成。你可以在人设 / 系统提示词里要求模型「在回复开头用 `(风格)` 标签控制语气」,即可让 IM 渠道(微信 / 飞书 / 钉钉 / 企微)的语音回复带上情绪、方言、唱歌等效果。
|
||||
</Tip>
|
||||
@@ -11,7 +11,7 @@ Skill 与 Tool 的区别:Tool 是由代码实现的原子操作(如读写文
|
||||
|
||||
CowAgent 提供多种方式获取技能:
|
||||
|
||||
- **[Cow 技能广场](https://skills.cowagent.ai/)** — 在线浏览所有可用技能,或通过 `/skill list --remote` 在对话中浏览和安装
|
||||
- [Cow 技能广场](https://skills.cowagent.ai/) — 在线浏览所有可用技能,或通过 `/skill list --remote` 在对话中浏览和安装
|
||||
- **GitHub** — 直接从 GitHub 仓库安装,支持批量安装
|
||||
- **ClawHub** — 通过 `/skill install clawhub:名称` 安装 ClawHub 上的技能(4 万+ 个)
|
||||
- **LinkAI** — 通过 `/skill install linkai:编码` 安装 LinkAI 上的公开资源和创建的知识库/数据库/工作流/插件等资源
|
||||
|
||||
@@ -3,7 +3,7 @@ title: 安装技能
|
||||
description: 通过命令一键安装来自多种来源的技能
|
||||
---
|
||||
|
||||
CowAgent 支持通过统一的 `install` 命令安装来自 **[Cow 技能广场](https://skills.cowagent.ai/)、GitHub、ClawHub、LinkAI** 以及任意 URL 上的技能。在对话中使用 `/skill install`,在终端中使用 `cow skill install`。
|
||||
CowAgent 支持通过统一的 `install` 命令安装来自 [Cow 技能广场](https://skills.cowagent.ai/)、GitHub、ClawHub、LinkAI 以及任意 URL 上的技能。在对话中使用 `/skill install`,在终端中使用 `cow skill install`。
|
||||
|
||||
## 从Cow技能广场安装
|
||||
|
||||
|
||||
@@ -34,7 +34,9 @@ Docker 部署时,官方 `docker-compose.yml` 已经把宿主机 `./cow` 挂载
|
||||
| `command` | stdio | 启动 server 的可执行命令(如 `npx`、`python`、`uvx`) |
|
||||
| `args` | 否 | 传给 command 的参数列表 |
|
||||
| `env` | 否 | 子进程的环境变量,常用于 API Key |
|
||||
| `url` | SSE | SSE 端点 URL(与 `command` 二选一) |
|
||||
| `url` | SSE / Streamable HTTP | 远程端点 URL(与 `command` 二选一) |
|
||||
| `type` | 远程 | 远程传输类型,可选 `sse` 或 `streamable-http`,默认 `sse` |
|
||||
| `headers` | 否 | 远程请求附加 HTTP 头(如 `Authorization`),仅 Streamable HTTP 使用 |
|
||||
| `disabled` | 否 | `true` 时跳过该 server,便于临时关闭 |
|
||||
|
||||
### 完整示例
|
||||
@@ -88,7 +90,8 @@ Agent 会:
|
||||
| 协议 | 说明 | 配置字段 |
|
||||
| --- | --- | --- |
|
||||
| **stdio** | 子进程通信,最常见,社区生态最丰富 | `command` + `args` |
|
||||
| **SSE** | HTTP Server-Sent Events,适合远程托管的 MCP 服务 | `url` |
|
||||
| **SSE** | HTTP Server-Sent Events,旧版远程协议 | `url`(默认) |
|
||||
| **Streamable HTTP** | 新版远程协议,单端点收发,逐步取代 SSE | `type: "streamable-http"` + `url` |
|
||||
|
||||
## 排错
|
||||
|
||||
@@ -106,4 +109,4 @@ Agent 会:
|
||||
- [mcp.so](https://mcp.so) — 全球 MCP 服务索引
|
||||
- [ModelScope MCP 广场](https://modelscope.cn/mcp) — 魔搭社区 MCP 广场,国内访问更稳定
|
||||
|
||||
只要遵循 MCP 标准协议(stdio / SSE),都可以直接接入 CowAgent。
|
||||
只要遵循 MCP 标准协议(stdio / SSE / Streamable HTTP),都可以直接接入 CowAgent。
|
||||
|
||||
268
docs/zh/README.md
Normal file
268
docs/zh/README.md
Normal file
@@ -0,0 +1,268 @@
|
||||
<p align="center"><img src="https://github.com/user-attachments/assets/eca9a9ec-8534-4615-9e0f-96c5ac1d10a3" alt="CowAgent" width="420" /></p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://github.com/zhayujie/CowAgent/releases/latest"><img src="https://img.shields.io/github/v/release/zhayujie/CowAgent" alt="Latest release"></a>
|
||||
<a href="https://github.com/zhayujie/CowAgent/blob/master/LICENSE"><img src="https://img.shields.io/github/license/zhayujie/CowAgent" alt="License: MIT"></a>
|
||||
<a href="https://github.com/zhayujie/CowAgent"><img src="https://img.shields.io/github/stars/zhayujie/CowAgent?style=flat-square" alt="Stars"></a> <br/>
|
||||
[<a href="../../README.md">English</a>] | [中文] | [<a href="../ja/README.md">日本語</a>]
|
||||
</p>
|
||||
|
||||
**CowAgent** 是一个开源的超级 AI 助理,能够主动思考和规划任务、操作计算机和外部资源、创造和执行 Skills、构建知识库与长期记忆,与你一同成长,是 Agent Harness 工程的最佳实践之一。
|
||||
|
||||
CowAgent 轻量、易部署、可扩展,自由接入主流大模型,覆盖微信、飞书、钉钉、企微、QQ、Telegram、网页等多渠道,7×24 运行于个人电脑或服务器中。
|
||||
|
||||
<p align="center">
|
||||
<a href="https://cowagent.ai/?lang=zh">🌐 官网</a> ·
|
||||
<a href="https://docs.cowagent.ai/">📖 文档中心</a> ·
|
||||
<a href="https://docs.cowagent.ai/guide/quick-start">🚀 快速开始</a> ·
|
||||
<a href="https://skills.cowagent.ai/">🧩 技能广场</a> ·
|
||||
<a href="https://link-ai.tech/cowagent/create">☁️ 在线体验</a>
|
||||
</p>
|
||||
|
||||
<br/>
|
||||
|
||||
## 🌟 核心能力
|
||||
|
||||
| 能力 | 说明 |
|
||||
| :--- | :--- |
|
||||
| [任务规划](https://docs.cowagent.ai/intro/architecture) | 理解复杂任务并自主分解执行,循环调用工具直到完成目标 |
|
||||
| [长期记忆](https://docs.cowagent.ai/memory) | 三层记忆架构(上下文 → 天级 → 核心),梦境蒸馏自动整理,支持关键词与向量混合检索 |
|
||||
| [知识库](https://docs.cowagent.ai/knowledge) | 自动整理结构化知识为 Markdown Wiki,构建持续增长的知识图谱,可视化浏览 |
|
||||
| [技能](https://docs.cowagent.ai/skills) | 从 [Skill Hub](https://skills.cowagent.ai/)、GitHub、ClawHub 等一键安装;也可通过对话创造自定义技能 |
|
||||
| [工具](https://docs.cowagent.ai/tools) | 内置文件读写、终端、浏览器、定时任务、记忆检索、联网搜索等 10+ 工具,支持 MCP 协议 |
|
||||
| [通道](https://docs.cowagent.ai/channels) | 一个 Agent 同时接入 Web、微信、飞书、钉钉、企微、QQ、公众号、Telegram 等多个渠道 |
|
||||
| 多模态 | 文本、图片、语音、文件全消息类型支持,覆盖识别、生成、收发 |
|
||||
| [模型](https://docs.cowagent.ai/models) | DeepSeek、Claude、Gemini、GPT、GLM、Qwen、Kimi、MiniMax、Doubao 等主流厂商,配置一行切换 |
|
||||
| [部署](https://docs.cowagent.ai/guide/quick-start) | 一键脚本安装,Web 控制台统一管理;本地、Docker、服务器多种部署方式 |
|
||||
|
||||
<br/>
|
||||
|
||||
## 🏗️ 架构总览
|
||||
|
||||
<img src="https://cdn.jsdelivr.net/gh/zhayujie/cowagent-assets@main/architecture/zh/architecture.jpg" alt="CowAgent Architecture" width="750"/>
|
||||
|
||||
CowAgent 是一个完整的 **Agent Harness**:消息从各类**通道**进入,**Agent Core** 结合记忆、知识库与可用工具/技能进行任务规划与决策,调用**模型**生成结果,再回传至原通道。各模块解耦清晰,按需扩展。
|
||||
|
||||
详见 [项目架构](https://docs.cowagent.ai/intro/architecture)。
|
||||
|
||||
<br/>
|
||||
|
||||
## 🚀 快速开始
|
||||
|
||||
项目提供一键安装脚本,自动完成依赖安装、配置和启动:
|
||||
|
||||
**Linux / macOS:**
|
||||
|
||||
```bash
|
||||
bash <(curl -fsSL https://cdn.link-ai.tech/code/cow/run.sh)
|
||||
```
|
||||
|
||||
**Windows(PowerShell):**
|
||||
|
||||
```powershell
|
||||
irm https://cdn.link-ai.tech/code/cow/run.ps1 | iex
|
||||
```
|
||||
|
||||
**Docker:**
|
||||
|
||||
```bash
|
||||
curl -O https://cdn.link-ai.tech/code/cow/docker-compose.yml
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
启动成功后访问 `http://localhost:9899` 进入 **Web 控制台**,在控制台内即可完成模型配置、渠道接入、技能安装等全部操作。
|
||||
|
||||
> 服务器部署且需要公网访问控制台时,请在 `config.json` 中将 `web_host` 设为 `0.0.0.0`(同时强烈建议设置 `web_password` 启用鉴权),然后访问 `http://<server-ip>:9899`,并确保防火墙/安全组放行 `9899` 端口。
|
||||
|
||||
> 📖 详细安装指南:[快速开始](https://docs.cowagent.ai/guide/quick-start) · [源码安装](https://docs.cowagent.ai/guide/manual-install) · [升级](https://docs.cowagent.ai/guide/upgrade)
|
||||
|
||||
安装后可使用 `cow` [CLI 命令](https://docs.cowagent.ai/cli) 管理服务:
|
||||
|
||||
```bash
|
||||
cow start | stop | restart # 服务管理
|
||||
cow status | logs # 状态和日志
|
||||
cow update # 拉取最新代码并重启
|
||||
cow skill install <名称> # 安装技能
|
||||
cow install-browser # 安装浏览器工具
|
||||
```
|
||||
|
||||
<br/>
|
||||
|
||||
## 🤖 模型支持
|
||||
|
||||
CowAgent 支持国内外主流厂商的大语言模型。**文本对话、图像理解、图像生成、语音识别/合成、向量** 等能力均可独立配置厂商。
|
||||
|
||||
| 厂商 | 代表模型 | 文本 | 图像理解 | 图像生成 | 语音识别 | 语音合成 | 向量 |
|
||||
| --- | --- | :-: | :-: | :-: | :-: | :-: | :-: |
|
||||
| [DeepSeek](https://docs.cowagent.ai/models/deepseek) | deepseek-v4-flash / pro | ✅ | | | | | |
|
||||
| [MiniMax](https://docs.cowagent.ai/models/minimax) | MiniMax-M2.7 | ✅ | ✅ | ✅ | | ✅ | |
|
||||
| [Claude](https://docs.cowagent.ai/models/claude) | claude-opus-4-7 | ✅ | ✅ | | | | |
|
||||
| [Gemini](https://docs.cowagent.ai/models/gemini) | gemini-3.5-flash | ✅ | ✅ | ✅ | | | |
|
||||
| [OpenAI](https://docs.cowagent.ai/models/openai) | gpt-5.5、o 系列 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [智谱 GLM](https://docs.cowagent.ai/models/glm) | glm-5.1、glm-5v-turbo | ✅ | ✅ | | ✅ | | ✅ |
|
||||
| [通义千问](https://docs.cowagent.ai/models/qwen) | qwen3.7-max | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [豆包 Doubao](https://docs.cowagent.ai/models/doubao) | doubao-seed-2.0 系列 | ✅ | ✅ | ✅ | | | ✅ |
|
||||
| [Kimi](https://docs.cowagent.ai/models/kimi) | kimi-k2.6 | ✅ | ✅ | | | | |
|
||||
| [百度ERNIE](https://docs.cowagent.ai/models/qianfan) | ernie-5.1 | ✅ | ✅ | | | | |
|
||||
| [小米 MiMo](https://docs.cowagent.ai/models/mimo) | mimo-v2.5-pro / v2.5 | ✅ | ✅ | | | ✅ | |
|
||||
| [LinkAI](https://docs.cowagent.ai/models/linkai) | 一个 Key 接入 100+ 模型 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [自定义](https://docs.cowagent.ai/models/custom) | 本地模型 / 三方代理 | ✅ | | | | | |
|
||||
|
||||
> 推荐通过 Web 控制台在线配置,无需手动编辑文件。手动配置请参考各厂商文档,详见 [模型概览](https://docs.cowagent.ai/models)。
|
||||
|
||||
<br/>
|
||||
|
||||
## 💬 通道接入
|
||||
|
||||
一个 Agent 实例可同时接入多个渠道,启动时通过 `channel_type` 切换或并行运行。
|
||||
|
||||
| 通道 | 文本 | 图片 | 文件 | 语音 | 群聊 |
|
||||
| --- | :-: | :-: | :-: | :-: | :-: |
|
||||
| [Web 控制台](https://docs.cowagent.ai/channels/web)(默认) | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [微信](https://docs.cowagent.ai/channels/weixin) | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [飞书](https://docs.cowagent.ai/channels/feishu) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [钉钉](https://docs.cowagent.ai/channels/dingtalk) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [企微智能机器人](https://docs.cowagent.ai/channels/wecom-bot) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [QQ](https://docs.cowagent.ai/channels/qq) | ✅ | ✅ | ✅ | | ✅ |
|
||||
| [企业微信应用](https://docs.cowagent.ai/channels/wecom) | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [微信公众号](https://docs.cowagent.ai/channels/wechatmp) | ✅ | ✅ | | ✅ | |
|
||||
| [Telegram](https://docs.cowagent.ai/channels/telegram) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
|
||||
> 飞书、企微智能机器人支持在 Web 控制台内**扫码一键接入**,无需公网 IP。详见 [通道概览](https://docs.cowagent.ai/channels)。
|
||||
|
||||
<img src="https://cdn.jsdelivr.net/gh/zhayujie/cowagent-assets@main/screenshots/zh/web-console-chat.png" alt="CowAgent Web 控制台" width="800"/>
|
||||
|
||||
*Web 控制台是默认通道,也是统一的 Agent 配置和管理入口*
|
||||
|
||||
<br/>
|
||||
|
||||
## 🧠 记忆与知识库
|
||||
|
||||
**长期记忆**采用三层架构:对话上下文(短期)→ 天级记忆(中期)→ MEMORY.md(长期)。每日自动执行**梦境蒸馏(Deep Dream)**,将分散记忆整合为精炼的长期记忆并生成叙事日记。详见 [长期记忆](https://docs.cowagent.ai/memory) · [梦境蒸馏](https://docs.cowagent.ai/memory/deep-dream)。
|
||||
|
||||
**个人知识库** 与按时间记录的记忆不同,以**主题为维度**组织结构化知识。Agent 在对话中自动整理有价值信息,维护交叉引用与索引,并可在 Web 控制台中可视化浏览知识图谱。详见 [个人知识库](https://docs.cowagent.ai/knowledge)。
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<td width="50%">
|
||||
<img src="https://cdn.jsdelivr.net/gh/zhayujie/cowagent-assets@main/screenshots/zh/web-console-memory.png" alt="长期记忆" />
|
||||
<p align="center"><em>长期记忆 · 三层记忆 + 梦境蒸馏</em></p>
|
||||
</td>
|
||||
<td width="50%">
|
||||
<img src="https://cdn.jsdelivr.net/gh/zhayujie/cowagent-assets@main/screenshots/zh/web-console-knowledge.png" alt="个人知识库" />
|
||||
<p align="center"><em>个人知识库 · 自动整理的 Markdown Wiki</em></p>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<br/>
|
||||
|
||||
|
||||
## 🔧 工具与技能
|
||||
|
||||
**工具(Tools)** 是 Agent 操作系统资源的原子能力,**技能(Skills)** 是基于说明文件的高级工作流,可组合多个工具完成复杂任务。
|
||||
|
||||
### 工具系统
|
||||
|
||||
**内置工具** 涵盖文件读写(`read` / `write` / `edit` / `ls`)、终端(`bash`)、文件发送(`send`)、记忆检索(`memory`)、环境变量(`env_config`)、网页获取(`web_fetch`)、定时任务(`scheduler`)、联网搜索(`web_search`)、图像识别(`vision`)、浏览器自动化(`browser`)等常用能力。
|
||||
|
||||
**MCP 协议** 通过 [Model Context Protocol](https://modelcontextprotocol.io) 接入开放生态中的各种 MCP 服务,配置一次 `mcp.json` 即用即得,支持 stdio / SSE / Streamable HTTP 协议、热更新、零代码接入。
|
||||
|
||||
详见 [工具概览](https://docs.cowagent.ai/tools) · [MCP 集成](https://docs.cowagent.ai/tools/mcp)。
|
||||
|
||||
### 技能系统
|
||||
|
||||
- **[Skill Hub](https://skills.cowagent.ai/)** — 开源的技能广场,浏览、搜索、一键安装
|
||||
- **GitHub / ClawHub / URL 等** — 任意来源一键安装
|
||||
- **对话创造** — 通过 `skill-creator` 用对话快速生成自定义技能,可将工作流程或第三方接口直接固化为技能
|
||||
|
||||
```bash
|
||||
/skill list # 查看当前技能
|
||||
/skill search <关键词> # 在技能广场搜索
|
||||
/skill install <名称> # 一键安装
|
||||
```
|
||||
|
||||
详见 [技能概览](https://docs.cowagent.ai/skills) · [创建技能](https://docs.cowagent.ai/skills/create)。
|
||||
|
||||
<br/>
|
||||
|
||||
## 🏷 更新日志
|
||||
|
||||
> **2026.05.22:** [v2.0.9](https://github.com/zhayujie/CowAgent/releases/tag/2.0.9) — 模型管理、MCP 协议支持、浏览器登录态持久化、新模型接入(gpt-5.5、gemini-3.5-flash、qwen3.7-max)、部署安全加固
|
||||
|
||||
> **2026.05.06:** [v2.0.8](https://github.com/zhayujie/CowAgent/releases/tag/2.0.8) — 飞书渠道全面升级(语音、流式输出、扫码接入)、新模型支持(DeepSeek V4、百度千帆)、定时任务工具增强
|
||||
|
||||
> **2026.04.22:** [v2.0.7](https://github.com/zhayujie/CowAgent/releases/tag/2.0.7) — 图像生成内置技能(GPT Image 2、Nano Banana)、新模型支持(Kimi K2.6、Claude Opus 4.7、GLM 5.1)、知识库和记忆增强
|
||||
|
||||
> **2026.04.14:** [v2.0.6](https://github.com/zhayujie/CowAgent/releases/tag/2.0.6) — 知识库系统、梦境记忆模块、上下文智能压缩、Web 控制台多会话
|
||||
|
||||
> **2026.04.01:** [v2.0.5](https://github.com/zhayujie/CowAgent/releases/tag/2.0.5) — Cow CLI 命令系统、Skill Hub 开源、浏览器工具、企微扫码创建
|
||||
|
||||
> **2026.03.22:** [v2.0.4](https://github.com/zhayujie/CowAgent/releases/tag/2.0.4) — 新增个人微信通道,支持文本/图片/文件/语音消息
|
||||
|
||||
> **2026.02.03:** [v2.0.0](https://github.com/zhayujie/CowAgent/releases/tag/2.0.0) — 正式升级为超级 Agent 助理,支持多轮任务决策、长期记忆、Skills 框架
|
||||
|
||||
完整更新历史:[Release Notes](https://docs.cowagent.ai/releases)
|
||||
|
||||
<br/>
|
||||
|
||||
## 🤝 社区与支持
|
||||
|
||||
扫码加入微信开源交流群:
|
||||
|
||||
<img width="130" src="https://img-1317903499.cos.ap-guangzhou.myqcloud.com/docs/open-community.png">
|
||||
|
||||
也可通过以下方式获取支持:
|
||||
|
||||
- 🐛 [提交 Issue](https://github.com/zhayujie/CowAgent/issues)
|
||||
- 🤖 在线 AI 助手:[项目小助手](https://link-ai.tech/app/Kv2fXJcH)(基于项目知识库)
|
||||
|
||||
<br/>
|
||||
|
||||
## 🔗 相关项目
|
||||
|
||||
- **[Cow Skill Hub](https://github.com/zhayujie/cow-skill-hub)** — 开源的 AI Agent 技能广场,支持 CowAgent、OpenClaw、Claude Code 等多种 Agent
|
||||
- **[bot-on-anything](https://github.com/zhayujie/bot-on-anything)** — 轻量大模型应用框架,支持 Slack、Telegram、Discord、Gmail 等海外平台
|
||||
- **[AgentMesh](https://github.com/MinimalFuture/AgentMesh)** — 开源多智能体(Multi-Agent)框架,通过团队协同解决复杂问题
|
||||
|
||||
<br/>
|
||||
|
||||
## 🏢 企业服务
|
||||
|
||||
<a href="https://link-ai.tech" target="_blank"><img width="650" src="https://cdn.link-ai.tech/image/link-ai-intro.jpg"></a>
|
||||
|
||||
> [LinkAI](https://link-ai.tech/) 是面向企业和个人的一站式 AI 智能体平台,为 CowAgent 提供云端托管和企业级支持:
|
||||
>
|
||||
> - **🚀 免部署在线运行**:无需服务器即可创建 [CowAgent 在线助理](https://link-ai.tech/cowagent/create),1 分钟拥有专属 Agent
|
||||
> - **🧠 Agent 基础设施**:聚合主流大模型、知识库、数据库、技能、工作流,提供开箱即用的 Agent 能力扩展
|
||||
> - **🏢 企业级协作**:提供团队协作、权限分级、审计日志、私有化部署等能力,让 Agent 安全落地企业场景
|
||||
|
||||
**产品咨询和企业服务** 可联系产品客服:
|
||||
|
||||
<img width="130" src="https://cdn.link-ai.tech/portal/linkai-customer-service.png">
|
||||
|
||||
<br/>
|
||||
|
||||
## 🛠️ 开发与贡献
|
||||
|
||||
欢迎接入更多应用通道,参考 [飞书通道实现](https://github.com/zhayujie/CowAgent/blob/master/channel/feishu/feishu_channel.py) 新增自定义通道;同时欢迎贡献新技能,向 [Skill Hub](https://skills.cowagent.ai/submit) 提交。
|
||||
|
||||
通过 ⭐ Star 关注项目更新,欢迎提交 PR、Issue 进行反馈。
|
||||
|
||||
## 🌟 贡献者
|
||||
|
||||

|
||||
|
||||
<br/>
|
||||
|
||||
## ⚠️ 声明
|
||||
|
||||
1. 本项目遵循 [MIT 开源协议](/LICENSE),主要用于技术研究和学习。使用时请遵守所在地法律法规及相关政策,因使用本项目所产生的一切后果由使用者自行承担。
|
||||
2. **成本与安全:** Agent 模式 Token 消耗显著高于普通对话,请根据效果与成本权衡选择模型;Agent 具备访问本地操作系统的能力,请谨慎选择部署环境。
|
||||
3. CowAgent 项目专注于开源技术开发,不会参与、授权或发行任何加密货币。
|
||||
|
||||
<br/>
|
||||
|
||||
## 📌 项目更名说明
|
||||
|
||||
本项目原名 `chatgpt-on-wechat`,于 2026.04.13 正式更名为 **CowAgent**。原 GitHub 地址已自动重定向,老用户可选择执行 `git remote set-url origin https://github.com/zhayujie/CowAgent.git` 更新本地远程地址。
|
||||
@@ -25,6 +25,10 @@ def create_bot(bot_type):
|
||||
from models.qianfan.qianfan_bot import QianfanBot
|
||||
return QianfanBot()
|
||||
|
||||
elif bot_type == const.MIMO:
|
||||
from models.mimo.mimo_bot import MimoBot
|
||||
return MimoBot()
|
||||
|
||||
elif bot_type in (const.OPENAI, const.CHATGPT, const.CUSTOM): # OpenAI-compatible API
|
||||
from models.chatgpt.chat_gpt_bot import ChatGPTBot
|
||||
return ChatGPTBot()
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user