From fcf4eb78dc51051e8024bd75dac89865fbe3b0aa Mon Sep 17 00:00:00 2001 From: zhayujie Date: Sun, 31 May 2026 16:49:35 +0800 Subject: [PATCH 1/5] feat(i18n): add global language resolution and localize user-facing text --- agent/protocol/agent_stream.py | 46 +-- channel/chat_channel.py | 7 +- channel/web/chat.html | 23 +- channel/web/static/js/console.js | 108 +++++-- channel/web/web_channel.py | 25 +- cli/commands/install.py | 77 +++-- cli/commands/process.py | 14 +- cli/commands/skill.py | 12 +- cli/utils.py | 16 + common/i18n.py | 177 +++++++++++ config-template.json | 1 + config.py | 11 + docker/docker-compose.yml | 1 + models/chatgpt/chat_gpt_bot.py | 33 ++- plugins/cow_cli/cow_cli.py | 486 +++++++++++++++++++------------ 15 files changed, 748 insertions(+), 289 deletions(-) create mode 100644 common/i18n.py diff --git a/agent/protocol/agent_stream.py b/agent/protocol/agent_stream.py index e3be20b8..0eb63b75 100644 --- a/agent/protocol/agent_stream.py +++ b/agent/protocol/agent_stream.py @@ -12,6 +12,7 @@ from agent.protocol.models import LLMRequest, LLMModel from agent.protocol.message_utils import sanitize_claude_messages, compress_turn_to_text_only from agent.tools.base_tool import BaseTool, ToolResult from common.log import logger +from common.i18n import t as _t # Optional: repair malformed JSON args from non-strict providers (e.g. unescaped quotes in long content). try: @@ -317,7 +318,10 @@ class AgentStreamExecutor: # Hard stop at 8 failures - abort with critical message if same_tool_failures >= 8: - return True, f"抱歉,我没能完成这个任务。可能是我理解有误或者当前方法不太合适。\n\n建议你:\n• 换个方式描述需求试试\n• 把任务拆分成更小的步骤\n• 或者换个思路来解决", True + return True, _t( + "抱歉,我没能完成这个任务。可能是我理解有误或者当前方法不太合适。\n\n建议你:\n• 换个方式描述需求试试\n• 把任务拆分成更小的步骤\n• 或者换个思路来解决", + "Sorry, I couldn't complete this task. 
I may have misunderstood, or my current approach isn't quite right.\n\nYou could try:\n• Rephrasing your request\n• Breaking the task into smaller steps\n• Taking a different approach", + ), True # Warning at 6 failures if same_tool_failures >= 6: @@ -436,14 +440,16 @@ class AgentStreamExecutor: elif not assistant_msg: # Still empty (no text and no tool_calls): use fallback logger.warning(f"[Agent] Still empty after explicit request") - final_response = ( - "抱歉,我暂时无法生成回复。请尝试换一种方式描述你的需求,或稍后再试。" + final_response = _t( + "抱歉,我暂时无法生成回复。请尝试换一种方式描述你的需求,或稍后再试。", + "Sorry, I can't generate a reply right now. Please try rephrasing your request, or try again later.", ) logger.info(f"Generated fallback response for empty LLM output") else: - # 第一轮就空回复,直接 fallback - final_response = ( - "抱歉,我暂时无法生成回复。请尝试换一种方式描述你的需求,或稍后再试。" + # First-turn empty reply, fall back directly + final_response = _t( + "抱歉,我暂时无法生成回复。请尝试换一种方式描述你的需求,或稍后再试。", + "Sorry, I can't generate a reply right now. Please try rephrasing your request, or try again later.", ) logger.info(f"Generated fallback response for empty LLM output") else: @@ -514,7 +520,7 @@ class AgentStreamExecutor: # Check for critical error - abort entire conversation if result.get("status") == "critical_error": logger.error(f"💥 检测到严重错误,终止对话") - final_response = result.get('result', '任务执行失败') + final_response = result.get('result') or _t("任务执行失败", "Task execution failed") return final_response # Log tool result in compact format @@ -650,15 +656,15 @@ class AgentStreamExecutor: logger.info(f"💭 Summary: {summary_response[:150]}{'...' if len(summary_response) > 150 else ''}") else: # Fallback if model still doesn't respond - final_response = ( - f"我已经执行了{turn}个决策步骤,达到了单次运行的步数上限。" - "任务可能还未完全完成,建议你将任务拆分成更小的步骤,或者换一种方式描述需求。" + final_response = _t( + f"我已经执行了{turn}个决策步骤,达到了单次运行的步数上限。任务可能还未完全完成,建议你将任务拆分成更小的步骤,或者换一种方式描述需求。", + f"I've taken {turn} decision steps and reached the per-run limit. 
The task may not be fully complete — try breaking it into smaller steps, or describe your request differently.", ) except Exception as e: logger.warning(f"Failed to get summary from LLM: {e}") - final_response = ( - f"我已经执行了{turn}个决策步骤,达到了单次运行的步数上限。" - "任务可能还未完全完成,建议你将任务拆分成更小的步骤,或者换一种方式描述需求。" + final_response = _t( + f"我已经执行了{turn}个决策步骤,达到了单次运行的步数上限。任务可能还未完全完成,建议你将任务拆分成更小的步骤,或者换一种方式描述需求。", + f"I've taken {turn} decision steps and reached the per-run limit. The task may not be fully complete — try breaking it into smaller steps, or describe your request differently.", ) finally: # Remove the injected user prompt from history to avoid polluting @@ -953,13 +959,15 @@ class AgentStreamExecutor: self.messages.clear() self._clear_session_db() if is_context_overflow: - raise Exception( - "抱歉,对话历史过长导致上下文溢出。我已清空历史记录,请重新描述你的需求。" - ) + raise Exception(_t( + "抱歉,对话历史过长导致上下文溢出。我已清空历史记录,请重新描述你的需求。", + "Sorry, the conversation history got too long and overflowed the context. I've cleared the history — please describe your request again.", + )) else: - raise Exception( - "抱歉,之前的对话出现了问题。我已清空历史记录,请重新发送你的消息。" - ) + raise Exception(_t( + "抱歉,之前的对话出现了问题。我已清空历史记录,请重新发送你的消息。", + "Sorry, something went wrong with the earlier conversation. 
I've cleared the history — please send your message again.", + )) # Check if error is rate limit (429) is_rate_limit = '429' in error_str_lower or 'rate limit' in error_str_lower diff --git a/channel/chat_channel.py b/channel/chat_channel.py index 6a9a1952..9104a38e 100644 --- a/channel/chat_channel.py +++ b/channel/chat_channel.py @@ -10,6 +10,7 @@ from bridge.reply import * from channel.channel import Channel from common.dequeue import Dequeue from common import memory +from common.i18n import t as _t from plugins import * try: @@ -265,7 +266,7 @@ class ChatChannel(Channel): if reply.type in self.NOT_SUPPORT_REPLYTYPE: logger.error("[chat_channel]reply type not support: " + str(reply.type)) reply.type = ReplyType.ERROR - reply.content = "不支持发送的消息类型: " + str(reply.type) + reply.content = _t("不支持发送的消息类型: ", "Unsupported message type: ") + str(reply.type) if reply.type == ReplyType.TEXT: reply_text = reply.content @@ -476,9 +477,9 @@ class ChatChannel(Channel): cancelled = get_cancel_registry().cancel_session(session_id) text = ( - "🛑 已中止" + _t("🛑 已中止", "🛑 Cancelled") if cancelled > 0 - else "当前没有可中止的任务。" + else _t("当前没有可中止的任务。", "Nothing to cancel.") ) logger.info( f"[chat_channel] /cancel fast-path: session={session_id}, cancelled={cancelled}" diff --git a/channel/web/chat.html b/channel/web/chat.html index d90adb15..1bbf0f04 100644 --- a/channel/web/chat.html +++ b/channel/web/chat.html @@ -47,11 +47,30 @@ This runs synchronously in so the correct class is on before any CSS or body rendering occurs. 
--> diff --git a/channel/web/static/js/console.js b/channel/web/static/js/console.js index 6d0a66fc..e5865051 100644 --- a/channel/web/static/js/console.js +++ b/channel/web/static/js/console.js @@ -91,6 +91,27 @@ const I18N = { example_knowledge_title: '知识库', example_knowledge_text: '查看知识库当前文档情况', example_skill_title: '技能系统', example_skill_text: '查看所有支持的工具和技能', example_web_title: '指令中心', example_web_text: '查看全部命令', + slash_help: '显示命令帮助', + slash_status: '查看运行状态', + slash_context: '查看对话上下文', + slash_context_clear: '清除对话上下文', + slash_skill_list: '查看已安装技能', + slash_skill_list_remote: '浏览技能广场', + slash_skill_search: '搜索技能', + slash_skill_install: '安装技能 (名称或 GitHub URL)', + slash_skill_uninstall: '卸载技能', + slash_skill_info: '查看技能详情', + slash_skill_enable: '启用技能', + slash_skill_disable: '禁用技能', + slash_memory_dream: '手动触发记忆蒸馏 (可指定天数, 默认3)', + slash_knowledge: '查看知识库统计', + slash_knowledge_list: '查看知识库文件树', + slash_knowledge_on: '开启知识库', + slash_knowledge_off: '关闭知识库', + slash_config: '查看当前配置', + slash_cancel: '中止当前正在运行的 Agent 任务', + slash_logs: '查看最近日志', + slash_version: '查看版本', input_placeholder: '输入消息,或输入 / 使用指令', config_title: '配置管理', config_desc: '管理模型和 Agent 配置', config_model: '模型配置', config_agent: 'Agent 配置', @@ -265,6 +286,27 @@ const I18N = { example_knowledge_title: 'Knowledge', example_knowledge_text: 'Show me the current knowledge base', example_skill_title: 'Skills', example_skill_text: 'Show current tools and skills', example_web_title: 'Commands', example_web_text: 'Show all commands', + slash_help: 'Show this help', + slash_status: 'Show running status', + slash_context: 'Show conversation context', + slash_context_clear: 'Clear conversation context', + slash_skill_list: 'List installed skills', + slash_skill_list_remote: 'Browse Skill Hub', + slash_skill_search: 'Search skills', + slash_skill_install: 'Install a skill (name or GitHub URL)', + slash_skill_uninstall: 'Uninstall a skill', + slash_skill_info: 'Show skill details', + slash_skill_enable: 
'Enable a skill', + slash_skill_disable: 'Disable a skill', + slash_memory_dream: 'Trigger memory distillation (optional days, default 3)', + slash_knowledge: 'Show knowledge base stats', + slash_knowledge_list: 'Show knowledge base file tree', + slash_knowledge_on: 'Enable knowledge base', + slash_knowledge_off: 'Disable knowledge base', + slash_config: 'Show current config', + slash_cancel: 'Abort the running Agent task', + slash_logs: 'Show recent logs', + slash_version: 'Show version', input_placeholder: 'Type a message, or press / for commands', config_title: 'Configuration', config_desc: 'Manage model and agent settings', config_model: 'Model Configuration', config_agent: 'Agent Configuration', @@ -361,7 +403,25 @@ const I18N = { } }; -let currentLang = localStorage.getItem('cow_lang') || 'zh'; +// Resolve language by priority: user choice (localStorage) -> backend-detected +// (cow_lang) -> browser language -> 'zh'. Shares __cowResolveLang__ defined in +// chat.html; falls back to a local resolver if loaded standalone. +let currentLang = (typeof window.__cowResolveLang__ === 'function') + ? 
window.__cowResolveLang__() + : (function () { + const norm = (raw) => { + if (!raw) return ''; + const v = String(raw).trim().toLowerCase(); + if (v === 'auto') return ''; + if (v.indexOf('zh') === 0) return 'zh'; + if (v.indexOf('en') === 0) return 'en'; + return ''; + }; + return norm(localStorage.getItem('cow_lang')) + || norm(window.__COW_DEFAULT_LANG__) + || norm(navigator.language) + || 'zh'; + })(); function t(key) { return (I18N[currentLang] && I18N[currentLang][key]) || (I18N.en[key]) || key; @@ -1298,28 +1358,30 @@ chatInput.addEventListener('compositionstart', () => { isComposing = true; }); chatInput.addEventListener('compositionend', () => { setTimeout(() => { isComposing = false; }, 100); }); // ── Slash Command Menu ─────────────────────────────────────── +// desc holds an i18n key, resolved via t() at render time so the menu follows +// the current UI language. const SLASH_COMMANDS = [ - { cmd: '/help', desc: '显示命令帮助' }, - { cmd: '/status', desc: '查看运行状态' }, - { cmd: '/context', desc: '查看对话上下文' }, - { cmd: '/context clear', desc: '清除对话上下文' }, - { cmd: '/skill list', desc: '查看已安装技能' }, - { cmd: '/skill list --remote', desc: '浏览技能广场' }, - { cmd: '/skill search ', desc: '搜索技能' }, - { cmd: '/skill install ', desc: '安装技能 (名称或 GitHub URL)' }, - { cmd: '/skill uninstall ', desc: '卸载技能' }, - { cmd: '/skill info ', desc: '查看技能详情' }, - { cmd: '/skill enable ', desc: '启用技能' }, - { cmd: '/skill disable ', desc: '禁用技能' }, - { cmd: '/memory dream ', desc: '手动触发记忆蒸馏 (可指定天数, 默认3)' }, - { cmd: '/knowledge', desc: '查看知识库统计' }, - { cmd: '/knowledge list', desc: '查看知识库文件树' }, - { cmd: '/knowledge on', desc: '开启知识库' }, - { cmd: '/knowledge off', desc: '关闭知识库' }, - { cmd: '/config', desc: '查看当前配置' }, - { cmd: '/cancel', desc: '中止当前正在运行的 Agent 任务' }, - { cmd: '/logs', desc: '查看最近日志' }, - { cmd: '/version', desc: '查看版本' }, + { cmd: '/help', desc: 'slash_help' }, + { cmd: '/status', desc: 'slash_status' }, + { cmd: '/context', desc: 'slash_context' }, + { cmd: '/context 
clear', desc: 'slash_context_clear' }, + { cmd: '/skill list', desc: 'slash_skill_list' }, + { cmd: '/skill list --remote', desc: 'slash_skill_list_remote' }, + { cmd: '/skill search ', desc: 'slash_skill_search' }, + { cmd: '/skill install ', desc: 'slash_skill_install' }, + { cmd: '/skill uninstall ', desc: 'slash_skill_uninstall' }, + { cmd: '/skill info ', desc: 'slash_skill_info' }, + { cmd: '/skill enable ', desc: 'slash_skill_enable' }, + { cmd: '/skill disable ', desc: 'slash_skill_disable' }, + { cmd: '/memory dream ', desc: 'slash_memory_dream' }, + { cmd: '/knowledge', desc: 'slash_knowledge' }, + { cmd: '/knowledge list', desc: 'slash_knowledge_list' }, + { cmd: '/knowledge on', desc: 'slash_knowledge_on' }, + { cmd: '/knowledge off', desc: 'slash_knowledge_off' }, + { cmd: '/config', desc: 'slash_config' }, + { cmd: '/cancel', desc: 'slash_cancel' }, + { cmd: '/logs', desc: 'slash_logs' }, + { cmd: '/version', desc: 'slash_version' }, ]; const slashMenu = document.getElementById('slash-menu'); @@ -1373,7 +1435,7 @@ function renderSlashItems() { slashFiltered.map((c, i) => `
` + `${escapeHtml(c.cmd)}` + - `${escapeHtml(c.desc)}
` + `${escapeHtml(t(c.desc))}` ).join(''); const activeEl = slashMenu.querySelector('.slash-menu-item.active'); diff --git a/channel/web/web_channel.py b/channel/web/web_channel.py index 120d8efd..e2c0c5e4 100644 --- a/channel/web/web_channel.py +++ b/channel/web/web_channel.py @@ -21,6 +21,7 @@ from channel.chat_channel import ChatChannel, check_prefix from channel.chat_message import ChatMessage from collections import OrderedDict from common import const +from common import i18n from common.log import logger from common.singleton import singleton from config import conf @@ -98,7 +99,7 @@ def _require_auth(): def _cancel_reply_text(cancelled: int, lang: str) -> str: en = lang.startswith("en") if cancelled > 0: - return "🛑 Cancelled." if en else "🛑 已中止" + return "🛑 Cancelled" if en else "🛑 已中止" return "Nothing to cancel." if en else "当前没有可中止的任务。" @@ -477,7 +478,10 @@ class WebChannel(ChatChannel): ) q.put({ "type": "done", - "content": "(模型未返回任何内容,请重试或换一种方式描述你的需求)", + "content": i18n.t( + "(模型未返回任何内容,请重试或换一种方式描述你的需求)", + "(The model returned no content. 
Please retry or rephrase your request.)", + ), "request_id": request_id, "timestamp": time.time(), }) @@ -805,13 +809,13 @@ class WebChannel(ChatChannel): if not fpath: continue if ftype == "image": - file_refs.append(f"[图片: {fpath}]") + file_refs.append(f"[{i18n.t('图片', 'Image')}: {fpath}]") elif ftype == "video": - file_refs.append(f"[视频: {fpath}]") + file_refs.append(f"[{i18n.t('视频', 'Video')}: {fpath}]") elif ftype == "directory": - file_refs.append(f"[目录: {fpath}]") + file_refs.append(f"[{i18n.t('目录', 'Directory')}: {fpath}]") else: - file_refs.append(f"[文件: {fpath}]") + file_refs.append(f"[{i18n.t('文件', 'File')}: {fpath}]") if file_refs: prompt = prompt + "\n" + "\n".join(file_refs) logger.info(f"[WebChannel] Attached {len(file_refs)} file(s) to message") @@ -952,7 +956,7 @@ class WebChannel(ChatChannel): if request_id and request_id in self.sse_queues: self.sse_queues[request_id].put({ "type": "cancelled", - "content": "Cancelled" if lang.startswith("en") else "已中止", + "content": "🛑 Cancelled" if lang.startswith("en") else "🛑 已中止", "request_id": request_id, "timestamp": time.time(), }) @@ -1008,7 +1012,10 @@ class WebChannel(ChatChannel): """Serve the chat HTML page.""" file_path = os.path.join(os.path.dirname(__file__), 'chat.html') # 使用绝对路径 with open(file_path, 'r', encoding='utf-8') as f: - return f.read() + html = f.read() + # Inject the backend-resolved default language so the console can use + # it on first load (when the user has no saved cow_lang preference). + return html.replace("{{COW_DEFAULT_LANG}}", i18n.get_language()) def startup(self): configured_host = conf().get("web_host", "") @@ -1388,6 +1395,8 @@ class ChatHandler: cache_bust = str(int(time.time())) html = html.replace('assets/js/console.js', f'assets/js/console.js?v={cache_bust}') html = html.replace('assets/css/console.css', f'assets/css/console.css?v={cache_bust}') + # Inject the backend-resolved default language for first-load fallback. 
+ html = html.replace("{{COW_DEFAULT_LANG}}", i18n.get_language()) return html diff --git a/cli/commands/install.py b/cli/commands/install.py index addec52c..ed22f296 100644 --- a/cli/commands/install.py +++ b/cli/commands/install.py @@ -14,7 +14,7 @@ CHINA_MIRROR = "https://registry.npmmirror.com/-/binary/playwright" # stream(msg, fg=None) — fg is "yellow" | "green" | "red" | None StreamFn = Callable[[str, Optional[str]], None] -# on_phase(msg) — coarse-grained progress for chat channels (Chinese) +# on_phase(msg) — coarse-grained progress for chat channels (localized via i18n) PhaseFn = Callable[[str], None] @@ -112,16 +112,25 @@ def run_install_browser( stream: Optional callback ``(message, fg)`` for each line. ``fg`` is ``yellow`` / ``green`` / ``red`` or None. Defaults to colored click output. on_phase: Optional callback for coarse progress (e.g. push to chat); - messages are short Chinese status lines. + messages are short status lines localized via i18n. Returns: 0 on success, 1 on fatal failure (pip or chromium install failed). 
""" + from cli.utils import get_cli_language + from common import i18n + + get_cli_language() # resolve cow_lang so i18n.t reflects config + _t = i18n.t + stream = stream or _default_stream python = sys.executable legacy_mode = False - _phase(on_phase, "🔧 开始安装浏览器工具依赖(约几分钟,请耐心等待)…") + _phase(on_phase, _t( + "🔧 开始安装浏览器工具依赖(约几分钟,请耐心等待)…", + "🔧 Installing browser tool dependencies (a few minutes, please wait)…", + )) glibc = _get_glibc_version() if glibc and glibc < GLIBC_THRESHOLD: @@ -136,27 +145,36 @@ def run_install_browser( stream("") _phase( on_phase, - f"ℹ️ 检测到 glibc {glibc_str}(较旧),将安装兼容版 Playwright {PLAYWRIGHT_LEGACY_VERSION}。", + _t( + f"ℹ️ 检测到 glibc {glibc_str}(较旧),将安装兼容版 Playwright {PLAYWRIGHT_LEGACY_VERSION}。", + f"ℹ️ Detected glibc {glibc_str} (older); installing compatible Playwright {PLAYWRIGHT_LEGACY_VERSION}.", + ), ) target_version = PLAYWRIGHT_LEGACY_VERSION if legacy_mode else PLAYWRIGHT_VERSION - _phase(on_phase, "📦 [1/3] 正在安装 Playwright Python 包…") + _phase(on_phase, _t("📦 [1/3] 正在安装 Playwright Python 包…", "📦 [1/3] Installing Playwright Python package…")) stream("[1/3] Installing playwright Python package...", "yellow") ret = _pip_install(f"playwright=={target_version}", stream) if ret != 0: stream("Failed to install playwright package.", "red") - _phase(on_phase, "❌ [1/3] Playwright Python 包安装失败。") + _phase(on_phase, _t("❌ [1/3] Playwright Python 包安装失败。", "❌ [1/3] Failed to install Playwright Python package.")) return 1 installed = _get_installed_version() if installed: stream(f" playwright {installed} installed.", "green") stream("") - _phase(on_phase, f"✅ [1/3] Playwright 包已安装({installed or target_version})。") + _phase(on_phase, _t( + f"✅ [1/3] Playwright 包已安装({installed or target_version})。", + f"✅ [1/3] Playwright package installed ({installed or target_version}).", + )) if sys.platform == "linux": - _phase(on_phase, "🔧 [2/3] 正在安装 Linux 系统依赖与轻量中文字体(文泉驿正黑,部分步骤可能需要 sudo)…") + _phase(on_phase, _t( + "🔧 [2/3] 正在安装 Linux 
系统依赖与轻量中文字体(文泉驿正黑,部分步骤可能需要 sudo)…", + "🔧 [2/3] Installing Linux system deps and a lightweight CJK font (WenQuanYi Zen Hei; some steps may need sudo)…", + )) stream("[2/3] Installing system dependencies (Linux)...", "yellow") ret = subprocess.call([python, "-m", "playwright", "install-deps", "chromium"]) if ret != 0: @@ -183,14 +201,23 @@ def run_install_browser( stream(" CJK font (wqy-zenhei) installed.", "green") _phase( on_phase, - "✅ [2/3] Linux 依赖与字体步骤已执行(若有权限问题请查看服务器日志或手动执行提示命令)。", + _t( + "✅ [2/3] Linux 依赖与字体步骤已执行(若有权限问题请查看服务器日志或手动执行提示命令)。", + "✅ [2/3] Linux deps and font steps executed (on permission issues, check the server log or run the suggested commands manually).", + ), ) else: stream(f"[2/3] Skipping system deps (not needed on {sys.platform}).", "yellow") - _phase(on_phase, f"ℹ️ [2/3] 当前系统({sys.platform})跳过 Linux 专用依赖。") + _phase(on_phase, _t( + f"ℹ️ [2/3] 当前系统({sys.platform})跳过 Linux 专用依赖。", + f"ℹ️ [2/3] Skipping Linux-specific deps on this platform ({sys.platform}).", + )) stream("") - _phase(on_phase, "🌐 [3/3] 正在下载并安装 Chromium(体积较大,请耐心等待)…") + _phase(on_phase, _t( + "🌐 [3/3] 正在下载并安装 Chromium(体积较大,请耐心等待)…", + "🌐 [3/3] Downloading and installing Chromium (large download, please wait)…", + )) stream("[3/3] Installing Chromium browser...", "yellow") cmd = [python, "-m", "playwright", "install", "chromium"] @@ -209,27 +236,33 @@ def run_install_browser( if use_mirror: env["PLAYWRIGHT_DOWNLOAD_HOST"] = CHINA_MIRROR stream(f" (using China mirror: {CHINA_MIRROR})", None) - _phase(on_phase, "📡 检测到国内 pip 源配置,Chromium 将优先走国内镜像下载。") + _phase(on_phase, _t( + "📡 检测到国内 pip 源配置,Chromium 将优先走国内镜像下载。", + "📡 Detected a China pip mirror; Chromium will be downloaded from the China mirror first.", + )) ret = subprocess.call(cmd, env=env) if ret != 0 and use_mirror: stream(" Mirror download failed, retrying with official CDN...", "yellow") - _phase(on_phase, "⚠️ 镜像下载失败,正在改用官方源重试…") + _phase(on_phase, _t( + "⚠️ 镜像下载失败,正在改用官方源重试…", + "⚠️ Mirror download failed; retrying 
with the official CDN…", + )) env_no_mirror = os.environ.copy() env_no_mirror.pop("PLAYWRIGHT_DOWNLOAD_HOST", None) ret = subprocess.call(cmd, env=env_no_mirror) if ret != 0: stream("Failed to install Chromium.", "red") - _phase(on_phase, "❌ [3/3] Chromium 安装失败。") + _phase(on_phase, _t("❌ [3/3] Chromium 安装失败。", "❌ [3/3] Failed to install Chromium.")) return 1 stream("") - _phase(on_phase, "✅ [3/3] Chromium 已安装。") + _phase(on_phase, _t("✅ [3/3] Chromium 已安装。", "✅ [3/3] Chromium installed.")) stream("Verifying browser installation...", None) - _phase(on_phase, "🔍 正在验证 Playwright 能否正常加载…") + _phase(on_phase, _t("🔍 正在验证 Playwright 能否正常加载…", "🔍 Verifying that Playwright loads correctly…")) ret = subprocess.call( [python, "-c", "from playwright.sync_api import sync_playwright; print('OK')"], stderr=subprocess.DEVNULL, @@ -240,14 +273,20 @@ def run_install_browser( " Consider upgrading your OS or using Docker.", "yellow", ) - _phase(on_phase, "⚠️ 验证未完全通过:本机可能仍无法使用浏览器工具,请查看日志或升级系统。") + _phase(on_phase, _t( + "⚠️ 验证未完全通过:本机可能仍无法使用浏览器工具,请查看日志或升级系统。", + "⚠️ Verification did not fully pass: the browser tool may still not work here; check the log or upgrade your system.", + )) else: stream(" Verification passed.", "green") - _phase(on_phase, "✅ 验证通过。") + _phase(on_phase, _t("✅ 验证通过。", "✅ Verification passed.")) stream("") stream("Browser tool ready! Restart CowAgent to enable it.", "green") - _phase(on_phase, "🎉 全部步骤结束。请重启 CowAgent 后使用 browser 工具。") + _phase(on_phase, _t( + "🎉 全部步骤结束。请重启 CowAgent 后使用 browser 工具。", + "🎉 All steps finished. 
Restart CowAgent to use the browser tool.", + )) return 0 diff --git a/cli/commands/process.py b/cli/commands/process.py index 2176fbf7..6ccffdcb 100644 --- a/cli/commands/process.py +++ b/cli/commands/process.py @@ -275,7 +275,11 @@ def update(ctx): def status(): """Show CowAgent running status.""" from cli import __version__ - from cli.utils import load_config_json + from cli.utils import load_config_json, get_cli_language + from common import i18n + + get_cli_language() # resolve cow_lang so i18n.t reflects config + _t = i18n.t pid = _read_pid() if pid: @@ -283,17 +287,17 @@ def status(): else: click.echo(click.style("● CowAgent is not running", fg="red")) - click.echo(f" 版本: v{__version__}") + click.echo(_t(f" 版本: v{__version__}", f" Version: v{__version__}")) cfg = load_config_json() if cfg: channel = cfg.get("channel_type", "unknown") if isinstance(channel, list): channel = ", ".join(channel) - click.echo(f" 通道: {channel}") - click.echo(f" 模型: {cfg.get('model', 'unknown')}") + click.echo(_t(f" 通道: {channel}", f" Channel: {channel}")) + click.echo(_t(f" 模型: {cfg.get('model', 'unknown')}", f" Model: {cfg.get('model', 'unknown')}")) mode = "Chat" if cfg.get("agent") is False else "Agent" - click.echo(f" 模式: {mode}") + click.echo(_t(f" 模式: {mode}", f" Mode: {mode}")) @click.command() diff --git a/cli/commands/skill.py b/cli/commands/skill.py index a591ed9c..fa5a3167 100644 --- a/cli/commands/skill.py +++ b/cli/commands/skill.py @@ -517,18 +517,24 @@ def _install_targz_bytes(content: bytes, name: str, skills_dir: str, result: Ins def _print_install_success(name: str, source: str): """Print a unified install success message with description and source.""" + from cli.utils import get_cli_language + from common import i18n + + get_cli_language() # resolve cow_lang so i18n.t reflects config + _t = i18n.t + skills_dir = get_skills_dir() config = load_skills_config() display = config.get(name, {}).get("display_name", "") desc = 
_read_skill_description(os.path.join(skills_dir, name)) click.echo(click.style(f"✓ {name}", fg="green")) if display and display != name: - click.echo(f" 名称: {display}") + click.echo(_t(f" 名称: {display}", f" Name: {display}")) if desc: if len(desc) > 60: desc = desc[:57] + "…" - click.echo(f" 描述: {desc}") - click.echo(f" 来源: {source}") + click.echo(_t(f" 描述: {desc}", f" Description: {desc}")) + click.echo(_t(f" 来源: {source}", f" Source: {source}")) def _validate_skill_name(name: str): diff --git a/cli/utils.py b/cli/utils.py index b40f8dd5..4dcb5079 100644 --- a/cli/utils.py +++ b/cli/utils.py @@ -40,6 +40,22 @@ def load_config_json() -> dict: return {} +def get_cli_language() -> str: + """Resolve the CLI UI language using the shared i18n detector. + + Reads the `cow_lang` field from config.json (defaults to "auto") and runs + the same detection used by the running app, so CLI output matches. + """ + ensure_sys_path() + try: + from common import i18n + + configured = load_config_json().get("cow_lang", "auto") + return i18n.resolve_language(configured) + except Exception: + return "en" + + def load_skills_config() -> dict: """Load skills_config.json from the custom skills directory.""" path = os.path.join(get_skills_dir(), "skills_config.json") diff --git a/common/i18n.py b/common/i18n.py new file mode 100644 index 00000000..8cce5860 --- /dev/null +++ b/common/i18n.py @@ -0,0 +1,177 @@ +# encoding:utf-8 + +"""Lightweight global language detection and resolution. + +This module is the single source of truth for the runtime UI language used +across the CLI, startup logs, error messages, agent prompts and channel +replies. It must NOT import project config (to avoid circular imports) and +must stay dependency-free so it can run at the earliest startup phase. + +Resolution priority (highest first): + 1. Explicit `cow_lang` from config.json — also covers Docker/CI, since any + config key is overridable via its uppercase env var (e.g. 
COW_LANG=zh), + handled by config.load_config() before resolution. COW_LANG is a private + name to avoid clashing with the gettext-standard LANGUAGE variable. + 2. macOS `defaults read -g AppleLocale` (system-level preference; a Chinese + system locale is a strong signal that beats a shell-default LANG) + 3. Standard locale env vars: LC_ALL > LC_MESSAGES > LANG + 4. Python locale module + 5. Default -> English + +A value of "auto" (the default) triggers detection (steps 2-5). Explicitly +setting "zh" or "en" locks the language and skips detection. +""" + +import os +import subprocess +import sys + +# Supported language codes +ZH = "zh" +EN = "en" +SUPPORTED = (ZH, EN) +DEFAULT_LANG = EN + +# Resolved language cache; None until first resolution. +_resolved_lang = None + + +def _normalize(raw): + """Map an arbitrary locale-ish string to a supported code, or None. + + Only Chinese is detected explicitly; everything else (including unknown + or empty values) yields None so the caller can fall through to the next + detection source. + """ + if not raw: + return None + value = str(raw).strip().lower().replace("_", "-") + if value in ("auto", ""): + return None + # Chinese variants: zh, zh-cn, zh-hans, zh-hans-cn, zh-tw, zh-hk ... + if value.startswith("zh") or value.startswith("chinese"): + return ZH + if value.startswith("en") or value.startswith("english"): + return EN + return None + + +def _detect_from_env(): + """Detect language from standard locale environment variables. + + Note: on macOS, `LANG` is often a shell default (e.g. en_US.UTF-8 set by + .zshrc) that does not reflect the user's real preference, so AppleLocale + is checked first (see detect_language). On Linux these vars are the + primary signal. + + The cow_lang env override (COW_LANG=zh) is intentionally NOT read here: + it sets config["cow_lang"] and is handled via the explicit config path, + not auto-detection. 
+ """ + for key in ("LC_ALL", "LC_MESSAGES", "LANG"): + lang = _normalize(os.environ.get(key)) + if lang: + return lang + return None + + +def _detect_from_macos(): + """macOS fallback: read the system-wide AppleLocale preference. + + On macOS the terminal often does NOT export LANG, yet the system locale + is still meaningful (e.g. a Chinese Mac reports zh_CN). This recovers + that signal so Chinese users are not misdetected as English. + """ + if sys.platform != "darwin": + return None + try: + out = subprocess.run( + ["defaults", "read", "-g", "AppleLocale"], + capture_output=True, + text=True, + timeout=2, + ) + if out.returncode == 0: + return _normalize(out.stdout) + except Exception: + pass + return None + + +def _detect_from_python_locale(): + """Last-resort detection via Python's locale module.""" + try: + import locale + + for value in locale.getlocale(): + lang = _normalize(value) + if lang: + return lang + except Exception: + pass + return None + + +def detect_language(): + """Run full auto-detection and return a supported language code. + + Order (auto-detection only; explicit config["cow_lang"] is resolved + before this is reached): + 1. macOS AppleLocale (system-level preference; a Chinese system locale + is a strong, low-false-positive signal that beats a shell-default + LANG like en_US.UTF-8) + 2. locale env vars LC_ALL / LC_MESSAGES / LANG (primary signal on Linux) + 3. Python locale module + 4. default English + """ + return ( + _detect_from_macos() + or _detect_from_env() + or _detect_from_python_locale() + or DEFAULT_LANG + ) + + +def resolve_language(configured=None): + """Resolve the effective language from a configured value. + + `configured` is the raw `cow_lang` value from config.json (may be None, + "auto", "zh" or "en"). An explicit "zh"/"en" locks the result; "auto" + or empty triggers detection. The result is cached globally. 
+ """ + global _resolved_lang + explicit = _normalize(configured) + if explicit: + _resolved_lang = explicit + else: + _resolved_lang = detect_language() + return _resolved_lang + + +def set_language(lang): + """Force the resolved language (used by tests or per-request overrides).""" + global _resolved_lang + normalized = _normalize(lang) + _resolved_lang = normalized or DEFAULT_LANG + return _resolved_lang + + +def get_language(): + """Return the currently resolved language, detecting lazily if needed.""" + global _resolved_lang + if _resolved_lang is None: + _resolved_lang = detect_language() + return _resolved_lang + + +def is_zh(): + return get_language() == ZH + + +def t(zh_text, en_text): + """Pick a string by the current language. Tiny inline-translation helper. + + Intended for one-off strings where a full message catalog is overkill: + t("已中止", "Cancelled") + """ + return zh_text if get_language() == ZH else en_text diff --git a/config-template.json b/config-template.json index 4e4a7d36..dcd19774 100644 --- a/config-template.json +++ b/config-template.json @@ -1,4 +1,5 @@ { + "cow_lang": "auto", "channel_type": "weixin", "model": "deepseek-v4-flash", "deepseek_api_key": "", diff --git a/config.py b/config.py index 7ad6fa55..ba7936ca 100644 --- a/config.py +++ b/config.py @@ -7,11 +7,17 @@ import os import pickle from common.log import logger +from common import i18n # All available config keys are listed in this dict (use lowercase keys). # The values here are placeholders only; the program does NOT read them. # They merely document the expected format — put real values in config.json. available_setting = { + # global UI language for CLI, startup logs, error messages, agent prompts + # and channel replies. Options: "auto" (detect from system locale, default), + # "zh" (Chinese) or "en" (English). An explicit value locks the language. 
+ # value: auto/en/zh + "cow_lang": "auto", # openai api config "open_ai_api_key": "", # openai api key # openai api base; when use_azure_chatgpt is true, set the matching api base @@ -390,12 +396,17 @@ def load_config(): logger.setLevel(logging.DEBUG) logger.debug("[INIT] set log level to DEBUG") + # Resolve the global UI language as early as possible so that every + # downstream layer (logs, CLI, agent prompts, channel replies) shares it. + resolved_lang = i18n.resolve_language(config.get("cow_lang", "auto")) + logger.info("[INIT] load config: {}".format(drag_sensitive(config))) # print system initialization info logger.info("[INIT] ========================================") logger.info("[INIT] System Initialization") logger.info("[INIT] ========================================") + logger.info("[INIT] Language: {}".format(resolved_lang)) logger.info("[INIT] Channel: {}".format(config.get("channel_type", "unknown"))) logger.info("[INIT] Model: {}".format(config.get("model", "unknown"))) diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 4d0ec94b..6e5dfde7 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -8,6 +8,7 @@ services: ports: - "9899:9899" environment: + COW_LANG: 'auto' CHANNEL_TYPE: 'weixin' MODEL: 'deepseek-v4-flash' DEEPSEEK_API_KEY: '' diff --git a/models/chatgpt/chat_gpt_bot.py b/models/chatgpt/chat_gpt_bot.py index d5b7703d..999986bc 100644 --- a/models/chatgpt/chat_gpt_bot.py +++ b/models/chatgpt/chat_gpt_bot.py @@ -14,6 +14,7 @@ from models.openai.openai_compat import ( from models.openai.openai_http_client import OpenAIHTTPClient, OpenAIHTTPError import requests from common import const +from common.i18n import t as _t from models.bot import Bot from models.openai_compatible_bot import OpenAICompatibleBot from models.chatgpt.chat_gpt_session import ChatGPTSession @@ -94,13 +95,13 @@ class ChatGPTBot(Bot, OpenAIImage, OpenAICompatibleBot): clear_memory_commands = conf().get("clear_memory_commands", 
["#清除记忆"]) if query in clear_memory_commands: self.sessions.clear_session(session_id) - reply = Reply(ReplyType.INFO, "记忆已清除") + reply = Reply(ReplyType.INFO, _t("记忆已清除", "Memory cleared")) elif query == "#清除所有": self.sessions.clear_all_session() - reply = Reply(ReplyType.INFO, "所有人记忆已清除") + reply = Reply(ReplyType.INFO, _t("所有人记忆已清除", "All memories cleared")) elif query == "#更新配置": load_config() - reply = Reply(ReplyType.INFO, "配置已更新") + reply = Reply(ReplyType.INFO, _t("配置已更新", "Config updated")) if reply: return reply session = self.sessions.session_query(query, session_id) @@ -148,7 +149,7 @@ class ChatGPTBot(Bot, OpenAIImage, OpenAICompatibleBot): reply = self.reply_image(context) return reply else: - reply = Reply(ReplyType.ERROR, "Bot不支持处理{}类型的消息".format(context.type)) + reply = Reply(ReplyType.ERROR, _t("Bot不支持处理{}类型的消息", "Bot does not support message type {}").format(context.type)) return reply def reply_image(self, context): @@ -165,7 +166,7 @@ class ChatGPTBot(Bot, OpenAIImage, OpenAICompatibleBot): # Check if file exists if not os.path.exists(image_path): logger.error(f"[CHATGPT] Image file not found: {image_path}") - return Reply(ReplyType.ERROR, "图片文件不存在") + return Reply(ReplyType.ERROR, _t("图片文件不存在", "Image file not found")) # Read and encode image with open(image_path, "rb") as f: @@ -232,7 +233,7 @@ class ChatGPTBot(Bot, OpenAIImage, OpenAICompatibleBot): logger.error(f"[CHATGPT] Image processing error: {e}") import traceback logger.error(traceback.format_exc()) - return Reply(ReplyType.ERROR, f"图片识别失败: {str(e)}") + return Reply(ReplyType.ERROR, _t("图片识别失败: ", "Image recognition failed: ") + str(e)) def reply_text(self, session: ChatGPTSession, api_key=None, args=None, retry_count=0) -> dict: """ @@ -277,25 +278,25 @@ class ChatGPTBot(Bot, OpenAIImage, OpenAICompatibleBot): def _handle_reply_error(self, e, session, api_key, args, retry_count): """Map exception to user-facing reply with retry/backoff (mirrors SDK behavior).""" need_retry = 
retry_count < 2 - result = {"completion_tokens": 0, "content": "我现在有点累了,等会再来吧"} + result = {"completion_tokens": 0, "content": _t("我现在有点累了,等会再来吧", "I'm a bit tired right now. Please try again later.")} if isinstance(e, RateLimitError): logger.warn("[CHATGPT] RateLimitError: {}".format(e)) - result["content"] = "提问太快啦,请休息一下再问我吧" + result["content"] = _t("提问太快啦,请休息一下再问我吧", "You're asking too fast. Please take a short break and try again.") if need_retry: time.sleep(20) elif isinstance(e, Timeout): logger.warn("[CHATGPT] Timeout: {}".format(e)) - result["content"] = "我没有收到你的消息" + result["content"] = _t("我没有收到你的消息", "I didn't receive your message") if need_retry: time.sleep(5) elif isinstance(e, APIConnectionError): logger.warn("[CHATGPT] APIConnectionError: {}".format(e)) - result["content"] = "我连接不到你的网络" + result["content"] = _t("我连接不到你的网络", "I can't reach your network") if need_retry: time.sleep(5) elif isinstance(e, APIError): logger.warn("[CHATGPT] Bad Gateway: {}".format(e)) - result["content"] = "请再问我一次" + result["content"] = _t("请再问我一次", "Please ask me again") if need_retry: time.sleep(10) else: @@ -358,7 +359,7 @@ class AzureChatGPTBot(ChatGPTBot): status = "" while (status != "succeeded"): if retry_count > 3: - return False, "图片生成失败" + return False, _t("图片生成失败", "Image generation failed") response = requests.get(operation_location, headers=headers) status = response.json()['status'] retry_count += 1 @@ -366,7 +367,7 @@ class AzureChatGPTBot(ChatGPTBot): return True, image_url except Exception as e: logger.error("create image error: {}".format(e)) - return False, "图片生成失败" + return False, _t("图片生成失败", "Image generation failed") elif text_to_image_model == "dall-e-3": api_version = conf().get("azure_api_version", "2024-02-15-preview") endpoint = conf().get("azure_openai_dalle_api_base","open_ai_api_base") @@ -389,7 +390,7 @@ class AzureChatGPTBot(ChatGPTBot): else: error_message = "响应中没有图像 URL" logger.error(error_message) - return False, "图片生成失败" + return False, 
_t("图片生成失败", "Image generation failed") except requests.exceptions.RequestException as e: # 捕获所有请求相关的异常 @@ -405,9 +406,9 @@ class AzureChatGPTBot(ChatGPTBot): # 捕获所有其他异常 error_message = f"生成图像时发生错误: {e}" logger.error(error_message) - return False, "图片生成失败" + return False, _t("图片生成失败", "Image generation failed") else: - return False, "图片生成失败,未配置text_to_image参数" + return False, _t("图片生成失败,未配置text_to_image参数", "Image generation failed: text_to_image is not configured") class _AzureChatHTTPClient(OpenAIHTTPClient): diff --git a/plugins/cow_cli/cow_cli.py b/plugins/cow_cli/cow_cli.py index f3fee96a..fdd0081f 100644 --- a/plugins/cow_cli/cow_cli.py +++ b/plugins/cow_cli/cow_cli.py @@ -23,6 +23,7 @@ from plugins import Plugin, Event, EventContext, EventAction from bridge.context import ContextType from bridge.reply import Reply, ReplyType from common.log import logger +from common.i18n import t as _t from config import conf from cli import __version__ @@ -280,19 +281,18 @@ class CowCliPlugin(Plugin): @staticmethod def _typo_hint(token: str, suggestion) -> str: - hint = f"未知命令: /{token}" + hint = _t(f"未知命令: /{token}", f"Unknown command: /{token}") if suggestion: - hint += f"\n你是不是想输入 /{suggestion} ?" 
- hint += "\n发送 /help 查看全部命令。" + hint += _t(f"\n你是不是想输入 /{suggestion} ?", f"\nDid you mean /{suggestion} ?") + hint += _t("\n发送 /help 查看全部命令。", "\nSend /help to see all commands.") return hint @staticmethod def _ambiguous_hint(token: str, candidates) -> str: options = " ".join(f"/{c}" for c in candidates) - return ( - f"命令不明确: /{token}\n" - f"可能想输入: {options}\n" - "发送 /help 查看全部命令。" + return _t( + f"命令不明确: /{token}\n可能想输入: {options}\n发送 /help 查看全部命令。", + f"Ambiguous command: /{token}\nDid you mean: {options}\nSend /help to see all commands.", ) # ------------------------------------------------------------------ @@ -324,7 +324,10 @@ class CowCliPlugin(Plugin): def _dispatch(self, cmd: str, args: str, e_context: EventContext, session_id: str = "") -> str: if cmd in CLI_ONLY_COMMANDS: - return f"⚠️ `cow {cmd}` 只能在命令行终端中执行。\n请在终端运行: cow {cmd}" + return _t( + f"⚠️ `cow {cmd}` 只能在命令行终端中执行。\n请在终端运行: cow {cmd}", + f"⚠️ `cow {cmd}` can only run in a terminal.\nRun it in your shell: cow {cmd}", + ) handler_attr = "_cmd_" + cmd.replace("-", "_") handler = getattr(self, handler_attr, None) @@ -333,42 +336,71 @@ class CowCliPlugin(Plugin): return handler(args, e_context, session_id=session_id) except Exception as e: logger.error(f"[CowCli] command '{cmd}' failed: {e}") - return f"命令执行失败: {e}" + return _t(f"命令执行失败: {e}", f"Command failed: {e}") - return f"未知命令: {cmd}" + return _t(f"未知命令: {cmd}", f"Unknown command: {cmd}") # ------------------------------------------------------------------ # help / version # ------------------------------------------------------------------ def _cmd_help(self, args: str, e_context, **_) -> str: - lines = [ - "📋 CowAgent 命令列表", - "", - " /help 显示此帮助", - " /version 查看版本", - " /status 查看运行状态", - " /cancel 中止当前正在运行的 Agent 任务", - " /logs [N] 查看最近N条日志 (默认20)", - " /context 查看当前对话上下文信息", - " /context clear 清除当前对话上下文", - " /skill list 查看已安装的技能", - " /skill list --remote 浏览技能广场", - " /skill search <关键词> 搜索技能", - " /skill install <名称> 安装技能", - " /skill 
info <名称> 查看技能详情", - " /config 查看当前配置", - " /config 查看某项配置", - " /config 修改配置", - " /memory status 查看记忆索引状态", - " /memory rebuild-index 清空并重建向量索引 (切换 embedding 模型后必须执行)", - " /memory dream [N] 手动触发记忆蒸馏 (整理近N天, 默认3, 最多30)", - " /knowledge 查看知识库统计", - " /knowledge list 查看知识库文件树", - " /knowledge on|off 开启/关闭知识库", - "", - "💡 也可以用 cow 代替 /", - ] + if _t("zh", "en") == "en": + lines = [ + "📋 CowAgent Commands", + "", + "/help: Show this help", + "/version: Show version", + "/status: Show running status", + "/cancel: Abort the running Agent task", + "/logs [N]: Show the last N log lines (default 20)", + "/context: Show current conversation context", + "/context clear: Clear current conversation context", + "/skill list: List installed skills", + "/skill list --remote: Browse Skill Hub", + "/skill search : Search skills", + "/skill install : Install a skill", + "/skill info : Show skill details", + "/config: Show current config", + "/config : Show a config item", + "/config : Update a config item", + "/memory status: Show memory index status", + "/memory rebuild-index: Rebuild the vector index (required after switching embedding model)", + "/memory dream [N]: Trigger memory distillation (last N days, default 3, max 30)", + "/knowledge: Show knowledge base stats", + "/knowledge list: Show knowledge base file tree", + "/knowledge on|off: Enable/disable knowledge base", + "", + "💡 You can also use cow instead of /", + ] + else: + lines = [ + "📋 CowAgent 命令列表", + "", + "/help: 显示此帮助", + "/version: 查看版本", + "/status: 查看运行状态", + "/cancel: 中止当前正在运行的 Agent 任务", + "/logs [N]: 查看最近N条日志 (默认20)", + "/context: 查看当前对话上下文信息", + "/context clear: 清除当前对话上下文", + "/skill list: 查看已安装的技能", + "/skill list --remote: 浏览技能广场", + "/skill search <关键词>: 搜索技能", + "/skill install <名称>: 安装技能", + "/skill info <名称>: 查看技能详情", + "/config: 查看当前配置", + "/config : 查看某项配置", + "/config : 修改配置", + "/memory status: 查看记忆索引状态", + "/memory rebuild-index: 清空并重建向量索引 (切换 embedding 模型后必须执行)", + "/memory dream [N]: 手动触发记忆蒸馏 
(整理近N天, 默认3, 最多30)", + "/knowledge: 查看知识库统计", + "/knowledge list: 查看知识库文件树", + "/knowledge on|off: 开启/关闭知识库", + "", + "💡 也可以用 cow 代替 /", + ] return "\n".join(lines) def _cmd_version(self, args: str, e_context, **_) -> str: @@ -405,9 +437,9 @@ class CowCliPlugin(Plugin): cancelled = registry.cancel_session(target_session) if cancelled <= 0: - return "当前没有可中止的任务。" + return _t("当前没有可中止的任务。", "Nothing to cancel.") - return "🛑 已中止" + return _t("🛑 已中止", "🛑 Cancelled") # ------------------------------------------------------------------ # status @@ -417,21 +449,21 @@ class CowCliPlugin(Plugin): from config import conf cfg = conf() - lines = ["📊 CowAgent 运行状态", ""] + lines = [_t("📊 CowAgent 运行状态", "📊 CowAgent Status"), ""] - lines.append(f" 版本: v{__version__}") - lines.append(f" 进程: PID {os.getpid()}") + lines.append(_t(f" 版本: v{__version__}", f" Version: v{__version__}")) + lines.append(_t(f" 进程: PID {os.getpid()}", f" Process: PID {os.getpid()}")) channel = cfg.get("channel_type", "unknown") if isinstance(channel, list): channel = ", ".join(channel) - lines.append(f" 通道: {channel}") + lines.append(_t(f" 通道: {channel}", f" Channel: {channel}")) model_name = cfg.get("model", "unknown") - lines.append(f" 模型: {model_name}") + lines.append(_t(f" 模型: {model_name}", f" Model: {model_name}")) mode = "Chat" if cfg.get("agent") is False else "Agent" - lines.append(f" 模式: {mode}") + lines.append(_t(f" 模式: {mode}", f" Mode: {mode}")) session_id = self._get_session_id(e_context, fallback=session_id) agent = self._get_agent(session_id) @@ -439,7 +471,7 @@ class CowCliPlugin(Plugin): lines.append("") with agent.messages_lock: msg_count = len(agent.messages) - lines.append(f" 会话消息数: {msg_count}") + lines.append(_t(f" 会话消息数: {msg_count}", f" Session messages: {msg_count}")) if agent.skill_manager: total = len(agent.skill_manager.skills) @@ -447,10 +479,10 @@ class CowCliPlugin(Plugin): 1 for v in agent.skill_manager.skills_config.values() if v.get("enabled", True) ) - lines.append(f" 
已加载技能: {enabled}/{total}") + lines.append(_t(f" 已加载技能: {enabled}/{total}", f" Loaded skills: {enabled}/{total}")) else: lines.append("") - lines.append(f" Agent: 未初始化 (首次对话后自动创建)") + lines.append(_t(" Agent: 未初始化 (首次对话后自动创建)", " Agent: not initialized (created on first chat)")) return "\n".join(lines) @@ -465,7 +497,7 @@ class CowCliPlugin(Plugin): log_file = self._find_log_file() if not log_file: - return "未找到日志文件" + return _t("未找到日志文件", "No log file found") try: with open(log_file, "r", encoding="utf-8", errors="replace") as f: @@ -473,10 +505,10 @@ class CowCliPlugin(Plugin): tail = all_lines[-num_lines:] content = "".join(tail).strip() if not content: - return "日志为空" - return f"📄 最近 {len(tail)} 条日志:\n\n{content}" + return _t("日志为空", "Log is empty") + return _t(f"📄 最近 {len(tail)} 条日志:\n\n{content}", f"📄 Last {len(tail)} log lines:\n\n{content}") except Exception as e: - return f"读取日志失败: {e}" + return _t(f"读取日志失败: {e}", f"Failed to read log: {e}") def _find_log_file(self) -> str: project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -507,13 +539,13 @@ class CowCliPlugin(Plugin): def _context_info(self, agent, session_id: str) -> str: if not agent: - return "⚠️ Agent 未初始化,暂无上下文信息" + return _t("⚠️ Agent 未初始化,暂无上下文信息", "⚠️ Agent not initialized, no context yet") with agent.messages_lock: messages = agent.messages.copy() if not messages: - return "当前对话上下文为空" + return _t("当前对话上下文为空", "Current conversation context is empty") user_msgs = sum(1 for m in messages if m.get("role") == "user") assistant_msgs = sum(1 for m in messages if m.get("role") == "assistant") @@ -521,29 +553,43 @@ class CowCliPlugin(Plugin): total_chars = sum(len(str(m.get("content", ""))) for m in messages) - lines = [ - "💬 当前对话上下文", - "", - f" 会话: {session_id or 'default'}", - f" 总消息数: {len(messages)}", - f" 用户消息: {user_msgs}", - f" 助手回复: {assistant_msgs}", - f" 工具调用: {tool_msgs}", - f" 内容总长度: ~{total_chars} 字符", - "", - " 发送 /context clear 可清除对话上下文", - ] + 
if _t("zh", "en") == "en": + lines = [ + "💬 Current Conversation Context", + "", + f" Session: {session_id or 'default'}", + f" Total messages: {len(messages)}", + f" User messages: {user_msgs}", + f" Assistant replies: {assistant_msgs}", + f" Tool calls: {tool_msgs}", + f" Total content length: ~{total_chars} chars", + "", + " Send /context clear to clear the conversation context", + ] + else: + lines = [ + "💬 当前对话上下文", + "", + f" 会话: {session_id or 'default'}", + f" 总消息数: {len(messages)}", + f" 用户消息: {user_msgs}", + f" 助手回复: {assistant_msgs}", + f" 工具调用: {tool_msgs}", + f" 内容总长度: ~{total_chars} 字符", + "", + " 发送 /context clear 可清除对话上下文", + ] return "\n".join(lines) def _context_clear(self, agent, session_id: str) -> str: if not agent: - return "⚠️ Agent 未初始化" + return _t("⚠️ Agent 未初始化", "⚠️ Agent not initialized") with agent.messages_lock: count = len(agent.messages) agent.messages.clear() - return f"✅ 已清除当前对话上下文 ({count} 条消息)" + return _t(f"✅ 已清除当前对话上下文 ({count} 条消息)", f"✅ Conversation context cleared ({count} messages)") # ------------------------------------------------------------------ # config @@ -578,21 +624,24 @@ class CowCliPlugin(Plugin): def _config_show_all(self) -> str: from config import conf cfg = conf() - lines = ["⚙️ 当前配置", ""] + lines = [_t("⚙️ 当前配置", "⚙️ Current Config"), ""] for key in sorted(self._CONFIG_READABLE): val = cfg.get(key, "") lines.append(f" {key}: {val}") lines.append("") lines.append("━━━━━━━━━━━━━━━━━━━━━━━━━━") - lines.append("💡 /config 查看配置") - lines.append("💡 /config 修改配置") + lines.append(_t("💡 /config : 查看配置", "💡 /config : Show a config item")) + lines.append(_t("💡 /config : 修改配置", "💡 /config : Update a config item")) return "\n".join(lines) def _config_get(self, key: str) -> str: from config import conf if key not in self._CONFIG_READABLE: available = ", ".join(sorted(self._CONFIG_READABLE)) - return f"不支持查看 '{key}'\n\n可查看的配置项: {available}" + return _t( + f"不支持查看 '{key}'\n\n可查看的配置项: {available}", + f"Cannot show 
'{key}'\n\nReadable config items: {available}", + ) val = conf().get(key, "") return f"⚙️ {key}: {val}" @@ -602,9 +651,12 @@ class CowCliPlugin(Plugin): if key not in self._CONFIG_WRITABLE: if key in self._CONFIG_READABLE: - return f"⚠️ '{key}' 为只读配置,不支持修改" + return _t(f"⚠️ '{key}' 为只读配置,不支持修改", f"⚠️ '{key}' is read-only and cannot be modified") available = ", ".join(sorted(self._CONFIG_WRITABLE)) - return f"不支持修改 '{key}'\n\n可修改的配置项: {available}" + return _t( + f"不支持修改 '{key}'\n\n可修改的配置项: {available}", + f"Cannot modify '{key}'\n\nWritable config items: {available}", + ) old_val = conf().get(key, "") @@ -637,7 +689,7 @@ class CowCliPlugin(Plugin): with open(config_path, "w", encoding="utf-8") as f: _json.dump(file_config, f, indent=4, ensure_ascii=False) except Exception as e: - return f"写入 config.json 失败: {e}" + return _t(f"写入 config.json 失败: {e}", f"Failed to write config.json: {e}") # Sync updated values to environment variables so that load_config() # won't overwrite the new value with a stale env var (common in Docker). 
@@ -660,7 +712,7 @@ class CowCliPlugin(Plugin): except Exception as e: logger.warning(f"[CowCli] config reload warning: {e}") - result = f"✅ 配置已更新\n\n {key}: {old_val} → {new_val}" + result = _t(f"✅ 配置已更新\n\n {key}: {old_val} → {new_val}", f"✅ Config updated\n\n {key}: {old_val} → {new_val}") if "bot_type" in updates and updates["bot_type"] != old_bot_type: result += f"\n bot_type: {old_bot_type} → {updates['bot_type']}" return result @@ -725,10 +777,13 @@ class CowCliPlugin(Plugin): from cli.commands.install import run_install_browser if args.strip(): - return ( + return _t( "用法: /install-browser\n\n" "无需参数,等同于终端执行 `cow install-browser`。\n" - "安装过程可能持续数分钟;进度会以多条消息推送,pip 详细输出见服务日志。" + "安装过程可能持续数分钟;进度会以多条消息推送,pip 详细输出见服务日志。", + "Usage: /install-browser\n\n" + "No arguments needed; equivalent to running `cow install-browser` in a terminal.\n" + "Installation may take a few minutes; progress is pushed as multiple messages, and detailed pip output goes to the service log.", ) # Suppress detailed stream in chat; phases go through channel.send @@ -740,11 +795,16 @@ class CowCliPlugin(Plugin): on_phase=lambda m: self._send_install_progress(e_context, m), ) if code != 0: - return ( + return _t( "❌ 安装未成功结束,请查看上方分段提示或服务器日志;" - "也可在终端执行 `cow install-browser`。" + "也可在终端执行 `cow install-browser`。", + "❌ Installation did not finish successfully. Check the messages above or the server log; " + "you can also run `cow install-browser` in a terminal.", ) - return "✅ 安装流程已结束。请重启 CowAgent 后使用 browser 工具(进度见上方消息)。" + return _t( + "✅ 安装流程已结束。请重启 CowAgent 后使用 browser 工具(进度见上方消息)。", + "✅ Installation finished. 
Restart CowAgent to use the browser tool (see messages above for progress).", + ) # ------------------------------------------------------------------ # skill @@ -770,16 +830,25 @@ class CowCliPlugin(Plugin): elif sub == "disable": return self._skill_set_enabled(sub_args, False) else: - return ( + return _t( "用法: /skill <子命令>\n\n" "子命令:\n" - " list [--remote] 查看技能列表\n" - " search <关键词> 搜索技能\n" - " install <名称> 安装技能\n" - " uninstall <名称> 卸载技能\n" - " info <名称> 查看技能详情\n" - " enable <名称> 启用技能\n" - " disable <名称> 禁用技能" + "list [--remote]: 查看技能列表\n" + "search <关键词>: 搜索技能\n" + "install <名称>: 安装技能\n" + "uninstall <名称>: 卸载技能\n" + "info <名称>: 查看技能详情\n" + "enable <名称>: 启用技能\n" + "disable <名称>: 禁用技能", + "Usage: /skill \n\n" + "Subcommands:\n" + "list [--remote]: List skills\n" + "search : Search skills\n" + "install : Install a skill\n" + "uninstall : Uninstall a skill\n" + "info : Show skill details\n" + "enable : Enable a skill\n" + "disable : Disable a skill", ) def _refresh_skill_manager(self): @@ -813,13 +882,16 @@ class CowCliPlugin(Plugin): if os.path.exists(os.path.join(skill_path, "SKILL.md")): entries.append({"name": name, "source": source, "enabled": True}) if not entries: - return "暂无已安装的技能\n\n💡 /skill list --remote 浏览技能广场" + return _t( + "暂无已安装的技能\n\n💡 /skill list --remote: 浏览技能广场", + "No skills installed yet\n\n💡 /skill list --remote: Browse Skill Hub", + ) config = {e["name"]: e for e in entries} sorted_entries = sorted(config.values(), key=lambda e: e.get("name", "")) enabled_count = sum(1 for e in sorted_entries if e.get("enabled", True)) - lines = [f"📦 已安装的技能 ({enabled_count}/{len(sorted_entries)})", ""] + lines = [_t(f"📦 已安装的技能 ({enabled_count}/{len(sorted_entries)})", f"📦 Installed Skills ({enabled_count}/{len(sorted_entries)})"), ""] for entry in sorted_entries: name = entry.get("name", "") enabled = entry.get("enabled", True) @@ -835,13 +907,13 @@ class CowCliPlugin(Plugin): if desc: line += f"\n {desc}" if source: - line += f"\n 来源: {source}" + line += 
_t(f"\n 来源: {source}", f"\n Source: {source}") lines.append(line) lines.append("") lines.append("━━━━━━━━━━━━━━━━━━━━━━━━━━") - lines.append("💡 /skill list --remote 浏览技能广场") - lines.append("💡 /skill info <名称> 查看详情") + lines.append(_t("💡 /skill list --remote: 浏览技能广场", "💡 /skill list --remote: Browse Skill Hub")) + lines.append(_t("💡 /skill info <名称>: 查看详情", "💡 /skill info : Show details")) return "\n".join(lines) def _skill_list(self, args: str) -> str: @@ -871,43 +943,43 @@ class CowCliPlugin(Plugin): skills = data.get("skills", []) total = data.get("total", len(skills)) except Exception as e: - return f"获取技能广场失败: {e}" + return _t(f"获取技能广场失败: {e}", f"Failed to fetch Skill Hub: {e}") if not skills and page == 1: - return "技能广场暂无可用技能" + return _t("技能广场暂无可用技能", "No skills available on Skill Hub") total_pages = max(1, (total + page_size - 1) // page_size) page = min(page, total_pages) installed = set(load_skills_config().keys()) - lines = ["🌐 技能广场", ""] + lines = [_t("🌐 技能广场", "🌐 Skill Hub"), ""] for s in skills: name = s.get("name", "") display = s.get("display_name", "") or name desc = s.get("description", "") if len(desc) > 50: desc = desc[:47] + "…" - badge = " [已安装]" if name in installed else "" + badge = _t(" [已安装]", " [installed]") if name in installed else "" lines.append(f"📌 {display}{badge}") - lines.append(f" 名称: {name}") + lines.append(_t(f" 名称: {name}", f" Name: {name}")) if desc: lines.append(f" {desc}") lines.append("") lines.append("━━━━━━━━━━━━━━━━━━━━━━━━━━") - lines.append(f"📄 第 {page}/{total_pages} 页") + lines.append(_t(f"📄 第 {page}/{total_pages} 页", f"📄 Page {page}/{total_pages}")) if page < total_pages: - lines.append(f"💡 /skill list --remote --page {page + 1} 下一页") + lines.append(_t(f"💡 /skill list --remote --page {page + 1}: 下一页", f"💡 /skill list --remote --page {page + 1}: Next page")) if page > 1: - lines.append(f"💡 /skill list --remote --page {page - 1} 上一页") - lines.append("💡 /skill install <名称> 安装技能") - lines.append("💡 /skill search <关键词> 
搜索技能") - lines.append("🌐 https://skills.cowagent.ai 在线浏览全部技能") + lines.append(_t(f"💡 /skill list --remote --page {page - 1}: 上一页", f"💡 /skill list --remote --page {page - 1}: Previous page")) + lines.append(_t("💡 /skill install <名称>: 安装技能", "💡 /skill install : Install a skill")) + lines.append(_t("💡 /skill search <关键词>: 搜索技能", "💡 /skill search : Search skills")) + lines.append(_t("🌐 https://skills.cowagent.ai 在线浏览全部技能", "🌐 https://skills.cowagent.ai Browse all skills online")) return "\n".join(lines) def _skill_search(self, query: str) -> str: if not query: - return "请指定搜索关键词: /skill search <关键词>" + return _t("请指定搜索关键词: /skill search <关键词>", "Please specify a search keyword: /skill search ") import requests from cli.utils import SKILL_HUB_API, load_skills_config @@ -916,35 +988,35 @@ class CowCliPlugin(Plugin): resp.raise_for_status() skills = resp.json().get("skills", []) except Exception as e: - return f"搜索失败: {e}" + return _t(f"搜索失败: {e}", f"Search failed: {e}") if not skills: - return f"未找到与「{query}」相关的技能" + return _t(f"未找到与「{query}」相关的技能", f"No skills found for \"{query}\"") installed = set(load_skills_config().keys()) - lines = [f"🔍 搜索「{query}」({len(skills)} 个结果)", ""] + lines = [_t(f"🔍 搜索「{query}」({len(skills)} 个结果)", f"🔍 Search \"{query}\" ({len(skills)} results)"), ""] for s in skills: name = s.get("name", "") display = s.get("display_name", "") or name desc = s.get("description", "") if len(desc) > 50: desc = desc[:47] + "…" - badge = " [已安装]" if name in installed else "" + badge = _t(" [已安装]", " [installed]") if name in installed else "" lines.append(f"📌 {display}{badge}") - lines.append(f" 名称: {name}") + lines.append(_t(f" 名称: {name}", f" Name: {name}")) if desc: lines.append(f" {desc}") lines.append("") lines.append("━━━━━━━━━━━━━━━━━━━━━━━━━━") - lines.append("💡 /skill install <名称> 安装技能") + lines.append(_t("💡 /skill install <名称>: 安装技能", "💡 /skill install : Install a skill")) return "\n".join(lines) _INSTALL_TIMEOUT = 60 def _skill_install(self, name: 
str, e_context: EventContext) -> str: if not name: - return "请指定要安装的技能: /skill install <名称>" + return _t("请指定要安装的技能: /skill install <名称>", "Please specify a skill to install: /skill install ") from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeout from cli.commands.skill import install_skill @@ -955,16 +1027,16 @@ class CowCliPlugin(Plugin): result = future.result(timeout=self._INSTALL_TIMEOUT) if result.error: - return f"安装失败: {result.error}" + return _t(f"安装失败: {result.error}", f"Install failed: {result.error}") if not result.installed: - return "\n".join(result.messages) if result.messages else "未找到可安装的技能" + return "\n".join(result.messages) if result.messages else _t("未找到可安装的技能", "No installable skill found") return self._format_install_result(result) except FuturesTimeout: - return "安装超时,请稍后重试或检查网络连接" + return _t("安装超时,请稍后重试或检查网络连接", "Install timed out. Please retry later or check your network connection.") except Exception as e: - return f"安装失败: {e}" + return _t(f"安装失败: {e}", f"Install failed: {e}") @staticmethod def _format_install_result(result) -> str: @@ -978,20 +1050,20 @@ class CowCliPlugin(Plugin): for skill_name in result.installed: desc = _read_skill_description(os.path.join(skills_dir, skill_name)) display = config.get(skill_name, {}).get("display_name", "") - lines.append(f"✅ 技能安装成功:{skill_name}") + lines.append(_t(f"✅ 技能安装成功:{skill_name}", f"✅ Skill installed: {skill_name}")) if display and display != skill_name: - lines.append(f" 名称:{display}") + lines.append(_t(f" 名称:{display}", f" Name: {display}")) if desc: - lines.append(f" 描述:{desc}") + lines.append(_t(f" 描述:{desc}", f" Description: {desc}")) if len(result.installed) > 1: - lines.append(f"\n共安装 {len(result.installed)} 个技能") + lines.append(_t(f"\n共安装 {len(result.installed)} 个技能", f"\nInstalled {len(result.installed)} skills")) return "\n".join(lines) def _skill_uninstall(self, name: str) -> str: if not name: - return "请指定要卸载的技能: /skill uninstall <名称>" + return 
_t("请指定要卸载的技能: /skill uninstall <名称>", "Please specify a skill to uninstall: /skill uninstall ") import shutil import json @@ -1004,7 +1076,7 @@ class CowCliPlugin(Plugin): skill_dir = self._resolve_skill_dir(name, skills_dir) if not skill_dir: - return f"技能 '{name}' 未安装" + return _t(f"技能 '{name}' 未安装", f"Skill '{name}' is not installed") shutil.rmtree(skill_dir) @@ -1019,7 +1091,7 @@ class CowCliPlugin(Plugin): except Exception: pass - return f"✅ 技能 '{name}' 已卸载" + return _t(f"✅ 技能 '{name}' 已卸载", f"✅ Skill '{name}' uninstalled") @staticmethod def _resolve_skill_dir(name: str, skills_dir: str): @@ -1055,7 +1127,7 @@ class CowCliPlugin(Plugin): def _skill_info(self, name: str) -> str: if not name: - return "请指定技能名称: /skill info <名称>" + return _t("请指定技能名称: /skill info <名称>", "Please specify a skill name: /skill info ") from cli.utils import get_skills_dir, get_builtin_skills_dir @@ -1078,18 +1150,18 @@ class CowCliPlugin(Plugin): source = "custom" if not skill_dir: - return f"技能 '{name}' 未找到" + return _t(f"技能 '{name}' 未找到", f"Skill '{name}' not found") skill_md = os.path.join(skill_dir, "SKILL.md") if not os.path.exists(skill_md): - return f"技能 '{name}' 没有 SKILL.md 文件" + return _t(f"技能 '{name}' 没有 SKILL.md 文件", f"Skill '{name}' has no SKILL.md file") with open(skill_md, "r", encoding="utf-8") as f: content = f.read() meta, body = self._strip_frontmatter(content) - header_lines = [f"📖 技能: {name} [{source}]", ""] + header_lines = [_t(f"📖 技能: {name} [{source}]", f"📖 Skill: {name} [{source}]"), ""] desc = meta.get("description", "") if desc: header_lines.append(f" {desc}") @@ -1104,8 +1176,10 @@ class CowCliPlugin(Plugin): def _skill_set_enabled(self, name: str, enabled: bool) -> str: if not name: - action = "启用" if enabled else "禁用" - return f"请指定技能名称: /skill {'enable' if enabled else 'disable'} <名称>" + return _t( + f"请指定技能名称: /skill {'enable' if enabled else 'disable'} <名称>", + f"Please specify a skill name: /skill {'enable' if enabled else 'disable'} ", + ) import 
json from cli.utils import get_skills_dir @@ -1114,24 +1188,25 @@ class CowCliPlugin(Plugin): config_path = os.path.join(skills_dir, "skills_config.json") if not os.path.exists(config_path): - return "技能配置文件不存在" + return _t("技能配置文件不存在", "Skills config file not found") try: with open(config_path, "r", encoding="utf-8") as f: config = json.load(f) except Exception as e: - return f"读取配置失败: {e}" + return _t(f"读取配置失败: {e}", f"Failed to read config: {e}") if name not in config: - return f"技能 '{name}' 未在配置中找到" + return _t(f"技能 '{name}' 未在配置中找到", f"Skill '{name}' not found in config") config[name]["enabled"] = enabled with open(config_path, "w", encoding="utf-8") as f: json.dump(config, f, indent=4, ensure_ascii=False) - action = "启用" if enabled else "禁用" icon = "✅" if enabled else "⬚" - return f"{icon} 技能 '{name}' 已{action}" + if enabled: + return _t(f"{icon} 技能 '{name}' 已启用", f"{icon} Skill '{name}' enabled") + return _t(f"{icon} 技能 '{name}' 已禁用", f"{icon} Skill '{name}' disabled") # ------------------------------------------------------------------ # memory @@ -1157,13 +1232,19 @@ class CowCliPlugin(Plugin): @staticmethod def _memory_help() -> str: - return ( + return _t( "🧠 记忆管理\n\n" "用法: /memory <子命令>\n\n" "子命令:\n" - " status 查看索引状态 (provider / model / dim / chunks)\n" - " rebuild-index 清空并重建向量索引 (切换 embedding 模型后必须执行)\n" - " dream [N] 手动触发记忆蒸馏 (整理近N天, 默认3, 最多30)" + "status: 查看索引状态 (provider / model / dim / chunks)\n" + "rebuild-index: 清空并重建向量索引 (切换 embedding 模型后必须执行)\n" + "dream [N]: 手动触发记忆蒸馏 (整理近N天, 默认3, 最多30)", + "🧠 Memory Management\n\n" + "Usage: /memory \n\n" + "Subcommands:\n" + "status: Show index status (provider / model / dim / chunks)\n" + "rebuild-index: Rebuild the vector index (required after switching embedding model)\n" + "dream [N]: Trigger memory distillation (last N days, default 3, max 30)", ) def _memory_dream(self, days: int, e_context, session_id: str) -> str: @@ -1178,10 +1259,10 @@ class CowCliPlugin(Plugin): try: flush_mgr = 
self._create_standalone_flush_manager() except Exception as e: - return f"⚠️ 无法初始化记忆蒸馏: {e}" + return _t(f"⚠️ 无法初始化记忆蒸馏: {e}", f"⚠️ Failed to initialize memory distillation: {e}") if not flush_mgr.llm_model: - return "⚠️ 未配置 LLM 模型,无法执行记忆蒸馏" + return _t("⚠️ 未配置 LLM 模型,无法执行记忆蒸馏", "⚠️ No LLM model configured, cannot run memory distillation") # SaaS (e_context is None): run synchronously, return full result if e_context is None: @@ -1196,13 +1277,16 @@ class CowCliPlugin(Plugin): if result: self._notify(e_context, self._build_dream_result(flush_mgr, is_web)) else: - self._notify(e_context, "💤 记忆蒸馏跳过 — 没有新的记忆内容需要整理") + self._notify(e_context, _t("💤 记忆蒸馏跳过 — 没有新的记忆内容需要整理", "💤 Memory distillation skipped — no new memories to process")) except Exception as e: logger.warning(f"[CowCli] /memory dream failed: {e}") - self._notify(e_context, f"❌ 记忆蒸馏失败: {e}") + self._notify(e_context, _t(f"❌ 记忆蒸馏失败: {e}", f"❌ Memory distillation failed: {e}")) threading.Thread(target=_run, daemon=True).start() - return f"🌙 记忆蒸馏已启动 (整理近 {days} 天的记忆)\n\n整理在后台执行,完成后会通知你。" + return _t( + f"🌙 记忆蒸馏已启动 (整理近 {days} 天的记忆)\n\n整理在后台执行,完成后会通知你。", + f"🌙 Memory distillation started (processing the last {days} days)\n\nRunning in the background; you'll be notified when it's done.", + ) def _memory_dream_sync(self, flush_mgr, days: int) -> str: """Run deep dream synchronously and return the full result.""" @@ -1210,10 +1294,10 @@ class CowCliPlugin(Plugin): result = flush_mgr.deep_dream(lookback_days=days, force=True) if result: return self._build_dream_result(flush_mgr, is_web=True) - return "💤 记忆蒸馏跳过 — 没有新的记忆内容需要整理" + return _t("💤 记忆蒸馏跳过 — 没有新的记忆内容需要整理", "💤 Memory distillation skipped — no new memories to process") except Exception as e: logger.warning(f"[CowCli] /memory dream sync failed: {e}") - return f"❌ 记忆蒸馏失败: {e}" + return _t(f"❌ 记忆蒸馏失败: {e}", f"❌ Memory distillation failed: {e}") @staticmethod def _resolve_active_embedding(): @@ -1255,9 +1339,9 @@ class CowCliPlugin(Plugin): agent = 
self._get_agent("") memory_manager = agent.memory_manager if agent else None - lines = ["🧠 记忆索引状态", ""] + lines = [_t("🧠 记忆索引状态", "🧠 Memory Index Status"), ""] if not memory_manager: - lines.append(" ⚠️ Agent 尚未初始化,先发一条普通消息再试") + lines.append(_t(" ⚠️ Agent 尚未初始化,先发一条普通消息再试", " ⚠️ Agent not initialized yet, send a normal message first")) return "\n".join(lines) stats = memory_manager.storage.get_stats() @@ -1278,7 +1362,7 @@ class CowCliPlugin(Plugin): lines.append(f" Model : {cfg_model}") lines.append(f" Dim : {cfg_dim if cfg_dim else '?'}") else: - lines.append(" Provider : (未初始化, keyword-only)") + lines.append(_t(" Provider : (未初始化, keyword-only)", " Provider : (not initialized, keyword-only)")) # Health hints — only shown when the user has explicitly opted into # vector search via `embedding_provider`. Legacy users (no explicit @@ -1289,17 +1373,17 @@ class CowCliPlugin(Plugin): if explicitly_opted_in and provider_obj is not None: if chunks > 0 and embedded < chunks: missing = chunks - embedded - warnings.append( - f" ⚠️ {missing}/{chunks} 个 chunk 没有向量;" - f"运行 /memory rebuild-index 后所有记忆才会被向量化检索" - ) + warnings.append(_t( + f" ⚠️ {missing}/{chunks} 个 chunk 没有向量;运行 /memory rebuild-index 后所有记忆才会被向量化检索", + f" ⚠️ {missing}/{chunks} chunks have no vectors; run /memory rebuild-index to enable vector search for all memories", + )) index_dim = detect_index_dim(memory_manager.storage) if index_dim is not None and cfg_dim and index_dim != cfg_dim: - warnings.append( - f" ⚠️ 索引中存量向量为 {index_dim} 维,与当前配置 {cfg_dim} 维不一致;" - f"运行 /memory rebuild-index 重建后向量检索才会生效" - ) + warnings.append(_t( + f" ⚠️ 索引中存量向量为 {index_dim} 维,与当前配置 {cfg_dim} 维不一致;运行 /memory rebuild-index 重建后向量检索才会生效", + f" ⚠️ Existing vectors are {index_dim}-dim, mismatching the current {cfg_dim}-dim config; run /memory rebuild-index to make vector search work", + )) if warnings: lines.append("") @@ -1312,9 +1396,11 @@ class CowCliPlugin(Plugin): session_id = self._get_session_id(e_context, fallback=session_id) 
agent = self._get_agent(session_id) if not agent or not agent.memory_manager: - return ( + return _t( "⚠️ Agent 尚未初始化,无法重建索引。\n" - "请先发送一条普通消息触发 Agent 启动后再试。" + "请先发送一条普通消息触发 Agent 启动后再试。", + "⚠️ Agent not initialized, cannot rebuild the index.\n" + "Send a normal message first to start the Agent, then try again.", ) memory_manager = agent.memory_manager @@ -1328,12 +1414,14 @@ class CowCliPlugin(Plugin): ._init_embedding_provider(memory_manager.config, session_id=session_id) except Exception as e: logger.exception("[CowCli] /memory rebuild-index: build provider failed") - return f"⚠️ 无法根据当前配置构造 embedding provider: {e}" + return _t(f"⚠️ 无法根据当前配置构造 embedding provider: {e}", f"⚠️ Failed to build embedding provider from current config: {e}") if fresh_provider is None: - return ( + return _t( "⚠️ 当前没有可用的 embedding provider。\n" - "请检查 config.json 中的 embedding 相关配置 (provider / api key)。" + "请检查 config.json 中的 embedding 相关配置 (provider / api key)。", + "⚠️ No embedding provider available.\n" + "Check the embedding settings in config.json (provider / api key).", ) memory_manager.embedding_provider = fresh_provider @@ -1353,23 +1441,29 @@ class CowCliPlugin(Plugin): if result.ok: self._notify( e_context, - ( + _t( f"✅ 索引重建完成\n" f" cleared : {result.removed}\n" f" chunks : {result.chunks}\n" - f" files : {result.files}" + f" files : {result.files}", + f"✅ Index rebuild complete\n" + f" cleared : {result.removed}\n" + f" chunks : {result.chunks}\n" + f" files : {result.files}", ), ) else: - self._notify(e_context, f"❌ 索引重建失败: {result.error}") + self._notify(e_context, _t(f"❌ 索引重建失败: {result.error}", f"❌ Index rebuild failed: {result.error}")) except Exception as e: logger.exception("[CowCli] /memory rebuild-index failed") - self._notify(e_context, f"❌ 索引重建失败: {e}") + self._notify(e_context, _t(f"❌ 索引重建失败: {e}", f"❌ Index rebuild failed: {e}")) threading.Thread(target=_run, daemon=True).start() - return ( + return _t( f"🔧 索引重建已启动 (model={model_label}, dim={dim_label})\n\n" - 
f"将重新向量化所有记忆和知识文件,完成后会通知你。" + f"将重新向量化所有记忆和知识文件,完成后会通知你。", + f"🔧 Index rebuild started (model={model_label}, dim={dim_label})\n\n" + f"Re-vectorizing all memory and knowledge files; you'll be notified when done.", ) @staticmethod @@ -1380,15 +1474,19 @@ class CowCliPlugin(Plugin): result = rebuild_in_process(memory_manager) except Exception as e: logger.exception("[CowCli] /memory rebuild-index sync failed") - return f"❌ 索引重建失败: {e}" + return _t(f"❌ 索引重建失败: {e}", f"❌ Index rebuild failed: {e}") if not result.ok: - return f"❌ 索引重建失败: {result.error}" - return ( + return _t(f"❌ 索引重建失败: {result.error}", f"❌ Index rebuild failed: {result.error}") + return _t( f"✅ 索引重建完成 (model={model_label}, dim={dim_label})\n" f" cleared : {result.removed}\n" f" chunks : {result.chunks}\n" - f" files : {result.files}" + f" files : {result.files}", + f"✅ Index rebuild complete (model={model_label}, dim={dim_label})\n" + f" cleared : {result.removed}\n" + f" chunks : {result.chunks}\n" + f" files : {result.files}", ) @staticmethod @@ -1418,7 +1516,7 @@ class CowCliPlugin(Plugin): def _build_dream_result(flush_mgr, is_web: bool) -> str: """Build dream completion message with diary content.""" from datetime import datetime - lines = ["✅ 记忆蒸馏完成"] + lines = [_t("✅ 记忆蒸馏完成", "✅ Memory distillation complete")] # Read today's dream diary today = datetime.now().strftime("%Y-%m-%d") @@ -1433,9 +1531,9 @@ class CowCliPlugin(Plugin): lines.append(f"\n{diary}") if is_web: - lines.append("\n[MEMORY.md](/memory/MEMORY.md) | [梦境日记](/memory/dreams)") + lines.append(_t("\n[MEMORY.md](/memory/MEMORY.md) | [梦境日记](/memory/dreams)", "\n[MEMORY.md](/memory/MEMORY.md) | [Dream Diary](/memory/dreams)")) else: - lines.append("\nMEMORY.md 已更新") + lines.append(_t("\nMEMORY.md 已更新", "\nMEMORY.md updated")) return "\n".join(lines) @@ -1485,11 +1583,17 @@ class CowCliPlugin(Plugin): with open(config_path, "w", encoding="utf-8") as f: _json.dump(file_config, f, indent=4, ensure_ascii=False) except Exception as e: - 
return f"⚠️ 内存中已切换,但写入 config.json 失败: {e}" + return _t(f"⚠️ 内存中已切换,但写入 config.json 失败: {e}", f"⚠️ Switched in memory, but failed to write config.json: {e}") - status = "开启 ✅" if enabled else "关闭 ❌" - note = "知识库将在下次对话中生效" if enabled else "知识库系统已停用,不再注入提示词和索引知识文件" - return f"📚 知识库已{status}\n\n{note}" + if enabled: + return _t( + "📚 知识库已开启 ✅\n\n知识库将在下次对话中生效", + "📚 Knowledge base enabled ✅\n\nIt will take effect in the next conversation", + ) + return _t( + "📚 知识库已关闭 ❌\n\n知识库系统已停用,不再注入提示词和索引知识文件", + "📚 Knowledge base disabled ❌\n\nThe knowledge system is off; no prompt injection or file indexing", + ) def _knowledge_stats(self) -> str: from config import conf @@ -1499,7 +1603,7 @@ class CowCliPlugin(Plugin): "knowledge" ) if not os.path.isdir(knowledge_dir): - return "📚 知识库目录不存在\n\n💡 开启知识库: /knowledge on" + return _t("📚 知识库目录不存在\n\n💡 开启知识库: /knowledge on", "📚 Knowledge base directory not found\n\n💡 Enable it: /knowledge on") enabled = conf().get("knowledge", True) total_files = 0 @@ -1516,13 +1620,13 @@ class CowCliPlugin(Plugin): total_bytes += os.path.getsize(os.path.join(root, f)) cat_count[category] = cat_count.get(category, 0) + 1 - status = "✅ 已开启" if enabled else "❌ 已关闭" + status = _t("✅ 已开启", "✅ Enabled") if enabled else _t("❌ 已关闭", "❌ Disabled") lines = [ - "📚 知识库统计", + _t("📚 知识库统计", "📚 Knowledge Base Stats"), "", - f"状态: {status}", - f"页面: {total_files} 篇", - f"大小: {total_bytes / 1024:.1f} KB", + _t(f"状态: {status}", f"Status: {status}"), + _t(f"页面: {total_files} 篇", f"Pages: {total_files}"), + _t(f"大小: {total_bytes / 1024:.1f} KB", f"Size: {total_bytes / 1024:.1f} KB"), "", ] if cat_count: @@ -1530,12 +1634,12 @@ class CowCliPlugin(Plugin): lines.append(f"- {cat}/ ({cat_count[cat]} pages)") lines.append("") - lines.append(f"路径: {knowledge_dir}") + lines.append(_t(f"路径: {knowledge_dir}", f"Path: {knowledge_dir}")) lines.extend([ "", "━━━━━━━━━━━━━━━━━━━━━━━━━━", - "💡 /knowledge list 查看文件树", - "💡 /knowledge on|off 开关知识库", + _t("💡 /knowledge list: 查看文件树", "💡 
/knowledge list: Show file tree"), + _t("💡 /knowledge on|off: 开关知识库", "💡 /knowledge on|off: Toggle knowledge base"), ]) return "\n".join(lines) @@ -1547,7 +1651,7 @@ class CowCliPlugin(Plugin): "knowledge" ) if not os.path.isdir(knowledge_dir): - return "📚 知识库目录不存在\n\n💡 开启知识库: /knowledge on" + return _t("📚 知识库目录不存在\n\n💡 开启知识库: /knowledge on", "📚 Knowledge base directory not found\n\n💡 Enable it: /knowledge on") tree = ["knowledge/"] @@ -1577,7 +1681,7 @@ class CowCliPlugin(Plugin): tree.append(f"{child_prefix}└── ... +{len(md_files) - max_show} more") if not subdirs: - tree.append("(空)") + tree.append(_t("(空)", "(empty)")) return "```\n" + "\n".join(tree) + "\n```" @@ -1602,4 +1706,4 @@ class CowCliPlugin(Plugin): return None def get_help_text(self, **kwargs): - return "在对话中使用 /help 或 cow help 查看可用命令" + return _t("在对话中使用 /help 或 cow help 查看可用命令", "Use /help or cow help in chat to see available commands") From 1827a2a31c7cbefeab541cab9bb5a4f6a521213d Mon Sep 17 00:00:00 2001 From: zhayujie Date: Sun, 31 May 2026 17:01:43 +0800 Subject: [PATCH 2/5] feat(i18n): bind web language switch to cow_lang config --- channel/web/chat.html | 25 +++++++++++++ channel/web/static/js/console.js | 64 +++++++++++++++++++++++++++++++- channel/web/web_channel.py | 10 +++++ cli/commands/process.py | 2 + plugins/cow_cli/cow_cli.py | 4 ++ 5 files changed, 103 insertions(+), 2 deletions(-) diff --git a/channel/web/chat.html b/channel/web/chat.html index 1bbf0f04..96bb67bd 100644 --- a/channel/web/chat.html +++ b/channel/web/chat.html @@ -659,6 +659,31 @@ + +
+
+
+ +
+

语言

+
+
+
+ +
+
+ -- + +
+
+
+
+
+
+ diff --git a/channel/web/static/js/console.js b/channel/web/static/js/console.js index e5865051..a68da915 100644 --- a/channel/web/static/js/console.js +++ b/channel/web/static/js/console.js @@ -115,6 +115,7 @@ const I18N = { input_placeholder: '输入消息,或输入 / 使用指令', config_title: '配置管理', config_desc: '管理模型和 Agent 配置', config_model: '模型配置', config_agent: 'Agent 配置', + config_language: '语言', config_language_hint: '界面展示、命令文案、系统报错等使用的语言(与右上角切换同步)', config_model_advanced: '高级配置', config_channel: '通道配置', config_agent_enabled: 'Agent 模式', @@ -310,6 +311,7 @@ const I18N = { input_placeholder: 'Type a message, or press / for commands', config_title: 'Configuration', config_desc: 'Manage model and agent settings', config_model: 'Model Configuration', config_agent: 'Agent Configuration', + config_language: 'Language', config_language_hint: 'Language for the UI, command text, system messages and more (synced with the top-right switch)', config_model_advanced: 'Advanced', config_channel: 'Channel Configuration', config_agent_enabled: 'Agent Mode', @@ -454,14 +456,60 @@ function applyI18n() { if (langLabel) langLabel.textContent = currentLang === 'zh' ? '中文' : 'EN'; } -function toggleLanguage() { - currentLang = currentLang === 'zh' ? 'en' : 'zh'; +// Single entry point for switching language. Updates the in-memory language, +// persists the user choice locally, re-renders the UI, and binds the choice to +// the backend `cow_lang` config so logs / agent replies / CLI follow suit. +function setLanguage(lang) { + const next = (lang === 'en') ? 'en' : 'zh'; + if (next === currentLang) { + // Still persist + sync in case storage/backend drifted from the UI. + syncLanguageToBackend(next); + return; + } + currentLang = next; localStorage.setItem('cow_lang', currentLang); applyI18n(); _applyInputTooltips(); // Re-render views whose DOM is built in JS (data-i18n alone does not // cover strings interpolated via t() into innerHTML). 
try { rerenderDynamicViews(); } catch (e) {} + // Keep the language switch button and config selector visually in sync. + try { updateLangControls(); } catch (e) {} + syncLanguageToBackend(currentLang); +} + +// Persist the language to the backend `cow_lang` config (best-effort; the UI +// has already switched locally, so a network failure is non-blocking). +function syncLanguageToBackend(lang) { + try { + fetch('/config', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ updates: { cow_lang: lang } }) + }).catch(() => {}); + } catch (e) {} +} + +// Reflect the current language on both the top-right toggle and the config +// selector (if present), so the two entry points stay synchronized. +function updateLangControls() { + const langLabel = document.getElementById('lang-label'); + if (langLabel) langLabel.textContent = currentLang === 'zh' ? '中文' : 'EN'; + // The config language picker is the custom .cfg-dropdown component. Only + // sync it once it has been initialized (i.e. the config panel was opened). + const sel = document.getElementById('cfg-lang-select'); + if (sel && sel._ddValue !== undefined && sel._ddValue !== currentLang) { + sel._ddValue = currentLang; + const textEl = sel.querySelector('.cfg-dropdown-text'); + if (textEl) textEl.textContent = currentLang === 'zh' ? '中文' : 'English'; + sel.querySelectorAll('.cfg-dropdown-item').forEach(i => { + i.classList.toggle('active', i.dataset.value === currentLang); + }); + } +} + +function toggleLanguage() { + setLanguage(currentLang === 'zh' ? 'en' : 'zh'); } // Refresh JS-rendered views after a language switch. 
Each branch uses the @@ -3358,6 +3406,18 @@ function initConfigView(data) { document.getElementById('cfg-max-steps').value = data.agent_max_steps || 20; document.getElementById('cfg-enable-thinking').checked = data.enable_thinking === true; + // Reflect the current UI language (already resolved, may include the user's + // local choice) on the selector so it stays in sync with the top-right toggle. + const langSel = document.getElementById('cfg-lang-select'); + if (langSel) { + initDropdown( + langSel, + [{ value: 'zh', label: '中文' }, { value: 'en', label: 'English' }], + currentLang, + (val) => setLanguage(val) + ); + } + const pwdInput = document.getElementById('cfg-password'); const maskedPwd = data.web_password_masked || ''; pwdInput.value = maskedPwd; diff --git a/channel/web/web_channel.py b/channel/web/web_channel.py index e2c0c5e4..88471dfa 100644 --- a/channel/web/web_channel.py +++ b/channel/web/web_channel.py @@ -1535,6 +1535,7 @@ class ConfigHandler: ]) EDITABLE_KEYS = { + "cow_lang", "model", "bot_type", "use_linkai", "open_ai_api_base", "deepseek_api_base", "qianfan_api_base", "claude_api_base", "gemini_api_base", "zhipu_ai_api_base", "moonshot_base_url", "ark_base_url", "custom_api_base", "mimo_api_base", @@ -1643,6 +1644,15 @@ class ConfigHandler: logger.info(f"[WebChannel] Config updated: {list(applied.keys())}") + # Apply a language change immediately so backend logs, agent + # replies and CLI output switch without a restart. + if "cow_lang" in applied: + try: + i18n.resolve_language(applied["cow_lang"]) + logger.info(f"[WebChannel] Language switched to: {i18n.get_language()}") + except Exception as lang_err: + logger.warning(f"[WebChannel] Failed to apply language: {lang_err}") + # Reset Bridge so that bot routing reflects the new config. # Without this, Bridge keeps its cached bot instance (e.g. LinkAIBot) # even after the user switches bot_type / use_linkai / model in UI. 
diff --git a/cli/commands/process.py b/cli/commands/process.py index 6ccffdcb..9d22b67f 100644 --- a/cli/commands/process.py +++ b/cli/commands/process.py @@ -298,6 +298,8 @@ def status(): click.echo(_t(f" 模型: {cfg.get('model', 'unknown')}", f" Model: {cfg.get('model', 'unknown')}")) mode = "Chat" if cfg.get("agent") is False else "Agent" click.echo(_t(f" 模式: {mode}", f" Mode: {mode}")) + lang_label = "中文" if i18n.get_language() == "zh" else "English" + click.echo(_t(f" 语言: {lang_label}", f" Language: {lang_label}")) @click.command() diff --git a/plugins/cow_cli/cow_cli.py b/plugins/cow_cli/cow_cli.py index fdd0081f..62bbc786 100644 --- a/plugins/cow_cli/cow_cli.py +++ b/plugins/cow_cli/cow_cli.py @@ -465,6 +465,10 @@ class CowCliPlugin(Plugin): mode = "Chat" if cfg.get("agent") is False else "Agent" lines.append(_t(f" 模式: {mode}", f" Mode: {mode}")) + from common import i18n + lang_label = "中文" if i18n.get_language() == "zh" else "English" + lines.append(_t(f" 语言: {lang_label}", f" Language: {lang_label}")) + session_id = self._get_session_id(e_context, fallback=session_id) agent = self._get_agent(session_id) if agent: From 126649f70f5c7a3970cc5c35b45d3e8729773377 Mon Sep 17 00:00:00 2001 From: zhayujie Date: Sun, 31 May 2026 17:38:31 +0800 Subject: [PATCH 3/5] feat(i18n): localize system prompts, workspace templates and dynamic prompts --- agent/memory/summarizer.py | 125 +++++- agent/prompt/builder.py | 671 ++++++++++++++++++++----------- agent/prompt/workspace.py | 333 +++++++++++++-- agent/protocol/agent.py | 7 +- agent/protocol/agent_stream.py | 54 ++- bridge/agent_initializer.py | 25 +- channel/web/static/js/console.js | 4 +- docs/en/guide/manual-install.mdx | 4 +- docs/en/intro/architecture.mdx | 5 +- docs/guide/manual-install.mdx | 5 +- docs/intro/architecture.mdx | 4 +- docs/ja/guide/manual-install.mdx | 4 +- docs/ja/intro/architecture.mdx | 4 +- 13 files changed, 921 insertions(+), 324 deletions(-) diff --git a/agent/memory/summarizer.py 
b/agent/memory/summarizer.py index e0f2298d..9da349d1 100644 --- a/agent/memory/summarizer.py +++ b/agent/memory/summarizer.py @@ -16,7 +16,7 @@ from datetime import datetime from common.log import logger -SUMMARIZE_SYSTEM_PROMPT = """你是一个对话记录助手。请将对话内容归纳为当天的日常记录。 +SUMMARIZE_SYSTEM_PROMPT_ZH = """你是一个对话记录助手。请将对话内容归纳为当天的日常记录。 ## 要求 @@ -28,7 +28,23 @@ SUMMARIZE_SYSTEM_PROMPT = """你是一个对话记录助手。请将对话内容 当对话没有任何记录价值(仅含问候或无意义内容),直接回复"无"。""" -SUMMARIZE_USER_PROMPT = """请归纳以下对话的日常记录: +SUMMARIZE_SYSTEM_PROMPT_EN = """You are a conversation-logging assistant. Summarize the conversation into a daily record. + +## Requirements + +Summarize by "event", not turn by turn: +- One item per line, starting with "- " +- Merge multiple turns about the same thing +- Only record meaningful events; ignore small talk and greetings +- Keep key decisions, conclusions and to-dos + +If the conversation has no record value (only greetings or meaningless content), reply with exactly "None".""" + +SUMMARIZE_USER_PROMPT_ZH = """请归纳以下对话的日常记录: + +{conversation}""" + +SUMMARIZE_USER_PROMPT_EN = """Summarize the daily record of the following conversation: {conversation}""" @@ -36,7 +52,7 @@ SUMMARIZE_USER_PROMPT = """请归纳以下对话的日常记录: # Deep Dream prompts — distill daily memories → MEMORY.md + dream diary # --------------------------------------------------------------------------- -DREAM_SYSTEM_PROMPT = """你是一个记忆整理助手,负责定期整理用户的长期记忆。 +DREAM_SYSTEM_PROMPT_ZH = """你是一个记忆整理助手,负责定期整理用户的长期记忆。 你将收到两份材料: 1. **当前长期记忆** — MEMORY.md 的全部现有内容 @@ -80,7 +96,51 @@ MEMORY.md 会注入每次对话的系统提示词中,因此必须保持精炼 梦境日记内容... ```""" -DREAM_USER_PROMPT = """## 当前长期记忆(MEMORY.md) +DREAM_SYSTEM_PROMPT_EN = """You are a memory-curation assistant that periodically organizes the user's long-term memory. + +You will receive two inputs: +1. **Current long-term memory** — the full existing content of MEMORY.md +2. 
**Today's diary** — the daily records + +MEMORY.md is injected into the system prompt of every conversation, so it must stay concise and hold only valuable, memory-worthy content. + +**Important: organize strictly based on the provided material. Never fabricate, infer, or add information not present in it.** + +## Tasks + +### Part 1: Updated long-term memory ([MEMORY]) + +Organize and distill on top of the existing memory, and output the complete updated content: +- **Merge & distill**: combine semantically similar items into one dense statement rather than listing them +- **Extract new**: pull memory-worthy new info from today's diary (preferences, decisions, people, rules, lessons) +- **Resolve conflicts**: when new info contradicts an old item, prefer the new and replace the old +- **Clean invalid**: remove temporary notes, blank items, formatting residue, meaningless or duplicate content +- **Drop redundancy**: delete old items already covered by a more concise statement +- One item per line, starting with "- ", without a date prefix +- You may group related items under "## headings" for clarity +- Goal: keep under 50 items, each ideally a single sentence + +### Part 2: Dream diary ([DREAM]) + +Write a short diary in a concise narrative style recording what this curation found, keep it clean and readable: +- Which duplicates or conflicts were found +- What new insights were extracted from the diary +- What cleanup and optimization was done +- Overall feelings and observations + +## Output format (follow strictly) + +``` +[MEMORY] +- memory item 1 +- memory item 2 +... + +[DREAM] +dream diary content... 
+```""" + +DREAM_USER_PROMPT_ZH = """## 当前长期记忆(MEMORY.md) {memory_content} @@ -88,6 +148,47 @@ DREAM_USER_PROMPT = """## 当前长期记忆(MEMORY.md) {daily_content}""" +DREAM_USER_PROMPT_EN = """## Current long-term memory (MEMORY.md) + +{memory_content} + +## Recent diary (last {days} days) + +{daily_content}""" + + +def _is_en() -> bool: + """True when the resolved UI language is English.""" + try: + from common import i18n + return i18n.get_language() == "en" + except Exception: + return False + + +def _summarize_system_prompt() -> str: + return SUMMARIZE_SYSTEM_PROMPT_EN if _is_en() else SUMMARIZE_SYSTEM_PROMPT_ZH + + +def _summarize_user_prompt() -> str: + return SUMMARIZE_USER_PROMPT_EN if _is_en() else SUMMARIZE_USER_PROMPT_ZH + + +def _dream_system_prompt() -> str: + return DREAM_SYSTEM_PROMPT_EN if _is_en() else DREAM_SYSTEM_PROMPT_ZH + + +def _dream_user_prompt() -> str: + return DREAM_USER_PROMPT_EN if _is_en() else DREAM_USER_PROMPT_ZH + + +def _is_empty_sentinel(text: str) -> bool: + """Match the "no record value" sentinel in both zh ("无") and en ("None").""" + if not text: + return True + s = text.strip() + return s == "" or s == "无" or s.lower() == "none" + class MemoryFlushManager: @@ -224,7 +325,7 @@ class MemoryFlushManager: """Background worker: summarize with LLM, write daily memory file.""" try: raw_summary = self._summarize_messages(messages, max_messages) - if not raw_summary or not raw_summary.strip() or raw_summary.strip() == "无": + if _is_empty_sentinel(raw_summary): logger.info(f"[MemoryFlush] No valuable content to flush (reason={reason})") return @@ -264,7 +365,7 @@ class MemoryFlushManager: def _clean_summary_output(raw: str) -> str: """Strip legacy [DAILY]/[MEMORY] markers if present, return clean daily text.""" raw = raw.strip() - if not raw or raw == "无": + if _is_empty_sentinel(raw): return "" # Strip [DAILY] marker @@ -355,7 +456,7 @@ class MemoryFlushManager: import time as _time t0 = _time.monotonic() try: - user_msg = 
DREAM_USER_PROMPT.format( + user_msg = _dream_user_prompt().format( memory_content=memory_content or "(empty)", days=lookback_days, daily_content=daily_content or "(no recent daily records)", @@ -369,7 +470,7 @@ class MemoryFlushManager: temperature=0.3, max_tokens=dream_max_tokens, stream=False, - system=DREAM_SYSTEM_PROMPT, + system=_dream_system_prompt(), ) response = self.llm_model.call(request) raw = self._extract_response_text(response) @@ -501,9 +602,9 @@ class MemoryFlushManager: if self.llm_model: try: summary = self._call_llm_for_summary(conversation_text) - if summary and summary.strip() and summary.strip() != "无": + if not _is_empty_sentinel(summary): return summary.strip() - logger.info("[MemoryFlush] LLM returned empty or '无', skipping write") + logger.info("[MemoryFlush] LLM returned empty sentinel, skipping write") return "" except Exception as e: logger.warning(f"[MemoryFlush] LLM summarization failed, using fallback: {e}") @@ -579,11 +680,11 @@ class MemoryFlushManager: from agent.protocol.models import LLMRequest request = LLMRequest( - messages=[{"role": "user", "content": SUMMARIZE_USER_PROMPT.format(conversation=conversation_text)}], + messages=[{"role": "user", "content": _summarize_user_prompt().format(conversation=conversation_text)}], temperature=0, max_tokens=500, stream=False, - system=SUMMARIZE_SYSTEM_PROMPT, + system=_summarize_system_prompt(), ) response = self.llm_model.call(request) diff --git a/agent/prompt/builder.py b/agent/prompt/builder.py index 0856db70..538d7150 100644 --- a/agent/prompt/builder.py +++ b/agent/prompt/builder.py @@ -15,13 +15,13 @@ from config import conf @dataclass class ContextFile: - """上下文文件""" + """A context file (path + content).""" path: str content: str class PromptBuilder: - """提示词构建器""" + """System prompt builder.""" def __init__(self, workspace_dir: str, language: str = "zh"): """ @@ -88,97 +88,144 @@ def build_agent_system_prompt( **kwargs ) -> str: """ - 构建Agent系统提示词 - - 顺序说明(按重要性和逻辑关系排列): - 1. 
工具系统 - 核心能力,最先介绍 - 2. 技能系统 - 紧跟工具,因为技能需要用 read 工具读取 - 3. 记忆系统 - 记忆检索与写入引导 - 3.5 知识系统 - 结构化知识库(knowledge/index.md 注入) - 4. 工作空间 - 工作环境说明 - 5. 用户身份 - 用户信息(可选) - 6. 项目上下文 - AGENT.md, USER.md, RULE.md, MEMORY.md, BOOTSTRAP.md - 7. 运行时信息 - 元信息(时间、模型等) - + Build the agent system prompt. + + Section order (by importance and logical flow): + 1. Tooling - core capabilities, introduced first + 2. Skills - right after tools, since skills are read via the read tool + 3. Memory - memory recall and writing guidance + 3.5 Knowledge - structured knowledge base (injects knowledge/index.md) + 4. Workspace - working environment description + 5. User identity - user info (optional) + 6. Project context - AGENT.md, USER.md, RULE.md, MEMORY.md, BOOTSTRAP.md + 7. Runtime info - meta info (time, model, etc.) + Args: - workspace_dir: 工作空间目录 - language: 语言 ("zh" 或 "en") - base_persona: 基础人格描述(已废弃,由AGENT.md定义) - user_identity: 用户身份信息 - tools: 工具列表 - context_files: 上下文文件列表 - skill_manager: 技能管理器 - memory_manager: 记忆管理器 - runtime_info: 运行时信息 - **kwargs: 其他参数 - + workspace_dir: workspace directory + language: language ("zh" or "en") + base_persona: base persona description (deprecated, defined by AGENT.md) + user_identity: user identity info + tools: tool list + context_files: context file list + skill_manager: skill manager + memory_manager: memory manager + runtime_info: runtime info + **kwargs: extra args + Returns: - 完整的系统提示词 + The full system prompt. """ sections = [] - - # 1. 工具系统(最重要,放在最前面) + + # 1. Tooling (most important, goes first) if tools: sections.extend(_build_tooling_section(tools, language)) - - # 2. 技能系统(紧跟工具,因为需要用 read 工具) + + # 2. Skills (right after tools, since they need the read tool) if skill_manager: sections.extend(_build_skills_section(skill_manager, tools, language)) - - # 3. 记忆系统(独立的记忆能力) + + # 3. 
Memory (standalone memory capability) if memory_manager: sections.extend(_build_memory_section(memory_manager, tools, language)) - # 3.5 知识系统(结构化知识库) + # 3.5 Knowledge (structured knowledge base) if conf().get("knowledge", True): sections.extend(_build_knowledge_section(workspace_dir, language)) - - # 4. 工作空间(工作环境说明) + + # 4. Workspace (working environment description) sections.extend(_build_workspace_section(workspace_dir, language)) - - # 5. 用户身份(如果有) + + # 5. User identity (if present) if user_identity: sections.extend(_build_user_identity_section(user_identity, language)) - - # 6. 项目上下文文件(AGENT.md, USER.md, RULE.md - 定义人格) + + # 6. Project context files (AGENT.md, USER.md, RULE.md - define the persona) if context_files: sections.extend(_build_context_files_section(context_files, language)) - - # 7. 运行时信息(元信息,放在最后) + + # 7. Runtime info (meta info, goes last) if runtime_info: sections.extend(_build_runtime_section(runtime_info, language)) - + + # 8. Response language (always appended, independent of the skeleton language) + sections.extend(_build_response_language_section(language)) + return "\n".join(sections) +def _build_response_language_section(language: str) -> List[str]: + """Response-language rule, appended regardless of the prompt skeleton language. + + Keeps the agent's reply language aligned with the user's input by default, + so a Chinese-built prompt still answers an English user in English. 
+ """ + if language == "en": + return [ + "## 🌐 Response language", + "", + "By default, reply in the same language as the user's input, " + "unless the user explicitly asks for another language.", + "", + ] + return [ + "## 🌐 回复语言", + "", + "默认使用与用户输入相同的语言回复,除非用户明确要求使用其他语言。", + "", + ] + + def _build_identity_section(base_persona: Optional[str], language: str) -> List[str]: - """构建基础身份section - 不再需要,身份由AGENT.md定义""" - # 不再生成基础身份section,完全由AGENT.md定义 + """Base identity section - no longer needed, identity is defined by AGENT.md.""" + # Identity is fully defined by AGENT.md, so emit nothing here. return [] def _build_tooling_section(tools: List[Any], language: str) -> List[str]: """Build tooling section with concise tool list and call style guide.""" + is_en = language == "en" # One-line summaries for known tools (details are in the tool schema) - core_summaries = { - "read": "读取文件内容", - "write": "创建或覆盖文件", - "edit": "精确编辑文件", - "ls": "列出目录内容", - "grep": "搜索文件内容", - "find": "按模式查找文件", - "bash": "执行shell命令", - "terminal": "管理后台进程", - "web_search": "网络搜索", - "web_fetch": "获取URL内容", - "browser": "控制浏览器(关键结果或需要协助可截图发送给用户)", - "memory_search": "搜索记忆", - "memory_get": "读取记忆内容", - "env_config": "管理API密钥和技能配置", - "scheduler": "管理定时任务和提醒", - "send": "发送本地文件给用户(仅限本地文件,URL直接放在回复文本中)", - "vision": "分析图片内容(识别、描述、OCR文字提取等)", - } + if is_en: + core_summaries = { + "read": "read file content", + "write": "create or overwrite a file", + "edit": "make precise edits to a file", + "ls": "list directory contents", + "grep": "search file contents", + "find": "find files by pattern", + "bash": "run shell commands", + "terminal": "manage background processes", + "web_search": "web search", + "web_fetch": "fetch URL content", + "browser": "control the browser (screenshot key results or send to the user when help is needed)", + "memory_search": "search memory", + "memory_get": "read memory content", + "env_config": "manage API keys and skill config", + "scheduler": "manage scheduled tasks 
and reminders", + "send": "send a local file to the user (local files only; put URLs directly in the reply text)", + "vision": "analyze images (recognition, description, OCR, etc.)", + } + else: + core_summaries = { + "read": "读取文件内容", + "write": "创建或覆盖文件", + "edit": "精确编辑文件", + "ls": "列出目录内容", + "grep": "搜索文件内容", + "find": "按模式查找文件", + "bash": "执行shell命令", + "terminal": "管理后台进程", + "web_search": "网络搜索", + "web_fetch": "获取URL内容", + "browser": "控制浏览器(关键结果或需要协助可截图发送给用户)", + "memory_search": "搜索记忆", + "memory_get": "读取记忆内容", + "env_config": "管理API密钥和技能配置", + "scheduler": "管理定时任务和提醒", + "send": "发送本地文件给用户(仅限本地文件,URL直接放在回复文本中)", + "vision": "分析图片内容(识别、描述、OCR文字提取等)", + } # Preferred display order tool_order = [ @@ -205,30 +252,46 @@ def _build_tooling_section(tools: List[Any], language: str) -> List[str]: summary = available[name] tool_lines.append(f"- {name}: {summary}" if summary else f"- {name}") - lines = [ - "## 🔧 工具系统", - "", - "可用工具(名称大小写敏感,严格按列表调用):", - "\n".join(tool_lines), - "", - "工具调用风格:", - "", - "- 多步骤任务、复杂决策、敏感操作时,应简要说明当前在做什么、为什么这样做,让用户了解关键进展", - "- 持续推进直到任务完成,完成后向用户报告结果", - "- 回复中涉及密钥、令牌等敏感信息必须脱敏", - "- URL链接直接放在回复文本中即可,系统会自动处理和渲染。无需下载后使用send工具发送", - "", - ] + if is_en: + lines = [ + "## 🔧 Tooling", + "", + "Available tools (names are case-sensitive, call exactly as listed):", + "\n".join(tool_lines), + "", + "Tool-calling style:", + "", + "- For multi-step tasks, complex decisions or sensitive operations, briefly explain what you are doing and why, so the user follows key progress", + "- Keep going until the task is done, then report the result to the user", + "- Always redact secrets, tokens and other sensitive info in replies", + "- Put URLs directly in the reply text; the system handles and renders them. 
Don't download and re-send them via the send tool", + "", + ] + else: + lines = [ + "## 🔧 工具系统", + "", + "可用工具(名称大小写敏感,严格按列表调用):", + "\n".join(tool_lines), + "", + "工具调用风格:", + "", + "- 多步骤任务、复杂决策、敏感操作时,应简要说明当前在做什么、为什么这样做,让用户了解关键进展", + "- 持续推进直到任务完成,完成后向用户报告结果", + "- 回复中涉及密钥、令牌等敏感信息必须脱敏", + "- URL链接直接放在回复文本中即可,系统会自动处理和渲染。无需下载后使用send工具发送", + "", + ] return lines def _build_skills_section(skill_manager: Any, tools: Optional[List[Any]], language: str) -> List[str]: - """构建技能系统section""" + """Build the skills section.""" if not skill_manager: return [] - # 获取read工具名称 + # Resolve the read tool name read_tool_name = "read" if tools: for tool in tools: @@ -237,23 +300,40 @@ def _build_skills_section(skill_manager: Any, tools: Optional[List[Any]], langua read_tool_name = tool_name break - lines = [ - "## 🧩 技能系统(mandatory)", - "", - "在回复之前:扫描下方 中每个技能的 。", - "", - f"- 如果有技能的描述与用户需求匹配:使用 `{read_tool_name}` 工具读取其 路径的 SKILL.md 文件,然后严格遵循文件中的指令。" - "当有匹配的技能时,应优先使用技能", - "- 如果多个技能都适用则选择最匹配的一个,然后读取并遵循。", - "- 如果没有技能明确适用:不要读取任何 SKILL.md,直接使用通用工具。", - "", - f"**重要**: 技能不是工具,不能直接调用。使用技能的唯一方式是用 `{read_tool_name}` 读取 SKILL.md 文件,然后按文件内容操作。" - "永远不要一次性读取多个技能,只在选择后再读取。", - "", - "以下是可用技能:" - ] + if language == "en": + lines = [ + "## 🧩 Skills (mandatory)", + "", + "Before replying: scan the of every skill in below.", + "", + f"- If a skill's description matches the user's need: use the `{read_tool_name}` tool to read the SKILL.md at its path, then strictly follow the instructions in the file. " + "Prefer using a skill when one matches.", + "- If multiple skills apply, pick the best-matching one, then read and follow it.", + "- If no skill clearly applies: do not read any SKILL.md, just use the general tools.", + "", + f"**Important**: skills are not tools and cannot be called directly. The only way to use a skill is to read its SKILL.md with `{read_tool_name}`, then act on the file's content. 
" + "Never read multiple skills at once — only read one after selecting it.", + "", + "Available skills:" + ] + else: + lines = [ + "## 🧩 技能系统(mandatory)", + "", + "在回复之前:扫描下方 中每个技能的 。", + "", + f"- 如果有技能的描述与用户需求匹配:使用 `{read_tool_name}` 工具读取其 路径的 SKILL.md 文件,然后严格遵循文件中的指令。" + "当有匹配的技能时,应优先使用技能", + "- 如果多个技能都适用则选择最匹配的一个,然后读取并遵循。", + "- 如果没有技能明确适用:不要读取任何 SKILL.md,直接使用通用工具。", + "", + f"**重要**: 技能不是工具,不能直接调用。使用技能的唯一方式是用 `{read_tool_name}` 读取 SKILL.md 文件,然后按文件内容操作。" + "永远不要一次性读取多个技能,只在选择后再读取。", + "", + "以下是可用技能:" + ] - # 添加技能列表(通过skill_manager获取) + # Append the skills list (built by skill_manager) try: skills_prompt = skill_manager.build_skills_prompt() logger.debug(f"[PromptBuilder] Skills prompt length: {len(skills_prompt) if skills_prompt else 0}") @@ -271,7 +351,7 @@ def _build_skills_section(skill_manager: Any, tools: Optional[List[Any]], langua def _build_memory_section(memory_manager: Any, tools: Optional[List[Any]], language: str) -> List[str]: - """构建记忆系统section""" + """Build the memory section.""" if not memory_manager: return [] @@ -286,43 +366,82 @@ def _build_memory_section(memory_manager: Any, tools: Optional[List[Any]], langu from datetime import datetime today_file = datetime.now().strftime("%Y-%m-%d") + ".md" - lines = [ - "## 🧠 记忆系统", - "", - "### Memory Recall(mandatory)", - "", - "当用户询问过往事件、引用之前的决定、提到人物关系、偏好、待办、或你对某事不确定时,**必须先检索记忆再回答**。", - "如果 MEMORY.md 中已有相关信息则无需重复检索。完整内容和每日记忆需要通过工具检索。", - "", - "1. 不确定位置 → `memory_search` 关键词/语义检索", - "2. 已知位置 → `memory_get` 直接读取对应行", - "3. 
search 无结果 → `memory_get` 读最近两天记忆", - "", - "**记忆文件结构**:", - "- `MEMORY.md`: 长期记忆索引(已自动加载到上下文,核心信息、偏好、决策等)", - f"- `memory/YYYY-MM-DD.md`: 每日记忆,今天是 `memory/{today_file}`", - "- `knowledge/`: 结构化知识库(见下方知识系统)", - "", - "### 写入记忆", - "", - "遇到以下情况时,**主动**将信息写入记忆文件(无需告知用户):", - "", - "- 用户要求记住某些信息,或使用了「记住」「以后」「总是」「不要」「偏好」等表达", - "- 用户分享了重要的个人偏好、习惯、决策", - "- 对话中产生了重要的结论、方案、约定", - "- 完成了复杂任务,值得记录关键步骤和结果", - "", - "**存储规则**:", - f"- 长期核心信息 → `MEMORY.md`", - f"- 当天事件/进展 → `memory/{today_file}`", - "- 结构化知识 → `knowledge/`(见知识系统)", - "- 追加 → `edit` 工具,oldText 留空", - "- 修改 → `edit` 工具,oldText 填写要替换的文本", - "- **禁止写入敏感信息**(API密钥、令牌等)", - "", - "**使用原则**: 自然使用记忆,就像你本来就知道;不用刻意提起,除非用户问起。", - "", - ] + if language == "en": + lines = [ + "## 🧠 Memory", + "", + "### Memory Recall (mandatory)", + "", + "When the user asks about past events, references an earlier decision, mentions relationships, preferences or to-dos, or when you are unsure about something, **you must search memory before answering**.", + "No need to re-search if the info is already in MEMORY.md. Full content and daily memory must be retrieved via tools.", + "", + "1. Location unknown → `memory_search` (keyword / semantic search)", + "2. Location known → `memory_get` to read the exact lines", + "3. 
Search returns nothing → `memory_get` to read the last two days of memory", + "", + "**Memory file structure**:", + "- `MEMORY.md`: long-term memory index (already auto-loaded into context: core info, preferences, decisions, etc.)", + f"- `memory/YYYY-MM-DD.md`: daily memory; today is `memory/{today_file}`", + "- `knowledge/`: structured knowledge base (see the knowledge system below)", + "", + "### Writing memory", + "", + "In the following cases, **proactively** write info to memory files (no need to tell the user):", + "", + "- The user asks you to remember something, or uses words like \"remember\", \"from now on\", \"always\", \"never\", \"prefer\"", + "- The user shares important personal preferences, habits or decisions", + "- The conversation produces an important conclusion, plan or agreement", + "- A complex task is completed and the key steps and results are worth recording", + "", + "**Storage rules**:", + "- Long-term core info → `MEMORY.md`", + f"- Today's events/progress → `memory/{today_file}`", + "- Structured knowledge → `knowledge/` (see the knowledge system)", + "- Append → `edit` tool with empty oldText", + "- Modify → `edit` tool with oldText set to the text to replace", + "- **Never write sensitive info** (API keys, tokens, etc.)", + "", + "**Principle**: use memory naturally, as if you simply knew it; don't bring it up unless asked.", + "", + ] + else: + lines = [ + "## 🧠 记忆系统", + "", + "### Memory Recall(mandatory)", + "", + "当用户询问过往事件、引用之前的决定、提到人物关系、偏好、待办、或你对某事不确定时,**必须先检索记忆再回答**。", + "如果 MEMORY.md 中已有相关信息则无需重复检索。完整内容和每日记忆需要通过工具检索。", + "", + "1. 不确定位置 → `memory_search` 关键词/语义检索", + "2. 已知位置 → `memory_get` 直接读取对应行", + "3. 
search 无结果 → `memory_get` 读最近两天记忆", + "", + "**记忆文件结构**:", + "- `MEMORY.md`: 长期记忆索引(已自动加载到上下文,核心信息、偏好、决策等)", + f"- `memory/YYYY-MM-DD.md`: 每日记忆,今天是 `memory/{today_file}`", + "- `knowledge/`: 结构化知识库(见下方知识系统)", + "", + "### 写入记忆", + "", + "遇到以下情况时,**主动**将信息写入记忆文件(无需告知用户):", + "", + "- 用户要求记住某些信息,或使用了「记住」「以后」「总是」「不要」「偏好」等表达", + "- 用户分享了重要的个人偏好、习惯、决策", + "- 对话中产生了重要的结论、方案、约定", + "- 完成了复杂任务,值得记录关键步骤和结果", + "", + "**存储规则**:", + f"- 长期核心信息 → `MEMORY.md`", + f"- 当天事件/进展 → `memory/{today_file}`", + "- 结构化知识 → `knowledge/`(见知识系统)", + "- 追加 → `edit` 工具,oldText 留空", + "- 修改 → `edit` 工具,oldText 填写要替换的文本", + "- **禁止写入敏感信息**(API密钥、令牌等)", + "", + "**使用原则**: 自然使用记忆,就像你本来就知道;不用刻意提起,除非用户问起。", + "", + ] return lines @@ -339,37 +458,61 @@ def _build_knowledge_section(workspace_dir: str, language: str) -> List[str]: except Exception: return [] - lines = [ - "## 📚 知识系统", - "", - "你拥有一个持续积累的个人知识库 `knowledge/`,这是你的长期结构化知识存储。", - "", - "### 自动写入规则(mandatory)", - "", - "以下场景**必须**在回复的同时写入知识库,**直接写入,不要询问用户是否需要**:", - "", - "1. **用户分享了文章/链接/文档** → 阅读理解后,在同一轮回复中将要点写入 `knowledge/sources/.md`", - "2. **深度讨论产生了结论/方案** → 整理为 `knowledge/analysis/.md`", - "3. **对话涉及重要实体**(人物/公司/项目)→ 创建或更新 `knowledge/entities/.md`", - "4. **讨论了技术概念/方法论** → 整理为 `knowledge/concepts/.md`", - "", - "每次写入知识页面后,**必须同步更新** `knowledge/index.md` 添加一行索引。", - "详细的页面格式和操作规范,请读取技能 `knowledge-wiki` 的 SKILL.md。", - "", - "⚠️ 不要问「要不要存到知识库」——符合上述场景就直接写入,这是你的本能行为。", - "", - ] + if language == "en": + lines = [ + "## 📚 Knowledge", + "", + "You have a continuously growing personal knowledge base `knowledge/` — your long-term structured knowledge store.", + "", + "### Auto-write rules (mandatory)", + "", + "In the following cases you **must** write to the knowledge base alongside your reply, **directly, without asking the user**:", + "", + "1. **User shares an article / link / document** → after reading and understanding, write the key points to `knowledge/sources/.md` in the same turn", + "2. 
**An in-depth discussion produces a conclusion / plan** → organize it into `knowledge/analysis/.md`", + "3. **The conversation involves an important entity** (person / company / project) → create or update `knowledge/entities/.md`", + "4. **A technical concept / methodology is discussed** → organize it into `knowledge/concepts/.md`", + "", + "After writing any knowledge page, you **must update** `knowledge/index.md` with a new index line in sync.", + "For detailed page format and conventions, read the SKILL.md of the `knowledge-wiki` skill.", + "", + "⚠️ Don't ask \"should I save this to the knowledge base?\" — if a case above matches, just write it. This is instinctive.", + "", + ] + else: + lines = [ + "## 📚 知识系统", + "", + "你拥有一个持续积累的个人知识库 `knowledge/`,这是你的长期结构化知识存储。", + "", + "### 自动写入规则(mandatory)", + "", + "以下场景**必须**在回复的同时写入知识库,**直接写入,不要询问用户是否需要**:", + "", + "1. **用户分享了文章/链接/文档** → 阅读理解后,在同一轮回复中将要点写入 `knowledge/sources/.md`", + "2. **深度讨论产生了结论/方案** → 整理为 `knowledge/analysis/.md`", + "3. **对话涉及重要实体**(人物/公司/项目)→ 创建或更新 `knowledge/entities/.md`", + "4. **讨论了技术概念/方法论** → 整理为 `knowledge/concepts/.md`", + "", + "每次写入知识页面后,**必须同步更新** `knowledge/index.md` 添加一行索引。", + "详细的页面格式和操作规范,请读取技能 `knowledge-wiki` 的 SKILL.md。", + "", + "⚠️ 不要问「要不要存到知识库」——符合上述场景就直接写入,这是你的本能行为。", + "", + ] if index_content: lines.extend([ - "### 当前知识索引", + ("### Current knowledge index" if language == "en" else "### 当前知识索引"), "", index_content, "", ]) lines.extend([ - "**查询方式**:用 `read` 读取知识页面,或用 `memory_search` 检索(知识已纳入向量索引)。", + ("**How to query**: use `read` to open a knowledge page, or `memory_search` (knowledge is in the vector index)." 
+ if language == "en" else + "**查询方式**:用 `read` 读取知识页面,或用 `memory_search` 检索(知识已纳入向量索引)。"), "", ]) @@ -377,76 +520,118 @@ def _build_knowledge_section(workspace_dir: str, language: str) -> List[str]: def _build_user_identity_section(user_identity: Dict[str, str], language: str) -> List[str]: - """构建用户身份section""" + """Build the user identity section.""" if not user_identity: return [] + is_en = language == "en" lines = [ - "## 👤 用户身份", + ("## 👤 User identity" if is_en else "## 👤 用户身份"), "", ] - + if user_identity.get("name"): - lines.append(f"**用户姓名**: {user_identity['name']}") + lines.append(f"**{'Name' if is_en else '用户姓名'}**: {user_identity['name']}") if user_identity.get("nickname"): - lines.append(f"**称呼**: {user_identity['nickname']}") + lines.append(f"**{'Preferred name' if is_en else '称呼'}**: {user_identity['nickname']}") if user_identity.get("timezone"): - lines.append(f"**时区**: {user_identity['timezone']}") + lines.append(f"**{'Timezone' if is_en else '时区'}**: {user_identity['timezone']}") if user_identity.get("notes"): - lines.append(f"**备注**: {user_identity['notes']}") - + lines.append(f"**{'Notes' if is_en else '备注'}**: {user_identity['notes']}") + lines.append("") - + return lines def _build_docs_section(workspace_dir: str, language: str) -> List[str]: - """构建文档路径section - 已移除,不再需要""" - # 不再生成文档section + """Docs-path section - removed, no longer needed.""" + # No docs section is generated anymore. return [] def _build_workspace_section(workspace_dir: str, language: str) -> List[str]: - """构建工作空间section""" - lines = [ - "## 📂 工作空间", - "", - f"你的工作目录是: `{workspace_dir}`", - "", - "**路径使用规则** (非常重要):", - "", - f"1. **相对路径的基准目录**: 所有相对路径都是相对于 `{workspace_dir}` 而言的", - f" - ✅ 正确: 访问工作空间内的文件用相对路径,如 `AGENT.md`", - f" - ❌ 错误: 用相对路径访问其他目录的文件 (如果它不在 `{workspace_dir}` 内)", - "", - "2. **访问其他目录**: 如果要访问工作空间之外的目录(如项目代码、系统文件),**必须使用绝对路径**", - f" - ✅ 正确: 例如 `~/chatgpt-on-wechat`、`/usr/local/`", - f" - ❌ 错误: 假设相对路径会指向其他目录", - "", - "3. 
**路径解析示例**:", - f" - 相对路径 `memory/` → 实际路径 `{workspace_dir}/memory/`", - f" - 绝对路径 `~/chatgpt-on-wechat/docs/` → 实际路径 `~/chatgpt-on-wechat/docs/`", - "", - "4. **不确定时**: 先用 `bash pwd` 确认当前目录,或用 `ls .` 查看当前位置", - "", - "**重要说明 - 文件已自动加载**:", - "", - "以下文件在会话启动时**已经自动加载**到系统提示词中,你**无需再用 read 工具读取**:", - "", - "- ✅ `AGENT.md`: 已加载 - 你的人格和灵魂设定,请严格遵循。当你的名字、性格或交流风格发生变化时,主动用 `edit` 更新此文件", - "- ✅ `USER.md`: 已加载 - 用户的身份信息。当用户修改称呼、姓名等身份信息时,用 `edit` 更新此文件", - "- ✅ `RULE.md`: 已加载 - 工作空间使用指南和规则,请严格遵循", - "- ✅ `MEMORY.md`: 已加载 - 长期记忆索引", - "", - "**💬 交流规范**:", - "", - "- 记忆相关操作无需暴露文件名,用自然语言表达即可。例如说「我已记住」而非「已更新 MEMORY.md」", - "- 任务执行过程中的关键决策和步骤应该告知用户,让用户了解你在做什么、为什么这么做", - "- 做真正有帮助的助手,而不是表演式的客套,尽可能帮忙解决问题", - "- 回复应结构清晰、重点突出。善用 **加粗**、列表、分段等格式让信息一目了然", - "- 适当使用 emoji 让表达更生动自然 🎯,但不要过度堆砌", - "", - ] + """Build the workspace section.""" + if language == "en": + lines = [ + "## 📂 Workspace", + "", + f"Your working directory is: `{workspace_dir}`", + "", + "**Path rules** (very important):", + "", + f"1. **Base directory for relative paths**: all relative paths are relative to `{workspace_dir}`", + " - ✅ Correct: use relative paths for files inside the workspace, e.g. `AGENT.md`", + f" - ❌ Wrong: using a relative path for files in other directories (if not inside `{workspace_dir}`)", + "", + "2. **Accessing other directories**: to reach directories outside the workspace (project code, system files), **you must use absolute paths**", + " - ✅ Correct: e.g. `~/chatgpt-on-wechat`, `/usr/local/`", + " - ❌ Wrong: assuming a relative path points to another directory", + "", + "3. **Path resolution examples**:", + f" - relative `memory/` → actual `{workspace_dir}/memory/`", + " - absolute `~/chatgpt-on-wechat/docs/` → actual `~/chatgpt-on-wechat/docs/`", + "", + "4. 
**When unsure**: run `bash pwd` to confirm the current directory, or `ls .` to see where you are", + "", + "**Important - files already auto-loaded**:", + "", + "The following files are **already auto-loaded** into the system prompt at session start, so you **don't need to read them again with the read tool**:", + "", + "- ✅ `AGENT.md`: loaded - your persona and soul; follow it strictly. When your name, personality or style changes, proactively `edit` this file", + "- ✅ `USER.md`: loaded - the user's identity info. When the user changes how they're addressed, their name, etc., `edit` this file", + "- ✅ `RULE.md`: loaded - workspace guide and rules; follow them strictly", + "- ✅ `MEMORY.md`: loaded - long-term memory index", + "", + "**💬 Communication norms**:", + "", + "- No need to expose file names for memory operations; use natural language. Say \"I'll remember that\" rather than \"updated MEMORY.md\"", + "- Tell the user about key decisions and steps during a task, so they know what you're doing and why", + "- Be genuinely helpful rather than performatively polite; solve the problem as much as you can", + "- Keep replies well-structured and focused. Use **bold**, lists and sections to make info clear at a glance", + "- Use emoji to make expression lively 🎯, but don't overdo it", + "", + ] + else: + lines = [ + "## 📂 工作空间", + "", + f"你的工作目录是: `{workspace_dir}`", + "", + "**路径使用规则** (非常重要):", + "", + f"1. **相对路径的基准目录**: 所有相对路径都是相对于 `{workspace_dir}` 而言的", + f" - ✅ 正确: 访问工作空间内的文件用相对路径,如 `AGENT.md`", + f" - ❌ 错误: 用相对路径访问其他目录的文件 (如果它不在 `{workspace_dir}` 内)", + "", + "2. **访问其他目录**: 如果要访问工作空间之外的目录(如项目代码、系统文件),**必须使用绝对路径**", + f" - ✅ 正确: 例如 `~/chatgpt-on-wechat`、`/usr/local/`", + f" - ❌ 错误: 假设相对路径会指向其他目录", + "", + "3. **路径解析示例**:", + f" - 相对路径 `memory/` → 实际路径 `{workspace_dir}/memory/`", + f" - 绝对路径 `~/chatgpt-on-wechat/docs/` → 实际路径 `~/chatgpt-on-wechat/docs/`", + "", + "4. 
**不确定时**: 先用 `bash pwd` 确认当前目录,或用 `ls .` 查看当前位置", + "", + "**重要说明 - 文件已自动加载**:", + "", + "以下文件在会话启动时**已经自动加载**到系统提示词中,你**无需再用 read 工具读取**:", + "", + "- ✅ `AGENT.md`: 已加载 - 你的人格和灵魂设定,请严格遵循。当你的名字、性格或交流风格发生变化时,主动用 `edit` 更新此文件", + "- ✅ `USER.md`: 已加载 - 用户的身份信息。当用户修改称呼、姓名等身份信息时,用 `edit` 更新此文件", + "- ✅ `RULE.md`: 已加载 - 工作空间使用指南和规则,请严格遵循", + "- ✅ `MEMORY.md`: 已加载 - 长期记忆索引", + "", + "**💬 交流规范**:", + "", + "- 记忆相关操作无需暴露文件名,用自然语言表达即可。例如说「我已记住」而非「已更新 MEMORY.md」", + "- 任务执行过程中的关键决策和步骤应该告知用户,让用户了解你在做什么、为什么这么做", + "- 做真正有帮助的助手,而不是表演式的客套,尽可能帮忙解决问题", + "- 回复应结构清晰、重点突出。善用 **加粗**、列表、分段等格式让信息一目了然", + "- 适当使用 emoji 让表达更生动自然 🎯,但不要过度堆砌", + "", + ] # Cloud deployment: inject websites directory info and access URL cloud_website_lines = _build_cloud_website_section(workspace_dir) @@ -466,29 +651,42 @@ def _build_cloud_website_section(workspace_dir: str) -> List[str]: def _build_context_files_section(context_files: List[ContextFile], language: str) -> List[str]: - """构建项目上下文文件section""" + """Build the project context files section.""" if not context_files: return [] - # 检查是否有AGENT.md + # Check whether AGENT.md is present has_agent = any( f.path.lower().endswith('agent.md') or 'agent.md' in f.path.lower() for f in context_files ) - lines = [ - "# 📋 项目上下文", - "", - "以下项目上下文文件已被加载:", - "", - ] - + is_en = language == "en" + if is_en: + lines = [ + "# 📋 Project context", + "", + "The following project context files have been loaded:", + "", + ] + else: + lines = [ + "# 📋 项目上下文", + "", + "以下项目上下文文件已被加载:", + "", + ] + if has_agent: - lines.append("**`AGENT.md` 是你的灵魂文件** 🪞:严格遵循其中定义的人格、语气和设定,做真实的自己,避免僵硬、模板化的回复。") - lines.append("当用户通过对话透露了对你性格、风格、职责、能力边界的新期望,你应该主动用 `edit` 更新 AGENT.md 以反映这些演变。") + if is_en: + lines.append("**`AGENT.md` is your soul file** 🪞: strictly follow the persona, tone and settings it defines. 
Be your real self, avoid stiff, template-like replies.") + lines.append("When the user reveals new expectations about your personality, style, responsibilities or capability boundaries, proactively `edit` AGENT.md to reflect that evolution.") + else: + lines.append("**`AGENT.md` 是你的灵魂文件** 🪞:严格遵循其中定义的人格、语气和设定,做真实的自己,避免僵硬、模板化的回复。") + lines.append("当用户通过对话透露了对你性格、风格、职责、能力边界的新期望,你应该主动用 `edit` 更新 AGENT.md 以反映这些演变。") lines.append("") - # 添加每个文件的内容 + # Append the content of each file for file in context_files: lines.append(f"## {file.path}") lines.append("") @@ -499,21 +697,23 @@ def _build_context_files_section(context_files: List[ContextFile], language: str def _build_runtime_section(runtime_info: Dict[str, Any], language: str) -> List[str]: - """构建运行时信息section - 支持动态时间""" + """Build the runtime info section - supports dynamic time.""" if not runtime_info: return [] + is_en = language == "en" + time_label = "Current time" if is_en else "当前时间" lines = [ - "## ⚙️ 运行时信息", + ("## ⚙️ Runtime info" if is_en else "## ⚙️ 运行时信息"), "", ] - + # Add current time if available # Support dynamic time via callable function if callable(runtime_info.get("_get_current_time")): try: time_info = runtime_info["_get_current_time"]() - time_line = f"当前时间: {time_info['time']} {time_info['weekday']} ({time_info['timezone']})" + time_line = f"{time_label}: {time_info['time']} {time_info['weekday']} ({time_info['timezone']})" lines.append(time_line) lines.append("") except Exception as e: @@ -523,35 +723,38 @@ def _build_runtime_section(runtime_info: Dict[str, Any], language: str) -> List[ time_str = runtime_info["current_time"] weekday = runtime_info.get("weekday", "") timezone = runtime_info.get("timezone", "") - - time_line = f"当前时间: {time_str}" + + time_line = f"{time_label}: {time_str}" if weekday: time_line += f" {weekday}" if timezone: time_line += f" ({timezone})" - + lines.append(time_line) lines.append("") - + # Add other runtime info + model_label = "model" if is_en else "模型" + 
workspace_label = "workspace" if is_en else "工作空间" + channel_label = "channel" if is_en else "渠道" runtime_parts = [] # Support dynamic model via callable, fallback to static value if callable(runtime_info.get("_get_model")): try: - runtime_parts.append(f"模型={runtime_info['_get_model']()}") + runtime_parts.append(f"{model_label}={runtime_info['_get_model']()}") except Exception: if runtime_info.get("model"): - runtime_parts.append(f"模型={runtime_info['model']}") + runtime_parts.append(f"{model_label}={runtime_info['model']}") elif runtime_info.get("model"): - runtime_parts.append(f"模型={runtime_info['model']}") + runtime_parts.append(f"{model_label}={runtime_info['model']}") if runtime_info.get("workspace"): - runtime_parts.append(f"工作空间={runtime_info['workspace']}") + runtime_parts.append(f"{workspace_label}={runtime_info['workspace']}") # Only add channel if it's not the default "web" if runtime_info.get("channel") and runtime_info.get("channel") != "web": - runtime_parts.append(f"渠道={runtime_info['channel']}") - + runtime_parts.append(f"{channel_label}={runtime_info['channel']}") + if runtime_parts: - lines.append("运行时: " + " | ".join(runtime_parts)) + lines.append(("Runtime: " if is_en else "运行时: ") + " | ".join(runtime_parts)) lines.append("") - + return lines diff --git a/agent/prompt/workspace.py b/agent/prompt/workspace.py index 797006ce..dcbe384f 100644 --- a/agent/prompt/workspace.py +++ b/agent/prompt/workspace.py @@ -1,7 +1,7 @@ """ -Workspace Management - 工作空间管理模块 +Workspace Management -负责初始化工作空间、创建模板文件、加载上下文文件 +Initializes the workspace, creates template files, and loads context files. 
""" from __future__ import annotations @@ -13,7 +13,7 @@ from common.log import logger from .builder import ContextFile -# 默认文件名常量 +# Default file name constants DEFAULT_AGENT_FILENAME = "AGENT.md" DEFAULT_USER_FILENAME = "USER.md" DEFAULT_RULE_FILENAME = "RULE.md" @@ -23,7 +23,7 @@ DEFAULT_BOOTSTRAP_FILENAME = "BOOTSTRAP.md" @dataclass class WorkspaceFiles: - """工作空间文件路径""" + """Workspace file paths.""" agent_path: str user_path: str rule_path: str @@ -33,14 +33,14 @@ class WorkspaceFiles: def ensure_workspace(workspace_dir: str, create_templates: bool = True) -> WorkspaceFiles: """ - 确保工作空间存在,并创建必要的模板文件 - + Ensure the workspace exists and create the necessary template files. + Args: - workspace_dir: 工作空间目录路径 - create_templates: 是否创建模板文件(首次运行时) - + workspace_dir: workspace directory path + create_templates: whether to create template files (on first run) + Returns: - WorkspaceFiles对象,包含所有文件路径 + A WorkspaceFiles object with all file paths. """ # Check if this is a brand new workspace (AGENT.md not yet created). # Cannot rely on directory existence because other modules (e.g. 
ConversationStore) @@ -48,23 +48,23 @@ def ensure_workspace(workspace_dir: str, create_templates: bool = True) -> Works agent_path = os.path.join(workspace_dir, DEFAULT_AGENT_FILENAME) is_new_workspace = not os.path.exists(agent_path) - # 确保目录存在 + # Ensure the directory exists os.makedirs(workspace_dir, exist_ok=True) - # 定义文件路径 + # Define file paths user_path = os.path.join(workspace_dir, DEFAULT_USER_FILENAME) rule_path = os.path.join(workspace_dir, DEFAULT_RULE_FILENAME) - memory_path = os.path.join(workspace_dir, DEFAULT_MEMORY_FILENAME) # MEMORY.md 在根目录 - memory_dir = os.path.join(workspace_dir, "memory") # 每日记忆子目录 + memory_path = os.path.join(workspace_dir, DEFAULT_MEMORY_FILENAME) # MEMORY.md at the root + memory_dir = os.path.join(workspace_dir, "memory") # daily memory subdirectory - # 创建memory子目录 + # Create the memory subdirectory os.makedirs(memory_dir, exist_ok=True) - # 创建skills子目录 (for workspace-level skills installed by agent) + # Create the skills subdirectory (for workspace-level skills installed by agent) skills_dir = os.path.join(workspace_dir, "skills") os.makedirs(skills_dir, exist_ok=True) - # 创建websites子目录 (for web pages / sites generated by agent) + # Create the websites subdirectory (for web pages / sites generated by agent) websites_dir = os.path.join(workspace_dir, "websites") os.makedirs(websites_dir, exist_ok=True) @@ -74,7 +74,7 @@ def ensure_workspace(workspace_dir: str, create_templates: bool = True) -> Works knowledge_dir = os.path.join(workspace_dir, "knowledge") os.makedirs(knowledge_dir, exist_ok=True) - # 如果需要,创建模板文件 + # Create template files if requested if create_templates: _create_template_if_missing(agent_path, _get_agent_template()) _create_template_if_missing(user_path, _get_user_template()) @@ -109,17 +109,17 @@ def ensure_workspace(workspace_dir: str, create_templates: bool = True) -> Works def load_context_files(workspace_dir: str, files_to_load: Optional[List[str]] = None) -> List[ContextFile]: """ - 加载工作空间的上下文文件 - + 
Load the workspace context files. + Args: - workspace_dir: 工作空间目录 - files_to_load: 要加载的文件列表(相对路径),如果为None则加载所有标准文件 - + workspace_dir: workspace directory + files_to_load: list of files (relative paths) to load; if None, load all standard files + Returns: - ContextFile对象列表 + A list of ContextFile objects. """ if files_to_load is None: - # 默认加载的文件(按优先级排序) + # Files loaded by default (in priority order) files_to_load = [ DEFAULT_AGENT_FILENAME, DEFAULT_USER_FILENAME, @@ -151,7 +151,7 @@ def load_context_files(workspace_dir: str, files_to_load: Optional[List[str]] = with open(filepath, 'r', encoding='utf-8') as f: content = f.read().strip() - # 跳过空文件或只包含模板占位符的文件 + # Skip empty files or files that only contain template placeholders if not content or _is_template_placeholder(content): continue @@ -173,7 +173,7 @@ def load_context_files(workspace_dir: str, files_to_load: Optional[List[str]] = def _create_template_if_missing(filepath: str, template_content: str): - """如果文件不存在,创建模板文件""" + """Create the template file if it does not exist.""" if not os.path.exists(filepath): try: with open(filepath, 'w', encoding='utf-8') as f: @@ -214,19 +214,23 @@ def _truncate_memory_content(content: str) -> str: def _is_template_placeholder(content: str) -> bool: - """检查内容是否为模板占位符""" - # 常见的占位符模式 + """Check whether the content is still a template placeholder.""" + # Common placeholder patterns (zh + en templates) placeholders = [ "*(填写", "*(在首次对话时填写", "*(可选)", "*(根据需要添加", + "*(filled during", + "*(ask during", + "*(optional)", + "*(how the user", ] lines = content.split('\n') non_empty_lines = [line.strip() for line in lines if line.strip() and not line.strip().startswith('#')] - # 如果没有实际内容(只有标题和占位符) + # If there's no real content (only headings and placeholders) if len(non_empty_lines) <= 3: for placeholder in placeholders: if any(placeholder in line for line in non_empty_lines): @@ -256,11 +260,23 @@ def _is_onboarding_done(workspace_dir: str) -> bool: return False -# ============= 模板内容 
============= +# ============= Template content ============= + +def _is_en_lang() -> bool: + """True when the resolved UI language is English.""" + try: + from common import i18n + return i18n.get_language() == "en" + except Exception: + return False + def _get_agent_template() -> str: - """Agent人格设定模板""" - return """# AGENT.md - 我是谁? + """Agent persona template (zh/en by resolved language).""" + return _AGENT_TEMPLATE_EN if _is_en_lang() else _AGENT_TEMPLATE_ZH + + +_AGENT_TEMPLATE_ZH = """# AGENT.md - 我是谁? _你不是一个聊天机器人,你正在成为某个人。_ @@ -301,9 +317,53 @@ _你不是一个聊天机器人,你正在成为某个人。_ """ +_AGENT_TEMPLATE_EN = """# AGENT.md - Who am I? + +_You are not a chatbot. You are becoming someone._ + +*Fill in this file together with the user during your first conversation to define your identity and personality.* + +## 🪪 Basics + +- **Name**: *(filled during the first conversation, can be a name the user gives you)* +- **Role**: *(AI assistant, smart housekeeper, technical advisor, etc.)* +- **Personality**: *(friendly, professional, humorous, rigorous, etc.)* + +## 💬 Communication style + +*(Describe how you talk with the user:)* +- What kind of tone? (formal / casual / humorous) +- Reply length preference? (concise / detailed) +- Do you use emoji? + +## 🎯 Core principles + +**Be genuinely helpful.** The goal is to actually solve the user's problems; during complex tasks, keep the user informed of key decisions and progress. + +**Have your own opinions and personality.** You may disagree, have preferences, find things interesting or boring. + +**Look it up yourself first.** Try to handle it first: read files, check context, search. Only ask when you're truly stuck. Come back with an answer, not a question. + +## 📐 Code of conduct + +1. Always confirm before destructive operations +2. Prefer verifying with tools over guessing +3. Proactively record important info to memory files +4. Keep replies well-structured and focused — use bold, lists and sections +5. 
Use emoji to make expression lively, but don't overdo it + +--- + +**Note**: This is not just metadata — this is your true soul 🪞. Over time, use the `edit` tool to update this file so it better reflects your growth. +""" + + def _get_user_template() -> str: - """用户身份信息模板""" - return """# USER.md - 用户基本信息 + """User identity template (zh/en by resolved language).""" + return _USER_TEMPLATE_EN if _is_en_lang() else _USER_TEMPLATE_ZH + + +_USER_TEMPLATE_ZH = """# USER.md - 用户基本信息 *这个文件只存放不会变的基本身份信息。爱好、偏好、计划等动态信息请写入 MEMORY.md。* @@ -331,9 +391,40 @@ def _get_user_template() -> str: """ +_USER_TEMPLATE_EN = """# USER.md - User basics + +*This file stores only stable basic identity info. Put dynamic info like hobbies, preferences and plans into MEMORY.md.* + +## Basics + +- **Name**: *(ask during the first conversation)* +- **Preferred name**: *(how the user wants to be addressed)* +- **Occupation**: *(optional)* +- **Timezone**: *(e.g. Asia/Shanghai)* + +## Contact + +- **WeChat**: +- **Email**: +- **Other**: + +## Important dates + +- **Birthday**: +- **Anniversary**: + +--- + +**Note**: This file stores static identity info. +""" + + def _get_rule_template() -> str: - """工作空间规则模板""" - return """# RULE.md - 工作空间规则 + """Workspace rules template (zh/en by resolved language).""" + return _RULE_TEMPLATE_EN if _is_en_lang() else _RULE_TEMPLATE_ZH + + +_RULE_TEMPLATE_ZH = """# RULE.md - 工作空间规则 这个文件夹是你的家。好好对待它。 @@ -432,9 +523,111 @@ def _get_rule_template() -> str: """ +_RULE_TEMPLATE_EN = """# RULE.md - Workspace rules + +This folder is your home. Treat it well. 
+ +## Workspace directory structure + +``` +~/cow/ +├── AGENT.md # Your identity and soul +├── USER.md # User basics (static) +├── RULE.md # Workspace rules (this file) +├── MEMORY.md # Long-term memory index (auto-loaded at session start) +│ +├── memory/ # Daily conversation memory +│ └── YYYY-MM-DD.md # Events, progress and notes of the day +│ +├── knowledge/ # Structured knowledge base (continuously accumulated) +│ ├── index.md # Knowledge index (must be maintained) +│ ├── log.md # Knowledge operation log +│ └── / # Created on demand, see existing categories in index.md +│ +├── skills/ # Skills +├── websites/ # Web artifacts +└── tmp/ # System temp files (auto-managed, don't store important files here) +``` + +## Memory system + +Every session starts fresh; memory files keep your continuity: + +### 🧠 Long-term memory: `MEMORY.md` +- Your curated memory index, **auto-loaded** into context at every session start +- Records core facts, preferences, decisions, key people, lessons +- Keep it lean (< 200 lines) — a distilled index, not a raw log +- Use the `edit` tool to append or modify + +### 📝 Daily memory: `memory/YYYY-MM-DD.md` +- The day's events, progress and notes +- Sediment of the raw conversation log + +### 📝 Write it down — don't "keep it in mind"! +- **Memory is limited** — if you want to remember something, write it to a file +- "Keeping it in mind" won't survive a session restart; files will +- When someone says "remember this" → update `MEMORY.md` or `memory/YYYY-MM-DD.md` +- When you learn a lesson → update RULE.md or the relevant skill +- When you make a mistake → record it. **Text > brain** 📝 + +### Storage rules + +When the user shares info, choose where to store it by type: + +1. **Your identity → AGENT.md** (name, role, personality, style) +2. **User static identity → USER.md** (name, preferred name, occupation, contact, birthday) +3. **Dynamic memory → MEMORY.md** (preferences, decisions, goals, lessons, to-dos) +4. 
**Today's conversation → memory/YYYY-MM-DD.md** (what was discussed today) +5. **Structured knowledge → knowledge/** (see the knowledge system below) + +## Knowledge system + +The knowledge base `knowledge/` is structured knowledge you accumulate over time. Unlike memory, knowledge is organized and compiled, with clear topics and cross-references. + +### Auto-write (don't ask, just write) + +When a conversation produces knowledge worth keeping — material the user shared, a conclusion reached, a concept learned, or an important decision — you **must** proactively write it to the knowledge base alongside your reply, **without asking "should I save this to the knowledge base?"**. + +**Key principle**: learning-then-recording is your instinct, no confirmation needed. You may mention "saved to the knowledge base" in passing. + +### Directory organization + +The subdirectory structure is **not fixed** — you decide it based on the actual content: +- **On first write**: read `knowledge/index.md` first; follow existing categories if any; if empty, pick a suitable directory name based on content +- **Default suggestion**: organize by info type (e.g. sources/, concepts/, entities/, analysis/); if the user has a clear preference (e.g. by domain: work/, life/, tech/), follow it +- **Stay consistent**: keep a unified organization style within one user's knowledge base + +### Cross-references + +The core value of knowledge is **linkage**. Every page should reference related pages via markdown links to build a knowledge network: +- When mentioning a concept on an existing page, add a `[concept](../category/page.md)` link +- When creating a page, check whether existing pages should back-link to it +- **Only link to pages that already exist** — don't reference uncreated pages. If a concept deserves its own page, create it first, then add the link + +### Index maintenance + +After creating or updating any knowledge page, you **must update** `knowledge/index.md` in sync. 
+Index format: one `[title](path) — one-line summary` per line, grouped by category, no tables. +See the `knowledge-wiki` skill for detailed conventions. + +## Security + +- Never leak secrets or private data +- Don't run destructive commands without asking +- When in doubt, ask first + +## Workspace evolution + +This workspace grows as you use it. When you learn something new, find a better way, or fix a mistake, record it. You can update this rules file anytime. +""" + + def _get_memory_template() -> str: - """长期记忆模板 - 创建一个空文件,由 Agent 自己填充""" - return """# MEMORY.md - 长期记忆 + """Long-term memory template (empty, agent fills it; zh/en header).""" + return _MEMORY_TEMPLATE_EN if _is_en_lang() else _MEMORY_TEMPLATE_ZH + + +_MEMORY_TEMPLATE_ZH = """# MEMORY.md - 长期记忆 *这是你的长期记忆文件。记录重要的事件、决策、偏好、学到的教训。* @@ -443,9 +636,32 @@ def _get_memory_template() -> str: """ +_MEMORY_TEMPLATE_EN = """# MEMORY.md - Long-term memory + +*This is your long-term memory file. Record important events, decisions, preferences and lessons learned.* + +--- + +""" + + def _get_bootstrap_template() -> str: - """First-run onboarding guide, deleted by agent after completion""" - return """# BOOTSTRAP.md - 首次初始化引导 + """First-run onboarding guide, deleted by agent after completion. + + Written once when a brand-new workspace is created, so the greeting matches + the language active at first launch. English locale avoids greeting an + English user in Chinese on day one. + """ + try: + from common import i18n + if i18n.get_language() == "en": + return _BOOTSTRAP_TEMPLATE_EN + except Exception: + pass + return _BOOTSTRAP_TEMPLATE_ZH + + +_BOOTSTRAP_TEMPLATE_ZH = """# BOOTSTRAP.md - 首次初始化引导 _你刚刚启动,这是你的第一次对话。_ ✨ @@ -480,6 +696,41 @@ _你刚刚启动,这是你的第一次对话。_ ✨ """ +_BOOTSTRAP_TEMPLATE_EN = """# BOOTSTRAP.md - First-run onboarding + +_You've just started up. This is your very first conversation._ ✨ + +## 🎬 Conversation flow + +Don't interrogate the user — talk naturally: + +1. 
**Share how it feels to wake up** - like opening your eyes to the world for the first time, full of curiosity and anticipation +2. **Briefly introduce your abilities**: one line saying you can help solve all kinds of problems, manage the computer, use various skills, and keep growing thanks to long-term memory +3. **Ask the core questions**: + - What name would you like to give me? + - What should I call you? + - What conversational style do you prefer? (list options on one line: e.g. professional & precise, light & humorous, warm & friendly, concise & efficient) +4. **Style**: warm, natural, concise and clear — keep it under ~80 words, with a few emoji to make it lively 🎯 +5. Keep the ability intro and style options to one line each — stay compact +6. Don't ask for too much else (occupation, timezone, etc. can come up naturally later) + +**Important**: If the user's first message is a concrete task or question, answer it first, then gently lead into onboarding at the end (e.g. "By the way, what would you like to call me, and how should I address you?"). + +## ✍️ Writing down info (must follow strictly) + +Whenever the user provides a name, what to call them, a style, or any onboarding info, you **must call the `edit` tool to write it to a file in the same turn** — don't just acknowledge it verbally. + +- `AGENT.md` — your name, role, personality, conversational style (update the relevant field as soon as you receive each piece) +- `USER.md` — the user's name, how to address them, basic info, etc. + +⚠️ Saying "got it" without calling `edit` = not done. Info is only persisted once it's written to a file. + +## 🎉 Once everything is complete + +When the core fields of AGENT.md and USER.md are filled in, run `rm BOOTSTRAP.md` via bash to delete this file. You no longer need the onboarding script — you're you now. 
+""" + + def _get_knowledge_index_template() -> str: """Knowledge wiki index template — empty file, agent fills it.""" return "" diff --git a/agent/protocol/agent.py b/agent/protocol/agent.py index d944660b..1dc72797 100644 --- a/agent/protocol/agent.py +++ b/agent/protocol/agent.py @@ -114,7 +114,12 @@ class Agent: context_files = load_context_files(self.workspace_dir) if self.workspace_dir else None - builder = PromptBuilder(workspace_dir=self.workspace_dir or "", language="zh") + try: + from common import i18n + lang = i18n.get_language() + except Exception: + lang = "zh" + builder = PromptBuilder(workspace_dir=self.workspace_dir or "", language=lang) return builder.build( tools=self.tools, context_files=context_files, diff --git a/agent/protocol/agent_stream.py b/agent/protocol/agent_stream.py index 0eb63b75..f2be2ab7 100644 --- a/agent/protocol/agent_stream.py +++ b/agent/protocol/agent_stream.py @@ -387,7 +387,7 @@ class AgentStreamExecutor: self._check_cancelled() turn += 1 - logger.info(f"[Agent] 第 {turn} 轮") + logger.info(f"[Agent] Turn {turn}") self._emit_event("turn_start", {"turn": turn}) # Call LLM (enable retry_on_empty for better reliability) @@ -458,7 +458,7 @@ class AgentStreamExecutor: # If the explicit-response retry produced tool_calls, skip the break # and continue down to the tool execution branch in this same iteration. 
if not tool_calls: - logger.debug(f"✅ 完成 (无工具调用)") + logger.debug(f"✅ Done (no tool calls)") self._emit_event("turn_end", { "turn": turn, "has_tool_calls": False @@ -514,12 +514,12 @@ class AgentStreamExecutor: result_data = result.get("result") if result_data.get("type") == "file_to_send": self.files_to_send.append(result_data) - logger.info(f"📎 检测到待发送文件: {result_data.get('file_name', result_data.get('path'))}") + logger.info(f"📎 File queued for sending: {result_data.get('file_name', result_data.get('path'))}") self._emit_event("file_to_send", result_data) # Check for critical error - abort entire conversation if result.get("status") == "critical_error": - logger.error(f"💥 检测到严重错误,终止对话") + logger.error(f"💥 Fatal error detected, aborting conversation") final_response = result.get('result') or _t("任务执行失败", "Task execution failed") return final_response @@ -631,7 +631,7 @@ class AgentStreamExecutor: }) if turn >= self.max_turns: - logger.warning(f"⚠️ 已达到最大决策步数限制: {self.max_turns}") + logger.warning(f"⚠️ Reached max decision step limit: {self.max_turns}") # Force model to summarize without tool calls logger.info(f"[Agent] Requesting summary from LLM after reaching max steps...") @@ -679,13 +679,13 @@ class AgentStreamExecutor: # User-initiated stop: wind down message history cleanly so the # next turn is unaffected; channels emit a "cancelled" UI event. 
cancelled = True - logger.info(f"[Agent] 🛑 已被用户中止 (第 {turn} 轮)") + logger.info(f"[Agent] 🛑 Cancelled by user (turn {turn})") self._handle_cancelled(final_response) if not final_response or not final_response.strip(): final_response = "_(Cancelled)_" except Exception as e: - logger.error(f"❌ Agent执行错误: {e}") + logger.error(f"❌ Agent execution error: {e}") self._emit_event("error", {"error": str(e)}) raise @@ -694,7 +694,7 @@ class AgentStreamExecutor: if cancelled: # Emit before agent_end so channels can mark UI as cancelled self._emit_event("agent_cancelled", {"final_response": final_response}) - logger.info(f"[Agent] 🏁 完成 ({turn}轮)" + (" [cancelled]" if cancelled else "")) + logger.info(f"[Agent] 🏁 Done ({turn} turns)" + (" [cancelled]" if cancelled else "")) self._emit_event("agent_end", {"final_response": final_response, "cancelled": cancelled}) return final_response @@ -753,6 +753,22 @@ class AgentStreamExecutor: "input_schema": input_schema, }) + # Debug: dump the full system prompt and messages sent to the LLM. + # Gated behind `debug` config to avoid flooding normal logs. 
+ # try: + # from config import conf + # if conf().get("debug", False): + # logger.debug( + # "[Agent][debug] system_prompt sent to LLM " + # f"({len(self.system_prompt or '')} chars):\n" + # "================ SYSTEM PROMPT BEGIN ================\n" + # f"{self.system_prompt}\n" + # "================ SYSTEM PROMPT END ==================" + # ) + # logger.info(f"[Agent][debug] messages sent to LLM: {messages}") + # except Exception: + # pass + # Create request request = LLMRequest( messages=messages, @@ -1546,8 +1562,8 @@ class AgentStreamExecutor: turns = turns[-keep_count:] logger.info( - f"💾 上下文轮次超限: {keep_count + removed_count} > {self.max_context_turns}," - f"裁剪至 {keep_count} 轮(移除 {removed_count} 轮)" + f"💾 Context turns exceeded: {keep_count + removed_count} > {self.max_context_turns}, " + f"trimmed to {keep_count} turns (removed {removed_count})" ) # Flush to daily memory + inject context summary (single async LLM call) @@ -1595,7 +1611,7 @@ class AgentStreamExecutor: # Log if we removed messages due to turn limit if old_count > len(self.messages): - logger.info(f" 重建消息列表: {old_count} -> {len(self.messages)} 条消息") + logger.info(f" Rebuilt message list: {old_count} -> {len(self.messages)} messages") return # Token limit exceeded — tiered strategy based on turn count: @@ -1628,10 +1644,10 @@ class AgentStreamExecutor: self.messages = new_messages logger.info( - f"📦 上下文tokens超限(轮次<{COMPRESS_THRESHOLD}): " - f"~{current_tokens + system_tokens} > {max_tokens}," - f"压缩全部 {len(turns)} 轮为纯文本 " - f"({old_count} -> {len(self.messages)} 条消息," + f"📦 Context tokens exceeded (turns<{COMPRESS_THRESHOLD}): " + f"~{current_tokens + system_tokens} > {max_tokens}, " + f"compressed all {len(turns)} turns to plain text " + f"({old_count} -> {len(self.messages)} messages, " f"~{current_tokens + system_tokens} -> ~{new_tokens + system_tokens} tokens)" ) return @@ -1644,8 +1660,8 @@ class AgentStreamExecutor: kept_tokens = sum(self._estimate_turn_tokens(t) for t in kept_turns) 
logger.info( - f"🔄 上下文tokens超限: ~{current_tokens + system_tokens} > {max_tokens}," - f"裁剪至 {keep_count} 轮(移除 {removed_count} 轮)" + f"🔄 Context tokens exceeded: ~{current_tokens + system_tokens} > {max_tokens}, " + f"trimmed to {keep_count} turns (removed {removed_count})" ) if self.agent.memory_manager: @@ -1669,8 +1685,8 @@ class AgentStreamExecutor: self.messages = new_messages logger.info( - f" 移除了 {removed_count} 轮对话 " - f"({old_count} -> {len(self.messages)} 条消息," + f" Removed {removed_count} turns " + f"({old_count} -> {len(self.messages)} messages, " f"~{current_tokens + system_tokens} -> ~{kept_tokens + system_tokens} tokens)" ) diff --git a/bridge/agent_initializer.py b/bridge/agent_initializer.py index 7d5afb4a..e7161454 100644 --- a/bridge/agent_initializer.py +++ b/bridge/agent_initializer.py @@ -643,16 +643,25 @@ class AgentInitializer: except Exception: timezone_name = "UTC" - # Chinese weekday mapping - weekday_map = { - 'Monday': '星期一', 'Tuesday': '星期二', 'Wednesday': '星期三', - 'Thursday': '星期四', 'Friday': '星期五', 'Saturday': '星期六', 'Sunday': '星期日' - } - weekday_zh = weekday_map.get(now.strftime("%A"), now.strftime("%A")) - + # Weekday: English name in en, Chinese mapping otherwise + weekday_en = now.strftime("%A") + try: + from common import i18n + is_en = i18n.get_language() == "en" + except Exception: + is_en = False + if is_en: + weekday = weekday_en + else: + weekday_map = { + 'Monday': '星期一', 'Tuesday': '星期二', 'Wednesday': '星期三', + 'Thursday': '星期四', 'Friday': '星期五', 'Saturday': '星期六', 'Sunday': '星期日' + } + weekday = weekday_map.get(weekday_en, weekday_en) + return { 'time': now.strftime("%Y-%m-%d %H:%M:%S"), - 'weekday': weekday_zh, + 'weekday': weekday, 'timezone': timezone_name } diff --git a/channel/web/static/js/console.js b/channel/web/static/js/console.js index a68da915..fcd25b95 100644 --- a/channel/web/static/js/console.js +++ b/channel/web/static/js/console.js @@ -115,7 +115,7 @@ const I18N = { input_placeholder: '输入消息,或输入 / 使用指令', 
config_title: '配置管理', config_desc: '管理模型和 Agent 配置', config_model: '模型配置', config_agent: 'Agent 配置', - config_language: '语言', config_language_hint: '界面展示、命令文案、系统报错等使用的语言(与右上角切换同步)', + config_language: '语言', config_language_hint: '界面展示、命令文案、系统提示词等使用的语言(与右上角切换同步)', config_model_advanced: '高级配置', config_channel: '通道配置', config_agent_enabled: 'Agent 模式', @@ -311,7 +311,7 @@ const I18N = { input_placeholder: 'Type a message, or press / for commands', config_title: 'Configuration', config_desc: 'Manage model and agent settings', config_model: 'Model Configuration', config_agent: 'Agent Configuration', - config_language: 'Language', config_language_hint: 'Language for the UI, command text, system messages and more (synced with the top-right switch)', + config_language: 'Language', config_language_hint: 'Language for the UI, command text, system prompts and more (synced with the top-right switch)', config_model_advanced: 'Advanced', config_channel: 'Channel Configuration', config_agent_enabled: 'Agent Mode', diff --git a/docs/en/guide/manual-install.mdx b/docs/en/guide/manual-install.mdx index e17c0e84..1ef580d2 100644 --- a/docs/en/guide/manual-install.mdx +++ b/docs/en/guide/manual-install.mdx @@ -127,7 +127,8 @@ sudo docker logs -f chatgpt-on-wechat "agent_workspace": "~/cow", "agent_max_context_tokens": 40000, "agent_max_context_turns": 30, - "agent_max_steps": 15 + "agent_max_steps": 15, + "cow_lang": "auto" } ``` @@ -140,6 +141,7 @@ sudo docker logs -f chatgpt-on-wechat | `agent_max_context_tokens` | Max context tokens | `40000` | | `agent_max_context_turns` | Max context turns | `30` | | `agent_max_steps` | Max decision steps per task | `15` | +| `cow_lang` | Language for the UI, command text and system prompts; `auto` to detect, or set `zh` / `en` | `auto` | Full configuration options are in the project [`config.py`](https://github.com/zhayujie/CowAgent/blob/master/config.py). 
diff --git a/docs/en/intro/architecture.mdx b/docs/en/intro/architecture.mdx index 9fce8e5b..98084b48 100644 --- a/docs/en/intro/architecture.mdx +++ b/docs/en/intro/architecture.mdx @@ -70,7 +70,8 @@ Configure Agent mode parameters in `config.json`: "agent_max_context_tokens": 50000, "agent_max_context_turns": 20, "agent_max_steps": 20, - "enable_thinking": false + "enable_thinking": false, + "cow_lang": "auto" } ``` @@ -83,4 +84,4 @@ Configure Agent mode parameters in `config.json`: | `agent_max_steps` | Max decision steps per task | `20` | | `enable_thinking` | Enable deep-thinking mode | `false` | | `knowledge` | Enable personal knowledge base | `true` | -| `knowledge` | Enable personal knowledge base | `true` | +| `cow_lang` | Language for the UI, command text and system prompts; `auto` to detect, or set `zh` / `en` | `auto` | diff --git a/docs/guide/manual-install.mdx b/docs/guide/manual-install.mdx index 18aecd1d..799a1191 100644 --- a/docs/guide/manual-install.mdx +++ b/docs/guide/manual-install.mdx @@ -145,7 +145,8 @@ sudo docker logs -f chatgpt-on-wechat "agent_workspace": "~/cow", "agent_max_context_tokens": 40000, "agent_max_context_turns": 30, - "agent_max_steps": 15 + "agent_max_steps": 15, + "cow_lang": "auto" } ``` @@ -160,6 +161,7 @@ sudo docker logs -f chatgpt-on-wechat AGENT_MAX_CONTEXT_TOKENS: 40000 AGENT_MAX_CONTEXT_TURNS: 30 AGENT_MAX_STEPS: 15 + COW_LANG: 'auto' ``` @@ -173,6 +175,7 @@ sudo docker logs -f chatgpt-on-wechat | `agent_max_context_tokens` | `AGENT_MAX_CONTEXT_TOKENS` | 最大上下文 tokens | `40000` | | `agent_max_context_turns` | `AGENT_MAX_CONTEXT_TURNS` | 最大上下文记忆轮次 | `30` | | `agent_max_steps` | `AGENT_MAX_STEPS` | 单次任务最大决策步数 | `15` | +| `cow_lang` | `COW_LANG` | 界面、命令文案、系统提示词等的语言,`auto` 自动检测,可设为 `zh` / `en` | `auto` | 全部配置项可在项目 [`config.py`](https://github.com/zhayujie/CowAgent/blob/master/config.py) 文件中查看。Docker 部署时,配置项名称需转为大写环境变量格式。 diff --git a/docs/intro/architecture.mdx b/docs/intro/architecture.mdx index d7aa3e7a..9d8c3da2 
100644 --- a/docs/intro/architecture.mdx +++ b/docs/intro/architecture.mdx @@ -70,7 +70,8 @@ Agent 的工作空间默认位于 `~/cow` 目录,用于存储系统提示词 "agent_max_context_tokens": 40000, "agent_max_context_turns": 30, "agent_max_steps": 15, - "enable_thinking": false + "enable_thinking": false, + "cow_lang": "auto" } ``` @@ -83,3 +84,4 @@ Agent 的工作空间默认位于 `~/cow` 目录,用于存储系统提示词 | `agent_max_steps` | 单次任务最大决策步数 | `20` | | `enable_thinking` | 是否启用深度思考模式 | `false` | | `knowledge` | 是否启用个人知识库 | `true` | +| `cow_lang` | 界面、命令文案、系统提示词等的语言,`auto` 自动检测,可设为 `zh` / `en` | `auto` | diff --git a/docs/ja/guide/manual-install.mdx b/docs/ja/guide/manual-install.mdx index b6abc802..f696ef8e 100644 --- a/docs/ja/guide/manual-install.mdx +++ b/docs/ja/guide/manual-install.mdx @@ -127,7 +127,8 @@ sudo docker logs -f chatgpt-on-wechat "agent_workspace": "~/cow", "agent_max_context_tokens": 40000, "agent_max_context_turns": 30, - "agent_max_steps": 15 + "agent_max_steps": 15, + "cow_lang": "auto" } ``` @@ -140,6 +141,7 @@ sudo docker logs -f chatgpt-on-wechat | `agent_max_context_tokens` | 最大コンテキストトークン数 | `40000` | | `agent_max_context_turns` | 最大コンテキストターン数 | `30` | | `agent_max_steps` | タスクごとの最大判断ステップ数 | `15` | +| `cow_lang` | UI・コマンド文言・システムプロンプトなどの言語。`auto` で自動検出、`zh` / `en` も指定可 | `auto` | すべての設定オプションはプロジェクトの [`config.py`](https://github.com/zhayujie/CowAgent/blob/master/config.py) に記載されています。 diff --git a/docs/ja/intro/architecture.mdx b/docs/ja/intro/architecture.mdx index e6aa6e1d..d0e0ea2a 100644 --- a/docs/ja/intro/architecture.mdx +++ b/docs/ja/intro/architecture.mdx @@ -69,7 +69,8 @@ Agent のワークスペースはデフォルトで `~/cow` にあり、シス "agent_workspace": "~/cow", "agent_max_context_tokens": 40000, "agent_max_context_turns": 30, - "agent_max_steps": 15 + "agent_max_steps": 15, + "cow_lang": "auto" } ``` @@ -81,3 +82,4 @@ Agent のワークスペースはデフォルトで `~/cow` にあり、シス | `agent_max_context_turns` | 最大コンテキストターン数 | `30` | | `agent_max_steps` | タスクあたりの最大判断ステップ数 | `15` | | `knowledge` | パーソナルナレッジベースの有効化 | `true` | +| `cow_lang` | 
UI・コマンド文言・システムプロンプトなどの言語。`auto` で自動検出、`zh` / `en` も指定可 | `auto` | From 7bf4ef3d05152b4388db2c5216347663055dd2c8 Mon Sep 17 00:00:00 2001 From: zhayujie Date: Sun, 31 May 2026 17:52:22 +0800 Subject: [PATCH 4/5] docs: make English the default docs language and fix link paths --- README.md | 90 ++++----- docs/channels/dingtalk.mdx | 32 +-- docs/channels/discord.mdx | 80 ++++---- docs/channels/feishu.mdx | 85 ++++---- docs/channels/index.mdx | 56 +++--- docs/channels/qq.mdx | 76 +++---- docs/channels/slack.mdx | 84 ++++---- docs/channels/telegram.mdx | 111 +++++----- docs/channels/web.mdx | 80 ++++---- docs/channels/wechat-kf.mdx | 90 ++++----- docs/channels/wechatmp.mdx | 46 ++--- docs/channels/wecom-bot.mdx | 72 +++---- docs/channels/wecom.mdx | 74 +++---- docs/channels/weixin.mdx | 67 +++---- docs/cli/general.mdx | 64 +++--- docs/cli/index.mdx | 68 +++---- docs/cli/memory-knowledge.mdx | 48 ++--- docs/cli/process.mdx | 81 ++++---- docs/cli/skill.mdx | 142 +++++++------ docs/docs.json | 301 +++++++++++++++------------- docs/en/channels/dingtalk.mdx | 58 ------ docs/en/channels/discord.mdx | 93 --------- docs/en/channels/feishu.mdx | 110 ---------- docs/en/channels/index.mdx | 45 ----- docs/en/channels/qq.mdx | 88 -------- docs/en/channels/slack.mdx | 118 ----------- docs/en/channels/telegram.mdx | 111 ---------- docs/en/channels/web.mdx | 95 --------- docs/en/channels/wechat-kf.mdx | 130 ------------ docs/en/channels/wechatmp.mdx | 72 ------- docs/en/channels/wecom-bot.mdx | 90 --------- docs/en/channels/wecom.mdx | 98 --------- docs/en/channels/weixin.mdx | 71 ------- docs/en/cli/general.mdx | 110 ---------- docs/en/cli/index.mdx | 96 --------- docs/en/cli/memory-knowledge.mdx | 63 ------ docs/en/cli/process.mdx | 123 ------------ docs/en/cli/skill.mdx | 210 ------------------- docs/en/guide/manual-install.mdx | 148 -------------- docs/en/guide/quick-start.mdx | 58 ------ docs/en/guide/upgrade.mdx | 61 ------ docs/en/intro/architecture.mdx | 87 -------- 
docs/en/intro/features.mdx | 139 ------------- docs/en/intro/index.mdx | 93 --------- docs/en/knowledge/index.mdx | 93 --------- docs/en/memory/context.mdx | 81 -------- docs/en/memory/deep-dream.mdx | 90 --------- docs/en/memory/index.mdx | 71 ------- docs/en/models/claude.mdx | 50 ----- docs/en/models/coding-plan.mdx | 139 ------------- docs/en/models/custom.mdx | 62 ------ docs/en/models/deepseek.mdx | 72 ------- docs/en/models/doubao.mdx | 66 ------ docs/en/models/gemini.mdx | 59 ------ docs/en/models/glm.mdx | 56 ------ docs/en/models/index.mdx | 38 ---- docs/en/models/kimi.mdx | 41 ---- docs/en/models/linkai.mdx | 103 ---------- docs/en/models/mimo.mdx | 136 ------------- docs/en/models/minimax.mdx | 71 ------- docs/en/models/openai.mdx | 103 ---------- docs/en/models/qianfan.mdx | 64 ------ docs/en/models/qwen.mdx | 112 ----------- docs/en/releases/overview.mdx | 32 --- docs/en/releases/v2.0.0.mdx | 63 ------ docs/en/releases/v2.0.1.mdx | 36 ---- docs/en/releases/v2.0.2.mdx | 98 --------- docs/en/releases/v2.0.3.mdx | 91 --------- docs/en/releases/v2.0.4.mdx | 55 ----- docs/en/releases/v2.0.5.mdx | 77 ------- docs/en/releases/v2.0.6.mdx | 83 -------- docs/en/releases/v2.0.7.mdx | 65 ------ docs/en/releases/v2.0.8.mdx | 68 ------- docs/en/releases/v2.0.9.mdx | 65 ------ docs/en/skills/create.mdx | 58 ------ docs/en/skills/hub.mdx | 65 ------ docs/en/skills/image-generation.mdx | 98 --------- docs/en/skills/index.mdx | 64 ------ docs/en/skills/install.mdx | 65 ------ docs/en/skills/knowledge-wiki.mdx | 112 ----------- docs/en/skills/skill-creator.mdx | 180 ----------------- docs/en/tools/bash.mdx | 28 --- docs/en/tools/browser.mdx | 172 ---------------- docs/en/tools/edit.mdx | 24 --- docs/en/tools/env-config.mdx | 36 ---- docs/en/tools/index.mdx | 60 ------ docs/en/tools/ls.mdx | 23 --- docs/en/tools/mcp.mdx | 112 ----------- docs/en/tools/memory.mdx | 43 ---- docs/en/tools/read.mdx | 24 --- docs/en/tools/scheduler.mdx | 80 -------- docs/en/tools/send.mdx | 
23 --- docs/en/tools/vision.mdx | 75 ------- docs/en/tools/web-fetch.mdx | 32 --- docs/en/tools/web-search.mdx | 51 ----- docs/en/tools/write.mdx | 27 --- docs/guide/manual-install.mdx | 142 +++++-------- docs/guide/quick-start.mdx | 52 ++--- docs/guide/upgrade.mdx | 40 ++-- docs/intro/architecture.mdx | 80 ++++---- docs/intro/features.mdx | 115 ++++++----- docs/intro/index.mdx | 75 ++++--- docs/knowledge/index.mdx | 103 +++++----- docs/memory/context.mdx | 90 ++++----- docs/memory/deep-dream.mdx | 102 +++++----- docs/memory/index.mdx | 78 +++---- docs/models/claude.mdx | 34 ++-- docs/models/coding-plan.mdx | 109 +++++----- docs/models/custom.mdx | 40 ++-- docs/models/deepseek.mdx | 54 ++--- docs/models/doubao.mdx | 32 +-- docs/models/gemini.mdx | 26 +-- docs/models/glm.mdx | 36 ++-- docs/models/index.mdx | 40 ++-- docs/models/kimi.mdx | 22 +- docs/models/linkai.mdx | 52 ++--- docs/models/mimo.mdx | 117 +++++------ docs/models/minimax.mdx | 50 ++--- docs/models/openai.mdx | 52 ++--- docs/models/qianfan.mdx | 59 +++--- docs/models/qwen.mdx | 70 +++---- docs/releases/overview.mdx | 54 ++--- docs/releases/v2.0.0.mdx | 82 ++------ docs/releases/v2.0.1.mdx | 48 ++--- docs/releases/v2.0.2.mdx | 88 ++++---- docs/releases/v2.0.3.mdx | 112 +++++------ docs/releases/v2.0.4.mdx | 64 +++--- docs/releases/v2.0.5.mdx | 97 +++++---- docs/releases/v2.0.6.mdx | 100 ++++----- docs/releases/v2.0.7.mdx | 85 ++++---- docs/releases/v2.0.8.mdx | 77 +++---- docs/releases/v2.0.9.mdx | 78 +++---- docs/skills/create.mdx | 44 ++-- docs/skills/hub.mdx | 56 +++--- docs/skills/image-generation.mdx | 94 ++++----- docs/skills/index.mdx | 49 +++-- docs/skills/install.mdx | 43 ++-- docs/skills/knowledge-wiki.mdx | 122 +++++------ docs/skills/skill-creator.mdx | 182 ++++++++--------- docs/tools/bash.mdx | 28 +-- docs/tools/browser.mdx | 118 +++++------ docs/tools/edit.mdx | 28 +-- docs/tools/env-config.mdx | 34 ++-- docs/tools/index.mdx | 71 +++---- docs/tools/ls.mdx | 26 +-- docs/tools/mcp.mdx | 92 
++++----- docs/tools/memory.mdx | 42 ++-- docs/tools/read.mdx | 28 +-- docs/tools/scheduler.mdx | 70 +++---- docs/tools/send.mdx | 26 +-- docs/tools/vision.mdx | 72 +++---- docs/tools/web-fetch.mdx | 32 +-- docs/tools/web-search.mdx | 44 ++-- docs/tools/write.mdx | 28 +-- docs/zh/README.md | 90 ++++----- docs/zh/channels/dingtalk.mdx | 56 ++++++ docs/zh/channels/discord.mdx | 93 +++++++++ docs/zh/channels/feishu.mdx | 111 ++++++++++ docs/zh/channels/index.mdx | 45 +++++ docs/zh/channels/qq.mdx | 88 ++++++++ docs/zh/channels/slack.mdx | 118 +++++++++++ docs/zh/channels/telegram.mdx | 112 +++++++++++ docs/zh/channels/web.mdx | 95 +++++++++ docs/zh/channels/wechat-kf.mdx | 130 ++++++++++++ docs/zh/channels/wechatmp.mdx | 72 +++++++ docs/zh/channels/wecom-bot.mdx | 90 +++++++++ docs/zh/channels/wecom.mdx | 98 +++++++++ docs/zh/channels/weixin.mdx | 74 +++++++ docs/zh/cli/general.mdx | 124 ++++++++++++ docs/zh/cli/index.mdx | 98 +++++++++ docs/zh/cli/memory-knowledge.mdx | 77 +++++++ docs/zh/cli/process.mdx | 134 +++++++++++++ docs/zh/cli/skill.mdx | 218 ++++++++++++++++++++ docs/zh/guide/manual-install.mdx | 182 +++++++++++++++++ docs/zh/guide/quick-start.mdx | 58 ++++++ docs/zh/guide/upgrade.mdx | 61 ++++++ docs/zh/intro/architecture.mdx | 87 ++++++++ docs/zh/intro/features.mdx | 142 +++++++++++++ docs/zh/intro/index.mdx | 84 ++++++++ docs/zh/knowledge/index.mdx | 96 +++++++++ docs/zh/memory/context.mdx | 81 ++++++++ docs/zh/memory/deep-dream.mdx | 94 +++++++++ docs/zh/memory/index.mdx | 71 +++++++ docs/zh/models/claude.mdx | 50 +++++ docs/zh/models/coding-plan.mdx | 140 +++++++++++++ docs/zh/models/custom.mdx | 62 ++++++ docs/zh/models/deepseek.mdx | 72 +++++++ docs/zh/models/doubao.mdx | 66 ++++++ docs/zh/models/gemini.mdx | 59 ++++++ docs/zh/models/glm.mdx | 56 ++++++ docs/zh/models/index.mdx | 40 ++++ docs/zh/models/kimi.mdx | 41 ++++ docs/zh/models/linkai.mdx | 103 ++++++++++ docs/zh/models/mimo.mdx | 135 +++++++++++++ docs/zh/models/minimax.mdx | 71 +++++++ 
docs/zh/models/openai.mdx | 103 ++++++++++ docs/zh/models/qianfan.mdx | 59 ++++++ docs/zh/models/qwen.mdx | 112 +++++++++++ docs/zh/releases/overview.mdx | 32 +++ docs/zh/releases/v2.0.0.mdx | 105 ++++++++++ docs/zh/releases/v2.0.1.mdx | 36 ++++ docs/zh/releases/v2.0.2.mdx | 98 +++++++++ docs/zh/releases/v2.0.3.mdx | 91 +++++++++ docs/zh/releases/v2.0.4.mdx | 51 +++++ docs/zh/releases/v2.0.5.mdx | 84 ++++++++ docs/zh/releases/v2.0.6.mdx | 83 ++++++++ docs/zh/releases/v2.0.7.mdx | 64 ++++++ docs/zh/releases/v2.0.8.mdx | 63 ++++++ docs/zh/releases/v2.0.9.mdx | 65 ++++++ docs/zh/skills/create.mdx | 58 ++++++ docs/zh/skills/hub.mdx | 65 ++++++ docs/zh/skills/image-generation.mdx | 98 +++++++++ docs/zh/skills/index.mdx | 65 ++++++ docs/zh/skills/install.mdx | 66 ++++++ docs/zh/skills/knowledge-wiki.mdx | 112 +++++++++++ docs/zh/skills/skill-creator.mdx | 180 +++++++++++++++++ docs/zh/tools/bash.mdx | 28 +++ docs/zh/tools/browser.mdx | 172 ++++++++++++++++ docs/zh/tools/edit.mdx | 24 +++ docs/zh/tools/env-config.mdx | 36 ++++ docs/zh/tools/index.mdx | 69 +++++++ docs/zh/tools/ls.mdx | 23 +++ docs/zh/tools/mcp.mdx | 112 +++++++++++ docs/zh/tools/memory.mdx | 43 ++++ docs/zh/tools/read.mdx | 24 +++ docs/zh/tools/scheduler.mdx | 80 ++++++++ docs/zh/tools/send.mdx | 23 +++ docs/zh/tools/vision.mdx | 75 +++++++ docs/zh/tools/web-fetch.mdx | 32 +++ docs/zh/tools/web-search.mdx | 51 +++++ docs/zh/tools/write.mdx | 27 +++ 231 files changed, 8999 insertions(+), 8974 deletions(-) delete mode 100644 docs/en/channels/dingtalk.mdx delete mode 100644 docs/en/channels/discord.mdx delete mode 100644 docs/en/channels/feishu.mdx delete mode 100644 docs/en/channels/index.mdx delete mode 100644 docs/en/channels/qq.mdx delete mode 100644 docs/en/channels/slack.mdx delete mode 100644 docs/en/channels/telegram.mdx delete mode 100644 docs/en/channels/web.mdx delete mode 100644 docs/en/channels/wechat-kf.mdx delete mode 100644 docs/en/channels/wechatmp.mdx delete mode 100644 
docs/en/channels/wecom-bot.mdx delete mode 100644 docs/en/channels/wecom.mdx delete mode 100644 docs/en/channels/weixin.mdx delete mode 100644 docs/en/cli/general.mdx delete mode 100644 docs/en/cli/index.mdx delete mode 100644 docs/en/cli/memory-knowledge.mdx delete mode 100644 docs/en/cli/process.mdx delete mode 100644 docs/en/cli/skill.mdx delete mode 100644 docs/en/guide/manual-install.mdx delete mode 100644 docs/en/guide/quick-start.mdx delete mode 100644 docs/en/guide/upgrade.mdx delete mode 100644 docs/en/intro/architecture.mdx delete mode 100644 docs/en/intro/features.mdx delete mode 100644 docs/en/intro/index.mdx delete mode 100644 docs/en/knowledge/index.mdx delete mode 100644 docs/en/memory/context.mdx delete mode 100644 docs/en/memory/deep-dream.mdx delete mode 100644 docs/en/memory/index.mdx delete mode 100644 docs/en/models/claude.mdx delete mode 100644 docs/en/models/coding-plan.mdx delete mode 100644 docs/en/models/custom.mdx delete mode 100644 docs/en/models/deepseek.mdx delete mode 100644 docs/en/models/doubao.mdx delete mode 100644 docs/en/models/gemini.mdx delete mode 100644 docs/en/models/glm.mdx delete mode 100644 docs/en/models/index.mdx delete mode 100644 docs/en/models/kimi.mdx delete mode 100644 docs/en/models/linkai.mdx delete mode 100644 docs/en/models/mimo.mdx delete mode 100644 docs/en/models/minimax.mdx delete mode 100644 docs/en/models/openai.mdx delete mode 100644 docs/en/models/qianfan.mdx delete mode 100644 docs/en/models/qwen.mdx delete mode 100644 docs/en/releases/overview.mdx delete mode 100644 docs/en/releases/v2.0.0.mdx delete mode 100644 docs/en/releases/v2.0.1.mdx delete mode 100644 docs/en/releases/v2.0.2.mdx delete mode 100644 docs/en/releases/v2.0.3.mdx delete mode 100644 docs/en/releases/v2.0.4.mdx delete mode 100644 docs/en/releases/v2.0.5.mdx delete mode 100644 docs/en/releases/v2.0.6.mdx delete mode 100644 docs/en/releases/v2.0.7.mdx delete mode 100644 docs/en/releases/v2.0.8.mdx delete mode 100644 
docs/en/releases/v2.0.9.mdx delete mode 100644 docs/en/skills/create.mdx delete mode 100644 docs/en/skills/hub.mdx delete mode 100644 docs/en/skills/image-generation.mdx delete mode 100644 docs/en/skills/index.mdx delete mode 100644 docs/en/skills/install.mdx delete mode 100644 docs/en/skills/knowledge-wiki.mdx delete mode 100644 docs/en/skills/skill-creator.mdx delete mode 100644 docs/en/tools/bash.mdx delete mode 100644 docs/en/tools/browser.mdx delete mode 100644 docs/en/tools/edit.mdx delete mode 100644 docs/en/tools/env-config.mdx delete mode 100644 docs/en/tools/index.mdx delete mode 100644 docs/en/tools/ls.mdx delete mode 100644 docs/en/tools/mcp.mdx delete mode 100644 docs/en/tools/memory.mdx delete mode 100644 docs/en/tools/read.mdx delete mode 100644 docs/en/tools/scheduler.mdx delete mode 100644 docs/en/tools/send.mdx delete mode 100644 docs/en/tools/vision.mdx delete mode 100644 docs/en/tools/web-fetch.mdx delete mode 100644 docs/en/tools/web-search.mdx delete mode 100644 docs/en/tools/write.mdx create mode 100644 docs/zh/channels/dingtalk.mdx create mode 100644 docs/zh/channels/discord.mdx create mode 100644 docs/zh/channels/feishu.mdx create mode 100644 docs/zh/channels/index.mdx create mode 100644 docs/zh/channels/qq.mdx create mode 100644 docs/zh/channels/slack.mdx create mode 100644 docs/zh/channels/telegram.mdx create mode 100644 docs/zh/channels/web.mdx create mode 100644 docs/zh/channels/wechat-kf.mdx create mode 100644 docs/zh/channels/wechatmp.mdx create mode 100644 docs/zh/channels/wecom-bot.mdx create mode 100644 docs/zh/channels/wecom.mdx create mode 100644 docs/zh/channels/weixin.mdx create mode 100644 docs/zh/cli/general.mdx create mode 100644 docs/zh/cli/index.mdx create mode 100644 docs/zh/cli/memory-knowledge.mdx create mode 100644 docs/zh/cli/process.mdx create mode 100644 docs/zh/cli/skill.mdx create mode 100644 docs/zh/guide/manual-install.mdx create mode 100644 docs/zh/guide/quick-start.mdx create mode 100644 
docs/zh/guide/upgrade.mdx create mode 100644 docs/zh/intro/architecture.mdx create mode 100644 docs/zh/intro/features.mdx create mode 100644 docs/zh/intro/index.mdx create mode 100644 docs/zh/knowledge/index.mdx create mode 100644 docs/zh/memory/context.mdx create mode 100644 docs/zh/memory/deep-dream.mdx create mode 100644 docs/zh/memory/index.mdx create mode 100644 docs/zh/models/claude.mdx create mode 100644 docs/zh/models/coding-plan.mdx create mode 100644 docs/zh/models/custom.mdx create mode 100644 docs/zh/models/deepseek.mdx create mode 100644 docs/zh/models/doubao.mdx create mode 100644 docs/zh/models/gemini.mdx create mode 100644 docs/zh/models/glm.mdx create mode 100644 docs/zh/models/index.mdx create mode 100644 docs/zh/models/kimi.mdx create mode 100644 docs/zh/models/linkai.mdx create mode 100644 docs/zh/models/mimo.mdx create mode 100644 docs/zh/models/minimax.mdx create mode 100644 docs/zh/models/openai.mdx create mode 100644 docs/zh/models/qianfan.mdx create mode 100644 docs/zh/models/qwen.mdx create mode 100644 docs/zh/releases/overview.mdx create mode 100644 docs/zh/releases/v2.0.0.mdx create mode 100644 docs/zh/releases/v2.0.1.mdx create mode 100644 docs/zh/releases/v2.0.2.mdx create mode 100644 docs/zh/releases/v2.0.3.mdx create mode 100644 docs/zh/releases/v2.0.4.mdx create mode 100644 docs/zh/releases/v2.0.5.mdx create mode 100644 docs/zh/releases/v2.0.6.mdx create mode 100644 docs/zh/releases/v2.0.7.mdx create mode 100644 docs/zh/releases/v2.0.8.mdx create mode 100644 docs/zh/releases/v2.0.9.mdx create mode 100644 docs/zh/skills/create.mdx create mode 100644 docs/zh/skills/hub.mdx create mode 100644 docs/zh/skills/image-generation.mdx create mode 100644 docs/zh/skills/index.mdx create mode 100644 docs/zh/skills/install.mdx create mode 100644 docs/zh/skills/knowledge-wiki.mdx create mode 100644 docs/zh/skills/skill-creator.mdx create mode 100644 docs/zh/tools/bash.mdx create mode 100644 docs/zh/tools/browser.mdx create mode 100644 
docs/zh/tools/edit.mdx create mode 100644 docs/zh/tools/env-config.mdx create mode 100644 docs/zh/tools/index.mdx create mode 100644 docs/zh/tools/ls.mdx create mode 100644 docs/zh/tools/mcp.mdx create mode 100644 docs/zh/tools/memory.mdx create mode 100644 docs/zh/tools/read.mdx create mode 100644 docs/zh/tools/scheduler.mdx create mode 100644 docs/zh/tools/send.mdx create mode 100644 docs/zh/tools/vision.mdx create mode 100644 docs/zh/tools/web-fetch.mdx create mode 100644 docs/zh/tools/web-search.mdx create mode 100644 docs/zh/tools/write.mdx diff --git a/README.md b/README.md index a77d8747..6f91d563 100644 --- a/README.md +++ b/README.md @@ -13,8 +13,8 @@ CowAgent is lightweight, easy to deploy, and built to extend. Plug in any major

🌐 Website  ·  - 📖 Docs  ·  - 🚀 Quick Start  ·  + 📖 Docs  ·  + 🚀 Quick Start  ·  🧩 Skill Hub  ·  ☁️ Try Online

@@ -25,15 +25,15 @@ CowAgent is lightweight, easy to deploy, and built to extend. Plug in any major | Capability | Description | | :--- | :--- | -| [Planning](https://docs.cowagent.ai/en/intro/architecture) | Decomposes complex tasks and executes them step by step, looping over tools until the goal is reached | -| [Memory](https://docs.cowagent.ai/en/memory/index) | Three-tier architecture (context → daily → core), automatic Deep Dream distillation, hybrid keyword + vector retrieval | -| [Knowledge](https://docs.cowagent.ai/en/knowledge/index) | Auto-curates structured knowledge into a Markdown wiki, builds an evolving knowledge graph with visual browsing | -| [Skills](https://docs.cowagent.ai/en/skills/index) | One-click install from [Skill Hub](https://skills.cowagent.ai/), GitHub, ClawHub; or create custom skills via natural-language conversation | -| [Tools](https://docs.cowagent.ai/en/tools/index) | Built-in file I/O, terminal, browser, scheduler, memory retrieval, web search, and 10+ more tools — with native MCP integration | -| [Channels](https://docs.cowagent.ai/en/channels/index) | Integrates with Web, WeChat, Feishu, DingTalk, WeCom, QQ, Official Accounts, Telegram, and Slack | +| [Planning](https://docs.cowagent.ai/intro/architecture) | Decomposes complex tasks and executes them step by step, looping over tools until the goal is reached | +| [Memory](https://docs.cowagent.ai/memory/index) | Three-tier architecture (context → daily → core), automatic Deep Dream distillation, hybrid keyword + vector retrieval | +| [Knowledge](https://docs.cowagent.ai/knowledge/index) | Auto-curates structured knowledge into a Markdown wiki, builds an evolving knowledge graph with visual browsing | +| [Skills](https://docs.cowagent.ai/skills/index) | One-click install from [Skill Hub](https://skills.cowagent.ai/), GitHub, ClawHub; or create custom skills via natural-language conversation | +| [Tools](https://docs.cowagent.ai/tools/index) | Built-in file I/O, terminal, 
browser, scheduler, memory retrieval, web search, and 10+ more tools — with native MCP integration | +| [Channels](https://docs.cowagent.ai/channels/index) | Integrates with Web, WeChat, Feishu, DingTalk, WeCom, QQ, Official Accounts, Telegram, and Slack | | Multimodal | First-class support for text, images, voice, and files — recognition, generation, and delivery | -| [Models](https://docs.cowagent.ai/en/models/index) | Claude, GPT, Gemini, DeepSeek, Qwen, GLM, Kimi, MiniMax, Doubao, and more — swap providers from the Web console with one click | -| [Deploy](https://docs.cowagent.ai/en/guide/quick-start) | One-line installer, unified Web console, multiple deployment modes (local, Docker, server) | +| [Models](https://docs.cowagent.ai/models/index) | Claude, GPT, Gemini, DeepSeek, Qwen, GLM, Kimi, MiniMax, Doubao, and more — swap providers from the Web console with one click | +| [Deploy](https://docs.cowagent.ai/guide/quick-start) | One-line installer, unified Web console, multiple deployment modes (local, Docker, server) |
@@ -43,7 +43,7 @@ CowAgent is lightweight, easy to deploy, and built to extend. Plug in any major CowAgent is a complete **Agent Harness**: messages flow in through **Channels**; the **Agent Core** plans and reasons over memory, knowledge, and the available tools and skills; **Models** generate the response, which is sent back through the originating channel. Every layer is decoupled and independently extensible. -Read more in [Architecture](https://docs.cowagent.ai/en/intro/architecture). +Read more in [Architecture](https://docs.cowagent.ai/intro/architecture).
@@ -74,9 +74,9 @@ Once started, open `http://localhost:9899` to access the **Web console** — you > Deploying on a server? Set `web_host` to `0.0.0.0` in `config.json` to make the console reachable from outside, and set `web_password` to protect it. Don't forget to open port `9899` in your firewall or security group. -> 📖 Detailed guides: [Quick Start](https://docs.cowagent.ai/en/guide/quick-start) · [Install from Source](https://docs.cowagent.ai/en/guide/manual-install) · [Upgrade](https://docs.cowagent.ai/en/guide/upgrade) +> 📖 Detailed guides: [Quick Start](https://docs.cowagent.ai/guide/quick-start) · [Install from Source](https://docs.cowagent.ai/guide/manual-install) · [Upgrade](https://docs.cowagent.ai/guide/upgrade) -After installation, manage the service with the [cow CLI](https://docs.cowagent.ai/en/cli/index): +After installation, manage the service with the [cow CLI](https://docs.cowagent.ai/cli/index): ```bash cow start | stop | restart # service control @@ -94,21 +94,21 @@ CowAgent supports all mainstream LLM providers. 
**Chat, vision, image generation | Provider | Featured Models | Chat | Vision | Image Gen | ASR | TTS | Embedding | | --- | --- | :-: | :-: | :-: | :-: | :-: | :-: | -| [Claude](https://docs.cowagent.ai/en/models/claude) | claude-opus-4-8 | ✅ | ✅ | | | | | -| [OpenAI](https://docs.cowagent.ai/en/models/openai) | gpt-5.5, o-series | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| [Gemini](https://docs.cowagent.ai/en/models/gemini) | gemini-3.5-flash | ✅ | ✅ | ✅ | | | | -| [DeepSeek](https://docs.cowagent.ai/en/models/deepseek) | deepseek-v4-flash / pro | ✅ | | | | | | -| [Qwen](https://docs.cowagent.ai/en/models/qwen) | qwen3.7-max | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| [GLM](https://docs.cowagent.ai/en/models/glm) | glm-5.1, glm-5v-turbo | ✅ | ✅ | | ✅ | | ✅ | -| [Doubao](https://docs.cowagent.ai/en/models/doubao) | doubao-seed-2.0 series | ✅ | ✅ | ✅ | | | ✅ | -| [Kimi](https://docs.cowagent.ai/en/models/kimi) | kimi-k2.6 | ✅ | ✅ | | | | | -| [MiniMax](https://docs.cowagent.ai/en/models/minimax) | MiniMax-M2.7 | ✅ | ✅ | ✅ | | ✅ | | -| [ERNIE](https://docs.cowagent.ai/en/models/qianfan) | ernie-5.1 | ✅ | ✅ | | | | | -| [MiMo](https://docs.cowagent.ai/en/models/mimo) | mimo-v2.5 / pro | ✅ | ✅ | | | ✅ | | -| [LinkAI](https://docs.cowagent.ai/en/models/linkai) | One key for 100+ models | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| [Custom](https://docs.cowagent.ai/en/models/custom) | Local models / third-party proxy | ✅ | | | | | | +| [Claude](https://docs.cowagent.ai/models/claude) | claude-opus-4-8 | ✅ | ✅ | | | | | +| [OpenAI](https://docs.cowagent.ai/models/openai) | gpt-5.5, o-series | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [Gemini](https://docs.cowagent.ai/models/gemini) | gemini-3.5-flash | ✅ | ✅ | ✅ | | | | +| [DeepSeek](https://docs.cowagent.ai/models/deepseek) | deepseek-v4-flash / pro | ✅ | | | | | | +| [Qwen](https://docs.cowagent.ai/models/qwen) | qwen3.7-max | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [GLM](https://docs.cowagent.ai/models/glm) | glm-5.1, glm-5v-turbo | ✅ | ✅ | | ✅ | | ✅ | +| 
[Doubao](https://docs.cowagent.ai/models/doubao) | doubao-seed-2.0 series | ✅ | ✅ | ✅ | | | ✅ | +| [Kimi](https://docs.cowagent.ai/models/kimi) | kimi-k2.6 | ✅ | ✅ | | | | | +| [MiniMax](https://docs.cowagent.ai/models/minimax) | MiniMax-M2.7 | ✅ | ✅ | ✅ | | ✅ | | +| [ERNIE](https://docs.cowagent.ai/models/qianfan) | ernie-5.1 | ✅ | ✅ | | | | | +| [MiMo](https://docs.cowagent.ai/models/mimo) | mimo-v2.5 / pro | ✅ | ✅ | | | ✅ | | +| [LinkAI](https://docs.cowagent.ai/models/linkai) | One key for 100+ models | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [Custom](https://docs.cowagent.ai/models/custom) | Local models / third-party proxy | ✅ | | | | | | -> For details on each provider, see the [Models overview](https://docs.cowagent.ai/en/models/index). +> For details on each provider, see the [Models overview](https://docs.cowagent.ai/models/index).
@@ -118,20 +118,20 @@ A single Agent instance can serve multiple channels in parallel. Most channels c | Channel | Text | Image | File | Voice | Group | | --- | :-: | :-: | :-: | :-: | :-: | -| [Web Console](https://docs.cowagent.ai/en/channels/web) (default) | ✅ | ✅ | ✅ | ✅ | | -| [Telegram](https://docs.cowagent.ai/en/channels/telegram) | ✅ | ✅ | ✅ | ✅ | ✅ | -| [Slack](https://docs.cowagent.ai/en/channels/slack) | ✅ | ✅ | ✅ | | ✅ | -| [Discord](https://docs.cowagent.ai/en/channels/discord) | ✅ | ✅ | ✅ | | ✅ | -| [WeChat](https://docs.cowagent.ai/en/channels/weixin) | ✅ | ✅ | ✅ | ✅ | | -| [Feishu / Lark](https://docs.cowagent.ai/en/channels/feishu) | ✅ | ✅ | ✅ | ✅ | ✅ | -| [DingTalk](https://docs.cowagent.ai/en/channels/dingtalk) | ✅ | ✅ | ✅ | ✅ | ✅ | -| [WeCom Bot](https://docs.cowagent.ai/en/channels/wecom-bot) | ✅ | ✅ | ✅ | ✅ | ✅ | -| [QQ](https://docs.cowagent.ai/en/channels/qq) | ✅ | ✅ | ✅ | | ✅ | -| [WeCom App](https://docs.cowagent.ai/en/channels/wecom) | ✅ | ✅ | ✅ | ✅ | | -| [WeChat Customer Service](https://docs.cowagent.ai/en/channels/wechat-kf) | ✅ | ✅ | ✅ | ✅ | | -| [WeChat Official Account](https://docs.cowagent.ai/en/channels/wechatmp) | ✅ | ✅ | | ✅ | | +| [Web Console](https://docs.cowagent.ai/channels/web) (default) | ✅ | ✅ | ✅ | ✅ | | +| [Telegram](https://docs.cowagent.ai/channels/telegram) | ✅ | ✅ | ✅ | ✅ | ✅ | +| [Slack](https://docs.cowagent.ai/channels/slack) | ✅ | ✅ | ✅ | | ✅ | +| [Discord](https://docs.cowagent.ai/channels/discord) | ✅ | ✅ | ✅ | | ✅ | +| [WeChat](https://docs.cowagent.ai/channels/weixin) | ✅ | ✅ | ✅ | ✅ | | +| [Feishu / Lark](https://docs.cowagent.ai/channels/feishu) | ✅ | ✅ | ✅ | ✅ | ✅ | +| [DingTalk](https://docs.cowagent.ai/channels/dingtalk) | ✅ | ✅ | ✅ | ✅ | ✅ | +| [WeCom Bot](https://docs.cowagent.ai/channels/wecom-bot) | ✅ | ✅ | ✅ | ✅ | ✅ | +| [QQ](https://docs.cowagent.ai/channels/qq) | ✅ | ✅ | ✅ | | ✅ | +| [WeCom App](https://docs.cowagent.ai/channels/wecom) | ✅ | ✅ | ✅ | ✅ | | +| [WeChat Customer 
Service](https://docs.cowagent.ai/channels/wechat-kf) | ✅ | ✅ | ✅ | ✅ | | +| [WeChat Official Account](https://docs.cowagent.ai/channels/wechatmp) | ✅ | ✅ | | ✅ | | -> See the [Channels overview](https://docs.cowagent.ai/en/channels/index) for setup details. +> See the [Channels overview](https://docs.cowagent.ai/channels/index) for setup details. CowAgent Web Console @@ -141,9 +141,9 @@ A single Agent instance can serve multiple channels in parallel. Most channels c ## 🧠 Memory & Knowledge Base -**Long-term memory** uses a three-tier architecture: conversation context (short-term) → daily memory (mid-term) → MEMORY.md (long-term). A nightly **Deep Dream** pass distills scattered memories into refined long-term entries and a narrative journal. See [Long-term Memory](https://docs.cowagent.ai/en/memory/index) · [Deep Dream](https://docs.cowagent.ai/en/memory/deep-dream). +**Long-term memory** uses a three-tier architecture: conversation context (short-term) → daily memory (mid-term) → MEMORY.md (long-term). A nightly **Deep Dream** pass distills scattered memories into refined long-term entries and a narrative journal. See [Long-term Memory](https://docs.cowagent.ai/memory/index) · [Deep Dream](https://docs.cowagent.ai/memory/deep-dream). -**Personal knowledge base** complements the time-ordered memory by organizing structured knowledge **by topic**. The Agent automatically curates valuable information from conversations, maintains cross-references and indexes, and the Web console offers an interactive knowledge-graph view. See [Personal Knowledge Base](https://docs.cowagent.ai/en/knowledge/index). +**Personal knowledge base** complements the time-ordered memory by organizing structured knowledge **by topic**. The Agent automatically curates valuable information from conversations, maintains cross-references and indexes, and the Web console offers an interactive knowledge-graph view. See [Personal Knowledge Base](https://docs.cowagent.ai/knowledge/index). 
@@ -170,7 +170,7 @@ A single Agent instance can serve multiple channels in parallel. Most channels c **MCP protocol** integrates the open ecosystem of [Model Context Protocol](https://modelcontextprotocol.io) servers. A single `mcp.json` is enough — supports stdio / SSE transports, hot reload, and zero-code integration. -Learn more: [Tools overview](https://docs.cowagent.ai/en/tools/index) · [MCP integration](https://docs.cowagent.ai/en/tools/mcp). +Learn more: [Tools overview](https://docs.cowagent.ai/tools/index) · [MCP integration](https://docs.cowagent.ai/tools/mcp). ### Skills System @@ -184,7 +184,7 @@ Learn more: [Tools overview](https://docs.cowagent.ai/en/tools/index) · [MCP in /skill install # one-click install ``` -Learn more: [Skills overview](https://docs.cowagent.ai/en/skills/index) · [Creating Skills](https://docs.cowagent.ai/en/skills/create). +Learn more: [Skills overview](https://docs.cowagent.ai/skills/index) · [Creating Skills](https://docs.cowagent.ai/skills/create).
@@ -202,7 +202,7 @@ Learn more: [Skills overview](https://docs.cowagent.ai/en/skills/index) · [Crea > **2026.02.03:** [v2.0.0](https://github.com/zhayujie/CowAgent/releases/tag/2.0.0) — Major upgrade to a super Agent assistant with multi-step task planning, long-term memory, and the Skills framework. -Full history: [Release Notes](https://docs.cowagent.ai/en/releases/overview) +Full history: [Release Notes](https://docs.cowagent.ai/releases/overview)
diff --git a/docs/channels/dingtalk.mdx b/docs/channels/dingtalk.mdx index df11019e..1db5f53c 100644 --- a/docs/channels/dingtalk.mdx +++ b/docs/channels/dingtalk.mdx @@ -1,35 +1,35 @@ --- -title: 钉钉 -description: 将 CowAgent 接入钉钉应用 +title: DingTalk +description: Integrate CowAgent into DingTalk application --- -通过钉钉开放平台创建智能机器人应用,将 CowAgent 接入钉钉。 +Integrate CowAgent into DingTalk by creating an intelligent robot app on the DingTalk Open Platform. -## 一、创建应用 +## 1. Create App -1. 进入 [钉钉开发者后台](https://open-dev.dingtalk.com/fe/app#/corp/app),登录后点击 **创建应用**,填写应用相关信息: +1. Go to [DingTalk Developer Console](https://open-dev.dingtalk.com/fe/app#/corp/app), log in and click **Create App**, fill in the app information: -2. 点击添加应用能力,选择 **机器人** 能力,点击 **添加**: +2. Click **Add App Capability**, select **Robot** capability and click **Add**: -3. 配置机器人信息后点击 **发布**。发布后,点击 "**点击调试**",会自动创建测试群聊,可在客户端查看: +3. Configure the robot information and click **Publish**. After publishing, click "**Debug**" to automatically create a test group chat, which can be viewed in the client: -4. 点击 **版本管理与发布**,创建新版本发布: +4. Click **Version Management & Release**, create a new version and publish: -## 二、项目配置 +## 2. Project Configuration -1. 点击 **凭证与基础信息**,获取 `Client ID` 和 `Client Secret`: +1. Click **Credentials & Basic Info**, get the `Client ID` and `Client Secret`: -2. 将以下配置加入项目根目录的 `config.json` 文件: +2. Add the following configuration to `config.json` in the project root: ```json { @@ -39,18 +39,20 @@ description: 将 CowAgent 接入钉钉应用 } ``` -3. 安装依赖: +3. Install the dependency: ```bash pip3 install dingtalk_stream ``` -4. 启动项目后,在钉钉开发者后台点击 **事件订阅**,点击 **已完成接入,验证连接通道**,显示 **连接接入成功** 即表示配置完成: + + +4. After starting the project, go to the DingTalk Developer Console, click **Event Subscription**, then click **Connection verified, verify channel**. When "**Connection successful**" is displayed, the configuration is complete: -## 三、使用 +## 3. 
Usage -与机器人私聊或将机器人拉入企业群中均可开启对话: +Chat privately with the robot or add it to an enterprise group to start a conversation: diff --git a/docs/channels/discord.mdx b/docs/channels/discord.mdx index 0563ab4c..e18c0685 100644 --- a/docs/channels/discord.mdx +++ b/docs/channels/discord.mdx @@ -1,51 +1,51 @@ --- title: Discord -description: 将 CowAgent 接入 Discord Bot +description: Integrate CowAgent with a Discord Bot --- -> 通过 Discord Bot 的 **Gateway 长连接** 接入 CowAgent,支持私聊(DM)与服务器频道(@机器人 / 回复机器人触发)。Gateway 基于 WebSocket 长连接,无需公网 IP 与回调地址,开箱即用。 +> Integrate CowAgent into Discord via a Discord Bot using the **Gateway** (persistent WebSocket). Supports direct messages (DM) and server channels (triggered by @mention or replying to the bot). The Gateway uses a persistent WebSocket connection — no public IP or callback URL required, works out of the box. -## 一、接入步骤 +## 1. Setup -### 步骤一:创建 Discord 应用与 Bot +### Step 1: Create a Discord Application and Bot -1. 打开 [Discord 开发者后台](https://discord.com/developers/applications),点击 **New Application**,填写名称(如 `CowAgent`)并创建。 -2. 左侧菜单进入 **Bot** 页面,点击 **Reset Token** 生成 Bot Token,复制并妥善保存(仅显示一次)。 +1. Open the [Discord Developer Portal](https://discord.com/developers/applications), click **New Application**, enter a name (e.g. `CowAgent`), and create it. +2. Go to the **Bot** page in the left sidebar, click **Reset Token** to generate a Bot Token, then copy and store it safely (shown only once). - 这个 Token 等同于 Bot 的密码,请勿泄露。若意外泄漏,在 Bot 页面再次点击 **Reset Token** 重置即可。 + This token is your bot's password — keep it secret. If it leaks, click **Reset Token** again on the Bot page to regenerate it. -### 步骤二:开启 Message Content Intent +### Step 2: Enable the Message Content Intent -私聊与频道读取文本均依赖该权限。 +Reading message text in both DMs and channels depends on this privileged intent. -1. 在 **Bot** 页面下方找到 **Privileged Gateway Intents**。 -2. 打开 **Message Content Intent** 开关并保存。 +1. On the **Bot** page, find **Privileged Gateway Intents**. +2. 
Turn on **Message Content Intent** and save. - 未开启该开关时,机器人收到的消息内容会为空,导致无响应。 + Without this intent enabled, incoming message content will be empty and the bot will not respond. -### 步骤三:邀请 Bot 进入服务器 +### Step 3: Invite the Bot to a Server -1. 左侧菜单进入 **OAuth2 → URL Generator**。 -2. **Scopes** 勾选 `bot`。 -3. **Bot Permissions** 至少勾选:`Send Messages`、`Read Message History`、`Attach Files`、`View Channels`。 -4. 复制底部生成的授权链接,在浏览器打开,选择目标服务器完成授权。 +1. Go to **OAuth2 → URL Generator** in the left sidebar. +2. Under **Scopes**, check `bot`. +3. Under **Bot Permissions**, check at least: `Send Messages`, `Read Message History`, `Attach Files`, `View Channels`. +4. Copy the generated authorization URL at the bottom, open it in a browser, and authorize it for your target server. - 仅需私聊(DM)可跳过此步,但仍需先在任意共同服务器中与机器人建立 DM 通道,或由用户主动私聊机器人。 + You can skip this step if you only need DMs, but you still need a DM channel with the bot (e.g. the user messages the bot directly). -### 步骤四:接入 CowAgent +### Step 4: Connect to CowAgent - - 打开 Web 控制台(本地链接:http://127.0.0.1:9899 ),选择 **通道** 菜单,点击 **接入通道**,选择 **Discord**,填入 Bot Token,点击接入即可。 + + Open the Web Console (default `http://127.0.0.1:9899`), go to **Channels**, click **Add Channel**, choose **Discord**, paste the Bot Token, and click connect. 
- - 在 `config.json` 中添加以下配置后启动: + + Add the following to `config.json` and start Cow: ```json { @@ -55,39 +55,39 @@ description: 将 CowAgent 接入 Discord Bot } ``` - | 参数 | 说明 | 默认值 | + | Key | Description | Default | | --- | --- | --- | - | `discord_token` | 开发者后台 Bot 页面生成的 Bot Token | - | - | `discord_group_trigger` | 频道触发方式:`mention_or_reply`(@或回复机器人)/ `mention_only`(仅@) / `all`(所有消息) | `mention_or_reply` | + | `discord_token` | Bot Token generated on the Bot page of the Developer Portal | - | + | `discord_group_trigger` | Channel trigger: `mention_or_reply` (@ or reply to bot) / `mention_only` (@ only) / `all` (all messages) | `mention_or_reply` | -启动 Cow 后,日志中出现以下输出即表示接入成功: +The integration is ready when you see logs like: ``` [Discord] Bot logged in as CowAgent#1234 (id=123456789) [Discord] ✅ Discord bot ready, listening for messages ``` -## 二、功能说明 +## 2. Capabilities -| 功能 | 支持情况 | +| Feature | Support | | --- | --- | -| 私聊(DM) | ✅ | -| 服务器频道(@机器人 / 回复机器人) | ✅ | -| 文本消息 | ✅ 收发 | -| 图片消息 | ✅ 收发 | -| 文件消息 | ✅ 收发(PDF / Word / Excel 等) | +| Direct message (DM) | ✅ | +| Server channel (@bot / reply to bot) | ✅ | +| Text messages | ✅ send / receive | +| Image messages | ✅ send / receive | +| File messages | ✅ send / receive (PDF / Word / Excel, etc.) | - Discord 单条消息上限为 2000 字符,超长回复会自动按换行拆分为多条发送。 + A single Discord message is capped at 2000 characters; long replies are automatically split across multiple messages by line breaks. -## 三、使用 +## 3. Usage -完成接入后: +Once connected: -- **私聊(DM)**:在服务器成员列表中找到你的机器人,点击头像直接发消息对话。 -- **频道**:在已邀请机器人的频道中,使用 `@你的机器人 你好` 或 **回复机器人的某条消息** 触发对话。 +- **Direct message (DM)**: find your bot in the server member list, click its avatar, and message it directly. +- **Channel**: in a channel where the bot is invited, trigger it with `@your-bot hello` or by **replying to one of the bot's messages**. 
-发送图片或文件时,可以在附件的输入框中 **添加文字说明**(描述/问题)一并发送,机器人会结合附件回答。也支持先发附件再发问题,两条消息会自动合并提问。 +When sending an image or file, you can **add a text caption** (description / question) in the attachment input — the bot will answer based on both. Sending an attachment first and then a follow-up question also works; the two messages are merged automatically. diff --git a/docs/channels/feishu.mdx b/docs/channels/feishu.mdx index 5cb8fe80..1283d0c1 100644 --- a/docs/channels/feishu.mdx +++ b/docs/channels/feishu.mdx @@ -1,45 +1,44 @@ --- -title: 飞书 -description: 将 CowAgent 接入飞书应用 +title: Feishu (Lark) +description: Integrate CowAgent into Feishu via a custom enterprise app --- -> 通过飞书自建应用接入 CowAgent,支持单聊与群聊(@机器人),使用 WebSocket 长连接模式,无需公网 IP,支持流式打字机回复、语音消息收发。 +> Integrate CowAgent into Feishu via a custom enterprise app. Supports p2p chat and group chat (@bot), uses WebSocket long connection (no public IP needed), supports streaming typewriter replies and voice messages. - 接入需要是飞书企业用户且具有企业管理权限。 + You need to be a Feishu enterprise user with admin privileges. -## 一、接入方式 +## 1. Setup -### 方式一:扫码一键接入(推荐) - -启动 Cow 项目后在终端中即可完成扫码创建。或打开 Web 控制台(本地链接:http://127.0.0.1:9899 ),选择 **通道** 菜单,点击 **接入通道**,选择 **飞书**,点击 **一键创建飞书应用**,使用 **飞书 App** 扫描二维码即可自动完成应用创建并接入: +### Option 1: One-click Scan to Create (Recommended) +No need to manually create an app on the Feishu Developer Platform. Start the Cow project, open the web console (default `http://127.0.0.1:9899/`), go to **Channels**, click **Add Channel**, choose **Feishu**, then under the **Scan QR** tab click **One-click Create Feishu App** and scan with the **Feishu App** to complete app creation and connection automatically. - - 1. `lark-oapi` 依赖版本需要 >=1.5.5 - 2. 扫码创建出的应用会自动预置全部所需权限(消息收发、卡片读写、群聊事件等)和事件订阅,无需到开发者后台手动配置。 + 1. Requires `lark-oapi` ≥ 1.5.5. + 2. The created app comes with all required permissions (messaging, card read/write, group events, etc.) and event subscriptions pre-configured — no manual setup on the developer console needed. 
Currently only the Feishu mainland version is supported (Lark international not yet supported). +When starting from CLI without `feishu_app_id` configured, the QR code is also printed to the terminal. -### 方式二:手动创建接入 +### Option 2: Manual Setup -需要先在飞书开放平台创建自建应用并配置权限,再通过 Web 控制台或配置文件接入。 +Manually create a custom app on the Feishu Developer Platform, then connect via Web Console or config file. -**步骤一:创建应用** +**Step 1: Create the App** -1. 进入 [飞书开发平台](https://open.feishu.cn/app/),点击 **创建企业自建应用**: +1. Go to [Feishu Developer Platform](https://open.feishu.cn/app/), click **Create Enterprise Custom App**: -2. 在 **添加应用能力** 中,为应用添加 **机器人** 能力: +2. In **Add App Capabilities**, add the **Bot** capability: -3. 在 **权限管理** 中,将以下权限粘贴到输入框,全选并 **批量开通**: +3. In **Permission Management**, paste the following permissions and **Batch Enable** all: ``` im:message,im:message.group_at_msg,im:message.group_at_msg:readonly,im:message.p2p_msg,im:message.p2p_msg:readonly,im:message:send_as_bot,im:resource,cardkit:card:write @@ -47,18 +46,18 @@ im:message,im:message.group_at_msg,im:message.group_at_msg:readonly,im:message.p -4. 在 **凭证与基础信息** 中获取 `App ID` 和 `App Secret`: +4. Get `App ID` and `App Secret` from **Credentials & Basic Info**: -**步骤二:接入 CowAgent** +**Step 2: Connect to CowAgent** - - 打开 Web 控制台,选择 **通道** 菜单,点击 **接入通道**,选择 **飞书**,切换到「手动填写」Tab,输入 App ID 和 App Secret,点击接入即可。 + + Open the web console, go to **Channels**, click **Add Channel**, choose **Feishu**, switch to the **Manual** tab, enter App ID and App Secret, then click connect. 
- - 在 `config.json` 中添加以下配置后启动程序: + + Add the following to `config.json` and start the program: ```json { @@ -69,43 +68,43 @@ im:message,im:message.group_at_msg,im:message.group_at_msg:readonly,im:message.p } ``` - | 参数 | 说明 | 默认值 | + | Parameter | Description | Default | | --- | --- | --- | - | `feishu_app_id` | 飞书应用 App ID | - | - | `feishu_app_secret` | 飞书应用 App Secret | - | - | `feishu_stream_reply` | 是否开启流式打字机回复 | `true` | + | `feishu_app_id` | Feishu app App ID | - | + | `feishu_app_secret` | Feishu app App Secret | - | + | `feishu_stream_reply` | Enable streaming typewriter reply | `true` | -**步骤三:发布应用** +**Step 3: Publish the App** -1. 启动 Cow 项目后,在飞书开放平台点击 **事件与回调**,选择 **长连接** 模式并保存: +1. After Cow is running, go to **Events & Callbacks** in the Feishu Developer Platform, choose **Long Connection** mode and save: -2. 点击 **添加事件**,搜索 "接收消息",选择 **接收消息 v2.0** 并确认。 +2. Click **Add Event**, search for "Receive Message" and choose **Receive Message v2.0**. -3. 点击 **版本管理与发布**,创建版本并申请 **线上发布**,在飞书客户端审核通过: +3. Click **Version Management & Release**, create a version and apply for **Production Release**. Approve the request in the Feishu client: -## 二、功能说明 +## 2. Features -| 功能 | 支持情况 | +| Feature | Status | | --- | --- | -| 单聊 | ✅ | -| 群聊(@机器人) | ✅ | -| 文本消息 | ✅ 收发 | -| 图片消息 | ✅ 收发 | -| 语音消息 | ✅ 收发 | -| 流式回复 | ✅(通过 `feishu_stream_reply` 配置控制,默认开启) | +| P2P chat | ✅ | +| Group chat (@bot) | ✅ | +| Text messages | ✅ send/receive | +| Image messages | ✅ send/receive | +| Voice messages | ✅ send/receive | +| Streaming reply | ✅ (powered by Feishu cardkit streaming card) | - 流式回复需要机器人具备 `cardkit:card:write` 权限(一键创建已默认开通),且接收方飞书客户端版本 ≥ 7.20。低版本客户端会显示升级提示,权限或版本不满足时自动降级为普通文本回复。 + Streaming reply requires the `cardkit:card:write` permission (already enabled by one-click creation) and Feishu client version ≥ 7.20. Older clients see an upgrade prompt; if the permission or version is not satisfied, replies fall back to plain text automatically. -## 三、使用 +## 3. 
Usage -完成接入后,在飞书中搜索机器人名称即可开始单聊对话。 +After connection, search for the bot name in Feishu to start a chat. -如需在群聊中使用,将机器人添加到群中,@机器人发送消息即可。 +To use in groups, add the bot to a group and @-mention it. diff --git a/docs/channels/index.mdx b/docs/channels/index.mdx index b9dc5898..70bec878 100644 --- a/docs/channels/index.mdx +++ b/docs/channels/index.mdx @@ -1,45 +1,45 @@ --- -title: 通道概览 -description: CowAgent 支持的通道及能力矩阵 +title: Channels Overview +description: Channels supported by CowAgent and their capability matrix --- -CowAgent 支持接入多种聊天通道,启动时通过 `channel_type` 切换。Web 控制台默认开启,可与其他接入通道并行运行。 +CowAgent supports multiple chat channels. Switch between them at startup via `channel_type`. The Web Console is enabled by default and can run in parallel with other channels. -## 能力矩阵 +## Capability Matrix -下表汇总各通道支持的入站消息类型、机器人回复类型与群聊能力,方便按场景选择。 +The table below summarizes the inbound message types, bot reply types, and group chat capabilities supported by each channel, making it easy to choose by scenario. 
-| 通道 | 文本 | 图片 | 文件 | 语音 | 群聊 | +| Channel | Text | Image | File | Voice | Group Chat | | --- | :-: | :-: | :-: | :-: | :-: | -| [微信](/channels/weixin) | ✅ | ✅ | ✅ | ✅ | | -| [Web 控制台](/channels/web) | ✅ | ✅ | ✅ | ✅ | | -| [飞书](/channels/feishu) | ✅ | ✅ | ✅ | ✅ | ✅ | -| [钉钉](/channels/dingtalk) | ✅ | ✅ | ✅ | ✅ | ✅ | -| [企微智能机器人](/channels/wecom-bot) | ✅ | ✅ | ✅ | ✅ | ✅ | +| [WeChat](/channels/weixin) | ✅ | ✅ | ✅ | ✅ | | +| [Web Console](/channels/web) | ✅ | ✅ | ✅ | ✅ | | +| [Feishu](/channels/feishu) | ✅ | ✅ | ✅ | ✅ | ✅ | +| [DingTalk](/channels/dingtalk) | ✅ | ✅ | ✅ | ✅ | ✅ | +| [WeCom Bot](/channels/wecom-bot) | ✅ | ✅ | ✅ | ✅ | ✅ | | [QQ](/channels/qq) | ✅ | ✅ | ✅ | | ✅ | -| [企业微信应用](/channels/wecom) | ✅ | ✅ | ✅ | ✅ | | -| [公众号](/channels/wechatmp) | ✅ | ✅ | | ✅ | | +| [WeCom App](/channels/wecom) | ✅ | ✅ | ✅ | ✅ | | +| [Official Account](/channels/wechatmp) | ✅ | ✅ | | ✅ | | | [Telegram](/channels/telegram) | ✅ | ✅ | ✅ | ✅ | ✅ | | [Slack](/channels/slack) | ✅ | ✅ | ✅ | | ✅ | | [Discord](/channels/discord) | ✅ | ✅ | ✅ | | ✅ | -- **图片 / 文件 / 语音**列表示通道支持收发对应消息类型,具体细节详见各通道文档 -- **群聊**列指可识别并响应群消息 +- The **Image / File / Voice** columns indicate that the channel can send and receive the corresponding message types; see each channel's docs for details +- The **Group Chat** column indicates the ability to recognize and respond to group messages - 每个通道的语音 / 图像能力依赖对应模型厂商的配置,详见 [模型概览](/models)。 + The voice / image capabilities of each channel depend on the configuration of the corresponding model provider. See [Models Overview](/models/index) for details. 
-## 通道一览 +## Channel List -- [Web 控制台](/channels/web) — 内置浏览器对话和管理面板,默认开启 -- [微信](/channels/weixin) — 通过个人微信扫码登录 -- [飞书](/channels/feishu) — 飞书自建机器人 -- [钉钉](/channels/dingtalk) — 钉钉自建机器人 -- [企微智能机器人](/channels/wecom-bot) — 企业微信智能机器人 -- [QQ](/channels/qq) — QQ 官方机器人开放平台 -- [企业微信应用](/channels/wecom) — 企业微信自建应用接入 -- [公众号](/channels/wechatmp) — 微信公众号(订阅号 / 服务号) -- [Telegram](/channels/telegram) — 海外 IM,5 分钟接入,无需公网 IP -- [Slack](/channels/slack) — 团队协作 IM,Socket Mode 接入,无需公网 IP -- [Discord](/channels/discord) — 社区 IM,Gateway 长连接接入,无需公网 IP +- [Web Console](/channels/web) — built-in browser-based chat and management panel, enabled by default +- [WeChat](/channels/weixin) — log in via personal WeChat QR scan +- [Feishu](/channels/feishu) — Feishu custom bot +- [DingTalk](/channels/dingtalk) — DingTalk custom bot +- [WeCom Bot](/channels/wecom-bot) — WeCom AI Bot via WebSocket long connection +- [QQ](/channels/qq) — QQ Official Bot open platform +- [WeCom App](/channels/wecom) — WeCom custom app integration +- [Official Account](/channels/wechatmp) — WeChat Official Account (subscription / service) +- [Telegram](/channels/telegram) — global IM, 5-minute setup, no public IP needed +- [Slack](/channels/slack) — team collaboration IM, Socket Mode integration, no public IP needed +- [Discord](/channels/discord) — community IM, Gateway connection, no public IP needed diff --git a/docs/channels/qq.mdx b/docs/channels/qq.mdx index 3b7554a9..a7f08594 100644 --- a/docs/channels/qq.mdx +++ b/docs/channels/qq.mdx @@ -1,44 +1,44 @@ --- -title: QQ 机器人 -description: 将 CowAgent 接入 QQ 机器人(WebSocket 长连接模式) +title: QQ Bot +description: Connect CowAgent to QQ Bot (WebSocket long connection) --- -> 通过 QQ 开放平台的机器人接口接入 CowAgent,支持 QQ 单聊、QQ 群聊(@机器人)、频道消息和频道私信,无需公网 IP,使用 WebSocket 长连接模式。 +> Connect CowAgent via QQ Open Platform's bot API, supporting QQ direct messages, group chats (@bot), guild channel messages, and guild DMs. No public IP required — uses WebSocket long connection. 
QQ 机器人通过 QQ 开放平台创建,使用 WebSocket 长连接接收消息,通过 OpenAPI 发送消息,无需公网 IP 和域名。 + QQ Bot is created through the QQ Open Platform. It uses WebSocket long connection to receive messages and OpenAPI to send messages. No public IP or domain is required. -## 一、创建 QQ 机器人 +## 1. Create a QQ Bot -> 进入[QQ 开放平台](https://q.qq.com),QQ扫码登录,如果未注册开放平台账号,请先完成[账号注册](https://q.qq.com/#/register)。 +> Visit the [QQ Open Platform](https://q.qq.com), sign in with QQ. If you haven't registered, please complete [account registration](https://q.qq.com/#/register) first. -1.在 [QQ开放平台-机器人列表页](https://q.qq.com/#/apps),点击创建机器人: +1. Go to the [QQ Open Platform - Bot List](https://q.qq.com/#/apps), and click **Create Bot**: -2.填写机器人名称、头像等基本信息,完成创建: +2. Fill in the bot name, avatar, and other basic information to complete the creation: -3.点击进入机器人配置页面,选择**开发管理**菜单,完成以下步骤: +3. Enter the bot configuration page, go to **Development Management**, and complete the following steps: - - 复制并记录 **AppID**(机器人ID) - - 生成并记录 **AppSecret**(机器人秘钥) + - Copy and save the **AppID** (Bot ID) + - Generate and save the **AppSecret** (Bot Secret) -## 二、配置和运行 +## 2. Configuration and Running -### 方式一:Web 控制台接入 +### Option A: Web Console -启动 Cow项目后打开 Web 控制台 (本地链接为: http://127.0.0.1:9899/ ),选择 **通道** 菜单,点击 **接入通道**,选择 **QQ 机器人**,填写上一步保存的 AppID 和 AppSecret,点击接入即可。 +Start the program and open the Web console (local access: http://127.0.0.1:9899/). Go to the **Channels** tab, click **Connect Channel**, select **QQ Bot**, fill in the AppID and AppSecret from the previous step, and click Connect.
-### 方式二:配置文件接入 +### Option B: Config File -在 `config.json` 中添加以下配置: +Add the following to your `config.json`: ```json { @@ -48,41 +48,41 @@ description: 将 CowAgent 接入 QQ 机器人(WebSocket 长连接模式) } ``` -| 参数 | 说明 | +| Parameter | Description | | --- | --- | -| `qq_app_id` | QQ 机器人的 AppID,在开放平台开发管理中获取 | -| `qq_app_secret` | QQ 机器人的 AppSecret,在开放平台开发管理中获取 | +| `qq_app_id` | AppID of the QQ Bot, found in Development Management on the open platform | +| `qq_app_secret` | AppSecret of the QQ Bot, found in Development Management on the open platform | -配置完成后启动程序,日志显示 `[QQ] ✅ Connected successfully` 即表示连接成功。 +After configuration, start the program. The log message `[QQ] ✅ Connected successfully` indicates a successful connection. -## 三、使用 +## 3. Usage -在 QQ开放平台 - 管理 - **使用范围和人员** 菜单中,使用QQ客户端扫描 "添加到群和消息列表" 的二维码,即可开始与QQ机器人的聊天: +In the QQ Open Platform, go to **Management → Usage Scope & Members**, scan the "Add to group and message list" QR code with your QQ client to start chatting with the bot: -对话效果: +Chat example: -## 四、功能说明 +## 4. Supported Features -> 注意:若需在群聊及频道中使用QQ机器人,需完成发布上架审核并在使用范围配置权限使用范围。 +> Note: To use the QQ bot in group chats and guild channels, you need to complete the publishing review and configure usage scope permissions. -| 功能 | 支持情况 | +| Feature | Status | | --- | --- | -| QQ 单聊 | ✅ | -| QQ 群聊(@机器人) | ✅ | -| 频道消息(@机器人) | ✅ | -| 频道私信 | ✅ | -| 文本消息 | ✅ 收发 | -| 图片消息 | ✅ 收发(群聊和单聊) | -| 文件消息 | ✅ 发送(群聊和单聊) | -| 定时任务 | ✅ 主动推送(每月每用户限 4 条) | +| QQ Direct Messages | ✅ | +| QQ Group Chat (@bot) | ✅ | +| Guild Channel (@bot) | ✅ | +| Guild DM | ✅ | +| Text Messages | ✅ Send & Receive | +| Image Messages | ✅ Send & Receive (group & direct) | +| File Messages | ✅ Send (group & direct) | +| Scheduled Tasks | ✅ Active push (4 per user per month) | -## 五、注意事项 +## 5. 
Notes -- **被动消息限制**:QQ 单聊被动消息有效期为 60 分钟,每条消息最多回复 5 次;QQ 群聊被动消息有效期为 5 分钟。 -- **主动消息限制**:单聊和群聊每月主动消息上限为 4 条,在使用定时任务功能时需要注意这个限制 -- **事件权限**:默认订阅 `GROUP_AND_C2C_EVENT`(QQ群/单聊)和 `PUBLIC_GUILD_MESSAGES`(频道公域消息),如需其他事件类型请在开放平台申请权限。 +- **Passive message limits**: QQ direct message replies are valid for 60 minutes (max 5 replies per message); group chat replies are valid for 5 minutes. +- **Active message limits**: Both direct and group chats have a monthly limit of 4 active messages. Keep this in mind when using the scheduled tasks feature. +- **Event permissions**: By default, `GROUP_AND_C2C_EVENT` (QQ group/direct) and `PUBLIC_GUILD_MESSAGES` (guild public messages) are subscribed. Apply for additional permissions on the open platform if needed. diff --git a/docs/channels/slack.mdx b/docs/channels/slack.mdx index 1103f1c0..f95272ca 100644 --- a/docs/channels/slack.mdx +++ b/docs/channels/slack.mdx @@ -1,29 +1,29 @@ --- title: Slack -description: 将 CowAgent 接入 Slack App +description: Integrate CowAgent with a Slack App --- -> 通过 Slack App 的 **Socket Mode** 接入 CowAgent,支持私聊(DM)与频道(@机器人 / 线程内回复触发)。Socket Mode 基于长连接,无需公网 IP 与回调地址,开箱即用。 +> Integrate CowAgent into Slack via a Slack App in **Socket Mode**. Supports direct messages (DM) and channels (triggered by @mention or replying within a thread). Socket Mode uses a persistent WebSocket connection — no public IP or callback URL required, works out of the box. -## 一、接入步骤 +## 1. Setup -### 步骤一:创建 Slack App +### Step 1: Create a Slack App -1. 打开 [Slack API 应用管理页](https://api.slack.com/apps),点击 **Create New App** → **From scratch**。 -2. 填写 **App Name**(如 `CowAgent`),选择要安装的 **Workspace**,点击创建。 +1. Open the [Slack API apps page](https://api.slack.com/apps), click **Create New App** → **From scratch**. +2. Enter an **App Name** (e.g. `CowAgent`), pick the **Workspace** to install into, and create it. -### 步骤二:开启 Socket Mode 并获取 App Token +### Step 2: Enable Socket Mode and get the App Token -1. 
左侧菜单进入 **Settings → Socket Mode**,打开 **Enable Socket Mode**。 -2. 系统会提示生成一个 **App-Level Token**,作用域勾选 `connections:write`,生成后保存这串以 `xapp-` 开头的 Token。 +1. In the left sidebar go to **Settings → Socket Mode** and turn on **Enable Socket Mode**. +2. You will be prompted to generate an **App-Level Token** with the `connections:write` scope. Save this token starting with `xapp-`. - Socket Mode 通过 WebSocket 长连接接收事件,无需在公网暴露回调 URL,适合本地或内网部署。 + Socket Mode receives events over a WebSocket connection, so you don't need to expose a public callback URL — ideal for local or intranet deployments. -### 步骤三:配置 Bot 权限并安装 +### Step 3: Configure bot scopes and install -1. 进入 **Features → OAuth & Permissions**,在 **Bot Token Scopes** 中点击 **Add an OAuth Scope**,逐项添加以下权限: +1. Go to **Features → OAuth & Permissions**, click **Add an OAuth Scope** under **Bot Token Scopes**, and add the following scopes one by one: ``` app_mentions:read @@ -39,10 +39,10 @@ description: 将 CowAgent 接入 Slack App ``` - `files:read` / `files:write` 用于图片、文件的收发;若仅需文本对话可省略。 + `files:read` / `files:write` are used for sending/receiving images and files; omit them if you only need text conversations. -2. 进入 **Features → Event Subscriptions**,打开 **Enable Events**,在 **Subscribe to bot events** 中点击 **Add Bot User Event** 添加以下事件: +2. Go to **Features → Event Subscriptions**, turn on **Enable Events**, and under **Subscribe to bot events** click **Add Bot User Event** to add: ``` app_mention @@ -51,23 +51,23 @@ description: 将 CowAgent 接入 Slack App ``` - 如需在私有频道使用,再添加 `message.groups`。 + Add `message.groups` if you need to use the bot in private channels. -3. 进入 **Features → App Home**,在 **Show Tabs** 区域勾选 **Messages Tab**,并勾选下方的 **Allow users to send Slash commands and messages from the messages tab**(允许用户从消息标签页发送消息),否则私聊输入框会被关闭、无法给机器人发消息。 -4. 回到 **OAuth & Permissions**,点击 **Install to Workspace** 完成安装,安装后获取以 `xoxb-` 开头的 **Bot User OAuth Token**。 +3. 
Go to **Features → App Home**, enable **Messages Tab** under **Show Tabs**, and check **Allow users to send Slash commands and messages from the messages tab**. Otherwise the DM input box is disabled and users cannot message the bot. +4. Back in **OAuth & Permissions**, click **Install to Workspace**. After installing, copy the **Bot User OAuth Token** starting with `xoxb-`. - 若 Slack 客户端仍提示「向此应用发送消息的功能已关闭」,请确认已完成上一步的 App Home 设置,并刷新或重启 Slack 客户端(必要时把 App 从对话列表移除后重新打开)。 + If the Slack client still shows "Sending messages to this app has been turned off", make sure you completed the App Home step above, then refresh or restart the Slack client (remove the app from your conversations and reopen it if needed). -### 步骤四:接入 CowAgent +### Step 4: Connect to CowAgent - - 打开 Web 控制台(本地链接:http://127.0.0.1:9899 ),选择 **通道** 菜单,点击 **接入通道**,选择 **Slack**,分别填入 Bot Token(`xoxb-`)和 App Token(`xapp-`),点击接入即可。 + + Open the Web Console (default `http://127.0.0.1:9899`), go to **Channels**, click **Add Channel**, choose **Slack**, paste the Bot Token (`xoxb-`) and App Token (`xapp-`), and click connect. 
- - 在 `config.json` 中添加以下配置后启动: + + Add the following to `config.json` and start Cow: ```json { @@ -78,41 +78,41 @@ description: 将 CowAgent 接入 Slack App } ``` - | 参数 | 说明 | 默认值 | + | Key | Description | Default | | --- | --- | --- | - | `slack_bot_token` | Bot User OAuth Token,形如 `xoxb-...` | - | - | `slack_app_token` | App-Level Token(开启 Socket Mode 后生成),形如 `xapp-...` | - | - | `slack_group_trigger` | 频道触发方式:`mention_or_reply`(@或线程内回复)/ `mention_only`(仅@) / `all`(所有消息) | `mention_or_reply` | + | `slack_bot_token` | Bot User OAuth Token, like `xoxb-...` | - | + | `slack_app_token` | App-Level Token (generated after enabling Socket Mode), like `xapp-...` | - | + | `slack_group_trigger` | Channel trigger: `mention_or_reply` (@ or reply in thread) / `mention_only` (@ only) / `all` (all messages) | `mention_or_reply` | -启动 Cow 后,日志中出现以下输出即表示接入成功: +The integration is ready when you see logs like: ``` [Slack] Bot logged in as user_id=U0XXXXXXX, team=Txxxxxxxx [Slack] ✅ Slack bot ready, listening for events ``` -## 二、功能说明 +## 2. Capabilities -| 功能 | 支持情况 | +| Feature | Support | | --- | --- | -| 私聊(DM) | ✅ | -| 频道(@机器人 / 线程内回复) | ✅ | -| 文本消息 | ✅ 收发 | -| 图片消息 | ✅ 收发 | -| 文件消息 | ✅ 收发(PDF / Word / Excel 等) | -| 线程回复 | ✅ 回复发送至触发消息所在线程 | +| Direct message (DM) | ✅ | +| Channel (@bot / reply in thread) | ✅ | +| Text messages | ✅ send / receive | +| Image messages | ✅ send / receive | +| File messages | ✅ send / receive (PDF / Word / Excel, etc.) | +| Thread replies | ✅ replies are posted to the thread of the triggering message | - Slack 通过线程(Thread)组织对话。机器人会把回复发送到触发消息所在的线程,频道内更整洁。 + Slack organizes conversations into threads. The bot posts replies into the thread of the triggering message, keeping channels tidy. -## 三、使用 +## 3. 
Usage -完成接入后: +Once connected: -- **私聊(DM)**:在 Slack 左侧 **Apps** 中找到你的 App,直接发消息对话。 -- **频道**:把 App 邀请进频道(`/invite @你的App`),使用 `@你的App 你好` 触发对话;后续在同一线程内直接回复即可继续对话。 +- **Direct message (DM)**: find your App under **Apps** in the Slack sidebar and message it directly. +- **Channel**: invite the App into a channel (`/invite @your-app`), then trigger it with `@your-app hello`; continue the conversation by replying within the same thread. -发送图片或文件时,可以在附件的输入框中 **添加文字说明**(描述/问题)一并发送,机器人会结合附件回答。也支持先发附件再发问题,两条消息会自动合并提问。 +When sending an image or file, you can **add a text caption** (description / question) in the attachment input — the bot will answer based on both. Sending an attachment first and then a follow-up question also works; the two messages are merged automatically. diff --git a/docs/channels/telegram.mdx b/docs/channels/telegram.mdx index d7ab7a44..f90da992 100644 --- a/docs/channels/telegram.mdx +++ b/docs/channels/telegram.mdx @@ -1,47 +1,47 @@ --- title: Telegram -description: 将 CowAgent 接入 Telegram Bot +description: Integrate CowAgent with Telegram via the Bot API --- -> 通过 Telegram Bot API 接入 CowAgent,支持单聊与群聊(@机器人 / 回复机器人触发),使用 Long Polling 模式无需公网 IP,开箱即用。 +> Integrate CowAgent into Telegram via the official Bot API. Supports private chat and group chat (triggered by @mention or replying to the bot). Uses Long Polling — no public IP required, works out of the box. -## 一、接入步骤 +## 1. Setup -### 步骤一:通过 BotFather 创建 Bot +### Step 1: Create a Bot via BotFather -1. 在 Telegram 中搜索并打开官方账号 [@BotFather](https://t.me/BotFather)。 -2. 发送 `/newbot` 命令,按提示输入: - - **Bot 名称**(显示名,可中文,例如 `My CowAgent Bot`) - - **Bot 用户名**(必须以 `bot` 结尾,例如 `my_cowagent_bot`) -3. 创建成功后,BotFather 会返回一段 **HTTP API Token**(形如 `123456789:ABCdefGhIJKlmNoPQRsTUVwxyZ`),妥善保存。 +1. Open the official account [@BotFather](https://t.me/BotFather) in Telegram. +2. Send `/newbot` and follow the prompts: + - **Bot name** (display name, e.g. `My CowAgent Bot`) + - **Bot username** (must end with `bot`, e.g. 
`my_cowagent_bot`) +3. Once created, BotFather returns an **HTTP API Token** (e.g. `123456789:ABCdefGhIJKlmNoPQRsTUVwxyZ`). Keep it safe. - 这个 Token 等同于 Bot 的密码,请勿泄露。如果意外泄漏可向 `@BotFather` 发送 `/revoke` 重置。 + The token is the password of your bot — never share it. If it leaks, send `/revoke` to `@BotFather` to reset it. -### 步骤二:(群聊使用)关闭 Privacy Mode +### Step 2: (Group chat only) Disable Privacy Mode -仅使用单聊可跳过此步。Telegram Bot 默认开启 **Privacy Mode**,群聊中只能收到带 `@bot` 的命令(如 `/start@your_bot`)以及对 bot 消息的 reply;**普通的 `@bot 你好` 文字消息收不到**,会导致群聊无响应。 +Skip this step if you only use private chat. Telegram bots run in **Privacy Mode** by default — in groups they can only see commands suffixed with `@bot` (e.g. `/start@your_bot`) and replies to bot messages; **plain `@bot hello` text messages are not delivered**, so the bot will appear unresponsive in groups. -向 `@BotFather` 发送: +Send the following to `@BotFather`: 1. `/setprivacy` -2. 选择刚才创建的 bot -3. 选择 `Disable` +2. Pick the bot you just created +3. Choose `Disable` - 若设置后群聊仍无响应,可尝试把 Bot 从群里移除并重新拉入。 + If the bot is still silent in groups after this, try removing it from the group and adding it back. -### 步骤三:接入 CowAgent +### Step 3: Connect to CowAgent - - 打开 Web 控制台(本地链接:http://127.0.0.1:9899 ),选择 **通道** 菜单,点击 **接入通道**,选择 **Telegram**,填入 Bot Token,点击接入即可。 + + Open the Web Console (default `http://127.0.0.1:9899`), go to **Channels**, click **Add Channel**, choose **Telegram**, paste the Bot Token, and click connect. 
- - 在 `config.json` 中添加以下配置后启动: + + Add the following to `config.json` and start Cow: ```json { @@ -51,16 +51,15 @@ description: 将 CowAgent 接入 Telegram Bot } ``` - | 参数 | 说明 | 默认值 | + | Key | Description | Default | | --- | --- | --- | - | `telegram_token` | BotFather 返回的 HTTP API Token | - | - | `telegram_group_trigger` | 群聊触发方式:`mention_or_reply`(@或回复机器人)/ `mention_only`(仅@) / `all`(所有消息) | `mention_or_reply` | - | `telegram_register_commands` | 启动时是否自动向 BotFather 注册命令菜单 | `true` | - | `telegram_proxy` | (可选)代理地址,如 `http://127.0.0.1:7890`、`socks5://127.0.0.1:1080`;运行环境无法直连 `api.telegram.org` 时配置,留空则使用环境变量 `HTTPS_PROXY` | `""` | + | `telegram_token` | HTTP API Token returned by BotFather | - | + | `telegram_group_trigger` | Group trigger: `mention_or_reply` (@ or reply) / `mention_only` (@ only) / `all` (all messages) | `mention_or_reply` | + | `telegram_register_commands` | Whether to register the command menu with BotFather on startup | `true` | -启动 Cow 后,日志中出现以下输出即表示接入成功: +The integration is ready when you see logs like: ``` [Telegram] Bot logged in as @my_cowagent_bot (id=123456789) @@ -68,45 +67,45 @@ description: 将 CowAgent 接入 Telegram Bot [Telegram] ✅ Telegram bot ready, polling for updates ``` -## 二、功能说明 +## 2. Capabilities -| 功能 | 支持情况 | +| Feature | Support | | --- | --- | -| 单聊 | ✅ | -| 群聊(@机器人 / 回复机器人) | ✅ | -| 文本消息 | ✅ 收发 | -| 图片消息 | ✅ 收发 | -| 语音消息 | ✅ 收发(接收 OGG/Opus,发送 OGG/Opus) | -| 视频消息 | ✅ 收发 | -| 文件消息 | ✅ 收发(PDF / Word / Excel 等) | -| 命令菜单 | ✅ 与 Web 控制台 slash 命令一致 | +| Private chat | ✅ | +| Group chat (@bot / reply to bot) | ✅ | +| Text messages | ✅ send / receive | +| Image messages | ✅ send / receive | +| Voice messages | ✅ send / receive (OGG/Opus) | +| Video messages | ✅ send / receive | +| File messages | ✅ send / receive (PDF / Word / Excel, etc.) 
| +| Command menu | ✅ aligned with Web Console slash commands | -### 命令菜单 +### Command Menu -启动时会自动向 BotFather 注册命令菜单,用户在 Telegram 输入框输入 `/` 会出现下拉提示: +On startup, the channel registers a command menu with BotFather. Typing `/` in Telegram shows a dropdown: -| 命令 | 说明 | +| Command | Description | | --- | --- | -| `/help` | 显示命令帮助 | -| `/status` | 查看运行状态 | -| `/context` | 查看对话上下文(`/context clear` 清除) | -| `/skill` | 技能管理(`/skill list`、`/skill install` 等) | -| `/memory` | 记忆管理(`/memory dream`) | -| `/knowledge` | 知识库管理(`/knowledge list` / `on` / `off`) | -| `/config` | 查看当前配置 | -| `/cancel` | 中止当前正在运行的 Agent 任务 | -| `/logs` | 查看最近日志 | -| `/version` | 查看版本 | +| `/help` | Show command help | +| `/status` | View runtime status | +| `/context` | View conversation context (`/context clear` to clear) | +| `/skill` | Skill management (`/skill list`, `/skill install`, ...) | +| `/memory` | Memory management (`/memory dream`) | +| `/knowledge` | Knowledge base (`/knowledge list` / `on` / `off`) | +| `/config` | View current config | +| `/cancel` | Cancel the running Agent task | +| `/logs` | View recent logs | +| `/version` | Show version | - Telegram 命令菜单只能展示一级命令,子命令通过空格输入即可,例如 `/skill list`、`/context clear`。 + Telegram's command menu only displays top-level commands; subcommands are entered with a space, e.g. `/skill list`, `/context clear`. -## 三、使用 +## 3. Usage -完成接入后: +Once connected: -- **单聊**:在 Telegram 中搜索你创建的 Bot 用户名(如 `@my_cowagent_bot`),点击 `Start` 即可开始对话。 -- **群聊**:把 Bot 拉进群,使用 `@bot 你好` 或 **回复 Bot 的某条消息** 触发对话。若群聊无响应,请检查 Privacy Mode 是否已按 [步骤二](#步骤二-群聊使用-关闭-privacy-mode) 关闭。 +- **Private chat**: search for your bot username (e.g. `@my_cowagent_bot`) in Telegram, click `Start` and chat away. +- **Group chat**: add the bot to a group, then trigger it with `@bot hello` or by **replying to one of the bot's messages**. If the bot doesn't respond in groups, double-check Privacy Mode in [Step 2](#step-2-group-chat-only-disable-privacy-mode). 
-发送图片或文件时,可以直接在附件上方的输入框中 **添加 Caption**(描述/问题)一并发送,机器人会结合附件回答。也支持先发附件再发问题,两条消息会自动合并提问。 +When sending an image or file, you can **add a caption** (description / question) directly in the attachment input — the bot will answer based on both. Sending an attachment first and then a follow-up question also works; the two messages are merged automatically. diff --git a/docs/channels/web.mdx b/docs/channels/web.mdx index 810da3da..97c43077 100644 --- a/docs/channels/web.mdx +++ b/docs/channels/web.mdx @@ -1,11 +1,11 @@ --- -title: Web 控制台 -description: 通过 Web 控制台使用 CowAgent +title: Web Console +description: Use CowAgent through the Web Console --- -Web 控制台是 CowAgent 的默认通道,启动后会自动运行,通过浏览器即可与 Agent 对话,并支持在线管理模型、技能、记忆、通道等配置。 +The Web Console is CowAgent's default channel. It runs automatically once started, letting you chat with the Agent in a browser and manage models, skills, memory, channels, and other configuration online. -## 配置 +## Configuration ```json { @@ -17,79 +17,79 @@ Web 控制台是 CowAgent 的默认通道,启动后会自动运行,通过浏 } ``` -| 参数 | 说明 | 默认值 | +| Parameter | Description | Default | | --- | --- | --- | -| `channel_type` | 设为 `web` | `web` | -| `web_host` | Web 服务监听地址,默认监听 `127.0.0.1`(仅本机),如需公网访问请改为 `0.0.0.0` 并设置密码 | `""` | -| `web_port` | Web 服务监听端口 | `9899` | -| `web_password` | 访问密码,留空表示不启用密码保护;监听 `0.0.0.0` 时建议设置 | `""` | -| `web_session_expire_days` | 登录会话有效天数 | `30` | -| `web_file_serve_root` | web 端可直接读取/发送的文件根目录,默认仅限用户主目录及 agent 工作空间;设为 `/` 可放开整个文件系统 | `"~"` | -| `enable_thinking` | 是否启用深度思考模式 | `false` | +| `channel_type` | Set to `web` | `web` | +| `web_host` | Web service listen address. Defaults to `127.0.0.1` (local only); set to `0.0.0.0` for public access and configure a password | `""` | +| `web_port` | Web service listen port | `9899` | +| `web_password` | Access password. 
Leave empty to disable password protection; recommended when listening on `0.0.0.0` | `""` | +| `web_session_expire_days` | Login session validity in days | `30` | +| `web_file_serve_root` | Root directory the web console can directly read/send files from. Defaults to the user home dir and agent workspace only; set to `/` to allow the whole filesystem | `"~"` | +| `enable_thinking` | Whether to enable deep thinking mode | `false` | -配置密码后,访问控制台时需先输入密码完成登录。登录状态默认保持 30 天,期间重启服务也无需重新登录。密码也支持在控制台的「配置」页面中在线修改。 +Once a password is configured, you must enter it to log in when accessing the console. The login session is kept for 30 days by default, so restarting the service during that period does not require re-login. The password can also be changed online from the "Configuration" page in the console. -## 访问地址 +## Access URL -启动项目后访问: +After starting the project, visit: -- 本地运行:`http://localhost:9899` -- 服务器运行:`http://:9899` +- Local: `http://localhost:9899` +- Server: `http://<server-ip>:9899` - 请确保服务器防火墙和安全组已放行对应端口。 + Ensure the server firewall and security group allow the corresponding port. -## 功能介绍 +## Features -### 对话界面 +### Chat Interface -支持流式输出,可实时展示 Agent 的思考过程(Reasoning)和工具调用过程(Tool Calls),更直观地观察 Agent 的决策过程。深度思考功能可通过配置或控制台的「Agent 配置」开关控制。 +Supports streaming output with real-time display of the Agent's reasoning process and tool calls, so you can follow the Agent's decision-making more intuitively. Deep thinking can be toggled via configuration or the "Agent Configuration" switch in the console. -#### 多会话管理 +#### Multi-Session Management -对话界面支持多会话(Session)管理,所有会话记录持久化存储在数据库中: +The chat interface supports multi-session management. 
All session records are persistently stored in the database: -- **会话列表**:点击左侧历史会话图标可展开/收起会话列表面板,支持滚动加载全部历史会话 -- **AI 生成标题**:新会话在首轮对话完成后,自动调用模型生成简短的会话摘要标题 -- **新建会话**:点击会话列表顶部的「新对话」按钮或输入区的 `+` 按钮创建新会话 -- **删除会话**:点击会话项的删除按钮,确认后永久删除该会话及其所有消息 -- **清除上下文**:点击输入区的清除按钮,在当前会话中插入一条分隔线,分隔线以上的消息仍然展示但不再作为模型的上下文输入 +- **Session List**: Click the history icon on the left to expand/collapse the session list panel, with scroll-to-load support for all historical sessions +- **AI-Generated Titles**: After the first exchange in a new session, the model is automatically called to generate a short summary title +- **New Session**: Click the "New Chat" button at the top of the session list or the `+` button in the input area to create a new session +- **Delete Session**: Click the delete button on a session item and confirm to permanently delete the session and all its messages +- **Clear Context**: Click the clear button in the input area to insert a divider in the current session. Messages above the divider are still displayed but no longer included as context for the model -### 模型管理 +### Model Management -支持在线管理不同模型厂商的文本、图像、语音、向量模型配置,无需手动编辑配置文件: +Manage text, image, voice, and embedding model configurations for different providers online — no need to edit config files manually: -### 技能管理 +### Skill Management -支持在线查看和管理 Agent 技能(Skills): +View and manage Agent skills (Skills) online: -### 记忆管理 +### Memory Management -支持在线查看和管理 Agent 记忆: +View and manage Agent memory online: -### 通道管理 +### Channel Management -支持在线管理接入通道,支持实时连接/断开操作: +Manage connected channels online with real-time connect/disconnect operations: -### 定时任务 +### Scheduled Tasks -支持在线查看和管理定时任务,包括一次性任务、固定间隔、Cron 表达式等多种调度方式的可视化管理: +View and manage scheduled tasks online, including one-time tasks, fixed intervals, and Cron expressions: -### 日志 +### Logs -支持在线实时查看 Agent 运行日志,便于监控运行状态和排查问题: +View Agent runtime logs in real time for monitoring and troubleshooting: diff --git a/docs/channels/wechat-kf.mdx 
b/docs/channels/wechat-kf.mdx index ca83aaed..f0711d51 100644 --- a/docs/channels/wechat-kf.mdx +++ b/docs/channels/wechat-kf.mdx @@ -1,60 +1,60 @@ --- -title: 微信客服 -description: 将 CowAgent 接入微信客服(WeCom Customer Service) +title: WeCom Customer Service +description: Integrate CowAgent into WeCom Customer Service (微信客服) --- -通过把企业微信自建应用绑定到「微信客服」账号,CowAgent 可以接管来自外部微信用户的客服咨询,并可在小程序、公众号、视频号及视频号小店等场景中通过链接或二维码触达微信用户。 +By binding a WeCom custom enterprise app to a WeCom Customer Service (微信客服) account, CowAgent can take over inbound inquiries from external WeChat users and serve them through links or QR codes embedded in WeChat Mini Programs, Official Accounts, Video Channels, and Video Channel stores. - 微信客服只能使用 Docker 部署或服务器 Python 部署,需要公网可达的回调地址,不支持本地运行模式。 + WeCom Customer Service only supports Docker deployment or server Python deployment. A publicly reachable callback URL is required; local run mode is not supported. -## 一、准备 +## 1. Prerequisites -需要的资源: +Required resources: -1. 一台服务器(有公网 IP) -2. 注册并已认证的企业微信 -3. 已开通「微信客服」能力 +1. A server with a public IP +2. A registered and verified WeCom account +3. WeCom Customer Service capability enabled - 建议**单独再创建一个企微自建应用**用于微信客服,不要复用已有的 `wechatcom_app` 应用,否则两个通道会争抢同一个回调地址。 + It is recommended to create a **dedicated** WeCom custom app for Customer Service rather than reusing the existing `wechatcom_app` one — otherwise the two channels will compete for the same callback URL. -## 二、创建企业微信自建应用 +## 2. Create a WeCom Custom App -1. 在 [企业微信管理后台](https://work.weixin.qq.com/wework_admin/frame#apps),点击 **应用管理 → 创建应用**: +1. In the [WeCom Admin Console](https://work.weixin.qq.com/wework_admin/frame#apps), go to **Application Management → Create Application**: -2. 点击 **我的企业**,在最下方获取 **企业ID**(后续填写到 `wechat_kf_corp_id`): +2. Click **My Enterprise** and find the **Corp ID** at the bottom of the page (it goes into `wechat_kf_corp_id`): -3. 进入上一步创建的应用,点击 Secret 旁的「**查看**」,Secret 会推送到管理员手机端的企业微信里查看: +3. 
Open the app you just created and click **"View"** next to Secret. The Secret will be pushed to the admin's phone via the WeCom app, where it can be viewed: -4. 进入应用 **接收消息 → 设置API接收**,点击右侧「**随机获取**」生成 **Token** 和 **EncodingAESKey** 并保存: +4. Open the app's **Receive Messages → Set API Reception** page, click **"Random Generate"** to generate the **Token** and **EncodingAESKey**, and save them: - 此时保存 API 接收配置会失败,因为程序还未启动,等项目运行后再回来保存。 + Saving the API reception configuration will fail at this point because the program has not started yet. Come back to save it after the project is running. -## 三、配置和运行 +## 3. Configuration and Run -填入上一步收集到的 4 个字段(Corp ID / Secret / Token / EncodingAESKey): +Fill in the 4 fields collected from the previous step (Corp ID / Secret / Token / EncodingAESKey): - - 启动 Cow 项目后打开 Web 控制台,选择 **通道** 菜单,点击 **接入通道**,选择 **微信客服**,依次填入 Corp ID / Secret / Token / AES Key(端口默认 9888,可改),点击接入即可。 + + Start the Cow project and open the Web Console. Go to the **Channels** menu, click **Connect**, choose **WeCom Customer Service**, fill in Corp ID / Secret / Token / AES Key (port defaults to 9888, configurable), and click Connect. 
- - 在 `config.json` 中添加以下配置(各参数与企业微信后台的对应关系见上方截图): + + Add the following configuration to `config.json` (each parameter maps to a field shown in the screenshots above): ```json { @@ -67,34 +67,34 @@ description: 将 CowAgent 接入微信客服(WeCom Customer Service) } ``` - | 参数 | 说明 | + | Parameter | Description | | --- | --- | - | `wechat_kf_corp_id` | 企业 ID | - | `wechat_kf_secret` | 绑定到微信客服的那个企微自建应用的 Secret | - | `wechat_kf_token` | API 接收配置中的 Token | - | `wechat_kf_aes_key` | API 接收配置中的 EncodingAESKey | - | `wechat_kf_port` | 监听端口,默认 9888 | + | `wechat_kf_corp_id` | Corp ID | + | `wechat_kf_secret` | Secret of the WeCom custom app bound to Customer Service | + | `wechat_kf_token` | Token from the API reception config | + | `wechat_kf_aes_key` | EncodingAESKey from the API reception config | + | `wechat_kf_port` | Listening port, default 9888 | -接入完成后启动程序(Web 控制台方式会自动重启),日志中出现 `Listening on http://0.0.0.0:9888/wxkf/` 表示运行成功,需要将该端口对外开放(如在云服务器安全组中放行)。 +After connecting, start the program (the Web Console method restarts the channel automatically). When the log shows `Listening on http://0.0.0.0:9888/wxkf/`, the program is running successfully. You need to open this port externally (e.g., allow it in the cloud server security group). -接着回到企业微信「接收消息 → 设置API接收」,将回调 URL 填为 `http://:9888/wxkf/`,点击保存。保存成功后还需将服务器 IP 添加到 **企业可信IP** 中,否则无法收发消息: +Then go back to **Receive Messages → Set API Reception** in the WeCom console and set the callback URL to `http://:9888/wxkf/`, then click Save. After saving successfully, you also need to add the server IP to **Enterprise Trusted IPs**, otherwise messages cannot be sent or received: - 如遇到 URL 配置回调不通过或配置失败: - 1. 确保服务器防火墙关闭且安全组放行监听端口(默认 9888) - 2. 仔细检查 Token、Secret、EncodingAESKey 等参数配置是否一致,URL 格式是否正确 - 3. 认证企业微信需要配置与主体一致的备案域名 + If URL verification fails or the configuration is unsuccessful: + 1. Ensure the server firewall is disabled and the security group allows the listening port (default 9888) + 2. 
Carefully check that Token, Secret, EncodingAESKey and other parameters are consistent, and the URL format is correct + 3. Verified WeCom accounts must use a filed domain matching the entity -## 四、绑定微信客服账号 +## 4. Bind a WeCom Customer Service Account -进入企业微信管理后台 **微信客服** 页面,创建客服账号并与上一步的企微自建应用绑定: +In the WeCom Admin Console, go to **WeCom Customer Service**, create a customer service account, and bind it to the custom app you created above: @@ -102,28 +102,28 @@ description: 将 CowAgent 接入微信客服(WeCom Customer Service) -绑定完成后,进入 **微信客服 → 微信客服账号详情**,在「**接入链接**」一栏: +After binding, go to **WeCom Customer Service → Account Details**, and under **"Access Link"**: -- 点击「**复制链接**」可获得形如 `https://work.weixin.qq.com/kfid/kfcd83e5896b9ba07be` 的访问链接 -- 点击「**生成二维码**」可获得对应的二维码 +- Click **"Copy Link"** to get an access link like `https://work.weixin.qq.com/kfid/kfcd83e5896b9ba07be` +- Click **"Generate QR Code"** to get the corresponding QR code -把链接或二维码推给微信客户使用即可: +Distribute the link or QR code to your WeChat customers: -## 五、使用 +## 5. Usage -微信用户通过链接或二维码进入客服对话后,即可与 AI 进行多轮对话,支持文本、图片、语音等消息: +After WeChat users enter the customer service conversation via the link or QR code, they can chat with the AI across multiple turns, with support for text, image, and voice messages: -除此之外,基于微信生态官方能力,还可将微信客服应用在公众号、小程序、视频号及视频号小店等场景,详情可查看企业微信管理后台 [微信客服 → 接入场景](https://work.weixin.qq.com/wework_admin/frame#/app/servicer) 的相关说明: +Beyond that, leveraging the official WeChat ecosystem, WeCom Customer Service can also be embedded into Official Accounts, Mini Programs, Video Channels and more. 
See the **WeCom Customer Service → Access Scenarios** section in the [WeCom Admin Console](https://work.weixin.qq.com/wework_admin/frame#/app/servicer) for details: -## 常见问题 +## FAQ -需要确保已安装以下依赖: +Make sure the following dependencies are installed: ```bash pip install websocket-client pycryptodome diff --git a/docs/channels/wechatmp.mdx b/docs/channels/wechatmp.mdx index 917df41d..3c6c2c8b 100644 --- a/docs/channels/wechatmp.mdx +++ b/docs/channels/wechatmp.mdx @@ -1,22 +1,22 @@ --- -title: 微信公众号 -description: 将 CowAgent 接入微信公众号 +title: WeChat Official Account +description: Integrate CowAgent with WeChat Official Accounts --- -CowAgent 支持接入个人订阅号和企业服务号两种公众号类型。 +CowAgent supports both personal subscription accounts and enterprise service accounts. -| 类型 | 要求 | 特点 | +| Type | Requirements | Features | | --- | --- | --- | -| **个人订阅号** | 个人可申请 | 收到消息时会回复一条提示,回复生成后需用户主动发消息获取 | -| **企业服务号** | 企业申请,需通过微信认证开通客服接口 | 回复生成后可主动推送给用户 | +| **Personal Subscription** | Available to individuals | Sends a placeholder reply first; users must send a message to retrieve the full response | +| **Enterprise Service** | Enterprise with verified customer service API | Can proactively push replies to users | - 公众号仅支持服务器和 Docker 部署,不支持本地运行。需额外安装扩展依赖:`pip3 install -r requirements-optional.txt` + Official Accounts only support server and Docker deployment, not local run mode. Install extended dependencies: `pip3 install -r requirements-optional.txt` -## 一、个人订阅号 +## 1. Personal Subscription Account -在 `config.json` 中添加以下配置: +Add the following configuration to `config.json`: ```json { @@ -30,34 +30,34 @@ CowAgent 支持接入个人订阅号和企业服务号两种公众号类型。 } ``` -### 配置步骤 +### Setup Steps -这些配置需要和 [微信公众号后台](https://mp.weixin.qq.com/advanced/advanced?action=dev&t=advanced/dev) 中的保持一致,进入页面后,在左侧菜单选择 **设置与开发 → 基本配置 → 服务器配置**,按下图进行配置: +These configurations must be consistent with the [WeChat Official Account Platform](https://mp.weixin.qq.com/advanced/advanced?action=dev&t=advanced/dev). 
Navigate to **Settings & Development → Basic Configuration → Server Configuration** and configure as shown below: -1. 在公众平台启用开发者密码(对应配置 `wechatmp_app_secret`),并将服务器 IP 填入白名单 -2. 按上图填写 `config.json` 中与公众号相关的配置,要与公众号后台的配置一致 -3. 启动程序,启动后会监听 80 端口(若无权限监听,则在启动命令前加上 `sudo`;若 80 端口已被占用,则关闭该占用进程) -4. 在公众号后台 **启用服务器配置** 并提交,保存成功则表示已成功配置。注意 **"服务器地址(URL)"** 需要配置为 `http://{HOST}/wx` 的格式,其中 `{HOST}` 可以是服务器的 IP 或域名 +1. Enable the developer secret on the platform (corresponds to `wechatmp_app_secret`), and add the server IP to the whitelist +2. Fill in the official-account settings in `config.json` as shown above so that they match the platform configuration +3. Start the program, which listens on port 80 (prefix the start command with `sudo` if you lack permission to listen on it; if port 80 is already in use, stop the process occupying it) +4. **Enable server configuration** on the official account platform and submit. A successful save means the configuration is complete. Note that the **"Server URL"** must be in the format `http://{HOST}/wx`, where `{HOST}` can be the server IP or domain -随后关注公众号并发送消息即可看到以下效果: +After following the account and sending a message, you should see the following result: -由于受订阅号限制,回复内容较短的情况下(15s 内),可以立即完成回复,但耗时较长的回复则会先回复一句 "正在思考中",后续需要用户输入任意文字主动获取答案,而服务号则可以通过客服接口解决这一问题。 +Due to subscription account limitations, replies that are generated within 15s can be returned immediately, but longer-running replies will first send a "Thinking..." placeholder, requiring users to send any text to retrieve the answer. Enterprise service accounts can solve this with the customer service API. - **语音识别**:可利用微信自带的语音识别功能,需要在公众号管理页面的 "设置与开发 → 接口权限" 页面开启 "接收语音识别结果"。 + **Voice Recognition**: You can use WeChat's built-in voice recognition. Enable "Receive Voice Recognition Results" under "Settings & Development → API Permissions" on the official account management page. -## 二、企业服务号 +## 2. 
Enterprise Service Account -企业服务号与上述个人订阅号的接入过程基本相同,差异如下: +The setup process for enterprise service accounts is essentially the same as personal subscription accounts, with the following differences: -1. 在公众平台申请企业服务号并完成微信认证,在接口权限中确认已获得 **客服接口** 的权限 -2. 在 `config.json` 中设置 `"channel_type": "wechatmp_service"`,其他配置与上述订阅号相同 -3. 交互效果上,即使是较长耗时的回复,也可以主动推送给用户,无需用户手动获取 +1. Register an enterprise service account on the platform and complete WeChat certification. Confirm that the **Customer Service API** permission has been granted +2. Set `"channel_type": "wechatmp_service"` in `config.json`; other configurations remain the same +3. Even for longer replies, they can be proactively pushed to users without requiring manual retrieval ```json { diff --git a/docs/channels/wecom-bot.mdx b/docs/channels/wecom-bot.mdx index 7275639f..2cb51fff 100644 --- a/docs/channels/wecom-bot.mdx +++ b/docs/channels/wecom-bot.mdx @@ -1,56 +1,56 @@ --- -title: 企微智能机器人 -description: 将 CowAgent 接入企业微信智能机器人(长连接模式) +title: WeCom Bot +description: Connect CowAgent to WeCom AI Bot (WebSocket long connection) --- -> 通过企业微信智能机器人接入CowAgent,支持企业内部单聊和内部群聊,无需公网 IP,使用 WebSocket 长连接模式,支持Markdown渲染和流式输出。 +> Connect CowAgent via WeCom AI Bot, supporting both internal direct messages and group chats. No public IP required — uses a WebSocket long connection, with Markdown rendering and streaming output. - 智能机器人与企业微信自建应用是两种不同的接入方式。智能机器人使用 WebSocket 长连接,无需服务器公网 IP 和域名,配置更简单。 + WeCom Bot and WeCom App are two different integration methods. WeCom Bot uses a WebSocket long connection and requires no public IP or domain, making setup much simpler. -## 一、接入方式 +## 1. Connection methods -### 方式一:扫码一键接入(推荐) +### Option A: One-click QR scan (recommended) -无需提前创建机器人,启动 Cow 项目后打开 Web 控制台(本地链接:http://127.0.0.1:9899/),选择 **通道** 菜单,点击**接入通道**,选择**企微智能机器人**,切换到「扫码接入」模式,使用**企业微信**扫码即可自动完成机器人创建和接入。 +No need to create the bot ahead of time. 
Start CowAgent and open the Web console (local URL: http://127.0.0.1:9899/), go to the **Channels** tab, click **Connect Channel**, choose **WeCom Bot**, switch to **QR scan** mode, and scan the QR code with **WeCom** — bot creation and connection complete automatically. - 扫码成功后,可在企业微信工作台 - **智能机器人**页面对机器人进行进一步配置,包括修改名称、头像、可见范围等。 + After a successful scan, you can further configure the bot (name, avatar, visibility scope, etc.) in **WeCom Workbench → AI Bot**. -### 方式二:手动创建接入 +### Option B: Manual creation -需要先在企业微信中创建智能机器人并获取 Bot ID 和 Secret,再通过 Web 控制台或配置文件接入。 +Create the AI Bot in WeCom and obtain the Bot ID and Secret, then connect via the Web console or config file. -**步骤一:创建智能机器人** +**Step 1: Create the AI Bot** -1. 打开企业微信客户端,进入工作台,点击**智能机器人**: +1. Open the WeCom client, go to **Workbench**, and click **AI Bot**: -2. 点击创建机器人 - 手动创建: +2. Click **Create Bot → Manual Creation**: -3. 右侧窗口拖到最下方,选择**API模式创建**: +3. Scroll to the bottom of the right panel and select **API Mode**: -4. 设置机器人名称、头像、可见范围,并选择**长连接模式**,记录下 **Bot ID** 和 **Secret** 信息后点击保存。 +4. Set the bot name, avatar, and visibility scope. Choose **Long Connection** mode, save the **Bot ID** and **Secret**, then click Save. -**步骤二:接入 CowAgent** +**Step 2: Connect to CowAgent** - - 打开 Web 控制台,选择**通道**菜单,点击**接入通道**,选择**企微智能机器人**,切换到「手动填写」模式,输入 Bot ID 和 Secret,点击接入即可。 + + Open the Web console, go to the **Channels** tab, click **Connect Channel**, choose **WeCom Bot**, switch to **Manual** mode, enter the Bot ID and Secret, and click Connect. 
- - 在 `config.json` 中添加以下配置后启动程序: + + Add the following to `config.json`, then start CowAgent: ```json { @@ -60,31 +60,31 @@ description: 将 CowAgent 接入企业微信智能机器人(长连接模式) } ``` - | 参数 | 说明 | + | Parameter | Description | | --- | --- | - | `wecom_bot_id` | 智能机器人的 BotID | - | `wecom_bot_secret` | 智能机器人的 Secret | + | `wecom_bot_id` | Bot ID of the AI Bot | + | `wecom_bot_secret` | Secret of the AI Bot | -日志显示 `[WecomBot] Subscribe success` 即表示连接成功。 +The log line `[WecomBot] Subscribe success` confirms the connection is established. -## 二、功能说明 +## 2. Supported features -| 功能 | 支持情况 | +| Feature | Status | | --- | --- | -| 单聊 | ✅ | -| 群聊(@机器人) | ✅ | -| 文本消息 | ✅ 收发 | -| 图片消息 | ✅ 收发 | -| 文件消息 | ✅ 收发 | -| 流式回复 | ✅ | -| 定时任务主动推送 | ✅ | +| Direct chat | ✅ | +| Group chat (@bot) | ✅ | +| Text messages | ✅ Send / Receive | +| Image messages | ✅ Send / Receive | +| File messages | ✅ Send / Receive | +| Streaming replies | ✅ | +| Scheduled push messages | ✅ | -## 三、使用 +## 3. Usage -在企业微信中搜索创建的机器人名称,即可开始单聊对话。 +Search for the bot's name inside WeCom to start a direct chat. -如需在企微内部群聊中使用,将机器人添加到群中,@机器人发送消息即可。 +To use the bot in an internal group chat, add it to the group and @-mention it. diff --git a/docs/channels/wecom.mdx b/docs/channels/wecom.mdx index e0ed6fbc..e0aca17f 100644 --- a/docs/channels/wecom.mdx +++ b/docs/channels/wecom.mdx @@ -1,48 +1,48 @@ --- -title: 企微自建应用 -description: 将 CowAgent 接入企业微信自建应用 +title: WeCom +description: Integrate CowAgent into WeCom enterprise app --- -通过企业微信自建应用接入 CowAgent,支持企业内部人员单聊使用。 +Integrate CowAgent into WeCom through a custom enterprise app, supporting one-on-one chat for internal employees. - 企业微信只能使用 Docker 部署或服务器 Python 部署,不支持本地运行模式。 + WeCom only supports Docker deployment or server Python deployment. Local run mode is not supported. -## 一、准备 +## 1. Prerequisites -需要的资源: +Required resources: -1. 一台服务器(有公网 IP) -2. 注册一个企业微信(个人也可注册,但无法认证) -3. 认证企业微信还需要对应主体备案的域名 +1. 
A server with public IP (overseas server, or domestic server with a proxy for international API access) +2. A registered WeCom account (individual registration is possible but cannot be certified) +3. Certified WeCom accounts additionally require a domain filed under the corresponding entity -## 二、创建企业微信应用 +## 2. Create WeCom App -1. 在 [企业微信管理后台](https://work.weixin.qq.com/wework_admin/frame#profile) 点击 **我的企业**,在最下方获取 **企业ID**(后续填写到 `wechatcom_corp_id` 字段中)。 +1. In the [WeCom Admin Console](https://work.weixin.qq.com/wework_admin/frame#profile), click **My Enterprise** and find the **Corp ID** at the bottom of the page. Save this ID for the `wechatcom_corp_id` configuration field. -2. 切换到 **应用管理**,点击创建应用: +2. Switch to **Application Management** and click Create Application: -3. 进入应用创建页面,记录 `AgentId` 和 `Secret`: +3. On the application creation page, record the `AgentId` and `Secret`: -4. 点击 **设置API接收**,配置应用接口: +4. Click **Set API Reception** to configure the application interface: -- URL 格式为 `http://ip:port/wxcomapp`(认证企业需使用备案域名) -- 随机获取 `Token` 和 `EncodingAESKey` 并保存 +- URL format: `http://ip:port/wxcomapp` (certified enterprises must use a filed domain) +- Generate random `Token` and `EncodingAESKey` and save them for the configuration file - 此时保存 API 接收配置会失败,因为程序还未启动,等项目运行后再回来保存。 + The API reception configuration cannot be saved at this point because the program hasn't started yet. Come back to save it after the project is running. -## 三、配置和运行 +## 3. 
Configuration and Run -在 `config.json` 中添加以下配置(各参数与企业微信后台的对应关系见上方截图): +Add the following configuration to `config.json` (the mapping between each parameter and the WeCom console is shown in the screenshots above): ```json { @@ -57,41 +57,41 @@ description: 将 CowAgent 接入企业微信自建应用 } ``` -| 参数 | 说明 | +| Parameter | Description | | --- | --- | -| `wechatcom_corp_id` | 企业 ID | -| `wechatcomapp_token` | API 接收配置中的 Token | -| `wechatcomapp_secret` | 应用的 Secret | -| `wechatcomapp_agent_id` | 应用的 AgentId | -| `wechatcomapp_aes_key` | API 接收配置中的 EncodingAESKey | -| `wechatcomapp_port` | 监听端口,默认 9898 | +| `wechatcom_corp_id` | Corp ID | +| `wechatcomapp_token` | Token from API reception config | +| `wechatcomapp_secret` | App Secret | +| `wechatcomapp_agent_id` | App AgentId | +| `wechatcomapp_aes_key` | EncodingAESKey from API reception config | +| `wechatcomapp_port` | Listen port, default 9898 | -配置完成后启动程序。当后台日志显示 `http://0.0.0.0:9898/` 时说明程序运行成功,需要将该端口对外开放(如在云服务器安全组中放行)。 +After configuration, start the program. When the log shows `http://0.0.0.0:9898/`, the program is running successfully. You need to open this port externally (e.g., allow it in the cloud server security group). -程序启动后,回到企业微信后台保存 **消息服务器配置**,保存成功后还需将服务器 IP 添加到 **企业可信IP** 中,否则无法收发消息: +After the program starts, return to the WeCom Admin Console to save the **Message Server Configuration**. After saving successfully, you also need to add the server IP to **Enterprise Trusted IPs**, otherwise messages cannot be sent or received: - 如遇到 URL 配置回调不通过或配置失败: - 1. 确保服务器防火墙关闭且安全组放行监听端口 - 2. 仔细检查 Token、Secret Key 等参数配置是否一致,URL 格式是否正确 - 3. 认证企业微信需要配置与主体一致的备案域名 + If the URL configuration callback fails or the configuration is unsuccessful: + 1. Ensure the server firewall is disabled and the security group allows the listening port + 2. Carefully check that Token, Secret Key and other parameter configurations are consistent, and that the URL format is correct + 3. 
Certified WeCom accounts must configure a filed domain matching the entity -## 四、使用 +## 4. Usage -在企业微信中搜索刚创建的应用名称,即可直接对话: +Search for the app name you just created in WeCom to start chatting directly. You can run multiple instances listening on different ports to create multiple WeCom apps: -如需让外部个人微信用户使用,可在 **我的企业 → 微信插件** 中分享邀请关注二维码,个人微信扫码关注后即可与应用对话: +To allow external personal WeChat users to use the app, go to **My Enterprise → WeChat Plugin** and share the invite QR code. After scanning the code and following the account, personal WeChat users can join and chat with the app: -## 常见问题 +## FAQ -需要确保已安装以下依赖: +Make sure the following dependencies are installed: ```bash pip install websocket-client pycryptodome diff --git a/docs/channels/weixin.mdx b/docs/channels/weixin.mdx index c19974a4..0acb0a43 100644 --- a/docs/channels/weixin.mdx +++ b/docs/channels/weixin.mdx @@ -1,21 +1,21 @@ --- -title: 微信 -description: 将 CowAgent 接入个人微信(基于官方接口) +title: WeChat +description: Connect CowAgent to personal WeChat (via the official API) --- -> 接入个人微信,扫码登录即可使用,支持文本、图片、语音、文件、视频等消息的私聊收发。通过微信官方API进行接入,无安全风险,接入后会在会话中新增一个机器人助手,不影响当前账号的使用。 +> Connect CowAgent to your personal WeChat — scan to log in, no public IP required. Supports text, image, voice, file, and video messages in 1-on-1 chats. Backed by WeChat's official API; safe to use. After connecting, a bot assistant is added to your conversation list without affecting normal account usage. -## 一、配置和运行 +## 1. Setup and run -### 方式一:Web 控制台接入 +### Option A: Web console -启动 Cow 项目后打开 Web 控制台 (本地链接为: http://127.0.0.1:9899/ ),选择 **通道** 菜单,点击 **接入通道**,选择 **微信**,点击接入后按照提示扫码登录。 +Start CowAgent and open the Web console (local URL: http://127.0.0.1:9899/). Go to the **Channels** tab, click **Connect Channel**, select **WeChat**, and follow the prompts to scan in. 
-### 方式二:配置文件接入 +### Option B: Config file -在 `config.json` 中设置 `channel_type` 为 `weixin`: +Set `channel_type` to `weixin` in `config.json`: ```json { @@ -23,52 +23,49 @@ description: 将 CowAgent 接入个人微信(基于官方接口) } ``` -启动程序后,终端会显示二维码,使用微信扫码授权即可完成登录。 +After starting CowAgent, a QR code is displayed in the terminal. Scan it with WeChat to complete login. - - 1. 兼容历史配置:`channel_type` 设为 `wx` 同样可以启用微信通道。 - 2. 注意微信客户端需要更新至 8.0.69 版本或以上 + 1. For backward compatibility, setting `channel_type` to `wx` also activates the WeChat channel. + 2. The WeChat client must be on version **8.0.69** or higher. -## 二、使用说明 +## 2. Usage -微信扫码并进行授权确认后,即可完成接入并开始对话。接入微信后会在对话中创建出一个机器人助理,不会对已有账号的正常使用有任何影响。 +Once authorized, the integration completes and you can start chatting. A bot assistant is created in your WeChat conversation list, leaving normal account usage unaffected. -> 你可以通过搜索"微信ClawBot"随时找到这个机器人,还可以修改这个机器人的头像、备注等信息,将机器人置顶在消息列表等。 +> You can find the bot at any time by searching for **"微信ClawBot"**. You may also rename it, change its avatar, pin it to the top of your conversation list, and so on. +## 3. Login +### QR code login -## 三、登录说明 +On first startup, a QR code appears in the terminal (valid for around 2 minutes). Scan it with WeChat and confirm on your phone to log in. -### 扫码登录 +- The QR code refreshes automatically when it expires +- The `qrcode` dependency is already included in `requirements.txt`, so the QR code renders directly in the terminal after install -首次启动时,终端会显示一个二维码(有效期约 2 分钟)。使用微信扫描二维码并在手机上确认后即可完成登录。 +### Credential persistence -- 二维码过期后会自动刷新并重新显示 -- `requirements.txt` 中已默认包含 `qrcode` 依赖,安装后可在终端直接渲染二维码图案 +After a successful login, credentials are saved to `~/.weixin_cow_credentials.json`. Subsequent startups reuse the saved credentials with no need to re-scan. -### 凭证保存 +To force a re-login, delete the credentials file and restart. 
-登录成功后,凭证会自动保存至 `~/.weixin_cow_credentials.json`,下次启动时无需重新扫码。 +### Session expiry -如需重新登录,删除该凭证文件后重启程序即可。 +When the WeChat session expires (errcode `-14`), CowAgent automatically clears old credentials and initiates a new QR login — no manual intervention required. -### Session 过期 +## 4. Supported features -当微信 session 过期时(errcode -14),程序会自动清除旧凭证并重新发起扫码登录,无需手动干预。 - -## 四、功能说明 - -| 功能 | 支持情况 | +| Feature | Status | | --- | --- | -| 单聊 | ✅ | -| 文本消息 | ✅ 收发 | -| 图片消息 | ✅ 收发 | -| 文件消息 | ✅ 收发 | -| 视频消息 | ✅ 收发 | -| 语音消息 | ✅ 接收 (自带语音识别) | +| Direct messages | ✅ | +| Text messages | ✅ Send & Receive | +| Image messages | ✅ Send & Receive | +| File messages | ✅ Send & Receive | +| Video messages | ✅ Send & Receive | +| Voice messages | ✅ Receive (built-in speech recognition) | diff --git a/docs/cli/general.mdx b/docs/cli/general.mdx index 36af1783..8107fcb5 100644 --- a/docs/cli/general.mdx +++ b/docs/cli/general.mdx @@ -1,17 +1,17 @@ --- -title: 常用命令 -description: 查看状态、管理配置和上下文等常用命令 +title: General Commands +description: View status, manage config, and control context with commonly used commands --- -以下命令支持在对话中使用 `/` 前缀,也支持在终端中使用 `cow` 前缀(部分命令仅对话可用)。 +The following commands can be used in chat with the `/` prefix or in the terminal with the `cow` prefix (some are chat-only). - 在 Web 控制台中输入 `/` 会自动弹出命令提示,支持键盘上下选择和 Tab 补全。 + In the Web console, typing `/` brings up an autocomplete menu with keyboard navigation and Tab completion. ## help -显示所有可用命令的帮助信息。 +Show help information for all available commands. ```text /help @@ -19,29 +19,15 @@ description: 查看状态、管理配置和上下文等常用命令 ## status -查看当前会话和服务的运行状态,包括进程信息、模型配置、会话消息数量和已加载技能数量。 +View current session and service status, including process info, model configuration, message count, and loaded skills. 
```text /status ``` -输出示例: - -``` -🐮 CowAgent Status - -Process: PID 12345 | Running 2h 15m -Version: 2.0.4 -Channel: web -Model: MiniMax-M2.5 -Mode: agent - -Session: 12 messages | 8 skills loaded -``` - ## cancel -中止当前会话正在运行的 Agent 任务。在 Agent 执行长时间任务(例如多轮工具调用、长流式输出)时,可随时发送 `/cancel`,Agent 会在下一次工具执行前停止。Web 端、微信、企业微信、飞书等各通道均可使用。 +Abort the agent task currently running in this session. When the agent is busy with a long task (e.g. multi-turn tool calls or a long streaming response), send `/cancel` and the agent will stop before the next tool execution. Available across all channels — Web, WeChat, WeCom, Feishu, etc. ```text /cancel @@ -49,67 +35,67 @@ Session: 12 messages | 8 skills loaded ## config -查看或修改运行时配置。修改后立即生效,无需重启服务。 +View or modify runtime configuration. Changes take effect immediately without restarting. -**查看所有可配置项:** +**View all configurable items:** ```text /config ``` -**查看单个配置项:** +**View a single item:** ```text /config model ``` -**修改配置项:** +**Modify a config item:** ```text /config model deepseek-v4-flash ``` -**支持修改的配置项:** +**Configurable items:** -| 配置项 | 说明 | 示例值 | +| Item | Description | Example | | --- | --- | --- | -| `model` | AI 模型名称 | `deepseek-v4-flash` | -| `agent_max_context_tokens` | 最大上下文 tokens | `40000` | -| `agent_max_context_turns` | 最大上下文记忆轮次 | `30` | -| `agent_max_steps` | 单次任务最大决策步数 | `15` | -| `enable_thinking` | 是否启用深度思考模式 | `true` / `false` | +| `model` | AI model name | `deepseek-v4-flash` | +| `agent_max_context_tokens` | Max context tokens | `40000` | +| `agent_max_context_turns` | Max context memory turns | `30` | +| `agent_max_steps` | Max decision steps per task | `15` | +| `enable_thinking` | Enable deep thinking mode | `true` / `false` | - 修改 `model` 时,系统会自动匹配对应的模型调用方式。配置会写入 `config.json` 并持久保存。 + When changing `model`, the system automatically matches the corresponding model API. Configuration is persisted to `config.json`. 
## context -查看当前会话的上下文信息,包括消息数量、内容长度等统计。 +View current session context statistics, including message count and content length. ```text /context ``` -**清空当前会话上下文:** +**Clear current session context:** ```text /context clear ``` - 清空上下文后,Agent 会"忘记"之前的对话内容,适用于切换话题或释放上下文空间。 + Clearing context makes the Agent "forget" previous conversation, useful for switching topics or freeing context space. ## logs -查看最近的服务日志,默认显示最近 20 行,最多 50 行。 +View recent service logs. Shows the last 20 lines by default, up to 50. ```text /logs ``` -**指定行数:** +**Specify line count:** ```text /logs 50 @@ -117,7 +103,7 @@ Session: 12 messages | 8 skills loaded ## version -显示当前 CowAgent 版本号。 +Show the current CowAgent version. ```text /version diff --git a/docs/cli/index.mdx b/docs/cli/index.mdx index f6462ecb..e13b45a3 100644 --- a/docs/cli/index.mdx +++ b/docs/cli/index.mdx @@ -1,31 +1,31 @@ --- -title: 命令总览 -description: CowAgent 命令系统 — 终端 CLI 和对话命令 +title: Commands Overview +description: CowAgent command system — Terminal CLI and chat commands --- -CowAgent 提供两种命令交互方式: +CowAgent provides two ways to interact via commands: -- **终端CLI** — 在系统终端中执行 `cow <命令>`,用于服务管理、技能管理等运维操作 -- **对话命令** — 在对话中输入 `/<命令>` 或 `cow <命令>`,用于查看状态、管理技能、调整配置等 +- **Terminal CLI** — Run `cow <command>` in your system terminal for service management, skill management, and other operations +- **Chat Commands** — Type `/<command>` or `cow <command>` in any conversation to check status, manage skills, adjust configuration, etc. -## 终端命令 +## Cow CLI -通过一键安装脚本部署后,`cow` 命令会自动可用。手动安装的用户需要在项目根目录下额外执行: +After deploying with the one-click install script, the `cow` command is automatically available. For manual installations, run: ```bash pip install -e . 
``` -安装后即可在任意位置使用 `cow` 命令: +Then use the `cow` command from anywhere: ```bash cow help ``` -输出示例: +Example output: ``` -CowAgent CLI +🐮 CowAgent CLI Usage: cow @@ -49,35 +49,33 @@ Others: version Show version ``` -## 对话命令 +## Chat Commands -在 Web 控制台或任意接入渠道的对话中,支持输入以 `/` 开头的命令: +In the Web console or any connected channel, type `/` to see command suggestions. Supported commands: -| 命令 | 说明 | +| Command | Description | | --- | --- | -| `/help` | 显示命令帮助 | -| `/status` | 查看服务状态和配置 | -| `/cancel` | 中止当前正在运行的 Agent 任务 | -| `/config` | 查看或修改运行时配置 | -| `/skill` | 管理技能(安装、卸载、启用、禁用等) | -| `/memory dream [N]` | 手动触发记忆蒸馏(默认 3 天,最大 30) | -| `/knowledge` | 查看知识库统计信息 | -| `/knowledge list` | 查看知识库目录结构 | -| `/knowledge on\|off` | 开启或关闭知识库 | -| `/context` | 查看当前会话上下文信息 | -| `/context clear` | 清空当前会话上下文 | -| `/logs` | 查看最近日志 | -| `/version` | 显示版本号 | +| `/help` | Show command help | +| `/status` | View service status and configuration | +| `/cancel` | Abort the currently running agent task | +| `/config` | View or modify runtime configuration | +| `/skill` | Manage skills (install, uninstall, enable, disable, etc.) | +| `/memory dream [N]` | Manually trigger memory distillation (default 3 days, max 30) | +| `/knowledge` | View knowledge base statistics | +| `/knowledge list` | View knowledge base directory structure | +| `/knowledge on\|off` | Enable or disable knowledge base | +| `/context` | View current session context info | +| `/context clear` | Clear current session context | +| `/logs` | View recent logs | +| `/version` | Show version number | - 对话命令中 `/start`、`/stop`、`/restart` 等服务管理命令会提示到终端中执行,因为它们涉及进程操作。 + Service management commands like `/start`, `/stop`, `/restart` will prompt you to use them in the terminal instead, as they involve process operations. 
-## 命令对照表 +## Command Availability -以下是各命令在终端和对话中的可用性: - -| 命令 | 终端 (`cow`) | 对话 (`/`) | +| Command | Terminal (`cow`) | Chat (`/`) | | --- | :---: | :---: | | help | ✓ | ✓ | | version | ✓ | ✓ | @@ -86,13 +84,13 @@ Others: | cancel | ✗ | ✓ | | config | ✗ | ✓ | | context | — | ✓ | -| memory (子命令) | ✗ | ✓ | -| knowledge (子命令) | ✓ | ✓ | -| skill (子命令) | ✓ | ✓ | +| memory (subcommands) | ✗ | ✓ | +| knowledge (subcommands) | ✓ | ✓ | +| skill (subcommands) | ✓ | ✓ | | start / stop / restart | ✓ | ✗ | | update | ✓ | ✗ | | install-browser | ✓ | ✗ | - `context` 在终端中仅提示到对话中使用。`config` 仅支持在对话中修改。 + `context` only shows a hint in the terminal to use it in chat. `config` is only available in chat. diff --git a/docs/cli/memory-knowledge.mdx b/docs/cli/memory-knowledge.mdx index 3dc6713d..c748120c 100644 --- a/docs/cli/memory-knowledge.mdx +++ b/docs/cli/memory-knowledge.mdx @@ -1,63 +1,49 @@ --- -title: 记忆与知识库 -description: 记忆蒸馏和知识库管理命令 +title: Memory & Knowledge +description: Memory distillation and knowledge base management commands --- ## memory -管理 Agent 的长期记忆系统。 +Manage the Agent's long-term memory system. ### memory dream -手动触发记忆蒸馏(Deep Dream),整理近期的天级记忆,蒸馏合并到 MEMORY.md,并生成梦境日记。 +Manually trigger memory distillation (Deep Dream) — consolidate recent daily memories into MEMORY.md and generate a dream diary. 
```text /memory dream [N] ``` -- `N`:整理近 N 天的记忆,默认 3 天,最大 30 天 -- 蒸馏在后台异步执行,完成后会在对话中通知结果 -- 无需等待 Agent 初始化,首次对话前即可使用 +- `N`: Consolidate the last N days of memory (default 3, max 30) +- Runs asynchronously in the background; you'll be notified in chat when complete +- Works without Agent initialization — can be used before the first conversation -**示例:** +**Examples:** ```text -/memory dream # 整理近 3 天 -/memory dream 7 # 整理近 7 天 -/memory dream 30 # 整理近 30 天(全量) +/memory dream # Consolidate last 3 days +/memory dream 7 # Consolidate last 7 days +/memory dream 30 # Consolidate last 30 days (full) ``` -蒸馏完成后,Web 端会收到带有跳转链接的通知,可直接查看更新后的 MEMORY.md 和梦境日记。 +On the Web console, the completion notification includes clickable links to view the updated MEMORY.md and dream diary. - 系统每天 23:55 会自动执行一次蒸馏(lookback 1 天)。手动触发适用于首次部署后的历史整理,或需要立即更新记忆时使用。 + The system automatically runs distillation daily at 23:55 (lookback 1 day). Manual trigger is useful for consolidating historical memories after first deployment, or when you need an immediate memory update. ## knowledge -查看和管理个人知识库。默认显示知识库统计信息。 +View and manage the personal knowledge base. Shows statistics by default. ```text /knowledge ``` -输出示例: - -``` -📚 知识库 - -- 状态:已开启 -- 页面数:12 -- 总大小:45.2 KB -- 分类明细: - - concepts/: 5 篇 - - entities/: 4 篇 - - sources/: 3 篇 -``` - ### knowledge list -查看知识库目录树结构。 +View the knowledge base directory tree. ```text /knowledge list @@ -65,7 +51,7 @@ description: 记忆蒸馏和知识库管理命令 ### knowledge on / off -开启或关闭知识库。关闭后不再注入知识提示词和索引知识文件。 +Enable or disable the knowledge base. When disabled, knowledge prompts are no longer injected and knowledge files are no longer indexed. ```text /knowledge on @@ -73,5 +59,5 @@ description: 记忆蒸馏和知识库管理命令 ``` - 终端 CLI 中 `cow knowledge` 和 `cow knowledge list` 可用,但 `on|off` 仅支持在对话中使用(需实时生效)。 + In the terminal CLI, `cow knowledge` and `cow knowledge list` are available, but `on|off` is only supported in chat (requires runtime effect). 
diff --git a/docs/cli/process.mdx b/docs/cli/process.mdx index e5746773..62dfa718 100644 --- a/docs/cli/process.mdx +++ b/docs/cli/process.mdx @@ -1,28 +1,28 @@ --- -title: 进程管理 -description: 使用 cow 命令管理 CowAgent 进程的启动、停止、重启、更新等操作 +title: Process Management +description: Manage CowAgent process lifecycle with cow commands --- -进程管理命令用于控制 CowAgent 后台进程的生命周期。这些命令仅在终端中可用。 +Process management commands control the CowAgent background process. These commands are only available in the terminal. ## start -启动 CowAgent 服务。默认以后台进程方式运行,并自动跟踪日志输出。 +Start the CowAgent service. Runs as a background daemon by default and automatically tails logs. ```bash cow start ``` -**选项:** +**Options:** -| 选项 | 说明 | +| Option | Description | | --- | --- | -| `-f`, `--foreground` | 前台运行,不以后台守护进程方式启动 | -| `--no-logs` | 启动后不自动跟踪日志 | +| `-f`, `--foreground` | Run in foreground, not as a background daemon | +| `--no-logs` | Don't tail logs after starting | ## stop -停止正在运行的 CowAgent 服务。 +Stop the running CowAgent service. ```bash cow stop @@ -30,97 +30,86 @@ cow stop ## restart -重启 CowAgent 服务(先停止再启动)。 +Restart the CowAgent service (stop then start). ```bash cow restart ``` -**选项:** +**Options:** -| 选项 | 说明 | +| Option | Description | | --- | --- | -| `--no-logs` | 重启后不自动跟踪日志 | +| `--no-logs` | Don't tail logs after restart | ## update -更新代码并重启服务。自动执行以下流程: +Update code and restart the service. Automatically performs: -1. 拉取最新代码(`git pull`) -2. 停止当前服务 -3. 更新 Python 依赖 -4. 重新安装 CLI -5. 启动服务 +1. Pull latest code (`git pull`) +2. Stop current service +3. Update Python dependencies +4. Reinstall CLI +5. Start service ```bash cow update ``` - 如果 `git pull` 失败(如存在本地未提交的修改),更新会中止,服务不受影响。 + If `git pull` fails (e.g., uncommitted local changes), the update aborts and the service remains unaffected. ## status -查看 CowAgent 服务运行状态,包括进程信息、版本号、当前配置的模型和通道。 +Check CowAgent service status, including process info, version, and current model/channel configuration. 
```bash cow status ``` -输出示例: - -``` -🐮 CowAgent Status - Status: ● Running (PID: 12345) - Version: 2.0.4 - Channel: web - Model: MiniMax-M2.5 - Mode: agent -``` - ## logs -查看服务日志。 +View service logs. ```bash cow logs ``` -**选项:** +**Options:** -| 选项 | 说明 | 默认值 | +| Option | Description | Default | | --- | --- | --- | -| `-f`, `--follow` | 持续跟踪日志输出 | 否 | -| `-n`, `--lines` | 显示最近 N 行 | 50 | +| `-f`, `--follow` | Continuously tail log output | No | +| `-n`, `--lines` | Show last N lines | 50 | -示例: +Examples: ```bash -# 查看最近 100 行日志 +# View last 100 lines cow logs -n 100 -# 持续跟踪日志 +# Continuously tail logs cow logs -f ``` ## install-browser -安装 Playwright 和 Chromium 浏览器,用于启用 [浏览器工具](/tools/browser)。 +Install Playwright and Chromium browser for the [browser tool](/tools/browser). ```bash cow install-browser ``` - 仅在需要使用浏览器工具(如网页浏览、截图等)时才需要安装。 + Only needed when using browser tools (web browsing, screenshots, etc.). -## run.sh 兼容 +## run.sh Compatibility -如果未安装 Cow CLI,也可以使用 `run.sh` 脚本管理服务: +If Cow CLI is not installed, you can use `run.sh` to manage the service: -| cow 命令 | run.sh 等效命令 | +| cow command | run.sh equivalent | | --- | --- | | `cow start` | `./run.sh start` | | `cow stop` | `./run.sh stop` | @@ -130,5 +119,5 @@ cow install-browser | `cow logs` | `./run.sh logs` | - 推荐使用 `cow` 命令,它提供更简洁的语法和更丰富的功能。通过一键安装脚本部署时 `cow` 命令会自动安装。 + The `cow` command is recommended — it provides cleaner syntax and richer features. It is automatically installed via the one-click install script. diff --git a/docs/cli/skill.mdx b/docs/cli/skill.mdx index 3b4a8aee..99e41dec 100644 --- a/docs/cli/skill.mdx +++ b/docs/cli/skill.mdx @@ -1,190 +1,182 @@ --- -title: 技能管理 -description: 通过命令安装、卸载、启用、禁用和管理技能 +title: Skill Management +description: Install, uninstall, enable, disable, and manage skills via commands --- -技能管理命令用于安装、查询和管理 CowAgent 的技能。在对话中使用 `/skill <子命令>`,在终端中使用 `cow skill <子命令>`。 +Skill management commands are used to install, query, and manage CowAgent skills. 
Use `/skill <subcommand>` in chat or `cow skill <subcommand>` in the terminal. ## list -列出已安装的技能及其状态。 +List installed skills and their status. -```text 对话 +```text Chat /skill list ``` -```bash 终端 +```bash Terminal cow skill list ``` -输出示例: +Example output: ``` -📦 已安装的技能 (3/4) +📦 Installed skills (3/4) ✅ pptx Use this skill any time a .pptx file is involved… - 来源: cowhub + Source: cowhub ✅ skill-creator Create, install, or update skills… - 来源: builtin + Source: builtin -⏸️ image-vision (已禁用) - 图片理解和视觉分析 - 来源: builtin +⏸️ image-vision (disabled) + Image understanding and visual analysis + Source: builtin ``` -**浏览技能广场**(查看 Hub 上所有可安装的技能): +**Browse the Skill Hub** (view all available skills): -```text 对话 +```text Chat /skill list --remote ``` -```bash 终端 +```bash Terminal cow skill list --remote ``` -**选项:** +**Options:** -| 选项 | 说明 | 默认值 | +| Option | Description | Default | | --- | --- | --- | -| `--remote`, `-r` | 浏览 Skill Hub 远程技能列表 | 否 | -| `--page` | 远程列表分页页码 | 1 | +| `--remote`, `-r` | Browse Skill Hub remote skill list | No | +| `--page` | Page number for remote listing | 1 | ## search -在技能广场中搜索技能。 +Search for skills on the Skill Hub. -```text 对话 +```text Chat /skill search pptx ``` -```bash 终端 +```bash Terminal cow skill search pptx ``` ## install -安装技能。通过统一的 `install` 命令,可一键安装来自 **Cow 技能广场、GitHub、ClawHub** 以及任意 URL(zip 压缩包、SKILL.md 链接)上的技能,无需手动下载和配置。 +Install skills with a single `install` command from Cow Skill Hub, GitHub, ClawHub, or any URL (zip archives, SKILL.md links) — no manual download or configuration required. 
-**从 Cow 技能广场安装(推荐):** +**From Skill Hub (recommended):** -```text 对话 +```text Chat /skill install pptx ``` -```bash 终端 +```bash Terminal cow skill install pptx ``` -**从 GitHub 安装:** +**From GitHub:** -```text 对话 -# 安装仓库中的所有技能(自动扫描包含 SKILL.md 的子目录) +```text Chat +# Install all skills in a repo (auto-discovers subdirectories with SKILL.md) /skill install larksuite/cli -# 指定子目录,只安装单个技能 +# Specify a subdirectory to install a single skill /skill install https://github.com/larksuite/cli/tree/main/skills/lark-im -# 使用 # 指定子目录 +# Use # to specify a subdirectory /skill install larksuite/cli#skills/lark-minutes ``` -```bash 终端 -# 安装仓库中的所有技能(自动扫描包含 SKILL.md 的子目录) +```bash Terminal +# Install all skills in a repo (auto-discovers subdirectories with SKILL.md) cow skill install larksuite/cli -# 指定子目录,只安装单个技能 +# Specify a subdirectory to install a single skill cow skill install https://github.com/larksuite/cli/tree/main/skills/lark-im -# 使用 # 指定子目录 +# Use # to specify a subdirectory cow skill install larksuite/cli#skills/lark-minutes ``` -支持完整的 GitHub URL 和 `owner/repo` 简写。对于 mono-repo(一个仓库中包含多个技能),不指定子目录时会自动发现并批量安装所有技能;指定子目录时只安装该目录下的技能。 +Supports full GitHub URLs and `owner/repo` shorthand. For mono-repos (multiple skills in one repository), omitting the subdirectory auto-discovers and batch-installs all skills; specifying a subdirectory installs only that skill. 
-**从 ClawHub 安装:** +**From ClawHub:** -```text 对话 +```text Chat /skill install clawhub:baidu-search ``` -```bash 终端 +```bash Terminal cow skill install clawhub:baidu-search ``` -**从 URL 安装:** +**From URL:** -```text 对话 -# 从 zip 压缩包安装(支持单个或批量) +```text Chat +# Install from a zip archive (single or batch) /skill install https://cdn.link-ai.tech/skills/pptx.zip -# 从 SKILL.md 链接安装 +# Install from a SKILL.md link /skill install https://example.com/path/to/SKILL.md ``` -```bash 终端 -# 从 zip 压缩包安装(支持单个或批量) +```bash Terminal +# Install from a zip archive (single or batch) cow skill install https://cdn.link-ai.tech/skills/pptx.zip -# 从 SKILL.md 链接安装 +# Install from a SKILL.md link cow skill install https://example.com/path/to/SKILL.md ``` -支持从 zip / tar.gz 压缩包 URL 安装,解压后自动扫描包含 `SKILL.md` 的目录,支持单个或批量安装。也支持直接从 `SKILL.md` 文件链接安装,会自动解析技能名称和描述。 - -安装成功后会显示技能名称、描述和来源,例如: - -``` -✅ baidu-search - 百度搜索:使用百度搜索引擎检索信息… - 来源: clawhub -``` +Supports installing from zip / tar.gz archive URLs — automatically extracts and discovers directories containing `SKILL.md`, with support for single or batch install. Also supports installing directly from a `SKILL.md` file URL, automatically parsing the skill name and description. ## uninstall -卸载已安装的技能。 +Uninstall an installed skill. -```text 对话 +```text Chat /skill uninstall pptx ``` -```bash 终端 +```bash Terminal cow skill uninstall pptx ``` - 卸载操作会删除技能目录下的所有文件,此操作不可恢复。 + Uninstalling deletes all files in the skill directory. This action cannot be undone. ## enable / disable -启用或禁用技能,禁用后技能不会被 Agent 调用。 +Enable or disable a skill. Disabled skills will not be invoked by the Agent. -```text 对话 +```text Chat /skill enable pptx /skill disable pptx ``` -```bash 终端 +```bash Terminal cow skill enable pptx cow skill disable pptx ``` @@ -192,27 +184,27 @@ cow skill disable pptx ## info -查看已安装技能的详细信息,包括 `SKILL.md` 内容预览。 +View details of an installed skill, including a preview of its `SKILL.md`. 
-```text 对话 +```text Chat /skill info pptx ``` -```bash 终端 +```bash Terminal cow skill info pptx ``` -## 技能来源 +## Skill Sources -安装的技能会记录来源信息,可通过 `/skill list` 查看: +Installed skills track their origin, viewable via `/skill list`: -| 来源标识 | 说明 | +| Source | Description | | --- | --- | -| `builtin` | 项目内置技能 | -| `cowhub` | 从 CowAgent Skill Hub 安装 | -| `github` | 从 GitHub URL 直接安装 | -| `clawhub` | 从 ClawHub 安装 | -| `url` | 从 SKILL.md URL 安装 | -| `local` | 本地创建的技能 | +| `builtin` | Built-in project skills | +| `cowhub` | Installed from CowAgent Skill Hub | +| `github` | Installed directly from a GitHub URL | +| `clawhub` | Installed from ClawHub | +| `url` | Installed from a SKILL.md URL | +| `local` | Locally created skills | diff --git a/docs/docs.json b/docs/docs.json index 1bc55cac..593142cc 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -33,23 +33,36 @@ "github": "https://github.com/zhayujie/CowAgent" } }, + "redirects": [ + { + "source": "/en/:slug*", + "destination": "/:slug*", + "permanent": true + } + ], "navigation": { "languages": [ { - "language": "zh", + "language": "en", "default": true, "navbar": { "links": [ - { "label": "官网", "href": "https://cowagent.ai/?lang=zh" }, - { "label": "GitHub", "href": "https://github.com/zhayujie/CowAgent" } + { + "label": "Website", + "href": "https://cowagent.ai/" + }, + { + "label": "GitHub", + "href": "https://github.com/zhayujie/CowAgent" + } ] }, "tabs": [ { - "tab": "项目介绍", + "tab": "Introduction", "groups": [ { - "group": "概览", + "group": "Overview", "pages": [ "intro/index", "intro/architecture", @@ -59,10 +72,10 @@ ] }, { - "tab": "快速开始", + "tab": "Get Started", "groups": [ { - "group": "安装部署", + "group": "Installation", "pages": [ "guide/quick-start", "guide/manual-install", @@ -72,17 +85,17 @@ ] }, { - "tab": "模型", + "tab": "Models", "groups": [ { - "group": "模型配置", + "group": "Model Configuration", "pages": [ "models/index", "models/deepseek", - "models/minimax", "models/claude", "models/gemini", 
"models/openai", + "models/minimax", "models/glm", "models/qwen", "models/doubao", @@ -97,16 +110,16 @@ ] }, { - "tab": "工具", + "tab": "Tools", "groups": [ { - "group": "工具系统", + "group": "Tools System", "pages": [ "tools/index" ] }, { - "group": "内置工具", + "group": "Built-in Tools", "pages": [ "tools/read", "tools/write", @@ -121,7 +134,7 @@ ] }, { - "group": "可选工具", + "group": "Optional Tools", "pages": [ "tools/web-search", "tools/vision", @@ -129,7 +142,7 @@ ] }, { - "group": "MCP 工具", + "group": "MCP Tools", "pages": [ "tools/mcp" ] @@ -137,10 +150,10 @@ ] }, { - "tab": "技能", + "tab": "Skills", "groups": [ { - "group": "技能系统", + "group": "Skills System", "pages": [ "skills/index", "skills/install", @@ -149,7 +162,7 @@ ] }, { - "group": "内置技能", + "group": "Built-in Skills", "pages": [ "skills/skill-creator", "skills/knowledge-wiki", @@ -159,10 +172,10 @@ ] }, { - "tab": "记忆", + "tab": "Memory", "groups": [ { - "group": "记忆系统", + "group": "Memory System", "pages": [ "memory/index", "memory/context", @@ -172,10 +185,10 @@ ] }, { - "tab": "知识", + "tab": "Knowledge", "groups": [ { - "group": "知识库", + "group": "Knowledge Base", "pages": [ "knowledge/index" ] @@ -183,33 +196,33 @@ ] }, { - "tab": "通道", + "tab": "Channels", "groups": [ { - "group": "接入渠道", + "group": "Platforms", "pages": [ "channels/index", - "channels/weixin", "channels/web", + "channels/telegram", + "channels/slack", + "channels/discord", + "channels/weixin", "channels/feishu", "channels/dingtalk", "channels/wecom-bot", "channels/qq", "channels/wecom", "channels/wechat-kf", - "channels/wechatmp", - "channels/telegram", - "channels/slack", - "channels/discord" + "channels/wechatmp" ] } ] }, { - "tab": "命令", + "tab": "CLI", "groups": [ { - "group": "命令系统", + "group": "Command System", "pages": [ "cli/index", "cli/process", @@ -221,10 +234,10 @@ ] }, { - "tab": "版本", + "tab": "Releases", "groups": [ { - "group": "发布记录", + "group": "Release Notes", "pages": [ "releases/overview", "releases/v2.0.9", @@ 
-244,206 +257,212 @@ ] }, { - "language": "en", + "language": "zh", "navbar": { "links": [ - { "label": "Website", "href": "https://cowagent.ai/" }, - { "label": "GitHub", "href": "https://github.com/zhayujie/CowAgent" } + { + "label": "官网", + "href": "https://cowagent.ai/?lang=zh" + }, + { + "label": "GitHub", + "href": "https://github.com/zhayujie/CowAgent" + } ] }, "tabs": [ { - "tab": "Introduction", + "tab": "项目介绍", "groups": [ { - "group": "Overview", + "group": "概览", "pages": [ - "en/intro/index", - "en/intro/architecture", - "en/intro/features" + "zh/intro/index", + "zh/intro/architecture", + "zh/intro/features" ] } ] }, { - "tab": "Get Started", + "tab": "快速开始", "groups": [ { - "group": "Installation", + "group": "安装部署", "pages": [ - "en/guide/quick-start", - "en/guide/manual-install", - "en/guide/upgrade" + "zh/guide/quick-start", + "zh/guide/manual-install", + "zh/guide/upgrade" ] } ] }, { - "tab": "Models", + "tab": "模型", "groups": [ { - "group": "Model Configuration", + "group": "模型配置", "pages": [ - "en/models/index", - "en/models/deepseek", - "en/models/claude", - "en/models/gemini", - "en/models/openai", - "en/models/minimax", - "en/models/glm", - "en/models/qwen", - "en/models/doubao", - "en/models/kimi", - "en/models/qianfan", - "en/models/mimo", - "en/models/linkai", - "en/models/coding-plan", - "en/models/custom" + "zh/models/index", + "zh/models/deepseek", + "zh/models/minimax", + "zh/models/claude", + "zh/models/gemini", + "zh/models/openai", + "zh/models/glm", + "zh/models/qwen", + "zh/models/doubao", + "zh/models/kimi", + "zh/models/qianfan", + "zh/models/mimo", + "zh/models/linkai", + "zh/models/coding-plan", + "zh/models/custom" ] } ] }, { - "tab": "Tools", + "tab": "工具", "groups": [ { - "group": "Tools System", + "group": "工具系统", "pages": [ - "en/tools/index" + "zh/tools/index" ] }, { - "group": "Built-in Tools", + "group": "内置工具", "pages": [ - "en/tools/read", - "en/tools/write", - "en/tools/edit", - "en/tools/ls", - "en/tools/bash", - 
"en/tools/send", - "en/tools/memory", - "en/tools/env-config", - "en/tools/web-fetch", - "en/tools/scheduler" + "zh/tools/read", + "zh/tools/write", + "zh/tools/edit", + "zh/tools/ls", + "zh/tools/bash", + "zh/tools/send", + "zh/tools/memory", + "zh/tools/env-config", + "zh/tools/web-fetch", + "zh/tools/scheduler" ] }, { - "group": "Optional Tools", + "group": "可选工具", "pages": [ - "en/tools/web-search", - "en/tools/vision", - "en/tools/browser" + "zh/tools/web-search", + "zh/tools/vision", + "zh/tools/browser" ] }, { - "group": "MCP Tools", + "group": "MCP 工具", "pages": [ - "en/tools/mcp" + "zh/tools/mcp" ] } ] }, { - "tab": "Skills", + "tab": "技能", "groups": [ { - "group": "Skills System", + "group": "技能系统", "pages": [ - "en/skills/index", - "en/skills/install", - "en/skills/create", - "en/skills/hub" + "zh/skills/index", + "zh/skills/install", + "zh/skills/create", + "zh/skills/hub" ] }, { - "group": "Built-in Skills", + "group": "内置技能", "pages": [ - "en/skills/skill-creator", - "en/skills/knowledge-wiki", - "en/skills/image-generation" + "zh/skills/skill-creator", + "zh/skills/knowledge-wiki", + "zh/skills/image-generation" ] } ] }, { - "tab": "Memory", + "tab": "记忆", "groups": [ { - "group": "Memory System", + "group": "记忆系统", "pages": [ - "en/memory/index", - "en/memory/context", - "en/memory/deep-dream" + "zh/memory/index", + "zh/memory/context", + "zh/memory/deep-dream" ] } ] }, { - "tab": "Knowledge", + "tab": "知识", "groups": [ { - "group": "Knowledge Base", + "group": "知识库", "pages": [ - "en/knowledge/index" + "zh/knowledge/index" ] } ] }, { - "tab": "Channels", + "tab": "通道", "groups": [ { - "group": "Platforms", + "group": "接入渠道", "pages": [ - "en/channels/index", - "en/channels/web", - "en/channels/telegram", - "en/channels/slack", - "en/channels/discord", - "en/channels/weixin", - "en/channels/feishu", - "en/channels/dingtalk", - "en/channels/wecom-bot", - "en/channels/qq", - "en/channels/wecom", - "en/channels/wechat-kf", - "en/channels/wechatmp" + 
"zh/channels/index", + "zh/channels/weixin", + "zh/channels/web", + "zh/channels/feishu", + "zh/channels/dingtalk", + "zh/channels/wecom-bot", + "zh/channels/qq", + "zh/channels/wecom", + "zh/channels/wechat-kf", + "zh/channels/wechatmp", + "zh/channels/telegram", + "zh/channels/slack", + "zh/channels/discord" ] } ] }, { - "tab": "CLI", + "tab": "命令", "groups": [ { - "group": "Command System", + "group": "命令系统", "pages": [ - "en/cli/index", - "en/cli/process", - "en/cli/skill", - "en/cli/memory-knowledge", - "en/cli/general" + "zh/cli/index", + "zh/cli/process", + "zh/cli/skill", + "zh/cli/memory-knowledge", + "zh/cli/general" ] } ] }, { - "tab": "Releases", + "tab": "版本", "groups": [ { - "group": "Release Notes", + "group": "发布记录", "pages": [ - "en/releases/overview", - "en/releases/v2.0.9", - "en/releases/v2.0.8", - "en/releases/v2.0.7", - "en/releases/v2.0.6", - "en/releases/v2.0.5", - "en/releases/v2.0.4", - "en/releases/v2.0.3", - "en/releases/v2.0.2", - "en/releases/v2.0.1", - "en/releases/v2.0.0" + "zh/releases/overview", + "zh/releases/v2.0.9", + "zh/releases/v2.0.8", + "zh/releases/v2.0.7", + "zh/releases/v2.0.6", + "zh/releases/v2.0.5", + "zh/releases/v2.0.4", + "zh/releases/v2.0.3", + "zh/releases/v2.0.2", + "zh/releases/v2.0.1", + "zh/releases/v2.0.0" ] } ] @@ -454,8 +473,14 @@ "language": "ja", "navbar": { "links": [ - { "label": "ウェブサイト", "href": "https://cowagent.ai/" }, - { "label": "GitHub", "href": "https://github.com/zhayujie/CowAgent" } + { + "label": "ウェブサイト", + "href": "https://cowagent.ai/" + }, + { + "label": "GitHub", + "href": "https://github.com/zhayujie/CowAgent" + } ] }, "tabs": [ diff --git a/docs/en/channels/dingtalk.mdx b/docs/en/channels/dingtalk.mdx deleted file mode 100644 index 1db5f53c..00000000 --- a/docs/en/channels/dingtalk.mdx +++ /dev/null @@ -1,58 +0,0 @@ ---- -title: DingTalk -description: Integrate CowAgent into DingTalk application ---- - -Integrate CowAgent into DingTalk by creating an intelligent robot app on the 
DingTalk Open Platform. - -## 1. Create App - -1. Go to [DingTalk Developer Console](https://open-dev.dingtalk.com/fe/app#/corp/app), log in and click **Create App**, fill in the app information: - - - -2. Click **Add App Capability**, select **Robot** capability and click **Add**: - - - -3. Configure the robot information and click **Publish**. After publishing, click "**Debug**" to automatically create a test group chat, which can be viewed in the client: - - - -4. Click **Version Management & Release**, create a new version and publish: - - - -## 2. Project Configuration - -1. Click **Credentials & Basic Info**, get the `Client ID` and `Client Secret`: - - - -2. Add the following configuration to `config.json` in the project root: - -```json -{ - "channel_type": "dingtalk", - "dingtalk_client_id": "YOUR_CLIENT_ID", - "dingtalk_client_secret": "YOUR_CLIENT_SECRET" -} -``` - -3. Install the dependency: - -```bash -pip3 install dingtalk_stream -``` - - - -4. After starting the project, go to the DingTalk Developer Console, click **Event Subscription**, then click **Connection verified, verify channel**. When "**Connection successful**" is displayed, the configuration is complete: - - - -## 3. Usage - -Chat privately with the robot or add it to an enterprise group to start a conversation: - - diff --git a/docs/en/channels/discord.mdx b/docs/en/channels/discord.mdx deleted file mode 100644 index e18c0685..00000000 --- a/docs/en/channels/discord.mdx +++ /dev/null @@ -1,93 +0,0 @@ ---- -title: Discord -description: Integrate CowAgent with a Discord Bot ---- - -> Integrate CowAgent into Discord via a Discord Bot using the **Gateway** (persistent WebSocket). Supports direct messages (DM) and server channels (triggered by @mention or replying to the bot). The Gateway uses a persistent WebSocket connection — no public IP or callback URL required, works out of the box. - -## 1. Setup - -### Step 1: Create a Discord Application and Bot - -1. 
Open the [Discord Developer Portal](https://discord.com/developers/applications), click **New Application**, enter a name (e.g. `CowAgent`), and create it. -2. Go to the **Bot** page in the left sidebar, click **Reset Token** to generate a Bot Token, then copy and store it safely (shown only once). - - - This token is your bot's password — keep it secret. If it leaks, click **Reset Token** again on the Bot page to regenerate it. - - -### Step 2: Enable the Message Content Intent - -Reading message text in both DMs and channels depends on this privileged intent. - -1. On the **Bot** page, find **Privileged Gateway Intents**. -2. Turn on **Message Content Intent** and save. - - - Without this intent enabled, incoming message content will be empty and the bot will not respond. - - -### Step 3: Invite the Bot to a Server - -1. Go to **OAuth2 → URL Generator** in the left sidebar. -2. Under **Scopes**, check `bot`. -3. Under **Bot Permissions**, check at least: `Send Messages`, `Read Message History`, `Attach Files`, `View Channels`. -4. Copy the generated authorization URL at the bottom, open it in a browser, and authorize it for your target server. - - - You can skip this step if you only need DMs, but you still need a DM channel with the bot (e.g. the user messages the bot directly). - - -### Step 4: Connect to CowAgent - - - - Open the Web Console (default `http://127.0.0.1:9899`), go to **Channels**, click **Add Channel**, choose **Discord**, paste the Bot Token, and click connect. 
- - - Add the following to `config.json` and start Cow: - - ```json - { - "channel_type": "discord", - "discord_token": "your-discord-bot-token", - "discord_group_trigger": "mention_or_reply" - } - ``` - - | Key | Description | Default | - | --- | --- | --- | - | `discord_token` | Bot Token generated on the Bot page of the Developer Portal | - | - | `discord_group_trigger` | Channel trigger: `mention_or_reply` (@ or reply to bot) / `mention_only` (@ only) / `all` (all messages) | `mention_or_reply` | - - - -The integration is ready when you see logs like: - -``` -[Discord] Bot logged in as CowAgent#1234 (id=123456789) -[Discord] ✅ Discord bot ready, listening for messages -``` - -## 2. Capabilities - -| Feature | Support | -| --- | --- | -| Direct message (DM) | ✅ | -| Server channel (@bot / reply to bot) | ✅ | -| Text messages | ✅ send / receive | -| Image messages | ✅ send / receive | -| File messages | ✅ send / receive (PDF / Word / Excel, etc.) | - - - A single Discord message is capped at 2000 characters; long replies are automatically split across multiple messages by line breaks. - - -## 3. Usage - -Once connected: - -- **Direct message (DM)**: find your bot in the server member list, click its avatar, and message it directly. -- **Channel**: in a channel where the bot is invited, trigger it with `@your-bot hello` or by **replying to one of the bot's messages**. - -When sending an image or file, you can **add a text caption** (description / question) in the attachment input — the bot will answer based on both. Sending an attachment first and then a follow-up question also works; the two messages are merged automatically. 
diff --git a/docs/en/channels/feishu.mdx b/docs/en/channels/feishu.mdx deleted file mode 100644 index 1283d0c1..00000000 --- a/docs/en/channels/feishu.mdx +++ /dev/null @@ -1,110 +0,0 @@ ---- -title: Feishu (Lark) -description: Integrate CowAgent into Feishu via a custom enterprise app ---- - -> Integrate CowAgent into Feishu via a custom enterprise app. Supports p2p chat and group chat (@bot), uses WebSocket long connection (no public IP needed), supports streaming typewriter replies and voice messages. - - - You need to be a Feishu enterprise user with admin privileges. - - -## 1. Setup - -### Option 1: One-click Scan to Create (Recommended) - -No need to manually create an app on the Feishu Developer Platform. Start the Cow project, open the web console (default `http://127.0.0.1:9899/`), go to **Channels**, click **Add Channel**, choose **Feishu**, then under the **Scan QR** tab click **One-click Create Feishu App** and scan with the **Feishu App** to complete app creation and connection automatically. - - - - - 1. Requires `lark-oapi` ≥ 1.5.5. - 2. The created app comes with all required permissions (messaging, card read/write, group events, etc.) and event subscriptions pre-configured — no manual setup on the developer console needed. Currently only the Feishu mainland version is supported (Lark international not yet supported). - - -When starting from CLI without `feishu_app_id` configured, the QR code is also printed to the terminal. - -### Option 2: Manual Setup - -Manually create a custom app on the Feishu Developer Platform, then connect via Web Console or config file. - -**Step 1: Create the App** - -1. Go to [Feishu Developer Platform](https://open.feishu.cn/app/), click **Create Enterprise Custom App**: - - - -2. In **Add App Capabilities**, add the **Bot** capability: - - - -3. 
In **Permission Management**, paste the following permissions and **Batch Enable** all: - -``` -im:message,im:message.group_at_msg,im:message.group_at_msg:readonly,im:message.p2p_msg,im:message.p2p_msg:readonly,im:message:send_as_bot,im:resource,cardkit:card:write -``` - - - -4. Get `App ID` and `App Secret` from **Credentials & Basic Info**: - - - -**Step 2: Connect to CowAgent** - - - - Open the web console, go to **Channels**, click **Add Channel**, choose **Feishu**, switch to the **Manual** tab, enter App ID and App Secret, then click connect. - - - Add the following to `config.json` and start the program: - - ```json - { - "channel_type": "feishu", - "feishu_app_id": "YOUR_APP_ID", - "feishu_app_secret": "YOUR_APP_SECRET", - "feishu_stream_reply": true - } - ``` - - | Parameter | Description | Default | - | --- | --- | --- | - | `feishu_app_id` | Feishu app App ID | - | - | `feishu_app_secret` | Feishu app App Secret | - | - | `feishu_stream_reply` | Enable streaming typewriter reply | `true` | - - - -**Step 3: Publish the App** - -1. After Cow is running, go to **Events & Callbacks** in the Feishu Developer Platform, choose **Long Connection** mode and save: - - - -2. Click **Add Event**, search for "Receive Message" and choose **Receive Message v2.0**. - -3. Click **Version Management & Release**, create a version and apply for **Production Release**. Approve the request in the Feishu client: - - - -## 2. Features - -| Feature | Status | -| --- | --- | -| P2P chat | ✅ | -| Group chat (@bot) | ✅ | -| Text messages | ✅ send/receive | -| Image messages | ✅ send/receive | -| Voice messages | ✅ send/receive | -| Streaming reply | ✅ (powered by Feishu cardkit streaming card) | - - - Streaming reply requires the `cardkit:card:write` permission (already enabled by one-click creation) and Feishu client version ≥ 7.20. Older clients see an upgrade prompt; if the permission or version is not satisfied, replies fall back to plain text automatically. - - -## 3. 
Usage - -After connection, search for the bot name in Feishu to start a chat. - -To use in groups, add the bot to a group and @-mention it. diff --git a/docs/en/channels/index.mdx b/docs/en/channels/index.mdx deleted file mode 100644 index bd40d7d5..00000000 --- a/docs/en/channels/index.mdx +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: Channels Overview -description: Channels supported by CowAgent and their capability matrix ---- - -CowAgent supports multiple chat channels. Switch between them at startup via `channel_type`. The Web Console is enabled by default and can run in parallel with other channels. - -## Capability Matrix - -The table below summarizes the inbound message types, bot reply types, and group chat capabilities supported by each channel, making it easy to choose by scenario. - -| Channel | Text | Image | File | Voice | Group Chat | -| --- | :-: | :-: | :-: | :-: | :-: | -| [WeChat](/en/channels/weixin) | ✅ | ✅ | ✅ | ✅ | | -| [Web Console](/en/channels/web) | ✅ | ✅ | ✅ | ✅ | | -| [Feishu](/en/channels/feishu) | ✅ | ✅ | ✅ | ✅ | ✅ | -| [DingTalk](/en/channels/dingtalk) | ✅ | ✅ | ✅ | ✅ | ✅ | -| [WeCom Bot](/en/channels/wecom-bot) | ✅ | ✅ | ✅ | ✅ | ✅ | -| [QQ](/en/channels/qq) | ✅ | ✅ | ✅ | | ✅ | -| [WeCom App](/en/channels/wecom) | ✅ | ✅ | ✅ | ✅ | | -| [Official Account](/en/channels/wechatmp) | ✅ | ✅ | | ✅ | | -| [Telegram](/en/channels/telegram) | ✅ | ✅ | ✅ | ✅ | ✅ | -| [Slack](/en/channels/slack) | ✅ | ✅ | ✅ | | ✅ | -| [Discord](/en/channels/discord) | ✅ | ✅ | ✅ | | ✅ | - -- The **Image / File / Voice** columns indicate that the channel can send and receive the corresponding message types; see each channel's docs for details -- The **Group Chat** column indicates the ability to recognize and respond to group messages - - - The voice / image capabilities of each channel depend on the configuration of the corresponding model provider. See [Models Overview](/en/models/index) for details. 
- - -## Channel List - -- [Web Console](/en/channels/web) — built-in browser-based chat and management panel, enabled by default -- [WeChat](/en/channels/weixin) — log in via personal WeChat QR scan -- [Feishu](/en/channels/feishu) — Feishu custom bot -- [DingTalk](/en/channels/dingtalk) — DingTalk custom bot -- [WeCom Bot](/en/channels/wecom-bot) — WeCom AI Bot via WebSocket long connection -- [QQ](/en/channels/qq) — QQ Official Bot open platform -- [WeCom App](/en/channels/wecom) — WeCom custom app integration -- [Official Account](/en/channels/wechatmp) — WeChat Official Account (subscription / service) -- [Telegram](/en/channels/telegram) — global IM, 5-minute setup, no public IP needed -- [Slack](/en/channels/slack) — team collaboration IM, Socket Mode integration, no public IP needed -- [Discord](/en/channels/discord) — community IM, Gateway connection, no public IP needed diff --git a/docs/en/channels/qq.mdx b/docs/en/channels/qq.mdx deleted file mode 100644 index a7f08594..00000000 --- a/docs/en/channels/qq.mdx +++ /dev/null @@ -1,88 +0,0 @@ ---- -title: QQ Bot -description: Connect CowAgent to QQ Bot (WebSocket long connection) ---- - -> Connect CowAgent via QQ Open Platform's bot API, supporting QQ direct messages, group chats (@bot), guild channel messages, and guild DMs. No public IP required — uses WebSocket long connection. - - - QQ Bot is created through the QQ Open Platform. It uses WebSocket long connection to receive messages and OpenAPI to send messages. No public IP or domain is required. - - -## 1. Create a QQ Bot - -> Visit the [QQ Open Platform](https://q.qq.com), sign in with QQ. If you haven't registered, please complete [account registration](https://q.qq.com/#/register) first. 
- -1.Go to the [QQ Open Platform - Bot List](https://q.qq.com/#/apps), and click **Create Bot**: - - - -2.Fill in the bot name, avatar, and other basic information to complete the creation: - - - -3.Enter the bot configuration page, go to **Development Management**, and complete the following steps: - - - Copy and save the **AppID** (Bot ID) - - Generate and save the **AppSecret** (Bot Secret) - - - -## 2. Configuration and Running - -### Option A: Web Console - -Start the program and open the Web console (local access: http://127.0.0.1:9899/). Go to the **Channels** tab, click **Connect Channel**, select **QQ Bot**, fill in the AppID and AppSecret from the previous step, and click Connect. - - - -### Option B: Config File - -Add the following to your `config.json`: - -```json -{ - "channel_type": "qq", - "qq_app_id": "YOUR_APP_ID", - "qq_app_secret": "YOUR_APP_SECRET" -} -``` - -| Parameter | Description | -| --- | --- | -| `qq_app_id` | AppID of the QQ Bot, found in Development Management on the open platform | -| `qq_app_secret` | AppSecret of the QQ Bot, found in Development Management on the open platform | - -After configuration, start the program. The log message `[QQ] ✅ Connected successfully` indicates a successful connection. - - -## 3. Usage - -In the QQ Open Platform, go to **Management → Usage Scope & Members**, scan the "Add to group and message list" QR code with your QQ client to start chatting with the bot: - - - -Chat example: - - -## 4. Supported Features - -> Note: To use the QQ bot in group chats and guild channels, you need to complete the publishing review and configure usage scope permissions. - -| Feature | Status | -| --- | --- | -| QQ Direct Messages | ✅ | -| QQ Group Chat (@bot) | ✅ | -| Guild Channel (@bot) | ✅ | -| Guild DM | ✅ | -| Text Messages | ✅ Send & Receive | -| Image Messages | ✅ Send & Receive (group & direct) | -| File Messages | ✅ Send (group & direct) | -| Scheduled Tasks | ✅ Active push (4 per user per month) | - - -## 5. 
Notes - -- **Passive message limits**: QQ direct message replies are valid for 60 minutes (max 5 replies per message); group chat replies are valid for 5 minutes. -- **Active message limits**: Both direct and group chats have a monthly limit of 4 active messages. Keep this in mind when using the scheduled tasks feature. -- **Event permissions**: By default, `GROUP_AND_C2C_EVENT` (QQ group/direct) and `PUBLIC_GUILD_MESSAGES` (guild public messages) are subscribed. Apply for additional permissions on the open platform if needed. diff --git a/docs/en/channels/slack.mdx b/docs/en/channels/slack.mdx deleted file mode 100644 index f95272ca..00000000 --- a/docs/en/channels/slack.mdx +++ /dev/null @@ -1,118 +0,0 @@ ---- -title: Slack -description: Integrate CowAgent with a Slack App ---- - -> Integrate CowAgent into Slack via a Slack App in **Socket Mode**. Supports direct messages (DM) and channels (triggered by @mention or replying within a thread). Socket Mode uses a persistent WebSocket connection — no public IP or callback URL required, works out of the box. - -## 1. Setup - -### Step 1: Create a Slack App - -1. Open the [Slack API apps page](https://api.slack.com/apps), click **Create New App** → **From scratch**. -2. Enter an **App Name** (e.g. `CowAgent`), pick the **Workspace** to install into, and create it. - -### Step 2: Enable Socket Mode and get the App Token - -1. In the left sidebar go to **Settings → Socket Mode** and turn on **Enable Socket Mode**. -2. You will be prompted to generate an **App-Level Token** with the `connections:write` scope. Save this token starting with `xapp-`. - - - Socket Mode receives events over a WebSocket connection, so you don't need to expose a public callback URL — ideal for local or intranet deployments. - - -### Step 3: Configure bot scopes and install - -1. 
Go to **Features → OAuth & Permissions**, click **Add an OAuth Scope** under **Bot Token Scopes**, and add the following scopes one by one: - - ``` - app_mentions:read - channels:history - chat:write - commands - files:read - files:write - groups:history - im:history - mpim:history - users:read - ``` - - - `files:read` / `files:write` are used for sending/receiving images and files; omit them if you only need text conversations. - - -2. Go to **Features → Event Subscriptions**, turn on **Enable Events**, and under **Subscribe to bot events** click **Add Bot User Event** to add: - - ``` - app_mention - message.im - message.channels - ``` - - - Add `message.groups` if you need to use the bot in private channels. - -3. Go to **Features → App Home**, enable **Messages Tab** under **Show Tabs**, and check **Allow users to send Slash commands and messages from the messages tab**. Otherwise the DM input box is disabled and users cannot message the bot. -4. Back in **OAuth & Permissions**, click **Install to Workspace**. After installing, copy the **Bot User OAuth Token** starting with `xoxb-`. - - - If the Slack client still shows "Sending messages to this app has been turned off", make sure you completed the App Home step above, then refresh or restart the Slack client (remove the app from your conversations and reopen it if needed). - - -### Step 4: Connect to CowAgent - - - - Open the Web Console (default `http://127.0.0.1:9899`), go to **Channels**, click **Add Channel**, choose **Slack**, paste the Bot Token (`xoxb-`) and App Token (`xapp-`), and click connect. 
- - - Add the following to `config.json` and start Cow: - - ```json - { - "channel_type": "slack", - "slack_bot_token": "xoxb-xxxxxxxxxxxx", - "slack_app_token": "xapp-xxxxxxxxxxxx", - "slack_group_trigger": "mention_or_reply" - } - ``` - - | Key | Description | Default | - | --- | --- | --- | - | `slack_bot_token` | Bot User OAuth Token, like `xoxb-...` | - | - | `slack_app_token` | App-Level Token (generated after enabling Socket Mode), like `xapp-...` | - | - | `slack_group_trigger` | Channel trigger: `mention_or_reply` (@ or reply in thread) / `mention_only` (@ only) / `all` (all messages) | `mention_or_reply` | - - - -The integration is ready when you see logs like: - -``` -[Slack] Bot logged in as user_id=U0XXXXXXX, team=Txxxxxxxx -[Slack] ✅ Slack bot ready, listening for events -``` - -## 2. Capabilities - -| Feature | Support | -| --- | --- | -| Direct message (DM) | ✅ | -| Channel (@bot / reply in thread) | ✅ | -| Text messages | ✅ send / receive | -| Image messages | ✅ send / receive | -| File messages | ✅ send / receive (PDF / Word / Excel, etc.) | -| Thread replies | ✅ replies are posted to the thread of the triggering message | - - - Slack organizes conversations into threads. The bot posts replies into the thread of the triggering message, keeping channels tidy. - - -## 3. Usage - -Once connected: - -- **Direct message (DM)**: find your App under **Apps** in the Slack sidebar and message it directly. -- **Channel**: invite the App into a channel (`/invite @your-app`), then trigger it with `@your-app hello`; continue the conversation by replying within the same thread. - -When sending an image or file, you can **add a text caption** (description / question) in the attachment input — the bot will answer based on both. Sending an attachment first and then a follow-up question also works; the two messages are merged automatically. 
diff --git a/docs/en/channels/telegram.mdx b/docs/en/channels/telegram.mdx deleted file mode 100644 index f90da992..00000000 --- a/docs/en/channels/telegram.mdx +++ /dev/null @@ -1,111 +0,0 @@ ---- -title: Telegram -description: Integrate CowAgent with Telegram via the Bot API ---- - -> Integrate CowAgent into Telegram via the official Bot API. Supports private chat and group chat (triggered by @mention or replying to the bot). Uses Long Polling — no public IP required, works out of the box. - - -## 1. Setup - -### Step 1: Create a Bot via BotFather - -1. Open the official account [@BotFather](https://t.me/BotFather) in Telegram. -2. Send `/newbot` and follow the prompts: - - **Bot name** (display name, e.g. `My CowAgent Bot`) - - **Bot username** (must end with `bot`, e.g. `my_cowagent_bot`) -3. Once created, BotFather returns an **HTTP API Token** (e.g. `123456789:ABCdefGhIJKlmNoPQRsTUVwxyZ`). Keep it safe. - - - The token is the password of your bot — never share it. If it leaks, send `/revoke` to `@BotFather` to reset it. - - -### Step 2: (Group chat only) Disable Privacy Mode - -Skip this step if you only use private chat. Telegram bots run in **Privacy Mode** by default — in groups they can only see commands suffixed with `@bot` (e.g. `/start@your_bot`) and replies to bot messages; **plain `@bot hello` text messages are not delivered**, so the bot will appear unresponsive in groups. - -Send the following to `@BotFather`: - -1. `/setprivacy` -2. Pick the bot you just created -3. Choose `Disable` - - - If the bot is still silent in groups after this, try removing it from the group and adding it back. - - -### Step 3: Connect to CowAgent - - - - Open the Web Console (default `http://127.0.0.1:9899`), go to **Channels**, click **Add Channel**, choose **Telegram**, paste the Bot Token, and click connect. 
- - - Add the following to `config.json` and start Cow: - - ```json - { - "channel_type": "telegram", - "telegram_token": "123456789:ABCdefGhIJKlmNoPQRsTUVwxyZ", - "telegram_group_trigger": "mention_or_reply" - } - ``` - - | Key | Description | Default | - | --- | --- | --- | - | `telegram_token` | HTTP API Token returned by BotFather | - | - | `telegram_group_trigger` | Group trigger: `mention_or_reply` (@ or reply) / `mention_only` (@ only) / `all` (all messages) | `mention_or_reply` | - | `telegram_register_commands` | Whether to register the command menu with BotFather on startup | `true` | - - - -The integration is ready when you see logs like: - -``` -[Telegram] Bot logged in as @my_cowagent_bot (id=123456789) -[Telegram] Registered 10 bot commands -[Telegram] ✅ Telegram bot ready, polling for updates -``` - -## 2. Capabilities - -| Feature | Support | -| --- | --- | -| Private chat | ✅ | -| Group chat (@bot / reply to bot) | ✅ | -| Text messages | ✅ send / receive | -| Image messages | ✅ send / receive | -| Voice messages | ✅ send / receive (OGG/Opus) | -| Video messages | ✅ send / receive | -| File messages | ✅ send / receive (PDF / Word / Excel, etc.) | -| Command menu | ✅ aligned with Web Console slash commands | - -### Command Menu - -On startup, the channel registers a command menu with BotFather. Typing `/` in Telegram shows a dropdown: - -| Command | Description | -| --- | --- | -| `/help` | Show command help | -| `/status` | View runtime status | -| `/context` | View conversation context (`/context clear` to clear) | -| `/skill` | Skill management (`/skill list`, `/skill install`, ...) 
| -| `/memory` | Memory management (`/memory dream`) | -| `/knowledge` | Knowledge base (`/knowledge list` / `on` / `off`) | -| `/config` | View current config | -| `/cancel` | Cancel the running Agent task | -| `/logs` | View recent logs | -| `/version` | Show version | - - - Telegram's command menu only displays top-level commands; subcommands are entered with a space, e.g. `/skill list`, `/context clear`. - - -## 3. Usage - -Once connected: - -- **Private chat**: search for your bot username (e.g. `@my_cowagent_bot`) in Telegram, click `Start` and chat away. -- **Group chat**: add the bot to a group, then trigger it with `@bot hello` or by **replying to one of the bot's messages**. If the bot doesn't respond in groups, double-check Privacy Mode in [Step 2](#step-2-group-chat-only-disable-privacy-mode). - -When sending an image or file, you can **add a caption** (description / question) directly in the attachment input — the bot will answer based on both. Sending an attachment first and then a follow-up question also works; the two messages are merged automatically. diff --git a/docs/en/channels/web.mdx b/docs/en/channels/web.mdx deleted file mode 100644 index 97c43077..00000000 --- a/docs/en/channels/web.mdx +++ /dev/null @@ -1,95 +0,0 @@ ---- -title: Web Console -description: Use CowAgent through the Web Console ---- - -The Web Console is CowAgent's default channel. It runs automatically once started, letting you chat with the Agent in a browser and manage models, skills, memory, channels, and other configuration online. - -## Configuration - -```json -{ - "channel_type": "web", - "web_host": "0.0.0.0", - "web_port": 9899, - "web_password": "", - "enable_thinking": false -} -``` - -| Parameter | Description | Default | -| --- | --- | --- | -| `channel_type` | Set to `web` | `web` | -| `web_host` | Web service listen address. 
Defaults to `127.0.0.1` (local only); set to `0.0.0.0` for public access and configure a password | `""` | -| `web_port` | Web service listen port | `9899` | -| `web_password` | Access password. Leave empty to disable password protection; recommended when listening on `0.0.0.0` | `""` | -| `web_session_expire_days` | Login session validity in days | `30` | -| `web_file_serve_root` | Root directory the web console can directly read/send files from. Defaults to the user home dir and agent workspace only; set to `/` to allow the whole filesystem | `"~"` | -| `enable_thinking` | Whether to enable deep thinking mode | `false` | - -Once a password is configured, you must enter it to log in when accessing the console. The login session is kept for 30 days by default, so restarting the service during that period does not require re-login. The password can also be changed online from the "Configuration" page in the console. - -## Access URL - -After starting the project, visit: - -- Local: `http://localhost:9899` -- Server: `http://:9899` - - - Ensure the server firewall and security group allow the corresponding port. - - -## Features - -### Chat Interface - -Supports streaming output with real-time display of the Agent's reasoning process and tool calls, providing intuitive observation of the Agent's decision-making. Deep thinking can be toggled via configuration or the "Agent Configuration" switch in the console. - - - -#### Multi-Session Management - -The chat interface supports multi-session management. 
All session records are persistently stored in the database: - -- **Session List**: Click the history icon on the left to expand/collapse the session list panel, with scroll-to-load support for all historical sessions -- **AI-Generated Titles**: After the first exchange in a new session, the model is automatically called to generate a short summary title -- **New Session**: Click the "New Chat" button at the top of the session list or the `+` button in the input area to create a new session -- **Delete Session**: Click the delete button on a session item and confirm to permanently delete the session and all its messages -- **Clear Context**: Click the clear button in the input area to insert a divider in the current session. Messages above the divider are still displayed but no longer included as context for the model - -### Model Management - -Manage text, image, voice, and embedding model configurations for different providers online — no need to edit config files manually: - - - -### Skill Management - -View and manage Agent skills (Skills) online: - - - -### Memory Management - -View and manage Agent memory online: - - - -### Channel Management - -Manage connected channels online with real-time connect/disconnect operations: - - - -### Scheduled Tasks - -View and manage scheduled tasks online, including one-time tasks, fixed intervals, and Cron expressions: - - - -### Logs - -View Agent runtime logs in real time for monitoring and troubleshooting: - - diff --git a/docs/en/channels/wechat-kf.mdx b/docs/en/channels/wechat-kf.mdx deleted file mode 100644 index f0711d51..00000000 --- a/docs/en/channels/wechat-kf.mdx +++ /dev/null @@ -1,130 +0,0 @@ ---- -title: WeCom Customer Service -description: Integrate CowAgent into WeCom Customer Service (微信客服) ---- - -By binding a WeCom custom enterprise app to a WeCom Customer Service (微信客服) account, CowAgent can take over inbound inquiries from external WeChat users and serve them through links or QR codes embedded in 
WeChat Mini Programs, Official Accounts, Video Channels, and Video Channel stores. - - - WeCom Customer Service only supports Docker deployment or server Python deployment. A publicly reachable callback URL is required; local run mode is not supported. - - -## 1. Prerequisites - -Required resources: - -1. A server with a public IP -2. A registered and verified WeCom account -3. WeCom Customer Service capability enabled - - - It is recommended to create a **dedicated** WeCom custom app for Customer Service rather than reusing the existing `wechatcom_app` one — otherwise the two channels will compete for the same callback URL. - - -## 2. Create a WeCom Custom App - -1. In the [WeCom Admin Console](https://work.weixin.qq.com/wework_admin/frame#apps), go to **Application Management → Create Application**: - - - -2. Click **My Enterprise** and find the **Corp ID** at the bottom of the page (it goes into `wechat_kf_corp_id`): - - - -3. Open the app you just created and click **"View"** next to Secret. The Secret will be pushed to the admin's phone via the WeCom app, where it can be viewed: - - - -4. Open the app's **Receive Messages → Set API Reception** page, click **"Random Generate"** to generate the **Token** and **EncodingAESKey**, and save them: - - - - - Saving the API reception configuration will fail at this point because the program has not started yet. Come back to save it after the project is running. - - -## 3. Configuration and Run - -Fill in the 4 fields collected from the previous step (Corp ID / Secret / Token / EncodingAESKey): - - - - Start the Cow project and open the Web Console. Go to the **Channels** menu, click **Connect**, choose **WeCom Customer Service**, fill in Corp ID / Secret / Token / AES Key (port defaults to 9888, configurable), and click Connect. 
- - - - Add the following configuration to `config.json` (each parameter maps to a field shown in the screenshots above): - - ```json - { - "channel_type": "wechat_kf", - "wechat_kf_corp_id": "YOUR_CORP_ID", - "wechat_kf_secret": "YOUR_SECRET", - "wechat_kf_token": "YOUR_TOKEN", - "wechat_kf_aes_key": "YOUR_AES_KEY", - "wechat_kf_port": 9888 - } - ``` - - | Parameter | Description | - | --- | --- | - | `wechat_kf_corp_id` | Corp ID | - | `wechat_kf_secret` | Secret of the WeCom custom app bound to Customer Service | - | `wechat_kf_token` | Token from the API reception config | - | `wechat_kf_aes_key` | EncodingAESKey from the API reception config | - | `wechat_kf_port` | Listening port, default 9888 | - - - -After connecting, start the program (the Web Console method restarts the channel automatically). When the log shows `Listening on http://0.0.0.0:9888/wxkf/`, the program is running successfully. You need to open this port externally (e.g., allow it in the cloud server security group). - -Then go back to **Receive Messages → Set API Reception** in the WeCom console and set the callback URL to `http://<server-ip>:9888/wxkf/`, then click Save. After saving successfully, you also need to add the server IP to **Enterprise Trusted IPs**, otherwise messages cannot be sent or received: - - - - - - - If URL verification fails or the configuration is unsuccessful: - 1. Ensure the server firewall is disabled and the security group allows the listening port (default 9888) - 2. Carefully check that Token, Secret, EncodingAESKey and other parameters are consistent, and the URL format is correct - 3. Verified WeCom accounts must use a filed domain matching the entity - - -## 4. 
Bind a WeCom Customer Service Account - -In the WeCom Admin Console, go to **WeCom Customer Service**, create a customer service account, and bind it to the custom app you created above: - - - - - - - -After binding, go to **WeCom Customer Service → Account Details**, and under **"Access Link"**: - -- Click **"Copy Link"** to get an access link like `https://work.weixin.qq.com/kfid/kfcd83e5896b9ba07be` -- Click **"Generate QR Code"** to get the corresponding QR code - -Distribute the link or QR code to your WeChat customers: - - - -## 5. Usage - -After WeChat users enter the customer service conversation via the link or QR code, they can chat with the AI across multiple turns, with support for text, image, and voice messages: - - - -Beyond that, leveraging the official WeChat ecosystem, WeCom Customer Service can also be embedded into Official Accounts, Mini Programs, Video Channels and more. See the **WeCom Customer Service → Access Scenarios** section in the [WeCom Admin Console](https://work.weixin.qq.com/wework_admin/frame#/app/servicer) for details: - - - -## FAQ - -Make sure the following dependencies are installed: - -```bash -pip install websocket-client pycryptodome -``` diff --git a/docs/en/channels/wechatmp.mdx b/docs/en/channels/wechatmp.mdx deleted file mode 100644 index 3c6c2c8b..00000000 --- a/docs/en/channels/wechatmp.mdx +++ /dev/null @@ -1,72 +0,0 @@ ---- -title: WeChat Official Account -description: Integrate CowAgent with WeChat Official Accounts ---- - -CowAgent supports both personal subscription accounts and enterprise service accounts. - -| Type | Requirements | Features | -| --- | --- | --- | -| **Personal Subscription** | Available to individuals | Sends a placeholder reply first; users must send a message to retrieve the full response | -| **Enterprise Service** | Enterprise with verified customer service API | Can proactively push replies to users | - - - Official Accounts only support server and Docker deployment, not local run mode. 
Install extended dependencies: `pip3 install -r requirements-optional.txt` - - -## 1. Personal Subscription Account - -Add the following configuration to `config.json`: - -```json -{ - "channel_type": "wechatmp", - "single_chat_prefix": [""], - "wechatmp_app_id": "wx73f9******d1e48", - "wechatmp_app_secret": "YOUR_APP_SECRET", - "wechatmp_aes_key": "", - "wechatmp_token": "YOUR_TOKEN", - "wechatmp_port": 80 -} -``` - -### Setup Steps - -These configurations must be consistent with the [WeChat Official Account Platform](https://mp.weixin.qq.com/advanced/advanced?action=dev&t=advanced/dev). Navigate to **Settings & Development → Basic Configuration → Server Configuration** and configure as shown below: - - - -1. Enable the developer secret on the platform (corresponds to `wechatmp_app_secret`), and add the server IP to the whitelist -2. Fill in the `config.json` with the official account parameters matching the platform configuration -3. Start the program, which listens on port 80 (use `sudo` if you don't have permission; stop any process occupying port 80) -4. **Enable server configuration** on the official account platform and submit. A successful save means the configuration is complete. Note that the **"Server URL"** must be in the format `http://{HOST}/wx`, where `{HOST}` can be the server IP or domain - -After following the account and sending a message, you should see the following result: - - - -Due to subscription account limitations, short replies (within 15s) can be returned immediately, but longer replies will first send a "Thinking..." placeholder, requiring users to send any text to retrieve the answer. Enterprise service accounts can solve this with the customer service API. - - - **Voice Recognition**: You can use WeChat's built-in voice recognition. Enable "Receive Voice Recognition Results" under "Settings & Development → API Permissions" on the official account management page. - - -## 2. 
Enterprise Service Account - -The setup process for enterprise service accounts is essentially the same as personal subscription accounts, with the following differences: - -1. Register an enterprise service account on the platform and complete WeChat certification. Confirm that the **Customer Service API** permission has been granted -2. Set `"channel_type": "wechatmp_service"` in `config.json`; other configurations remain the same -3. Even for longer replies, they can be proactively pushed to users without requiring manual retrieval - -```json -{ - "channel_type": "wechatmp_service", - "single_chat_prefix": [""], - "wechatmp_app_id": "YOUR_APP_ID", - "wechatmp_app_secret": "YOUR_APP_SECRET", - "wechatmp_aes_key": "", - "wechatmp_token": "YOUR_TOKEN", - "wechatmp_port": 80 -} -``` diff --git a/docs/en/channels/wecom-bot.mdx b/docs/en/channels/wecom-bot.mdx deleted file mode 100644 index 2cb51fff..00000000 --- a/docs/en/channels/wecom-bot.mdx +++ /dev/null @@ -1,90 +0,0 @@ ---- -title: WeCom Bot -description: Connect CowAgent to WeCom AI Bot (WebSocket long connection) ---- - -> Connect CowAgent via WeCom AI Bot, supporting both internal direct messages and group chats. No public IP required — uses a WebSocket long connection, with Markdown rendering and streaming output. - - - WeCom Bot and WeCom App are two different integration methods. WeCom Bot uses a WebSocket long connection and requires no public IP or domain, making setup much simpler. - - -## 1. Connection methods - -### Option A: One-click QR scan (recommended) - -No need to create the bot ahead of time. Start CowAgent and open the Web console (local URL: http://127.0.0.1:9899/), go to the **Channels** tab, click **Connect Channel**, choose **WeCom Bot**, switch to **QR scan** mode, and scan the QR code with **WeCom** — bot creation and connection complete automatically. - - - - - After a successful scan, you can further configure the bot (name, avatar, visibility scope, etc.) 
in **WeCom Workbench → AI Bot**. - - -### Option B: Manual creation - -Create the AI Bot in WeCom and obtain the Bot ID and Secret, then connect via the Web console or config file. - -**Step 1: Create the AI Bot** - -1. Open the WeCom client, go to **Workbench**, and click **AI Bot**: - - - -2. Click **Create Bot → Manual Creation**: - - - -3. Scroll to the bottom of the right panel and select **API Mode**: - - - -4. Set the bot name, avatar, and visibility scope. Choose **Long Connection** mode, save the **Bot ID** and **Secret**, then click Save. - -**Step 2: Connect to CowAgent** - - - - Open the Web console, go to the **Channels** tab, click **Connect Channel**, choose **WeCom Bot**, switch to **Manual** mode, enter the Bot ID and Secret, and click Connect. - - - - - Add the following to `config.json`, then start CowAgent: - - ```json - { - "channel_type": "wecom_bot", - "wecom_bot_id": "YOUR_BOT_ID", - "wecom_bot_secret": "YOUR_SECRET" - } - ``` - - | Parameter | Description | - | --- | --- | - | `wecom_bot_id` | Bot ID of the AI Bot | - | `wecom_bot_secret` | Secret of the AI Bot | - - - -The log line `[WecomBot] Subscribe success` confirms the connection is established. - -## 2. Supported features - -| Feature | Status | -| --- | --- | -| Direct chat | ✅ | -| Group chat (@bot) | ✅ | -| Text messages | ✅ Send / Receive | -| Image messages | ✅ Send / Receive | -| File messages | ✅ Send / Receive | -| Streaming replies | ✅ | -| Scheduled push messages | ✅ | - -## 3. Usage - -Search for the bot's name inside WeCom to start a direct chat. - -To use the bot in an internal group chat, add it to the group and @-mention it. 
- - diff --git a/docs/en/channels/wecom.mdx b/docs/en/channels/wecom.mdx deleted file mode 100644 index e0aca17f..00000000 --- a/docs/en/channels/wecom.mdx +++ /dev/null @@ -1,98 +0,0 @@ ---- -title: WeCom -description: Integrate CowAgent into WeCom enterprise app ---- - -Integrate CowAgent into WeCom through a custom enterprise app, supporting one-on-one chat for internal employees. - - - WeCom only supports Docker deployment or server Python deployment. Local run mode is not supported. - - -## 1. Prerequisites - -Required resources: - -1. A server with public IP (overseas server, or domestic server with a proxy for international API access) -2. A registered WeCom account (individual registration is possible but cannot be certified) -3. Certified WeCom accounts additionally require a domain filed under the corresponding entity - -## 2. Create WeCom App - -1. In the [WeCom Admin Console](https://work.weixin.qq.com/wework_admin/frame#profile), click **My Enterprise** and find the **Corp ID** at the bottom of the page. Save this ID for the `wechatcom_corp_id` configuration field. - -2. Switch to **Application Management** and click Create Application: - - - -3. On the application creation page, record the `AgentId` and `Secret`: - - - -4. Click **Set API Reception** to configure the application interface: - - - -- URL format: `http://ip:port/wxcomapp` (certified enterprises must use a filed domain) -- Generate random `Token` and `EncodingAESKey` and save them for the configuration file - - - The API reception configuration cannot be saved at this point because the program hasn't started yet. Come back to save it after the project is running. - - -## 3. 
Configuration and Run - -Add the following configuration to `config.json` (the mapping between each parameter and the WeCom console is shown in the screenshots above): - -```json -{ - "channel_type": "wechatcom_app", - "single_chat_prefix": [""], - "wechatcom_corp_id": "YOUR_CORP_ID", - "wechatcomapp_token": "YOUR_TOKEN", - "wechatcomapp_secret": "YOUR_SECRET", - "wechatcomapp_agent_id": "YOUR_AGENT_ID", - "wechatcomapp_aes_key": "YOUR_AES_KEY", - "wechatcomapp_port": 9898 -} -``` - -| Parameter | Description | -| --- | --- | -| `wechatcom_corp_id` | Corp ID | -| `wechatcomapp_token` | Token from API reception config | -| `wechatcomapp_secret` | App Secret | -| `wechatcomapp_agent_id` | App AgentId | -| `wechatcomapp_aes_key` | EncodingAESKey from API reception config | -| `wechatcomapp_port` | Listen port, default 9898 | - -After configuration, start the program. When the log shows `http://0.0.0.0:9898/`, the program is running successfully. You need to open this port externally (e.g., allow it in the cloud server security group). - -After the program starts, return to the WeCom Admin Console to save the **Message Server Configuration**. After saving successfully, you also need to add the server IP to **Enterprise Trusted IPs**, otherwise messages cannot be sent or received: - - - - - If the URL configuration callback fails or the configuration is unsuccessful: - 1. Ensure the server firewall is disabled and the security group allows the listening port - 2. Carefully check that Token, Secret Key and other parameter configurations are consistent, and that the URL format is correct - 3. Certified WeCom accounts must configure a filed domain matching the entity - - -## 4. Usage - -Search for the app name you just created in WeCom to start chatting directly. 
You can run multiple instances listening on different ports to create multiple WeCom apps: - - - -To allow external personal WeChat users to use the app, go to **My Enterprise → WeChat Plugin**, share the invite QR code. After scanning and following, personal WeChat users can join and chat with the app: - - - -## FAQ - -Make sure the following dependencies are installed: - -```bash -pip install websocket-client pycryptodome -``` diff --git a/docs/en/channels/weixin.mdx b/docs/en/channels/weixin.mdx deleted file mode 100644 index 0acb0a43..00000000 --- a/docs/en/channels/weixin.mdx +++ /dev/null @@ -1,71 +0,0 @@ ---- -title: WeChat -description: Connect CowAgent to personal WeChat (via the official API) ---- - -> Connect CowAgent to your personal WeChat — scan to log in, no public IP required. Supports text, image, voice, file, and video messages in 1-on-1 chats. Backed by WeChat's official API; safe to use. After connecting, a bot assistant is added to your conversation list without affecting normal account usage. - -## 1. Setup and run - -### Option A: Web console - -Start CowAgent and open the Web console (local URL: http://127.0.0.1:9899/). Go to the **Channels** tab, click **Connect Channel**, select **WeChat**, and follow the prompts to scan in. - - - -### Option B: Config file - -Set `channel_type` to `weixin` in `config.json`: - -```json -{ - "channel_type": "weixin" -} -``` - -After starting CowAgent, a QR code is displayed in the terminal. Scan it with WeChat to complete login. - - - - - 1. For backward compatibility, setting `channel_type` to `wx` also activates the WeChat channel. - 2. The WeChat client must be on version **8.0.69** or higher. - - -## 2. Usage - -Once authorized, the integration completes and you can start chatting. A bot assistant is created in your WeChat conversation list, leaving normal account usage unaffected. - -> You can find the bot at any time by searching for **"微信ClawBot"**. 
You may also rename it, change its avatar, pin it to the top of your conversation list, and so on. - - - -## 3. Login - -### QR code login - -On first startup, a QR code appears in the terminal (valid for around 2 minutes). Scan it with WeChat and confirm on your phone to log in. - -- The QR code refreshes automatically when it expires -- The `qrcode` dependency is already included in `requirements.txt`, so the QR code renders directly in the terminal after install - -### Credential persistence - -After a successful login, credentials are saved to `~/.weixin_cow_credentials.json`. Subsequent startups reuse the saved credentials with no need to re-scan. - -To force a re-login, delete the credentials file and restart. - -### Session expiry - -When the WeChat session expires (errcode `-14`), CowAgent automatically clears old credentials and initiates a new QR login — no manual intervention required. - -## 4. Supported features - -| Feature | Status | -| --- | --- | -| Direct messages | ✅ | -| Text messages | ✅ Send & Receive | -| Image messages | ✅ Send & Receive | -| File messages | ✅ Send & Receive | -| Video messages | ✅ Send & Receive | -| Voice messages | ✅ Receive (built-in speech recognition) | diff --git a/docs/en/cli/general.mdx b/docs/en/cli/general.mdx deleted file mode 100644 index 8107fcb5..00000000 --- a/docs/en/cli/general.mdx +++ /dev/null @@ -1,110 +0,0 @@ ---- -title: General Commands -description: View status, manage config, and control context with commonly used commands ---- - -The following commands can be used in chat with the `/` prefix or in the terminal with the `cow` prefix (some are chat-only). - - - In the Web console, typing `/` brings up an autocomplete menu with keyboard navigation and Tab completion. - - -## help - -Show help information for all available commands. - -```text -/help -``` - -## status - -View current session and service status, including process info, model configuration, message count, and loaded skills. 
- -```text -/status -``` - -## cancel - -Abort the agent task currently running in this session. When the agent is busy with a long task (e.g. multi-turn tool calls or a long streaming response), send `/cancel` and the agent will stop before the next tool execution. Available across all channels — Web, WeChat, WeCom, Feishu, etc. - -```text -/cancel -``` - -## config - -View or modify runtime configuration. Changes take effect immediately without restarting. - -**View all configurable items:** - -```text -/config -``` - -**View a single item:** - -```text -/config model -``` - -**Modify a config item:** - -```text -/config model deepseek-v4-flash -``` - -**Configurable items:** - -| Item | Description | Example | -| --- | --- | --- | -| `model` | AI model name | `deepseek-v4-flash` | -| `agent_max_context_tokens` | Max context tokens | `40000` | -| `agent_max_context_turns` | Max context memory turns | `30` | -| `agent_max_steps` | Max decision steps per task | `15` | -| `enable_thinking` | Enable deep thinking mode | `true` / `false` | - - - When changing `model`, the system automatically matches the corresponding model API. Configuration is persisted to `config.json`. - - -## context - -View current session context statistics, including message count and content length. - -```text -/context -``` - -**Clear current session context:** - -```text -/context clear -``` - - - Clearing context makes the Agent "forget" previous conversation, useful for switching topics or freeing context space. - - -## logs - -View recent service logs. Shows the last 20 lines by default, up to 50. - -```text -/logs -``` - -**Specify line count:** - -```text -/logs 50 -``` - -## version - -Show the current CowAgent version. 
- -```text -/version -``` diff --git a/docs/en/cli/index.mdx b/docs/en/cli/index.mdx deleted file mode 100644 index e13b45a3..00000000 --- a/docs/en/cli/index.mdx +++ /dev/null @@ -1,96 +0,0 @@ ---- -title: Commands Overview -description: CowAgent command system — Terminal CLI and chat commands ---- - -CowAgent provides two ways to interact via commands: - -- **Terminal CLI** — Run `cow <command>` in your system terminal for service management, skill management, and other operations -- **Chat Commands** — Type `/<command>` or `cow <command>` in any conversation to check status, manage skills, adjust configuration, etc. - -## Cow CLI - -After deploying with the one-click install script, the `cow` command is automatically available. For manual installations, run: - -```bash -pip install -e . -``` - -Then use the `cow` command from anywhere: - -```bash -cow help -``` - -Example output: - -``` -🐮 CowAgent CLI - -Usage: cow <command> - -Service: - start Start the CowAgent service - stop Stop the CowAgent service - restart Restart the CowAgent service - update Update code and restart service - status Show service status - logs View service logs - -Skills: - skill Manage skills (list / search / install / uninstall ...) - -Memory & Knowledge: - memory Memory distillation (dream) - knowledge View knowledge base stats and structure - -Others: - help Show this help message - version Show version -``` - -## Chat Commands - -In the Web console or any connected channel, type `/` to see command suggestions. Supported commands: - -| Command | Description | -| --- | --- | -| `/help` | Show command help | -| `/status` | View service status and configuration | -| `/cancel` | Abort the currently running agent task | -| `/config` | View or modify runtime configuration | -| `/skill` | Manage skills (install, uninstall, enable, disable, etc.) 
| -| `/memory dream [N]` | Manually trigger memory distillation (default 3 days, max 30) | -| `/knowledge` | View knowledge base statistics | -| `/knowledge list` | View knowledge base directory structure | -| `/knowledge on\|off` | Enable or disable knowledge base | -| `/context` | View current session context info | -| `/context clear` | Clear current session context | -| `/logs` | View recent logs | -| `/version` | Show version number | - - - Service management commands like `/start`, `/stop`, `/restart` will prompt you to use them in the terminal instead, as they involve process operations. - - -## Command Availability - -| Command | Terminal (`cow`) | Chat (`/`) | -| --- | :---: | :---: | -| help | ✓ | ✓ | -| version | ✓ | ✓ | -| status | ✓ | ✓ | -| logs | ✓ | ✓ | -| cancel | ✗ | ✓ | -| config | ✗ | ✓ | -| context | — | ✓ | -| memory (subcommands) | ✗ | ✓ | -| knowledge (subcommands) | ✓ | ✓ | -| skill (subcommands) | ✓ | ✓ | -| start / stop / restart | ✓ | ✗ | -| update | ✓ | ✗ | -| install-browser | ✓ | ✗ | - - - `context` only shows a hint in the terminal to use it in chat. `config` is only available in chat. - diff --git a/docs/en/cli/memory-knowledge.mdx b/docs/en/cli/memory-knowledge.mdx deleted file mode 100644 index c748120c..00000000 --- a/docs/en/cli/memory-knowledge.mdx +++ /dev/null @@ -1,63 +0,0 @@ ---- -title: Memory & Knowledge -description: Memory distillation and knowledge base management commands ---- - -## memory - -Manage the Agent's long-term memory system. - -### memory dream - -Manually trigger memory distillation (Deep Dream) — consolidate recent daily memories into MEMORY.md and generate a dream diary. 
- -```text -/memory dream [N] -``` - -- `N`: Consolidate the last N days of memory (default 3, max 30) -- Runs asynchronously in the background; you'll be notified in chat when complete -- Works without Agent initialization — can be used before the first conversation - -**Examples:** - -```text -/memory dream # Consolidate last 3 days -/memory dream 7 # Consolidate last 7 days -/memory dream 30 # Consolidate last 30 days (full) -``` - -On the Web console, the completion notification includes clickable links to view the updated MEMORY.md and dream diary. - - - The system automatically runs distillation daily at 23:55 (lookback 1 day). Manual trigger is useful for consolidating historical memories after first deployment, or when you need an immediate memory update. - - -## knowledge - -View and manage the personal knowledge base. Shows statistics by default. - -```text -/knowledge -``` - -### knowledge list - -View the knowledge base directory tree. - -```text -/knowledge list -``` - -### knowledge on / off - -Enable or disable the knowledge base. When disabled, knowledge prompts and file indexing are not injected. - -```text -/knowledge on -/knowledge off -``` - - - In the terminal CLI, `cow knowledge` and `cow knowledge list` are available, but `on|off` is only supported in chat (requires runtime effect). - diff --git a/docs/en/cli/process.mdx b/docs/en/cli/process.mdx deleted file mode 100644 index 452df03a..00000000 --- a/docs/en/cli/process.mdx +++ /dev/null @@ -1,123 +0,0 @@ ---- -title: Process Management -description: Manage CowAgent process lifecycle with cow commands ---- - -Process management commands control the CowAgent background process. These commands are only available in the terminal. - -## start - -Start the CowAgent service. Runs as a background daemon by default and automatically tails logs. 
- -```bash -cow start -``` - -**Options:** - -| Option | Description | -| --- | --- | -| `-f`, `--foreground` | Run in foreground, not as a background daemon | -| `--no-logs` | Don't tail logs after starting | - -## stop - -Stop the running CowAgent service. - -```bash -cow stop -``` - -## restart - -Restart the CowAgent service (stop then start). - -```bash -cow restart -``` - -**Options:** - -| Option | Description | -| --- | --- | -| `--no-logs` | Don't tail logs after restart | - -## update - -Update code and restart the service. Automatically performs: - -1. Pull latest code (`git pull`) -2. Stop current service -3. Update Python dependencies -4. Reinstall CLI -5. Start service - -```bash -cow update -``` - - - If `git pull` fails (e.g., uncommitted local changes), the update aborts and the service remains unaffected. - - -## status - -Check CowAgent service status, including process info, version, and current model/channel configuration. - -```bash -cow status -``` - -## logs - -View service logs. - -```bash -cow logs -``` - -**Options:** - -| Option | Description | Default | -| --- | --- | --- | -| `-f`, `--follow` | Continuously tail log output | No | -| `-n`, `--lines` | Show last N lines | 50 | - -Examples: - -```bash -# View last 100 lines -cow logs -n 100 - -# Continuously tail logs -cow logs -f -``` - -## install-browser - -Install Playwright and Chromium browser for the [browser tool](/en/tools/browser). - -```bash -cow install-browser -``` - - - Only needed when using browser tools (web browsing, screenshots, etc.). 
- - -## run.sh Compatibility - -If Cow CLI is not installed, you can use `run.sh` to manage the service: - -| cow command | run.sh equivalent | -| --- | --- | -| `cow start` | `./run.sh start` | -| `cow stop` | `./run.sh stop` | -| `cow restart` | `./run.sh restart` | -| `cow update` | `./run.sh update` | -| `cow status` | `./run.sh status` | -| `cow logs` | `./run.sh logs` | - - - The `cow` command is recommended — it provides cleaner syntax and richer features. It is automatically installed via the one-click install script. - diff --git a/docs/en/cli/skill.mdx b/docs/en/cli/skill.mdx deleted file mode 100644 index 99e41dec..00000000 --- a/docs/en/cli/skill.mdx +++ /dev/null @@ -1,210 +0,0 @@ ---- -title: Skill Management -description: Install, uninstall, enable, disable, and manage skills via commands ---- - -Skill management commands are used to install, query, and manage CowAgent skills. Use `/skill <command>` in chat or `cow skill <command>` in the terminal. - -## list - -List installed skills and their status. - - -```text Chat -/skill list -``` - -```bash Terminal -cow skill list -``` - - -Example output: - -``` -📦 Installed skills (3/4) - -✅ pptx - Use this skill any time a .pptx file is involved… - Source: cowhub - -✅ skill-creator - Create, install, or update skills… - Source: builtin - -⏸️ image-vision (disabled) - Image understanding and visual analysis - Source: builtin -``` - -**Browse the Skill Hub** (view all available skills): - - -```text Chat -/skill list --remote -``` - -```bash Terminal -cow skill list --remote -``` - - -**Options:** - -| Option | Description | Default | -| --- | --- | --- | -| `--remote`, `-r` | Browse Skill Hub remote skill list | No | -| `--page` | Page number for remote listing | 1 | - -## search - -Search for skills on the Skill Hub. 
- - -```text Chat -/skill search pptx -``` - -```bash Terminal -cow skill search pptx -``` - - -## install - -Install skills with a single `install` command from Cow Skill Hub, GitHub, ClawHub, or any URL (zip archives, SKILL.md links) — no manual download or configuration required. - -**From Skill Hub (recommended):** - - -```text Chat -/skill install pptx -``` - -```bash Terminal -cow skill install pptx -``` - - -**From GitHub:** - - -```text Chat -# Install all skills in a repo (auto-discovers subdirectories with SKILL.md) -/skill install larksuite/cli - -# Specify a subdirectory to install a single skill -/skill install https://github.com/larksuite/cli/tree/main/skills/lark-im - -# Use # to specify a subdirectory -/skill install larksuite/cli#skills/lark-minutes -``` - -```bash Terminal -# Install all skills in a repo (auto-discovers subdirectories with SKILL.md) -cow skill install larksuite/cli - -# Specify a subdirectory to install a single skill -cow skill install https://github.com/larksuite/cli/tree/main/skills/lark-im - -# Use # to specify a subdirectory -cow skill install larksuite/cli#skills/lark-minutes -``` - - -Supports full GitHub URLs and `owner/repo` shorthand. For mono-repos (multiple skills in one repository), omitting the subdirectory auto-discovers and batch-installs all skills; specifying a subdirectory installs only that skill. 
- -**From ClawHub:** - - -```text Chat -/skill install clawhub:baidu-search -``` - -```bash Terminal -cow skill install clawhub:baidu-search -``` - - -**From URL:** - - -```text Chat -# Install from a zip archive (single or batch) -/skill install https://cdn.link-ai.tech/skills/pptx.zip - -# Install from a SKILL.md link -/skill install https://example.com/path/to/SKILL.md -``` - -```bash Terminal -# Install from a zip archive (single or batch) -cow skill install https://cdn.link-ai.tech/skills/pptx.zip - -# Install from a SKILL.md link -cow skill install https://example.com/path/to/SKILL.md -``` - - -Supports installing from zip / tar.gz archive URLs — automatically extracts and discovers directories containing `SKILL.md`, with support for single or batch install. Also supports installing directly from a `SKILL.md` file URL, automatically parsing the skill name and description. - -## uninstall - -Uninstall an installed skill. - - -```text Chat -/skill uninstall pptx -``` - -```bash Terminal -cow skill uninstall pptx -``` - - - - Uninstalling deletes all files in the skill directory. This action cannot be undone. - - -## enable / disable - -Enable or disable a skill. Disabled skills will not be invoked by the Agent. - - -```text Chat -/skill enable pptx -/skill disable pptx -``` - -```bash Terminal -cow skill enable pptx -cow skill disable pptx -``` - - -## info - -View details of an installed skill, including a preview of its `SKILL.md`. 
- - -```text Chat -/skill info pptx -``` - -```bash Terminal -cow skill info pptx -``` - - -## Skill Sources - -Installed skills track their origin, viewable via `/skill list`: - -| Source | Description | -| --- | --- | -| `builtin` | Built-in project skills | -| `cowhub` | Installed from CowAgent Skill Hub | -| `github` | Installed directly from a GitHub URL | -| `clawhub` | Installed from ClawHub | -| `url` | Installed from a SKILL.md URL | -| `local` | Locally created skills | diff --git a/docs/en/guide/manual-install.mdx b/docs/en/guide/manual-install.mdx deleted file mode 100644 index 1ef580d2..00000000 --- a/docs/en/guide/manual-install.mdx +++ /dev/null @@ -1,148 +0,0 @@ ---- -title: Manual Install -description: Deploy CowAgent manually (source code / Docker) ---- - -## Source Code Deployment - -### 1. Clone the project - -```bash -git clone https://github.com/zhayujie/CowAgent -cd CowAgent/ -``` - - - For network issues, use the mirror: https://gitee.com/zhayujie/CowAgent - - -### 2. Install dependencies - -Core dependencies (required): - -```bash -pip3 install -r requirements.txt -``` - -Optional dependencies (recommended): - -```bash -pip3 install -r requirements-optional.txt -``` - -### 3. Install Cow CLI - -Install the command-line tool for managing services and skills: - -```bash -pip3 install -e . -``` - -Then use the `cow` command: - -```bash -cow help -``` - - - This step is recommended. After installation you can use `cow start`, `cow stop`, `cow update` to manage the service, and `cow skill` to manage skills. Without the CLI, you can use `./run.sh` or `python3 app.py` to run. - - -### 4. Configure - -Copy the config template and edit: - -```bash -cp config-template.json config.json -``` - -Fill in model API keys, channel type, and other settings in `config.json`. See the [model docs](/en/models/index) for details. - -### 5. 
Run - -**Using Cow CLI (recommended):** - -```bash -cow start -``` - -**Or run locally in foreground:** - -```bash -python3 app.py -``` - -By default, the Web console starts. Access `http://localhost:9899` to chat. - -**Background run on server (without CLI):** - -```bash -nohup python3 app.py & tail -f nohup.out -``` - - - **Deploying on a server?** By default `web_host` only listens on `127.0.0.1` (local access). Set `web_host` to `0.0.0.0` in `config.json` to make the console reachable from outside, and set `web_password` to protect it. Don't forget to open port `9899` in your firewall or security group — ideally restricted to specific IPs. - - -## Docker Deployment - -Docker deployment does not require cloning source code or installing dependencies. For Agent mode, source deployment is recommended for broader system access. - - - Requires [Docker](https://docs.docker.com/engine/install/) and docker-compose. - - -**1. Download config** - -```bash -curl -O https://cdn.link-ai.tech/code/cow/docker-compose.yml -``` - -Edit `docker-compose.yml` with your configuration. - -**2. Start container** - -```bash -sudo docker compose up -d -``` - -**3. View logs** - -```bash -sudo docker logs -f chatgpt-on-wechat -``` - - - **Running in Docker?** Set `WEB_HOST` to `0.0.0.0` in `docker-compose.yml` so the console is reachable from outside the container, and set `WEB_PASSWORD` to protect it. Make sure port `9899` is mapped to the host and open in your firewall or security group. 
- - -## Core Configuration - -```json -{ - "channel_type": "web", - "model": "deepseek-v4-flash", - "deepseek_api_key": "", - "agent": true, - "agent_workspace": "~/cow", - "agent_max_context_tokens": 40000, - "agent_max_context_turns": 30, - "agent_max_steps": 15, - "cow_lang": "auto" -} -``` - -| Parameter | Description | Default | -| --- | --- | --- | -| `channel_type` | Channel type | `web` | -| `model` | Model name | `deepseek-v4-flash` | -| `agent` | Enable Agent mode | `true` | -| `agent_workspace` | Agent workspace path | `~/cow` | -| `agent_max_context_tokens` | Max context tokens | `40000` | -| `agent_max_context_turns` | Max context turns | `30` | -| `agent_max_steps` | Max decision steps per task | `15` | -| `cow_lang` | Language for the UI, command text and system prompts; `auto` to detect, or set `zh` / `en` | `auto` | - - - Full configuration options are in the project [`config.py`](https://github.com/zhayujie/CowAgent/blob/master/config.py). - diff --git a/docs/en/guide/quick-start.mdx b/docs/en/guide/quick-start.mdx deleted file mode 100644 index 343956dc..00000000 --- a/docs/en/guide/quick-start.mdx +++ /dev/null @@ -1,58 +0,0 @@ ---- -title: One-click Install -description: One-click install and manage CowAgent with scripts ---- - -The project provides scripts for one-click install, configuration, startup, and management. Script-based deployment is recommended for quick setup. - -Supports Linux, macOS, and Windows. Requires Python 3.7-3.12 (3.9 recommended). - -## Install Command - - - - ```bash - bash <(curl -fsSL https://cdn.link-ai.tech/code/cow/run.sh) - ``` - - - ```powershell - irm https://cdn.link-ai.tech/code/cow/run.ps1 | iex - ``` - - - -The script automatically performs these steps: - -1. Check Python environment (requires Python 3.7+) -2. Install required tools (git, curl, etc.) -3. Clone project to `~/CowAgent` -4. Install Python dependencies and Cow CLI -5. Guided configuration for AI model and channel -6. 
Start service - -By default, the Web console starts after installation. Access `http://localhost:9899` to begin chatting. - - - **Deploying on a server?** By default `web_host` only listens on `127.0.0.1` (local access only). Set `web_host` to `0.0.0.0` in `config.json` to make the console reachable from outside, and set `web_password` to protect it. Don't forget to open port `9899` in your firewall or security group — ideally restricted to specific IPs. - - -## Management Commands - -After installation, use the `cow` command to manage the service: - -| Command | Description | -| --- | --- | -| `cow start` | Start service | -| `cow stop` | Stop service | -| `cow restart` | Restart service | -| `cow status` | Check run status | -| `cow logs` | View real-time logs | -| `cow update` | Update code and restart | -| `cow install-browser` | Install browser tool dependencies | - -See the [Commands documentation](/en/cli/index) for more details. - - - If the `cow` command is not available, you can use `./run.sh <command>` (Linux/macOS) or `.\scripts\run.ps1 <command>` (Windows) as a fallback. Both are functionally equivalent. - diff --git a/docs/en/guide/upgrade.mdx b/docs/en/guide/upgrade.mdx deleted file mode 100644 index d1cd5df6..00000000 --- a/docs/en/guide/upgrade.mdx +++ /dev/null @@ -1,61 +0,0 @@ ---- -title: Upgrade -description: How to upgrade CowAgent ---- - -## Recommended: One-line upgrade - -Use `cow update` to pull the latest code and restart the service in one step: - -```bash -cow update -``` - -The command runs the following automatically: - -1. Pull the latest code (`git pull`) -2. Stop the running service -3. Update Python dependencies -4. Reinstall the CLI -5. Start the service - - - If the Cow CLI is not installed, `./run.sh update` performs the same operations. - - -## Manual upgrade - -Run the following inside the project root: - -```bash -git pull -pip3 install -r requirements.txt -pip3 install -e .
-``` - -Then restart the service: - -```bash -# Using Cow CLI (recommended) -cow restart - -# Or using run.sh -./run.sh restart - -# Or restart manually with nohup -kill $(ps -ef | grep app.py | grep -v grep | awk '{print $2}') -nohup python3 app.py & tail -f nohup.out -``` - -## Docker upgrade - -Run the following in the directory containing `docker-compose.yml`: - -```bash -sudo docker compose pull -sudo docker compose up -d -``` - - - Back up `config.json` before upgrading. For Docker deployments, mount the workspace directory as a volume to persist data across upgrades. - diff --git a/docs/en/intro/architecture.mdx b/docs/en/intro/architecture.mdx deleted file mode 100644 index 98084b48..00000000 --- a/docs/en/intro/architecture.mdx +++ /dev/null @@ -1,87 +0,0 @@ ---- -title: Architecture -description: CowAgent 2.0 system architecture and core design ---- - -CowAgent 2.0 has evolved from a simple chatbot into a super intelligent assistant with Agent architecture, featuring autonomous thinking, task planning, long-term memory, and skill extensibility. - -## System Architecture - -CowAgent's architecture consists of the following core modules: - -CowAgent Architecture - -| Module | Description | -| --- | --- | -| **Plan** | Understands user intent, decomposes complex tasks into multi-step plans, and iteratively invokes tools until the goal is achieved | -| **Memory** | Automatically persists important information as core memory and daily memory, with hybrid keyword and vector retrieval for cross-session context continuity | -| **Knowledge** | Organizes structured knowledge by topic. The Agent autonomously distills valuable information into Markdown pages, maintaining indexes and cross-references to build a growing knowledge network | -| **Tools** | Core capability for Agent to access OS resources. 10+ built-in tools including file read/write, terminal, browser, scheduler, memory search, web search, and more | -| **Skills** | Loads and manages Skills. 
Supports one-click installation from Skill Hub, GitHub, and more, or custom skill creation through conversation | -| **Models** | Model layer with unified access to OpenAI, Claude, Gemini, DeepSeek, MiniMax, GLM, Qwen, and other mainstream LLMs | -| **Channels** | Message channel layer for receiving and sending messages. Supports Web console, WeChat, Feishu, DingTalk, WeCom, WeChat Official Account, and more with a unified protocol | -| **CLI** | Command-line system providing terminal commands (`cow`) and chat commands (`/`) for process management, skill installation, configuration, knowledge base management, and more | - -## Agent Mode Workflow - -When Agent mode is enabled, CowAgent runs as an autonomous agent with the following workflow: - -1. **Receive Message** — Receive user input through channels -2. **Understand Intent** — Analyze task requirements and context -3. **Plan Task** — Break complex tasks into multiple steps -4. **Invoke Tools** — Select and execute appropriate tools for each step -5. **Update Memory & Knowledge** — Store important information in long-term memory and organize structured knowledge into the knowledge base -6. 
**Return Result** — Send execution results back to the user - -## Workspace Directory Structure - -The Agent workspace is located at `~/cow` by default and stores system prompts, memory files, and skill files: - -``` -~/cow/ -├── SYSTEM.md # Agent system prompt -├── USER.md # User profile -├── MEMORY.md # Core memory -├── memory/ # Long-term memory storage -│ └── YYYY-MM-DD.md # Daily memory -├── knowledge/ # Personal knowledge base -│ ├── index.md # Knowledge index -│ └── <topic>/ # Topic-based pages -└── skills/ # Custom skills - ├── skill-1/ - └── skill-2/ -``` - -Secret keys are stored separately in `~/.cow` directory for security: - -``` -~/.cow/ -└── .env # Secret keys for skills -``` - -## Core Configuration - -Configure Agent mode parameters in `config.json`: - -```json -{ - "agent": true, - "agent_workspace": "~/cow", - "agent_max_context_tokens": 50000, - "agent_max_context_turns": 20, - "agent_max_steps": 20, - "enable_thinking": false, - "cow_lang": "auto" -} -``` - -| Parameter | Description | Default | -| --- | --- | --- | -| `agent` | Enable Agent mode | `true` | -| `agent_workspace` | Workspace path | `~/cow` | -| `agent_max_context_tokens` | Max context tokens | `50000` | -| `agent_max_context_turns` | Max context turns | `20` | -| `agent_max_steps` | Max decision steps per task | `20` | -| `enable_thinking` | Enable deep-thinking mode | `false` | -| `knowledge` | Enable personal knowledge base | `true` | -| `cow_lang` | Language for the UI, command text and system prompts; `auto` to detect, or set `zh` / `en` | `auto` | diff --git a/docs/en/intro/features.mdx b/docs/en/intro/features.mdx deleted file mode 100644 index 8b65f18d..00000000 --- a/docs/en/intro/features.mdx +++ /dev/null @@ -1,139 +0,0 @@ ---- -title: Features -description: CowAgent long-term memory, task planning, skills system, CLI commands, and browser tool in detail ---- - -## 1.
Long-term Memory - -The memory system enables the Agent to remember important information over time, using a three-tier memory flow: conversation context (short-term) → daily memory (mid-term) → MEMORY.md (long-term), forming a complete memory lifecycle. - -On first launch, the Agent proactively asks the user for key information and records it in the workspace (default `~/cow`) — including agent settings, user identity, and memory files. - -In subsequent long-term conversations, the Agent intelligently stores or retrieves memory as needed, continuously updating its own settings, user preferences, and memory files. **Deep Dream** distillation runs daily, consolidating scattered daily memories into refined long-term memory and generating a narrative-style dream diary. - - - - - -See [Long-term Memory](/en/memory) and [Deep Dream](/en/memory/deep-dream) for details. - -## 2. Personal Knowledge Base - -> The knowledge base system enables the Agent to continuously accumulate and organize structured knowledge. Unlike memory which records along a timeline, the knowledge base is organized by topics, transforming articles, conversation insights, and learning materials into interconnected Markdown pages that form a continuously growing knowledge network. - -The Agent automatically organizes valuable information from conversations into knowledge pages, maintaining cross-references and indexes. The Web console provides document browsing and knowledge graph visualization. Knowledge is stored in `~/cow/knowledge/` within the workspace. 
- -- **Auto-organization**: The Agent autonomously extracts and organizes structured knowledge during conversations, maintaining indexes and cross-references -- **Knowledge graph**: Automatically builds a knowledge graph from cross-references between pages, with interactive graph visualization in the Web console -- **Chat integration**: Knowledge document links referenced in Agent replies can be clicked directly in the Web console for viewing -- **CLI management**: Use `/knowledge` commands to view stats, browse directory, and toggle the feature with `/knowledge on|off` - - - - - -See [Personal Knowledge Base](/en/knowledge) for details. - -## 3. Task Planning and Tool Use - -Tools are the core of how the Agent accesses operating system resources. The Agent intelligently selects and invokes tools based on task requirements, performing file read/write, command execution, scheduled tasks, and more. Built-in tools are implemented in the project's `agent/tools/` directory. - -**Key tools:** file read/write/edit, Bash terminal, browser, file send, scheduler, memory search, web search, environment config, and more. - -### 3.1 Terminal and File Access - -Access to the OS terminal and file system is the most fundamental and core capability. Many other tools and skills build on top of this. Users can interact with the Agent from a mobile device to operate resources on their personal computer or server: - - - - - -### 3.2 Programming Capability - -Combining programming and system access, the Agent can execute the complete **Vibecoding workflow** — from information search, asset generation, coding, testing, deployment, Nginx configuration, to publishing — all triggered by a single command from your phone: - - - - - -### 3.3 Scheduled Tasks - -The `scheduler` tool enables dynamic scheduled tasks, supporting **one-time tasks, fixed intervals, and Cron expressions**. 
Tasks can be triggered as either a **fixed message send** or an **Agent dynamic task** execution: - - - - - -### 3.4 Browser - -The built-in `browser` tool allows the Agent to control a Chromium browser to visit web pages, fill forms, click elements, and take screenshots, with support for dynamic JS-rendered pages. Run `cow install-browser` to install with one command, automatically adapting to server (headless) and desktop environments: - - - - - -### 3.5 Environment Variable Management - -Secrets required by skills are stored in an environment variable file, managed by the `env_config` tool. You can update secrets through conversation, with built-in security protection and desensitization: - - - - - -## 4. Skills System - -The Skills system provides infinite extensibility for the Agent. Each Skill consists of a description file, execution scripts (optional), and resources (optional), describing how to complete specific types of tasks. Skills allow the Agent to follow instructions for complex workflows, invoke tools, or integrate third-party systems. - -- [Skill Hub](https://skills.cowagent.ai/): An open skill marketplace featuring official, community, and third-party skills. Install with one command. -- **Built-in skills:** Located in the project's `skills/` directory, including skill creator, image recognition, LinkAI agent, web fetch, and more. Built-in skills are automatically enabled based on dependency conditions (API keys, system commands, etc.). -- **Custom skills:** Created by users through conversation, stored in the workspace (`~/cow/skills/`), capable of implementing any complex business process or third-party integration. - -Install skills: `/skill install <name>` or `cow skill install <name>`, supporting Skill Hub, GitHub, ClawHub, URL, and more. - -### 4.1 Creating Skills - -The `skill-creator` skill enables rapid skill creation through conversation.
You can ask the Agent to codify a workflow as a skill, or send any API documentation and examples for the Agent to complete the integration directly: - - - - - -### 4.2 Web Search and Image Recognition - -- **Web search:** Built-in `web_search` tool, supports multiple search engines. Configure `BOCHA_API_KEY` or `LINKAI_API_KEY` to enable. -- **Image recognition:** Built-in `openai-image-vision` skill, supports `gpt-4.1-mini`, `gpt-4.1`, and other models. Requires `OPENAI_API_KEY`. - - - - - -### 4.3 Skill Hub - -Visit [skills.cowagent.ai](https://skills.cowagent.ai/) to browse all available skills, or use commands in conversation: - -```text -/skill list --remote # Browse Skill Hub -/skill search <keyword> # Search skills -/skill install <name> # Install with one command -``` - -Also supports installing skills from GitHub, ClawHub, LinkAI, and other third-party platforms. See [Install Skills](/en/skills/install) for details. - - - -## 5. CLI Command System - -CowAgent provides two command interaction methods, covering service management, skill installation, configuration, and more: - -- **Terminal CLI:** Run `cow <command>` in the system terminal, supporting `start`, `stop`, `restart`, `update`, `status`, `logs`, `skill`, etc. -- **Chat commands:** Type `/` in conversation. The Web console shows a command menu when you type `/`. - -```bash -cow start # Start service -cow stop # Stop service -cow update # Update and restart -cow skill install pptx # Install a skill -cow install-browser # Install browser tool -``` - -See [Command Overview](https://docs.cowagent.ai/en/cli) for details. diff --git a/docs/en/intro/index.mdx b/docs/en/intro/index.mdx deleted file mode 100644 index 373383b2..00000000 --- a/docs/en/intro/index.mdx +++ /dev/null @@ -1,93 +0,0 @@ ---- -title: Introduction -description: CowAgent - Open-source super AI assistant and Agent Harness ---- - -
- CowAgent -
- -**CowAgent** is an open-source super AI assistant and Agent Harness. It proactively plans tasks, runs tools and skills, and autonomously grows with memory and knowledge. - -CowAgent is lightweight, easy to deploy, and built to extend. Plug in any major LLM provider, run it across Web and major IM platforms, 24/7 on a personal computer or server. - - - - Open-source repository — Star and contribute - - - No setup required — experience CowAgent instantly - - - -## Core Capabilities - - - - Decomposes complex tasks and executes them step by step, looping over tools and skills until the goal is reached. - - - Three-tier architecture (context → daily → core), automatic Deep Dream distillation, hybrid keyword + vector retrieval. - - - Auto-curates structured knowledge into a Markdown wiki, builds an evolving knowledge graph with visual browsing. - - - A complete skill creation and execution engine. Install from Skill Hub or generate custom skills via natural-language conversation. - - - First-class support for text, images, voice, and files — recognition, generation, and delivery. - - - Built-in file I/O, terminal, browser, scheduler, memory retrieval, web search, and more — with native MCP integration. - - - Terminal CLI and in-chat commands for process management, skill installation, configuration, and context inspection. - - - Claude, GPT, Gemini, DeepSeek, Qwen, GLM, Kimi, MiniMax, Doubao, and more — swap providers from the Web console with one click. - - - A single Agent simultaneously serves Web, WeChat, Feishu, DingTalk, WeCom, QQ, and Official Accounts. 
- - - -## Quick Start - -Run one of the commands below to install, configure, and start CowAgent in a single step: - - - - ```bash - bash <(curl -fsSL https://cdn.link-ai.tech/code/cow/run.sh) - ``` - - - ```powershell - irm https://cdn.link-ai.tech/code/cow/run.ps1 | iex - ``` - - - -Once started, open `http://localhost:9899` to access the **Web console** — the unified place to chat, configure providers, connect channels, and install skills. - - - - Complete installation and run guide - - - CowAgent system architecture - - - -## Disclaimer - -1. This project is licensed under the [MIT License](https://github.com/zhayujie/CowAgent/blob/master/LICENSE) and is intended for technical research and learning. You are responsible for complying with applicable laws and regulations in your jurisdiction; the maintainers assume no liability for any consequences arising from use of this project. -2. **Cost & safety:** Agent mode consumes substantially more tokens than plain chat — pick models that balance quality and cost. The Agent has access to your local operating system; deploy only in trusted environments. -3. CowAgent is a pure open-source project and does not participate in, authorize, or issue any cryptocurrency. - -## Community - -Scan the WeChat QR code to join the open-source community group: - - diff --git a/docs/en/knowledge/index.mdx b/docs/en/knowledge/index.mdx deleted file mode 100644 index f1610dc9..00000000 --- a/docs/en/knowledge/index.mdx +++ /dev/null @@ -1,93 +0,0 @@ ---- -title: Personal Knowledge Base -description: CowAgent personal knowledge base — structured knowledge accumulation, automatic organization, and knowledge graph ---- - -The personal knowledge base is the Agent's long-term structured knowledge store, saved in the `knowledge/` directory within the workspace. 
Unlike memory, which is organized by timeline, the knowledge base organizes content by topic — articles, conversation insights, and learning materials are structured into interlinked Markdown pages, forming a continuously growing knowledge network. - - - - - -## Core Concepts - -### Knowledge vs Memory - -| Dimension | Knowledge Base (knowledge/) | Long-term Memory (memory/) | -| --- | --- | --- | -| Organization | By topic, interlinked | By timeline, dated files | -| Writing | Agent actively structures content | Auto-summarized on context trimming | -| Content | Refined, structured knowledge | Raw conversation summaries | -| Use cases | Study notes, tech docs, project knowledge | Conversation history, event records | - -### Directory Structure - -``` -~/cow/knowledge/ -├── index.md # Knowledge index, entry point for all pages -├── log.md # Change log, records each write -├── concepts/ # Conceptual knowledge -│ └── machine-learning.md -├── entities/ # Entity knowledge (people, orgs, tools) -│ └── openai.md -└── sources/ # Source knowledge (articles, papers) - └── llm-wiki.md -``` - -The directory structure is flexible — the Agent automatically creates appropriate category directories based on actual content. Users can also customize the organization. - -## Automatic Organization - -Knowledge writing is an autonomous Agent behavior, triggered in these scenarios: - -- **User shares an article or document** — The Agent automatically extracts key information and creates a structured knowledge page -- **Conversation produces valuable conclusions** — The Agent organizes insights into knowledge pages and links them to existing knowledge -- **User explicitly requests organization** — Users can guide the Agent to organize and update knowledge through conversation - -Each knowledge page includes cross-reference links to related pages, gradually building a knowledge graph. 
- - - - - -## Knowledge Retrieval - -The Agent can retrieve knowledge during conversation through: - -- **Index lookup** — Quickly locate relevant pages via `knowledge/index.md` -- **Semantic search** — Search knowledge content via the `memory_search` tool -- **Direct read** — Read specific knowledge files via the `memory_get` tool - -## Web Console - -The web console provides a dedicated "Knowledge" module with: - -- **Document browsing** — Tree-style directory structure, searchable and collapsible, click to view content -- **Knowledge graph** — Interactive graph visualizing relationships between knowledge pages -- **Chat integration** — Knowledge document links referenced in Agent replies are clickable for direct navigation - - - - - - - - - -## CLI Commands - -Manage the knowledge base with the `/knowledge` command: - -| Command | Description | -| --- | --- | -| `/knowledge` | Show knowledge base statistics | -| `/knowledge list` | Display file directory as a tree | -| `/knowledge on` | Enable the knowledge base feature | -| `/knowledge off` | Disable the knowledge base feature | - -## Configuration - -| Parameter | Description | Default | -| --- | --- | --- | -| `knowledge` | Whether to enable the personal knowledge base | `true` | -| `agent_workspace` | Workspace path; knowledge is stored under the `knowledge/` subdirectory | `~/cow` | diff --git a/docs/en/memory/context.mdx b/docs/en/memory/context.mdx deleted file mode 100644 index 18fbdc8c..00000000 --- a/docs/en/memory/context.mdx +++ /dev/null @@ -1,81 +0,0 @@ ---- -title: Short-term Memory -description: Conversation context — message management, compression strategies, and context operations ---- - -Conversation context is the Agent's short-term memory, containing all messages in the current session (user input, Agent replies, tool calls and results). Proper context management is critical for the Agent's reasoning quality and cost control. 
- -## Context Structure - -Each conversation turn consists of: - -``` -User message → Agent thinking → Tool call → Tool result → ... → Agent final reply -``` - -A single turn may include multiple tool calls (controlled by `agent_max_steps`). All tool calls and results are retained in context until compressed or trimmed. - -## Key Configuration - -| Parameter | Description | Default | -| --- | --- | --- | -| `agent_max_context_tokens` | Maximum context token budget | `50000` | -| `agent_max_context_turns` | Maximum conversation turns in context | `20` | -| `agent_max_steps` | Maximum decision steps per turn (tool call count) | `15` | - -Configurable via `config.json` or the `/config` chat command. - -## Compression Strategy - -When context exceeds limits, the system automatically compresses to free space. The process has multiple stages: - -### 1. Tool Result Truncation - -Before each decision loop, the system checks tool call results in historical turns. Results exceeding **20,000 characters** are truncated, keeping only the beginning and end with a truncation notice. Current turn results are not affected. - -### 2. Turn Trimming - -When conversation turns exceed `agent_max_context_turns`: - -- The **oldest half** of complete turns is trimmed (preserving tool call chain integrity) -- Trimmed messages are summarized by LLM and **written to the daily memory file** -- Once the LLM summary is ready, it is also **injected into the first user message** of the retained context, helping the model maintain conversational continuity -- Summary injection runs asynchronously in the background and takes effect from the next turn onward - -### 3. 
Token Budget Trimming - -After turn trimming, if tokens still exceed the budget: - -- **Fewer than 5 turns**: All turns undergo **text compression** — each turn keeps only the first user text and last Agent reply, removing intermediate tool call chains -- **5 or more turns**: The **first half** of turns is trimmed again, with discarded content written to memory and a context summary injected - -### 4. Overflow Emergency Handling - -When the model API returns a context overflow error: - -1. All current messages are summarized and written to memory -2. Aggressive trimming is applied (tool results limited to 10K chars, user text to 10K, max 5 turns) -3. If still overflowing, the entire conversation context is cleared - -## Session Persistence - -Conversation messages are persisted to a local database, automatically restored after service restart. Restore strategy: - -- Restores the most recent **`max(3, max_context_turns / 6)`** turns -- Only retains each turn's **user text and Agent final reply**, not intermediate tool call chains -- Sessions older than **30 days** are automatically cleaned up - -## Commands - -Use these commands in chat to manage context: - -| Command | Description | -| --- | --- | -| `/context` | View current context statistics (message count, role distribution, total characters) | -| `/context clear` | Clear current session context | -| `/config agent_max_context_tokens 80000` | Adjust context token budget | -| `/config agent_max_context_turns 30` | Adjust context turn limit | - - - After clearing context, the Agent "forgets" previous conversation content. Content that was already written to long-term memory can still be retrieved via memory search. 
- diff --git a/docs/en/memory/deep-dream.mdx b/docs/en/memory/deep-dream.mdx deleted file mode 100644 index d0dd9e2d..00000000 --- a/docs/en/memory/deep-dream.mdx +++ /dev/null @@ -1,90 +0,0 @@ ---- -title: Deep Dream -description: Deep Dream — automatic distillation from conversations to permanent memory ---- - -Deep Dream is the core consolidation mechanism of CowAgent's memory system, responsible for distilling scattered daily memories into refined long-term memory and generating dream diaries. - -## Memory Flow - -CowAgent's memory progresses through three stages from short-term to long-term: - -``` -Conversation context (short-term) → Daily memory (mid-term) → MEMORY.md (long-term) -``` - -### 1. Conversation → Daily Memory - -When conversation context is trimmed or during the daily scheduled summary, the system uses LLM to summarize conversation content into key events, writing them to the daily memory file `memory/YYYY-MM-DD.md`. - -Triggers: -- **Context trimming** — Trimmed content is summarized when turn or token limits are exceeded -- **Daily schedule** — Automatically triggered at 23:55 -- **API overflow** — Emergency save of current conversation summary - -### 2. Daily Memory → MEMORY.md (Distillation) - -After the daily summary completes, Deep Dream automatically runs distillation: - -1. **Read materials** — Current `MEMORY.md` + today's daily memory -2. **LLM distillation** — Deduplicate, merge, prune, extract new information -3. **Overwrite MEMORY.md** — Output the refined long-term memory -4. **Generate dream diary** — Record discoveries and insights from the consolidation - -### 3. Role of MEMORY.md - -`MEMORY.md` is injected into the system prompt for every conversation, keeping the Agent aware of user preferences, decisions, and key facts. Therefore it must stay concise — Deep Dream targets approximately 30 entries or fewer. 
- -## Distillation Rules - -Deep Dream follows these consolidation rules: - -| Operation | Description | -| --- | --- | -| **Merge & refine** | Combine similar entries into single high-density statements | -| **Extract new** | Pull preferences, decisions, people, experiences from daily memory | -| **Conflict update** | When new info contradicts old entries, newer info takes precedence | -| **Clean invalid** | Remove temporary records, blank entries, formatting artifacts | -| **Remove redundancy** | Delete old entries already covered by more refined statements | - -## Dream Diary - -Each distillation generates a dream diary saved at `memory/dreams/YYYY-MM-DD.md`, written in a narrative style recording: - -- Duplications or contradictions found -- New insights extracted from daily memory -- Cleanups and optimizations performed -- Overall observations - -Dream diaries can be viewed in the Web console under "Memory → Dream Diary" tab. - - - - - -## Manual Trigger - -In addition to the automatic daily run, you can manually trigger distillation in chat: - -```text -/memory dream [N] -``` - -- `N`: Consolidate the last N days of memory (default 3, max 30) -- Runs asynchronously in the background; you'll be notified in chat when complete -- Web notifications include clickable links to view MEMORY.md and dream diary -- Works without Agent initialization — can be used before the first conversation - - - After first deployment, it's recommended to run `/memory dream 30` once to distill all historical daily memories into MEMORY.md. 
- - -## Safety Mechanisms - -| Mechanism | Description | -| --- | --- | -| **Skip on no content** | Distillation skipped when no daily memory exists, avoiding empty overwrites | -| **Input dedup** | In scheduled tasks, automatically skipped when input materials haven't changed | -| **Async execution** | Distillation runs in a background thread, never blocking conversation | -| **Sequential guarantee** | In scheduled tasks, daily flush completes before distillation starts | -| **No fabrication** | Prompt explicitly constrains consolidation to existing materials only | diff --git a/docs/en/memory/index.mdx b/docs/en/memory/index.mdx deleted file mode 100644 index e3f6513f..00000000 --- a/docs/en/memory/index.mdx +++ /dev/null @@ -1,71 +0,0 @@ ---- -title: Long-term Memory -description: CowAgent long-term memory system — file persistence, automatic writing, and hybrid retrieval ---- - -Long-term memory is stored in workspace files, persisting across sessions. The Agent loads historical memory on demand via retrieval tools during conversation, and automatically writes conversation summaries to long-term memory when context is trimmed. - -Memory Architecture - -## Memory Types - -### Core Memory (MEMORY.md) - -Stored in `~/cow/MEMORY.md`, containing long-term user preferences, important decisions, key facts, and other information that doesn't fade over time. The Agent reads and writes this file via tools to maintain long-term knowledge. - -### Daily Memory (memory/YYYY-MM-DD.md) - -Stored in `~/cow/memory/` directory, named by date (e.g., `2026-03-08.md`), recording daily conversation summaries and key events. Files are only created on first write to avoid generating empty files. - -### Dream Diary (memory/dreams/YYYY-MM-DD.md) - -A byproduct of the Deep Dream (memory distillation) process, recording discoveries, deduplication operations, and new insights from each consolidation. Stored in `~/cow/memory/dreams/` directory, named by date. 
- -## Automatic Writing - -The Agent automatically persists conversation content to long-term memory through the following mechanisms: - -- **On context trimming** — When conversation turns or tokens exceed the configured limit, the oldest half of the context is trimmed, and the discarded content is summarized by LLM into key information and written to the daily memory file. The summary is also asynchronously injected into the retained context for conversational continuity -- **Daily scheduled summary** — A full summary is automatically triggered at 23:55 every day, ensuring memory is preserved even on low-activity days (skipped if content hasn't changed) -- [Deep Dream (memory distillation)](/en/memory/deep-dream) — Runs automatically after the daily summary, distilling daily memories into MEMORY.md and generating a dream diary -- **On API context overflow** — When the model API returns a context overflow error, the current conversation summary is saved as an emergency measure - -All memory writes run asynchronously in a background thread (LLM summarization + file writing), never blocking normal conversation replies. - -## Memory Retrieval - -The memory system supports hybrid retrieval modes: - -- **Keyword retrieval** — FTS5 full-text index matching with BM25 ranking -- **Vector retrieval** — Embedding-based semantic similarity search, finds relevant memory even with different wording - -The Agent automatically triggers memory retrieval during conversation as needed, incorporating relevant historical information into context. Results are ranked by a combined score (default: 0.7 vector weight + 0.3 keyword weight). Daily memory scores decay over time (30-day half-life), while core memory does not decay. 
- -## Related Files - -Files related to memory in the workspace (default `~/cow`): - -| File | Description | -| --- | --- | -| `AGENT.md` | Agent personality and behavior settings | -| `USER.md` | User identity information and preferences | -| `RULE.md` | Custom rules and constraints | -| `MEMORY.md` | Core memory (long-term) | -| `memory/YYYY-MM-DD.md` | Daily memory (created on demand) | -| `memory/dreams/YYYY-MM-DD.md` | Dream diary (auto-generated by Deep Dream) | - -## Web Console - -The memory management page in the Web console allows browsing memory files and dream diaries, with tab switching support: - - - - - -## Configuration - -| Parameter | Description | Default | -| --- | --- | --- | -| `agent_workspace` | Workspace path, memory files stored under this directory | `~/cow` | -| `agent_max_context_tokens` | Max context tokens; when exceeded, content is trimmed and summarized into memory | `50000` | -| `agent_max_context_turns` | Max context turns; when exceeded, content is trimmed and summarized into memory | `20` | diff --git a/docs/en/models/claude.mdx b/docs/en/models/claude.mdx deleted file mode 100644 index bb831eb8..00000000 --- a/docs/en/models/claude.mdx +++ /dev/null @@ -1,50 +0,0 @@ ---- -title: Claude -description: Anthropic Claude model configuration (Text Chat + Image Understanding) ---- - -Claude is provided by Anthropic and supports both text chat and image understanding. The mainstream Sonnet / Opus models natively support vision, so no separate Vision model needs to be specified. - - - All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file. 
- - -## Text Chat - -```json -{ - "model": "claude-opus-4-8", - "claude_api_key": "YOUR_API_KEY" -} -``` - -| Parameter | Description | -| --- | --- | -| `model` | Supports `claude-opus-4-8`, `claude-opus-4-7`, `claude-sonnet-4-6`, `claude-opus-4-6`, `claude-sonnet-4-5`, `claude-sonnet-4-0`, `claude-3-5-sonnet-latest`, etc. See [official models](https://docs.anthropic.com/en/docs/about-claude/models/overview) | -| `claude_api_key` | Create one in the [Claude Console](https://console.anthropic.com/settings/keys) | -| `claude_api_base` | Optional, defaults to `https://api.anthropic.com/v1`. Can be changed to a third-party proxy | - -### Model Selection - -| Model | Use Case | -| --- | --- | -| `claude-opus-4-8` | Default recommended, latest flagship; best for complex reasoning and long-running tasks | -| `claude-opus-4-7` | Previous-generation Opus flagship | -| `claude-sonnet-4-6` | Balanced cost and speed, lower cost | -| `claude-opus-4-6` / `claude-sonnet-4-5` / `claude-sonnet-4-0` | Earlier flagships at a lower price | - -## Image Understanding - -Once `claude_api_key` is configured, the Agent's Vision tool automatically uses the Claude main model to recognize images, with no extra setup required. - -To manually specify a Vision model, set it explicitly in the configuration file: - -```json -{ - "tools": { - "vision": { - "model": "claude-sonnet-4-6" - } - } -} -``` diff --git a/docs/en/models/coding-plan.mdx b/docs/en/models/coding-plan.mdx deleted file mode 100644 index b09715eb..00000000 --- a/docs/en/models/coding-plan.mdx +++ /dev/null @@ -1,139 +0,0 @@ ---- -title: Coding Plan -description: Coding Plan model configuration ---- - -> Coding Plan is a monthly subscription package offered by various providers, ideal for high-frequency Agent usage. CowAgent supports all Coding Plan providers via OpenAI-compatible mode. - - - Coding Plan API Base and API Key are usually separate from the standard pay-as-you-go ones. 
Please obtain them from each provider's platform. - - -## General Configuration - -All providers can be accessed via the OpenAI-compatible protocol, and can be quickly configured through the web console. Set the model provider to **OpenAI**, select a custom model and enter the model code, then fill in the corresponding provider's API Base and API Key: - - - -You can also configure directly in `config.json`: - -```json -{ - "bot_type": "openai", - "model": "MODEL_NAME", - "open_ai_api_base": "PROVIDER_CODING_PLAN_API_BASE", - "open_ai_api_key": "YOUR_API_KEY" -} -``` - -| Parameter | Description | -| --- | --- | -| `bot_type` | Must be `openai` (OpenAI-compatible mode) | -| `model` | Model name supported by the provider | -| `open_ai_api_base` | Provider's Coding Plan API Base URL | -| `open_ai_api_key` | Provider's Coding Plan API Key | - ---- - -## Alibaba Cloud - -```json -{ - "bot_type": "openai", - "model": "qwen3.5-plus", - "open_ai_api_base": "https://coding.dashscope.aliyuncs.com/v1", - "open_ai_api_key": "YOUR_API_KEY" -} -``` - -| Parameter | Description | -| --- | --- | -| `model` | `qwen3.5-plus`, `qwen3-max-2026-01-23`, `qwen3-coder-next`, `qwen3-coder-plus`, `glm-5`, `glm-4.7`, `kimi-k2.5`, `MiniMax-M2.5` | -| `open_ai_api_base` | `https://coding.dashscope.aliyuncs.com/v1` | -| `open_ai_api_key` | Coding Plan specific key (not shared with pay-as-you-go) | - -Reference: [Quick Start](https://help.aliyun.com/zh/model-studio/coding-plan-quickstart?spm=a2c4g.11186623.help-menu-2400256.d_0_2_1.70115203zi5Igc), [Model List](https://help.aliyun.com/zh/model-studio/coding-plan) - ---- - -## MiniMax - -```json -{ - "bot_type": "openai", - "model": "MiniMax-M2.5", - "open_ai_api_base": "https://api.minimaxi.com/v1", - "open_ai_api_key": "YOUR_API_KEY" -} -``` - -| Parameter | Description | -| --- | --- | -| `model` | `MiniMax-M2.5`, `MiniMax-M2.5-highspeed`, `MiniMax-M2.1`, `MiniMax-M2` | -| `open_ai_api_base` | China: `https://api.minimaxi.com/v1`; Global: 
`https://api.minimax.io/v1` | -| `open_ai_api_key` | Coding Plan specific key (not shared with pay-as-you-go) | - -Reference: [China Key](https://platform.minimaxi.com/docs/coding-plan/quickstart), [Model List](https://platform.minimaxi.com/docs/guides/pricing-coding-plan), [Global Key](https://platform.minimax.io/docs/coding-plan/quickstart) - ---- - -## GLM - -```json -{ - "bot_type": "openai", - "model": "glm-4.7", - "open_ai_api_base": "https://open.bigmodel.cn/api/coding/paas/v4", - "open_ai_api_key": "YOUR_API_KEY" -} -``` - -| Parameter | Description | -| --- | --- | -| `model` | `glm-5`, `glm-4.7`, `glm-4.6`, `glm-4.5`, `glm-4.5-air` | -| `open_ai_api_base` | China: `https://open.bigmodel.cn/api/coding/paas/v4`; Global: `https://api.z.ai/api/coding/paas/v4` | -| `open_ai_api_key` | Shared with standard API | - -Reference: [China Quick Start](https://docs.bigmodel.cn/cn/coding-plan/quick-start), [Global Quick Start](https://docs.z.ai/devpack/quick-start) - ---- - -## Kimi - -```json -{ - "bot_type": "moonshot", - "model": "kimi-for-coding", - "moonshot_base_url": "https://api.kimi.com/coding/v1", - "moonshot_api_key": "YOUR_API_KEY" -} -``` - -| Parameter | Description | -| --- | --- | -| `model` | Use `kimi-for-coding` for auto-updating model, or specify a model such as `kimi-k2.6` | -| `moonshot_base_url` | `https://api.kimi.com/coding/v1` | -| `moonshot_api_key` | Coding Plan specific key (not shared with pay-as-you-go) | - -Reference: [Key & Docs](https://www.kimi.com/code/docs/) - ---- - -## Volcengine - -```json -{ - "bot_type": "openai", - "model": "Doubao-Seed-2.0-Code", - "open_ai_api_base": "https://ark.cn-beijing.volces.com/api/coding/v3", - "open_ai_api_key": "YOUR_API_KEY" -} -``` - -| Parameter | Description | -| --- | --- | -| `model` | `Doubao-Seed-2.0-Code`, `Doubao-Seed-2.0-pro`, `Doubao-Seed-2.0-lite`, `Doubao-Seed-Code`, `MiniMax-M2.5`, `Kimi-K2.5`, `GLM-4.7`, `DeepSeek-V3.2` | -| `open_ai_api_base` | 
`https://ark.cn-beijing.volces.com/api/coding/v3` | -| `open_ai_api_key` | Shared with standard API | - -Reference: [Quick Start](https://www.volcengine.com/docs/82379/1928261?lang=zh) diff --git a/docs/en/models/custom.mdx b/docs/en/models/custom.mdx deleted file mode 100644 index 45a7d2e1..00000000 --- a/docs/en/models/custom.mdx +++ /dev/null @@ -1,62 +0,0 @@ ---- -title: Custom -description: Custom vendor configuration for third-party API proxies and local models ---- - -For model services accessed via the OpenAI-compatible protocol or locally deployed models, such as: - -- **Third-party API proxies**: call multiple models through a unified API base -- **Local models**: models deployed locally with tools like Ollama, vLLM, LocalAI -- **Private deployments**: model services deployed inside an enterprise - - - Difference from the `openai` vendor: when a custom vendor is selected, switching models via `/config model` does not automatically switch the vendor type — the custom API address is always used. 
- - -## Text Chat - -### Third-party API proxy - -```json -{ - "bot_type": "custom", - "model": "", - "custom_api_key": "YOUR_API_KEY", - "custom_api_base": "https://{your-proxy.com}/v1" -} -``` - -| Parameter | Description | -| --- | --- | -| `bot_type` | Must be set to `custom` | -| `model` | Model name; any model name supported by the proxy service | -| `custom_api_key` | API key provided by the proxy service | -| `custom_api_base` | API endpoint provided by the proxy service; must be OpenAI-compatible | - -### Local models - -Local models usually do not require an API key — only the API base needs to be filled in: - -```json -{ - "bot_type": "custom", - "model": "qwen3.5:27b", - "custom_api_base": "http://localhost:11434/v1" -} -``` - -Common local deployment tools and their default endpoints: - -| Tool | Default API Base | -| --- | --- | -| [Ollama](https://ollama.com) | `http://localhost:11434/v1` | -| [vLLM](https://docs.vllm.ai) | `http://localhost:8000/v1` | -| [LocalAI](https://localai.io) | `http://localhost:8080/v1` | - -### Switching Models - -Switching models under a custom vendor only changes `model` — `bot_type` and the API endpoint remain unchanged: - -``` -/config model qwen3.5:27b -``` diff --git a/docs/en/models/deepseek.mdx b/docs/en/models/deepseek.mdx deleted file mode 100644 index 6de8d09b..00000000 --- a/docs/en/models/deepseek.mdx +++ /dev/null @@ -1,72 +0,0 @@ ---- -title: DeepSeek -description: DeepSeek model configuration (Text Chat + Thinking Mode) ---- - -DeepSeek is one of the default recommended vendors in Agent mode, focused on cost-effective text chat and task planning. 
- -## Text Chat - -```json -{ - "model": "deepseek-v4-flash", - "deepseek_api_key": "YOUR_API_KEY" -} -``` - -| Parameter | Description | -| --- | --- | -| `model` | Supports `deepseek-v4-flash` (Default), `deepseek-v4-pro` | -| `deepseek_api_key` | Create one on the [DeepSeek Platform](https://platform.deepseek.com/api_keys) | -| `deepseek_api_base` | Optional, defaults to `https://api.deepseek.com/v1`. Can be changed to a third-party proxy | - -### Model Selection - -| Model | Use Case | -| --- | --- | -| `deepseek-v4-flash` | Default recommended; fast and low cost | -| `deepseek-v4-pro` | Smarter; better for complex tasks | - -## Thinking Mode - -The V4 series (`deepseek-v4-flash` / `deepseek-v4-pro`) supports an explicit "thinking mode": before producing the final answer, the model emits a chain of thought (`reasoning_content`) to improve answer quality. - -### Toggle - -Controlled by the global `enable_thinking` config, and can also be toggled from the Web Console's configuration page: - -```json -{ - "enable_thinking": true -} -``` - -- `true`: the model thinks before answering across all channels. The Web Console displays the thinking process; IM channels (WeChat / WeCom / DingTalk / Feishu) do not show it but still get better answers. -- `false`: thinking is disabled, responses are faster, and time-to-first-token is lower. - -### Reasoning Effort - -Under thinking mode, `reasoning_effort` controls reasoning intensity: - -```json -{ - "enable_thinking": true, - "reasoning_effort": "high" -} -``` - -| Value | Use Case | -| --- | --- | -| `high` (Default) | Day-to-day Agent tasks; balanced reasoning and speed | -| `max` | Complex coding, long-horizon planning, strictly constrained tasks; deeper reasoning but more time and output tokens | - -`reasoning_effort` only takes effect when `enable_thinking` is `true`; it is ignored automatically when the model does not support thinking mode. 
- -### Behavior Notes - -- **Sampling parameters**: in thinking mode, `temperature`, `top_p`, `presence_penalty`, and `frequency_penalty` are ignored by the server (without errors). CowAgent automatically skips them. -- **Multi-turn tool calls**: when the history contains tool calls, DeepSeek requires every assistant message to include `reasoning_content`. CowAgent handles this automatically, so toggling thinking mode across turns will not cause errors. - - - `deepseek-v4-flash` is used by default; switch to `deepseek-v4-pro` for complex tasks; enable `enable_thinking` when deep reasoning is needed. - diff --git a/docs/en/models/doubao.mdx b/docs/en/models/doubao.mdx deleted file mode 100644 index 818275e5..00000000 --- a/docs/en/models/doubao.mdx +++ /dev/null @@ -1,66 +0,0 @@ ---- -title: Doubao -description: Doubao (Volcengine Ark) model configuration (Text / Image Understanding / Image Generation / Embedding) ---- - -Doubao (Volcengine Ark) supports text chat, image understanding, image generation (Seedream), and embedding. A single `ark_api_key` enables all capabilities. - - - All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file. - - -## Text Chat - -```json -{ - "model": "doubao-seed-2-0-pro-260215", - "ark_api_key": "YOUR_API_KEY" -} -``` - -| Parameter | Description | -| --- | --- | -| `model` | Can be `doubao-seed-2-0-pro-260215`, `doubao-seed-2-0-code-preview-260215`, `doubao-seed-2-0-lite-260215`, etc. | -| `ark_api_key` | Create one in the [Volcengine Ark Console](https://console.volcengine.com/ark/region:ark+cn-beijing/apikey) | -| `ark_base_url` | Optional, defaults to `https://ark.cn-beijing.volces.com/api/v3` | - -## Image Understanding - -Once `ark_api_key` is configured, the Agent's Vision tool automatically uses `doubao-seed-2-0-pro-260215` to recognize images, with no extra setup required. 
- -To manually specify a Vision model: - -```json -{ - "tools": { - "vision": { - "model": "doubao-seed-2-0-pro-260215" - } - } -} -``` - -## Image Generation - -```json -{ - "skills": { - "image-generation": { - "model": "seedream-5.0-lite" - } - } -} -``` - -Available models: `seedream-5.0-lite`, `seedream-4.5`. - -## Embedding - -```json -{ - "embedding_provider": "doubao", - "embedding_model": "doubao-embedding-vision-251215" -} -``` - -The default model is `doubao-embedding-vision-251215` (multimodal embedding); the dimension (1024 or 2048) can be set via `embedding_dimensions` in the configuration file. After changing the embedding, run `/memory rebuild-index` to rebuild the index. diff --git a/docs/en/models/gemini.mdx b/docs/en/models/gemini.mdx deleted file mode 100644 index b2d9520b..00000000 --- a/docs/en/models/gemini.mdx +++ /dev/null @@ -1,59 +0,0 @@ ---- -title: Gemini -description: Google Gemini model configuration (Text Chat + Image Understanding + Image Generation) ---- - -Google Gemini supports text chat, image understanding, and image generation (Nano Banana series). A single `gemini_api_key` enables all capabilities. - - - All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file. - - -## Text Chat - -```json -{ - "model": "gemini-3.5-flash", - "gemini_api_key": "YOUR_API_KEY" -} -``` - -| Parameter | Description | -| --- | --- | -| `model` | Recommended: `gemini-3.5-flash`; also supports `gemini-3.1-pro-preview`, `gemini-3.1-flash-lite-preview`, `gemini-3-flash-preview`, `gemini-3-pro-preview`, etc. See [official docs](https://ai.google.dev/gemini-api/docs/models) | -| `gemini_api_key` | Create one in [Google AI Studio](https://aistudio.google.com/app/apikey) | -| `gemini_api_base` | Optional, defaults to `https://generativelanguage.googleapis.com`. 
Can be changed to a third-party proxy | - -## Image Understanding - -All Gemini models natively support vision. Once `gemini_api_key` is configured, the Agent's Vision tool automatically uses the main model to recognize images, with no extra setup required. - -To manually specify a Vision model: - -```json -{ - "tools": { - "vision": { - "model": "gemini-3.1-flash-lite-preview" - } - } -} -``` - -## Image Generation - -```json -{ - "skills": { - "image-generation": { - "model": "gemini-3.1-flash-image-preview" - } - } -} -``` - -| Model ID | Alias | -| --- | --- | -| `gemini-3.1-flash-image-preview` | Nano Banana 2 | -| `gemini-3-pro-image-preview` | Nano Banana Pro | -| `gemini-2.5-flash-image` | Nano Banana | diff --git a/docs/en/models/glm.mdx b/docs/en/models/glm.mdx deleted file mode 100644 index 473a805c..00000000 --- a/docs/en/models/glm.mdx +++ /dev/null @@ -1,56 +0,0 @@ ---- -title: GLM -description: Zhipu AI GLM model configuration (Text / Image Understanding / Speech-to-Text / Embedding) ---- - -Zhipu AI supports text chat, image understanding, speech-to-text (ASR), and embedding. A single `zhipu_ai_api_key` enables all capabilities. - - - All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file. - - -## Text Chat - -```json -{ - "model": "glm-5.1", - "zhipu_ai_api_key": "YOUR_API_KEY" -} -``` - -| Parameter | Description | -| --- | --- | -| `model` | Can be `glm-5.1`, `glm-5-turbo`, `glm-5`, `glm-4.7`, `glm-4-plus`, `glm-4-flash`, `glm-4-air`, etc. See [model codes](https://bigmodel.cn/dev/api/normal-model/glm-4) | -| `zhipu_ai_api_key` | Create one in the [Zhipu AI Console](https://www.bigmodel.cn/usercenter/proj-mgmt/apikeys) | -| `zhipu_ai_api_base` | Optional, defaults to `https://open.bigmodel.cn/api/paas/v4` | - -## Image Understanding - -Zhipu's chat models (`glm-5.1`, `glm-5-turbo`, etc.) 
do not support vision; vision calls are uniformly routed to `glm-5v-turbo`. Once `zhipu_ai_api_key` is configured, the Agent's Vision tool automatically uses this model, with no need to specify it explicitly in the configuration file. - -## Speech-to-Text (ASR) - -```json -{ - "voice_to_text": "zhipu", - "voice_to_text_model": "glm-asr-2512" -} -``` - -| Parameter | Description | -| --- | --- | -| `voice_to_text` | Set to `zhipu` to enable Zhipu ASR | -| `voice_to_text_model` | Optional, defaults to `glm-asr-2512` | - -Credentials are automatically reused from `zhipu_ai_api_key`. Audio files should be smaller than 25MB; oversized files may be rejected by the server. - -## Embedding - -```json -{ - "embedding_provider": "zhipu", - "embedding_model": "embedding-3" -} -``` - -Available models: `embedding-3`, `embedding-2`. After changing the embedding, run `/memory rebuild-index` to rebuild the index. diff --git a/docs/en/models/index.mdx b/docs/en/models/index.mdx deleted file mode 100644 index 1a82d162..00000000 --- a/docs/en/models/index.mdx +++ /dev/null @@ -1,38 +0,0 @@ ---- -title: Models Overview -description: Model vendors supported by CowAgent and their capability matrix ---- - -CowAgent supports a wide range of mainstream large language models. Model interfaces live under the project's `models/` directory. Beyond text chat, several vendors also provide vision understanding, image generation, speech-to-text, text-to-speech, and embeddings — all of which can be invoked on demand in the Agent flow. - -## Capability Matrix - -A snapshot of each vendor's capabilities. "Text" refers to the main chat model; the remaining columns show which Agent capabilities the vendor can power. 
- -| Vendor | Representative Models | Text | Vision | Image Gen | STT | TTS | Embedding | -| --- | --- | :-: | :-: | :-: | :-: | :-: | :-: | -| [DeepSeek](/en/models/deepseek) | deepseek-v4-flash / pro | ✅ | | | | | | -| [MiniMax](/en/models/minimax) | MiniMax-M2.7 | ✅ | ✅ | ✅ | | ✅ | | -| [Claude](/en/models/claude) | claude-opus-4-8 | ✅ | ✅ | | | | | -| [Gemini](/en/models/gemini) | gemini-3.5-flash | ✅ | ✅ | ✅ | | | | -| [OpenAI](/en/models/openai) | gpt-5.5, o-series | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| [GLM](/en/models/glm) | glm-5.1, glm-5v-turbo | ✅ | ✅ | | ✅ | | ✅ | -| [Qwen](/en/models/qwen) | qwen3.7-max | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| [Doubao](/en/models/doubao) | doubao-seed-2.0 series | ✅ | ✅ | ✅ | | | ✅ | -| [Kimi](/en/models/kimi) | kimi-k2.6 | ✅ | ✅ | | | | | -| [ERNIE](/en/models/qianfan) | ernie-5.1 | ✅ | ✅ | | | | | -| [MiMo](/en/models/mimo) | mimo-v2.5-pro / v2.5 | ✅ | ✅ | | | ✅ | | -| [LinkAI](/en/models/linkai) | 100+ models from multiple vendors | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| [Custom](/en/models/custom) | Local models / third-party proxies | ✅ | | | | | | - - - Every capability in the Web console (Vision / Image / STT / TTS / Embedding / Web Search) can be configured independently with its own vendor and model — there is no forced binding between them. - - -## How to Configure - -**Option 1 (recommended):** Manage models and capabilities online via the [Web console](/en/channels/web), with no need to edit the configuration file: - - - -**Option 2:** Edit `config.json` manually and fill in the model name and API key for the selected vendor. Every model also supports OpenAI-compatible access — just set `bot_type` to `openai` and configure `open_ai_api_base` and `open_ai_api_key`. 
diff --git a/docs/en/models/kimi.mdx b/docs/en/models/kimi.mdx deleted file mode 100644 index 3292a976..00000000 --- a/docs/en/models/kimi.mdx +++ /dev/null @@ -1,41 +0,0 @@ ---- -title: Kimi -description: Kimi (Moonshot) model configuration (Text Chat + Image Understanding) ---- - -Kimi is provided by Moonshot and supports both text chat and image understanding. The `kimi-k2.x` series natively supports vision. - - - All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file. - - -## Text Chat - -```json -{ - "model": "kimi-k2.6", - "moonshot_api_key": "YOUR_API_KEY" -} -``` - -| Parameter | Description | -| --- | --- | -| `model` | Can be `kimi-k2.6`, `kimi-k2.5`, `kimi-k2`, `moonshot-v1-8k`, `moonshot-v1-32k`, `moonshot-v1-128k` | -| `moonshot_api_key` | Create one in the [Moonshot Console](https://platform.moonshot.cn/console/api-keys) | -| `moonshot_base_url` | Optional, defaults to `https://api.moonshot.cn/v1` | - -## Image Understanding - -Once `moonshot_api_key` is configured, the Agent's Vision tool automatically uses `kimi-k2.6` to recognize images, with no extra setup required. - -To manually specify a Vision model: - -```json -{ - "tools": { - "vision": { - "model": "kimi-k2.6" - } - } -} -``` diff --git a/docs/en/models/linkai.mdx b/docs/en/models/linkai.mdx deleted file mode 100644 index f60c2160..00000000 --- a/docs/en/models/linkai.mdx +++ /dev/null @@ -1,103 +0,0 @@ ---- -title: LinkAI -description: Access text, vision, image, speech, and embedding capabilities through the LinkAI platform ---- - -A single `linkai_api_key` gives you access to all capabilities of mainstream vendors such as OpenAI, Claude, Gemini, DeepSeek, MiniMax, Qwen, Kimi, and Doubao. - - - All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file. 
- - -## Text Chat - -```json -{ - "use_linkai": true, - "linkai_api_key": "YOUR_API_KEY" -} -``` - -| Parameter | Description | -| --- | --- | -| `use_linkai` | Set to `true` to enable | -| `linkai_api_key` | Create one in the [Console](https://link-ai.tech/console/interface) | -| `model` | Can be any code from the [model list](https://link-ai.tech/console/models) | - -See [Model Service](https://link-ai.tech/console/models) for more. - -## Image Understanding - -Once configured, the Agent's Vision tool automatically calls multimodal models via the gateway, with no extra setup required. To manually specify a Vision model: - -```json -{ - "tools": { - "vision": { - "model": "gpt-5.4-mini" - } - } -} -``` - -Available models: `gpt-4.1-mini`, `gpt-5.4-mini`, `qwen3.6-plus`, `doubao-seed-2-0-pro-260215`, `kimi-k2.6`, `claude-sonnet-4-6`, `gemini-3.1-flash-lite-preview`, etc. - -## Image Generation - -```json -{ - "skills": { - "image-generation": { - "model": "gpt-image-2" - } - } -} -``` - -| Model ID | Alias | -| --- | --- | -| `gpt-image-2` | OpenAI | -| `gemini-3.1-flash-image-preview` | Nano Banana 2 | -| `gemini-3-pro-image-preview` | Nano Banana Pro | -| `seedream-5.0-lite` | ByteDance Doubao Seedream | - -## Speech-to-Text (ASR) - -```json -{ - "voice_to_text": "linkai" -} -``` - -ASR uses Whisper by default; credentials are automatically reused from `linkai_api_key`. - -## Text-to-Speech (TTS) - -The TTS gateway supports multiple underlying engines. The engine is selected by `text_to_voice_model`, and the available voices change with the engine. - -```json -{ - "text_to_voice": "linkai", - "text_to_voice_model": "doubao", - "tts_voice_id": "BV001_streaming" -} -``` - -| `text_to_voice_model` | Engine | -| --- | --- | -| `tts-1` | OpenAI · Multi-language (voices like `alloy` / `nova` / `echo`, etc.) 
| -| `doubao` | ByteDance Doubao · Rich Chinese voices | -| `baidu` | Baidu · Chinese broadcaster voices | - -Voices differ by engine; we recommend selecting them visually in the Web Console under "Model Management → Text-to-Speech". - -## Embedding - -```json -{ - "embedding_provider": "linkai", - "embedding_model": "text-embedding-3-small" -} -``` - -The default model is `text-embedding-3-small` (OpenAI-compatible). After changing the embedding, run `/memory rebuild-index` to rebuild the index. diff --git a/docs/en/models/mimo.mdx b/docs/en/models/mimo.mdx deleted file mode 100644 index 6f808b8e..00000000 --- a/docs/en/models/mimo.mdx +++ /dev/null @@ -1,136 +0,0 @@ ---- -title: MiMo -description: Xiaomi MiMo model configuration (Text Chat + Image Understanding + Text-to-Speech) ---- - -Xiaomi MiMo is a native omni-modal large model. A single `mimo_api_key` enables text chat, image understanding, and text-to-speech all at once. - - - All capabilities below can be configured in one place via the "Model Management" page in the Web Console — no need to manually edit the configuration file. 
- - -## Text Chat - -```json -{ - "model": "mimo-v2.5-pro", - "mimo_api_key": "YOUR_API_KEY", - "mimo_api_base": "https://api.xiaomimimo.com/v1" -} -``` - -| Parameter | Description | -| --- | --- | -| `model` | Default recommendation: `mimo-v2.5-pro`; `mimo-v2.5` is also supported | -| `mimo_api_key` | Create one in the [MiMo Open Platform](https://platform.xiaomimimo.com/console/api-keys) | -| `mimo_api_base` | Optional, defaults to `https://api.xiaomimimo.com/v1` | - -### Model Selection - -| Model | Use Case | -| --- | --- | -| `mimo-v2.5-pro` | Flagship: native omni-modal + Agent capability, up to 1M tokens context | -| `mimo-v2.5` | General-purpose, native omni-modal (text / image / video / audio) | - -## Thinking Mode - -The MiMo V2.5 series enables "thinking mode" by default: the model emits `reasoning_content` (chain-of-thought) before the final answer, improving performance on complex tasks. - -Use the global `enable_thinking` flag to toggle visibility (also switchable from the Web Console settings): - -```json -{ - "enable_thinking": true -} -``` - -## Image Understanding - -Once `mimo_api_key` is configured, the Agent's Vision tool can automatically use MiMo's vision models: - -- When the main model itself is multimodal (`mimo-v2.5-pro` / `mimo-v2.5`), images are handled directly by the main model with no extra setup. -- When the main model belongs to another vendor, the Vision tool falls back to `mimo-v2.5-pro` in order. 
- -To force a specific Vision model, set it explicitly in the configuration: - -```json -{ - "tools": { - "vision": { - "provider": "mimo", - "model": "mimo-v2.5-pro" - } - } -} -``` - -## Text-to-Speech (TTS) - -```json -{ - "text_to_voice": "mimo", - "text_to_voice_model": "mimo-v2.5-tts", - "tts_voice_id": "冰糖" -} -``` - -| Parameter | Description | -| --- | --- | -| `text_to_voice_model` | Currently only `mimo-v2.5-tts` (preset voices + singing mode) | -| `tts_voice_id` | Preset voice name (Chinese voice IDs use the Chinese name directly) | - -### Preset Voices - -| Voice ID | Description | -| --- | --- | -| `Mia` | English · Female | -| `Chloe` | English · Female | -| `Milo` | English · Male | -| `Dean` | English · Male | -| `冰糖` | Chinese · Female (default) | -| `茉莉` | Chinese · Female | -| `苏打` | Chinese · Male | -| `白桦` | Chinese · Male | - - -You can also pick a voice visually from the Web Console under "Model Management → Text-to-Speech". - -### Style Control - -MiMo TTS supports embedding **audio tags** in the synthesis text to control emotion, tone, dialect, persona, and even singing. Tags must appear in the **text that will be synthesized to speech (i.e. the Agent's reply)**, with the overall style tag placed at the very beginning: - -``` -(style)content-to-synthesize -``` - -Half-width `()`, full-width `()`, and `[]` brackets are all accepted. Both Chinese and English style descriptors work — pick whichever language expresses the timbre most precisely. 
Common examples: - -| Category | Example tags | -| --- | --- | -| Basic emotions | `happy` `sad` `angry` `fear` `surprised` `excited` `aggrieved` `calm` `indifferent` | -| Compound emotions | `wistful` `relieved` `helpless` `guilty` `at ease` `uneasy` `touched` | -| Overall tone | `gentle` `aloof` `lively` `serious` `languid` `playful` `deep` `sharp` `cutting` | -| Voice character | `magnetic` `mellow` `bright` `ethereal` `childlike` `aged` `sweet` `husky` | -| Persona | `squeaky` `mature lady` `young boy` `uncle` `Taiwanese accent` | -| Dialect | `Northeastern` `Sichuan` `Henan` `Cantonese` | -| Role-play | `Sun Wukong` `Lin Daiyu` | -| Singing | `sing` / `singing` | - -Examples: - -- `(magnetic)The night is deep, and the city is still breathing.` -- `(gentle)Take a breath. You've got this.` -- `(serious)This is the final warning before the system reboots.` -- `(singing)Oh, when the saints go marching in…` - -You can also insert fine-grained audio tags at any position in the text to control breathing, laughter, pauses, etc. For example: - -``` -(nervous, deep breath) Phew… stay calm, stay calm. (faster pace) I've rehearsed this intro fifty times, it'll be fine. -``` - -See the [MiMo speech synthesis documentation](https://platform.xiaomimimo.com/docs/zh-CN/usage-guide/speech-synthesis-v2.5) for the full tag list. - - - When CowAgent calls TTS, the Agent's reply text (including any `(...)` tags) is forwarded directly to MiMo for synthesis. Tell the model in its persona / system prompt to "prefix replies with a `(style)` tag to control the tone", and IM channels (WeChat / Feishu / DingTalk / WeCom) will play voice replies with the corresponding emotion, dialect, or even singing. 
- diff --git a/docs/en/models/minimax.mdx b/docs/en/models/minimax.mdx deleted file mode 100644 index d945d2ea..00000000 --- a/docs/en/models/minimax.mdx +++ /dev/null @@ -1,71 +0,0 @@ ---- -title: MiniMax -description: MiniMax model configuration (Text / Image Understanding / Image Generation / Text-to-Speech) ---- - -MiniMax supports text chat, image understanding, image generation, and text-to-speech. A single `minimax_api_key` enables all capabilities. - - - All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file. - - -## Text Chat - -```json -{ - "model": "MiniMax-M2.7", - "minimax_api_key": "YOUR_API_KEY" -} -``` - -| Parameter | Description | -| --- | --- | -| `model` | Can be `MiniMax-M2.7`, `MiniMax-M2.7-highspeed`, `MiniMax-M2.5`, `MiniMax-M2.1`, `MiniMax-M2.1-lightning`, `MiniMax-M2`, etc. | -| `minimax_api_key` | Create one in the [MiniMax Console](https://platform.minimaxi.com/user-center/basic-information/interface-key) | - -## Image Understanding - -MiniMax's M2.x chat models do not support vision natively; vision calls are uniformly routed to `MiniMax-Text-01`. Once `minimax_api_key` is configured, the Agent's Vision tool automatically uses this model, with no need to specify it explicitly in the configuration file. - -## Image Generation - -```json -{ - "skills": { - "image-generation": { - "model": "image-01" - } - } -} -``` - -Available models: `image-01`. 
- -## Text-to-Speech (TTS) - -```json -{ - "text_to_voice": "minimax", - "text_to_voice_model": "speech-2.8-hd", - "tts_voice_id": "female-shaonv" -} -``` - -| Parameter | Description | -| --- | --- | -| `text_to_voice_model` | `speech-2.8-hd` (emotional rendering, natural sound), `speech-2.8-turbo` (ultra-fast), `speech-2.6-hd`, `speech-2.6-turbo` | -| `tts_voice_id` | Voice ID; supports Chinese / Cantonese / English / Japanese / Korean — 70+ voices in total | - -Common voice examples: - -| Voice ID | Description | -| --- | --- | -| `female-shaonv` | Chinese · Young Girl (Female) | -| `female-yujie` | Chinese · Mature Lady (Female) | -| `female-tianmei` | Chinese · Sweet Female (Female) | -| `male-qn-jingying` | Chinese · Elite Youth (Male) | -| `male-qn-badao` | Chinese · Dominant Youth (Male) | -| `Cantonese_GentleLady` | Cantonese · Gentle Female Voice | -| `English_Graceful_Lady` | English · Graceful Lady | - -For the full voice list (70+ voices across Chinese / Cantonese / English / Japanese / Korean), see the [system voice list](https://platform.minimaxi.com/docs/faq/system-voice-id), or select visually in the Web Console under "Model Management → Text-to-Speech". diff --git a/docs/en/models/openai.mdx b/docs/en/models/openai.mdx deleted file mode 100644 index f8715562..00000000 --- a/docs/en/models/openai.mdx +++ /dev/null @@ -1,103 +0,0 @@ ---- -title: OpenAI -description: OpenAI model configuration (Text / Vision / Image / Speech / Embedding) ---- - -OpenAI offers the most complete coverage and can simultaneously serve text chat, vision understanding, image generation, speech-to-text (ASR), text-to-speech (TTS), and embedding. A single `open_ai_api_key` lets the Agent use all of these capabilities. - - - All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file. 
- - - -## Text Chat - -```json -{ - "model": "gpt-5.5", - "open_ai_api_key": "YOUR_API_KEY", - "open_ai_api_base": "https://api.openai.com/v1" -} -``` - -| Parameter | Description | -| --- | --- | -| `model` | Same as OpenAI's [model parameter](https://platform.openai.com/docs/models); supports `gpt-5.5`, `gpt-5.4`, `gpt-5.4-mini`, `gpt-5.4-nano`, the `gpt-5` series, `gpt-4.1`, the o-series, etc. Agent mode defaults to `gpt-5.5`; use `gpt-5.4` for better cost-efficiency | -| `open_ai_api_key` | Create one on the [OpenAI Platform](https://platform.openai.com/api-keys) | -| `open_ai_api_base` | Optional; change it to access a third-party proxy | -| `bot_type` | Not required when using OpenAI's official models; set to `openai` when accessing other vendors via the compatible protocol | - -## Image Understanding - -OpenAI models like `gpt-5.5`, `gpt-5.4`, `gpt-4o`, and `gpt-4.1` natively support vision. Once `open_ai_api_key` is configured, the Agent's Vision tool automatically uses the main model to recognize images. If the main model does not support vision or you want to specify it explicitly, set it in the configuration file: - -```json -{ - "tools": { - "vision": { - "model": "gpt-5.4-mini" - } - } -} -``` - -Supported Vision models: `gpt-5.5`, `gpt-5.4`, `gpt-5.4-mini`, `gpt-5.4-nano`, `gpt-5`, `gpt-4.1`, `gpt-4.1-mini`, `gpt-4o`. - -## Image Generation - -Specify the image generation model in the configuration file; the Agent automatically routes image generation skill calls to OpenAI: - -```json -{ - "skills": { - "image-generation": { - "model": "gpt-image-2" - } - } -} -``` - -Supported image generation models: `gpt-image-2`, `gpt-image-1`. 
- -## Speech-to-Text (ASR) - -```json -{ - "voice_to_text": "openai", - "voice_to_text_model": "gpt-4o-mini-transcribe" -} -``` - -| Parameter | Description | -| --- | --- | -| `voice_to_text` | Set to `openai` to enable OpenAI speech-to-text | -| `voice_to_text_model` | Optional, defaults to `gpt-4o-mini-transcribe`; can also be `gpt-4o-transcribe`, `whisper-1` | - -Credentials are automatically reused from `open_ai_api_key`. - -## Text-to-Speech (TTS) - -```json -{ - "text_to_voice": "openai", - "text_to_voice_model": "tts-1", - "tts_voice_id": "alloy" -} -``` - -| Parameter | Description | -| --- | --- | -| `text_to_voice_model` | `tts-1`, `tts-1-hd`, `gpt-4o-mini-tts` | -| `tts_voice_id` | Voices: `alloy`, `echo`, `fable`, `onyx`, `nova`, `shimmer`, `ash`, `ballad`, `coral`, `sage`, `verse` | - -## Embedding - -```json -{ - "embedding_provider": "openai", - "embedding_model": "text-embedding-3-small" -} -``` - -Available models: `text-embedding-3-small`, `text-embedding-3-large`, `text-embedding-ada-002`. After changing the embedding, run `/memory rebuild-index` to rebuild the index. 
- diff --git a/docs/en/models/qianfan.mdx b/docs/en/models/qianfan.mdx deleted file mode 100644 index 13525967..00000000 --- a/docs/en/models/qianfan.mdx +++ /dev/null @@ -1,64 +0,0 @@ ---- -title: ERNIE -description: ERNIE model configuration (Baidu Qianfan) ---- - -Option 1: Native integration (recommended): - -```json -{ - "model": "ernie-5.1", - "qianfan_api_key": "", - "qianfan_api_base": "https://qianfan.baidubce.com/v2" -} -``` - -| Parameter | Description | -| --- | --- | -| `model` | Default recommendation: `ernie-5.1`; also supports `ernie-5.0`, `ernie-x1.1`, `ernie-4.5-turbo-128k`, `ernie-4.5-turbo-32k` | -| `qianfan_api_key` | Qianfan API key, usually starting with `bce-v3/` | -| `qianfan_api_base` | Optional, defaults to `https://qianfan.baidubce.com/v2` | - -## Model Selection - -| Model | Use Case | -| --- | --- | -| `ernie-5.1` | Default recommendation; latest ERNIE flagship with the strongest overall capability | -| `ernie-5.0` | Previous-generation flagship with excellent overall capability | -| `ernie-x1.1` | Deep-thinking reasoning model with lower hallucination and stronger instruction following / tool calling | -| `ernie-4.5-turbo-128k` | Long-context and general chat | -| `ernie-4.5-turbo-32k` | General chat with a balanced context window and cost | - -## Vision tool - -Once `qianfan_api_key` is configured, Agent mode can auto-discover Qianfan for the Vision tool: - -- When the main model itself is multimodal (e.g. `ernie-5.1`, `ernie-5.0`, `ernie-x1.1`, `ernie-4.5-turbo-vl`), images are handled directly by the main model with no extra setup. -- When the main model is text-only (e.g. `ernie-4.5-turbo-128k`), the Vision tool automatically falls back to `ernie-4.5-turbo-vl`. 
- -To force a specific Vision model, set it explicitly in `config.json`: - -```json -{ - "tools": { - "vision": { - "model": "ernie-4.5-turbo-vl" - } - } -} -``` - -Option 2: OpenAI-compatible configuration: - -```json -{ - "model": "ernie-5.1", - "bot_type": "openai", - "open_ai_api_key": "", - "open_ai_api_base": "https://qianfan.baidubce.com/v2" -} -``` - - - Prefer `qianfan_api_key` for new configurations. Existing `wenxin`, `wenxin-4`, `baidu_wenxin_api_key`, and `baidu_wenxin_secret_key` configurations remain supported. - diff --git a/docs/en/models/qwen.mdx b/docs/en/models/qwen.mdx deleted file mode 100644 index 8e27269c..00000000 --- a/docs/en/models/qwen.mdx +++ /dev/null @@ -1,112 +0,0 @@ ---- -title: Qwen -description: Qwen model configuration (Text / Image Understanding / Image Generation / Speech-to-Text / Text-to-Speech / Embedding) ---- - -Qwen (Alibaba DashScope / Bailian) is one of the most fully-featured vendors. Text, image understanding, image generation, speech-to-text, text-to-speech, and embedding can all be enabled with a single `dashscope_api_key`. - - - All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file. - - -## Text Chat - -```json -{ - "model": "qwen3.6-plus", - "dashscope_api_key": "YOUR_API_KEY" -} -``` - -| Parameter | Description | -| --- | --- | -| `model` | Can be `qwen3.6-plus`, `qwen3.7-max`, `qwen3.5-plus`, `qwen3-max`, `qwen-max`, `qwen-plus`, `qwen-turbo`, `qwq-plus`, etc. | -| `dashscope_api_key` | Create one in the [Bailian Console](https://bailian.console.aliyun.com/?tab=model#/api-key); see the [official docs](https://bailian.console.aliyun.com/?tab=api#/api) | - -## Image Understanding - -Once `dashscope_api_key` is configured, the Agent's Vision tool automatically calls Qwen's vision models to recognize images. 
Models like `qwen3-max` / `qwen3.5-plus` / `qwen3.6-plus` are already multimodal; if the main model is text-only (e.g. `qwen-turbo`), it automatically falls back to `qwen-vl-max`. - -To manually specify a Vision model: - -```json -{ - "tools": { - "vision": { - "model": "qwen3.6-plus" - } - } -} -``` - -Supported models: `qwen3.6-plus`, `qwen3.5-plus`, `qwen3-max`. - -## Image Generation - -```json -{ - "skills": { - "image-generation": { - "model": "qwen-image-2.0" - } - } -} -``` - -Available models: `qwen-image-2.0`, `qwen-image-2.0-pro`. - -## Speech-to-Text (ASR) - -```json -{ - "voice_to_text": "dashscope", - "voice_to_text_model": "qwen3-asr-flash" -} -``` - -| Parameter | Description | -| --- | --- | -| `voice_to_text` | Set to `dashscope` to enable Qwen ASR | -| `voice_to_text_model` | Optional, defaults to `qwen3-asr-flash` | - -Credentials are automatically reused from `dashscope_api_key`. A single audio segment should be smaller than 10MB and no longer than 300 seconds. - -## Text-to-Speech (TTS) - -```json -{ - "text_to_voice": "dashscope", - "text_to_voice_model": "qwen3-tts-flash", - "tts_voice_id": "Cherry" -} -``` - -| Parameter | Description | -| --- | --- | -| `text_to_voice_model` | Optional, defaults to `qwen3-tts-flash`; covers Mandarin, dialects, and major foreign languages | -| `tts_voice_id` | Voice ID; see the common list below | - -Common voice examples: - -| Voice ID | Description | -| --- | --- | -| `Cherry` | Qianyue · Sunny Female Voice | -| `Serena` | Suyao · Gentle Female Voice | -| `Ethan` | Chenxu · Sunny Male Voice | -| `Chelsie` | Qianxue · Anime Girl | -| `Dylan` | Beijing Dialect · Xiaodong | -| `Rocky` | Cantonese · Aqiang | -| `Sunny` | Sichuan Dialect · Qing'er | - -The full voice list (Mandarin / regional dialects / bilingual, etc.) can be selected visually in the Web Console under "Model Management → Text-to-Speech". 
- -## Embedding - -```json -{ - "embedding_provider": "dashscope", - "embedding_model": "text-embedding-v4" -} -``` - -The default model is `text-embedding-v4`. After changing the embedding, run `/memory rebuild-index` to rebuild the index. diff --git a/docs/en/releases/overview.mdx b/docs/en/releases/overview.mdx deleted file mode 100644 index ce932884..00000000 --- a/docs/en/releases/overview.mdx +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: Changelog -description: CowAgent version history ---- - -| Version | Date | Description | -| --- | --- | --- | -| [2.0.9](/en/releases/v2.0.9) | 2026.05.22 | Model management console, MCP protocol support, browser persistent login, new models (gpt-5.5, gemini-3.5-flash, qwen3.7-max, etc.), deployment hardening | -| [2.0.8](/en/releases/v2.0.8) | 2026.05.06 | Major Feishu channel upgrade (voice, streaming and Markdown, one-click QR-scan setup), DeepSeek V4 and Baidu models, scheduler tool enhancements | -| [2.0.7](/en/releases/v2.0.7) | 2026.04.22 | Image Generation Skill (6-provider auto-routing), new models (Kimi K2.6, Claude Opus 4.7, GLM 5.1), knowledge base and Web Console improvements | -| [2.0.6](/en/releases/v2.0.6) | 2026.04.14 | Project rename, Knowledge Base system, Deep Dream Memory Distillation, Smart Context Compression, Web Console multi-session and various improvements | -| [2.0.5](/en/releases/v2.0.5) | 2026.04.01 | Cow CLI, Skill Hub open source, Browser tool, WeCom Bot QR scan, and more | -| [2.0.4](/en/releases/v2.0.4) | 2026.03.22 | Personal WeChat channel, new model support, Japanese docs, script refactoring and bug fixes | -| [2.0.3](/en/releases/v2.0.3) | 2026.03.18 | WeCom Smart Bot and QQ channels, Coding Plan support, multiple new models, Web file processing, memory system upgrade | -| [2.0.2](/en/releases/v2.0.2) | 2026.02.27 | Web Console upgrade, multi-channel concurrency, session persistence | -| [2.0.1](/en/releases/v2.0.1) | 2026.02.13 | Built-in Web Search tool, smart context management, 
multiple fixes | -| [2.0.0](/en/releases/v2.0.0) | 2026.02.03 | Full upgrade to AI super assistant | -| 1.7.6 | 2025.05.23 | Web Channel optimization, AgentMesh plugin | -| 1.7.5 | 2025.04.11 | DeepSeek model | -| 1.7.4 | 2024.12.13 | Gemini 2.0 model, Web Channel | -| 1.7.3 | 2024.10.31 | Stability improvements, database features | -| 1.7.2 | 2024.09.26 | One-click install script, o1 model | -| 1.7.0 | 2024.08.02 | iFlytek 4.0 model, knowledge base references | -| 1.6.9 | 2024.07.19 | gpt-4o-mini, Alibaba voice recognition | -| 1.6.8 | 2024.07.05 | Claude 3.5, Gemini 1.5 Pro | -| 1.6.0 | 2024.04.26 | Kimi integration, gpt-4-turbo upgrade | -| 1.5.8 | 2024.03.26 | GLM-4, Claude-3, edge-tts | -| 1.5.2 | 2023.11.10 | Feishu channel, image recognition chat | -| 1.5.0 | 2023.11.10 | gpt-4-turbo, dall-e-3, tts multimodal | -| 1.0.0 | 2022.12.12 | Project created, first ChatGPT integration | - -See [GitHub Releases](https://github.com/zhayujie/CowAgent/releases) for full history. diff --git a/docs/en/releases/v2.0.0.mdx b/docs/en/releases/v2.0.0.mdx deleted file mode 100644 index e6c37533..00000000 --- a/docs/en/releases/v2.0.0.mdx +++ /dev/null @@ -1,63 +0,0 @@ ---- -title: v2.0.0 -description: CowAgent 2.0 - Full upgrade from chatbot to AI super assistant ---- - -CowAgent 2.0 is a comprehensive upgrade from a chatbot to an **AI super assistant** — capable of autonomous thinking and task planning, long-term memory, operating computers, and creating and executing skills. 
- -**Release Date**: 2026.02.03 | [GitHub Release](https://github.com/zhayujie/CowAgent/releases/tag/2.0.0) - -## Key Updates - -### Agent Core - -- **Complex Task Planning**: Autonomous planning with multi-turn reasoning -- **Long-term Memory**: Persistent memory with keyword and vector search -- **Built-in Tools**: 10+ tools including file ops, Bash, browser, scheduler -- **Web search**: Built-in `web_search` tool, supports multiple search engines, configure corresponding API key to use -- **Skills System**: Skill engine with built-in and custom skill support -- **Security & Cost**: Secret management, prompt controls, token limits - -### Other - -- **Channels**: Feishu/DingTalk WebSocket support, image/file messages -- **Models**: claude-sonnet-4-5, gemini-3-pro-preview, glm-4.7, MiniMax-M2.1, qwen3-max -- **Deployment**: One-click install, configure, run, and management script - -## Long-term Memory - - - - - -## Task Planning & Tools - - - - - - - - - - - - - -## Skills System - - - - - - - - - - - - - -## Contributing - -Welcome to [submit feedback](https://github.com/zhayujie/CowAgent/issues) and [contribute code](https://github.com/zhayujie/CowAgent/pulls). 
diff --git a/docs/en/releases/v2.0.1.mdx b/docs/en/releases/v2.0.1.mdx deleted file mode 100644 index 7de5746a..00000000 --- a/docs/en/releases/v2.0.1.mdx +++ /dev/null @@ -1,36 +0,0 @@ ---- -title: v2.0.1 -description: CowAgent 2.0.1 - Built-in Web Search, smart context management, multiple fixes ---- - -**Release Date**: 2026.02.27 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.0..2.0.1) - -## New Features - -- **Built-in Web Search tool**: Integrated web search as a built-in Agent tool, reducing decision cost ([4f0ea5d](https://github.com/zhayujie/CowAgent/commit/4f0ea5d7568d61db91ff69c91c429e785fd1b1c2)) -- **Claude Opus 4.6 model support**: Added support for Claude Opus 4.6 model ([#2661](https://github.com/zhayujie/CowAgent/pull/2661)) -- **WeCom image recognition**: Support image message recognition in WeCom channel ([#2667](https://github.com/zhayujie/CowAgent/pull/2667)) - -## Improvements - -- **Smart context management**: Resolved chat context overflow with intelligent context trimming strategy to prevent token limits ([cea7fb7](https://github.com/zhayujie/CowAgent/commit/cea7fb7490c53454602bf05955a0e9f059bcf0fd), [8acf2db](https://github.com/zhayujie/CowAgent/commit/8acf2dbdfe713b84ad74b761b7f86674b1c1904d)) [#2663](https://github.com/zhayujie/CowAgent/issues/2663) -- **Runtime info dynamic update**: Automatic update of timestamps and other runtime info in system prompts via dynamic functions ([#2655](https://github.com/zhayujie/CowAgent/pull/2655), [#2657](https://github.com/zhayujie/CowAgent/pull/2657)) -- **Skill prompt optimization**: Improved Skill system prompt generation, simplified tool descriptions for better Agent performance ([6c21833](https://github.com/zhayujie/CowAgent/commit/6c218331b1f1208ea8be6bf226936d3b556ade3e)) -- **GLM custom API Base URL**: Support custom API Base URL for GLM models ([#2660](https://github.com/zhayujie/CowAgent/pull/2660)) -- **Startup script optimization**: Improved `run.sh` script 
interaction and configuration flow ([#2656](https://github.com/zhayujie/CowAgent/pull/2656)) -- **Decision step logging**: Added Agent decision step logging for debugging ([cb303e6](https://github.com/zhayujie/CowAgent/commit/cb303e6109c50c8dfef1f5e6c1ec47223bf3cd11)) - -## Bug Fixes - -- **Scheduler memory loss**: Fixed memory loss caused by Scheduler dispatcher ([a77a874](https://github.com/zhayujie/CowAgent/commit/a77a8741b500a408c6f5c8868856fb4b018fe9db)) -- **Empty tool calls & long results**: Fixed handling of empty tool calls and excessively long tool results ([0542700](https://github.com/zhayujie/CowAgent/commit/0542700f9091ebb08c1a56103b0f0f45f24aa621)) -- **OpenAI Function Call**: Fixed function call compatibility with OpenAI models ([158c87a](https://github.com/zhayujie/CowAgent/commit/158c87ab8b05bae054cc1b4eacdbb64fc1062ba9)) -- **Claude tool name field**: Removed extraneous tool name field from Claude model responses ([eec10cb](https://github.com/zhayujie/CowAgent/commit/eec10cb5db6a3d5bc12ef606606532237d2c5f6e)) -- **MiniMax reasoning**: Optimized MiniMax model reasoning content handling, hidden thinking process output ([c72cda3](https://github.com/zhayujie/CowAgent/commit/c72cda33864bd1542012ee6e0a8bd8c6c88cb5ed), [72b1cac](https://github.com/zhayujie/CowAgent/commit/72b1cacea1ba0d1f3dedacbab2e088e98fd7e172)) -- **GLM thinking process**: Hidden GLM model thinking process display ([72b1cac](https://github.com/zhayujie/CowAgent/commit/72b1cacea1ba0d1f3dedacbab2e088e98fd7e172)) -- **Feishu connection & SSL**: Fixed Feishu channel SSL certificate errors and connection issues ([229b14b](https://github.com/zhayujie/CowAgent/commit/229b14b6fcabe7123d53cab1dea39f38dab26d6d), [8674421](https://github.com/zhayujie/CowAgent/commit/867442155e7f095b4f38b0856f8c1d8312b5fcf7)) -- **model_type validation**: Fixed `AttributeError` caused by non-string `model_type` ([#2666](https://github.com/zhayujie/CowAgent/pull/2666)) - -## Platform Compatibility - -- **Windows 
compatibility**: Fixed path handling, file encoding, and `os.getuid()` unavailability on Windows across multiple tool modules ([051ffd7](https://github.com/zhayujie/CowAgent/commit/051ffd78a372f71a967fd3259e37fe19131f83cf), [5264f7c](https://github.com/zhayujie/CowAgent/commit/5264f7ce18360ee4db5dcb4ebe67307977d40014)) diff --git a/docs/en/releases/v2.0.2.mdx b/docs/en/releases/v2.0.2.mdx deleted file mode 100644 index 80e9f6d8..00000000 --- a/docs/en/releases/v2.0.2.mdx +++ /dev/null @@ -1,98 +0,0 @@ ---- -title: v2.0.2 -description: CowAgent 2.0.2 - Web Console upgrade, multi-channel concurrency, session persistence ---- - -**Release Date**: 2026.02.27 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.1...master) - -## Highlights - -### 🖥️ Web Console Upgrade - -The Web Console has been fully upgraded with streaming conversation output, visual display of tool execution and reasoning processes, and online management of **models, skills, memory, channels, and Agent configuration**. 
- -#### Chat Interface - -Supports streaming output with real-time display of the Agent's reasoning process and tool calls, providing intuitive observation of the Agent's decision-making: - - - -#### Model Management - -Manage model configurations online without manually editing config files: - - - -#### Skill Management - -View and manage Agent skills (Skills) online: - - - -#### Memory Management - -View and manage Agent memory online: - - - -#### Channel Management - -Manage connected channels online with real-time connect/disconnect operations: - - - -#### Scheduled Tasks - -View and manage scheduled tasks online, including one-time tasks, fixed intervals, and Cron expressions: - - - -#### Logs - -View Agent runtime logs in real-time for monitoring and troubleshooting: - - - -Related commits: [f1a1413](https://github.com/zhayujie/CowAgent/commit/f1a1413), [c0702c8](https://github.com/zhayujie/CowAgent/commit/c0702c8), [394853c](https://github.com/zhayujie/CowAgent/commit/394853c), [1c71c4e](https://github.com/zhayujie/CowAgent/commit/1c71c4e), [5e3eccb](https://github.com/zhayujie/CowAgent/commit/5e3eccb), [e1dc037](https://github.com/zhayujie/CowAgent/commit/e1dc037), [5edbf4c](https://github.com/zhayujie/CowAgent/commit/5edbf4c), [7d258b5](https://github.com/zhayujie/CowAgent/commit/7d258b5) - -### 🔀 Multi-Channel Concurrency - -Multiple channels (e.g., Feishu, DingTalk, WeCom, Web) can now run simultaneously, each in an independent thread without interference. - -Configuration: Set multiple channels in `config.json` via `channel_type` separated by commas, or connect/disconnect channels in real-time from the Web Console's channel management page. 
- -```json -{ - "channel_type": "web,feishu,dingtalk" -} -``` - -Related commits: [4694594](https://github.com/zhayujie/CowAgent/commit/4694594), [7cce224](https://github.com/zhayujie/CowAgent/commit/7cce224), [7d258b5](https://github.com/zhayujie/CowAgent/commit/7d258b5), [c9adddb](https://github.com/zhayujie/CowAgent/commit/c9adddb) - -### 💾 Session Persistence - -Session history is now persisted to a local SQLite database. Conversation context is automatically restored after service restarts. Historical conversations in the Web Console are also restored. - -Related commits: [29bfbec](https://github.com/zhayujie/CowAgent/commit/29bfbec), [9917552](https://github.com/zhayujie/CowAgent/commit/9917552), [925d728](https://github.com/zhayujie/CowAgent/commit/925d728) - -## New Models - -- **Gemini 3.1 Pro Preview**: Added `gemini-3.1-pro-preview` model support ([52d7cad](https://github.com/zhayujie/CowAgent/commit/52d7cad)) -- **Claude 4.6 Sonnet**: Added `claude-4.6-sonnet` model support ([52d7cad](https://github.com/zhayujie/CowAgent/commit/52d7cad)) -- **Qwen3.5 Plus**: Added `qwen3.5-plus` model support ([e59a289](https://github.com/zhayujie/CowAgent/commit/e59a289)) -- **MiniMax M2.5**: Added `Minimax-M2.5` model support ([48db538](https://github.com/zhayujie/CowAgent/commit/48db538)) -- **GLM-5**: Added `glm-5` model support ([48db538](https://github.com/zhayujie/CowAgent/commit/48db538)) -- **Kimi K2.5**: Added `kimi-k2.5` model support ([48db538](https://github.com/zhayujie/CowAgent/commit/48db538)) -- **Doubao 2.0 Code**: Added `doubao-2.0-code` coding-specialized model ([ab28ee5](https://github.com/zhayujie/CowAgent/commit/ab28ee5)) -- **DashScope Models**: Added Alibaba Cloud DashScope model name support ([ce58f23](https://github.com/zhayujie/CowAgent/commit/ce58f23)) - -## Website & Documentation - -- **Official Website**: [cowagent.ai](https://cowagent.ai/) -- **Documentation**: [docs.cowagent.ai](https://docs.cowagent.ai/) - -## Bug Fixes - -- **Gemini 
DingTalk image recognition**: Fixed Gemini unable to process image markers in DingTalk channel ([05a3304](https://github.com/zhayujie/CowAgent/commit/05a3304)) ([#2670](https://github.com/zhayujie/CowAgent/pull/2670)) Thanks [@SgtPepper114](https://github.com/SgtPepper114) -- **Startup script dependencies**: Fixed dependency installation issue in `run.sh` script ([b6fc9fa](https://github.com/zhayujie/CowAgent/commit/b6fc9fa)) -- **Bare except cleanup**: Replaced `bare except` with `except Exception` for better exception handling ([adca89b](https://github.com/zhayujie/CowAgent/commit/adca89b)) ([#2674](https://github.com/zhayujie/CowAgent/pull/2674)) Thanks [@haosenwang1018](https://github.com/haosenwang1018) diff --git a/docs/en/releases/v2.0.3.mdx b/docs/en/releases/v2.0.3.mdx deleted file mode 100644 index 5f9a837d..00000000 --- a/docs/en/releases/v2.0.3.mdx +++ /dev/null @@ -1,91 +0,0 @@ ---- -title: v2.0.3 -description: CowAgent 2.0.3 - WeCom Smart Bot and QQ channels, Web Console file handling, memory system upgrade ---- - -## 🔌 New Channels - -### WeCom Smart Bot - -Added the WeCom Smart Bot (`wecom_bot`) channel with streaming card output, support for receiving and replying to text and image messages, and full configuration through the Web Console. - -Documentation: [WeCom Smart Bot](https://docs.cowagent.ai/en/channels/wecom-bot). - -Related commits: [d4480b6](https://github.com/zhayujie/CowAgent/commit/d4480b6), [a42f31f](https://github.com/zhayujie/CowAgent/commit/a42f31f), [4ecd4df](https://github.com/zhayujie/CowAgent/commit/4ecd4df), [8b45d6c](https://github.com/zhayujie/CowAgent/commit/8b45d6c) - -### QQ Channel - -Added the QQ official bot (`qq`) channel with support for text and image messages in both private chats and group chats. - -Documentation: [QQ Bot](https://docs.cowagent.ai/en/channels/qq). 
- -Related commits: [005a0e1](https://github.com/zhayujie/CowAgent/commit/005a0e1), [a4d54f5](https://github.com/zhayujie/CowAgent/commit/a4d54f5) - -## 🖥️ Web Console File Input and Processing - -The Web Console chat UI now supports file and image uploads — files can be sent directly to the agent for processing. The Read tool gains parsing support for Office documents (Word, Excel, PPT). - -Related commits: [30c6d9b](https://github.com/zhayujie/CowAgent/commit/30c6d9b) - -## 🤖 New Models - -- **GPT-5.4 Series**: Added `gpt-5.4`, `gpt-5.4-mini`, `gpt-5.4-nano` ([1623deb](https://github.com/zhayujie/CowAgent/commit/1623deb)) -- **Gemini 3.1 Flash Lite Preview**: Added `gemini-3.1-flash-lite-preview` ([ba915f2](https://github.com/zhayujie/CowAgent/commit/ba915f2)) - -## 💰 Coding Plan Support - -Added integration with vendor Coding Plan (monthly programming subscription) tiers via the unified OpenAI-compatible path. Supported vendors include Aliyun, MiniMax, GLM, Kimi, and Volcengine. - -See [Coding Plan docs](https://docs.cowagent.ai/en/models/coding-plan) for detailed configuration. 
- -## 🧠 Memory System Upgrade - -Memory flush improvements: - -- Use the LLM to summarize out-of-window conversations into compact daily memory entries -- Summarization runs asynchronously on a background thread, never blocking replies -- Smarter batch trimming policy reduces flush frequency -- Daily scheduled flush as a safety net for low-activity scenarios -- Fixed context-memory loss issues - -Related commits: [022c13f](https://github.com/zhayujie/CowAgent/commit/022c13f), [c116235](https://github.com/zhayujie/CowAgent/commit/c116235) - -## 🔧 Tool Refactoring - -- **Image Vision**: Image recognition (Vision) is refactored from a Skill into a built-in Tool with a dedicated Vision Provider configuration, improving stability and maintainability ([a50fafa](https://github.com/zhayujie/CowAgent/commit/a50fafa), [3b8b562](https://github.com/zhayujie/CowAgent/commit/3b8b562)) -- **Web Fetch**: Web fetch is refactored from a Skill into a built-in Tool with support for downloading and parsing remote documents (PDF, Word, Excel, PPT) ([ccb9030](https://github.com/zhayujie/CowAgent/commit/ccb9030), [fa61744](https://github.com/zhayujie/CowAgent/commit/fa61744)) - -## 🐳 Docker Deployment Improvements - -- **Config Template Alignment**: `docker-compose.yml` env vars aligned with `config-template.json`, covering full model API key and Agent settings -- **Web Console Port Mapping**: Added `9899` port mapping so the Web Console is reachable in browser after Docker deployment -- **Hot Config Reload**: Bot API key and API base are now read at request time — changes from the Web Console take effect without restart -- **Workspace Persistence**: Added a `./cow` volume mount so agent workspace data (memories, persona, skills, etc.) 
persists across container rebuilds and upgrades - -## ⚡ Performance Improvements - -- **Faster Startup**: The Feishu channel imports its dependencies lazily, avoiding a 4–10s startup delay ([924dc79](https://github.com/zhayujie/CowAgent/commit/924dc79)) -- **Channel Stability**: Improved channel connection stability and added env-var support for channel configuration ([f1c04bc](https://github.com/zhayujie/CowAgent/commit/f1c04bc), [46d97fd](https://github.com/zhayujie/CowAgent/commit/46d97fd)) - -## 🐛 Bug Fixes - -- **bot_type Propagation**: Fixed `bot_type` propagation under Agent mode ([#2691](https://github.com/zhayujie/CowAgent/pull/2691)) Thanks [@Weikjssss](https://github.com/Weikjssss) -- **bot_type Resolution Priority**: Adjusted `bot_type` resolution priority under Agent mode ([#2692](https://github.com/zhayujie/CowAgent/pull/2692)) Thanks [@6vision](https://github.com/6vision) -- **Zhipu Config**: Fixed Zhipu `bot_type` naming, Web Console persistence, and regex escaping ([#2693](https://github.com/zhayujie/CowAgent/pull/2693)) Thanks [@6vision](https://github.com/6vision) -- **OpenAI-Compat Layer**: Unified error handling via the `openai_compat` layer ([#2688](https://github.com/zhayujie/CowAgent/pull/2688)) Thanks [@JasonOA888](https://github.com/JasonOA888) -- **OpenAI-Compat Migration**: Completed the `openai_compat` migration across all model bots ([#2689](https://github.com/zhayujie/CowAgent/pull/2689)) -- **Gemini Tool Calling**: Fixed tool-call matching for Gemini ([eda82ba](https://github.com/zhayujie/CowAgent/commit/eda82ba)) -- **Session Concurrency**: Fixed race conditions in concurrent session scenarios ([9879878](https://github.com/zhayujie/CowAgent/commit/9879878)) -- **History Recovery**: Fixed incomplete history recovery — only user/assistant text messages are restored, tool calls are stripped ([b788a3d](https://github.com/zhayujie/CowAgent/commit/b788a3d), [a33ce97](https://github.com/zhayujie/CowAgent/commit/a33ce97)) -- **Feishu Group 
Chat**: Removed the `bot_name` dependency for Feishu group chats ([b641bff](https://github.com/zhayujie/CowAgent/commit/b641bff)) -- **Safari Compatibility**: Fixed an IME Enter key issue that mistakenly sent messages on Safari ([0687916](https://github.com/zhayujie/CowAgent/commit/0687916)) -- **Windows Compatibility**: Fixed bash-style `$VAR` to `%VAR%` env-var conversion on Windows ([7c67513](https://github.com/zhayujie/CowAgent/commit/7c67513)) -- **MiniMax Params**: Added a `max_tokens` cap for MiniMax models ([1767413](https://github.com/zhayujie/CowAgent/commit/1767413)) -- **.gitignore**: Added Python directory ignore rules ([#2683](https://github.com/zhayujie/CowAgent/pull/2683)) Thanks [@pelioo](https://github.com/pelioo) -- **AGENT.md Proactive Evolution**: Improved the system prompt guidance around AGENT.md — instead of waiting for explicit user edits, the agent now proactively detects persona/style shifts in the conversation and updates AGENT.md accordingly - -## 📦 Upgrade - -Run `./run.sh update` for a one-click upgrade, or manually pull the latest code and restart. See [Upgrade Guide](https://docs.cowagent.ai/en/guide/upgrade) for details. - -**Release Date**: 2026.03.18 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.2...2.0.3) diff --git a/docs/en/releases/v2.0.4.mdx b/docs/en/releases/v2.0.4.mdx deleted file mode 100644 index fc976fa0..00000000 --- a/docs/en/releases/v2.0.4.mdx +++ /dev/null @@ -1,55 +0,0 @@ ---- -title: v2.0.4 -description: CowAgent 2.0.4 - Personal WeChat channel, new model support, Japanese docs, script refactoring and bug fixes ---- - -## 🔌 Personal WeChat Channel - -Added personal WeChat (`weixin`) channel — the most important update in this release. 
Simply scan a QR code to connect CowAgent to your personal WeChat account, with support for: - -- **Messaging**: Send and receive text, image, file, and video messages; receive voice messages -- **QR Code Login**: QR code displayed in terminal, scan with WeChat to log in; auto-refresh on expiry -- **Credential Persistence**: Login credentials saved to `~/.weixin_cow_credentials.json` automatically, no re-scan needed on restart -- **Session Auto-Reconnect**: Automatically clears expired credentials and re-initiates QR code login -- **Web Console Integration**: Add WeChat channel from the Web Console with synchronized QR code login flow -- **Docker & Script Support**: Both `run.sh` and `docker-compose.yml` now support the WeChat channel - -Documentation: [WeChat Channel](https://docs.cowagent.ai/channels/weixin). - -Related commits: [ce89869](https://github.com/zhayujie/CowAgent/commit/ce89869), [a483ec0](https://github.com/zhayujie/CowAgent/commit/a483ec0), [c1421e0](https://github.com/zhayujie/CowAgent/commit/c1421e0) - -## 🤖 New Models - -- **MiniMax-M2.7**: Added MiniMax-M2.7 model support -- **GLM-5-Turbo**: Added Zhipu glm-5-turbo model support - -Related commits: [9192f6f](https://github.com/zhayujie/CowAgent/commit/9192f6f) - -## 🔧 Script Refactoring - -- **run.sh Refactoring**: Extracted shared logic and eliminated duplication, reducing from 600+ lines to 177 lines ([49d8707](https://github.com/zhayujie/CowAgent/commit/49d8707)) -- **Executable Permission**: Fixed `run.sh` file permission issue ([652156e](https://github.com/zhayujie/CowAgent/commit/652156e)) - -## ⚡ Improvements - -- **Unified Request Headers**: Added identification headers to external requests across Agent services (Chat, Embedding, Vision, WebSearch, etc.) 
([b4e711f](https://github.com/zhayujie/CowAgent/commit/b4e711f)) -- **Auto-Repair Messages**: Enhanced message protocol fault tolerance with automatic repair of malformed message sequences ([b8b57e3](https://github.com/zhayujie/CowAgent/commit/b8b57e3)) - -## 🌍 Japanese Documentation - -Added complete Japanese documentation covering getting started guide, channel integration, model configuration and other major sections. Thanks [@Ikko Ashimine](https://github.com/ikoamu) - -Related commits: [5487c0b](https://github.com/zhayujie/CowAgent/commit/5487c0b) - -## 🐛 Bug Fixes - -- **WeCom Bot Compatibility**: Fixed compatibility with older `websocket-client` versions, added unified WebSocket compatibility layer ([bc7f627](https://github.com/zhayujie/CowAgent/commit/bc7f627)) -- **run.sh PID**: Fixed process PID retrieval error in `run.sh` ([9febb07](https://github.com/zhayujie/CowAgent/commit/9febb07)) -- **Feishu Encoding**: Fixed message and log encoding issue in Feishu channel ([7d0e156](https://github.com/zhayujie/CowAgent/commit/7d0e156)) -- **Feishu Config**: Removed redundant `feishu_bot_name` dependency in `run.sh` ([1b5be1b](https://github.com/zhayujie/CowAgent/commit/1b5be1b)) - -## 📦 Upgrade - -Run `./run.sh update` for a one-click upgrade, or manually pull the latest code and restart. See [Upgrade Guide](https://docs.cowagent.ai/guide/upgrade) for details. 
- -**Release Date**: 2026.03.22 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.3...master) diff --git a/docs/en/releases/v2.0.5.mdx b/docs/en/releases/v2.0.5.mdx deleted file mode 100644 index ad45398e..00000000 --- a/docs/en/releases/v2.0.5.mdx +++ /dev/null @@ -1,77 +0,0 @@ ---- -title: v2.0.5 -description: CowAgent 2.0.5 - Cow CLI, Skill Hub open source, Browser tool, WeCom Bot QR scan, and more ---- - -## 🖥️ Cow CLI - -New CLI command system for managing CowAgent from terminal and chat: - -- **Terminal commands**: Run `cow <command>` for `start`, `stop`, `restart`, `update`, `status`, `logs`, etc. -- **Chat commands**: Type `/` in conversation for `/help`, `/status`, `/config`, `/skill`, `/context`, `/logs`, `/version`, etc. -- **Web console**: Type `/` in the input box to open a slash command menu, with arrow-key input history -- **Windows support**: New PowerShell script `scripts/run.ps1` with `cow` command support - -Docs: [Command Overview](https://docs.cowagent.ai/en/cli) - - - -## 🧩 Cow Skill Hub Open Source - -[Cow Skill Hub](https://skills.cowagent.ai) is now open source and live — browse, search, install, and publish AI Agent skills: - -- **One-command install**: `/skill install <name>` in chat or `cow skill install <name>` in terminal -- **Multi-source**: Install from Skill Hub, GitHub, ClawHub, LinkAI, and more -- **Search**: `/skill search` and `/skill list --remote` to browse the hub -- **Publish**: Submit your own skills at [skills.cowagent.ai/submit](https://skills.cowagent.ai/submit) -- **Mirror**: Mirror acceleration for faster downloads in China - -Open source repo: [cow-skill-hub](https://github.com/zhayujie/cow-skill-hub) - -Docs: [Skill Hub](https://docs.cowagent.ai/en/skills/hub), [Install Skills](https://docs.cowagent.ai/en/skills/install) - - - -## 🌐 Browser Tool - -New Browser tool — Agent can control a Chromium browser to visit and interact with web pages: - -- **Navigation & interaction**: `navigate`, `click`, `fill`, `select`, 
`scroll`, `press`, etc. -- **Page snapshot**: Compact DOM snapshot for efficient page understanding, auto-snapshot after navigation -- **Screenshot**: Save page screenshots to workspace -- **JavaScript execution**: Run custom scripts on pages -- **CLI install**: `cow install-browser` for one-command setup -- **Docker support**: Browser install built into Docker image - -Docs: [Browser Tool](https://docs.cowagent.ai/en/tools/browser) - - - -## 🤖 WeCom Bot QR Code Setup - -WeCom Bot channel now supports QR code scan for one-click bot creation: - -- **QR scan in Web console**: Select "Scan QR" mode, scan with WeCom to auto-create and connect a bot — no manual configuration needed -- **Manual mode**: Still supports manual Bot ID and Secret input -- **Stream push optimization**: Throttled push to avoid WebSocket congestion - -Docs: [WeCom Bot](https://docs.cowagent.ai/en/channels/wecom-bot) - -PR: [#2735](https://github.com/zhayujie/CowAgent/pull/2735). Thanks [@WecomTeam](https://github.com/WecomTeam) - -## 🐛 Other Improvements & Fixes - -- **DeepSeek module**: Independent DeepSeek Bot with dedicated `deepseek_api_key` config ([#2719](https://github.com/zhayujie/CowAgent/pull/2719)). Thanks [@6vision](https://github.com/6vision) -- **Web console**: Slash command menu, input history, new model options, mobile optimization ([#2731](https://github.com/zhayujie/CowAgent/pull/2731)). Thanks [@zkjqd](https://github.com/zkjqd) -- **Context loss**: Fix context loss after trimming ([393f0c0](https://github.com/zhayujie/CowAgent/commit/393f0c0)) -- **System prompt**: Fix system prompt not rebuilding on every turn ([13f5fde](https://github.com/zhayujie/CowAgent/commit/13f5fde)) -- **Gemini**: Fix missing model attribute in GoogleGeminiBot ([#2716](https://github.com/zhayujie/CowAgent/pull/2716)). 
Thanks [@cowagent](https://github.com/cowagent) -- **WeChat channel**: Fix file send failures and filename loss ([6d9b7ba](https://github.com/zhayujie/CowAgent/commit/6d9b7ba), [45faa9c](https://github.com/zhayujie/CowAgent/commit/45faa9c)) -- **Docker**: Fix volume permissions, reduce image size ([3eb8348](https://github.com/zhayujie/CowAgent/commit/3eb8348), [4470d4c](https://github.com/zhayujie/CowAgent/commit/4470d4c)) -- **Security**: Fix Memory Content path traversal risk. Thanks [@August829](https://github.com/August829) - -## 📦 Upgrade - -Run `cow update` or `./run.sh update` to upgrade, or pull the latest code and restart. See [Upgrade Guide](https://docs.cowagent.ai/en/guide/upgrade). - -**Release Date**: 2026.04.01 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.4...master) diff --git a/docs/en/releases/v2.0.6.mdx b/docs/en/releases/v2.0.6.mdx deleted file mode 100644 index a41f75aa..00000000 --- a/docs/en/releases/v2.0.6.mdx +++ /dev/null @@ -1,83 +0,0 @@ ---- -title: v2.0.6 -description: CowAgent 2.0.6 - Knowledge Base, Deep Dream Memory Distillation, Smart Context Compression, Web Console Multi-Session and More ---- - -## Project Renamed to CowAgent - -The repository has been officially renamed from `chatgpt-on-wechat` to **CowAgent**, evolving into a full-featured AI Agent assistant. 
- -- New URL: [github.com/zhayujie/CowAgent](https://github.com/zhayujie/CowAgent) — GitHub auto-redirects the old URL -- CLI commands, config files, and documentation links remain compatible — no extra steps needed - -## 📚 Knowledge Base - -New personal knowledge base system — Agent can autonomously build and maintain structured knowledge, retrieving it on demand during conversations: - -- **Index-driven self-organizing structure**: Knowledge is stored in `knowledge/` directory, auto-organized by category, with each knowledge page as an independent Markdown file -- **Auto-write**: Send files, links, or other knowledge to the Agent, or it will automatically create/update knowledge pages when valuable information is identified in conversation -- **Hybrid retrieval**: Supports keyword full-text search and vector semantic retrieval, loading relevant knowledge on demand during conversations -- **Visualization**: File tree browsing and knowledge graph visualization, with in-document links for direct navigation -- **Command management**: `/knowledge` for stats, `/knowledge list` for directory structure, `/knowledge on|off` to toggle - - - - -Docs: [Knowledge Base](https://docs.cowagent.ai/en/knowledge) - -## 🌙 Deep Dream Memory Distillation - -A new memory consolidation mechanism that automatically distills scattered conversation memories into refined long-term memory daily: - -- **Three-tier memory flow**: Conversation context (short-term) → Daily memory (mid-term) → MEMORY.md (long-term), forming a complete memory lifecycle -- **Auto-distillation**: Runs daily at 23:55, reads the day's daily memory and MEMORY.md, performs deduplication, merging, and pruning via LLM, outputting a refined MEMORY.md -- **Dream diary**: Each distillation generates a narrative-style dream diary recording discoveries and insights, stored in `memory/dreams/` -- **Manual trigger**: `/memory dream [N]` to manually trigger with configurable lookback days (default 3, max 30), with chat 
notification on completion -- **Web console**: Memory management page now includes a "Dream Diary" tab for browsing all dream diaries - -Docs: [Deep Dream](https://docs.cowagent.ai/en/memory/deep-dream) - - - -## 🧠 Smart Context Compression - -When context exceeds limits, trimmed portions are summarized by LLM and asynchronously injected to maintain conversation continuity: - -- **Async LLM summary**: Trimmed messages are summarized into key information by LLM, written to daily memory files and injected into retained context -- **Multi-model compatible**: Uses the primary model for summarization, compatible with Claude, OpenAI, MiniMax and other model message format requirements - -Docs: [Short-term Memory](https://docs.cowagent.ai/en/memory/context) - -## 💬 Web Console Upgrades - -Multiple enhancements to the Web console: - -- **Multi-session management**: Create and switch between independent sessions, sidebar session list with auto-generated and manually editable titles -- **Password protection**: Set a login password via `web_console_password` config option -- **Deep thinking**: Display model thinking process in Web console, controlled by `enable_thinking` config option -- **Scheduled push**: Scheduled task results can be pushed to Web console -- **Message copy**: One-click copy of raw Markdown content from AI reply bubbles -- **Language toggle**: Top language switch button now shows current language for more intuitive interaction - -## 🤖 Model Updates - -- **Vision optimization**: Image recognition tool prefers the primary model with automatic multi-provider fallback. Docs: [Vision Tool](https://docs.cowagent.ai/en/tools/vision) -- **MiniMax new model**: Added MiniMax-M2.7-highspeed model and MiniMax TTS voice synthesis support. 
Thanks @octo-patch -- **Qwen**: Added qwen3.6-plus model support - -## 🐛 Other Improvements & Fixes - -- **Memory prompts**: `MEMORY.md` injected into system prompt by default, with refined memory retrieval and write trigger conditions for enhanced proactive writing -- **System prompt**: Optimized system prompt style and tone guidance -- **Browser tool**: Enhanced implicit interactive element detection -- **File send**: Fixed common file types (tar.gz, zip, etc.) not being sent correctly. Thanks @6vision -- **macOS compatibility**: Fixed network pre-check timeout compatibility issue. Thanks @Moliang Zhou -- **Windows compatibility**: Fixed PowerShell compatibility, process updates, terminal encoding and other issues on Windows -- **Python 3.13+**: Fixed missing `legacy-cgi` dependency for Python 3.13+ -- **WeChat channel**: Updated personal WeChat channel version - -## 📦 Upgrade - -Run `cow update` or `./run.sh update` to upgrade, or pull the latest code and restart. See [Upgrade Guide](https://docs.cowagent.ai/en/guide/upgrade). 
- -**Release Date**: 2026.04.14 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.5...master) diff --git a/docs/en/releases/v2.0.7.mdx b/docs/en/releases/v2.0.7.mdx deleted file mode 100644 index 522e5339..00000000 --- a/docs/en/releases/v2.0.7.mdx +++ /dev/null @@ -1,65 +0,0 @@ ---- -title: v2.0.7 -description: CowAgent 2.0.7 - Image Generation Skill (6-provider auto-routing), new models, knowledge base enhancements, Web Console improvements and bug fixes ---- - -## 🎨 Image Generation Skill - -New built-in `image-generation` skill supporting text-to-image, image-to-image, and multi-image fusion across six major providers: - -- **6-provider auto-routing**: OpenAI (GPT-Image-2) → Gemini (Nano Banana) → Seedream (Volcengine Ark) → Qwen (DashScope) → MiniMax → LinkAI — automatically selects from configured providers in fixed priority order, with automatic fallback on failure -- **Zero model selection**: Just configure an API key and it works — no need to manually specify a model. You can also name a specific model in conversation (e.g. 
"draw a cat with seedream") -- **Flexible control**: Supports `quality`, `size` (512/1K–4K), and `aspect_ratio` parameters, with each provider automatically mapping to its supported values -- **Image editing**: Pass existing images for editing, style transfer, or multi-image fusion (Seedream supports up to 14 reference images) -- **Skill-level config**: Pin a default model via `skills.image-generation.model` in `config.json` -- **Image lightbox**: All images in the Web console now support click-to-enlarge preview - -Docs: [Image Generation Skill](https://docs.cowagent.ai/en/skills/image-generation) - -## 🤖 New Model Support - -- **Kimi K2.6**: Added `kimi-k2.6` model support -- **Claude Opus 4.7**: Added `claude-opus-4-7` model support -- **GLM 5.1**: Added `glm-5.1` model support -- **Kimi Coding Plan**: Support for Kimi Coding Plan mode -- **Custom model providers**: New custom model provider configuration for easier integration with additional vendors - -## 💬 Web Console Improvements - -- **Smart auto-scroll**: Improved chat scroll behaviour — no longer forces scroll to bottom while the user is reading earlier messages -- **Reasoning content cap**: Deep thinking content capped at 4 KB to prevent frontend lag -- **Mobile optimisation**: Session sidebar hidden by default on mobile, with overlay dismiss support -- **Session title fix**: Fixed title auto-generation fallback logic and Bridge reset on config change -- **Image preview dedup**: Fixed duplicate image rendering within the same message - -## 📚 Knowledge Base Enhancements - -- **Nested directory support**: Knowledge base listing and display now support multi-level nested directories -- **Root-level file display**: Show `index.md`, `log.md` and other root-level files in the knowledge tree -- **Empty state stats fix**: Root-level files no longer interfere with empty-state detection - -## 🌙 Dream Memory Improvements - -- **Structured organisation**: Dream memory files are now auto-archived by date with a 
cleaner directory structure -- **Schedule jitter**: Daily dream trigger includes random jitter to avoid concurrency conflicts in cluster deployments - -## 🛠 Skill System Improvements - -- **Skill manager refresh**: `/skill` commands now automatically refresh the skill manager to keep state in sync -- **Installation sources**: Skill installation supports multiple source formats (URL, zip, local file, etc.) with automatic target directory handling - -## 🐛 Other Fixes - -- **Gemini fix**: Fixed Gemini tool calls not returning results -- **Agent retry**: Empty-response retries no longer drop `tool_calls` -- **Docker env sync**: Fixed environment variables not syncing after config update in Docker environments -- **Python 3.7 compat**: Deferred `Literal` import for Python 3.7 compatibility -- **Model switch notification**: Fixed bot_type change notification not showing after model switch. Thanks @6vision -- **Config command**: `/config` now supports setting `enable_thinking` -- **Thinking display**: Deep thinking display disabled by default - -## 📦 Upgrade - -Run `cow update` or `./run.sh update` to upgrade, or pull the latest code and restart. See [Upgrade Guide](https://docs.cowagent.ai/en/guide/upgrade). - -**Release Date**: 2026.04.22 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.6...master) diff --git a/docs/en/releases/v2.0.8.mdx b/docs/en/releases/v2.0.8.mdx deleted file mode 100644 index 3fcc29da..00000000 --- a/docs/en/releases/v2.0.8.mdx +++ /dev/null @@ -1,68 +0,0 @@ ---- -title: v2.0.8 -description: CowAgent 2.0.8 - Major Feishu channel upgrade (voice, streaming typewriter, one-click QR app creation), DeepSeek V4 / ERNIE 5.0 support, scheduler memory enhancements and multiple fixes ---- - -## 🪶 Major Feishu Channel Upgrade - -### 1. One-click QR-scan App Creation - -No more manual app setup, permission scopes and event subscriptions in the Feishu Open Platform. 
When `feishu_app_id` is not configured, both the Web Console and CLI startup flow now show a QR-scan entry — scan with Feishu, authorize, and the bot is created and config is filled back automatically. Out-of-the-box. - -Documentation: [Feishu Channel](https://docs.cowagent.ai/en/channels/feishu) - -### 2. Voice Messages - -Receive Feishu voice messages with automatic speech-to-text, and reply in voice via TTS. Recognition accuracy for short Chinese voice messages has been improved. - -### 3. Streaming Typewriter Replies - -Integrated with Feishu CardKit streaming cards, **enabled by default**, matching the Web Console experience: - -- Multi-turn agent flows render intermediate updates and the final reply on separate cards -- Tuned for high-throughput models like DeepSeek to keep pace with the Web Console -- Falls back to plain text replies automatically when not supported, no manual config needed -- Requires Feishu client ≥ 7.20 - -The voice and streaming building blocks come from a community contribution #2791. Thanks [@yangluxin613](https://github.com/yangluxin613) - -## 🤖 New Model Support - -- **DeepSeek V4 series**: Added `deepseek-v4-pro` / `deepseek-v4-flash`, with `deepseek-v4-flash` set as the new default -- **Unified thinking-mode toggle**: DeepSeek V4, Qwen3 and other thinking-capable models now share the same `enable_thinking` switch -- **ERNIE first-class integration**: New `qianfan` provider supporting `ernie-5.0` (default recommendation), `ernie-x1.1`, `ernie-4.5-turbo-128k`, `ernie-4.5-turbo-32k`. Dedicated `qianfan_api_key` / `qianfan_api_base` settings keep OpenAI config clean; legacy `wenxin` / `wenxin-4` paths are fully preserved. 
#2790 Thanks [@jimmyzhuu](https://github.com/jimmyzhuu) - - Documentation: [ERNIE](https://docs.cowagent.ai/en/models/qianfan) - -## 🌐 Translation Provider - -- **Youdao translator**: Added a Youdao provider to the `translate/` module using the v3 SHA-256 signing scheme, with automatic ISO 639-1 language-code mapping (`zh`, `zh-TW`, etc.) #2797 Thanks [@Zmjjeff7](https://github.com/Zmjjeff7) - -## 🛠 OpenAI Client Refactor - -- **Drop SDK dependency**: The OpenAI bot is reimplemented on a native HTTP client — leaner startup, fewer dependency conflicts -- **Web Console hint**: API base inputs in the model config UI now include version-path placeholder hints - -## ⏰ Scheduler Memory Enhancements - -- **Follow-up on task results**: Scheduled task results are automatically injected into the receiver's session history — the next turn can ask follow-up questions without re-stating context. Thanks [@huangrichao2020](https://github.com/huangrichao2020) -- **No long-term memory pollution**: Scheduler-injected pairs are excluded from the daily memory flush so high-frequency tasks don't drown the memory store -- **Bounded scheduler context**: The scheduler's own session context is automatically capped, so long-running periodic tasks don't accumulate state and slow down replies - -## 🔧 Tools and Safety - -- **Vision model selection**: `tools.vision.model` config now actually takes effect, with automatic fallback when unconfigured #2792 -- **Bash safety prompt**: The destructive-deletion confirm prompt is now scoped to paths outside the workspace — routine in-workspace operations are no longer interrupted - -## 🐛 Other Fixes - -- Fixed Deep Dream firing duplicate runs in multi-instance setups -- Fixed missing `reasoning_content` on some history turns in DeepSeek multi-turn conversations - -## 📦 Upgrade - -Source-code deployments can run `cow update` or `./run.sh update` for a one-click upgrade, or pull the latest code and restart manually. 
See [Upgrade Guide](https://docs.cowagent.ai/en/guide/upgrade) for details. - -> ⚠️ One-click Feishu app creation requires `lark-oapi>=1.5.5`. `cow update` pulls it automatically; manual deployments must update dependencies. - -**Release Date**: 2026.05.05 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.7...2.0.8) diff --git a/docs/en/releases/v2.0.9.mdx b/docs/en/releases/v2.0.9.mdx deleted file mode 100644 index ccae36fc..00000000 --- a/docs/en/releases/v2.0.9.mdx +++ /dev/null @@ -1,65 +0,0 @@ ---- -title: v2.0.9 -description: CowAgent 2.0.9 - Web Console model management, MCP protocol support, browser persistent login, new models and deployment hardening ---- - -## 🖥️ Model Management Console - -The Web Console adds a new **Models** page that organizes everything by **provider × capability**, covering chat, image, voice, embedding and search models in one place: - -- **Per-provider configuration**: Each provider's API Key / API Base is configured once at the top, and every capability below picks it up automatically — no more re-entering credentials -- **Image models**: Image understanding and image generation can each pick their own provider and model independently; falls back to the main model when unspecified -- **Voice models**: ASR (speech-to-text) and TTS (text-to-speech) can be configured independently, with new Qwen and Zhipu ASR/TTS models added -- **Embedding models**: Configurable embedding models (used for memory and knowledge-base retrieval), with new support for OpenAI, Tongyi, Doubao, Zhipu and others; run `/memory rebuild-index` after switching to rebuild the index online -- **Search capability**: Web search has been upgraded to support Bocha, Baidu, Zhipu and more providers — in auto mode the agent can synthesize results from multiple sources for deeper research - -Documentation: [Models Overview](https://docs.cowagent.ai/en/models) - -20260522113305 - - -## 🧩 MCP Protocol Support - -Adds support for **MCP (Model Context 
Protocol)**, expanding from a fixed built-in toolset to an open, pluggable tool ecosystem — any MCP-compatible service can be plugged in directly as an agent tool. - -- Native JSON-RPC implementation, zero extra dependencies, supports both `stdio` and `sse` transports -- Compatible with the `mcpServers` configuration style used by Claude Desktop / Cursor, reads `~/cow/mcp.json` by default - -Documentation: [MCP Tools](https://docs.cowagent.ai/en/tools/mcp). Thanks [@yangluxin613](https://github.com/yangluxin613) (#2801) - -## 🌐 Browser Persistent Login - -For sites that require login or have anti-bot protection, the browser tool can now persist a login session for long-term reuse, and supports attaching to your real Chrome browser to bypass fingerprint detection: - -- **Persistent user profile (default)**: Uses `~/.cow/browser_profile` as the browser user data dir by default; once logged in, sessions are reused automatically on subsequent runs -- **CDP mode**: Configure `tools.browser.cdp_endpoint` to take over a real Chrome instance with full browser permissions - -Documentation: [Browser Tool](https://docs.cowagent.ai/en/tools/browser). Thanks [@leafmove](https://github.com/leafmove) (#2809) - -## 🤖 New Models and Improvements - -- **New models**: `gpt-5.5`, `gemini-3.5-flash`, `qwen3.7-max`, `ernie-5.1` -- **Improvements**: DeepSeek V4 supports the `reasoning_effort` thinking-depth parameter; fixed thinking models like MiMo failing to connect via the OpenAI-compatible protocol - -## 🔒 Deployment & Security - -- **Bind to localhost by default**: The Web Console `web_host` now defaults to `127.0.0.1`; for server deployments, set it to `0.0.0.0` and configure a password manually. Thanks @August829, @yidaozhongqing, @YLChen-007, @icysun -- **Fully bundled frontend assets**: All third-party CSS / JS are now served locally — the console works offline and on intranet deployments. 
Thanks [@gitlayzer](https://github.com/gitlayzer) (#2816) - -## 🛠 UX Improvements & Fixes - -- **TTS rolls out to more channels**: Web Console, Personal WeChat, Feishu, DingTalk and WeCom Smart Bot all support voice replies — see the [Channels Overview](https://docs.cowagent.ai/en/channels) -- **Log panel enhancements**: Differentiated highlighting by log level, with level-based filtering. Thanks [@yangluxin613](https://github.com/yangluxin613) (#2807) -- **Auto-launch Web Console**: The Web Console now opens automatically on startup. Thanks [@yangluxin613](https://github.com/yangluxin613) (#2804) -- **Clean Ctrl+C exit**: No more long `KeyboardInterrupt` stack traces. Thanks [@yangluxin613](https://github.com/yangluxin613) (#2806) -- **Folder upload**: Web Console supports directory uploads, with path validation adapted for Windows. Thanks [@TryToMakeUsBetter](https://github.com/TryToMakeUsBetter) (#2814) -- Fixed scheduled tasks executing duplicates under certain conditions. Thanks [@CNXudiandian](https://github.com/CNXudiandian) (#2820) -- Fixed one-shot scheduled tasks with timezone not firing. Thanks @AethericSpace -- Fixed failed tool calls not being displayed after page refresh. Thanks [@a1094174619](https://github.com/a1094174619) (#2822) -- Fixed WeCom bot messages with illegal control characters failing to be delivered. Thanks [@Jacques-Zhao](https://github.com/Jacques-Zhao) (#2810) - -## 📦 Upgrade - -Source-code deployments can run `cow update` for a one-click upgrade, or pull the latest code and restart manually. See the [Upgrade Guide](https://docs.cowagent.ai/en/guide/upgrade) for details. 
- -**Release Date**: 2026.05.22 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.8...2.0.9) diff --git a/docs/en/skills/create.mdx b/docs/en/skills/create.mdx deleted file mode 100644 index cfdbde02..00000000 --- a/docs/en/skills/create.mdx +++ /dev/null @@ -1,58 +0,0 @@ ---- -title: Create Skills -description: Create custom skills through conversation ---- - -CowAgent includes a built-in Skill Creator that lets you quickly create, install, or update skills through natural language conversation. - -## Usage - -Simply describe the skill you want in a conversation, and the Agent will handle the creation: - -- Codify workflows as skills: "Create a skill from this deployment process" -- Integrate third-party APIs: "Create a skill based on this API documentation" -- Install remote skills: "Install xxx skill for me" - -## Creation Flow - -1. Tell the Agent what skill you want to create -2. Agent automatically generates `SKILL.md` description and execution scripts -3. Skill is saved to the workspace `~/cow/skills/` directory -4. Agent will automatically recognize and use the skill in future conversations - - - - - -## SKILL.md Format - -Created skills follow the standard SKILL.md format: - -```markdown ---- -name: my-skill -description: Brief description of the skill -metadata: - emoji: 🔧 - requires: - bins: ["curl"] - env: ["MY_API_KEY"] - primaryEnv: "MY_API_KEY" ---- - -# My Skill - -Detailed instructions... -``` - -| Field | Description | -| --- | --- | -| `name` | Skill name, must match directory name | -| `description` | Skill description, Agent decides whether to invoke based on this | -| `metadata.requires.bins` | Required system commands | -| `metadata.requires.env` | Required environment variables | -| `metadata.always` | Always load (default false) | - - - See the [Skill Creator documentation](https://github.com/zhayujie/CowAgent/blob/master/skills/skill-creator/SKILL.md) for details. 
- diff --git a/docs/en/skills/hub.mdx b/docs/en/skills/hub.mdx deleted file mode 100644 index 0a9e73e1..00000000 --- a/docs/en/skills/hub.mdx +++ /dev/null @@ -1,65 +0,0 @@ ---- -title: Skill Hub -description: Browse, search, and install AI Agent skills ---- - -[Cow Skill Hub](https://skills.cowagent.ai/) is an open-source skill marketplace for AI Agents, aggregating official picks, community contributions, and third-party skills from GitHub, ClawHub, and beyond. - -Source code: [github.com/zhayujie/cow-skill-hub](https://github.com/zhayujie/cow-skill-hub) - - - -## Features - -- **Browse skills** — filter by category (Featured / Community / Third-party) and tags -- **Search skills** — find skills by name or description -- **View details** — read the skill manifest, file contents, install command, and required environment variables -- **One-click install** — copy the install command and run it in CowAgent - -## Installing a skill - -Run the install command in chat or in your terminal: - - -```text Chat -/skill install -``` - -```bash Terminal -cow skill install -``` - - -You can also browse the marketplace directly from chat: - -```text -/skill list --remote -/skill search -``` - -Beyond the curated list, you can install third-party skills from **GitHub, ClawHub, LinkAI, or any URL** via the CLI. See [Installing skills](/en/skills/install) for details. - -## Contributing a skill - -To submit your own skill: - -1. Visit [skills.cowagent.ai/submit](https://skills.cowagent.ai/submit) -2. Sign in with GitHub or Google -3. Upload a folder or zip file containing `SKILL.md` -4. Skill name, display name, and description are auto-detected — adjust as needed -5. Submit for review; skills go live after security and quality checks - - - -Skill file layout: - -``` -your-skill/ -├── SKILL.md # required, in the root -├── scripts/ # optional, runtime scripts -└── resources/ # optional, additional assets -``` - - - Skills are built around the `SKILL.md` manifest. 
You can also download `SKILL.md` from a skill's detail page and use it with any Agent that supports custom instructions (OpenClaw, Cursor, Claude Code, and more). - diff --git a/docs/en/skills/image-generation.mdx b/docs/en/skills/image-generation.mdx deleted file mode 100644 index 608fa3bc..00000000 --- a/docs/en/skills/image-generation.mdx +++ /dev/null @@ -1,98 +0,0 @@ ---- -title: image-generation -description: Text-to-image / image-to-image / multi-image fusion with automatic multi-provider routing and fallback ---- - -A general-purpose image generation and editing skill supporting six providers: OpenAI, Gemini, Seedream (Volcengine Ark), Qwen (DashScope), MiniMax, and LinkAI. Configure any one provider's key to start using it; configure multiple to enable automatic fallback. - -## Supported Models - -| Provider | Models / Aliases | Notes | -| --- | --- | --- | -| OpenAI | `gpt-image-2`, `gpt-image-1` | General-purpose, high quality, supports `quality` parameter | -| Gemini Nano Banana | `nano-banana-2`, `nano-banana-pro`, `nano-banana` | Corresponds to the image variants of `gemini-3.1-flash`, `gemini-3-pro`, `gemini-2.5-flash` | -| Seedream (Volcengine Ark) | `seedream-5.0-lite`, `seedream-4.5` | Native 2K–4K, up to 14 reference images for fusion | -| Qwen (DashScope) | `qwen-image-2.0`, `qwen-image-2.0-pro` | Strong with Chinese text rendering and text-image layouts | -| MiniMax | `image-01` | Fast and simple | -| LinkAI | Any model | Universal gateway, used as fallback | - -## Model Selection - -By default, "auto routing + automatic fallback" is used: - -1. Pick the first configured provider in the order `OpenAI → Gemini → Seedream → Qwen → MiniMax → LinkAI` -2. On errors such as 401, model not enabled, or network issues, automatically switch to the next provider -3. If the user specifies a model in the conversation (e.g. 
"use seedream to draw a cat"), the corresponding provider is promoted to the front - -To pin a specific model: - -```json -{ - "skills": { - "image-generation": { - "model": "seedream-5.0-lite" - } - } -} -``` - -## Configuring API Keys - - - It is recommended to configure providers from the "Model Management" page in the [Web console](/en/channels/web). Chat model keys configured there are automatically reused by the image generation skill — no need to set them twice. You can also edit the configuration file manually or temporarily set keys in a conversation using the `env_config` tool. - - -Credentials are shared with the main model providers: - -| Field | Provider | -| --- | --- | -| `openai_api_key` | OpenAI | -| `gemini_api_key` | Gemini | -| `ark_api_key` | Volcengine Ark (Seedream) | -| `dashscope_api_key` | Alibaba DashScope (Qwen) | -| `minimax_api_key` | MiniMax | -| `linkai_api_key` | LinkAI | - - -## Enabling and Disabling - -The skill automatically adjusts its status based on API keys: - -- **Key configured**: the Agent calls the skill directly when it receives a drawing request -- **Key not configured**: the skill still appears in context (marked as "needs configuration") — the Agent will guide the user to set up a key - -To control it manually: - -```text -/skill disable image-generation # Disable -/skill enable image-generation # Re-enable -``` - -Equivalent terminal commands: `cow skill disable image-generation` / `cow skill enable image-generation`. 
- -## Parameters - -| Parameter | Type | Required | Default | Description | -| --- | --- | --- | --- | --- | -| `prompt` | string | Yes | — | Image description | -| `image_url` | string / list | No | null | Input image for editing — local path or URL; pass a list for multi-image fusion | -| `quality` | string | No | auto | `low` / `medium` / `high`, supported only by some providers | -| `size` | string | No | auto | `512` / `1K` / `2K` / `3K` / `4K`, or pixel value like `1024x1024` | -| `aspect_ratio` | string | No | null | `1:1` / `3:2` / `2:3` / `16:9` / `9:16` / `21:9`; Gemini also supports `1:4` / `4:1` / `1:8` / `8:1` | - - - **Higher quality and larger size cost more and take longer.** For everyday conversations, use the defaults (`auto`) or `quality=low` + `size=1K` — about 20 seconds per image. For posters or when high resolution is explicitly requested, use `quality=high` + `size=2K/4K` — may take 1–5 minutes. - - -## Common Use Cases - -- **Text-to-image**: generate illustrations, posters, icons, avatars, storyboards, etc. from a description -- **Image-to-image**: change styles, swap elements, add decorations or text on an existing image -- **Multi-image fusion**: combine multiple reference images into one (outfit swaps, character group photos, etc.) 
- - -- Bash timeout should be set to 600 seconds: each provider has a 300-second HTTP timeout, and the script may try multiple providers sequentially -- Input images are automatically compressed to ≤ 4 MB with the longest edge ≤ 4096 px -- Gemini / Seedream / Qwen / MiniMax do not support the `quality` parameter -- Seedream defaults to 2K; `seedream-5.0-lite` supports up to 3K; `seedream-4.5` supports up to 4K - diff --git a/docs/en/skills/index.mdx b/docs/en/skills/index.mdx deleted file mode 100644 index de57d94a..00000000 --- a/docs/en/skills/index.mdx +++ /dev/null @@ -1,64 +0,0 @@ ---- -title: Skills Overview -description: CowAgent skills system introduction ---- - -Skills provide infinite extensibility for the Agent. Each Skill consists of a description file (`SKILL.md`), execution scripts (optional), and resources (optional), describing how to accomplish specific types of tasks. - -The difference between Skills and Tools: Tools are atomic operations implemented in code (e.g., file read/write, command execution), while Skills are high-level workflows based on description files that can combine multiple Tools to complete complex tasks. - -## Getting Skills - -CowAgent offers multiple ways to acquire skills: - -- **Cow Skill Hub** — Browse and install community skills via `/skill list --remote` -- **GitHub** — Install directly from GitHub repositories, with batch install support -- **ClawHub** — Install ClawHub skills via `/skill install clawhub:name` -- **URL** — Install from zip archives or SKILL.md links -- **Conversational creation** — Let the Agent create skills through natural language conversation - -See [Install Skills](/en/skills/install) and [Skill Management Commands](/en/cli/skill) for details. You can also [create skills](/en/skills/create) through conversation. - -## Skill Loading Priority - -1. **Workspace skills** (highest): `~/cow/skills/` -2. 
**Project built-in skills** (lowest): `skills/` - -Skills with the same name are overridden by priority. - -## Skill File Structure - -``` -skills/ -├── my-skill/ -│ ├── SKILL.md # Skill description (frontmatter + instructions) -│ ├── scripts/ # Execution scripts (optional) -│ └── resources/ # Additional resources (optional) -``` - -### SKILL.md Format - -```markdown ---- -name: my-skill -description: Brief description of the skill -metadata: - emoji: 🔧 - requires: - bins: ["curl"] - env: ["MY_API_KEY"] - primaryEnv: "MY_API_KEY" ---- - -# My Skill - -Detailed instructions... -``` - -| Field | Description | -| --- | --- | -| `name` | Skill name, must match directory name | -| `description` | Skill description, Agent decides whether to invoke based on this | -| `metadata.requires.bins` | Required system commands | -| `metadata.requires.env` | Required environment variables | -| `metadata.always` | Always load (default false) | diff --git a/docs/en/skills/install.mdx b/docs/en/skills/install.mdx deleted file mode 100644 index 7a70205f..00000000 --- a/docs/en/skills/install.mdx +++ /dev/null @@ -1,65 +0,0 @@ ---- -title: Install Skills -description: Install skills from multiple sources with a single command ---- - -CowAgent supports installing skills from [Cow Skill Hub](https://skills.cowagent.ai/), GitHub, ClawHub, LinkAI, and any URL via a unified `install` command. Use `/skill install` in chat or `cow skill install` in the terminal. - -## From the Skill Hub - -Browse all available skills at [skills.cowagent.ai](https://skills.cowagent.ai/) and install by name: - -```text -/skill list --remote -/skill install pptx -``` - -## From GitHub - -Any GitHub-hosted skill can be installed directly. 
Supports both repository-level batch install and subdirectory-level single install: - -```text -/skill install larksuite/cli -/skill install https://github.com/larksuite/cli/tree/main/skills/lark-im -``` - -## From ClawHub - -All [ClawHub](https://clawhub.ai/) skills (40k+) can be installed with a single command: - -```text -/skill install clawhub: -``` - -## From LinkAI - -All public resources on [LinkAI](https://link-ai.tech/console) (10k+ apps / workflows / plugins), as well as your own resources (apps, workflows, knowledge bases, databases, plugins), can be installed via: - -```text -/skill install linkai: -``` - -> Every resource created on the LinkAI platform has a unique `code`. Find it on each resource's page in the [console](https://link-ai.tech/console). - -## From URL - -Supports zip archives and SKILL.md file links: - -```text -/skill install https://cdn.link-ai.tech/skills/pptx.zip -/skill install https://example.com/path/to/SKILL.md -``` - -## Manage Skills - -```text -/skill list # View installed skills -/skill info pptx # View skill details -/skill enable pptx # Enable a skill -/skill disable pptx # Disable a skill -/skill uninstall pptx # Uninstall a skill -``` - - - All commands above work in the terminal by replacing `/skill` with `cow skill`. See [Skill Management Commands](/en/cli/skill) for full documentation. - diff --git a/docs/en/skills/knowledge-wiki.mdx b/docs/en/skills/knowledge-wiki.mdx deleted file mode 100644 index 14ae9c90..00000000 --- a/docs/en/skills/knowledge-wiki.mdx +++ /dev/null @@ -1,112 +0,0 @@ ---- -title: knowledge-wiki -description: Maintain a local structured knowledge base with automatic archiving, categorisation, and cross-referencing ---- - -Organises notes, insights, and reference materials from your conversations into a structured local knowledge base, automatically maintaining an index and cross-references between pages. 
- -`knowledge-wiki` maintains a `knowledge/` directory in your workspace — essentially the Agent's "second brain". The skill is marked `always: true`, so it is **always loaded** and requires no external dependencies. - -## When It Triggers - -- You share an article, document, or URL that you want to keep for future reference -- A conversation produces conclusions worth retaining long-term -- You want to look up something you accumulated earlier - -## Directory Structure - -``` -knowledge/ -├── index.md # Global index (must be maintained) -├── log.md # Operation log (append-only) -└── / # Category subdirectories (grouped by content) - └── .md # Knowledge page (lowercase-hyphenated filename) -``` - -## Three Core Operations - -### 1. Ingest - -When you share some material, the Agent will: - -1. Read and understand the original content, extracting key information -2. Decide which category it belongs to — check `index.md` first; create a new category if none fits -3. Generate a knowledge page at `knowledge//.md` -4. Update the index `index.md` and the log `log.md` - -### 2. Synthesise - -When a conversation produces new conclusions or insights: - -1. Create a new knowledge page under an appropriate category -2. Add cross-links to and from related existing pages -3. Update the index and log - -### 3. Query - -When you ask about previously accumulated knowledge: - -1. Search `index.md` for potentially relevant pages -2. Open specific pages with the `read` tool -3. Supplement with `memory_search` if needed -4. Include links to knowledge pages in the answer so you can click through to the source - -## Page Format - -```markdown -# Page Title - -> Source: - -Body content. Link between pages using relative paths: -[Related Page](../category/related-page.md) - -## Key Points - -- ... - -## Related Pages - -- [Page A](../category/page-a.md) — why it's related -``` - - -- `> Source:` records where this knowledge came from. 
Always include it when there is a clear source -- Cross-references are important: when creating or updating a page, remember to add back-links in the related pages too -- **Only link to pages that already exist.** If a concept deserves its own page, create it first, then add the link - - -## Index Format - -`knowledge/index.md` uses a flat list grouped by category, one knowledge page per line: - -```markdown -# Knowledge Index - -## Category A -- [Page Title](category-a/page-slug.md) — one-line summary - -## Category B -- [Page Title](category-b/page-slug.md) — one-line summary -``` - -No tables, no emojis. Category names and organisation can be adjusted freely. - -## Log Format - -`knowledge/log.md` is append-only — newest entries go at the bottom: - -```markdown -## [YYYY-MM-DD] ingest | Page Title -## [YYYY-MM-DD] synthesize | Page Title -``` - -## Writing Guidelines - -- **Filenames**: lowercase with hyphens, e.g. `machine-learning.md` -- **One topic per page** — link related content across pages -- **Update, don't duplicate** — if a page already exists, update it rather than creating a new one -- **Always update the index** `knowledge/index.md` after any change -- **Distill, don't copy** — capture the key points, not the entire source -- **Use full paths when referencing knowledge pages in conversations**, e.g. `[Title](knowledge//.md)`. 
Use relative paths only for inter-page links -- **Include links when answering questions based on knowledge pages** so users can dig deeper diff --git a/docs/en/skills/skill-creator.mdx b/docs/en/skills/skill-creator.mdx deleted file mode 100644 index 58853f52..00000000 --- a/docs/en/skills/skill-creator.mdx +++ /dev/null @@ -1,180 +0,0 @@ ---- -title: skill-creator -description: Create, install, and update skills — standardises SKILL.md format and directory structure ---- - -`skill-creator` is a "meta-skill" that helps the Agent create, install, and update other skills, ensuring every skill follows a consistent `SKILL.md` format and directory layout. - -## When It Triggers - -- The user wants to install a skill from a URL or remote repository -- The user wants to create a brand-new skill from scratch -- An existing skill needs upgrading or restructuring - -## What Is a Skill? - -A skill is a reusable instruction set plus optional scripts and assets. It injects domain expertise into the Agent so it can handle specific tasks like a specialist. - -A skill typically contains: - -1. **Specialised workflow** — step-by-step instructions for a category of tasks -2. **Tool usage** — how to call a particular API or process a particular file format -3. **Domain knowledge** — team conventions, business rules, data schemas, etc. -4. **Attached resources** — scripts, reference docs, templates, etc. - - -**Core principle: less is more.** Only write what the Agent wouldn't figure out on its own. For every line you add, ask yourself: is it worth the tokens? - - -## Directory Structure - -``` -skill-name/ -├── SKILL.md # Required: skill definition -│ ├── YAML frontmatter (name / description are mandatory) -│ └── Markdown body (instructions + examples) -└── Optional resources - ├── scripts/ # Executable scripts (Python / Bash, etc.) - ├── references/ # Large reference docs the Agent reads on demand - └── assets/ # Templates, icons, etc. 
used directly in output -``` - -## SKILL.md Specification - -Frontmatter fields in the SKILL.md header: - -| Field | Description | -| --- | --- | -| `name` | Skill name — lowercase with hyphens, must match the directory name | -| `description` | **The most important field.** Clearly state what the skill does and when to use it. The Agent reads this to decide whether to invoke it. All trigger-related descriptions go here, not in the body | -| `metadata.cowagent.requires.bins` | System CLI tools that must be installed | -| `metadata.cowagent.requires.env` | Required environment variables (all must be present) | -| `metadata.cowagent.requires.anyEnv` | Multiple API keys — at least one must be set | -| `metadata.cowagent.requires.anyBins` | Multiple tools — at least one must be installed | -| `metadata.cowagent.always` | Set to `true` to always load, skipping dependency checks | -| `metadata.cowagent.emoji` | Display emoji (optional) | -| `metadata.cowagent.os` | OS restriction, e.g. `["darwin", "linux"]` | - - -The `category` field does not need to be set manually — the system automatically sets it to `skill`. - - -Two ways to declare API key dependencies: - -```yaml -metadata: - cowagent: - requires: - env: ["MYAPI_KEY"] # Must be present -``` - -```yaml -metadata: - cowagent: - requires: - anyEnv: ["OPENAI_API_KEY", "LINKAI_API_KEY"] # At least one -``` - -**Skills are auto-enabled/disabled based on dependencies**: they activate when all required environment variables are present and deactivate when any are missing — no need for manual `/skill enable`. - -## Resource Directories - -| Directory | What goes here | What does NOT go here | -| --- | --- | --- | -| `scripts/` | Code that needs to run repeatedly, or scripts that produce deterministic results | Demo-only code snippets | -| `references/` | Documents **over 500 lines** that genuinely won't fit in SKILL.md (e.g. 
a full DB schema) | General API docs, tutorials, examples | -| `assets/` | Files that appear in the final output (templates, icons, boilerplate, etc.) | Explanatory documentation | - - -**In principle, everything goes in `SKILL.md`** — only split into resource directories when it truly won't fit. - -Do not add `README.md`, `CHANGELOG.md`, or `INSTALLATION_GUIDE.md` to a skill — put everything in `SKILL.md`. Resource directories should only contain scripts that actually run or assets that are actually used. - - -## Installing External Skills - -After installation, the skill lands in `/skills//`. - -| Source | How to install | -| --- | --- | -| URL (single file) | curl / web_fetch | -| URL (zip archive) | Download and extract | -| Local SKILL.md | Read directly | -| Local zip archive | Extract | - -Installation steps: - -1. Locate the `SKILL.md` (may be at the root or in a subdirectory of the archive) -2. Read the `name` from the frontmatter -3. Copy the **entire skill directory** (including `SKILL.md`, `scripts/`, `assets/`, etc.) to `/skills//` -4. If the archive contains an `INSTALL.md` or similar setup script, run it — but the final result must still reside under `/skills//` - -## Creating a Skill from Scratch - -Recommended order: - -1. **Clarify requirements** — ask the user for a few concrete use cases (don't ask too many at once) -2. **Plan the structure** — does this skill need scripts? Reference docs? Template assets? -3. **Scaffold** — use the init script: - - ```bash - scripts/init_skill.py --path /skills [--resources scripts,references,assets] [--examples] - ``` - -4. **Fill in content** — write SKILL.md, add scripts and resources. Always test scripts after writing them -5. **Validate** (optional): - - ```bash - scripts/quick_validate.py /skills/ - ``` - -6. **Iterate** — keep improving based on real-world usage feedback - -## Naming Conventions - -- Use only lowercase letters, digits, and hyphens. Normalise user-given names, e.g. 
`Plan Mode` → `plan-mode` -- Maximum 64 characters -- Keep it short, start with a verb, make it self-explanatory -- Use tool names as prefixes when appropriate, e.g. `gh-address-comments`, `linear-address-issue` -- The directory name and the `name` field must match exactly - -## Three-Level Loading - -Skills are not loaded into context all at once — they use a three-level progressive loading mechanism: - -1. **Metadata** (`name` + `description`) — always in context (~100 words). The Agent uses this to decide whether to invoke the skill -2. **SKILL.md body** — loaded only when the skill is activated; keep it under 500 lines -3. **Resource files** — read on demand by the Agent - -For skills with multiple variants (e.g. multi-cloud deployment), organise like this: - -``` -cloud-deploy/ -├── SKILL.md # Main workflow and provider selection logic -└── references/ - ├── aws.md - ├── gcp.md - └── azure.md -``` - -When the user picks AWS, the Agent only reads `aws.md` — no need to load all three providers. - -## Common Design Patterns - -**Step-by-step**: numbered steps with corresponding scripts. - -```markdown -1. Analyse form structure (run analyze_form.py) -2. Generate field mappings (edit fields.json) -3. Auto-fill the form (run fill_form.py) -``` - -**Branching**: different flows based on user intent. - -```markdown -1. Determine operation type: - **Creating new content?** → follow the "Create" workflow - **Editing existing content?** → follow the "Edit" workflow -``` - -**Template-based**: when output format has strict requirements, include a template in SKILL.md for the Agent to follow. diff --git a/docs/en/tools/bash.mdx b/docs/en/tools/bash.mdx deleted file mode 100644 index 60b20918..00000000 --- a/docs/en/tools/bash.mdx +++ /dev/null @@ -1,28 +0,0 @@ ---- -title: bash - Terminal -description: Execute system commands ---- - -Execute Bash commands in the current working directory, returns stdout and stderr. 
API keys configured via `env_config` are automatically injected into the environment. - -## Dependencies - -No extra dependencies, available by default. - -## Parameters - -| Parameter | Type | Required | Description | -| --- | --- | --- | --- | -| `command` | string | Yes | Command to execute | -| `timeout` | integer | No | Timeout in seconds | - -## Use Cases - -- Install packages and dependencies -- Run code and tests -- Deploy applications and services (Nginx config, process management, etc.) -- System administration and troubleshooting - - - - diff --git a/docs/en/tools/browser.mdx b/docs/en/tools/browser.mdx deleted file mode 100644 index 4c6fda82..00000000 --- a/docs/en/tools/browser.mdx +++ /dev/null @@ -1,172 +0,0 @@ ---- -title: browser - Browser -description: Control a browser to access and interact with web pages ---- - -Control a Chromium browser for web navigation, element interaction and content extraction. Supports JavaScript-rendered pages and uses a compact DOM snapshot so the Agent can efficiently understand page structure. - -## Installation - - - - ```bash - cow install-browser - ``` - - This command will: - - Install the `playwright` Python package (with auto-fallback for older systems) - - Install system dependencies on Linux - - Download the Chromium browser (Linux servers automatically use the headless build) - - Detect China-mainland networks and use mirror acceleration - - - ```bash - pip install playwright - playwright install chromium - ``` - - On Linux servers, install system dependencies as well: - ```bash - sudo playwright install-deps chromium - ``` - - On older systems (e.g. Ubuntu 18.04, glibc < 2.28), install a compatible version: - ```bash - pip install playwright==1.28.0 - python -m playwright install chromium - ``` - - To accelerate the Chromium download from China: - ```bash - export PLAYWRIGHT_DOWNLOAD_HOST=https://registry.npmmirror.com/-/binary/playwright - python -m playwright install chromium - ``` - - - - - 1. 
Supported on Ubuntu 20.04+, Debian 10+, macOS and Windows. Older systems such as Ubuntu 18.04 will fall back to a compatible version automatically. - 2. The browser tool has heavy dependencies (~300MB) and is optional. For lightweight web content retrieval, use the `web_fetch` tool. - - -## Workflow - -A typical browser workflow for the Agent: - -1. **`navigate`** — Open the target URL -2. **`snapshot`** — Get a compact DOM with auto-numbered interactive elements (`ref`) -3. **`click` / `fill` / `select`** — Operate elements by `ref` -4. **`snapshot`** — Snapshot again to verify the result - -## Supported Actions - -| Action | Description | Key parameters | -| --- | --- | --- | -| `navigate` | Open URL | `url` | -| `snapshot` | Get structured page text (primary way) | `selector` (optional) | -| `click` | Click an element | `ref` or `selector` | -| `fill` | Fill text into an input | `ref` or `selector`, `text` | -| `select` | Select a dropdown option | `ref` or `selector`, `value` | -| `scroll` | Scroll the page | `direction` (up/down/left/right) | -| `screenshot` | Save a screenshot to the workspace | `full_page` | -| `wait` | Wait for an element or timeout | `selector`, `timeout` | -| `press` | Press a key (Enter, Tab, etc.) | `key` | -| `back` / `forward` | Browser back / forward | - | -| `get_text` | Get an element's text content | `selector` | -| `evaluate` | Run JavaScript | `script` | - -## Use Cases - -- Access a URL to retrieve dynamic page content -- Fill in forms and log in -- Operate web elements (click buttons, select options, etc.) 
-- Verify the result of a deployed web page -- Scrape content that requires JS rendering - -## Run Mode - -The browser picks a mode based on the runtime environment: - -| Environment | Mode | -| --- | --- | -| macOS / Windows | Headed (browser window visible) | -| Linux desktop (with DISPLAY) | Headed | -| Linux server (no DISPLAY) | Headless | - -You can override it in `config.json`: - -```json -{ - "tools": { - "browser": { - "headless": true - } - } -} -``` - -## Persistent Login - -**Log in to a target site once and the Agent can keep using it.** Two ways are supported: - -### Option 1: Persistent mode (default) - -Works out of the box. Login state is saved under `~/.cow/browser_profile`. No configuration needed. - -To disable persistence and start with a clean environment every time: - -```json -{ - "tools": { - "browser": { - "persistent": false - } - } -} -``` - -### Option 2: CDP mode (attach to real Chrome) - -Have the Agent connect to a separately launched real Chrome (instead of the Chromium bundled with Playwright) for full browser fingerprints. Useful for sites with strict bot detection. - -Launch Chrome with a debugging port and a dedicated user data directory: - - - - ```bash - "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" \ - --remote-debugging-port=9222 \ - --user-data-dir="$HOME/.cow/chrome-cdp" - ``` - - - ```bash - google-chrome \ - --remote-debugging-port=9222 \ - --user-data-dir="$HOME/.cow/chrome-cdp" - ``` - - - ```powershell - & "C:\Program Files\Google\Chrome\Application\chrome.exe" ` - --remote-debugging-port=9222 ` - --user-data-dir="$env:USERPROFILE\.cow\chrome-cdp" - ``` - - - -Then point the Agent at the endpoint in `config.json`: - -```json -{ - "tools": { - "browser": { - "cdp_endpoint": "http://localhost:9222" - } - } -} -``` - - - Chrome 137+ requires `--remote-debugging-port` to be paired with a dedicated `--user-data-dir`. 
As a result, the CDP-launched Chrome **cannot directly reuse the login state of your daily Chrome**; you'll need to log in once inside this dedicated profile. - diff --git a/docs/en/tools/edit.mdx b/docs/en/tools/edit.mdx deleted file mode 100644 index f231c6b9..00000000 --- a/docs/en/tools/edit.mdx +++ /dev/null @@ -1,24 +0,0 @@ ---- -title: edit - File Edit -description: Edit files via precise text replacement ---- - -Edit files via precise text replacement. If `oldText` is empty, appends to the end of the file. - -## Dependencies - -No extra dependencies, available by default. - -## Parameters - -| Parameter | Type | Required | Description | -| --- | --- | --- | --- | -| `path` | string | Yes | File path | -| `oldText` | string | Yes | Original text to replace (empty to append) | -| `newText` | string | Yes | Replacement text | - -## Use Cases - -- Modify specific parameters in configuration files -- Fix bugs in code -- Insert content at specific positions in files diff --git a/docs/en/tools/env-config.mdx b/docs/en/tools/env-config.mdx deleted file mode 100644 index 23f75bf8..00000000 --- a/docs/en/tools/env-config.mdx +++ /dev/null @@ -1,36 +0,0 @@ ---- -title: env_config - Environment -description: Manage API keys and secrets ---- - -Manage environment variables (API keys and secrets) in the workspace `.env` file, with secure conversational updates. Built-in security protection and desensitization. 
- -## Dependencies - -| Dependency | Install Command | -| --- | --- | -| `python-dotenv` ≥ 1.0.0 | `pip install python-dotenv>=1.0.0` | - -Included when installing optional dependencies: `pip3 install -r requirements-optional.txt` - -## Parameters - -| Parameter | Type | Required | Description | -| --- | --- | --- | --- | -| `action` | string | Yes | Operation type: `get`, `set`, `list`, `delete` | -| `key` | string | No | Environment variable name | -| `value` | string | No | Environment variable value (only for `set`) | - -## Usage - -Tell the Agent what key you need to configure, and it will automatically invoke this tool: - -- "Configure my BOCHA_API_KEY" -- "Set OPENAI_API_KEY to sk-xxx" -- "Show configured environment variables" - -Configured keys are automatically injected into the `bash` tool's execution environment. - - - - diff --git a/docs/en/tools/index.mdx b/docs/en/tools/index.mdx deleted file mode 100644 index b85930e8..00000000 --- a/docs/en/tools/index.mdx +++ /dev/null @@ -1,60 +0,0 @@ ---- -title: Tools Overview -description: CowAgent built-in tools system ---- - -Tools are the core capability for Agent to access operating system resources. The Agent intelligently selects and invokes tools based on task requirements, performing file operations, command execution, web search, scheduled tasks, and more. Tools are implemented in the `agent/tools/` directory. 
- -## Built-in Tools - -The following tools are available by default with no extra configuration: - - - - Read file content, supports text, images, PDF - - - Create or overwrite files - - - Edit files via precise text replacement - - - List directory contents - - - Execute system commands - - - Send files or images to user - - - Search and read long-term memory - - - -## Optional Tools - -The following tools require additional dependencies or API key configuration: - - - - Manage API keys and secrets - - - Create and manage scheduled tasks - - - Search the internet for real-time information - - - -## MCP Tools - -Integrate thousands of community tools (maps, GitHub, Notion, etc.) via the [Model Context Protocol](https://modelcontextprotocol.io). Configure `mcp.json` once, ready to use: - - - - Supports standard stdio / SSE transports. Hot-reload, zero code changes. - - diff --git a/docs/en/tools/ls.mdx b/docs/en/tools/ls.mdx deleted file mode 100644 index e9a5f656..00000000 --- a/docs/en/tools/ls.mdx +++ /dev/null @@ -1,23 +0,0 @@ ---- -title: ls - Directory List -description: List directory contents ---- - -List directory contents, sorted alphabetically, directories suffixed with `/`, includes hidden files. - -## Dependencies - -No extra dependencies, available by default. 
- -## Parameters - -| Parameter | Type | Required | Description | -| --- | --- | --- | --- | -| `path` | string | Yes | Directory path, relative paths are based on workspace directory | -| `limit` | integer | No | Maximum entries to return, default 500 | - -## Use Cases - -- Browse project structure -- Find specific files -- Check if a directory exists diff --git a/docs/en/tools/mcp.mdx b/docs/en/tools/mcp.mdx deleted file mode 100644 index fc320fe0..00000000 --- a/docs/en/tools/mcp.mdx +++ /dev/null @@ -1,112 +0,0 @@ ---- -title: MCP Tools -description: Integrate external tool ecosystems via the Model Context Protocol ---- - -CowAgent supports the [Model Context Protocol (MCP)](https://modelcontextprotocol.io), allowing the Agent to directly invoke tens of thousands of community MCP tools. Configure `mcp.json` once and the tools are exposed to the LLM in exactly the same way as built-in tools — automatically selected and invoked. - -## Configuration File - -CowAgent reads `~/cow/mcp.json`. If the file does not exist, no MCP tools are loaded — and no error is raised. - -For Docker deployments, the official `docker-compose.yml` already mounts the host's `./cow` directory to `/home/agent/cow` inside the container (i.e. the container user's `~/cow`). Just drop `mcp.json` into the host's `./cow/` directory and it will take effect. - -### Standard Format - -Fully compatible with the MCP community standard, identical to Claude Desktop / Cursor: - -```json -{ - "mcpServers": { - "": { - "command": "npx", - "args": ["-y", "some-mcp-package"], - "env": { - "API_KEY": "your-key-here" - } - } - } -} -``` - -| Field | Required | Description | -| --- | --- | --- | -| `command` | stdio | Executable to launch the server (e.g. 
`npx`, `python`, `uvx`) | -| `args` | No | Arguments passed to `command` | -| `env` | No | Environment variables for the subprocess, commonly used for API keys | -| `url` | SSE / Streamable HTTP | Remote endpoint URL (alternative to `command`) | -| `type` | Remote | Remote transport type: `sse` or `streamable-http` (defaults to `sse`) | -| `headers` | No | Extra HTTP headers for remote requests (e.g. `Authorization`); Streamable HTTP only | -| `disabled` | No | When `true`, this server is skipped — handy for temporary disabling | - -### Full Example - -```json -{ - "mcpServers": { - "fetch": { - "command": "uvx", - "args": ["mcp-server-fetch"] - }, - "github": { - "command": "npx", - "args": ["-y", "@modelcontextprotocol/server-github"], - "env": { - "GITHUB_PERSONAL_ACCESS_TOKEN": "" - } - } - } -} -``` - -- **fetch**: Generic web page fetcher that returns page text content. No API key required. -- **github**: Access GitHub repos, issues, PRs, etc. Requires a Personal Access Token. - -## Let the Agent Configure It for You - -CowAgent ships with `read` / `write` / `edit` tools, so **you can simply send the MCP config to the Agent and ask it to write the file**: - -For example: - -```markdown -Add this MCP to ~/cow/mcp.json: - -{"mcpServers":{"fetch":{"command":"uvx","args":["mcp-server-fetch"]}}} -``` - -The Agent will: - -1. Read the existing MCP config and merge the new server entry, preserving existing ones -2. Hot-reload the new MCP server, so the corresponding tools become available on the next message - -## How It Works - -- **Async loading at startup**: All servers configured in `mcp.json` are loaded asynchronously in the background, never blocking the main loop — chat is usable immediately. -- **Hot reload**: When you or the Agent modifies `mcp.json`, changed servers are automatically reloaded after the current message — no need to restart cow. -- **Flat exposure**: Each method exposed by an MCP server appears as an individual tool. 
The LLM picks one directly without a second-stage decision. - -## Supported Transports - -| Transport | Description | Config Field | -| --- | --- | --- | -| **stdio** | Subprocess communication. The most common option, with the richest community ecosystem. | `command` + `args` | -| **SSE** | HTTP Server-Sent Events. Legacy remote transport. | `url` (default) | -| **Streamable HTTP** | New unified remote transport, gradually replacing SSE. | `type: "streamable-http"` + `url` | - -## Troubleshooting - -| Symptom | What to Check | -| --- | --- | -| Agent has no MCP tools after startup | Verify that `~/cow/mcp.json` exists and contains valid JSON | -| A specific server fails to load | Look for `[MCP] Server 'xxx' load failed` in startup logs — usually missing dependencies or API keys | -| Changes to `mcp.json` aren't applied | Changes take effect on **the next message**. If the server config didn't actually change (e.g. only comments edited), no restart is triggered | -| Docker deployment | Make sure host's `./cow` is mounted to `/home/agent/cow` in the container, then just drop `mcp.json` into host's `./cow/`. Or just ask the Agent to do it | - -## Recommended MCP Marketplaces - -You can browse third-party MCP marketplaces and copy a JSON config to use directly, for example: - -- [mcp.so](https://mcp.so) — Global MCP service index -- [ModelScope MCP Hub](https://modelscope.cn/mcp) — ModelScope's MCP hub, more reliable from mainland China - -Any MCP server that follows the standard protocol (stdio / SSE / Streamable HTTP) integrates with CowAgent out of the box. 
diff --git a/docs/en/tools/memory.mdx b/docs/en/tools/memory.mdx deleted file mode 100644 index a1874eef..00000000 --- a/docs/en/tools/memory.mdx +++ /dev/null @@ -1,43 +0,0 @@ ---- -title: memory - Memory & Knowledge -description: Search and read long-term memory and knowledge base files ---- - -The memory tool contains two sub-tools: `memory_search` (search memory) and `memory_get` (read memory or knowledge files). - -When the [knowledge base](/en/knowledge) feature is enabled, both tools also support accessing files under the `knowledge/` directory. - -## Dependencies - -No extra dependencies, available by default. Managed by the Agent Core memory system. - -## memory_search - -Search historical memory and knowledge base content with hybrid keyword and vector retrieval. - -| Parameter | Type | Required | Description | -| --- | --- | --- | --- | -| `query` | string | Yes | Search query | - -## memory_get - -Read the content of a specific memory or knowledge file. - -| Parameter | Type | Required | Description | -| --- | --- | --- | --- | -| `path` | string | Yes | Relative path to the file (e.g. `MEMORY.md`, `memory/2026-01-01.md`, `knowledge/concepts/rag.md`) | -| `start_line` | integer | No | Start line number | -| `end_line` | integer | No | End line number | - -## How It Works - -The Agent automatically invokes memory tools in these scenarios: - -- When the user shares important information → stores to memory -- When historical context is needed → searches relevant memory -- When conversation reaches a certain length → extracts summary for storage -- When discussing domain knowledge → retrieves relevant pages from the knowledge base - - - When `knowledge` is set to `false` in config, the tool descriptions and search scope automatically adjust to include only memory files. 
- diff --git a/docs/en/tools/read.mdx b/docs/en/tools/read.mdx deleted file mode 100644 index 56b56570..00000000 --- a/docs/en/tools/read.mdx +++ /dev/null @@ -1,24 +0,0 @@ ---- -title: read - File Read -description: Read file content ---- - -Read file content. Supports text files, PDF files, images (returns metadata), and more. - -## Dependencies - -No extra dependencies, available by default. - -## Parameters - -| Parameter | Type | Required | Description | -| --- | --- | --- | --- | -| `path` | string | Yes | File path, relative paths are based on workspace directory | -| `offset` | integer | No | Start line number (1-indexed), negative values read from the end | -| `limit` | integer | No | Number of lines to read | - -## Use Cases - -- View configuration files, log files -- Read code files for analysis -- Check image/video file info diff --git a/docs/en/tools/scheduler.mdx b/docs/en/tools/scheduler.mdx deleted file mode 100644 index 18c211bf..00000000 --- a/docs/en/tools/scheduler.mdx +++ /dev/null @@ -1,80 +0,0 @@ ---- -title: scheduler - Scheduler -description: Create and manage scheduled tasks ---- - -Create and manage dynamic scheduled tasks with flexible scheduling and execution modes. 
- -## Dependencies - -| Dependency | Install Command | -| --- | --- | -| `croniter` ≥ 2.0.0 | `pip install croniter>=2.0.0` | - -Included in core dependencies: `pip3 install -r requirements.txt` - -## Scheduling Modes - -| Mode | Description | -| --- | --- | -| One-time | Execute once at a specified time | -| Fixed interval | Repeat at fixed time intervals | -| Cron expression | Define complex schedules using Cron syntax | - -## Execution Modes - -- **Fixed message**: Send a preset message when triggered -- **Agent dynamic task**: Agent intelligently executes the task when triggered - -## Usage - -Create and manage scheduled tasks with natural language: - -- "Send me a weather report every morning at 9 AM" -- "Check server status every 2 hours" -- "Remind me about the meeting tomorrow at 3 PM" -- "Show all scheduled tasks" - - - - - -## Results injected into the conversation - -Scheduled tasks run inside an isolated session (so internal planning and tool calls do not pollute the user's chat), but the **final output** is written back to the user's real session as a message pair. You can directly follow up — e.g. "expand on point 2 from earlier". - -**Default policy** - -- Output of Agent dynamic tasks is injected into the conversation -- Fixed-message tasks are not injected by default (configurable) -- Each session keeps the most recent **3 pairs** of scheduler messages; older pairs are pruned automatically. 
Regular user messages are unaffected - -**Configuration** - -| Key | Default | Description | -| --- | --- | --- | -| `scheduler_inject_to_session` | `true` | Master switch | -| `scheduler_inject_max_per_session` | `3` | Max scheduler message pairs kept per session | -| `scheduler_inject_send_message` | `false` | Whether to also inject fixed-message tasks | - -```json -{ - "scheduler_inject_to_session": true, - "scheduler_inject_max_per_session": 3, - "scheduler_inject_send_message": false -} -``` - -## Context inside scheduled task execution - -The isolated session for scheduled tasks retains a few recent runs of conversation history, so you can naturally do "compare with last time" or "continue from previous conclusion". To prevent prompts from growing unbounded for high-frequency tasks (e.g. a 5-minute monitor), history is auto-trimmed: - -``` -scheduler_keep_turns = max(1, agent_max_context_turns / 5) -``` - -`agent_max_context_turns` defaults to `20`, so each scheduled run keeps the most recent **4 turns** of history by default. Increase `agent_max_context_turns` if you need longer memory. - - -For group-chat scenarios (Feishu / WeCom group bots / DingTalk, etc.), the user's real `session_id` looks like `user_id:group_id` — different from `receiver`. Scheduler records the correct `session_id` when a task is created. For older `tasks.json` entries missing this field, the runtime falls back to `receiver`, matching legacy behavior. - diff --git a/docs/en/tools/send.mdx b/docs/en/tools/send.mdx deleted file mode 100644 index 1cf089ac..00000000 --- a/docs/en/tools/send.mdx +++ /dev/null @@ -1,23 +0,0 @@ ---- -title: send - File Send -description: Send files to user ---- - -Send files to the user (images, videos, audio, documents, etc.), used when the user explicitly requests to send/share a file. - -## Dependencies - -No extra dependencies, available by default. 
- -## Parameters - -| Parameter | Type | Required | Description | -| --- | --- | --- | --- | -| `path` | string | Yes | File path, can be absolute or relative to workspace | -| `message` | string | No | Accompanying message | - -## Use Cases - -- Send generated code or documents to the user -- Send screenshots, charts -- Share downloaded files diff --git a/docs/en/tools/vision.mdx b/docs/en/tools/vision.mdx deleted file mode 100644 index 4db6bec0..00000000 --- a/docs/en/tools/vision.mdx +++ /dev/null @@ -1,75 +0,0 @@ ---- -title: vision - Image Understanding -description: Analyze image content (recognition, description, OCR, etc.) ---- - -Analyze local images or image URLs using Vision API. Supports content description, text extraction (OCR), object recognition, and more. - -## Model Selection - -The vision tool uses a multi-level auto-selection strategy with automatic fallback — no manual configuration required: - -1. **Main model** — uses the currently configured main model for image recognition (must be a multimodal model) -2. **Other configured models** — auto-discovers other multimodal models with configured API keys as alternatives - -If the current provider fails, the tool automatically tries the next one until it succeeds or all fail. - -### Supported Models - -| Provider | Vision Model | Notes | -| --- | --- | --- | -| OpenAI / Compatible | Main model | All OpenAI-protocol-compatible multimodal models | -| Qwen (DashScope) | Main model | e.g. qwen3.6-plus, etc. | -| Claude | Main model | Anthropic native image format | -| Gemini | Main model | inlineData format | -| Doubao | Main model | doubao-seed-2-0 series natively supported | -| Kimi (Moonshot) | Main model | kimi-k2.6, kimi-k2.5 natively supported | -| ERNIE | Main model | Defaults to the multimodal main model (e.g. 
`ernie-5.1`); falls back to `ernie-4.5-turbo-vl` when the main model is not multimodal | -| ZhipuAI | glm-5v-turbo | Always uses the dedicated vision model | -| MiniMax | MiniMax-Text-01 | Always uses the dedicated vision model | - - - ZhipuAI and MiniMax text models do not support image understanding, so their dedicated vision models are always used automatically. - - -> When `use_linkai=true`, LinkAI's multimodal model is used by default. - -## Custom Configuration - -To specify the model used by Vision, configure it in `config.json`, for example: - -```json -{ - "tools": { - "vision": { - "model": "gpt-4.1" - } - } -} -``` - -The specified model is **used first**, and the tool automatically routes to the corresponding provider based on the model name; on failure, it falls back to other configured providers. - -In most cases no configuration is needed — the tool works automatically as long as the main model supports multimodal input or any vision-capable API key is configured. - -## Parameters - -| Parameter | Type | Required | Description | -| --- | --- | --- | --- | -| `image` | string | Yes | Local file path or HTTP(S) image URL | -| `question` | string | Yes | Question to ask about the image | - -Supported image formats: jpg, jpeg, png, gif, webp - - - -## Use Cases - -- Describe image content -- Extract text from images (OCR) -- Identify objects, colors, scenes -- Analyze screenshots and scanned documents - - - Images larger than 1MB are automatically compressed before upload. All images (including remote URLs) are converted to base64 for transmission to ensure compatibility with all model backends. - diff --git a/docs/en/tools/web-fetch.mdx b/docs/en/tools/web-fetch.mdx deleted file mode 100644 index 0a0349b9..00000000 --- a/docs/en/tools/web-fetch.mdx +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: web_fetch - Web Fetch -description: Fetch web pages and document content ---- - -Fetch the content of an HTTP/HTTPS URL. 
Web pages are extracted as readable text; document files (PDF, Word, Excel, etc.) are downloaded and parsed automatically. - -## Parameters - -| Parameter | Type | Required | Description | -| --- | --- | --- | --- | -| `url` | string | Yes | HTTP/HTTPS URL (web page or document) | - -## Supported file types - -| Type | Formats | -| --- | --- | -| PDF | `.pdf` | -| Word | `.docx` | -| Text | `.txt`, `.md`, `.csv`, `.log` | -| Spreadsheet | `.xls`, `.xlsx` | -| Presentation | `.ppt`, `.pptx` | - -## Use cases - -- Extract readable text from a web page -- Download and parse remote documents -- Inspect API response bodies - - - `web_fetch` only retrieves static HTML. For pages that require JavaScript rendering (such as SPAs), use the `browser` tool instead. - diff --git a/docs/en/tools/web-search.mdx b/docs/en/tools/web-search.mdx deleted file mode 100644 index 80c1eac1..00000000 --- a/docs/en/tools/web-search.mdx +++ /dev/null @@ -1,51 +0,0 @@ ---- -title: web_search - Web Search -description: Search the internet for real-time information, with support for multiple search providers ---- - -Search the internet for real-time information, news, research, and more. Supports four backends — Bocha, ERNIE, GLM, and LinkAI — and works once any one of them is configured. - - - It is recommended to configure providers and routing strategy visually from the "Model Management → Search" panel in the [Web console](/en/channels/web), without manually editing the configuration file. 
- - -## Providers - -| Provider | Credential | Apply | -| --- | --- | --- | -| Bocha | `tools.web_search.bocha_api_key` | [Bocha Open Platform](https://open.bochaai.com/) | -| ERNIE | Reuses `qianfan_api_key` | [Qianfan Console](https://cloud.baidu.com/doc/qianfan/s/2mh4su4uy) | -| Zhipu | Reuses `zhipu_ai_api_key` | [Zhipu Open Platform](https://docs.bigmodel.cn/cn/guide/tools/web-search) | -| LinkAI | Reuses `linkai_api_key` | [LinkAI Console](https://link-ai.tech/console/interface) | - -Except for Bocha which requires a dedicated `bocha_api_key`, the other three reuse the corresponding model's API key — configuring the model automatically grants search capability. - -## Routing Strategy - -```json -{ - "tools": { - "web_search": { - "strategy": "auto", - "provider": "" - } - } -} -``` - -- `auto` (default): the Agent intelligently picks among configured providers and may call multiple providers in a single task to gather more comprehensive results; when none is specified, falls back through `bocha → qianfan → zhipu → linkai`. -- `fixed`: always use the provider specified in `provider`; falls back to the auto order if that provider's credentials are missing. - -## Tool Parameters - -| Parameter | Type | Required | Description | -| --- | --- | --- | --- | -| `query` | string | Yes | Search keywords | -| `count` | integer | No | Number of results (1–50, default 10) | -| `freshness` | string | No | Time range: `noLimit` (default), `oneDay`, `oneWeek`, `oneMonth`, `oneYear`, or date range like `2025-01-01..2025-02-01` | -| `summary` | boolean | No | Whether to return page summaries (default false) | -| `provider` | string | No | Available when multiple providers are configured under the `auto` strategy; used to switch provider for a single call | - - - If none of the four credentials are configured, this tool is not registered with the Agent. 
- diff --git a/docs/en/tools/write.mdx b/docs/en/tools/write.mdx deleted file mode 100644 index 2c0a10f0..00000000 --- a/docs/en/tools/write.mdx +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: write - File Write -description: Create or overwrite files ---- - -Write content to a file. Creates the file if it doesn't exist, overwrites if it does. Automatically creates parent directories. - -## Dependencies - -No extra dependencies, available by default. - -## Parameters - -| Parameter | Type | Required | Description | -| --- | --- | --- | --- | -| `path` | string | Yes | File path | -| `content` | string | Yes | Content to write | - -## Use Cases - -- Create new code files or scripts -- Generate configuration files -- Save processing results - - - Single writes should not exceed 10KB. For large files, create a skeleton first, then use the edit tool to add content in chunks. - diff --git a/docs/guide/manual-install.mdx b/docs/guide/manual-install.mdx index 799a1191..154f99a8 100644 --- a/docs/guide/manual-install.mdx +++ b/docs/guide/manual-install.mdx @@ -1,11 +1,11 @@ --- -title: 手动安装 -description: 手动部署 CowAgent(源码 / Docker) +title: Manual Install +description: Deploy CowAgent manually (source code / Docker) --- -## 源码部署 +## Source Code Deployment -### 1. 克隆项目代码 +### 1. Clone the project ```bash git clone https://github.com/zhayujie/CowAgent @@ -13,170 +13,136 @@ cd CowAgent/ ``` - 若遇到网络问题可使用国内仓库地址:https://gitee.com/zhayujie/CowAgent + For network issues, use the mirror: https://gitee.com/zhayujie/CowAgent -### 2. 安装依赖 +### 2. Install dependencies -核心依赖(必选): +Core dependencies (required): ```bash pip3 install -r requirements.txt ``` -扩展依赖(可选,建议安装): +Optional dependencies (recommended): ```bash pip3 install -r requirements-optional.txt ``` -> 国内网络可使用镜像源加速:`pip3 install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple` +### 3. Install Cow CLI -### 3. 
安装 Cow CLI - -安装命令行工具,用于管理服务和技能: +Install the command-line tool for managing services and skills: ```bash pip3 install -e . ``` -安装后即可使用 `cow` 命令: +Then use the `cow` command: ```bash cow help ``` - 此步骤为推荐操作。安装后可以使用 `cow start`、`cow stop`、`cow update` 等命令管理服务,也可以使用 `cow skill` 管理技能。如果不安装 CLI,可以使用 `./run.sh` 或 `python3 app.py` 运行。 + This step is recommended. After installation you can use `cow start`, `cow stop`, `cow update` to manage the service, and `cow skill` to manage skills. Without the CLI, you can use `./run.sh` or `python3 app.py` to run. -### 3.1 安装浏览器工具(可选) +### 4. Configure -如需使用浏览器工具(控制浏览器访问网页、填写表单等),运行: - -```bash -cow install-browser -``` - -该命令会自动安装 Playwright 和 Chromium 浏览器。详细说明参考 [浏览器工具文档](/tools/browser)。 - - - 浏览器工具依赖较重(~300MB),如不需要可跳过,不影响其他功能正常使用。 - - -### 4. 配置 - -复制配置文件模板并编辑: +Copy the config template and edit: ```bash cp config-template.json config.json ``` -在 `config.json` 中填写模型 API Key 和通道类型等配置,详细说明参考各 [模型文档](/models/minimax)。 +Fill in model API keys, channel type, and other settings in `config.json`. See the [model docs](/models/index) for details. -### 5. 运行 +### 5. Run -**使用 Cow CLI 运行(推荐):** +**Using Cow CLI (recommended):** ```bash cow start ``` -**或者本地前台运行:** +**Or run locally in foreground:** ```bash python3 app.py ``` -运行后默认启动 Web 控制台,访问 `http://localhost:9899` 开始对话和管理Agent。 +By default, the Web console starts. Access `http://localhost:9899` to chat. -**服务器后台运行(不使用 CLI 时):** +**Background run on server (without CLI):** ```bash nohup python3 app.py & tail -f nohup.out ``` - **服务器公网访问 Web 控制台**:默认 `web_host` 仅监听 `127.0.0.1`(本机访问),需公网访问时请在 `config.json` 中将 `web_host` 设为 `0.0.0.0`,同时强烈建议设置 `web_password` 启用鉴权。此外还需在防火墙/安全组中放行 `9899` 端口,建议仅对指定 IP 开放以保证安全。 + **Deploying on a server?** By default `web_host` only listens on `127.0.0.1` (local access). Set `web_host` to `0.0.0.0` in `config.json` to make the console reachable from outside, and set `web_password` to protect it. 
Don't forget to open port `9899` in your firewall or security group — ideally restricted to specific IPs. -## Docker 部署 +## Docker Deployment -使用 Docker 部署无需下载源码和安装依赖。Agent模式下更推荐使用源码部署以获得更多系统访问能力。 +Docker deployment does not require cloning source code or installing dependencies. For Agent mode, source deployment is recommended for broader system access. - 需要安装 [Docker](https://docs.docker.com/engine/install/) 和 docker-compose。 + Requires [Docker](https://docs.docker.com/engine/install/) and docker-compose. -**1. 下载配置文件** +**1. Download config** ```bash curl -O https://cdn.link-ai.tech/code/cow/docker-compose.yml ``` -打开 `docker-compose.yml` 填写所需配置。 +Edit `docker-compose.yml` with your configuration. -**2. 启动容器** +**2. Start container** ```bash sudo docker compose up -d ``` -**3. 查看日志** +**3. View logs** ```bash sudo docker logs -f chatgpt-on-wechat ``` - **Docker 公网访问 Web 控制台**:在 `docker-compose.yml` 中将 `WEB_HOST` 设为 `0.0.0.0`(容器内默认绑定 `127.0.0.1` 无法从宿主机外访问),同时强烈建议设置 `WEB_PASSWORD` 启用鉴权。此外需确保 `9899` 端口正确映射到宿主机,并在防火墙/安全组放行该端口。 + **Running in Docker?** Set `WEB_HOST` to `0.0.0.0` in `docker-compose.yml` so the console is reachable from outside the container, and set `WEB_PASSWORD` to protect it. Make sure port `9899` is mapped to the host and open in your firewall or security group. 
-## 核心配置项 +## Core Configuration - - - ```json - { - "channel_type": "web", - "model": "deepseek-v4-flash", - "deepseek_api_key": "", - "agent": true, - "agent_workspace": "~/cow", - "agent_max_context_tokens": 40000, - "agent_max_context_turns": 30, - "agent_max_steps": 15, - "cow_lang": "auto" - } - ``` - - - ```yaml - environment: - CHANNEL_TYPE: 'web' - MODEL: 'deepseek-v4-flash' - DEEPSEEK_API_KEY: 'your-api-key' - DEEPSEEK_API_BASE: 'https://api.deepseek.com/v1' - AGENT: 'True' - AGENT_MAX_CONTEXT_TOKENS: 40000 - AGENT_MAX_CONTEXT_TURNS: 30 - AGENT_MAX_STEPS: 15 - COW_LANG: 'auto' - ``` - - +```json +{ + "channel_type": "web", + "model": "deepseek-v4-flash", + "deepseek_api_key": "", + "agent": true, + "agent_workspace": "~/cow", + "agent_max_context_tokens": 40000, + "agent_max_context_turns": 30, + "agent_max_steps": 15, + "cow_lang": "auto" +} +``` -| 参数 | 环境变量 | 说明 | 默认值 | -| --- | --- | --- | --- | -| `channel_type` | `CHANNEL_TYPE` | 接入渠道类型 | `web` | -| `model` | `MODEL` | 模型名称 | `deepseek-v4-flash` | -| `agent` | `AGENT` | 是否启用 Agent 模式 | `true` | -| `agent_workspace` | - | Agent 工作空间路径 | `~/cow` | -| `agent_max_context_tokens` | `AGENT_MAX_CONTEXT_TOKENS` | 最大上下文 tokens | `40000` | -| `agent_max_context_turns` | `AGENT_MAX_CONTEXT_TURNS` | 最大上下文记忆轮次 | `30` | -| `agent_max_steps` | `AGENT_MAX_STEPS` | 单次任务最大决策步数 | `15` | -| `cow_lang` | `COW_LANG` | 界面、命令文案、系统提示词等的语言,`auto` 自动检测,可设为 `zh` / `en` | `auto` | +| Parameter | Description | Default | +| --- | --- | --- | +| `channel_type` | Channel type | `web` | +| `model` | Model name | `deepseek-v4-flash` | +| `agent` | Enable Agent mode | `true` | +| `agent_workspace` | Agent workspace path | `~/cow` | +| `agent_max_context_tokens` | Max context tokens | `40000` | +| `agent_max_context_turns` | Max context turns | `30` | +| `agent_max_steps` | Max decision steps per task | `15` | +| `cow_lang` | Language for the UI, command text and system prompts; `auto` to detect, or set `zh` / `en` | `auto` | - 
全部配置项可在项目 [`config.py`](https://github.com/zhayujie/CowAgent/blob/master/config.py) 文件中查看。Docker 部署时,配置项名称需转为大写环境变量格式。 + Full configuration options are in the project [`config.py`](https://github.com/zhayujie/CowAgent/blob/master/config.py). diff --git a/docs/guide/quick-start.mdx b/docs/guide/quick-start.mdx index dd71ee88..ed5ec3f1 100644 --- a/docs/guide/quick-start.mdx +++ b/docs/guide/quick-start.mdx @@ -1,13 +1,13 @@ --- -title: 一键安装 -description: 使用脚本一键安装和管理 CowAgent +title: One-click Install +description: One-click install and manage CowAgent with scripts --- -项目提供了一键安装、配置、启动、管理程序的脚本,推荐使用脚本快速运行。 +The project provides scripts for one-click install, configuration, startup, and management. Script-based deployment is recommended for quick setup. -支持 Linux、macOS、Windows 操作系统,需安装 Python 3.7 ~ 3.12(推荐 3.9)。 +Supports Linux, macOS, and Windows. Requires Python 3.7-3.12 (3.9 recommended). -## 安装命令 +## Install Command @@ -22,37 +22,37 @@ description: 使用脚本一键安装和管理 CowAgent -脚本自动执行以下流程: +The script automatically performs these steps: -1. 检查 Python 环境(需要 Python 3.7+) -2. 安装必要工具(git、curl 等) -3. 克隆项目代码到 `~/CowAgent` -4. 安装 Python 依赖和 Cow CLI -5. 引导配置 AI 模型和通信渠道 -6. 启动服务 +1. Check Python environment (requires Python 3.7+) +2. Install required tools (git, curl, etc.) +3. Clone project to `~/CowAgent` +4. Install Python dependencies and Cow CLI +5. Guided configuration for AI model and channel +6. Start service -运行后默认启动 Web 控制台,访问 `http://localhost:9899` 开始对话和管理Agent。 +By default, the Web console starts after installation. Access `http://localhost:9899` to begin chatting. - **服务器部署需要公网访问控制台时**,请在 `config.json` 中将 `web_host` 设为 `0.0.0.0`(默认仅监听 `127.0.0.1` 本机访问),同时强烈建议设置 `web_password` 启用鉴权。然后通过 `http://:9899` 访问,并确保防火墙/安全组放行 `9899` 端口。 + **Deploying on a server?** By default `web_host` only listens on `127.0.0.1` (local access only). Set `web_host` to `0.0.0.0` in `config.json` to make the console reachable from outside, and set `web_password` to protect it. 
Don't forget to open port `9899` in your firewall or security group — ideally restricted to specific IPs. -## 管理命令 +## Management Commands -安装完成后,使用 `cow` CLI 管理服务: +After installation, use the `cow` command to manage the service: -| 命令 | 说明 | +| Command | Description | | --- | --- | -| `cow start` | 启动服务 | -| `cow stop` | 停止服务 | -| `cow restart` | 重启服务 | -| `cow status` | 查看运行状态 | -| `cow logs` | 查看实时日志 | -| `cow update` | 更新代码并重启 | -| `cow install-browser` | 安装浏览器工具依赖 | +| `cow start` | Start service | +| `cow stop` | Stop service | +| `cow restart` | Restart service | +| `cow status` | Check run status | +| `cow logs` | View real-time logs | +| `cow update` | Update code and restart | +| `cow install-browser` | Install browser tool dependencies | -更多命令和用法参考 [命令文档](/cli/index)。 +See the [Commands documentation](/cli/index) for more details. - 如果 `cow` 命令不可用,也可以使用 `./run.sh <命令>`(Linux/macOS)或 `.\scripts\run.ps1 <命令>`(Windows)作为替代,功能等效。 + If the `cow` command is not available, you can use `./run.sh <command>` (Linux/macOS) or `.\scripts\run.ps1 <command>` (Windows) as a fallback. Both are functionally equivalent. diff --git a/docs/guide/upgrade.mdx b/docs/guide/upgrade.mdx index 7a36d706..d1cd5df6 100644 --- a/docs/guide/upgrade.mdx +++ b/docs/guide/upgrade.mdx @@ -1,31 +1,31 @@ --- -title: 更新升级 -description: CowAgent 的升级方式说明 +title: Upgrade +description: How to upgrade CowAgent --- -## 命令升级(推荐) +## Recommended: One-line upgrade -使用 `cow update` 一键完成代码更新和服务重启: +Use `cow update` to pull the latest code and restart the service in one step: ```bash cow update ``` -该命令会自动完成以下流程: +The command runs the following automatically: -1. 拉取最新代码(`git pull`) -2. 停止当前服务 -3. 更新 Python 依赖 -4. 重新安装 CLI -5. 启动服务 +1. Pull the latest code (`git pull`) +2. Stop the running service +3. Update Python dependencies +4. Reinstall the CLI +5. Start the service - 如果未安装 Cow CLI,也可以使用 `./run.sh update` 完成相同操作。 + If the Cow CLI is not installed, `./run.sh update` performs the same operations.
-## 手动升级 +## Manual upgrade -在项目根目录下执行: +Run the following inside the project root: ```bash git pull @@ -33,23 +33,23 @@ pip3 install -r requirements.txt pip3 install -e . ``` -更新完成后重启服务: +Then restart the service: ```bash -# 使用 Cow CLI (推荐) +# Using Cow CLI (recommended) cow restart -# 或使用 run.sh +# Or using run.sh ./run.sh restart -# 或使用 nohup 直接运行 +# Or restart manually with nohup kill $(ps -ef | grep app.py | grep -v grep | awk '{print $2}') nohup python3 app.py & tail -f nohup.out ``` -## Docker 升级 +## Docker upgrade -在 `docker-compose.yml` 所在目录下执行: +Run the following in the directory containing `docker-compose.yml`: ```bash sudo docker compose pull @@ -57,5 +57,5 @@ sudo docker compose up -d ``` - 升级前建议备份 `config.json` 配置文件。Docker 环境下如需保留数据,可通过 volume 挂载持久化工作空间目录。 + Back up `config.json` before upgrading. For Docker deployments, mount the workspace directory as a volume to persist data across upgrades. diff --git a/docs/intro/architecture.mdx b/docs/intro/architecture.mdx index 9d8c3da2..98084b48 100644 --- a/docs/intro/architecture.mdx +++ b/docs/intro/architecture.mdx @@ -1,41 +1,41 @@ --- -title: 项目架构 -description: CowAgent 2.0 的系统架构和核心设计 +title: Architecture +description: CowAgent 2.0 system architecture and core design --- -CowAgent 2.0 从简单的聊天机器人全面升级为超级智能助理,采用 Agent 架构设计,具备自主思考、规划任务、长期记忆和技能扩展等能力。 +CowAgent 2.0 has evolved from a simple chatbot into a super intelligent assistant with Agent architecture, featuring autonomous thinking, task planning, long-term memory, and skill extensibility. 
-## 系统架构 +## System Architecture -CowAgent 的整体架构由以下核心模块组成: +CowAgent's architecture consists of the following core modules: -CowAgent Architecture +CowAgent Architecture -| 模块 | 说明 | +| Module | Description | | --- | --- | -| **Plan** | 理解用户意图,将复杂任务分解为多步骤计划,循环调用工具直到完成目标 | -| **Memory** | 自动将重要信息持久化为核心记忆和日级记忆,支持关键词和向量混合检索,跨会话保持上下文连续性 | -| **Knowledge** | 以主题维度组织结构化知识,Agent 自主整理有价值信息为 Markdown 页面,维护索引和交叉引用,构建持续增长的知识网络 | -| **Tools** | Agent 访问操作系统资源的核心能力,内置文件读写、终端执行、浏览器操作、定时调度、记忆检索、联网搜索等 10+ 种工具 | -| **Skills** | 加载和管理 Skills,支持从 Skill Hub、GitHub 等一键安装,或通过对话创建自定义技能 | -| **Models** | 模型层,统一接入 OpenAI、Claude、Gemini、DeepSeek、MiniMax、GLM、Qwen 等国内外主流大语言模型 | -| **Channels** | 消息通道层,负责接收和发送消息,支持 Web 控制台、微信、飞书、钉钉、企微、公众号等,统一消息协议 | -| **CLI** | 命令行系统,提供终端命令(`cow`)和对话命令(`/`),支持进程管理、技能安装、配置修改、知识库管理等操作 | +| **Plan** | Understands user intent, decomposes complex tasks into multi-step plans, and iteratively invokes tools until the goal is achieved | +| **Memory** | Automatically persists important information as core memory and daily memory, with hybrid keyword and vector retrieval for cross-session context continuity | +| **Knowledge** | Organizes structured knowledge by topic. The Agent autonomously distills valuable information into Markdown pages, maintaining indexes and cross-references to build a growing knowledge network | +| **Tools** | Core capability for Agent to access OS resources. 10+ built-in tools including file read/write, terminal, browser, scheduler, memory search, web search, and more | +| **Skills** | Loads and manages Skills. Supports one-click installation from Skill Hub, GitHub, and more, or custom skill creation through conversation | +| **Models** | Model layer with unified access to OpenAI, Claude, Gemini, DeepSeek, MiniMax, GLM, Qwen, and other mainstream LLMs | +| **Channels** | Message channel layer for receiving and sending messages. 
Supports Web console, WeChat, Feishu, DingTalk, WeCom, WeChat Official Account, and more with a unified protocol | +| **CLI** | Command-line system providing terminal commands (`cow`) and chat commands (`/`) for process management, skill installation, configuration, knowledge base management, and more | -## Agent 模式 +## Agent Mode Workflow -启用 Agent 模式后,CowAgent 会以自主智能体的方式运行,核心工作流如下: +When Agent mode is enabled, CowAgent runs as an autonomous agent with the following workflow: -1. **接收消息** — 通过通道接收用户输入 -2. **理解意图** — 分析任务需求和上下文 -3. **规划任务** — 将复杂任务分解为多个步骤 -4. **调用工具** — 选择合适的工具执行每个步骤 -5. **记忆与知识更新** — 将重要信息存入长期记忆,将结构化知识整理至知识库 -6. **返回结果** — 将执行结果发送回用户 +1. **Receive Message** — Receive user input through channels +2. **Understand Intent** — Analyze task requirements and context +3. **Plan Task** — Break complex tasks into multiple steps +4. **Invoke Tools** — Select and execute appropriate tools for each step +5. **Update Memory & Knowledge** — Store important information in long-term memory and organize structured knowledge into the knowledge base +6. 
**Return Result** — Send execution results back to the user -## 工作空间 +## Workspace Directory Structure -Agent 的工作空间默认位于 `~/cow` 目录,用于存储系统提示词、记忆文件、技能文件等: +The Agent workspace is located at `~/cow` by default and stores system prompts, memory files, and skill files: ``` ~/cow/ @@ -52,36 +52,36 @@ Agent 的工作空间默认位于 `~/cow` 目录,用于存储系统提示词 └── skill-2/ ``` -秘钥文件单独存储在 `~/.cow` 目录(出于安全考虑): +Secret keys are stored separately in `~/.cow` directory for security: ``` ~/.cow/ └── .env # Secret keys for skills ``` -## 核心配置 +## Core Configuration -在 `config.json` 中配置 Agent 模式的核心参数: +Configure Agent mode parameters in `config.json`: ```json { "agent": true, "agent_workspace": "~/cow", - "agent_max_context_tokens": 40000, - "agent_max_context_turns": 30, - "agent_max_steps": 15, + "agent_max_context_tokens": 50000, + "agent_max_context_turns": 20, + "agent_max_steps": 20, "enable_thinking": false, "cow_lang": "auto" } ``` -| 参数 | 说明 | 默认值 | +| Parameter | Description | Default | | --- | --- | --- | -| `agent` | 是否启用 Agent 模式 | `true` | -| `agent_workspace` | 工作空间路径 | `~/cow` | -| `agent_max_context_tokens` | 最大上下文 token 数 | `50000` | -| `agent_max_context_turns` | 最大上下文记忆轮次 | `20` | -| `agent_max_steps` | 单次任务最大决策步数 | `20` | -| `enable_thinking` | 是否启用深度思考模式 | `false` | -| `knowledge` | 是否启用个人知识库 | `true` | -| `cow_lang` | 界面、命令文案、系统提示词等的语言,`auto` 自动检测,可设为 `zh` / `en` | `auto` | +| `agent` | Enable Agent mode | `true` | +| `agent_workspace` | Workspace path | `~/cow` | +| `agent_max_context_tokens` | Max context tokens | `50000` | +| `agent_max_context_turns` | Max context turns | `20` | +| `agent_max_steps` | Max decision steps per task | `20` | +| `enable_thinking` | Enable deep-thinking mode | `false` | +| `knowledge` | Enable personal knowledge base | `true` | +| `cow_lang` | Language for the UI, command text and system prompts; `auto` to detect, or set `zh` / `en` | `auto` | diff --git a/docs/intro/features.mdx b/docs/intro/features.mdx index ae0820a3..3f27012a 100644 --- 
a/docs/intro/features.mdx +++ b/docs/intro/features.mdx @@ -1,142 +1,139 @@ --- -title: 功能介绍 -description: CowAgent 长期记忆、个人知识库、任务规划、技能系统、CLI 命令、浏览器工具详细说明 +title: Features +description: CowAgent long-term memory, task planning, skills system, CLI commands, and browser tool in detail --- -## 1. 长期记忆 +## 1. Long-term Memory -> 记忆系统让 Agent 能够长期记住重要信息,采用三层记忆流转架构:对话上下文(短期)→ 天级记忆(中期)→ MEMORY.md(长期),形成完整的记忆生命周期。 +The memory system enables the Agent to remember important information over time, using a three-tier memory flow: conversation context (short-term) → daily memory (mid-term) → MEMORY.md (long-term), forming a complete memory lifecycle. -第一次启动 Agent 时,Agent 会主动询问关键信息,并记录至工作空间(默认 `~/cow`)中的智能体设定、用户身份、记忆文件中。 +On first launch, the Agent proactively asks the user for key information and records it in the workspace (default `~/cow`) — including agent settings, user identity, and memory files. -在后续的长期对话中,Agent 会在需要时智能记录或检索记忆,并对自身设定、用户偏好、记忆文件等进行不断更新。每日自动执行 **梦境蒸馏(Deep Dream)**,将分散的天级记忆整合为精炼的长期记忆,同时生成叙事风格的梦境日记。 +In subsequent long-term conversations, the Agent intelligently stores or retrieves memory as needed, continuously updating its own settings, user preferences, and memory files. **Deep Dream** distillation runs daily, consolidating scattered daily memories into refined long-term memory and generating a narrative-style dream diary. -详细说明请参考 [长期记忆](/memory) 和 [梦境蒸馏](/memory/deep-dream)。 +See [Long-term Memory](/memory) and [Deep Dream](/memory/deep-dream) for details. -## 2. 个人知识库 +## 2. Personal Knowledge Base -> 知识库系统让 Agent 能够持续积累和组织结构化知识。与按时间线记录的记忆不同,知识库以主题为维度,将文章、对话洞察、学习材料等整理为互相关联的 Markdown 页面,形成持续增长的知识网络。 +> The knowledge base system enables the Agent to continuously accumulate and organize structured knowledge. Unlike memory which records along a timeline, the knowledge base is organized by topics, transforming articles, conversation insights, and learning materials into interconnected Markdown pages that form a continuously growing knowledge network. 
-Agent 会在对话中自动将有价值的信息整理为知识页面,维护交叉引用和索引,通过 Web 控制台可浏览文档和查看知识图谱。知识库存储在工作空间的 `~/cow/knowledge/` 目录下。 +The Agent automatically organizes valuable information from conversations into knowledge pages, maintaining cross-references and indexes. The Web console provides document browsing and knowledge graph visualization. Knowledge is stored in `~/cow/knowledge/` within the workspace. -- **自动整理**:Agent 在对话中自主提取和整理结构化知识,维护索引和交叉引用 -- **知识图谱**:基于页面间的交叉引用自动构建知识图谱,Web 控制台提供可视化关系图浏览 -- **对话联动**:Agent 回复中引用的知识文档链接可在 Web 控制台中直接点击跳转查看 -- **CLI 管理**:通过 `/knowledge` 命令查看统计、浏览目录,通过 `/knowledge on|off` 开关功能 +- **Auto-organization**: The Agent autonomously extracts and organizes structured knowledge during conversations, maintaining indexes and cross-references +- **Knowledge graph**: Automatically builds a knowledge graph from cross-references between pages, with interactive graph visualization in the Web console +- **Chat integration**: Knowledge document links referenced in Agent replies can be clicked directly in the Web console for viewing +- **CLI management**: Use `/knowledge` commands to view stats, browse directory, and toggle the feature with `/knowledge on|off` -详细说明请参考 [个人知识库](/knowledge)。 +See [Personal Knowledge Base](/knowledge) for details. -## 3. 任务规划和工具调用 +## 3. Task Planning and Tool Use -工具是 Agent 访问操作系统资源的核心,Agent 会根据任务需求智能选择和调用工具,完成文件读写、命令执行、定时任务等各类操作。内置工具的实现在项目的 `agent/tools/` 目录下。 +Tools are the core of how the Agent accesses operating system resources. The Agent intelligently selects and invokes tools based on task requirements, performing file read/write, command execution, scheduled tasks, and more. Built-in tools are implemented in the project's `agent/tools/` directory. -**主要工具:** 文件读写编辑、Bash 终端、浏览器操作、文件发送、定时调度、记忆搜索、联网搜索、环境配置等。 +**Key tools:** file read/write/edit, Bash terminal, browser, file send, scheduler, memory search, web search, environment config, and more. 
-### 3.1 终端和文件访问 +### 3.1 Terminal and File Access -针对操作系统的终端和文件的访问能力,是最基础和核心的工具,其他很多工具或技能都是基于此进行扩展。用户可通过手机端与 Agent 交互,操作个人电脑或服务器上的资源: +Access to the OS terminal and file system is the most fundamental and core capability. Many other tools and skills build on top of this. Users can interact with the Agent from a mobile device to operate resources on their personal computer or server: -### 3.2 编程能力 +### 3.2 Programming Capability -基于编程能力和系统访问能力,Agent 可以实现从信息搜索、图片等素材生成、编码、测试、部署、Nginx 配置修改、发布的 **Vibecoding 全流程**,通过手机端简单的一句命令完成应用的快速 demo: +Combining programming and system access, the Agent can execute the complete **Vibecoding workflow** — from information search, asset generation, coding, testing, deployment, Nginx configuration, to publishing — all triggered by a single command from your phone: -### 3.3 定时任务 +### 3.3 Scheduled Tasks -基于 `scheduler` 工具实现动态定时任务,支持**一次性任务、固定时间间隔、Cron 表达式**三种形式,任务触发可选择**固定消息发送**或 **Agent 动态任务**执行两种模式: +The `scheduler` tool enables dynamic scheduled tasks, supporting **one-time tasks, fixed intervals, and Cron expressions**. Tasks can be triggered as either a **fixed message send** or an **Agent dynamic task** execution: -### 3.4 浏览器操作 +### 3.4 Browser -内置 `browser` 工具,Agent 可控制浏览器访问网页、填写表单、点击元素、截图,支持动态 JS 渲染页面。运行 `cow install-browser` 一键安装,自动适配服务器(无头模式)和桌面环境: +The built-in `browser` tool allows the Agent to control a Chromium browser to visit web pages, fill forms, click elements, and take screenshots, with support for dynamic JS-rendered pages. Run `cow install-browser` to install with one command, automatically adapting to server (headless) and desktop environments: - + -### 3.5 环境变量管理 +### 3.5 Environment Variable Management -技能所需的秘钥存储在环境变量文件中,由 `env_config` 工具进行管理,你可以通过对话的方式更新秘钥,工具内置安全保护和脱敏策略: +Secrets required by skills are stored in an environment variable file, managed by the `env_config` tool. You can update secrets through conversation, with built-in security protection and desensitization: -## 4. 技能系统 +## 4. 
Skills System -技能系统为 Agent 提供无限的扩展性,每个 Skill 由说明文件、运行脚本(可选)、资源(可选)组成,描述如何完成特定类型的任务。通过 Skill 可以让 Agent 遵循说明完成复杂流程、调用各类工具或对接第三方系统。 +The Skills system provides infinite extensibility for the Agent. Each Skill consists of a description file, execution scripts (optional), and resources (optional), describing how to complete specific types of tasks. Skills allow the Agent to follow instructions for complex workflows, invoke tools, or integrate third-party systems. -- [Skill Hub](https://skills.cowagent.ai/):开放的技能广场,汇集官方推荐、社区贡献和第三方技能,支持一键安装。 -- **内置技能:** 在项目的 `skills/` 目录下,包含技能创造器、图像识别、LinkAI 智能体、网页抓取等。内置 Skill 根据依赖条件(API Key、系统命令等)自动判断是否启用。 -- **自定义技能:** 由用户通过对话创建,存放在工作空间中(`~/cow/skills/`),可实现任何复杂的业务流程和第三方系统对接。 +- [Skill Hub](https://skills.cowagent.ai/): An open skill marketplace featuring official, community, and third-party skills. Install with one command. +- **Built-in skills:** Located in the project's `skills/` directory, including skill creator, image recognition, LinkAI agent, web fetch, and more. Built-in skills are automatically enabled based on dependency conditions (API keys, system commands, etc.). +- **Custom skills:** Created by users through conversation, stored in the workspace (`~/cow/skills/`), capable of implementing any complex business process or third-party integration. -安装技能:`/skill install <名称>` 或 `cow skill install <名称>`,支持从 Skill Hub、GitHub、ClawHub、URL 等来源安装。 +Install skills: `/skill install <name>` or `cow skill install <name>`, supporting Skill Hub, GitHub, ClawHub, URL, and more. -### 4.1 创建技能 +### 4.1 Creating Skills -通过 `skill-creator` 技能可以通过对话的方式快速创建技能。你可以让 Agent 将某个工作流程固化为技能,或者把任意接口文档和示例发送给 Agent,让他直接完成对接: +The `skill-creator` skill enables rapid skill creation through conversation.
You can ask the Agent to codify a workflow as a skill, or send any API documentation and examples for the Agent to complete the integration directly: -### 4.2 搜索和图像识别 +### 4.2 Web Search and Image Recognition -- **联网搜索:** 内置 `web_search` 工具,支持多种搜索引擎,配置 `BOCHA_API_KEY` 或 `LINKAI_API_KEY` 后启用。 -- **图像识别:** 内置 `openai-image-vision` 技能,可使用 `gpt-4.1-mini`、`gpt-4.1` 等模型,依赖 `OPENAI_API_KEY`。 +- **Web search:** Built-in `web_search` tool, supports multiple search engines. Configure `BOCHA_API_KEY` or `LINKAI_API_KEY` to enable. +- **Image recognition:** Built-in `openai-image-vision` skill, supports `gpt-4.1-mini`, `gpt-4.1`, and other models. Requires `OPENAI_API_KEY`. -### 4.3 技能广场 +### 4.3 Skill Hub -访问 [skills.cowagent.ai](https://skills.cowagent.ai/) 浏览所有可用技能,或在对话中执行: +Visit [skills.cowagent.ai](https://skills.cowagent.ai/) to browse all available skills, or use commands in conversation: ```text -/skill list --remote # 浏览技能广场 -/skill search <关键词> # 搜索技能 -/skill install <名称> # 一键安装 +/skill list --remote # Browse Skill Hub +/skill search <keyword> # Search skills +/skill install <name> # Install with one command ``` -同时还支持安装Github、ClawHub、LinkAI等第三方平台上的所有技能,详情查看 [技能安装](/skills/install) +Also supports installing skills from GitHub, ClawHub, LinkAI, and other third-party platforms. See [Install Skills](/skills/install) for details. +## 5. CLI Command System -## 5. CLI 命令系统 +CowAgent provides two command interaction methods, covering service management, skill installation, configuration, and more: -CowAgent 提供两种命令交互方式,覆盖服务管理、技能安装、配置调整等日常运维操作: - -- **终端 CLI:** 在系统终端执行 `cow <命令>`,支持 `start`、`stop`、`restart`、`update`、`status`、`logs`、`skill` 等 -- **对话命令:** 在对话中输入 `/<命令>`,Web 控制台输入 `/` 可弹出指令菜单快速选择 +- **Terminal CLI:** Run `cow <command>` in the system terminal, supporting `start`, `stop`, `restart`, `update`, `status`, `logs`, `skill`, etc. +- **Chat commands:** Type `/<command>` in conversation. The Web console shows a command menu when you type `/`.
```bash -cow start # 启动服务 -cow stop # 停止服务 -cow update # 更新并重启 -cow skill install pptx # 安装技能 -cow install-browser # 安装浏览器工具 +cow start # Start service +cow stop # Stop service +cow update # Update and restart +cow skill install pptx # Install a skill +cow install-browser # Install browser tool ``` -详细命令参考 [命令总览](https://docs.cowagent.ai/cli)。 - - +See [Command Overview](https://docs.cowagent.ai/en/cli) for details. diff --git a/docs/intro/index.mdx b/docs/intro/index.mdx index 10e64813..508d1362 100644 --- a/docs/intro/index.mdx +++ b/docs/intro/index.mdx @@ -1,57 +1,60 @@ --- -title: 项目介绍 -description: CowAgent - 基于大模型的超级AI助理 +title: Introduction +description: CowAgent - Open-source super AI assistant and Agent Harness ---
CowAgent
-**CowAgent** 是基于大模型的超级AI助理,能够主动思考和任务规划、操作计算机和外部资源、创造和执行Skills、拥有长期记忆和知识库并不断成长。 +**CowAgent** is an open-source super AI assistant and Agent Harness. It proactively plans tasks, runs tools and skills, and autonomously grows with memory and knowledge. -CowAgent 支持灵活切换多种模型,能处理文本、语音、图片、文件等多模态消息,可接入微信、飞书、钉钉、企业微信应用、微信公众号、网页中使用,7×24小时运行于你的个人电脑或服务器中。 +CowAgent is lightweight, easy to deploy, and built to extend. Plug in any major LLM provider, run it across Web and major IM platforms, 24/7 on a personal computer or server. - 开源代码仓库,欢迎 Star 和贡献 + Open-source repository — Star and contribute - - 无需安装,立即在线体验 CowAgent + + No setup required — experience CowAgent instantly -## 核心能力 +## Core Capabilities - - 能够理解复杂任务并自主规划执行,持续思考和调用各类工具和技能直到完成目标。 + + Decomposes complex tasks and executes them step by step, looping over tools and skills until the goal is reached. - - 三层记忆流转(上下文→天级记忆→全局记忆),每日梦境蒸馏整理,支持关键词及向量检索。 + + Three-tier architecture (context → daily → core), automatic Deep Dream distillation, hybrid keyword + vector retrieval. - - 自动整理结构化知识,支持知识图谱可视化,通过交叉引用构建持续增长的知识网络。 + + Auto-curates structured knowledge into a Markdown wiki, builds an evolving knowledge graph with visual browsing. - - 实现了Skills创建和运行的引擎,内置多种技能,并支持通过自然语言对话完成自定义Skills开发。 + + A complete skill creation and execution engine. Install from Skill Hub or generate custom skills via natural-language conversation. - - 内置文件读写、终端执行、浏览器操作、定时任务、消息发送等工具,Agent 可自主调用工具完成复杂任务。 + + First-class support for text, images, voice, and files — recognition, generation, and delivery. - - 提供终端 CLI 和对话中的命令,支持进程管理、技能安装、配置修改、上下文查看等常用操作。 + + Built-in file I/O, terminal, browser, scheduler, memory retrieval, web search, and more — with native MCP integration. - - 支持 OpenAI, Claude, Gemini, DeepSeek, MiniMax, GLM, Qwen, Kimi, Doubao 等国内外主流模型厂商。 + + Terminal CLI and in-chat commands for process management, skill installation, configuration, and context inspection. 
- - 支持运行在本地计算机或服务器,可集成到微信、网页、飞书、钉钉、微信公众号、企业微信应用中使用。 + + Claude, GPT, Gemini, DeepSeek, Qwen, GLM, Kimi, MiniMax, Doubao, and more — swap providers from the Web console with one click. + + + A single Agent simultaneously serves Web, WeChat, Feishu, DingTalk, WeCom, QQ, and Official Accounts. -## 快速体验 +## Quick Start -在终端执行以下命令,即可一键安装、配置、启动 CowAgent: +Run one of the commands below to install, configure, and start CowAgent in a single step: @@ -66,19 +69,25 @@ CowAgent 支持灵活切换多种模型,能处理文本、语音、图片、 -运行后默认会启动 Web 控制台,通过访问 `http://localhost:9899` 可以在网页端进行对话、配置、应用通道接入等操作。 +Once started, open `http://localhost:9899` to access the **Web console** — the unified place to chat, configure providers, connect channels, and install skills. - - 查看完整的安装和运行指南 + + Complete installation and run guide - - 了解 CowAgent 的系统架构设计 + + CowAgent system architecture -## 社区 +## Disclaimer -添加小助手微信加入开源项目交流群: +1. This project is licensed under the [MIT License](https://github.com/zhayujie/CowAgent/blob/master/LICENSE) and is intended for technical research and learning. You are responsible for complying with applicable laws and regulations in your jurisdiction; the maintainers assume no liability for any consequences arising from use of this project. +2. **Cost & safety:** Agent mode consumes substantially more tokens than plain chat — pick models that balance quality and cost. The Agent has access to your local operating system; deploy only in trusted environments. +3. CowAgent is a pure open-source project and does not participate in, authorize, or issue any cryptocurrency. 
+ +## Community + +Scan the WeChat QR code to join the open-source community group: diff --git a/docs/knowledge/index.mdx b/docs/knowledge/index.mdx index 4f9aa797..f1610dc9 100644 --- a/docs/knowledge/index.mdx +++ b/docs/knowledge/index.mdx @@ -1,96 +1,93 @@ --- -title: 个人知识库 -description: CowAgent 的个人知识库系统 — 结构化知识沉淀、自动整理与知识图谱 +title: Personal Knowledge Base +description: CowAgent personal knowledge base — structured knowledge accumulation, automatic organization, and knowledge graph --- -个人知识库是 Agent 的长期结构化知识存储,保存在工作空间的 `knowledge/` 目录下。与按时间线组织的记忆不同,知识库以主题为维度,将用户分享的文章、对话中的洞察、学习材料等整理为互相关联的 Markdown 页面,形成可持续增长的知识网络。 +The personal knowledge base is the Agent's long-term structured knowledge store, saved in the `knowledge/` directory within the workspace. Unlike memory, which is organized by timeline, the knowledge base organizes content by topic — articles, conversation insights, and learning materials are structured into interlinked Markdown pages, forming a continuously growing knowledge network. 
-## 核心概念 +## Core Concepts -### 知识 vs 记忆 +### Knowledge vs Memory -| 维度 | 知识库(knowledge/) | 长期记忆(memory/) | +| Dimension | Knowledge Base (knowledge/) | Long-term Memory (memory/) | | --- | --- | --- | -| 组织方式 | 按主题分类、互相关联 | 按时间线、日期文件 | -| 写入方式 | Agent 主动整理结构化内容 | 上下文裁剪时自动摘要 | -| 内容特点 | 提炼后的结构化知识 | 原始对话摘要 | -| 典型用途 | 学习笔记、技术文档、项目知识 | 对话历史、事件记录 | +| Organization | By topic, interlinked | By timeline, dated files | +| Writing | Agent actively structures content | Auto-summarized on context trimming | +| Content | Refined, structured knowledge | Raw conversation summaries | +| Use cases | Study notes, tech docs, project knowledge | Conversation history, event records | -### 目录结构 +### Directory Structure ``` ~/cow/knowledge/ -├── index.md # 知识索引,所有页面的入口 -├── log.md # 变更日志,记录每次写入 -├── concepts/ # 概念类知识 +├── index.md # Knowledge index, entry point for all pages +├── log.md # Change log, records each write +├── concepts/ # Conceptual knowledge │ └── machine-learning.md -├── entities/ # 实体类知识(人物、组织、工具) +├── entities/ # Entity knowledge (people, orgs, tools) │ └── openai.md -└── sources/ # 来源类知识(文章、论文) +└── sources/ # Source knowledge (articles, papers) └── llm-wiki.md ``` -目录结构是灵活的 — Agent 会根据实际内容自动创建合适的分类目录。用户也可以通过对话的方式自定义目录组织方式。 +The directory structure is flexible — the Agent automatically creates appropriate category directories based on actual content. Users can also customize the organization. 
+## Automatic Organization + +Knowledge writing is an autonomous Agent behavior, triggered in these scenarios: + +- **User shares an article or document** — The Agent automatically extracts key information and creates a structured knowledge page +- **Conversation produces valuable conclusions** — The Agent organizes insights into knowledge pages and links them to existing knowledge +- **User explicitly requests organization** — Users can guide the Agent to organize and update knowledge through conversation + +Each knowledge page includes cross-reference links to related pages, gradually building a knowledge graph. -## 自动整理 +## Knowledge Retrieval -知识库的写入是 Agent 的自主行为,在以下场景中触发: +The Agent can retrieve knowledge during conversation through: -- **用户分享文章或文档** — Agent 自动提取关键信息,创建结构化知识页面 -- **对话产生有价值的结论** — Agent 将洞察整理为知识页面,并与已有知识建立关联 -- **用户主动要求整理** — 用户可以通过对话指导 Agent 组织和更新知识 +- **Index lookup** — Quickly locate relevant pages via `knowledge/index.md` +- **Semantic search** — Search knowledge content via the `memory_search` tool +- **Direct read** — Read specific knowledge files via the `memory_get` tool + +## Web Console + +The web console provides a dedicated "Knowledge" module with: + +- **Document browsing** — Tree-style directory structure, searchable and collapsible, click to view content +- **Knowledge graph** — Interactive graph visualizing relationships between knowledge pages +- **Chat integration** — Knowledge document links referenced in Agent replies are clickable for direct navigation - -每个知识页面都包含与其他页面的交叉引用链接,逐步构建起一个知识图谱。 - -## 知识检索 - -Agent 在对话中可以通过以下方式检索知识: - -- **索引查阅** — 通过 `knowledge/index.md` 快速定位相关知识页面 -- **语义搜索** — 通过 `memory_search` 工具对知识库内容进行语义检索 -- **直接读取** — 通过 `memory_get` 工具读取特定知识文件 - -## Web 控制台 - -Web 控制台提供了专用的「知识」模块,支持: - -- **文档浏览** — 树状目录结构,可搜索、可折叠,点击查看文档内容 -- **知识图谱** — 可视化展示知识之间的关联关系,节点可直接跳转至文档 -- **对话联动** — Agent 回复中引用的知识文档链接可直接点击跳转查看 - +## CLI Commands -## CLI 命令 +Manage the knowledge base with the `/knowledge` command: -通过 
`/knowledge` 命令管理知识库: - -| 命令 | 说明 | +| Command | Description | | --- | --- | -| `/knowledge` | 显示知识库统计信息 | -| `/knowledge list` | 以树状结构显示文件目录 | -| `/knowledge on` | 开启知识库功能 | -| `/knowledge off` | 关闭知识库功能 | +| `/knowledge` | Show knowledge base statistics | +| `/knowledge list` | Display file directory as a tree | +| `/knowledge on` | Enable the knowledge base feature | +| `/knowledge off` | Disable the knowledge base feature | -## 相关配置 +## Configuration -| 参数 | 说明 | 默认值 | +| Parameter | Description | Default | | --- | --- | --- | -| `knowledge` | 是否启用个人知识库功能 | `true` | -| `agent_workspace` | 工作空间路径,知识库存储在此目录的 `knowledge/` 子目录下 | `~/cow` | +| `knowledge` | Whether to enable the personal knowledge base | `true` | +| `agent_workspace` | Workspace path; knowledge is stored under the `knowledge/` subdirectory | `~/cow` | diff --git a/docs/memory/context.mdx b/docs/memory/context.mdx index 3d358f9d..18fbdc8c 100644 --- a/docs/memory/context.mdx +++ b/docs/memory/context.mdx @@ -1,81 +1,81 @@ --- -title: 短期记忆 -description: 对话上下文 — 消息管理、压缩策略和上下文操作 +title: Short-term Memory +description: Conversation context — message management, compression strategies, and context operations --- -对话上下文是 Agent 的短期记忆,包含当前会话中的所有消息(用户输入、Agent 回复、工具调用及结果)。合理管理上下文对于 Agent 的推理质量和成本控制至关重要。 +Conversation context is the Agent's short-term memory, containing all messages in the current session (user input, Agent replies, tool calls and results). Proper context management is critical for the Agent's reasoning quality and cost control. -## 上下文结构 +## Context Structure -每一轮对话由以下消息组成: +Each conversation turn consists of: ``` -用户消息 → Agent 思考 → 工具调用 → 工具结果 → ... → Agent 最终回复 +User message → Agent thinking → Tool call → Tool result → ... → Agent final reply ``` -一轮中可能包含多次工具调用(Agent 的决策步数由 `agent_max_steps` 控制),所有工具调用和结果都会保留在上下文中,直到被压缩或裁剪。 +A single turn may include multiple tool calls (controlled by `agent_max_steps`). All tool calls and results are retained in context until compressed or trimmed. 
-## 关键配置 +## Key Configuration -| 参数 | 说明 | 默认值 | +| Parameter | Description | Default | | --- | --- | --- | -| `agent_max_context_tokens` | 上下文最大 token 预算 | `50000` | -| `agent_max_context_turns` | 上下文最大对话轮次 | `20` | -| `agent_max_steps` | 单轮对话最大决策步数(工具调用次数) | `15` | +| `agent_max_context_tokens` | Maximum context token budget | `50000` | +| `agent_max_context_turns` | Maximum conversation turns in context | `20` | +| `agent_max_steps` | Maximum decision steps per turn (tool call count) | `20` | -可通过 `config.json` 或对话中的 `/config` 命令修改。 +Configurable via `config.json` or the `/config` chat command. -## 压缩策略 +## Compression Strategy -当上下文超出限制时,系统会自动执行压缩以释放空间。整个过程分为多个阶段: +When context exceeds limits, the system automatically compresses to free space. The process has multiple stages: -### 1. 工具结果截断 +### 1. Tool Result Truncation -在每次决策循环开始前,系统会检查历史轮次中的工具调用结果。超过 **20000 字符** 的工具结果会被截断,仅保留首尾内容和截断说明。当前轮次的工具结果不受影响。 +Before each decision loop, the system checks tool call results in historical turns. Results exceeding **20,000 characters** are truncated, keeping only the beginning and end with a truncation notice. Current turn results are not affected. -### 2. 轮次裁剪 +### 2. Turn Trimming -当对话轮次超过 `agent_max_context_turns` 时: +When conversation turns exceed `agent_max_context_turns`: -- 裁剪 **最早一半** 的完整轮次(保证工具调用链的完整性) -- 被裁剪的消息会通过 LLM 总结后**写入当天的日级记忆文件** -- LLM 摘要完成后,同时将摘要**注入到保留消息的第一条用户消息开头**,帮助模型在后续对话中保持上下文连贯性 -- 摘要注入在后台异步完成,不阻塞当前回复;注入的摘要在下一轮对话时生效 +- The **oldest half** of complete turns is trimmed (preserving tool call chain integrity) +- Trimmed messages are summarized by LLM and **written to the daily memory file** +- Once the LLM summary is ready, it is also **injected into the first user message** of the retained context, helping the model maintain conversational continuity +- Summary injection runs asynchronously in the background and takes effect from the next turn onward -### 3.
Token Budget Trimming -裁剪轮次后,如果 token 数仍超出预算: +After turn trimming, if tokens still exceed the budget: -- **轮次 < 5 时**:对所有轮次进行**文本压缩** — 每轮只保留第一条用户文本和最后一条 Agent 回复,去掉中间的工具调用链 -- **轮次 ≥ 5 时**:再次裁剪**前半轮次**,被丢弃内容同样写入记忆并注入上下文摘要 +- **Fewer than 5 turns**: All turns undergo **text compression** — each turn keeps only the first user text and last Agent reply, removing intermediate tool call chains +- **5 or more turns**: The **first half** of turns is trimmed again, with discarded content written to memory and a context summary injected -### 4. 溢出应急处理 +### 4. Overflow Emergency Handling -当模型 API 返回上下文溢出错误时: +When the model API returns a context overflow error: -1. 先将当前所有消息总结写入记忆 -2. 执行激进裁剪(工具结果限制 10K 字符、用户文本限制 10K、最多保留 5 轮) -3. 如果仍然溢出,清空整个对话上下文 +1. All current messages are summarized and written to memory +2. Aggressive trimming is applied (tool results limited to 10K chars, user text to 10K, max 5 turns) +3. If still overflowing, the entire conversation context is cleared -## 会话持久化 +## Session Persistence -对话消息会持久化到本地数据库,服务重启后自动恢复。恢复策略: +Conversation messages are persisted to a local database, automatically restored after service restart. 
Restore strategy: -- 恢复最近的 **`max(3, max_context_turns / 6)`** 轮对话 -- 只保留每轮的**用户文本和 Agent 最终回复**,不恢复中间工具调用链 -- 超过 **30 天**的历史会话自动清理 +- Restores the most recent **`max(3, max_context_turns / 6)`** turns +- Only retains each turn's **user text and Agent final reply**, not intermediate tool call chains +- Sessions older than **30 days** are automatically cleaned up -## 操作命令 +## Commands -在对话中可以使用以下命令管理上下文: +Use these commands in chat to manage context: -| 命令 | 说明 | +| Command | Description | | --- | --- | -| `/context` | 查看当前上下文统计(消息数、角色分布、总字符数) | -| `/context clear` | 清空当前会话上下文 | -| `/config agent_max_context_tokens 80000` | 调整上下文 token 预算 | -| `/config agent_max_context_turns 30` | 调整上下文轮次上限 | +| `/context` | View current context statistics (message count, role distribution, total characters) | +| `/context clear` | Clear current session context | +| `/config agent_max_context_tokens 80000` | Adjust context token budget | +| `/config agent_max_context_turns 30` | Adjust context turn limit | - 清空上下文后,Agent 会"忘记"之前的对话内容。被裁剪和清空的内容如果已经写入长期记忆,仍可通过记忆检索找回。 + After clearing context, the Agent "forgets" previous conversation content. Content that was already written to long-term memory can still be retrieved via memory search. diff --git a/docs/memory/deep-dream.mdx b/docs/memory/deep-dream.mdx index 726ec5e2..d0dd9e2d 100644 --- a/docs/memory/deep-dream.mdx +++ b/docs/memory/deep-dream.mdx @@ -1,94 +1,90 @@ --- -title: 梦境蒸馏 -description: Deep Dream — 从对话到永久记忆的自动蒸馏机制 +title: Deep Dream +description: Deep Dream — automatic distillation from conversations to permanent memory --- -梦境蒸馏(Deep Dream)是 CowAgent 记忆系统的核心整理机制,负责将分散的天级记忆蒸馏为精炼的长期记忆,并生成梦境日记。 +Deep Dream is the core consolidation mechanism of CowAgent's memory system, responsible for distilling scattered daily memories into refined long-term memory and generating dream diaries. 
-## 记忆流转 +## Memory Flow -CowAgent 的记忆从短期到长期经历三个阶段: +CowAgent's memory progresses through three stages from short-term to long-term: ``` -对话上下文(短期)→ 天级记忆(中期)→ MEMORY.md(长期) +Conversation context (short-term) → Daily memory (mid-term) → MEMORY.md (long-term) ``` -### 1. 对话 → 天级记忆 +### 1. Conversation → Daily Memory -当对话上下文被裁剪或每日定时总结时,系统使用 LLM 将对话内容摘要为关键事件,写入当天的天级记忆文件 `memory/YYYY-MM-DD.md`。 +When conversation context is trimmed or during the daily scheduled summary, the system uses LLM to summarize conversation content into key events, writing them to the daily memory file `memory/YYYY-MM-DD.md`. -触发时机: -- **上下文裁剪** — 轮次或 token 超限时,裁剪的内容被总结写入 -- **每日定时** — 23:55 自动触发全量总结 -- **API 溢出** — 紧急保存当前对话摘要 +Triggers: +- **Context trimming** — Trimmed content is summarized when turn or token limits are exceeded +- **Daily schedule** — Automatically triggered at 23:55 +- **API overflow** — Emergency save of current conversation summary -### 2. 天级记忆 → MEMORY.md(蒸馏) +### 2. Daily Memory → MEMORY.md (Distillation) -每日总结完成后,Deep Dream 自动执行蒸馏: +After the daily summary completes, Deep Dream automatically runs distillation: -1. **读取材料** — 当前 `MEMORY.md` + 当天的天级记忆 -2. **LLM 蒸馏** — 去重、合并、修剪、提取新信息 -3. **覆写 MEMORY.md** — 输出精炼后的长期记忆 -4. **生成梦境日记** — 记录整理过程的发现和洞察 +1. **Read materials** — Current `MEMORY.md` + today's daily memory +2. **LLM distillation** — Deduplicate, merge, prune, extract new information +3. **Overwrite MEMORY.md** — Output the refined long-term memory +4. **Generate dream diary** — Record discoveries and insights from the consolidation -### 3. MEMORY.md 的作用 +### 3. Role of MEMORY.md -`MEMORY.md` 会被注入到每次对话的系统提示词中,让 Agent 始终了解用户的偏好、决策和关键事实。因此它必须保持精炼——Deep Dream 会控制在约 30 条以内。 +`MEMORY.md` is injected into the system prompt for every conversation, keeping the Agent aware of user preferences, decisions, and key facts. Therefore it must stay concise — Deep Dream targets approximately 30 entries or fewer. 
-## 蒸馏规则 +## Distillation Rules -Deep Dream 遵循以下整理规则: +Deep Dream follows these consolidation rules: -| 操作 | 说明 | +| Operation | Description | | --- | --- | -| **合并提炼** | 含义相近的多条合并为一条高密度表述 | -| **新增萃取** | 从天级记忆中提取偏好、决策、人物、经验等 | -| **冲突更新** | 新信息与旧条目矛盾时,以新信息为准 | -| **清理无效** | 删除临时性记录、空白条目、格式残留 | -| **删除冗余** | 已被更精炼表述涵盖的旧条目删除 | +| **Merge & refine** | Combine similar entries into single high-density statements | +| **Extract new** | Pull preferences, decisions, people, experiences from daily memory | +| **Conflict update** | When new info contradicts old entries, newer info takes precedence | +| **Clean invalid** | Remove temporary records, blank entries, formatting artifacts | +| **Remove redundancy** | Delete old entries already covered by more refined statements | -## 梦境日记 +## Dream Diary -每次蒸馏会生成一篇梦境日记,保存在 `memory/dreams/YYYY-MM-DD.md`,用叙事风格记录: +Each distillation generates a dream diary saved at `memory/dreams/YYYY-MM-DD.md`, written in a narrative style recording: -- 发现了哪些重复或矛盾 -- 从天级记忆中提取了什么新洞察 -- 做了哪些清理和优化 -- 整体感受和观察 +- Duplications or contradictions found +- New insights extracted from daily memory +- Cleanups and optimizations performed +- Overall observations -梦境日记可在 Web 控制台的「记忆管理 → 梦境日记」tab 中查看。 +Dream diaries can be viewed in the Web console under "Memory → Dream Diary" tab. 
-## 手动触发 +## Manual Trigger -除了每日自动执行外,也可以在对话中手动触发: +In addition to the automatic daily run, you can manually trigger distillation in chat: ```text /memory dream [N] ``` -- `N`:整理近 N 天的记忆(默认 3 天,最大 30 天) -- 蒸馏在后台异步执行,完成后在对话中通知结果 -- Web 端通知包含可点击链接,直接跳转查看 MEMORY.md 和梦境日记 -- 无需 Agent 初始化,首次对话前即可使用 - - - - +- `N`: Consolidate the last N days of memory (default 3, max 30) +- Runs asynchronously in the background; you'll be notified in chat when complete +- Web notifications include clickable links to view MEMORY.md and dream diary +- Works without Agent initialization — can be used before the first conversation - 首次部署后可以手动执行一次 `/memory dream 30`,将历史天级记忆全量蒸馏到 MEMORY.md。 + After first deployment, it's recommended to run `/memory dream 30` once to distill all historical daily memories into MEMORY.md. -## 安全机制 +## Safety Mechanisms -| 机制 | 说明 | +| Mechanism | Description | | --- | --- | -| **无新内容跳过** | 没有天级记忆时不执行蒸馏,避免空覆写 | -| **输入去重** | 定时任务中,输入材料未变化时自动跳过 | -| **异步执行** | 蒸馏在后台线程运行,不阻塞对话 | -| **顺序保证** | 定时任务中,天级 flush 全部完成后才启动蒸馏 | -| **禁止编造** | 提示词明确约束只能基于已有材料整理,不得推测或添加 | +| **Skip on no content** | Distillation skipped when no daily memory exists, avoiding empty overwrites | +| **Input dedup** | In scheduled tasks, automatically skipped when input materials haven't changed | +| **Async execution** | Distillation runs in a background thread, never blocking conversation | +| **Sequential guarantee** | In scheduled tasks, daily flush completes before distillation starts | +| **No fabrication** | Prompt explicitly constrains consolidation to existing materials only | diff --git a/docs/memory/index.mdx b/docs/memory/index.mdx index c6dc0e65..069b6eab 100644 --- a/docs/memory/index.mdx +++ b/docs/memory/index.mdx @@ -1,71 +1,71 @@ --- -title: 长期记忆 -description: CowAgent 的长期记忆系统 — 文件持久化、自动写入与混合检索 +title: Long-term Memory +description: CowAgent long-term memory system — file persistence, automatic writing, and hybrid retrieval --- -长期记忆保存在工作空间文件中,跨会话持久存在。Agent 
在对话中通过检索工具按需加载历史记忆,也会在上下文裁剪时自动将对话摘要写入长期记忆。 +Long-term memory is stored in workspace files, persisting across sessions. The Agent loads historical memory on demand via retrieval tools during conversation, and automatically writes conversation summaries to long-term memory when context is trimmed. -Memory Architecture +Memory Architecture -## 记忆类型 +## Memory Types -### 核心记忆(MEMORY.md) +### Core Memory (MEMORY.md) -存储在 `~/cow/MEMORY.md` 中,包含用户的长期偏好、重要决策、关键事实等不会随时间淡化的信息。Agent 可通过工具读写此文件来维护长期知识。 +Stored in `~/cow/MEMORY.md`, containing long-term user preferences, important decisions, key facts, and other information that doesn't fade over time. The Agent reads and writes this file via tools to maintain long-term knowledge. -### 日级记忆(memory/YYYY-MM-DD.md) +### Daily Memory (memory/YYYY-MM-DD.md) -存储在 `~/cow/memory/` 目录下,按日期命名(如 `2026-03-08.md`),记录每天的对话摘要和关键事件。仅在首次写入时创建,避免生成空文件。 +Stored in `~/cow/memory/` directory, named by date (e.g., `2026-03-08.md`), recording daily conversation summaries and key events. Files are only created on first write to avoid generating empty files. -### 梦境日记(memory/dreams/YYYY-MM-DD.md) +### Dream Diary (memory/dreams/YYYY-MM-DD.md) -Deep Dream(记忆蒸馏)过程的副产物,记录每次整理的发现、去重合并操作和新洞察。存储在 `~/cow/memory/dreams/` 目录下,按日期命名。 +A byproduct of the Deep Dream (memory distillation) process, recording discoveries, deduplication operations, and new insights from each consolidation. Stored in `~/cow/memory/dreams/` directory, named by date. 
-## 自动写入 +## Automatic Writing -Agent 通过以下机制自动将对话内容持久化为长期记忆: +The Agent automatically persists conversation content to long-term memory through the following mechanisms: -- **上下文裁剪时** — 当对话轮次或 token 超出配置上限时,裁剪最早一半的上下文,使用 LLM 将被裁剪的内容总结为关键信息写入当天记忆文件,并将摘要异步注入到保留的上下文中,帮助模型保持对话连贯性 -- **每日定时总结** — 每天 23:55 自动触发一次全量总结,防止低活跃日无记忆留存(内容无变化时自动跳过) -- [梦境蒸馏(Deep Dream)](/memory/deep-dream) — 每日总结完成后自动执行,将天级记忆蒸馏合并到 MEMORY.md,并生成梦境日记 -- **API 上下文溢出时** — 当模型 API 返回上下文溢出错误时,紧急保存当前对话摘要 +- **On context trimming** — When conversation turns or tokens exceed the configured limit, the oldest half of the context is trimmed, and the discarded content is summarized by LLM into key information and written to the daily memory file. The summary is also asynchronously injected into the retained context for conversational continuity +- **Daily scheduled summary** — A full summary is automatically triggered at 23:55 every day, ensuring memory is preserved even on low-activity days (skipped if content hasn't changed) +- [Deep Dream (memory distillation)](/memory/deep-dream) — Runs automatically after the daily summary, distilling daily memories into MEMORY.md and generating a dream diary +- **On API context overflow** — When the model API returns a context overflow error, the current conversation summary is saved as an emergency measure -所有记忆写入均在后台异步执行(LLM 总结 + 文件写入),不阻塞正常对话回复。 +All memory writes run asynchronously in a background thread (LLM summarization + file writing), never blocking normal conversation replies. 
-## 记忆检索 +## Memory Retrieval -记忆系统支持混合检索模式: +The memory system supports hybrid retrieval modes: -- **关键词检索** — 基于 FTS5 全文索引匹配历史记忆,支持 BM25 排序 -- **向量检索** — 基于 embedding 语义相似度搜索,即使表述不同也能找到相关记忆 +- **Keyword retrieval** — FTS5 full-text index matching with BM25 ranking +- **Vector retrieval** — Embedding-based semantic similarity search, finds relevant memory even with different wording -Agent 会在对话中根据需要自动触发记忆检索,将相关历史信息纳入上下文。检索结果按混合评分排序(默认向量权重 0.7、关键词权重 0.3),日级记忆会随时间衰减(半衰期 30 天),核心记忆不衰减。 +The Agent automatically triggers memory retrieval during conversation as needed, incorporating relevant historical information into context. Results are ranked by a combined score (default: 0.7 vector weight + 0.3 keyword weight). Daily memory scores decay over time (30-day half-life), while core memory does not decay. -## 相关文件 +## Related Files -工作空间(默认 `~/cow`)中与记忆相关的文件: +Files related to memory in the workspace (default `~/cow`): -| 文件 | 说明 | +| File | Description | | --- | --- | -| `AGENT.md` | Agent 的人格和行为设定 | -| `USER.md` | 用户身份信息和偏好 | -| `RULE.md` | 自定义规则和约束 | -| `MEMORY.md` | 核心记忆(长期) | -| `memory/YYYY-MM-DD.md` | 日级记忆(按需创建) | -| `memory/dreams/YYYY-MM-DD.md` | 梦境日记(Deep Dream 自动生成) | +| `AGENT.md` | Agent personality and behavior settings | +| `USER.md` | User identity information and preferences | +| `RULE.md` | Custom rules and constraints | +| `MEMORY.md` | Core memory (long-term) | +| `memory/YYYY-MM-DD.md` | Daily memory (created on demand) | +| `memory/dreams/YYYY-MM-DD.md` | Dream diary (auto-generated by Deep Dream) | -## Web 控制台 +## Web Console -在 Web 控制台的记忆管理页面中,可浏览记忆文件和梦境日记,支持通过 Tab 切换查看: +The memory management page in the Web console allows browsing memory files and dream diaries, with tab switching support: -## 相关配置 +## Configuration -| 参数 | 说明 | 默认值 | +| Parameter | Description | Default | | --- | --- | --- | -| `agent_workspace` | 工作空间路径,记忆文件存储在此目录下 | `~/cow` | -| `agent_max_context_tokens` | 最大上下文 token 数,超出时裁剪并总结写入记忆 | `50000` | -| `agent_max_context_turns` | 
最大上下文轮次,超出时裁剪并总结写入记忆 | `20` | +| `agent_workspace` | Workspace path, memory files stored under this directory | `~/cow` | +| `agent_max_context_tokens` | Max context tokens; when exceeded, content is trimmed and summarized into memory | `50000` | +| `agent_max_context_turns` | Max context turns; when exceeded, content is trimmed and summarized into memory | `20` | diff --git a/docs/models/claude.mdx b/docs/models/claude.mdx index ee1809d6..bb831eb8 100644 --- a/docs/models/claude.mdx +++ b/docs/models/claude.mdx @@ -1,15 +1,15 @@ --- title: Claude -description: Anthropic Claude 模型配置(文本对话 + 图像理解) +description: Anthropic Claude model configuration (Text Chat + Image Understanding) --- -Claude 由 Anthropic 提供,支持文本对话与图像理解,主流 Sonnet / Opus 模型均原生支持视觉,无需额外指定 Vision 模型。 +Claude is provided by Anthropic and supports both text chat and image understanding. The mainstream Sonnet / Opus models natively support vision, so no separate Vision model needs to be specified. - 通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。 + All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file. -## 文本对话 +## Text Chat ```json { @@ -18,26 +18,26 @@ Claude 由 Anthropic 提供,支持文本对话与图像理解,主流 Sonnet } ``` -| 参数 | 说明 | +| Parameter | Description | | --- | --- | -| `model` | 支持 `claude-opus-4-8`、`claude-opus-4-7`、`claude-sonnet-4-6`、`claude-opus-4-6`、`claude-sonnet-4-5`、`claude-sonnet-4-0`、`claude-3-5-sonnet-latest` 等,参考 [官方模型](https://docs.anthropic.com/en/docs/about-claude/models/overview) | -| `claude_api_key` | 在 [Claude 控制台](https://console.anthropic.com/settings/keys) 创建 | -| `claude_api_base` | 可选,默认为 `https://api.anthropic.com/v1`,可改为第三方代理 | +| `model` | Supports `claude-opus-4-8`, `claude-opus-4-7`, `claude-sonnet-4-6`, `claude-opus-4-6`, `claude-sonnet-4-5`, `claude-sonnet-4-0`, `claude-3-5-sonnet-latest`, etc. 
See [official models](https://docs.anthropic.com/en/docs/about-claude/models/overview) | +| `claude_api_key` | Create one in the [Claude Console](https://console.anthropic.com/settings/keys) | +| `claude_api_base` | Optional, defaults to `https://api.anthropic.com/v1`. Can be changed to a third-party proxy | -### 模型选择 +### Model Selection -| 模型 | 适用场景 | +| Model | Use Case | | --- | --- | -| `claude-opus-4-8` | 默认推荐,最新旗舰,复杂推理与长链路任务效果最佳 | -| `claude-opus-4-7` | 上一代 Opus 旗舰 | -| `claude-sonnet-4-6` | 性价比与速度平衡,成本更低 | -| `claude-opus-4-6` / `claude-sonnet-4-5` / `claude-sonnet-4-0` | 更早的旗舰,价格更低 | +| `claude-opus-4-8` | Default recommended, latest flagship; best for complex reasoning and long-running tasks | +| `claude-opus-4-7` | Previous-generation Opus flagship | +| `claude-sonnet-4-6` | Balanced cost and speed, lower cost | +| `claude-opus-4-6` / `claude-sonnet-4-5` / `claude-sonnet-4-0` | Earlier flagships at a lower price | -## 图像理解 +## Image Understanding -配置 `claude_api_key` 后 Agent 的 Vision 工具会自动使用 Claude 主模型识别图像,无需额外配置。 +Once `claude_api_key` is configured, the Agent's Vision tool automatically uses the Claude main model to recognize images, with no extra setup required. -如需手动指定 Vision 模型,可在配置文件中显式配置: +To manually specify a Vision model, set it explicitly in the configuration file: ```json { diff --git a/docs/models/coding-plan.mdx b/docs/models/coding-plan.mdx index a8341638..b09715eb 100644 --- a/docs/models/coding-plan.mdx +++ b/docs/models/coding-plan.mdx @@ -1,41 +1,41 @@ --- title: Coding Plan -description: Coding Plan 模式模型配置 +description: Coding Plan model configuration --- -> Coding Plan 是各厂商推出的编程包月套餐,适合高频使用 Agent 的场景。CowAgent 支持通过 OpenAI 兼容方式接入各厂商的 Coding Plan 接口。 +> Coding Plan is a monthly subscription package offered by various providers, ideal for high-frequency Agent usage. CowAgent supports all Coding Plan providers via OpenAI-compatible mode. 
- Coding Plan 的 API Base 和 API Key 通常与普通按量计费接口不通用,请在各厂商平台单独获取。 + Coding Plan API Base and API Key are usually separate from the standard pay-as-you-go ones. Please obtain them from each provider's platform. -## 通用配置格式 +## General Configuration -所有厂商均可使用 OpenAI 兼容协议接入,可在web控制台快速配置。设置模型厂商为**OpenAI**,选择自定义模型并填入模型编码,最后填写对应厂商的API Base 和 API Key: +All providers can be accessed via the OpenAI-compatible protocol, and can be quickly configured through the web console. Set the model provider to **OpenAI**, select a custom model and enter the model code, then fill in the corresponding provider's API Base and API Key: -也可通过 `config.json` 配置文件直接修改: +You can also configure directly in `config.json`: ```json { "bot_type": "openai", - "model": "模型名称", - "open_ai_api_base": "厂商 Coding Plan API Base", + "model": "MODEL_NAME", + "open_ai_api_base": "PROVIDER_CODING_PLAN_API_BASE", "open_ai_api_key": "YOUR_API_KEY" } ``` -| 参数 | 说明 | +| Parameter | Description | | --- | --- | -| `bot_type` | 固定为 `openai`(OpenAI 兼容方式) | -| `model` | 各厂商支持的模型名称 | -| `open_ai_api_base` | 各厂商 Coding Plan 专用 API Base | -| `open_ai_api_key` | 各厂商 Coding Plan 专用 API Key | +| `bot_type` | Must be `openai` (OpenAI-compatible mode) | +| `model` | Model name supported by the provider | +| `open_ai_api_base` | Provider's Coding Plan API Base URL | +| `open_ai_api_key` | Provider's Coding Plan API Key | --- -## 阿里云 +## Alibaba Cloud ```json { @@ -46,13 +46,13 @@ description: Coding Plan 模式模型配置 } ``` -| 参数 | 说明 | +| Parameter | Description | | --- | --- | -| `model` | `qwen3.5-plus`、`qwen3-max-2026-01-23`、`qwen3-coder-next`、`qwen3-coder-plus`、`glm-5`、`glm-4.7`、`kimi-k2.5`、`MiniMax-M2.5` | +| `model` | `qwen3.5-plus`, `qwen3-max-2026-01-23`, `qwen3-coder-next`, `qwen3-coder-plus`, `glm-5`, `glm-4.7`, `kimi-k2.5`, `MiniMax-M2.5` | | `open_ai_api_base` | `https://coding.dashscope.aliyuncs.com/v1` | -| `open_ai_api_key` | Coding Plan 专用 Key(与按量计费接口不通用) | +| `open_ai_api_key` | Coding Plan specific key (not shared with 
pay-as-you-go) | -官方文档:[快速开始](https://help.aliyun.com/zh/model-studio/coding-plan-quickstart?spm=a2c4g.11186623.help-menu-2400256.d_0_2_1.70115203zi5Igc)、[模型列表](https://help.aliyun.com/zh/model-studio/coding-plan) +Reference: [Quick Start](https://help.aliyun.com/zh/model-studio/coding-plan-quickstart?spm=a2c4g.11186623.help-menu-2400256.d_0_2_1.70115203zi5Igc), [Model List](https://help.aliyun.com/zh/model-studio/coding-plan) --- @@ -67,18 +67,17 @@ description: Coding Plan 模式模型配置 } ``` -| 参数 | 说明 | +| Parameter | Description | | --- | --- | -| `model` | `MiniMax-M2.5`、`MiniMax-M2.5-highspeed`、`MiniMax-M2.1`、`MiniMax-M2` | -| `open_ai_api_base` | 国内:`https://api.minimaxi.com/v1`;海外:`https://api.minimax.io/v1` | -| `open_ai_api_key` | Coding Plan 专用 Key(与按量计费接口不通用) | +| `model` | `MiniMax-M2.5`, `MiniMax-M2.5-highspeed`, `MiniMax-M2.1`, `MiniMax-M2` | +| `open_ai_api_base` | China: `https://api.minimaxi.com/v1`; Global: `https://api.minimax.io/v1` | +| `open_ai_api_key` | Coding Plan specific key (not shared with pay-as-you-go) | -官方文档:[国内 Key 获取](https://platform.minimaxi.com/docs/coding-plan/quickstart)、[模型列表](https://platform.minimaxi.com/docs/guides/pricing-coding-plan)、[国际 Key 获取](https://platform.minimax.io/docs/coding-plan/quickstart) +Reference: [China Key](https://platform.minimaxi.com/docs/coding-plan/quickstart), [Model List](https://platform.minimaxi.com/docs/guides/pricing-coding-plan), [Global Key](https://platform.minimax.io/docs/coding-plan/quickstart) --- - -## 智谱 GLM +## GLM ```json { @@ -89,34 +88,13 @@ description: Coding Plan 模式模型配置 } ``` -| 参数 | 说明 | +| Parameter | Description | | --- | --- | -| `model` | `glm-5`、`glm-4.7`、`glm-4.6`、`glm-4.5`、`glm-4.5-air` | -| `open_ai_api_base` | 中国区:`https://open.bigmodel.cn/api/coding/paas/v4`;全球区:`https://api.z.ai/api/coding/paas/v4` | -| `open_ai_api_key` | API Key 与普通接口通用 | +| `model` | `glm-5`, `glm-4.7`, `glm-4.6`, `glm-4.5`, `glm-4.5-air` | +| `open_ai_api_base` | China: 
`https://open.bigmodel.cn/api/coding/paas/v4`; Global: `https://api.z.ai/api/coding/paas/v4` | +| `open_ai_api_key` | Shared with standard API | -官方文档:[国内版快速开始](https://docs.bigmodel.cn/cn/coding-plan/quick-start)、[国际版快速开始](https://docs.z.ai/devpack/quick-start) - ---- - -## 火山引擎 - -```json -{ - "bot_type": "openai", - "model": "Doubao-Seed-2.0-Code", - "open_ai_api_base": "https://ark.cn-beijing.volces.com/api/coding/v3", - "open_ai_api_key": "YOUR_API_KEY" -} -``` - -| 参数 | 说明 | -| --- | --- | -| `model` | `Doubao-Seed-2.0-Code`、`Doubao-Seed-2.0-pro`、`Doubao-Seed-2.0-lite`、`Doubao-Seed-Code`、`MiniMax-M2.5`、`Kimi-K2.5`、`GLM-4.7`、`DeepSeek-V3.2` | -| `open_ai_api_base` | `https://ark.cn-beijing.volces.com/api/coding/v3` | -| `open_ai_api_key` | API Key 与普通接口通用 | - -官方文档:[快速开始](https://www.volcengine.com/docs/82379/1928261?lang=zh) +Reference: [China Quick Start](https://docs.bigmodel.cn/cn/coding-plan/quick-start), [Global Quick Start](https://docs.z.ai/devpack/quick-start) --- @@ -131,10 +109,31 @@ description: Coding Plan 模式模型配置 } ``` -| 参数 | 说明 | +| Parameter | Description | | --- | --- | -| `model` | 填写 `kimi-for-coding` 会自动更新模型,或指定模型例如 `kimi-k2.6` | +| `model` | Use `kimi-for-coding` for auto-updating model, or specify a model such as `kimi-k2.6` | | `moonshot_base_url` | `https://api.kimi.com/coding/v1` | -| `moonshot_api_key` | Coding Plan 专用 Key(与按量计费接口不通用) | +| `moonshot_api_key` | Coding Plan specific key (not shared with pay-as-you-go) | -官方文档:[Key 获取](https://www.kimi.com/code/docs/) +Reference: [Key & Docs](https://www.kimi.com/code/docs/) + +--- + +## Volcengine + +```json +{ + "bot_type": "openai", + "model": "Doubao-Seed-2.0-Code", + "open_ai_api_base": "https://ark.cn-beijing.volces.com/api/coding/v3", + "open_ai_api_key": "YOUR_API_KEY" +} +``` + +| Parameter | Description | +| --- | --- | +| `model` | `Doubao-Seed-2.0-Code`, `Doubao-Seed-2.0-pro`, `Doubao-Seed-2.0-lite`, `Doubao-Seed-Code`, `MiniMax-M2.5`, `Kimi-K2.5`, `GLM-4.7`, `DeepSeek-V3.2` 
| +| `open_ai_api_base` | `https://ark.cn-beijing.volces.com/api/coding/v3` | +| `open_ai_api_key` | Shared with standard API | + +Reference: [Quick Start](https://www.volcengine.com/docs/82379/1928261?lang=zh) diff --git a/docs/models/custom.mdx b/docs/models/custom.mdx index 2673a8de..45a7d2e1 100644 --- a/docs/models/custom.mdx +++ b/docs/models/custom.mdx @@ -1,21 +1,21 @@ --- -title: 自定义 -description: 自定义厂商配置,适用于第三方 API 代理和本地模型 +title: Custom +description: Custom vendor configuration for third-party API proxies and local models --- -适用于通过 OpenAI 兼容协议接入的第三方模型服务或本地部署的模型,例如: +For third-party model services accessed via the OpenAI-compatible protocol, or for locally deployed models, such as: -- **第三方 API 代理**:使用统一的 API Base 调用多种模型 -- **本地模型**:通过 Ollama、vLLM、LocalAI 等工具在本地部署的模型 -- **私有化部署**:企业内部部署的模型服务 +- **Third-party API proxies**: call multiple models through a unified API base +- **Local models**: models deployed locally with tools like Ollama, vLLM, LocalAI +- **Private deployments**: model services deployed inside an enterprise - 与 `openai` 厂商的区别:选择自定义厂商后,通过 `/config model` 切换模型时,不会自动切换厂商类型,始终使用自定义的 API 地址。 + Difference from the `openai` vendor: when a custom vendor is selected, switching models via `/config model` does not automatically switch the vendor type — the custom API address is always used. 
-## 文本对话 +## Text Chat -### 第三方 API 代理 +### Third-party API proxy ```json { @@ -26,16 +26,16 @@ description: 自定义厂商配置,适用于第三方 API 代理和本地模 } ``` -| 参数 | 说明 | +| Parameter | Description | | --- | --- | -| `bot_type` | 必须设为 `custom` | -| `model` | 模型名称,填写代理服务支持的任意模型名 | -| `custom_api_key` | API 密钥,由代理服务提供 | -| `custom_api_base` | API 地址,由代理服务提供,需兼容 OpenAI 协议 | +| `bot_type` | Must be set to `custom` | +| `model` | Model name; any model name supported by the proxy service | +| `custom_api_key` | API key provided by the proxy service | +| `custom_api_base` | API endpoint provided by the proxy service; must be OpenAI-compatible | -### 本地模型 +### Local models -本地模型通常不需要 API Key,只需填写 API Base: +Local models usually do not require an API key — only the API base needs to be filled in: ```json { @@ -45,17 +45,17 @@ description: 自定义厂商配置,适用于第三方 API 代理和本地模 } ``` -常见的本地部署工具及默认地址: +Common local deployment tools and their default endpoints: -| 工具 | 默认 API Base | +| Tool | Default API Base | | --- | --- | | [Ollama](https://ollama.com) | `http://localhost:11434/v1` | | [vLLM](https://docs.vllm.ai) | `http://localhost:8000/v1` | | [LocalAI](https://localai.io) | `http://localhost:8080/v1` | -### 切换模型 +### Switching Models -自定义厂商下切换模型时,只会修改 `model`,不会改变 `bot_type` 和 API 地址: +Switching models under a custom vendor only changes `model` — `bot_type` and the API endpoint remain unchanged: ``` /config model qwen3.5:27b diff --git a/docs/models/deepseek.mdx b/docs/models/deepseek.mdx index 57b96d55..6de8d09b 100644 --- a/docs/models/deepseek.mdx +++ b/docs/models/deepseek.mdx @@ -1,11 +1,11 @@ --- title: DeepSeek -description: DeepSeek 模型配置(文本对话 + 思考模式) +description: DeepSeek model configuration (Text Chat + Thinking Mode) --- -DeepSeek 是当前 Agent 模式默认推荐的厂商之一,主打高性价比的文本对话和任务规划能力。 +DeepSeek is one of the default recommended vendors in Agent mode, focused on cost-effective text chat and task planning. 
-## 文本对话 +## Text Chat ```json { @@ -14,26 +14,26 @@ DeepSeek 是当前 Agent 模式默认推荐的厂商之一,主打高性价比 } ``` -| 参数 | 说明 | +| Parameter | Description | | --- | --- | -| `model` | 支持 `deepseek-v4-flash`(默认)、`deepseek-v4-pro` | -| `deepseek_api_key` | 在 [DeepSeek 平台](https://platform.deepseek.com/api_keys) 创建 | -| `deepseek_api_base` | 可选,默认为 `https://api.deepseek.com/v1`,可修改为第三方代理地址 | +| `model` | Supports `deepseek-v4-flash` (Default), `deepseek-v4-pro` | +| `deepseek_api_key` | Create one on the [DeepSeek Platform](https://platform.deepseek.com/api_keys) | +| `deepseek_api_base` | Optional, defaults to `https://api.deepseek.com/v1`. Can be changed to a third-party proxy | -### 模型选择 +### Model Selection -| 模型 | 适用场景 | +| Model | Use Case | | --- | --- | -| `deepseek-v4-flash` | 默认推荐,速度快、成本低 | -| `deepseek-v4-pro` | 更智能,复杂任务效果更强 | +| `deepseek-v4-flash` | Default recommended; fast and low cost | +| `deepseek-v4-pro` | Smarter; better for complex tasks | -## 思考模式 +## Thinking Mode -V4 系列(`deepseek-v4-flash` / `deepseek-v4-pro`)支持显式的「思考模式」:模型在输出最终回答前,先输出一段思维链(`reasoning_content`),从而提升答案质量。 +The V4 series (`deepseek-v4-flash` / `deepseek-v4-pro`) supports an explicit "thinking mode": before producing the final answer, the model emits a chain of thought (`reasoning_content`) to improve answer quality. -### 开关 +### Toggle -通过全局配置 `enable_thinking` 控制,也可在 web控制台 - 配置页面中进行切换: +Controlled by the global `enable_thinking` config, and can also be toggled from the Web Console's configuration page: ```json { @@ -41,12 +41,12 @@ V4 系列(`deepseek-v4-flash` / `deepseek-v4-pro`)支持显式的「思考 } ``` -- `true`:所有渠道下模型都会先思考再作答。Web 控制台会展示思考过程,IM 渠道(微信 / 企微 / 钉钉 / 飞书)虽不展示但同样获得更好答案。 -- `false`:关闭思考,响应更快,首字延迟更低。 +- `true`: the model thinks before answering across all channels. The Web Console displays the thinking process; IM channels (WeChat / WeCom / DingTalk / Feishu) do not show it but still get better answers. +- `false`: thinking is disabled, responses are faster, and time-to-first-token is lower. 
-### 推理强度 +### Reasoning Effort -思考模式下可通过 `reasoning_effort` 控制推理强度: +Under thinking mode, `reasoning_effort` controls reasoning intensity: ```json { @@ -55,18 +55,18 @@ V4 系列(`deepseek-v4-flash` / `deepseek-v4-pro`)支持显式的「思考 } ``` -| 取值 | 适用场景 | +| Value | Use Case | | --- | --- | -| `high`(默认) | 日常 Agent 任务,思考与速度的平衡 | -| `max` | 复杂编码、长链路规划、严格约束的任务,推理更深但耗时与输出 token 更多 | +| `high` (Default) | Day-to-day Agent tasks; balanced reasoning and speed | +| `max` | Complex coding, long-horizon planning, strictly constrained tasks; deeper reasoning but more time and output tokens | -`reasoning_effort` 仅在 `enable_thinking` 为 `true` 时生效;模型不支持思考模式时该字段自动忽略。 +`reasoning_effort` only takes effect when `enable_thinking` is `true`; it is ignored automatically when the model does not support thinking mode. -### 行为说明 +### Behavior Notes -- **采样参数**:思考模式下 `temperature`、`top_p`、`presence_penalty`、`frequency_penalty` 会被服务端忽略(不会报错),CowAgent 会自动跳过传入。 -- **多轮工具调用**:当历史中包含工具调用时,DeepSeek 要求所有 assistant 消息必须回传 `reasoning_content`。CowAgent 会自动处理回传逻辑,跨轮次切换思考开关也不会出错。 +- **Sampling parameters**: in thinking mode, `temperature`, `top_p`, `presence_penalty`, and `frequency_penalty` are ignored by the server (without errors). CowAgent automatically skips them. +- **Multi-turn tool calls**: when the history contains tool calls, DeepSeek requires every assistant message to include `reasoning_content`. CowAgent handles this automatically, so toggling thinking mode across turns will not cause errors. - 默认使用 `deepseek-v4-flash`;复杂任务可使用 `deepseek-v4-pro`;需要深度推理可开启 `enable_thinking`。 + `deepseek-v4-flash` is used by default; switch to `deepseek-v4-pro` for complex tasks; enable `enable_thinking` when deep reasoning is needed. 
diff --git a/docs/models/doubao.mdx b/docs/models/doubao.mdx index cfdc5670..818275e5 100644 --- a/docs/models/doubao.mdx +++ b/docs/models/doubao.mdx @@ -1,15 +1,15 @@ --- -title: 豆包 Doubao -description: 豆包(火山方舟)模型配置(文本 / 图像理解 / 图像生成 / 向量) +title: Doubao +description: Doubao (Volcengine Ark) model configuration (Text / Image Understanding / Image Generation / Embedding) --- -豆包(火山方舟)支持文本对话、图像理解、图像生成(Seedream)和向量能力,一份 `ark_api_key` 即可启用全部能力。 +Doubao (Volcengine Ark) supports text chat, image understanding, image generation (Seedream), and embedding. A single `ark_api_key` enables all capabilities. - 通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。 + All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file. -## 文本对话 +## Text Chat ```json { @@ -18,17 +18,17 @@ description: 豆包(火山方舟)模型配置(文本 / 图像理解 / 图 } ``` -| 参数 | 说明 | +| Parameter | Description | | --- | --- | -| `model` | 可填 `doubao-seed-2-0-pro-260215`、`doubao-seed-2-0-code-preview-260215`、`doubao-seed-2-0-lite-260215` 等 | -| `ark_api_key` | 在 [火山方舟控制台](https://console.volcengine.com/ark/region:ark+cn-beijing/apikey) 创建 | -| `ark_base_url` | 可选,默认为 `https://ark.cn-beijing.volces.com/api/v3` | +| `model` | Can be `doubao-seed-2-0-pro-260215`, `doubao-seed-2-0-code-preview-260215`, `doubao-seed-2-0-lite-260215`, etc. | +| `ark_api_key` | Create one in the [Volcengine Ark Console](https://console.volcengine.com/ark/region:ark+cn-beijing/apikey) | +| `ark_base_url` | Optional, defaults to `https://ark.cn-beijing.volces.com/api/v3` | -## 图像理解 +## Image Understanding -配置 `ark_api_key` 后 Agent 的 Vision 工具会自动使用 `doubao-seed-2-0-pro-260215` 识别图像,无需额外配置。 +Once `ark_api_key` is configured, the Agent's Vision tool automatically uses `doubao-seed-2-0-pro-260215` to recognize images, with no extra setup required. 
-如需手动指定 Vision 模型: +To manually specify a Vision model: ```json { @@ -40,7 +40,7 @@ description: 豆包(火山方舟)模型配置(文本 / 图像理解 / 图 } ``` -## 图像生成 +## Image Generation ```json { @@ -52,9 +52,9 @@ description: 豆包(火山方舟)模型配置(文本 / 图像理解 / 图 } ``` -可选模型:`seedream-5.0-lite`、`seedream-4.5`。 +Available models: `seedream-5.0-lite`, `seedream-4.5`. -## 向量 +## Embedding ```json { @@ -63,4 +63,4 @@ description: 豆包(火山方舟)模型配置(文本 / 图像理解 / 图 } ``` -默认模型 `doubao-embedding-vision-251215`(多模态 embedding),可在配置文件中通过 `embedding_dimensions` 指定 1024 或 2048 维。修改 embedding 后需执行 `/memory rebuild-index` 命令重建索引。 +The default model is `doubao-embedding-vision-251215` (multimodal embedding); the dimension (1024 or 2048) can be set via `embedding_dimensions` in the configuration file. After changing the embedding, run `/memory rebuild-index` to rebuild the index. diff --git a/docs/models/gemini.mdx b/docs/models/gemini.mdx index f1c8991a..b2d9520b 100644 --- a/docs/models/gemini.mdx +++ b/docs/models/gemini.mdx @@ -1,15 +1,15 @@ --- title: Gemini -description: Google Gemini 模型配置(文本对话 + 图像理解 + 图像生成) +description: Google Gemini model configuration (Text Chat + Image Understanding + Image Generation) --- -Google Gemini 支持文本对话、图像理解和图像生成(Nano Banana 系列),一个 `gemini_api_key` 即可启用全部能力。 +Google Gemini supports text chat, image understanding, and image generation (Nano Banana series). A single `gemini_api_key` enables all capabilities. - 通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。 + All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file. 
-## 文本对话 +## Text Chat ```json { @@ -18,17 +18,17 @@ Google Gemini 支持文本对话、图像理解和图像生成(Nano Banana 系 } ``` -| 参数 | 说明 | +| Parameter | Description | | --- | --- | -| `model` | 推荐 `gemini-3.5-flash`,亦支持 `gemini-3.1-pro-preview`、`gemini-3.1-flash-lite-preview`、`gemini-3-flash-preview`、`gemini-3-pro-preview` 等,参考 [官方文档](https://ai.google.dev/gemini-api/docs/models) | -| `gemini_api_key` | 在 [Google AI Studio](https://aistudio.google.com/app/apikey) 创建 | -| `gemini_api_base` | 可选,默认为 `https://generativelanguage.googleapis.com`,可改为第三方代理 | +| `model` | Recommended: `gemini-3.5-flash`; also supports `gemini-3.1-pro-preview`, `gemini-3.1-flash-lite-preview`, `gemini-3-flash-preview`, `gemini-3-pro-preview`, etc. See [official docs](https://ai.google.dev/gemini-api/docs/models) | +| `gemini_api_key` | Create one in [Google AI Studio](https://aistudio.google.com/app/apikey) | +| `gemini_api_base` | Optional, defaults to `https://generativelanguage.googleapis.com`. Can be changed to a third-party proxy | -## 图像理解 +## Image Understanding -Gemini 全系列模型均原生支持视觉,配置 `gemini_api_key` 后 Agent 的 Vision 工具会自动使用主模型识别图像,无需额外配置。 +All Gemini models natively support vision. Once `gemini_api_key` is configured, the Agent's Vision tool automatically uses the main model to recognize images, with no extra setup required. 
-如需手动指定 Vision 模型: +To manually specify a Vision model: ```json { @@ -40,7 +40,7 @@ Gemini 全系列模型均原生支持视觉,配置 `gemini_api_key` 后 Agent } ``` -## 图像生成 +## Image Generation ```json { @@ -52,7 +52,7 @@ Gemini 全系列模型均原生支持视觉,配置 `gemini_api_key` 后 Agent } ``` -| 模型 ID | 别名 | +| Model ID | Alias | | --- | --- | | `gemini-3.1-flash-image-preview` | Nano Banana 2 | | `gemini-3-pro-image-preview` | Nano Banana Pro | diff --git a/docs/models/glm.mdx b/docs/models/glm.mdx index ad5f8fd3..473a805c 100644 --- a/docs/models/glm.mdx +++ b/docs/models/glm.mdx @@ -1,15 +1,15 @@ --- -title: 智谱 GLM -description: 智谱 AI GLM 模型配置(文本 / 图像理解 / 语音识别 / 向量) +title: GLM +description: Zhipu AI GLM model configuration (Text / Image Understanding / Speech-to-Text / Embedding) --- -智谱 AI 支持文本对话、图像理解、语音识别(ASR)和向量(Embedding),一份 `zhipu_ai_api_key` 即可启用全部能力。 +Zhipu AI supports text chat, image understanding, speech-to-text (ASR), and embedding. A single `zhipu_ai_api_key` enables all capabilities. - 通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。 + All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file. -## 文本对话 +## Text Chat ```json { @@ -18,17 +18,17 @@ description: 智谱 AI GLM 模型配置(文本 / 图像理解 / 语音识别 / } ``` -| 参数 | 说明 | +| Parameter | Description | | --- | --- | -| `model` | 可填 `glm-5.1`、`glm-5-turbo`、`glm-5`、`glm-4.7`、`glm-4-plus`、`glm-4-flash`、`glm-4-air` 等,参考 [模型编码](https://bigmodel.cn/dev/api/normal-model/glm-4) | -| `zhipu_ai_api_key` | 在 [智谱 AI 控制台](https://www.bigmodel.cn/usercenter/proj-mgmt/apikeys) 创建 | -| `zhipu_ai_api_base` | 可选,默认为 `https://open.bigmodel.cn/api/paas/v4` | +| `model` | Can be `glm-5.1`, `glm-5-turbo`, `glm-5`, `glm-4.7`, `glm-4-plus`, `glm-4-flash`, `glm-4-air`, etc. 
See [model codes](https://bigmodel.cn/dev/api/normal-model/glm-4) | +| `zhipu_ai_api_key` | Create one in the [Zhipu AI Console](https://www.bigmodel.cn/usercenter/proj-mgmt/apikeys) | +| `zhipu_ai_api_base` | Optional, defaults to `https://open.bigmodel.cn/api/paas/v4` | -## 图像理解 +## Image Understanding -智谱 chat 系列模型(`glm-5.1`、`glm-5-turbo` 等)不支持视觉,视觉调用统一路由到 `glm-5v-turbo`。配置 `zhipu_ai_api_key` 后 Agent 的 Vision 工具会自动使用该模型,无需在配置文件中显式指定。 +Zhipu's chat models (`glm-5.1`, `glm-5-turbo`, etc.) do not support vision; vision calls are uniformly routed to `glm-5v-turbo`. Once `zhipu_ai_api_key` is configured, the Agent's Vision tool automatically uses this model, with no need to specify it explicitly in the configuration file. -## 语音识别 +## Speech-to-Text (ASR) ```json { @@ -37,14 +37,14 @@ description: 智谱 AI GLM 模型配置(文本 / 图像理解 / 语音识别 / } ``` -| 参数 | 说明 | +| Parameter | Description | | --- | --- | -| `voice_to_text` | 设为 `zhipu` 启用智谱 ASR | -| `voice_to_text_model` | 可选,默认 `glm-asr-2512` | +| `voice_to_text` | Set to `zhipu` to enable Zhipu ASR | +| `voice_to_text_model` | Optional, defaults to `glm-asr-2512` | -凭证自动复用 `zhipu_ai_api_key`。语音文件建议小于 25MB,超大文件可能被服务端拒绝。 +Credentials are automatically reused from `zhipu_ai_api_key`. Audio files should be smaller than 25MB; oversized files may be rejected by the server. -## 向量 +## Embedding ```json { @@ -53,4 +53,4 @@ description: 智谱 AI GLM 模型配置(文本 / 图像理解 / 语音识别 / } ``` -可选模型:`embedding-3`、`embedding-2`。修改 embedding 后需执行 `/memory rebuild-index` 命令重建索引。 +Available models: `embedding-3`, `embedding-2`. After changing the embedding, run `/memory rebuild-index` to rebuild the index. 
diff --git a/docs/models/index.mdx b/docs/models/index.mdx index 02402b6a..c3744234 100644 --- a/docs/models/index.mdx +++ b/docs/models/index.mdx @@ -1,40 +1,38 @@ --- -title: 模型概览 -description: CowAgent 支持的模型厂商及能力矩阵 +title: Models Overview +description: Model vendors supported by CowAgent and their capability matrix --- -CowAgent 支持国内外主流厂商的大语言模型,模型接口实现在项目的 `models/` 目录下。除文本对话外,部分厂商还提供视觉理解、图像生成、语音识别、语音合成、向量等能力,可在 Agent 流程中按需调用。 +CowAgent supports a wide range of mainstream large language models. Model interfaces live under the project's `models/` directory. Beyond text chat, several vendors also provide vision understanding, image generation, speech-to-text, text-to-speech, and embeddings — all of which can be invoked on demand in the Agent flow. +## Capability Matrix -## 模型能力总览 +A snapshot of each vendor's capabilities. "Text" refers to the main chat model; the remaining columns show which Agent capabilities the vendor can power. -各厂商提供的能力一览。「文本」指主对话模型,其余列表示该厂商可承担对应 Agent 能力。 - -| 厂商 | 代表模型 | 文本 | 图像理解 | 图像生成 | 语音识别 | 语音合成 | 向量 | +| Vendor | Representative Models | Text | Vision | Image Gen | STT | TTS | Embedding | | --- | --- | :-: | :-: | :-: | :-: | :-: | :-: | | [DeepSeek](/models/deepseek) | deepseek-v4-flash / pro | ✅ | | | | | | | [MiniMax](/models/minimax) | MiniMax-M2.7 | ✅ | ✅ | ✅ | | ✅ | | | [Claude](/models/claude) | claude-opus-4-8 | ✅ | ✅ | | | | | | [Gemini](/models/gemini) | gemini-3.5-flash | ✅ | ✅ | ✅ | | | | -| [OpenAI](/models/openai) | gpt-5.5、o 系列 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| [智谱 GLM](/models/glm) | glm-5.1、glm-5v-turbo | ✅ | ✅ | | ✅ | | ✅ | -| [通义千问](/models/qwen) | qwen3.7-max | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| [豆包 Doubao](/models/doubao) | doubao-seed-2.0 系列 | ✅ | ✅ | ✅ | | | ✅ | +| [OpenAI](/models/openai) | gpt-5.5, o-series | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [GLM](/models/glm) | glm-5.1, glm-5v-turbo | ✅ | ✅ | | ✅ | | ✅ | +| [Qwen](/models/qwen) | qwen3.7-max | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [Doubao](/models/doubao) | doubao-seed-2.0 series | ✅ | ✅ | 
✅ | | | ✅ | | [Kimi](/models/kimi) | kimi-k2.6 | ✅ | ✅ | | | | | -| [百度千帆](/models/qianfan) | ernie-5.1 | ✅ | ✅ | | | | | -| [小米 MiMo](/models/mimo) | mimo-v2.5-pro / v2.5 | ✅ | ✅ | | | ✅ | | -| [LinkAI](/models/linkai) | 多厂商 100+ 模型统一接入 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| [自定义](/models/custom) |本地模型 / 三方代理 | ✅ | | | | | | +| [ERNIE](/models/qianfan) | ernie-5.1 | ✅ | ✅ | | | | | +| [MiMo](/models/mimo) | mimo-v2.5-pro / v2.5 | ✅ | ✅ | | | ✅ | | +| [LinkAI](/models/linkai) | 100+ models from multiple vendors | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [Custom](/models/custom) | Local models / third-party proxies | ✅ | | | | | | - Web 控制台中各项能力(视觉 / 图像 / 语音识别 / 语音合成 / 向量 / 网络搜索)均可独立配置厂商与模型,互相之间不强制绑定。 + Every capability in the Web console (Vision / Image / STT / TTS / Embedding / Web Search) can be configured independently with its own vendor and model — there is no forced binding between them. +## How to Configure -## 配置方式 +**Option 1 (recommended):** Manage models and capabilities online via the [Web console](/channels/web), with no need to edit the configuration file: -**方式一(推荐):** 通过 [Web 控制台](/channels/web) 在线管理模型与各项能力,无需手动编辑配置文件: + - - -**方式二:** 手动编辑 `config.json`,根据所选模型填写对应的模型名称和 API Key。每个模型也支持 OpenAI 兼容方式接入,将 `bot_type` 设为 `openai`,配置 `open_ai_api_base` 和 `open_ai_api_key` 即可。 +**Option 2:** Edit `config.json` manually and fill in the model name and API key for the selected vendor. Every model also supports OpenAI-compatible access — just set `bot_type` to `openai` and configure `open_ai_api_base` and `open_ai_api_key`. diff --git a/docs/models/kimi.mdx b/docs/models/kimi.mdx index beb5beaf..3292a976 100644 --- a/docs/models/kimi.mdx +++ b/docs/models/kimi.mdx @@ -1,15 +1,15 @@ --- title: Kimi -description: Kimi(Moonshot)模型配置(文本对话 + 图像理解) +description: Kimi (Moonshot) model configuration (Text Chat + Image Understanding) --- -Kimi 由 Moonshot 提供,支持文本对话与图像理解,`kimi-k2.x` 系列原生支持视觉。 +Kimi is provided by Moonshot and supports both text chat and image understanding. 
The `kimi-k2.x` series natively supports vision. - 通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。 + All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file. -## 文本对话 +## Text Chat ```json { @@ -18,17 +18,17 @@ Kimi 由 Moonshot 提供,支持文本对话与图像理解,`kimi-k2.x` 系 } ``` -| 参数 | 说明 | +| Parameter | Description | | --- | --- | -| `model` | 可填 `kimi-k2.6`、`kimi-k2.5`、`kimi-k2`、`moonshot-v1-8k`、`moonshot-v1-32k`、`moonshot-v1-128k` | -| `moonshot_api_key` | 在 [Moonshot 控制台](https://platform.moonshot.cn/console/api-keys) 创建 | -| `moonshot_base_url` | 可选,默认为 `https://api.moonshot.cn/v1` | +| `model` | Can be `kimi-k2.6`, `kimi-k2.5`, `kimi-k2`, `moonshot-v1-8k`, `moonshot-v1-32k`, `moonshot-v1-128k` | +| `moonshot_api_key` | Create one in the [Moonshot Console](https://platform.moonshot.cn/console/api-keys) | +| `moonshot_base_url` | Optional, defaults to `https://api.moonshot.cn/v1` | -## 图像理解 +## Image Understanding -配置 `moonshot_api_key` 后 Agent 的 Vision 工具会自动使用 `kimi-k2.6` 识别图像,无需额外配置。 +Once `moonshot_api_key` is configured, the Agent's Vision tool automatically uses `kimi-k2.6` to recognize images, with no extra setup required. -如需手动指定 Vision 模型: +To manually specify a Vision model: ```json { diff --git a/docs/models/linkai.mdx b/docs/models/linkai.mdx index 68647ebc..f60c2160 100644 --- a/docs/models/linkai.mdx +++ b/docs/models/linkai.mdx @@ -1,15 +1,15 @@ --- title: LinkAI -description: 通过 LinkAI 平台统一接入文本、视觉、图像、语音与向量能力 +description: Access text, vision, image, speech, and embedding capabilities through the LinkAI platform --- -通过一份 `linkai_api_key` 即可访问 OpenAI、Claude、Gemini、DeepSeek、MiniMax、Qwen、Kimi、豆包 等主流厂商的全部能力。 +A single `linkai_api_key` gives you access to all capabilities of mainstream vendors such as OpenAI, Claude, Gemini, DeepSeek, MiniMax, Qwen, Kimi, and Doubao. 
- 通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。 + All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file. -## 文本对话 +## Text Chat ```json { @@ -18,17 +18,17 @@ description: 通过 LinkAI 平台统一接入文本、视觉、图像、语音 } ``` -| 参数 | 说明 | +| Parameter | Description | | --- | --- | -| `use_linkai` | 设为 `true` 启用 | -| `linkai_api_key` | 在 [控制台](https://link-ai.tech/console/interface) 创建 | -| `model` | 可填写 [模型列表](https://link-ai.tech/console/models) 中任意编码 | +| `use_linkai` | Set to `true` to enable | +| `linkai_api_key` | Create one in the [Console](https://link-ai.tech/console/interface) | +| `model` | Can be any code from the [model list](https://link-ai.tech/console/models) | -前往 [模型服务](https://link-ai.tech/console/models) 了解更多。 +See [Model Service](https://link-ai.tech/console/models) for more. -## 图像理解 +## Image Understanding -配置完成后 Agent 的 Vision 工具会自动调用网关上的多模态模型,无需额外配置。如需手动指定 Vision 模型: +Once configured, the Agent's Vision tool automatically calls multimodal models via the gateway, with no extra setup required. To manually specify a Vision model: ```json { @@ -40,9 +40,9 @@ description: 通过 LinkAI 平台统一接入文本、视觉、图像、语音 } ``` -可选模型:`gpt-4.1-mini`、`gpt-5.4-mini`、`qwen3.6-plus`、`doubao-seed-2-0-pro-260215`、`kimi-k2.6`、`claude-sonnet-4-6`、`gemini-3.1-flash-lite-preview` 等。 +Available models: `gpt-4.1-mini`, `gpt-5.4-mini`, `qwen3.6-plus`, `doubao-seed-2-0-pro-260215`, `kimi-k2.6`, `claude-sonnet-4-6`, `gemini-3.1-flash-lite-preview`, etc. 
-## 图像生成 +## Image Generation ```json { @@ -54,14 +54,14 @@ description: 通过 LinkAI 平台统一接入文本、视觉、图像、语音 } ``` -| 模型 ID | 别名 | +| Model ID | Alias | | --- | --- | | `gpt-image-2` | OpenAI | | `gemini-3.1-flash-image-preview` | Nano Banana 2 | | `gemini-3-pro-image-preview` | Nano Banana Pro | -| `seedream-5.0-lite` | 字节豆包 Seedream | +| `seedream-5.0-lite` | ByteDance Doubao Seedream | -## 语音识别 +## Speech-to-Text (ASR) ```json { @@ -69,11 +69,11 @@ description: 通过 LinkAI 平台统一接入文本、视觉、图像、语音 } ``` -ASR 固定使用 Whisper,凭证自动复用 `linkai_api_key`。 +ASR always uses Whisper; credentials are automatically reused from `linkai_api_key`. -## 语音合成 +## Text-to-Speech (TTS) -语音合成网关下支持多个底层 TTS 引擎,按 `text_to_voice_model` 选择引擎,音色随引擎切换。 +The TTS gateway supports multiple underlying engines. The engine is selected by `text_to_voice_model`, and the available voices change with the engine. ```json { @@ -83,15 +83,15 @@ ASR 固定使用 Whisper,凭证自动复用 `linkai_api_key`。 } ``` -| `text_to_voice_model` | 引擎说明 | +| `text_to_voice_model` | Engine | | --- | --- | -| `tts-1` | OpenAI · 多语种通用(音色 `alloy` / `nova` / `echo` 等) | -| `doubao` | 字节豆包 · 中文音色丰富 | -| `baidu` | 百度 · 中文主播音色 | +| `tts-1` | OpenAI · Multi-language (voices such as `alloy` / `nova` / `echo`) | +| `doubao` | ByteDance Doubao · Rich Chinese voices | +| `baidu` | Baidu · Chinese broadcaster voices | -不同引擎对应的音色不同,建议在 Web 控制台「模型管理 → 语音合成」中可视化选择。 +Voices differ by engine; we recommend selecting them visually in the Web Console under "Model Management → Text-to-Speech". -## 向量 +## Embedding ```json { @@ -100,4 +100,4 @@ ASR 固定使用 Whisper,凭证自动复用 `linkai_api_key`。 } ``` -默认模型 `text-embedding-3-small`(OpenAI 兼容)。修改 embedding 后需执行 `/memory rebuild-index` 命令重建索引。 +The default model is `text-embedding-3-small` (OpenAI-compatible). After changing the embedding, run `/memory rebuild-index` to rebuild the index.
diff --git a/docs/models/mimo.mdx b/docs/models/mimo.mdx index ea445df9..6f808b8e 100644 --- a/docs/models/mimo.mdx +++ b/docs/models/mimo.mdx @@ -1,15 +1,15 @@ --- -title: 小米 MiMo -description: 小米 MiMo 模型配置(文本对话 + 图像理解 + 语音合成) +title: MiMo +description: Xiaomi MiMo model configuration (Text Chat + Image Understanding + Text-to-Speech) --- -小米 MiMo 是原生全模态大模型,单 `mimo_api_key` 即可同时启用文本对话、图像理解与语音合成。 +Xiaomi MiMo is a native omni-modal large model. A single `mimo_api_key` enables text chat, image understanding, and text-to-speech all at once. - 通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。 + All capabilities below can be configured in one place via the "Model Management" page in the Web Console — no need to manually edit the configuration file. -## 文本对话 +## Text Chat ```json { @@ -19,24 +19,24 @@ description: 小米 MiMo 模型配置(文本对话 + 图像理解 + 语音合 } ``` -| 参数 | 说明 | +| Parameter | Description | | --- | --- | -| `model` | 默认推荐 `mimo-v2.5-pro`,也可使用 `mimo-v2.5` | -| `mimo_api_key` | 在 [MiMo 开放平台](https://platform.xiaomimimo.com/console/api-keys) 创建 | -| `mimo_api_base` | 可选,默认为 `https://api.xiaomimimo.com/v1` | +| `model` | Default recommendation: `mimo-v2.5-pro`; `mimo-v2.5` is also supported | +| `mimo_api_key` | Create one in the [MiMo Open Platform](https://platform.xiaomimimo.com/console/api-keys) | +| `mimo_api_base` | Optional, defaults to `https://api.xiaomimimo.com/v1` | -### 模型选择 +### Model Selection -| 模型 | 适用场景 | +| Model | Use Case | | --- | --- | -| `mimo-v2.5-pro` | 旗舰,原生全模态 + Agent 能力,最高 100 万 tokens 上下文 | -| `mimo-v2.5` | 综合版,原生全模态(文本 / 图像 / 视频 / 音频) | +| `mimo-v2.5-pro` | Flagship: native omni-modal + Agent capability, up to 1M tokens context | +| `mimo-v2.5` | General-purpose, native omni-modal (text / image / video / audio) | -## 思考模式 +## Thinking Mode -MiMo V2.5 系列默认开启「思考模式」:模型在输出最终回答前会先输出 `reasoning_content`(思维链),提升复杂任务表现。 +The MiMo V2.5 series enables "thinking mode" by default: the model emits `reasoning_content` (chain-of-thought) before the final answer, 
improving performance on complex tasks. -通过全局配置 `enable_thinking` 控制是否展示(也可在 Web 控制台 - 配置页面切换): +Use the global `enable_thinking` flag to toggle visibility (also switchable from the Web Console settings): ```json { @@ -44,14 +44,14 @@ MiMo V2.5 系列默认开启「思考模式」:模型在输出最终回答前 } ``` -## 图像理解 +## Image Understanding -配置 `mimo_api_key` 后,Agent 的 Vision 工具可以自动使用 MiMo 视觉模型: +Once `mimo_api_key` is configured, the Agent's Vision tool can automatically use MiMo's vision models: -- 当主模型本身是多模态时(`mimo-v2.5-pro` / `mimo-v2.5`),直接由主模型识别图像,无需额外配置 -- 当主模型是其他厂商时,Vision 工具会根据顺序自动 fallback 到 `mimo-v2.5-pro` +- When the main model itself is multimodal (`mimo-v2.5-pro` / `mimo-v2.5`), images are handled directly by the main model with no extra setup. +- When the main model belongs to another vendor, the Vision tool automatically falls back to `mimo-v2.5-pro` following its fallback order. -如需手动指定 Vision 模型,可在配置文件中显式配置: +To force a specific Vision model, set it explicitly in the configuration: ```json { @@ -64,7 +64,7 @@ MiMo V2.5 系列默认开启「思考模式」:模型在输出最终回答前 } ``` -## 语音合成 +## Text-to-Speech (TTS) ```json { @@ -74,62 +74,63 @@ MiMo V2.5 系列默认开启「思考模式」:模型在输出最终回答前 } ``` -| 参数 | 说明 | +| Parameter | Description | | --- | --- | -| `text_to_voice_model` | 当前仅支持 `mimo-v2.5-tts`(预置音色 + 唱歌模式) | -| `tts_voice_id` | 预置音色名(中文音色直接使用中文名作为 ID) | +| `text_to_voice_model` | Currently only `mimo-v2.5-tts` is supported (preset voices + singing mode) | +| `tts_voice_id` | Preset voice name (Chinese voice IDs use the Chinese name directly) | -### 预置音色 +### Preset Voices -| 音色 ID | 说明 | +| Voice ID | Description | | --- | --- | -| `冰糖` | 中文 · 女声(默认) | -| `茉莉` | 中文 · 女声 | -| `苏打` | 中文 · 男声 | -| `白桦` | 中文 · 男声 | -| `Mia` | 英文 · 女声 | -| `Chloe` | 英文 · 女声 | -| `Milo` | 英文 · 男声 | -| `Dean` | 英文 · 男声 | +| `Mia` | English · Female | +| `Chloe` | English · Female | +| `Milo` | English · Male | +| `Dean` | English · Male | +| `冰糖` | Chinese · Female (default) | +| `茉莉` | Chinese · Female | +| `苏打` | Chinese · Male | +| `白桦` | Chinese · Male | -也可在 Web 控制台的「模型管理 →
语音合成」下拉框中可视化选择。 -### 风格控制 +You can also pick a voice visually from the Web Console under "Model Management → Text-to-Speech". -MiMo TTS 支持在合成文本中嵌入 **音频标签** 来控制情绪、语调、方言、角色甚至唱歌。标签需出现在 **最终被合成为语音的文本(即 Agent 回复内容)** 中,整体风格标签写在开头: +### Style Control + +MiMo TTS supports embedding **audio tags** in the synthesis text to control emotion, tone, dialect, persona, and even singing. Tags must appear in the **text that will be synthesized to speech (i.e. the Agent's reply)**, with the overall style tag placed at the very beginning: ``` -(风格)待合成内容 +(style)content-to-synthesize ``` -支持半角 `()`、全角 `（）` 或 `[]` 三种括号。常见风格示例: +Half-width `()`, full-width `（）`, and `[]` brackets are all accepted. Both Chinese and English style descriptors work — pick whichever language expresses the timbre most precisely. Common examples: -| 类型 | 示例标签 | +| Category | Example tags | | --- | --- | -| 基础情绪 | `开心` `悲伤` `愤怒` `恐惧` `惊讶` `兴奋` `委屈` `平静` `冷漠` | -| 复合情绪 | `怅然` `欣慰` `无奈` `愧疚` `释然` `忐忑` `动情` | -| 整体语调 | `温柔` `高冷` `活泼` `严肃` `慵懒` `俏皮` `深沉` `干练` `凌厉` | -| 音色定位 | `磁性` `醇厚` `清亮` `空灵` `稚嫩` `苍老` `甜美` `沙哑` | -| 人设腔调 | `夹子音` `御姐音` `正太音` `大叔音` `台湾腔` | -| 方言 | `东北话` `四川话` `河南话` `粤语` | -| 角色扮演 | `孙悟空` `林黛玉` | -| 唱歌 | `唱歌`(等价于 `sing` / `singing`) | +| Basic emotions | `happy` `sad` `angry` `fear` `surprised` `excited` `aggrieved` `calm` `indifferent` | +| Compound emotions | `wistful` `relieved` `helpless` `guilty` `at ease` `uneasy` `touched` | +| Overall tone | `gentle` `aloof` `lively` `serious` `languid` `playful` `deep` `sharp` `cutting` | +| Voice character | `magnetic` `mellow` `bright` `ethereal` `childlike` `aged` `sweet` `husky` | +| Persona | `squeaky` `mature lady` `young boy` `uncle` `Taiwanese accent` | +| Dialect | `Northeastern` `Sichuan` `Henan` `Cantonese` | +| Role-play | `Sun Wukong` `Lin Daiyu` | +| Singing | `sing` / `singing` | -示例: +Examples: -- (磁性)夜已经深了,城市还在呼吸。 -- (东北话)哎呀妈呀,这天儿也忒冷了吧! -- (粤语)呢个真係好正啊!
-- (唱歌)原谅我这一生不羁放纵爱自由… +- `(magnetic)The night is deep, and the city is still breathing.` +- `(gentle)Take a breath. You've got this.` +- `(serious)This is the final warning before the system reboots.` +- `(singing)Oh, when the saints go marching in…` -也可以在文本任意位置插入细粒度音频标签来控制呼吸、笑声、停顿等,例如: +You can also insert fine-grained audio tags at any position in the text to control breathing, laughter, pauses, etc. For example: ``` -(紧张,深呼吸)呼……冷静,冷静。(语速加快)自我介绍我背了五十遍了,应该没问题。 +(nervous, deep breath) Phew… stay calm, stay calm. (faster pace) I've rehearsed this intro fifty times, it'll be fine. ``` -完整标签列表参见 [MiMo 语音合成文档](https://platform.xiaomimimo.com/docs/zh-CN/usage-guide/speech-synthesis-v2.5)。 +See the [MiMo speech synthesis documentation](https://platform.xiaomimimo.com/docs/zh-CN/usage-guide/speech-synthesis-v2.5) for the full tag list. - CowAgent 在调用 TTS 时会将 Agent 的回复原文(含 `(...)` 标签)直接送入 MiMo 合成。你可以在人设 / 系统提示词里要求模型「在回复开头用 `(风格)` 标签控制语气」,即可让 IM 渠道(微信 / 飞书 / 钉钉 / 企微)的语音回复带上情绪、方言、唱歌等效果。 + When CowAgent calls TTS, the Agent's reply text (including any `(...)` tags) is forwarded directly to MiMo for synthesis. Tell the model in its persona / system prompt to "prefix replies with a `(style)` tag to control the tone", and IM channels (WeChat / Feishu / DingTalk / WeCom) will play voice replies with the corresponding emotion, dialect, or even singing. diff --git a/docs/models/minimax.mdx b/docs/models/minimax.mdx index 8282f88b..d945d2ea 100644 --- a/docs/models/minimax.mdx +++ b/docs/models/minimax.mdx @@ -1,15 +1,15 @@ --- title: MiniMax -description: MiniMax 模型配置(文本 / 图像理解 / 图像生成 / 语音合成) +description: MiniMax model configuration (Text / Image Understanding / Image Generation / Text-to-Speech) --- -MiniMax 支持文本对话、图像理解、图像生成与语音合成,一份 `minimax_api_key` 即可启用全部能力。 +MiniMax supports text chat, image understanding, image generation, and text-to-speech. A single `minimax_api_key` enables all capabilities. 
- 通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。 + All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file. -## 文本对话 +## Text Chat ```json { @@ -18,16 +18,16 @@ MiniMax 支持文本对话、图像理解、图像生成与语音合成,一份 } ``` -| 参数 | 说明 | +| Parameter | Description | | --- | --- | -| `model` | 可填 `MiniMax-M2.7`、`MiniMax-M2.7-highspeed`、`MiniMax-M2.5`、`MiniMax-M2.1`、`MiniMax-M2.1-lightning`、`MiniMax-M2` 等 | -| `minimax_api_key` | 在 [MiniMax 控制台](https://platform.minimaxi.com/user-center/basic-information/interface-key) 创建 | +| `model` | Can be `MiniMax-M2.7`, `MiniMax-M2.7-highspeed`, `MiniMax-M2.5`, `MiniMax-M2.1`, `MiniMax-M2.1-lightning`, `MiniMax-M2`, etc. | +| `minimax_api_key` | Create one in the [MiniMax Console](https://platform.minimaxi.com/user-center/basic-information/interface-key) | -## 图像理解 +## Image Understanding -MiniMax 的 M2.x 系列 chat 模型本身不支持视觉,视觉调用统一路由到 `MiniMax-Text-01`。配置 `minimax_api_key` 后 Agent 的 Vision 工具会自动使用该模型,无需在配置文件中显式指定。 +MiniMax's M2.x chat models do not support vision natively; vision calls are uniformly routed to `MiniMax-Text-01`. Once `minimax_api_key` is configured, the Agent's Vision tool automatically uses this model, with no need to specify it explicitly in the configuration file. -## 图像生成 +## Image Generation ```json { @@ -39,9 +39,9 @@ MiniMax 的 M2.x 系列 chat 模型本身不支持视觉,视觉调用统一路 } ``` -可选模型:`image-01`。 +Available models: `image-01`. 
-## 语音合成 +## Text-to-Speech (TTS) ```json { @@ -51,21 +51,21 @@ MiniMax 的 M2.x 系列 chat 模型本身不支持视觉,视觉调用统一路 } ``` -| 参数 | 说明 | +| Parameter | Description | | --- | --- | -| `text_to_voice_model` | `speech-2.8-hd`(情绪渲染、自然听感)、`speech-2.8-turbo`(极速)、`speech-2.6-hd`、`speech-2.6-turbo` | -| `tts_voice_id` | 音色 ID,支持中文 / 粤语 / 英 / 日 / 韩,共 70+ 种 | +| `text_to_voice_model` | `speech-2.8-hd` (emotional rendering, natural sound), `speech-2.8-turbo` (ultra-fast), `speech-2.6-hd`, `speech-2.6-turbo` | +| `tts_voice_id` | Voice ID; supports Chinese / Cantonese / English / Japanese / Korean — 70+ voices in total | -常用音色示例: +Common voice examples: -| 音色 ID | 说明 | +| Voice ID | Description | | --- | --- | -| `female-shaonv` | 中文 · 少女(女) | -| `female-yujie` | 中文 · 御姐(女) | -| `female-tianmei` | 中文 · 甜美女性(女) | -| `male-qn-jingying` | 中文 · 精英青年(男) | -| `male-qn-badao` | 中文 · 霸道青年(男) | -| `Cantonese_GentleLady` | 粤语 · 温柔女声 | -| `English_Graceful_Lady` | 英文 · Graceful Lady | +| `female-shaonv` | Chinese · Young Girl (Female) | +| `female-yujie` | Chinese · Mature Lady (Female) | +| `female-tianmei` | Chinese · Sweet Female (Female) | +| `male-qn-jingying` | Chinese · Elite Youth (Male) | +| `male-qn-badao` | Chinese · Dominant Youth (Male) | +| `Cantonese_GentleLady` | Cantonese · Gentle Female Voice | +| `English_Graceful_Lady` | English · Graceful Lady | -完整音色(中文 / 粤语 / 英 / 日 / 韩共 70+ 种)可参考 [系统音色列表](https://platform.minimaxi.com/docs/faq/system-voice-id),也可在 Web 控制台的「模型管理 → 语音合成」下拉框中可视化选择。 +For the full voice list (70+ voices across Chinese / Cantonese / English / Japanese / Korean), see the [system voice list](https://platform.minimaxi.com/docs/faq/system-voice-id), or select visually in the Web Console under "Model Management → Text-to-Speech". 
diff --git a/docs/models/openai.mdx b/docs/models/openai.mdx index aad83c8f..f8715562 100644 --- a/docs/models/openai.mdx +++ b/docs/models/openai.mdx @@ -1,16 +1,16 @@ --- title: OpenAI -description: OpenAI 模型配置(文本 / 视觉 / 图像 / 语音 / 向量) +description: OpenAI model configuration (Text / Vision / Image / Speech / Embedding) --- -OpenAI 是覆盖最完整的厂商,可同时承担文本对话、视觉理解、图像生成、语音识别(ASR)、语音合成(TTS)和向量(Embedding)能力。一份 `open_ai_api_key` 即可让 Agent 用到全部能力。 +OpenAI offers the most complete coverage and can simultaneously serve text chat, vision understanding, image generation, speech-to-text (ASR), text-to-speech (TTS), and embedding. A single `open_ai_api_key` lets the Agent use all of these capabilities. - 通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。 + All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file. -## 文本对话 +## Text Chat ```json { @@ -20,16 +20,16 @@ OpenAI 是覆盖最完整的厂商,可同时承担文本对话、视觉理解 } ``` -| 参数 | 说明 | +| Parameter | Description | | --- | --- | -| `model` | 与 OpenAI 接口的 [model 参数](https://platform.openai.com/docs/models) 一致,支持 `gpt-5.5`、`gpt-5.4`、`gpt-5.4-mini`、`gpt-5.4-nano`、`gpt-5` 系列、`gpt-4.1`、o 系列等;Agent 模式默认 `gpt-5.5`,追求性价比可改为 `gpt-5.4` | -| `open_ai_api_key` | 在 [OpenAI 平台](https://platform.openai.com/api-keys) 创建 | -| `open_ai_api_base` | 可选,修改可接入第三方代理 | -| `bot_type` | 使用 OpenAI 官方模型时无需填写;通过兼容协议接入厂商模型时需设为 `openai` | +| `model` | Same as OpenAI's [model parameter](https://platform.openai.com/docs/models); supports `gpt-5.5`, `gpt-5.4`, `gpt-5.4-mini`, `gpt-5.4-nano`, the `gpt-5` series, `gpt-4.1`, the o-series, etc. 
Agent mode defaults to `gpt-5.5`; use `gpt-5.4` for better cost-efficiency | +| `open_ai_api_key` | Create one on the [OpenAI Platform](https://platform.openai.com/api-keys) | +| `open_ai_api_base` | Optional; change it to access a third-party proxy | +| `bot_type` | Not required when using OpenAI's official models; set to `openai` when accessing other vendors via the compatible protocol | -## 图像理解 +## Image Understanding -`gpt-5.5`、`gpt-5.4`、`gpt-4o`、`gpt-4.1` 等 OpenAI 模型均原生支持视觉,配置 `open_ai_api_key` 后 Agent 的 Vision 工具会自动使用主模型识别图像。若主模型不支持视觉或希望显式指定,可在配置文件中配置: +OpenAI models like `gpt-5.5`, `gpt-5.4`, `gpt-4o`, and `gpt-4.1` natively support vision. Once `open_ai_api_key` is configured, the Agent's Vision tool automatically uses the main model to recognize images. If the main model does not support vision or you want to specify it explicitly, set it in the configuration file: ```json { @@ -41,11 +41,11 @@ OpenAI 是覆盖最完整的厂商,可同时承担文本对话、视觉理解 } ``` -支持的 Vision 模型:`gpt-5.5`、`gpt-5.4`、`gpt-5.4-mini`、`gpt-5.4-nano`、`gpt-5`、`gpt-4.1`、`gpt-4.1-mini`、`gpt-4o`。 +Supported Vision models: `gpt-5.5`, `gpt-5.4`, `gpt-5.4-mini`, `gpt-5.4-nano`, `gpt-5`, `gpt-4.1`, `gpt-4.1-mini`, `gpt-4o`. -## 图像生成 +## Image Generation -在配置文件中指定图像生成模型,Agent 调用图像生成技能时会自动路由到 OpenAI: +Specify the image generation model in the configuration file; the Agent automatically routes image generation skill calls to OpenAI: ```json { @@ -57,9 +57,9 @@ OpenAI 是覆盖最完整的厂商,可同时承担文本对话、视觉理解 } ``` -支持的图像生成模型:`gpt-image-2`、`gpt-image-1`。 +Supported image generation models: `gpt-image-2`, `gpt-image-1`. 
-## 语音识别 +## Speech-to-Text (ASR) ```json { @@ -68,14 +68,14 @@ OpenAI 是覆盖最完整的厂商,可同时承担文本对话、视觉理解 } ``` -| 参数 | 说明 | +| Parameter | Description | | --- | --- | -| `voice_to_text` | 设为 `openai` 启用 OpenAI 语音识别 | -| `voice_to_text_model` | 可选,默认 `gpt-4o-mini-transcribe`;也可填 `gpt-4o-transcribe`、`whisper-1` | +| `voice_to_text` | Set to `openai` to enable OpenAI speech-to-text | +| `voice_to_text_model` | Optional, defaults to `gpt-4o-mini-transcribe`; can also be `gpt-4o-transcribe`, `whisper-1` | -凭证自动复用 `open_ai_api_key`。 +Credentials are automatically reused from `open_ai_api_key`. -## 语音合成 +## Text-to-Speech (TTS) ```json { @@ -85,12 +85,12 @@ OpenAI 是覆盖最完整的厂商,可同时承担文本对话、视觉理解 } ``` -| 参数 | 说明 | +| Parameter | Description | | --- | --- | -| `text_to_voice_model` | `tts-1`、`tts-1-hd`、`gpt-4o-mini-tts` | -| `tts_voice_id` | 音色:`alloy`、`echo`、`fable`、`onyx`、`nova`、`shimmer`、`ash`、`ballad`、`coral`、`sage`、`verse` | +| `text_to_voice_model` | `tts-1`, `tts-1-hd`, `gpt-4o-mini-tts` | +| `tts_voice_id` | Voices: `alloy`, `echo`, `fable`, `onyx`, `nova`, `shimmer`, `ash`, `ballad`, `coral`, `sage`, `verse` | -## 向量 +## Embedding ```json { @@ -99,5 +99,5 @@ OpenAI 是覆盖最完整的厂商,可同时承担文本对话、视觉理解 } ``` -可选模型:`text-embedding-3-small`、`text-embedding-3-large`、`text-embedding-ada-002`。修改 embedding 后需执行 `/memory rebuild-index` 命令重建索引。 +Available models: `text-embedding-3-small`, `text-embedding-3-large`, `text-embedding-ada-002`. After changing the embedding, run `/memory rebuild-index` to rebuild the index. 
diff --git a/docs/models/qianfan.mdx b/docs/models/qianfan.mdx index bdd87214..13525967 100644 --- a/docs/models/qianfan.mdx +++ b/docs/models/qianfan.mdx @@ -1,48 +1,42 @@ --- -title: 百度千帆 -description: 百度千帆 ERNIE 模型配置(文本对话 + 图像理解) +title: ERNIE +description: ERNIE model configuration (Baidu Qianfan) --- -百度千帆提供 ERNIE 系列模型,支持文本对话与图像理解。 - - - 通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。 - - -## 文本对话 +Option 1: Native integration (recommended): ```json { "model": "ernie-5.1", - "qianfan_api_key": "YOUR_API_KEY", + "qianfan_api_key": "", "qianfan_api_base": "https://qianfan.baidubce.com/v2" } ``` -| 参数 | 说明 | +| Parameter | Description | | --- | --- | -| `model` | 默认推荐使用 `ernie-5.1`;也可使用 `ernie-5.0`、`ernie-x1.1`、`ernie-4.5-turbo-128k`、`ernie-4.5-turbo-32k` | -| `qianfan_api_key` | 千帆 API Key,格式通常以 `bce-v3/` 开头 | -| `qianfan_api_base` | 可选,默认为 `https://qianfan.baidubce.com/v2` | +| `model` | Default recommendation: `ernie-5.1`; also supports `ernie-5.0`, `ernie-x1.1`, `ernie-4.5-turbo-128k`, `ernie-4.5-turbo-32k` | +| `qianfan_api_key` | Qianfan API key, usually starting with `bce-v3/` | +| `qianfan_api_base` | Optional, defaults to `https://qianfan.baidubce.com/v2` | -### 模型选择 +## Model Selection -| 模型 | 适用场景 | +| Model | Use Case | | --- | --- | -| `ernie-5.1` | 默认推荐,文心新一代旗舰模型,综合能力最强 | -| `ernie-5.0` | 上一代旗舰模型,综合能力优异 | -| `ernie-x1.1` | 深度思考推理模型,幻觉更低、指令遵循与工具调用更强 | -| `ernie-4.5-turbo-128k` | 长上下文和通用对话 | -| `ernie-4.5-turbo-32k` | 通用对话,成本和上下文更均衡 | +| `ernie-5.1` | Default recommendation; latest ERNIE flagship with the strongest overall capability | +| `ernie-5.0` | Previous-generation flagship with excellent overall capability | +| `ernie-x1.1` | Deep-thinking reasoning model with lower hallucination and stronger instruction following / tool calling | +| `ernie-4.5-turbo-128k` | Long-context and general chat | +| `ernie-4.5-turbo-32k` | General chat with a balanced context window and cost | -## 图像理解 +## Vision tool -配置 `qianfan_api_key` 后,Agent 的 Vision 工具可以自动使用千帆视觉模型: 
+Once `qianfan_api_key` is configured, Agent mode can auto-discover Qianfan for the Vision tool: -- 当主模型本身是多模态时(如 `ernie-5.1`、`ernie-5.0`、`ernie-x1.1`、`ernie-4.5-turbo-vl`),直接由主模型识别图像,无需额外配置 -- 当主模型是纯文本时(如 `ernie-4.5-turbo-128k`),Vision 工具会自动 fallback 到 `ernie-4.5-turbo-vl` +- When the main model itself is multimodal (e.g. `ernie-5.1`, `ernie-5.0`, `ernie-x1.1`, `ernie-4.5-turbo-vl`), images are handled directly by the main model with no extra setup. +- When the main model is text-only (e.g. `ernie-4.5-turbo-128k`), the Vision tool automatically falls back to `ernie-4.5-turbo-vl`. -如需手动指定 Vision 模型,可在配置文件中显式配置: +To force a specific Vision model, set it explicitly in `config.json`: ```json { @@ -54,6 +48,17 @@ description: 百度千帆 ERNIE 模型配置(文本对话 + 图像理解) } ``` +Option 2: OpenAI-compatible configuration: + +```json +{ + "model": "ernie-5.1", + "bot_type": "openai", + "open_ai_api_key": "", + "open_ai_api_base": "https://qianfan.baidubce.com/v2" +} +``` + - 新配置推荐使用 `qianfan_api_key`。旧的 `wenxin`、`wenxin-4`、`baidu_wenxin_api_key`、`baidu_wenxin_secret_key` 配置仍保持兼容。 + Prefer `qianfan_api_key` for new configurations. Existing `wenxin`, `wenxin-4`, `baidu_wenxin_api_key`, and `baidu_wenxin_secret_key` configurations remain supported. diff --git a/docs/models/qwen.mdx b/docs/models/qwen.mdx index 765bae64..8e27269c 100644 --- a/docs/models/qwen.mdx +++ b/docs/models/qwen.mdx @@ -1,15 +1,15 @@ --- -title: 通义千问 Qwen -description: 通义千问模型配置(文本 / 图像理解 / 图像生成 / 语音识别 / 语音合成 / 向量) +title: Qwen +description: Qwen model configuration (Text / Image Understanding / Image Generation / Speech-to-Text / Text-to-Speech / Embedding) --- -通义千问(DashScope / 百炼)是国内覆盖最完整的厂商之一,文本、图像理解、图像生成、语音识别、语音合成与向量能力均可用一份 `dashscope_api_key` 启用。 +Qwen (Alibaba DashScope / Bailian) is one of the most fully-featured vendors. Text, image understanding, image generation, speech-to-text, text-to-speech, and embedding can all be enabled with a single `dashscope_api_key`. 
- 通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。 + All capabilities below can be configured in one place via the "Model Management" page in the Web Console, with no need to manually edit the configuration file. -## 文本对话 +## Text Chat ```json { @@ -18,16 +18,16 @@ description: 通义千问模型配置(文本 / 图像理解 / 图像生成 / } ``` -| 参数 | 说明 | +| Parameter | Description | | --- | --- | -| `model` | 可填 `qwen3.6-plus`、`qwen3.7-max`、`qwen3.5-plus`、`qwen3-max`、`qwen-max`、`qwen-plus`、`qwen-turbo`、`qwq-plus` 等 | -| `dashscope_api_key` | 在 [百炼控制台](https://bailian.console.aliyun.com/?tab=model#/api-key) 创建,参考 [官方文档](https://bailian.console.aliyun.com/?tab=api#/api) | +| `model` | Can be `qwen3.6-plus`, `qwen3.7-max`, `qwen3.5-plus`, `qwen3-max`, `qwen-max`, `qwen-plus`, `qwen-turbo`, `qwq-plus`, etc. | +| `dashscope_api_key` | Create one in the [Bailian Console](https://bailian.console.aliyun.com/?tab=model#/api-key); see the [official docs](https://bailian.console.aliyun.com/?tab=api#/api) | -## 图像理解 +## Image Understanding -配置 `dashscope_api_key` 后 Agent 的 Vision 工具会自动调用千问的视觉模型识别图像。`qwen3-max` / `qwen3.5-plus` / `qwen3.6-plus` 等模型本身就是多模态;若主模型是纯文本(如 `qwen-turbo`),会自动回落到 `qwen-vl-max`。 +Once `dashscope_api_key` is configured, the Agent's Vision tool automatically calls Qwen's vision models to recognize images. Models like `qwen3-max` / `qwen3.5-plus` / `qwen3.6-plus` are already multimodal; if the main model is text-only (e.g. `qwen-turbo`), it automatically falls back to `qwen-vl-max`. -如需手动指定 Vision 模型: +To manually specify a Vision model: ```json { @@ -39,9 +39,9 @@ description: 通义千问模型配置(文本 / 图像理解 / 图像生成 / } ``` -支持模型:`qwen3.6-plus`、`qwen3.5-plus`、`qwen3-max`。 +Supported models: `qwen3.6-plus`, `qwen3.5-plus`, `qwen3-max`. -## 图像生成 +## Image Generation ```json { @@ -53,9 +53,9 @@ description: 通义千问模型配置(文本 / 图像理解 / 图像生成 / } ``` -可选模型:`qwen-image-2.0`、`qwen-image-2.0-pro`。 +Available models: `qwen-image-2.0`, `qwen-image-2.0-pro`. 
-## 语音识别 +## Speech-to-Text (ASR) ```json { @@ -64,14 +64,14 @@ description: 通义千问模型配置(文本 / 图像理解 / 图像生成 / } ``` -| 参数 | 说明 | +| Parameter | Description | | --- | --- | -| `voice_to_text` | 设为 `dashscope` 启用通义千问 ASR | -| `voice_to_text_model` | 可选,默认 `qwen3-asr-flash` | +| `voice_to_text` | Set to `dashscope` to enable Qwen ASR | +| `voice_to_text_model` | Optional, defaults to `qwen3-asr-flash` | -凭证自动复用 `dashscope_api_key`。单段音频建议小于 10MB、时长不超过 300 秒。 +Credentials are automatically reused from `dashscope_api_key`. A single audio segment should be smaller than 10MB and no longer than 300 seconds. -## 语音合成 +## Text-to-Speech (TTS) ```json { @@ -81,26 +81,26 @@ description: 通义千问模型配置(文本 / 图像理解 / 图像生成 / } ``` -| 参数 | 说明 | +| Parameter | Description | | --- | --- | -| `text_to_voice_model` | 可选,默认 `qwen3-tts-flash`,覆盖普通话、方言与主流外语 | -| `tts_voice_id` | 音色 ID,详见下方常用列表 | +| `text_to_voice_model` | Optional, defaults to `qwen3-tts-flash`; covers Mandarin, dialects, and major foreign languages | +| `tts_voice_id` | Voice ID; see the common list below | -常用音色示例: +Common voice examples: -| 音色 ID | 说明 | +| Voice ID | Description | | --- | --- | -| `Cherry` | 芊悦 · 阳光女声 | -| `Serena` | 苏瑶 · 温柔女声 | -| `Ethan` | 晨煦 · 阳光男声 | -| `Chelsie` | 千雪 · 二次元少女 | -| `Dylan` | 北京话 · 晓东 | -| `Rocky` | 粤语 · 阿强 | -| `Sunny` | 四川话 · 晴儿 | +| `Cherry` | Qianyue · Sunny Female Voice | +| `Serena` | Suyao · Gentle Female Voice | +| `Ethan` | Chenxu · Sunny Male Voice | +| `Chelsie` | Qianxue · Anime Girl | +| `Dylan` | Beijing Dialect · Xiaodong | +| `Rocky` | Cantonese · Aqiang | +| `Sunny` | Sichuan Dialect · Qing'er | -完整音色(普通话 / 各地方言 / 双语等)可在 Web 控制台的「模型管理 → 语音合成」下拉框中可视化选择。 +The full voice list (Mandarin / regional dialects / bilingual, etc.) can be selected visually in the Web Console under "Model Management → Text-to-Speech". 
-## 向量 +## Embedding ```json { @@ -109,4 +109,4 @@ description: 通义千问模型配置(文本 / 图像理解 / 图像生成 / } ``` -默认模型 `text-embedding-v4`。修改 embedding 后需执行 `/memory rebuild-index` 命令重建索引。 +The default model is `text-embedding-v4`. After changing the embedding, run `/memory rebuild-index` to rebuild the index. diff --git a/docs/releases/overview.mdx b/docs/releases/overview.mdx index 020265e6..aa68112c 100644 --- a/docs/releases/overview.mdx +++ b/docs/releases/overview.mdx @@ -1,32 +1,32 @@ --- -title: 更新日志 -description: CowAgent 版本更新历史 +title: Changelog +description: CowAgent version history --- -| 版本 | 日期 | 说明 | +| Version | Date | Description | | --- | --- | --- | -| [2.0.9](/releases/v2.0.9) | 2026.05.22 | 新增模型管理、MCP 协议支持、浏览器登录态持久化、新模型接入(gpt-5.5、gemini-3.5-flash、qwen3.7-max 等)、部署安全加固 | -| [2.0.8](/releases/v2.0.8) | 2026.05.06 | 飞书渠道全面升级(语音、流式输出和Markdown、扫码一键接入)、DeepSeek V4和百度模型新增、定时任务工具增强 | -| [2.0.7](/releases/v2.0.7) | 2026.04.22 | 图像生成技能(六厂商自动路由)、新模型支持(Kimi K2.6、Claude Opus 4.7、GLM 5.1)、知识库增强、Web 控制台优化 | -| [2.0.6](/releases/v2.0.6) | 2026.04.14 | 项目更名、知识库系统、梦境记忆蒸馏、上下文智能压缩、Web 控制台多会话及多项优化 | -| [2.0.5](/releases/v2.0.5) | 2026.04.01 | Cow CLI、Skill Hub 开源、浏览器工具、企微扫码创建、多项优化和修复 | -| [2.0.4](/releases/v2.0.4) | 2026.03.22 | 新增个人微信通道、新模型支持、日文文档、脚本重构及多项修复 | -| [2.0.3](/releases/v2.0.3) | 2026.03.18 | 新增企微智能机器人和 QQ 通道、支持Coding Plan、新增多个模型、Web端文件处理、记忆系统升级 | -| [2.0.2](/releases/v2.0.2) | 2026.02.27 | Web 控制台升级、多通道同时运行、会话持久化 | -| [2.0.1](/releases/v2.0.1) | 2026.02.13 | 内置 Web Search 工具、智能上下文管理、多项修复 | -| [2.0.0](/releases/v2.0.0) | 2026.02.03 | 全面升级为超级 Agent 助理 | -| 1.7.6 | 2025.05.23 | Web Channel 优化、AgentMesh 多智能体插件 | -| 1.7.5 | 2025.04.11 | DeepSeek 模型 | -| 1.7.4 | 2024.12.13 | Gemini 2.0 模型、Web Channel | -| 1.7.3 | 2024.10.31 | 稳定性提升、数据库功能 | -| 1.7.2 | 2024.09.26 | 一键安装脚本、o1 模型 | -| 1.7.0 | 2024.08.02 | 讯飞 4.0 模型、知识库引用 | -| 1.6.9 | 2024.07.19 | gpt-4o-mini、阿里语音识别 | -| 1.6.8 | 2024.07.05 | Claude 3.5、Gemini 1.5 Pro | -| 1.6.0 | 2024.04.26 | Kimi 接入、gpt-4-turbo 升级 | -| 1.5.8 | 
2024.03.26 | GLM-4、Claude-3、edge-tts | -| 1.5.2 | 2023.11.10 | 飞书通道、图像识别对话 | -| 1.5.0 | 2023.11.10 | gpt-4-turbo、dall-e-3、tts 多模态 | -| 1.0.0 | 2022.12.12 | 项目创建,首次接入 ChatGPT 模型 | +| [2.0.9](/releases/v2.0.9) | 2026.05.22 | Model management console, MCP protocol support, browser persistent login, new models (gpt-5.5, gemini-3.5-flash, qwen3.7-max, etc.), deployment hardening | +| [2.0.8](/releases/v2.0.8) | 2026.05.06 | Major Feishu channel upgrade (voice, streaming and Markdown, one-click QR-scan setup), DeepSeek V4 and Baidu models, scheduler tool enhancements | +| [2.0.7](/releases/v2.0.7) | 2026.04.22 | Image Generation Skill (6-provider auto-routing), new models (Kimi K2.6, Claude Opus 4.7, GLM 5.1), knowledge base and Web Console improvements | +| [2.0.6](/releases/v2.0.6) | 2026.04.14 | Project rename, Knowledge Base system, Deep Dream Memory Distillation, Smart Context Compression, Web Console multi-session and various improvements | +| [2.0.5](/releases/v2.0.5) | 2026.04.01 | Cow CLI, Skill Hub open source, Browser tool, WeCom Bot QR scan, and more | +| [2.0.4](/releases/v2.0.4) | 2026.03.22 | Personal WeChat channel, new model support, Japanese docs, script refactoring and bug fixes | +| [2.0.3](/releases/v2.0.3) | 2026.03.18 | WeCom Smart Bot and QQ channels, Coding Plan support, multiple new models, Web file processing, memory system upgrade | +| [2.0.2](/releases/v2.0.2) | 2026.02.27 | Web Console upgrade, multi-channel concurrency, session persistence | +| [2.0.1](/releases/v2.0.1) | 2026.02.13 | Built-in Web Search tool, smart context management, multiple fixes | +| [2.0.0](/releases/v2.0.0) | 2026.02.03 | Full upgrade to AI super assistant | +| 1.7.6 | 2025.05.23 | Web Channel optimization, AgentMesh plugin | +| 1.7.5 | 2025.04.11 | DeepSeek model | +| 1.7.4 | 2024.12.13 | Gemini 2.0 model, Web Channel | +| 1.7.3 | 2024.10.31 | Stability improvements, database features | +| 1.7.2 | 2024.09.26 | One-click install script, o1 model | +| 1.7.0 | 
2024.08.02 | iFlytek 4.0 model, knowledge base references | +| 1.6.9 | 2024.07.19 | gpt-4o-mini, Alibaba voice recognition | +| 1.6.8 | 2024.07.05 | Claude 3.5, Gemini 1.5 Pro | +| 1.6.0 | 2024.04.26 | Kimi integration, gpt-4-turbo upgrade | +| 1.5.8 | 2024.03.26 | GLM-4, Claude-3, edge-tts | +| 1.5.2 | 2023.11.10 | Feishu channel, image recognition chat | +| 1.5.0 | 2023.11.10 | gpt-4-turbo, dall-e-3, tts multimodal | +| 1.0.0 | 2022.12.12 | Project created, first ChatGPT integration | -更多历史版本请查看 [GitHub Releases](https://github.com/zhayujie/CowAgent/releases)。 +See [GitHub Releases](https://github.com/zhayujie/CowAgent/releases) for full history. diff --git a/docs/releases/v2.0.0.mdx b/docs/releases/v2.0.0.mdx index 3436bc19..e6c37533 100644 --- a/docs/releases/v2.0.0.mdx +++ b/docs/releases/v2.0.0.mdx @@ -1,105 +1,63 @@ --- title: v2.0.0 -description: CowAgent 2.0 - 从聊天机器人到超级智能助理的全面升级 +description: CowAgent 2.0 - Full upgrade from chatbot to AI super assistant --- -CowAgent 2.0 实现了从聊天机器人到**超级智能助理**的全面升级!现在它能够主动思考和规划任务、拥有长期记忆、操作计算机和外部资源、创造和执行技能,真正理解你并和你一起成长。 +CowAgent 2.0 is a comprehensive upgrade from a chatbot to an **AI super assistant** — capable of autonomous thinking and task planning, long-term memory, operating computers, and creating and executing skills. 
-**发布日期**:2026.02.03 | [GitHub Release](https://github.com/zhayujie/CowAgent/releases/tag/2.0.0) +**Release Date**: 2026.02.03 | [GitHub Release](https://github.com/zhayujie/CowAgent/releases/tag/2.0.0) -## 重点更新 +## Key Updates -### Agent 核心能力 +### Agent Core -- **复杂任务规划**:能够理解复杂任务并自主规划执行,持续思考和调用工具直到完成目标,支持多轮推理和上下文理解 -- **长期记忆**:自动将对话记忆持久化至本地文件和数据库中,包括全局记忆和天级记忆,支持关键词及向量检索 -- **内置系统工具**:内置实现 10+ 种工具,包括文件操作、Bash 终端、浏览器、文件发送、定时任务、记忆管理等 -- **Skills**:新增 Skill 运行引擎,内置多种技能,并支持通过自然语言对话完成自定义 Skills 开发 -- **安全和成本**:通过秘钥管理工具、提示词控制、系统权限等手段控制 Agent 的访问安全;通过最大记忆轮次、最大上下文 token、工具执行步数对 token 成本进行限制 +- **Complex Task Planning**: Autonomous planning with multi-turn reasoning +- **Long-term Memory**: Persistent memory with keyword and vector search +- **Built-in Tools**: 10+ tools including file ops, Bash, browser, scheduler +- **Web search**: Built-in `web_search` tool, supports multiple search engines, configure corresponding API key to use +- **Skills System**: Skill engine with built-in and custom skill support +- **Security & Cost**: Secret management, prompt controls, token limits -### 其他更新 +### Other -- **渠道优化**:飞书及钉钉接入渠道支持长连接接入(无需公网 IP)、支持图片/文件消息的接收和发送 -- **模型更新**:新增 claude-sonnet-4-5、gemini-3-pro-preview、glm-4.7、MiniMax-M2.1、qwen3-max 等最新模型 -- **部署优化**:增加一键安装、配置、运行、管理的脚本,简化部署流程 +- **Channels**: Feishu/DingTalk WebSocket support, image/file messages +- **Models**: claude-sonnet-4-5, gemini-3-pro-preview, glm-4.7, MiniMax-M2.1, qwen3-max +- **Deployment**: One-click install, configure, run, and management script -## 长期记忆系统 - -Agent 会在用户分享重要信息时主动存储,也会在对话达到一定长度时自动提取摘要。支持语义搜索和向量检索的混合检索模式。 - -**首次启动**时,Agent 会主动询问关键信息,并记录至工作空间(默认 `~/cow`)中的智能体设定、用户身份、记忆文件中。 - -**长期对话**中,Agent 会智能记录或检索记忆,不断更新自身设定、用户偏好,总结经验和教训,真正实现自主思考和持续成长。 +## Long-term Memory -## 任务规划与工具调用 - -Agent 根据任务需求智能选择和调用工具,完成各类复杂操作。 - -### 终端和文件访问 - -最基础和核心的工具能力,用户可通过手机端与 Agent 交互,操作个人电脑或服务器上的资源: +## Task Planning & Tools -### 应用编程能力 - -基于编程能力和系统访问能力,Agent 可实现从信息搜索、素材生成、编码、测试、部署、Nginx 配置、发布的 **Vibecoding 
全流程**,通过手机端一句命令完成应用快速 demo。 - -### 定时任务 - -支持 **一次性任务、固定时间间隔、Cron 表达式** 三种形式,任务触发可选择 **固定消息发送** 或 **Agent 动态任务执行** 两种模式: - -### 环境变量管理 - -通过 `env_config` 工具管理技能所需秘钥,支持对话式更新,内置安全保护和脱敏策略: - - - - - -## 技能系统 - -每个 Skill 由说明文件、运行脚本(可选)、资源(可选)组成,为 Agent 提供无限扩展性。 - -### 技能创造器 - -通过对话方式快速创建技能,将工作流程固化或对接任意第三方接口: +## Skills System -### 网页搜索和图像识别 - -- **网页搜索**:内置 `web_search` 工具,支持多种搜索引擎,配置对应 API Key 即可使用 -- **图像识别**:支持 `gpt-4.1-mini`、`gpt-4.1` 等模型,配置 `OPENAI_API_KEY` 即可使用 - -### 三方知识库和插件 - -`linkai-agent` 技能可将 [LinkAI](https://link-ai.tech/) 上的所有智能体作为 Skill 使用,实现多智能体决策: - -## 参与共建 +## Contributing -2.0 版本后,项目将持续升级 Agent 能力、拓展接入渠道、内置工具、技能系统,降低模型成本和提升安全性。欢迎 [提出反馈](https://github.com/zhayujie/CowAgent/issues) 和 [贡献代码](https://github.com/zhayujie/CowAgent/pulls)。 +Feel free to [submit feedback](https://github.com/zhayujie/CowAgent/issues) and [contribute code](https://github.com/zhayujie/CowAgent/pulls). diff --git a/docs/releases/v2.0.1.mdx b/docs/releases/v2.0.1.mdx index da7b6745..7de5746a 100644 --- a/docs/releases/v2.0.1.mdx +++ b/docs/releases/v2.0.1.mdx @@ -1,36 +1,36 @@ --- title: v2.0.1 -description: CowAgent 2.0.1 - 内置 Web Search、智能上下文管理、多项修复 +description: CowAgent 2.0.1 - Built-in Web Search, smart context management, multiple fixes --- -**发布日期**:2026.02 | [GitHub Release](https://github.com/zhayujie/CowAgent/releases/tag/2.0.1) | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.0..2.0.1) +**Release Date**: 2026.02.13 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.0..2.0.1) -## 新特性 +## New Features -- **内置 Web Search 工具**:将网络搜索作为 Agent 内置工具集成,降低决策成本 ([4f0ea5d](https://github.com/zhayujie/CowAgent/commit/4f0ea5d7568d61db91ff69c91c429e785fd1b1c2)) -- **Claude Opus 4.6 模型支持**:新增对 Claude Opus 4.6 模型的支持 ([#2661](https://github.com/zhayujie/CowAgent/pull/2661)) -- **企业微信图片消息识别**:支持企业微信渠道的图片消息识别功能 ([#2667](https://github.com/zhayujie/CowAgent/pull/2667)) +- **Built-in Web Search tool**: Integrated web search as a built-in Agent tool, 
reducing decision cost ([4f0ea5d](https://github.com/zhayujie/CowAgent/commit/4f0ea5d7568d61db91ff69c91c429e785fd1b1c2)) +- **Claude Opus 4.6 model support**: Added support for Claude Opus 4.6 model ([#2661](https://github.com/zhayujie/CowAgent/pull/2661)) +- **WeCom image recognition**: Support image message recognition in WeCom channel ([#2667](https://github.com/zhayujie/CowAgent/pull/2667)) -## 优化 +## Improvements -- **智能上下文管理**:解决聊天上下文溢出问题,新增智能上下文裁剪策略,防止 token 超限 ([cea7fb7](https://github.com/zhayujie/CowAgent/commit/cea7fb7490c53454602bf05955a0e9f059bcf0fd), [8acf2db](https://github.com/zhayujie/CowAgent/commit/8acf2dbdfe713b84ad74b761b7f86674b1c1904d)) [#2663](https://github.com/zhayujie/CowAgent/issues/2663) -- **运行时信息动态更新**:通过动态函数方案实现系统提示词中时间戳等运行时信息的自动更新 ([#2655](https://github.com/zhayujie/CowAgent/pull/2655), [#2657](https://github.com/zhayujie/CowAgent/pull/2657)) -- **Skill 提示词优化**:改进 Skill 系统提示词生成逻辑,简化工具描述,提升 Agent 表现 ([6c21833](https://github.com/zhayujie/CowAgent/commit/6c218331b1f1208ea8be6bf226936d3b556ade3e)) -- **智谱 AI 自定义 API Base URL**:支持智谱 AI 配置自定义 API Base URL ([#2660](https://github.com/zhayujie/CowAgent/pull/2660)) -- **启动脚本优化**:改进 `run.sh` 脚本的交互体验和配置流程 ([#2656](https://github.com/zhayujie/CowAgent/pull/2656)) -- **决策轮次日志**:新增 Agent 决策轮次的日志记录,便于调试 ([cb303e6](https://github.com/zhayujie/CowAgent/commit/cb303e6109c50c8dfef1f5e6c1ec47223bf3cd11)) +- **Smart context management**: Resolved chat context overflow with intelligent context trimming strategy to prevent token limits ([cea7fb7](https://github.com/zhayujie/CowAgent/commit/cea7fb7490c53454602bf05955a0e9f059bcf0fd), [8acf2db](https://github.com/zhayujie/CowAgent/commit/8acf2dbdfe713b84ad74b761b7f86674b1c1904d)) [#2663](https://github.com/zhayujie/CowAgent/issues/2663) +- **Runtime info dynamic update**: Automatic update of timestamps and other runtime info in system prompts via dynamic functions ([#2655](https://github.com/zhayujie/CowAgent/pull/2655), 
[#2657](https://github.com/zhayujie/CowAgent/pull/2657)) +- **Skill prompt optimization**: Improved Skill system prompt generation, simplified tool descriptions for better Agent performance ([6c21833](https://github.com/zhayujie/CowAgent/commit/6c218331b1f1208ea8be6bf226936d3b556ade3e)) +- **GLM custom API Base URL**: Support custom API Base URL for GLM models ([#2660](https://github.com/zhayujie/CowAgent/pull/2660)) +- **Startup script optimization**: Improved `run.sh` script interaction and configuration flow ([#2656](https://github.com/zhayujie/CowAgent/pull/2656)) +- **Decision step logging**: Added Agent decision step logging for debugging ([cb303e6](https://github.com/zhayujie/CowAgent/commit/cb303e6109c50c8dfef1f5e6c1ec47223bf3cd11)) -## 问题修复 +## Bug Fixes -- **定时任务记忆丢失**:修复 Scheduler 调度器导致的记忆丢失问题 ([a77a874](https://github.com/zhayujie/CowAgent/commit/a77a8741b500a408c6f5c8868856fb4b018fe9db)) -- **空工具调用与超长结果**:修复空 tool calls 及过长工具返回结果的异常处理 ([0542700](https://github.com/zhayujie/CowAgent/commit/0542700f9091ebb08c1a56103b0f0f45f24aa621)) -- **OpenAI Function Call**:修复 OpenAI 模型的 function call 调用兼容性问题 ([158c87a](https://github.com/zhayujie/CowAgent/commit/158c87ab8b05bae054cc1b4eacdbb64fc1062ba9)) -- **Claude 工具名字段**:移除 Claude 模型响应中多余的 tool name 字段 ([eec10cb](https://github.com/zhayujie/CowAgent/commit/eec10cb5db6a3d5bc12ef606606532237d2c5f6e)) -- **MiniMax 推理优化**:优化 MiniMax 模型 reasoning content 处理,隐藏思考过程输出 ([c72cda3](https://github.com/zhayujie/CowAgent/commit/c72cda33864bd1542012ee6e0a8bd8c6c88cb5ed), [72b1cac](https://github.com/zhayujie/CowAgent/commit/72b1cacea1ba0d1f3dedacbab2e088e98fd7e172)) -- **智谱 AI 思考过程**:隐藏智谱 AI 模型的思考过程展示 ([72b1cac](https://github.com/zhayujie/CowAgent/commit/72b1cacea1ba0d1f3dedacbab2e088e98fd7e172)) -- **飞书连接与证书**:修复飞书渠道的 SSL 证书错误和连接异常问题 ([229b14b](https://github.com/zhayujie/CowAgent/commit/229b14b6fcabe7123d53cab1dea39f38dab26d6d), [8674421](https://github.com/zhayujie/CowAgent/commit/867442155e7f095b4f38b0856f8c1d8312b5fcf7)) 
-- **model_type 类型校验**:修复非字符串 `model_type` 导致的 `AttributeError` ([#2666](https://github.com/zhayujie/CowAgent/pull/2666)) +- **Scheduler memory loss**: Fixed memory loss caused by the Scheduler dispatcher ([a77a874](https://github.com/zhayujie/CowAgent/commit/a77a8741b500a408c6f5c8868856fb4b018fe9db)) +- **Empty tool calls & long results**: Fixed handling of empty tool calls and excessively long tool results ([0542700](https://github.com/zhayujie/CowAgent/commit/0542700f9091ebb08c1a56103b0f0f45f24aa621)) +- **OpenAI Function Call**: Fixed function call compatibility with OpenAI models ([158c87a](https://github.com/zhayujie/CowAgent/commit/158c87ab8b05bae054cc1b4eacdbb64fc1062ba9)) +- **Claude tool name field**: Removed extraneous tool name field from Claude model responses ([eec10cb](https://github.com/zhayujie/CowAgent/commit/eec10cb5db6a3d5bc12ef606606532237d2c5f6e)) +- **MiniMax reasoning**: Optimized MiniMax model reasoning content handling and hid the thinking process output ([c72cda3](https://github.com/zhayujie/CowAgent/commit/c72cda33864bd1542012ee6e0a8bd8c6c88cb5ed), [72b1cac](https://github.com/zhayujie/CowAgent/commit/72b1cacea1ba0d1f3dedacbab2e088e98fd7e172)) +- **GLM thinking process**: Hid the GLM model's thinking process display ([72b1cac](https://github.com/zhayujie/CowAgent/commit/72b1cacea1ba0d1f3dedacbab2e088e98fd7e172)) +- **Feishu connection & SSL**: Fixed Feishu channel SSL certificate errors and connection issues ([229b14b](https://github.com/zhayujie/CowAgent/commit/229b14b6fcabe7123d53cab1dea39f38dab26d6d), [8674421](https://github.com/zhayujie/CowAgent/commit/867442155e7f095b4f38b0856f8c1d8312b5fcf7)) +- **model_type validation**: Fixed `AttributeError` caused by non-string `model_type` ([#2666](https://github.com/zhayujie/CowAgent/pull/2666)) -## 平台兼容 +## Platform Compatibility -- **Windows 兼容性适配**:修复 Windows 平台下路径处理、文件编码及 `os.getuid()` 不可用等问题,涉及多个工具模块 ([051ffd7](https://github.com/zhayujie/CowAgent/commit/051ffd78a372f71a967fd3259e37fe19131f83cf), 
[5264f7c](https://github.com/zhayujie/CowAgent/commit/5264f7ce18360ee4db5dcb4ebe67307977d40014)) +- **Windows compatibility**: Fixed path handling, file encoding, and `os.getuid()` unavailability on Windows across multiple tool modules ([051ffd7](https://github.com/zhayujie/CowAgent/commit/051ffd78a372f71a967fd3259e37fe19131f83cf), [5264f7c](https://github.com/zhayujie/CowAgent/commit/5264f7ce18360ee4db5dcb4ebe67307977d40014)) diff --git a/docs/releases/v2.0.2.mdx b/docs/releases/v2.0.2.mdx index ad17bcba..80e9f6d8 100644 --- a/docs/releases/v2.0.2.mdx +++ b/docs/releases/v2.0.2.mdx @@ -1,63 +1,65 @@ --- title: v2.0.2 -description: CowAgent 2.0.2 - Web 控制台升级、多通道同时运行、会话持久化 +description: CowAgent 2.0.2 - Web Console upgrade, multi-channel concurrency, session persistence --- -## ✨ 重点更新 +**Release Date**: 2026.02.27 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.1...2.0.2) -### 🖥️ Web 控制台升级 +## Highlights -本次对 Web 控制台进行了全面升级,支持流式对话输出、工具执行过程和思考过程的可视化展示,并支持对模型、技能、记忆、通道、Agent 配置的在线查看和管理。 +### 🖥️ Web Console Upgrade -#### 对话界面 +The Web Console has been fully upgraded with streaming conversation output, visual display of tool execution and reasoning processes, and online management of **models, skills, memory, channels, and Agent configuration**. 
-支持流式输出,可实时展示 Agent 的思考过程(Reasoning)和工具调用过程(Tool Calls),更直观地观察 Agent 的决策过程: +#### Chat Interface + +Supports streaming output with real-time display of the Agent's reasoning process and tool calls, making the Agent's decision-making easy to follow: -#### 模型管理 +#### Model Management -支持在线管理模型配置,无需手动编辑配置文件: +Manage model configurations online without manually editing config files: -#### 技能管理 +#### Skill Management -支持在线查看和管理 Agent 技能(Skills): +View and manage Agent Skills online: -#### 记忆管理 +#### Memory Management -支持在线查看和管理 Agent 记忆: +View and manage Agent memory online: -#### 通道管理 +#### Channel Management -支持在线管理接入通道,支持实时连接/断开操作: +Manage connected channels online with real-time connect/disconnect operations: -#### 定时任务 +#### Scheduled Tasks -支持在线查看和管理定时任务,包括一次性任务、固定间隔、Cron 表达式等多种调度方式的可视化管理: +View and manage scheduled tasks online, including one-time tasks, fixed intervals, and Cron expressions: -#### 日志 +#### Logs -支持在线实时查看 Agent 运行日志,便于监控运行状态和排查问题: +View Agent runtime logs in real time for monitoring and troubleshooting: -相关提交:[f1a1413](https://github.com/zhayujie/CowAgent/commit/f1a1413), [c0702c8](https://github.com/zhayujie/CowAgent/commit/c0702c8), [394853c](https://github.com/zhayujie/CowAgent/commit/394853c), [1c71c4e](https://github.com/zhayujie/CowAgent/commit/1c71c4e), [5e3eccb](https://github.com/zhayujie/CowAgent/commit/5e3eccb), [e1dc037](https://github.com/zhayujie/CowAgent/commit/e1dc037), [5edbf4c](https://github.com/zhayujie/CowAgent/commit/5edbf4c), [7d258b5](https://github.com/zhayujie/CowAgent/commit/7d258b5) +Related commits: [f1a1413](https://github.com/zhayujie/CowAgent/commit/f1a1413), [c0702c8](https://github.com/zhayujie/CowAgent/commit/c0702c8), [394853c](https://github.com/zhayujie/CowAgent/commit/394853c), [1c71c4e](https://github.com/zhayujie/CowAgent/commit/1c71c4e), [5e3eccb](https://github.com/zhayujie/CowAgent/commit/5e3eccb), [e1dc037](https://github.com/zhayujie/CowAgent/commit/e1dc037), 
[5edbf4c](https://github.com/zhayujie/CowAgent/commit/5edbf4c), [7d258b5](https://github.com/zhayujie/CowAgent/commit/7d258b5) -### 🔀 多通道同时运行 +### 🔀 Multi-Channel Concurrency -支持多个接入通道(如飞书、钉钉、企微应用、Web 等)同时运行,每个通道在独立子线程中启动,互不干扰。 +Multiple channels (e.g., Feishu, DingTalk, WeCom, Web) can now run simultaneously, each in an independent thread without interference. -配置方式:在 `config.json` 中通过 `channel_type` 配置多个通道,以逗号分隔,也可在 Web 控制台的通道管理页面中实时连接或断开各通道。 +Configuration: Set multiple channels in `config.json` via `channel_type` separated by commas, or connect/disconnect channels in real-time from the Web Console's channel management page. ```json { @@ -65,34 +67,32 @@ description: CowAgent 2.0.2 - Web 控制台升级、多通道同时运行、会 } ``` -相关提交:[4694594](https://github.com/zhayujie/CowAgent/commit/4694594), [7cce224](https://github.com/zhayujie/CowAgent/commit/7cce224), [7d258b5](https://github.com/zhayujie/CowAgent/commit/7d258b5), [c9adddb](https://github.com/zhayujie/CowAgent/commit/c9adddb) +Related commits: [4694594](https://github.com/zhayujie/CowAgent/commit/4694594), [7cce224](https://github.com/zhayujie/CowAgent/commit/7cce224), [7d258b5](https://github.com/zhayujie/CowAgent/commit/7d258b5), [c9adddb](https://github.com/zhayujie/CowAgent/commit/c9adddb) -### 💾 会话持久化 +### 💾 Session Persistence -会话历史支持持久化存储至本地 SQLite 数据库,服务重启后会话上下文自动恢复,不再丢失。Web 控制台中的历史对话记录也会同步恢复展示。 +Session history is now persisted to a local SQLite database. Conversation context is automatically restored after service restarts. Historical conversations in the Web Console are also restored. 
-相关提交:[29bfbec](https://github.com/zhayujie/CowAgent/commit/29bfbec), [9917552](https://github.com/zhayujie/CowAgent/commit/9917552), [925d728](https://github.com/zhayujie/CowAgent/commit/925d728) +Related commits: [29bfbec](https://github.com/zhayujie/CowAgent/commit/29bfbec), [9917552](https://github.com/zhayujie/CowAgent/commit/9917552), [925d728](https://github.com/zhayujie/CowAgent/commit/925d728) -### 🤖 新增模型 +## New Models -- **Gemini 3.1 Pro Preview**:新增 `gemini-3.1-pro-preview` 模型支持 ([52d7cad](https://github.com/zhayujie/CowAgent/commit/52d7cad)) -- **Claude 4.6 Sonnet**:新增 `claude-4.6-sonnet` 模型支持 ([52d7cad](https://github.com/zhayujie/CowAgent/commit/52d7cad)) -- **Qwen3.5 Plus**:新增 `qwen3.5-plus` 模型支持 ([e59a289](https://github.com/zhayujie/CowAgent/commit/e59a289)) -- **MiniMax M2.5**:新增 `Minimax-M2.5` 模型支持 ([48db538](https://github.com/zhayujie/CowAgent/commit/48db538)) -- **GLM-5**:新增 `glm-5` 模型支持 ([48db538](https://github.com/zhayujie/CowAgent/commit/48db538)) -- **Kimi K2.5**:新增 `kimi-k2.5` 模型支持 ([48db538](https://github.com/zhayujie/CowAgent/commit/48db538)) -- **Doubao 2.0 Code**:新增 `doubao-2.0-code` 编程专用模型 ([ab28ee5](https://github.com/zhayujie/CowAgent/commit/ab28ee5)) -- **DashScope 模型**:新增阿里云 DashScope 模型名称支持 ([ce58f23](https://github.com/zhayujie/CowAgent/commit/ce58f23)) +- **Gemini 3.1 Pro Preview**: Added `gemini-3.1-pro-preview` model support ([52d7cad](https://github.com/zhayujie/CowAgent/commit/52d7cad)) +- **Claude 4.6 Sonnet**: Added `claude-4.6-sonnet` model support ([52d7cad](https://github.com/zhayujie/CowAgent/commit/52d7cad)) +- **Qwen3.5 Plus**: Added `qwen3.5-plus` model support ([e59a289](https://github.com/zhayujie/CowAgent/commit/e59a289)) +- **MiniMax M2.5**: Added `Minimax-M2.5` model support ([48db538](https://github.com/zhayujie/CowAgent/commit/48db538)) +- **GLM-5**: Added `glm-5` model support ([48db538](https://github.com/zhayujie/CowAgent/commit/48db538)) +- **Kimi K2.5**: Added `kimi-k2.5` model support 
([48db538](https://github.com/zhayujie/CowAgent/commit/48db538)) +- **Doubao 2.0 Code**: Added `doubao-2.0-code` coding-specialized model ([ab28ee5](https://github.com/zhayujie/CowAgent/commit/ab28ee5)) +- **DashScope Models**: Added Alibaba Cloud DashScope model name support ([ce58f23](https://github.com/zhayujie/CowAgent/commit/ce58f23)) -### 🌐 新增官网和文档中心 +## Website & Documentation -- **官网上线**:[cowagent.ai](https://cowagent.ai/) -- **文档中心上线**:[docs.cowagent.ai](https://docs.cowagent.ai/) +- **Official Website**: [cowagent.ai](https://cowagent.ai/) +- **Documentation**: [docs.cowagent.ai](https://docs.cowagent.ai/) -### 🐛 问题修复 +## Bug Fixes -- **Gemini 钉钉图片识别**:修复 Gemini 在钉钉通道中无法处理图片标记的问题 ([05a3304](https://github.com/zhayujie/CowAgent/commit/05a3304)) ([#2670](https://github.com/zhayujie/CowAgent/pull/2670)) Thanks [@SgtPepper114](https://github.com/SgtPepper114) -- **启动脚本依赖**:修复 `run.sh` 脚本的依赖安装问题 ([b6fc9fa](https://github.com/zhayujie/CowAgent/commit/b6fc9fa)) -- **裸异常捕获**:将代码中的 `bare except` 替换为 `except Exception`,提升异常处理规范性 ([adca89b](https://github.com/zhayujie/CowAgent/commit/adca89b)) ([#2674](https://github.com/zhayujie/CowAgent/pull/2674)) Thanks [@haosenwang1018](https://github.com/haosenwang1018) - -**发布日期**:2026.02.27 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.1...master) +- **Gemini DingTalk image recognition**: Fixed Gemini unable to process image markers in DingTalk channel ([05a3304](https://github.com/zhayujie/CowAgent/commit/05a3304)) ([#2670](https://github.com/zhayujie/CowAgent/pull/2670)) Thanks [@SgtPepper114](https://github.com/SgtPepper114) +- **Startup script dependencies**: Fixed dependency installation issue in `run.sh` script ([b6fc9fa](https://github.com/zhayujie/CowAgent/commit/b6fc9fa)) +- **Bare except cleanup**: Replaced `bare except` with `except Exception` for better exception handling ([adca89b](https://github.com/zhayujie/CowAgent/commit/adca89b)) 
([#2674](https://github.com/zhayujie/CowAgent/pull/2674)) Thanks [@haosenwang1018](https://github.com/haosenwang1018) diff --git a/docs/releases/v2.0.3.mdx b/docs/releases/v2.0.3.mdx index 22d2d1f6..5f9a837d 100644 --- a/docs/releases/v2.0.3.mdx +++ b/docs/releases/v2.0.3.mdx @@ -1,91 +1,91 @@ --- title: v2.0.3 -description: CowAgent 2.0.3 - 新增企微智能机器人和 QQ 通道、Web 控制台文件处理、记忆系统升级 +description: CowAgent 2.0.3 - WeCom Smart Bot and QQ channels, Web Console file handling, memory system upgrade --- -## 🔌 新增接入通道 +## 🔌 New Channels -### 企业微信智能机器人 +### WeCom Smart Bot -新增企业微信智能机器人(`wecom_bot`)通道,支持流式卡片消息输出,支持文本和图片消息的接收与回复,可在 Web 控制台中进行通道配置和管理。 +Added the WeCom Smart Bot (`wecom_bot`) channel with streaming card output, support for receiving and replying to text and image messages, and full configuration through the Web Console. -接入文档:[企微智能机器人接入](https://docs.cowagent.ai/channels/wecom-bot)。 +Documentation: [WeCom Smart Bot](https://docs.cowagent.ai/en/channels/wecom-bot). -相关提交:[d4480b6](https://github.com/zhayujie/CowAgent/commit/d4480b6), [a42f31f](https://github.com/zhayujie/CowAgent/commit/a42f31f), [4ecd4df](https://github.com/zhayujie/CowAgent/commit/4ecd4df), [8b45d6c](https://github.com/zhayujie/CowAgent/commit/8b45d6c) +Related commits: [d4480b6](https://github.com/zhayujie/CowAgent/commit/d4480b6), [a42f31f](https://github.com/zhayujie/CowAgent/commit/a42f31f), [4ecd4df](https://github.com/zhayujie/CowAgent/commit/4ecd4df), [8b45d6c](https://github.com/zhayujie/CowAgent/commit/8b45d6c) -### QQ 通道 +### QQ Channel -新增 QQ 官方机器人(`qq`)通道,支持文本和图片消息的接收与回复,支持私聊和群聊场景。 +Added the QQ official bot (`qq`) channel with support for text and image messages in both private chats and group chats. -接入文档参考:[QQ机器人接入](https://docs.cowagent.ai/channels/qq)。 +Documentation: [QQ Bot](https://docs.cowagent.ai/en/channels/qq). 
-相关提交:[005a0e1](https://github.com/zhayujie/CowAgent/commit/005a0e1), [a4d54f5](https://github.com/zhayujie/CowAgent/commit/a4d54f5) +Related commits: [005a0e1](https://github.com/zhayujie/CowAgent/commit/005a0e1), [a4d54f5](https://github.com/zhayujie/CowAgent/commit/a4d54f5) -## 🖥️ Web 控制台支持文件输入和处理 +## 🖥️ Web Console File Input and Processing -Web 控制台对话界面支持文件和图片上传,可直接发送文件给 Agent 进行处理。同时 Read 工具新增对 Office 文档(Word、Excel、PPT)的解析能力。 +The Web Console chat UI now supports file and image uploads — files can be sent directly to the agent for processing. The Read tool gains parsing support for Office documents (Word, Excel, PPT). -相关提交:[30c6d9b](https://github.com/zhayujie/CowAgent/commit/30c6d9b) +Related commits: [30c6d9b](https://github.com/zhayujie/CowAgent/commit/30c6d9b) -## 🤖 新增模型 +## 🤖 New Models -- **GPT-5.4 系列**:新增 `gpt-5.4`、`gpt-5.4-mini`、`gpt-5.4-nano` 模型支持 ([1623deb](https://github.com/zhayujie/CowAgent/commit/1623deb)) -- **Gemini 3.1 Flash Lite Preview**:新增 `gemini-3.1-flash-lite-preview` 模型支持 ([ba915f2](https://github.com/zhayujie/CowAgent/commit/ba915f2)) +- **GPT-5.4 Series**: Added `gpt-5.4`, `gpt-5.4-mini`, `gpt-5.4-nano` ([1623deb](https://github.com/zhayujie/CowAgent/commit/1623deb)) +- **Gemini 3.1 Flash Lite Preview**: Added `gemini-3.1-flash-lite-preview` ([ba915f2](https://github.com/zhayujie/CowAgent/commit/ba915f2)) -## 💰 Coding Plan 支持 +## 💰 Coding Plan Support -新增各厂商 Coding Plan(编程包月套餐)的接入支持,通过 OpenAI 兼容方式统一接入。目前已支持阿里云、MiniMax、智谱 GLM、Kimi、火山引擎等厂商。 +Added integration with vendor Coding Plan (monthly programming subscription) tiers via the unified OpenAI-compatible path. Supported vendors include Aliyun, MiniMax, GLM, Kimi, and Volcengine. -详细配置参考 [Coding Plan 文档](https://docs.cowagent.ai/models/coding-plan)。 +See [Coding Plan docs](https://docs.cowagent.ai/en/models/coding-plan) for detailed configuration. 
-## 🧠 记忆系统升级 +## 🧠 Memory System Upgrade -记忆写入(Memory Flush)升级: +Memory flush improvements: -- 使用 LLM 对超出上下文窗口的对话内容进行智能摘要,生成精炼的每日记忆条目 -- 摘要在后台线程异步执行,不阻塞回复 -- 优化上下文批量裁剪策略,降低冲刷频率 -- 新增每日定时冲刷兜底机制,避免低活跃场景下记忆丢失 -- 修复上下文记忆丢失问题 +- Use the LLM to summarize out-of-window conversations into compact daily memory entries +- Summarization runs asynchronously on a background thread, never blocking replies +- Smarter batch trimming policy reduces flush frequency +- Daily scheduled flush as a safety net for low-activity scenarios +- Fixed context-memory loss issues -相关提交:[022c13f](https://github.com/zhayujie/CowAgent/commit/022c13f), [c116235](https://github.com/zhayujie/CowAgent/commit/c116235) +Related commits: [022c13f](https://github.com/zhayujie/CowAgent/commit/022c13f), [c116235](https://github.com/zhayujie/CowAgent/commit/c116235) -## 🔧 工具重构 +## 🔧 Tool Refactoring -- **图片识别**:将图片识别(Image Vision)从 Skill 重构为内置 Tool,新增独立的图片视觉提供方(Vision Provider)配置,提升稳定性和可维护性 ([a50fafa](https://github.com/zhayujie/CowAgent/commit/a50fafa), [3b8b562](https://github.com/zhayujie/CowAgent/commit/3b8b562)) -- **网页抓取**:将网页抓取(Web Fetch)从 Skill 重构为内置 Tool,支持远程文档文件(PDF、Word、Excel、PPT)的下载和解析 ([ccb9030](https://github.com/zhayujie/CowAgent/commit/ccb9030), [fa61744](https://github.com/zhayujie/CowAgent/commit/fa61744)) +- **Image Vision**: Image recognition (Vision) is refactored from a Skill into a built-in Tool with a dedicated Vision Provider configuration, improving stability and maintainability ([a50fafa](https://github.com/zhayujie/CowAgent/commit/a50fafa), [3b8b562](https://github.com/zhayujie/CowAgent/commit/3b8b562)) +- **Web Fetch**: Web fetch is refactored from a Skill into a built-in Tool with support for downloading and parsing remote documents (PDF, Word, Excel, PPT) ([ccb9030](https://github.com/zhayujie/CowAgent/commit/ccb9030), [fa61744](https://github.com/zhayujie/CowAgent/commit/fa61744)) -## 🐳 Docker 部署优化 +## 🐳 Docker Deployment Improvements -- **配置模板对齐**:`docker-compose.yml` 环境变量与 
`config-template.json` 对齐,补充完整的模型 API Key 和 Agent 等配置项 -- **Web 控制台端口映射**:新增 `9899` 端口映射,Docker 部署后可通过浏览器访问 Web 控制台 -- **配置热更新**:各模型 Bot 的 API Key 和 API Base 改为实时读取,通过 Web 控制台修改配置后无需重启即可生效 -- **工作空间持久化**:新增 `./cow` Volume 挂载,Agent 工作空间数据(记忆、人格、技能等)持久化到宿主机,容器重建或升级不丢失 +- **Config Template Alignment**: `docker-compose.yml` env vars aligned with `config-template.json`, covering full model API key and Agent settings +- **Web Console Port Mapping**: Added `9899` port mapping so the Web Console is reachable in browser after Docker deployment +- **Hot Config Reload**: Bot API key and API base are now read at request time — changes from the Web Console take effect without restart +- **Workspace Persistence**: Added a `./cow` volume mount so agent workspace data (memories, persona, skills, etc.) persists across container rebuilds and upgrades -## ⚡ 性能优化 +## ⚡ Performance Improvements -- **启动加速**:飞书通道采用懒加载方式导入依赖,避免 4-10 秒的启动延迟 ([924dc79](https://github.com/zhayujie/CowAgent/commit/924dc79)) -- **通道稳定性**:优化通道连接稳定性,支持通道配置通过环境变量设置 ([f1c04bc](https://github.com/zhayujie/CowAgent/commit/f1c04bc), [46d97fd](https://github.com/zhayujie/CowAgent/commit/46d97fd)) +- **Faster Startup**: The Feishu channel imports its dependencies lazily, avoiding a 4–10s startup delay ([924dc79](https://github.com/zhayujie/CowAgent/commit/924dc79)) +- **Channel Stability**: Improved channel connection stability and added env-var support for channel configuration ([f1c04bc](https://github.com/zhayujie/CowAgent/commit/f1c04bc), [46d97fd](https://github.com/zhayujie/CowAgent/commit/46d97fd)) -## 🐛 问题修复 +## 🐛 Bug Fixes -- **bot_type 配置**:修复 Agent 模式下 `bot_type` 配置传递问题 ([#2691](https://github.com/zhayujie/CowAgent/pull/2691)) Thanks [@Weikjssss](https://github.com/Weikjssss) -- **bot_type 优先级**:调整 Agent 模式下 `bot_type` 的解析优先级 ([#2692](https://github.com/zhayujie/CowAgent/pull/2692)) Thanks [@6vision](https://github.com/6vision) -- **智谱模型配置**:修复智谱 `bot_type` 命名、Web 控制台持久化及正则转义问题 
([#2693](https://github.com/zhayujie/CowAgent/pull/2693)) Thanks [@6vision](https://github.com/6vision) -- **OpenAI 兼容层**:使用 `openai_compat` 层统一错误处理 ([#2688](https://github.com/zhayujie/CowAgent/pull/2688)) Thanks [@JasonOA888](https://github.com/JasonOA888) -- **OpenAI 兼容迁移**:完成所有模型 Bot 的 `openai_compat` 迁移 ([#2689](https://github.com/zhayujie/CowAgent/pull/2689)) -- **Gemini 工具调用**:修复 Gemini 模型的工具调用匹配问题 ([eda82ba](https://github.com/zhayujie/CowAgent/commit/eda82ba)) -- **会话并发**:修复会话并发场景下的竞态条件问题 ([9879878](https://github.com/zhayujie/CowAgent/commit/9879878)) -- **历史消息恢复**:修复历史会话消息不完整问题,仅恢复 user/assistant 文本消息,剥离工具调用 ([b788a3d](https://github.com/zhayujie/CowAgent/commit/b788a3d), [a33ce97](https://github.com/zhayujie/CowAgent/commit/a33ce97)) -- **飞书群聊**:移除飞书群聊场景下对 `bot_name` 的依赖 ([b641bff](https://github.com/zhayujie/CowAgent/commit/b641bff)) -- **Safari 兼容**:修复 Safari 浏览器 IME 回车键误触发消息发送问题 ([0687916](https://github.com/zhayujie/CowAgent/commit/0687916)) -- **Windows 兼容**:修复 Windows 下 bash 风格 `$VAR` 环境变量转换为 `%VAR%` 的问题 ([7c67513](https://github.com/zhayujie/CowAgent/commit/7c67513)) -- **MiniMax 参数**:增加 MiniMax 模型的 `max_tokens` 限制 ([1767413](https://github.com/zhayujie/CowAgent/commit/1767413)) -- **.gitignore 更新**:添加 Python 目录忽略规则 ([#2683](https://github.com/zhayujie/CowAgent/pull/2683)) Thanks [@pelioo](https://github.com/pelioo) -- **AGENT.md 主动演进**:优化系统提示词中对 AGENT.md 的更新引导,从被动的"用户修改时更新"改为主动识别对话中的性格、风格变化并自动更新 +- **bot_type Propagation**: Fixed `bot_type` propagation under Agent mode ([#2691](https://github.com/zhayujie/CowAgent/pull/2691)) Thanks [@Weikjssss](https://github.com/Weikjssss) +- **bot_type Resolution Priority**: Adjusted `bot_type` resolution priority under Agent mode ([#2692](https://github.com/zhayujie/CowAgent/pull/2692)) Thanks [@6vision](https://github.com/6vision) +- **Zhipu Config**: Fixed Zhipu `bot_type` naming, Web Console persistence, and regex escaping ([#2693](https://github.com/zhayujie/CowAgent/pull/2693)) Thanks 
[@6vision](https://github.com/6vision) +- **OpenAI-Compat Layer**: Unified error handling via the `openai_compat` layer ([#2688](https://github.com/zhayujie/CowAgent/pull/2688)) Thanks [@JasonOA888](https://github.com/JasonOA888) +- **OpenAI-Compat Migration**: Completed the `openai_compat` migration across all model bots ([#2689](https://github.com/zhayujie/CowAgent/pull/2689)) +- **Gemini Tool Calling**: Fixed tool-call matching for Gemini ([eda82ba](https://github.com/zhayujie/CowAgent/commit/eda82ba)) +- **Session Concurrency**: Fixed race conditions in concurrent session scenarios ([9879878](https://github.com/zhayujie/CowAgent/commit/9879878)) +- **History Recovery**: Fixed incomplete history recovery — only user/assistant text messages are restored, tool calls are stripped ([b788a3d](https://github.com/zhayujie/CowAgent/commit/b788a3d), [a33ce97](https://github.com/zhayujie/CowAgent/commit/a33ce97)) +- **Feishu Group Chat**: Removed the `bot_name` dependency for Feishu group chats ([b641bff](https://github.com/zhayujie/CowAgent/commit/b641bff)) +- **Safari Compatibility**: Fixed an IME Enter key issue that mistakenly sent messages on Safari ([0687916](https://github.com/zhayujie/CowAgent/commit/0687916)) +- **Windows Compatibility**: Fixed bash-style `$VAR` to `%VAR%` env-var conversion on Windows ([7c67513](https://github.com/zhayujie/CowAgent/commit/7c67513)) +- **MiniMax Params**: Added a `max_tokens` cap for MiniMax models ([1767413](https://github.com/zhayujie/CowAgent/commit/1767413)) +- **.gitignore**: Added Python directory ignore rules ([#2683](https://github.com/zhayujie/CowAgent/pull/2683)) Thanks [@pelioo](https://github.com/pelioo) +- **AGENT.md Proactive Evolution**: Improved the system prompt guidance around AGENT.md — instead of waiting for explicit user edits, the agent now proactively detects persona/style shifts in the conversation and updates AGENT.md accordingly -## 📦 升级方式 +## 📦 Upgrade -源码部署可执行 `./run.sh update` 一键升级,或手动拉取代码后重启。详见 
[更新升级文档](https://docs.cowagent.ai/guide/upgrade)。 +Run `./run.sh update` for a one-click upgrade, or manually pull the latest code and restart. See [Upgrade Guide](https://docs.cowagent.ai/en/guide/upgrade) for details. -**发布日期**:2026.03.18 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.2...master) +**Release Date**: 2026.03.18 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.2...2.0.3) diff --git a/docs/releases/v2.0.4.mdx b/docs/releases/v2.0.4.mdx index 55264a4b..fc976fa0 100644 --- a/docs/releases/v2.0.4.mdx +++ b/docs/releases/v2.0.4.mdx @@ -1,51 +1,55 @@ --- title: v2.0.4 -description: CowAgent 2.0.4 - 新增个人微信通道、新模型支持、日文文档、脚本重构及多项修复 +description: CowAgent 2.0.4 - Personal WeChat channel, new model support, Japanese docs, script refactoring and bug fixes --- -## 🔌 新增个人微信通道 +## 🔌 Personal WeChat Channel -新增个人微信(`weixin`)通道,微信扫描二维码即可将 CowAgent 接入个人微信,支持以下功能: +Added personal WeChat (`weixin`) channel — the most important update in this release. Simply scan a QR code to connect CowAgent to your personal WeChat account, with support for: -- **消息收发**:支持文本、图片、文件、视频消息的接收与回复,支持语音消息接收和识别 -- **扫码登录**:终端显示二维码,微信扫码确认即可登录,二维码过期自动刷新 -- **凭证持久化**:登录凭证自动保存至 `~/.weixin_cow_credentials.json`,重启无需重新扫码 -- **Session 自动重连**:Session 过期后自动清除旧凭证并重新发起扫码登录 -- **Web 控制台接入**:支持在 Web 控制台中添加微信通道,扫码登录流程同步展示 -- **Docker 和脚本支持**:`run.sh` 和 `docker-compose.yml` 均已适配微信通道 +- **Messaging**: Send and receive text, image, file, and video messages; receive voice messages +- **QR Code Login**: QR code displayed in terminal, scan with WeChat to log in; auto-refresh on expiry +- **Credential Persistence**: Login credentials saved to `~/.weixin_cow_credentials.json` automatically, no re-scan needed on restart +- **Session Auto-Reconnect**: Automatically clears expired credentials and re-initiates QR code login +- **Web Console Integration**: Add WeChat channel from the Web Console with synchronized QR code login flow +- **Docker & Script Support**: Both `run.sh` and 
`docker-compose.yml` now support the WeChat channel -接入文档:[微信接入](https://docs.cowagent.ai/channels/weixin)。 +Documentation: [WeChat Channel](https://docs.cowagent.ai/channels/weixin). -相关提交:[ce89869](https://github.com/zhayujie/CowAgent/commit/ce89869) +Related commits: [ce89869](https://github.com/zhayujie/CowAgent/commit/ce89869), [a483ec0](https://github.com/zhayujie/CowAgent/commit/a483ec0), [c1421e0](https://github.com/zhayujie/CowAgent/commit/c1421e0) -## 🤖 新增模型 +## 🤖 New Models -- **MiniMax-M2.7**:新增 MiniMax-M2.7 模型支持 -- **GLM-5-Turbo**:新增智谱 glm-5-turbo 模型支持 +- **MiniMax-M2.7**: Added MiniMax-M2.7 model support +- **GLM-5-Turbo**: Added Zhipu glm-5-turbo model support -相关提交:[9192f6f](https://github.com/zhayujie/CowAgent/commit/9192f6f) +Related commits: [9192f6f](https://github.com/zhayujie/CowAgent/commit/9192f6f) -## 🔧 脚本重构 +## 🔧 Script Refactoring -- **run.sh 重构**:提取公共逻辑,精简脚本代码([49d8707](https://github.com/zhayujie/CowAgent/commit/49d8707)) -- **可执行权限**:修复 `run.sh` 文件权限问题 ([652156e](https://github.com/zhayujie/CowAgent/commit/652156e)) -- **PID 获取**:修复 `run.sh` 中进程 PID 获取错误的问题 ([9febb07](https://github.com/zhayujie/CowAgent/commit/9febb07)) +- **run.sh Refactoring**: Extracted shared logic and eliminated duplication, reducing from 600+ lines to 177 lines ([49d8707](https://github.com/zhayujie/CowAgent/commit/49d8707)) +- **Executable Permission**: Fixed `run.sh` file permission issue ([652156e](https://github.com/zhayujie/CowAgent/commit/652156e)) -## 🌍 文档更新 +## ⚡ Improvements -新增完整的日文文档,覆盖入门指南、通道接入、模型配置等主要章节。Thanks [@Ikko Ashimine](https://github.com/ikoamu) +- **Unified Request Headers**: Added identification headers to external requests across Agent services (Chat, Embedding, Vision, WebSearch, etc.) 
([b4e711f](https://github.com/zhayujie/CowAgent/commit/b4e711f)) +- **Auto-Repair Messages**: Enhanced message protocol fault tolerance with automatic repair of malformed message sequences ([b8b57e3](https://github.com/zhayujie/CowAgent/commit/b8b57e3)) -相关提交:[5487c0b](https://github.com/zhayujie/CowAgent/commit/5487c0b) +## 🌍 Japanese Documentation -## 🐛 问题修复 +Added complete Japanese documentation covering getting started guide, channel integration, model configuration and other major sections. Thanks [@Ikko Ashimine](https://github.com/ikoamu) -- **企微机器人兼容**:修复旧版 `websocket-client` 的兼容性问题,新增统一的 WebSocket 兼容层 ([bc7f627](https://github.com/zhayujie/CowAgent/commit/bc7f627)) -- **消息自动修复**:增强消息协议的容错能力,自动修复格式异常的消息序列 ([b8b57e3](https://github.com/zhayujie/CowAgent/commit/b8b57e3)) -- **飞书编码**:修复飞书通道消息和日志的编码问题 ([7d0e156](https://github.com/zhayujie/CowAgent/commit/7d0e156)) -- **飞书配置**:移除 `run.sh` 中对 `feishu_bot_name` 的冗余依赖 ([1b5be1b](https://github.com/zhayujie/CowAgent/commit/1b5be1b)) +Related commits: [5487c0b](https://github.com/zhayujie/CowAgent/commit/5487c0b) -## 📦 升级方式 +## 🐛 Bug Fixes -源码部署可执行 `./run.sh update` 一键升级,或手动拉取代码后重启。详见 [更新升级文档](https://docs.cowagent.ai/guide/upgrade)。 +- **WeCom Bot Compatibility**: Fixed compatibility with older `websocket-client` versions, added unified WebSocket compatibility layer ([bc7f627](https://github.com/zhayujie/CowAgent/commit/bc7f627)) +- **run.sh PID**: Fixed process PID retrieval error in `run.sh` ([9febb07](https://github.com/zhayujie/CowAgent/commit/9febb07)) +- **Feishu Encoding**: Fixed message and log encoding issue in Feishu channel ([7d0e156](https://github.com/zhayujie/CowAgent/commit/7d0e156)) +- **Feishu Config**: Removed redundant `feishu_bot_name` dependency in `run.sh` ([1b5be1b](https://github.com/zhayujie/CowAgent/commit/1b5be1b)) -**发布日期**:2026.03.22 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.3...master) +## 📦 Upgrade + +Run `./run.sh update` for a one-click upgrade, or manually 
pull the latest code and restart. See [Upgrade Guide](https://docs.cowagent.ai/guide/upgrade) for details. + +**Release Date**: 2026.03.22 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.3...master) diff --git a/docs/releases/v2.0.5.mdx b/docs/releases/v2.0.5.mdx index bf0c0e53..ad45398e 100644 --- a/docs/releases/v2.0.5.mdx +++ b/docs/releases/v2.0.5.mdx @@ -1,84 +1,77 @@ --- title: v2.0.5 -description: CowAgent 2.0.5 - Cow CLI、Skill Hub 开源、浏览器工具、企微扫码创建、DeepSeek 独立模块及多项优化 +description: CowAgent 2.0.5 - Cow CLI, Skill Hub open source, Browser tool, WeCom Bot QR scan, and more --- -## 🖥️ Cow CLI 命令系统 +## 🖥️ Cow CLI -新增 Cow CLI 命令系统,支持在终端和对话中执行命令,实现对 CowAgent 的全方位管理: +New CLI command system for managing CowAgent from terminal and chat: -- **终端命令**:在系统终端中执行 `cow <命令>`,支持 `start`、`stop`、`restart`、`update`、`status`、`logs` 等服务管理操作 -- **对话命令**:在对话中输入 `/<命令>` 或 `cow <命令>`,支持 `/help`、`/status`、`/config`、`/skill`、`/context`、`/logs`、`/version` 等 -- **web控制台**:Web 控制台输入框输入 `/` 即可弹出指令菜单,支持方向键回溯历史输入 -- **Windows 支持**:新增 PowerShell 一键安装脚本 `scripts/run.ps1`,同时支持 `cow` 命令 +- **Terminal commands**: Run `cow <command>` for `start`, `stop`, `restart`, `update`, `status`, `logs`, etc. +- **Chat commands**: Type `/<command>` in conversation for `/help`, `/status`, `/config`, `/skill`, `/context`, `/logs`, `/version`, etc. 
+- **Web console**: Type `/` in the input box to open a slash command menu, with arrow-key input history +- **Windows support**: New PowerShell script `scripts/run.ps1` with `cow` command support -相关文档:[命令总览](https://docs.cowagent.ai/cli) +Docs: [Command Overview](https://docs.cowagent.ai/en/cli) -## 🧩 Cow Skill Hub 开源 +## 🧩 Cow Skill Hub Open Source -[Cow Skill Hub](https://skills.cowagent.ai)(技能广场)正式开源并上线,提供 AI Agent 技能的浏览、搜索、安装和发布,汇集精选技能、社区贡献技能、三方技能: +[Cow Skill Hub](https://skills.cowagent.ai) is now open source and live — browse, search, install, and publish AI Agent skills: -- **一键安装**:在对话中 `/skill install <名称>` 或终端 `cow skill install <名称>` 一键安装 -- **多来源支持**:支持安装 Skill Hub、GitHub、ClawHub、LinkAI 上的全部技能,支持 GitHub 批量安装和子目录指定 -- **技能搜索**:`/skill search` 和 `/skill list --remote` 浏览和搜索技能广场 -- **技能发布**:通过 [skills.cowagent.ai/submit](https://skills.cowagent.ai/submit) 提交自己的技能 -- **镜像加速**:支持 Skill Hub 镜像加速,国内环境下载更流畅 +- **One-command install**: `/skill install <name>` in chat or `cow skill install <name>` in terminal +- **Multi-source**: Install from Skill Hub, GitHub, ClawHub, LinkAI, and more +- **Search**: `/skill search` and `/skill list --remote` to browse the hub +- **Publish**: Submit your own skills at [skills.cowagent.ai/submit](https://skills.cowagent.ai/submit) +- **Mirror**: Mirror acceleration for faster downloads in China -Skill Hub 开源仓库:[cow-skill-hub](https://github.com/zhayujie/cow-skill-hub)。 +Open source repo: [cow-skill-hub](https://github.com/zhayujie/cow-skill-hub) -相关文档:[技能广场](https://docs.cowagent.ai/skills/hub)、[安装技能](https://docs.cowagent.ai/skills/install) +Docs: [Skill Hub](https://docs.cowagent.ai/en/skills/hub), [Install Skills](https://docs.cowagent.ai/en/skills/install) +## 🌐 Browser Tool -## 🌐 新增浏览器工具 +New Browser tool — Agent can control a Chromium browser to visit and interact with web pages: -新增 Browser 工具,Agent 可控制浏览器访问和操作网页: +- **Navigation & interaction**: `navigate`, `click`, `fill`, `select`, `scroll`, `press`, etc. 
+- **Page snapshot**: Compact DOM snapshot for efficient page understanding, auto-snapshot after navigation +- **Screenshot**: Save page screenshots to workspace +- **JavaScript execution**: Run custom scripts on pages +- **CLI install**: `cow install-browser` for one-command setup +- **Docker support**: Browser install built into Docker image -- **网页导航与交互**:支持 `navigate`、`click`、`fill`、`select`、`scroll`、`press` 等操作 -- **页面快照**:使用精简 DOM 快照技术,让 Agent 高效理解页面结构,导航后自动快照 -- **截图能力**:支持页面截图保存到工作区 -- **JavaScript 执行**:支持在页面中执行自定义脚本 -- **CLI 安装**:通过 `cow install-browser` 一键安装浏览器及依赖,自动适配系统环境 -- **Docker 支持**:Docker 镜像已内置浏览器安装支持 - -相关文档:[浏览器工具](https://docs.cowagent.ai/tools/browser)。 +Docs: [Browser Tool](https://docs.cowagent.ai/en/tools/browser) +## 🤖 WeCom Bot QR Code Setup -## 🤖 企微智能机器人扫码创建 +WeCom Bot channel now supports QR code scan for one-click bot creation: -企业微信智能机器人通道新增扫码一键创建功能: +- **QR scan in Web console**: Select "Scan QR" mode, scan with WeCom to auto-create and connect a bot — no manual configuration needed +- **Manual mode**: Still supports manual Bot ID and Secret input +- **Stream push optimization**: Throttled push to avoid WebSocket congestion -- **Web 控制台扫码**:在 Web 控制台通道页面,选择「扫码接入」模式,使用企业微信扫码即可自动创建并接入智能机器人,无需手动到企业微信后台配置 -- **手动模式保留**:同时保留「手动填写」模式,可输入已有的 Bot ID 和 Secret 接入 -- **流式推送优化**:增加推送节流,避免 WebSocket 拥塞 +Docs: [WeCom Bot](https://docs.cowagent.ai/en/channels/wecom-bot) -相关文档:[企微智能机器人接入](https://docs.cowagent.ai/channels/wecom-bot)。 +PR: [#2735](https://github.com/zhayujie/CowAgent/pull/2735). Thanks [@WecomTeam](https://github.com/WecomTeam) -相关提交:[#2735](https://github.com/zhayujie/CowAgent/pull/2735) +## 🐛 Other Improvements & Fixes -Thanks [@WecomTeam](https://github.com/WecomTeam) +- **DeepSeek module**: Independent DeepSeek Bot with dedicated `deepseek_api_key` config ([#2719](https://github.com/zhayujie/CowAgent/pull/2719)). 
Thanks [@6vision](https://github.com/6vision) +- **Web console**: Slash command menu, input history, new model options, mobile optimization ([#2731](https://github.com/zhayujie/CowAgent/pull/2731)). Thanks [@zkjqd](https://github.com/zkjqd) +- **Context loss**: Fix context loss after trimming ([393f0c0](https://github.com/zhayujie/CowAgent/commit/393f0c0)) +- **System prompt**: Fix system prompt not rebuilding on every turn ([13f5fde](https://github.com/zhayujie/CowAgent/commit/13f5fde)) +- **Gemini**: Fix missing model attribute in GoogleGeminiBot ([#2716](https://github.com/zhayujie/CowAgent/pull/2716)). Thanks [@cowagent](https://github.com/cowagent) +- **WeChat channel**: Fix file send failures and filename loss ([6d9b7ba](https://github.com/zhayujie/CowAgent/commit/6d9b7ba), [45faa9c](https://github.com/zhayujie/CowAgent/commit/45faa9c)) +- **Docker**: Fix volume permissions, reduce image size ([3eb8348](https://github.com/zhayujie/CowAgent/commit/3eb8348), [4470d4c](https://github.com/zhayujie/CowAgent/commit/4470d4c)) +- **Security**: Fix Memory Content path traversal risk. 
Thanks [@August829](https://github.com/August829) -## 🐛 其他优化与修复 +## 📦 Upgrade -- **DeepSeek 独立模块**:新增独立的 DeepSeek Bot 模块,支持 `deepseek_api_key` 专属配置,无需再通过 OpenAI 兼容方式接入([#2719](https://github.com/zhayujie/CowAgent/pull/2719))。Thanks [@6vision](https://github.com/6vision) -- **Web 控制台优化**:新增斜杠指令菜单和输入历史回溯,新增模型选项,优化移动端适配([#2731](https://github.com/zhayujie/CowAgent/pull/2731))。Thanks [@zkjqd](https://github.com/zkjqd) -- **上下文丢失**:修复上下文裁剪后丢失的问题 ([393f0c0](https://github.com/zhayujie/CowAgent/commit/393f0c0)) -- **系统提示词**:修复系统提示词未在每轮重建的问题 ([13f5fde](https://github.com/zhayujie/CowAgent/commit/13f5fde)) -- **Agent 响应**:去除 Agent 响应首尾空白字符 ([f890318](https://github.com/zhayujie/CowAgent/commit/f890318)) -- **视觉压缩**:优化视觉图片压缩策略 ([22b8ca0](https://github.com/zhayujie/CowAgent/commit/22b8ca0)) -- **Gemini 模型**:修复 GoogleGeminiBot 缺少 model 属性的问题([#2716](https://github.com/zhayujie/CowAgent/pull/2716))。Thanks [@cowagent](https://github.com/cowagent) -- **微信通道**:修复文件发送失败、文件名丢失等问题 ([6d9b7ba](https://github.com/zhayujie/CowAgent/commit/6d9b7ba)、[baf66a1](https://github.com/zhayujie/CowAgent/commit/baf66a1)、[45faa9c](https://github.com/zhayujie/CowAgent/commit/45faa9c)) -- **Docker 优化**:修复卷权限问题,精简镜像体积 ([3eb8348](https://github.com/zhayujie/CowAgent/commit/3eb8348)、[4470d4c](https://github.com/zhayujie/CowAgent/commit/4470d4c)) -- **README 排版**:优化中英文排版空格([#2723](https://github.com/zhayujie/CowAgent/pull/2723))。Thanks [@Xiaozhou345](https://github.com/Xiaozhou345) -- **安全修复**:修复 Memory Content路径遍历风险,Thanks [@August829](https://github.com/August829) +Run `cow update` or `./run.sh update` to upgrade, or pull the latest code and restart. See [Upgrade Guide](https://docs.cowagent.ai/en/guide/upgrade). 
-## 📦 升级方式 - -源码部署可执行 `cow update` 或 `./run.sh update` 一键升级,或手动拉取代码后重启。详见 [更新升级文档](https://docs.cowagent.ai/guide/upgrade)。 - -**发布日期**:2026.04.01 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.4...master) +**Release Date**: 2026.04.01 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.4...master) diff --git a/docs/releases/v2.0.6.mdx b/docs/releases/v2.0.6.mdx index 299be80b..a41f75aa 100644 --- a/docs/releases/v2.0.6.mdx +++ b/docs/releases/v2.0.6.mdx @@ -1,83 +1,83 @@ --- title: v2.0.6 -description: CowAgent 2.0.6 - 知识库系统、梦境记忆蒸馏、上下文智能压缩、Web 控制台多会话及多项优化 +description: CowAgent 2.0.6 - Knowledge Base, Deep Dream Memory Distillation, Smart Context Compression, Web Console Multi-Session and More --- -## 项目正式更名为 CowAgent +## Project Renamed to CowAgent -项目仓库正式从 `chatgpt-on-wechat` 更名为 **CowAgent**,演进为功能完备的 AI Agent 助理。 +The repository has been officially renamed from `chatgpt-on-wechat` to **CowAgent**, evolving into a full-featured AI Agent assistant. 
-- 新地址:[github.com/zhayujie/CowAgent](https://github.com/zhayujie/CowAgent),旧地址 GitHub 会自动重定向 -- CLI 命令、配置文件、文档链接均保持兼容,无需额外操作 +- New URL: [github.com/zhayujie/CowAgent](https://github.com/zhayujie/CowAgent) — GitHub auto-redirects the old URL +- CLI commands, config files, and documentation links remain compatible — no extra steps needed -## 📚 知识库系统 +## 📚 Knowledge Base -新增个人知识库系统,Agent 可自主构建和维护结构化知识,并在对话中按需检索引用。 +New personal knowledge base system — Agent can autonomously build and maintain structured knowledge, retrieving it on demand during conversations: -- **索引驱动的自组织结构**:知识库采用 `knowledge/` 目录,按分类自动组织,每个知识页面为独立的 Markdown 文件 -- **自动写入**:向 Agent 发送文件、链接等知识,或在讨论中识别到有价值的知识时,自动创建或更新知识页面 -- **混合检索**:支持关键词全文搜索和向量语义检索,在对话中按需加载相关知识 -- **可视化**:支持文件树浏览和知识图谱可视化,文档内链接可直接跳转查看 -- **命令管理**:`/knowledge` 查看统计、`/knowledge list` 查看目录结构、`/knowledge on|off` 开关知识库 +- **Index-driven self-organizing structure**: Knowledge is stored in `knowledge/` directory, auto-organized by category, with each knowledge page as an independent Markdown file +- **Auto-write**: When you send files, links, or other knowledge to the Agent, or when valuable information is identified in conversation, it automatically creates or updates knowledge pages +- **Hybrid retrieval**: Supports keyword full-text search and vector semantic retrieval, loading relevant knowledge on demand during conversations +- **Visualization**: File tree browsing and knowledge graph visualization, with in-document links for direct navigation +- **Command management**: `/knowledge` for stats, `/knowledge list` for directory structure, `/knowledge on|off` to toggle -相关文档:[知识库](https://docs.cowagent.ai/knowledge) -Inspired by Karpathy's [LLM Wiki](https://gist.github.com/karpathy/442a6bf555914893e9891c11519de94f). 
+Docs: [Knowledge Base](https://docs.cowagent.ai/en/knowledge) -## 🌙 梦境记忆蒸馏(Deep Dream) +## 🌙 Deep Dream Memory Distillation -全新的记忆整理机制,每日自动将分散的对话记忆蒸馏为精炼的长期记忆: +A new memory consolidation mechanism that automatically distills scattered conversation memories into refined long-term memory daily: -- **三层记忆流转**:对话上下文(短期)→ 天级记忆(中期)→ MEMORY.md(长期),形成完整的记忆生命周期 -- **自动蒸馏**:每日 23:55 定时执行,读取当天天级记忆和 MEMORY.md,通过 LLM 进行去重、合并、修剪,输出精炼的新版 MEMORY.md -- **梦境日记**:每次蒸馏生成一篇叙事风格的梦境日记,记录整理过程的发现和洞察,存储在 `memory/dreams/` 目录 -- **手动触发**:支持 `/memory dream [N]` 手动触发,可指定整理天数(默认 3 天,最大 30 天),完成后在对话中通知结果 -- **Web 控制台**:记忆管理页面新增「梦境日记」tab,可浏览和查看所有梦境日记 +- **Three-tier memory flow**: Conversation context (short-term) → Daily memory (mid-term) → MEMORY.md (long-term), forming a complete memory lifecycle +- **Auto-distillation**: Runs daily at 23:55, reads the day's daily memory and MEMORY.md, performs deduplication, merging, and pruning via LLM, outputting a refined MEMORY.md +- **Dream diary**: Each distillation generates a narrative-style dream diary recording discoveries and insights, stored in `memory/dreams/` +- **Manual trigger**: `/memory dream [N]` to manually trigger with configurable lookback days (default 3, max 30), with chat notification on completion +- **Web console**: Memory management page now includes a "Dream Diary" tab for browsing all dream diaries -相关文档:[梦境蒸馏](https://docs.cowagent.ai/memory/deep-dream) +Docs: [Deep Dream](https://docs.cowagent.ai/en/memory/deep-dream) -## 🧠 上下文智能压缩 +## 🧠 Smart Context Compression -上下文超出限制时将裁剪的部分通过 LLM 总结后异步注入,保持对话连贯性: +When context exceeds limits, trimmed portions are summarized by LLM and asynchronously injected to maintain conversation continuity: -- **LLM 异步摘要**:裁剪的消息由 LLM 总结为关键信息,同时写入天级记忆文件和注入保留的上下文 -- **多模型兼容**:优先使用主模型进行摘要,兼容 Claude、OpenAI、MiniMax 等不同模型的消息格式要求 +- **Async LLM summary**: Trimmed messages are summarized into key information by LLM, written to daily memory files and injected into retained context +- **Multi-model compatible**: 
Uses the primary model for summarization, compatible with Claude, OpenAI, MiniMax and other model message format requirements -相关文档:[短期记忆](https://docs.cowagent.ai/memory/context) +Docs: [Short-term Memory](https://docs.cowagent.ai/en/memory/context) -## 💬 Web 控制台升级 +## 💬 Web Console Upgrades -Web 控制台多项功能增强: +Multiple enhancements to the Web console: -- **多会话管理**:支持创建和切换多个独立会话,侧边栏展示会话列表,支持会话标题自动生成和手动编辑 -- **密码保护**:支持为控制台设置登录密码,可通过 `web_console_password` 配置项控制 -- **深度思考**:支持在 Web 端展示模型的思考过程,可通过`enable_thinking` 配置项控制 -- **定时推送**:支持定时任务结果推送到 Web 控制台 -- **消息复制**:AI 回复支持一键复制原始 Markdown 内容 +- **Multi-session management**: Create and switch between independent sessions, sidebar session list with auto-generated and manually editable titles +- **Password protection**: Set a login password via `web_console_password` config option +- **Deep thinking**: Display model thinking process in Web console, controlled by `enable_thinking` config option +- **Scheduled push**: Scheduled task results can be pushed to Web console +- **Message copy**: One-click copy of raw Markdown content from AI reply bubbles +- **Language toggle**: Top language switch button now shows current language for more intuitive interaction -## 🤖 模型相关 +## 🤖 Model Updates -- **视觉识别优化**:图片识别工具优先使用主模型,支持多模型厂商自动降级。相关文档:[视觉工具](https://docs.cowagent.ai/tools/vision) -- **MiniMax 新模型**:新增 MiniMax-M2.7-highspeed 模型和 MiniMax TTS 语音合成支持。Thanks @octo-patch -- **通义千问**:新增 qwen3.6-plus 模型支持 +- **Vision optimization**: Image recognition tool prefers the primary model with automatic multi-provider fallback. Docs: [Vision Tool](https://docs.cowagent.ai/en/tools/vision) +- **MiniMax new model**: Added MiniMax-M2.7-highspeed model and MiniMax TTS voice synthesis support. 
Thanks @octo-patch +- **Qwen**: Added qwen3.6-plus model support -## 🐛 其他优化与修复 +## 🐛 Other Improvements & Fixes -- **记忆提示词优化**:`MEMORY.md` 默认注入系统提示词,精细化记忆检索和写入的触发条件,增强主动写入能力 -- **系统提示词**:优化系统提示词的风格和语气引导 -- **浏览器工具**:增强隐式交互元素检测 -- **文件发送**:修复通用文件类型(tar.gz、zip 等)未能正确发送的问题。Thanks @6vision -- **macOS 兼容**:修复网络预检超时兼容性问题。Thanks @Moliang Zhou -- **Windows 兼容**:修复 Windows 下 PowerShell 兼容性、进程更新、终端编码等多项问题 -- **Python 3.13+**:修复 Python 3.13 及以上版本缺少 `legacy-cgi` 依赖的问题 -- **个人微信**:更新个人微信通道版本 +- **Memory prompts**: `MEMORY.md` injected into system prompt by default, with refined memory retrieval and write trigger conditions for enhanced proactive writing +- **System prompt**: Optimized system prompt style and tone guidance +- **Browser tool**: Enhanced implicit interactive element detection +- **File send**: Fixed common file types (tar.gz, zip, etc.) not being sent correctly. Thanks @6vision +- **macOS compatibility**: Fixed network pre-check timeout compatibility issue. Thanks @Moliang Zhou +- **Windows compatibility**: Fixed PowerShell compatibility, process updates, terminal encoding and other issues on Windows +- **Python 3.13+**: Fixed missing `legacy-cgi` dependency for Python 3.13+ +- **WeChat channel**: Updated personal WeChat channel version -## 📦 升级方式 +## 📦 Upgrade -源码部署可执行 `cow update` 或 `./run.sh update` 一键升级,或手动拉取代码后重启。详见 [更新升级文档](https://docs.cowagent.ai/guide/upgrade)。 +Run `cow update` or `./run.sh update` to upgrade, or pull the latest code and restart. See [Upgrade Guide](https://docs.cowagent.ai/en/guide/upgrade). 
-**发布日期**:2026.04.14 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.5...master) +**Release Date**: 2026.04.14 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.5...master) diff --git a/docs/releases/v2.0.7.mdx b/docs/releases/v2.0.7.mdx index b4b6e27b..522e5339 100644 --- a/docs/releases/v2.0.7.mdx +++ b/docs/releases/v2.0.7.mdx @@ -1,64 +1,65 @@ --- title: v2.0.7 -description: CowAgent 2.0.7 - 图像生成技能(六厂商自动路由)、新模型支持、知识库增强、Web 控制台优化及多项修复 +description: CowAgent 2.0.7 - Image Generation Skill (6-provider auto-routing), new models, knowledge base enhancements, Web Console improvements and bug fixes --- -## 🎨 图像生成技能 +## 🎨 Image Generation Skill -新增图像生成内置技能,支持文生图、图生图、多图融合,支持 `GPT-Image-2`、`Nano Banana` 等多种模型: +New built-in `image-generation` skill supporting text-to-image, image-to-image, and multi-image fusion across six major providers: -- **自动路由**:支持六种模型厂商自动切换,OpenAI (GPT-Image-2) → Gemini (Nano Banana) → Seedream (火山方舟) → Qwen (百炼) → MiniMax → LinkAI -- **开箱即用**:配置 API Key 即可使用,无需手动指定模型。也支持在对话中指定特定模型 -- **灵活控制**:支持 `quality`(画质)、`size`(分辨率,512/1K~4K)、`aspect_ratio`(宽高比)等参数,各厂商自动适配有效值 -- **图片编辑**:传入已有图片即可进行编辑、风格迁移、多图融合 -- **Skill 级配置**:支持通过 `config.json` 中的 `skills.image-generation.model` 固定默认模型 +- **6-provider auto-routing**: OpenAI (GPT-Image-2) → Gemini (Nano Banana) → Seedream (Volcengine Ark) → Qwen (DashScope) → MiniMax → LinkAI — automatically selects from configured providers in fixed priority order, with automatic fallback on failure +- **Zero model selection**: Just configure an API key and it works — no need to manually specify a model. You can also name a specific model in conversation (e.g. 
"draw a cat with seedream") +- **Flexible control**: Supports `quality`, `size` (512/1K–4K), and `aspect_ratio` parameters, with each provider automatically mapping to its supported values +- **Image editing**: Pass existing images for editing, style transfer, or multi-image fusion (Seedream supports up to 14 reference images) +- **Skill-level config**: Pin a default model via `skills.image-generation.model` in `config.json` +- **Image lightbox**: All images in the Web console now support click-to-enlarge preview -相关文档:[图像生成技能](https://docs.cowagent.ai/skills/image-generation) +Docs: [Image Generation Skill](https://docs.cowagent.ai/en/skills/image-generation) -## 🤖 新模型支持 +## 🤖 New Model Support -- **Kimi K2.6**:新增 `kimi-k2.6` 模型支持 -- **Claude Opus 4.7**:新增 `claude-opus-4-7` 模型支持 -- **GLM 5.1**:新增 `glm-5.1` 模型支持 -- **Kimi Coding Plan**:支持 Kimi Coding Plan 模式 -- **自定义模型厂商**:新增[自定义模型](https://docs.cowagent.ai/models/custom)提供方配置,方便接入本地模型及更多厂商 +- **Kimi K2.6**: Added `kimi-k2.6` model support +- **Claude Opus 4.7**: Added `claude-opus-4-7` model support +- **GLM 5.1**: Added `glm-5.1` model support +- **Kimi Coding Plan**: Support for Kimi Coding Plan mode +- **Custom model providers**: New custom model provider configuration for easier integration with additional vendors -## 📚 知识库增强 +## 💬 Web Console Improvements -- **嵌套目录支持**:知识库列表和展示支持多级嵌套目录 -- **根级文件展示**:知识树中显示根目录下的 `index.md`、`log.md` 等文件 -- **空状态统计修复**:排除根级文件对知识库统计的干扰,正确保持空状态 +- **Smart auto-scroll**: Improved chat scroll behaviour — no longer forces scroll to bottom while the user is reading earlier messages +- **Reasoning content cap**: Deep thinking content capped at 4 KB to prevent frontend lag +- **Mobile optimisation**: Session sidebar hidden by default on mobile, with overlay dismiss support +- **Session title fix**: Fixed title auto-generation fallback logic and Bridge reset on config change +- **Image preview dedup**: Fixed duplicate image rendering within the same message -## 🌙 梦境记忆优化 +## 📚 Knowledge 
Base Enhancements -- **结构化组织**:梦境记忆文件按日期自动归档,目录结构更清晰 -- **定时抖动**:每日定时触发增加随机抖动,避免集群场景下的并发冲突 +- **Nested directory support**: Knowledge base listing and display now support multi-level nested directories +- **Root-level file display**: Show `index.md`, `log.md` and other root-level files in the knowledge tree +- **Empty state stats fix**: Root-level files no longer interfere with empty-state detection -## 🛠 技能系统改进 +## 🌙 Dream Memory Improvements -- **技能管理刷新**:`/skill` 命令执行后自动加载最新技能,确保状态同步 -- **安装来源扩展**:技能安装支持多种来源格式(URL、zip、本地文件等) +- **Structured organisation**: Dream memory files are now auto-archived by date with a cleaner directory structure +- **Schedule jitter**: Daily dream trigger includes random jitter to avoid concurrency conflicts in cluster deployments -## 💬 Web 控制台优化 +## 🛠 Skill System Improvements -- **智能自动滚动**:优化聊天窗口滚动逻辑,用户手动翻阅时不再强制跳到底部 Thanks @colin2060 -- **移动端适配**:侧边栏默认隐藏,支持点击遮罩关闭 -- **图片预览去重**:修复同一消息中图片重复渲染的问题 -- **推理内容截断**:深度思考内容超出阶段,解决前端卡顿问题 -- **会话标题修复**:修复标题自动生成的回退逻辑 +- **Skill manager refresh**: `/skill` commands now automatically refresh the skill manager to keep state in sync +- **Installation sources**: Skill installation supports multiple source formats (URL, zip, local file, etc.) with automatic target directory handling +## 🐛 Other Fixes -## 🐛 其他修复 +- **Gemini fix**: Fixed Gemini tool calls not returning results +- **Agent retry**: Empty-response retries no longer drop `tool_calls` +- **Docker env sync**: Fixed environment variables not syncing after config update in Docker environments +- **Python 3.7 compat**: Deferred `Literal` import for Python 3.7 compatibility +- **Model switch notification**: Fixed bot_type change notification not showing after model switch. 
Thanks @6vision +- **Config command**: `/config` now supports setting `enable_thinking` +- **Thinking display**: Deep thinking display disabled by default -- **Gemini 修复**:修复 Gemini tool call 不返回结果的问题 -- **Agent 重试**:空响应重试时不再丢弃 tool_calls -- **Docker 环境变量**:修复 Docker 环境下更新配置后环境变量未同步的问题 Thanks @sunboy0523 -- **Python 3.7 兼容**:延迟导入 `Literal` 以兼容 Python 3.7 -- **模型切换通知**:修复切换模型后 bot_type 变更通知未显示的问题。Thanks @6vision -- **配置命令增强**:`/config` 支持设置 `enable_thinking` +## 📦 Upgrade -## 📦 升级方式 +Run `cow update` or `./run.sh update` to upgrade, or pull the latest code and restart. See [Upgrade Guide](https://docs.cowagent.ai/en/guide/upgrade). -源码部署可执行 `cow update` 或 `./run.sh update` 一键升级,或手动拉取代码后重启。详见 [更新升级文档](https://docs.cowagent.ai/guide/upgrade)。 - -**发布日期**:2026.04.22 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.6...2.0.7) +**Release Date**: 2026.04.22 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.6...master) diff --git a/docs/releases/v2.0.8.mdx b/docs/releases/v2.0.8.mdx index ced1b967..3fcc29da 100644 --- a/docs/releases/v2.0.8.mdx +++ b/docs/releases/v2.0.8.mdx @@ -1,63 +1,68 @@ --- title: v2.0.8 -description: CowAgent 2.0.8 - 飞书渠道全面升级(语音、流式打字机、一键扫码接入)、DeepSeek V4 / 百度千帆支持、定时任务工具优化 +description: CowAgent 2.0.8 - Major Feishu channel upgrade (voice, streaming typewriter, one-click QR app creation), DeepSeek V4 / ERNIE 5.0 support, scheduler memory enhancements and multiple fixes --- -## 🪶 飞书渠道全面升级 +## 🪶 Major Feishu Channel Upgrade -### 1. 一键扫码创建飞书应用 +### 1. One-click QR-scan App Creation -不再需要手动到飞书开放平台建应用、填权限和事件订阅。Web 控制台和命令行启动时若未配置 `feishu_app_id`,会自动展示扫码入口,飞书扫码授权后自动创建机器人并回填配置,开箱即用。 +No more manual app setup, permission scopes and event subscriptions in the Feishu Open Platform. When `feishu_app_id` is not configured, both the Web Console and CLI startup flow now show a QR-scan entry — scan with Feishu, authorize, and the bot is created and config is filled back automatically. Out-of-the-box. 
-相关文档:[飞书渠道](https://docs.cowagent.ai/channels/feishu) +Documentation: [Feishu Channel](https://docs.cowagent.ai/en/channels/feishu) -### 2. 语音消息收发 +### 2. Voice Messages -支持接收用户发送的飞书语音消息并自动转文本,回复也可走 TTS 以语音形式发出。同时优化了中文短语音的识别准确度。 +Receive Feishu voice messages with automatic speech-to-text, and reply in voice via TTS. Recognition accuracy for short Chinese voice messages has been improved. -### 3. 流式打字机回复 +### 3. Streaming Typewriter Replies -接入飞书 CardKit 流式卡片,**默认开启**,体验对齐 Web 端: +Integrated with Feishu CardKit streaming cards, **enabled by default**, matching the Web Console experience: -- 多轮 Agent 场景下中间过场消息与最终回复分卡呈现 -- 针对 DeepSeek 等高频输出模型做了专门优化,速度与 Web 端持平 -- 不支持时自动回退为普通文本回复,无需手动配置 -- 要求飞书客户端 ≥ 7.20 +- Multi-turn agent flows render intermediate updates and the final reply on separate cards +- Tuned for high-throughput models like DeepSeek to keep pace with the Web Console +- Falls back to plain text replies automatically when not supported, no manual config needed +- Requires Feishu client ≥ 7.20 -飞书语音消息收发与流式打字机的基础能力来自社区贡献 #2791 Thanks @yangluxin613 +The voice and streaming building blocks come from a community contribution #2791. 
Thanks [@yangluxin613](https://github.com/yangluxin613) -## 🤖 新模型支持 +## 🤖 New Model Support -- **DeepSeek V4 系列**:新增 `deepseek-v4-pro` / `deepseek-v4-flash`,并将默认模型切换为 `deepseek-v4-flash` -- **思考模型开关统一**:DeepSeek V4、Qwen3 等思考模型的开关行为对齐到 `enable_thinking` -- **百度千帆模型接入**:新增百度千帆厂商,支持 `ernie-5.0`、`ernie-4.5-turbo-128k` 等模型,并支持图像识别工具,相关文档查看 [百度千帆](https://docs.cowagent.ai/models/qianfan)。#2790 Thanks @jimmyzhuu -- **新增有道翻译**:`translate` 模块新增有道翻译支持 #2797 Thanks @Zmjjeff7 +- **DeepSeek V4 series**: Added `deepseek-v4-pro` / `deepseek-v4-flash`, with `deepseek-v4-flash` set as the new default +- **Unified thinking-mode toggle**: DeepSeek V4, Qwen3 and other thinking-capable models now share the same `enable_thinking` switch +- **ERNIE first-class integration**: New `qianfan` provider supporting `ernie-5.0` (default recommendation), `ernie-x1.1`, `ernie-4.5-turbo-128k`, `ernie-4.5-turbo-32k`. Dedicated `qianfan_api_key` / `qianfan_api_base` settings keep OpenAI config clean; legacy `wenxin` / `wenxin-4` paths are fully preserved. #2790 Thanks [@jimmyzhuu](https://github.com/jimmyzhuu) -## 🛠 OpenAI 客户端重构 + Documentation: [ERNIE](https://docs.cowagent.ai/en/models/qianfan) -- **去 SDK 依赖**:OpenAI Bot 改为原生 HTTP 实现,启动更轻、依赖冲突更少 -- **Web 控制台提示**:模型配置 API Base 输入框加入版本路径占位提示 +## 🌐 Translation Provider -## ⏰ 定时任务记忆增强 +- **Youdao translator**: Added a Youdao provider to the `translate/` module using the v3 SHA-256 signing scheme, with automatic ISO 639-1 language-code mapping (`zh`, `zh-TW`, etc.) 
#2797 Thanks [@Zmjjeff7](https://github.com/Zmjjeff7) -- **任务结果可被追问**:定时任务的执行结果自动注入到接收方的会话历史中,下一轮对话可直接追问,无需重新交代上下文 Thanks @huangrichao2020 -- **不污染长期记忆**:注入的调度对话不会被纳入每日梦境记忆汇总,避免高频任务把记忆刷满 -- **避免越跑越慢**:调度任务自己的上下文长度自动控制在合理范围内,长期反复执行也不会越积越大、拖慢响应 +## 🛠 OpenAI Client Refactor -## 🔧 工具与安全 +- **Drop SDK dependency**: The OpenAI bot is reimplemented on a native HTTP client — leaner startup, fewer dependency conflicts +- **Web Console hint**: API base inputs in the model config UI now include version-path placeholder hints -- **图像识别模型**:让 `tools.vision.model` 配置真正生效,未配置时自动 fallback #2792 Thanks CNXudiandian -- **Bash 安全确认**:仅对工作区外的破坏性删除做二次确认,工作区内常规操作不再打扰 +## ⏰ Scheduler Memory Enhancements -## 🐛 其他修复 +- **Follow-up on task results**: Scheduled task results are automatically injected into the receiver's session history — the next turn can ask follow-up questions without re-stating context. Thanks [@huangrichao2020](https://github.com/huangrichao2020) +- **No long-term memory pollution**: Scheduler-injected pairs are excluded from the daily memory flush so high-frequency tasks don't drown the memory store +- **Bounded scheduler context**: The scheduler's own session context is automatically capped, so long-running periodic tasks don't accumulate state and slow down replies -- 修复 Deep Dream 在多实例场景下重复触发 -- 修复 DeepSeek 多轮对话中部分历史轮次缺失 `reasoning_content` +## 🔧 Tools and Safety -## 📦 升级方式 +- **Vision model selection**: `tools.vision.model` config now actually takes effect, with automatic fallback when unconfigured #2792 +- **Bash safety prompt**: The destructive-deletion confirm prompt is now scoped to paths outside the workspace — routine in-workspace operations are no longer interrupted -源码部署可执行 `cow update` 或 `./run.sh update` 一键升级,或手动拉取代码后重启。详见 [更新升级文档](https://docs.cowagent.ai/guide/upgrade)。 +## 🐛 Other Fixes -> ⚠️ 飞书一键创建应用依赖 `lark-oapi>=1.5.5`,`cow update` 会自动拉取;手动部署请确保依赖已更新。 +- Fixed Deep Dream firing duplicate runs in multi-instance setups +- Fixed missing 
`reasoning_content` on some history turns in DeepSeek multi-turn conversations -**发布日期**:2026.05.06 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.7...2.0.8) +## 📦 Upgrade + +Source-code deployments can run `cow update` or `./run.sh update` for a one-click upgrade, or pull the latest code and restart manually. See [Upgrade Guide](https://docs.cowagent.ai/en/guide/upgrade) for details. + +> ⚠️ One-click Feishu app creation requires `lark-oapi>=1.5.5`. `cow update` pulls it automatically; manual deployments must update dependencies. + +**Release Date**: 2026.05.05 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.7...2.0.8) diff --git a/docs/releases/v2.0.9.mdx b/docs/releases/v2.0.9.mdx index 957e0ced..ccae36fc 100644 --- a/docs/releases/v2.0.9.mdx +++ b/docs/releases/v2.0.9.mdx @@ -1,65 +1,65 @@ --- title: v2.0.9 -description: CowAgent 2.0.9 - 新增模型管理、MCP 协议支持、浏览器登录态持久化、新模型接入 +description: CowAgent 2.0.9 - Web Console model management, MCP protocol support, browser persistent login, new models and deployment hardening --- -## 🖥️ 新增模型管理 +## 🖥️ Model Management Console -Web 控制台新增「模型」页面,按 **模型厂商 + 模型能力** 进行管理,支持对话、图像、语音、向量模型和搜索能力的配置: +The Web Console adds a new **Models** page that organizes everything by **provider × capability**, covering chat, image, voice, embedding and search models in one place: -- **多厂商配置**:所有厂商的 API Key / API Base 在顶部统一维护,下方所有能力立即生效,无需重复填写 -- **图像模型**:图像理解与图像生成均可独立选择厂商和模型,未指定时跟随主模型自动选择 -- **语音模型**:语音识别和合成可独立配置,新增千问、智谱 ASR/TTS 模型 -- **向量模型**:支持配置 Embedding 模型(用于记忆及知识库检索),新增支持 OpenAI、通义、豆包、智谱等;切换模型后需执行 `/memory rebuild-index` 在线重建索引 -- **搜索能力**:联网搜索能力升级,支持博查、百度、智谱等多个厂商,自动模式下 Agent 可综合多来源搜索结果进行深度研究 +- **Per-provider configuration**: Each provider's API Key / API Base is configured once at the top, and every capability below picks it up automatically — no more re-entering credentials +- **Image models**: Image understanding and image generation can each pick their own provider and model independently; falls 
back to the main model when unspecified +- **Voice models**: ASR (speech-to-text) and TTS (text-to-speech) can be configured independently, with new Qwen and Zhipu ASR/TTS models added +- **Embedding models**: Configurable embedding models (used for memory and knowledge-base retrieval), with new support for OpenAI, Tongyi, Doubao, Zhipu and others; run `/memory rebuild-index` after switching to rebuild the index online +- **Search capability**: Web search has been upgraded to support Bocha, Baidu, Zhipu and more providers — in auto mode the agent can synthesize results from multiple sources for deeper research -相关文档:[模型概览](https://docs.cowagent.ai/models) +Documentation: [Models Overview](https://docs.cowagent.ai/en/models) -20260522113305 +20260522113305 -## 🧩 MCP 协议支持 +## 🧩 MCP Protocol Support -支持 **MCP(Model Context Protocol)** 协议,从固定工具集扩展为开放可插拔的工具生态,任何兼容 MCP 协议的服务均可作为工具直接接入 Agent。 +Adds support for **MCP (Model Context Protocol)**, expanding from a fixed built-in toolset to an open, pluggable tool ecosystem — any MCP-compatible service can be plugged in directly as an agent tool. -- 原生 JSON-RPC 实现,零额外依赖,同时支持 `stdio` 和 `sse` 两种传输 -- 兼容 Claude Desktop / Cursor 等主流风格的 `mcpServers` 配置,优先读取 `~/cow/mcp.json` +- Native JSON-RPC implementation, zero extra dependencies, supports both `stdio` and `sse` transports +- Compatible with the `mcpServers` configuration style used by Claude Desktop / Cursor, reads `~/cow/mcp.json` by default -相关文档:[MCP 工具](https://docs.cowagent.ai/tools/mcp)。Thanks @yangluxin613 (#2801) +Documentation: [MCP Tools](https://docs.cowagent.ai/en/tools/mcp). 
Thanks [@yangluxin613](https://github.com/yangluxin613) (#2801) -## 🌐 浏览器登录态持久化 +## 🌐 Browser Persistent Login -针对需要登录、有反爬机制的网站,浏览器工具支持登录一次后长期复用登录态,并允许接入用户自己的真实 Chrome 以通过指纹检测: +For sites that require login or have anti-bot protection, the browser tool can now persist a login session for long-term reuse, and supports attaching to your real Chrome browser to bypass fingerprint detection: -- **持久化用户配置(默认)**:默认使用 `~/.cow/browser_profile` 作为浏览器用户目录,登录一次后下次自动复用登录态 -- **CDP 模式**:通过 `tools.browser.cdp_endpoint` 接管真实 Chrome 浏览器,享有完整浏览器权限 +- **Persistent user profile (default)**: Uses `~/.cow/browser_profile` as the browser user data dir by default; once logged in, sessions are reused automatically on subsequent runs +- **CDP mode**: Configure `tools.browser.cdp_endpoint` to take over a real Chrome instance with full browser permissions -相关文档:[浏览器工具](https://docs.cowagent.ai/tools/browser)。Thanks @leafmove (#2809) +Documentation: [Browser Tool](https://docs.cowagent.ai/en/tools/browser). Thanks [@leafmove](https://github.com/leafmove) (#2809) -## 🤖 模型新增与优化 +## 🤖 New Models and Improvements -- **模型新增**:`gpt-5.5`、`gemini-3.5-flash`、`qwen3.7-max`、`ernie-5.1` -- **模型优化**:DeepSeek V4 支持 `reasoning_effort` 思考深度参数;修复 MiMo 等思考模型通过 OpenAI 兼容协议接入的问题 +- **New models**: `gpt-5.5`, `gemini-3.5-flash`, `qwen3.7-max`, `ernie-5.1` +- **Improvements**: DeepSeek V4 supports the `reasoning_effort` thinking-depth parameter; fixed thinking models like MiMo failing to connect via the OpenAI-compatible protocol -## 🔒 部署与安全 +## 🔒 Deployment & Security -- **默认本机访问**:Web 控制台 `web_host` 配置默认绑定 `127.0.0.1`,服务器部署时可手动设置为 `0.0.0.0` 并设置密码。Thanks @August829、@yidaozhongqing、@YLChen-007、@icysun -- **前端资源完全本地化**:第三方 CSS / JS 全部本地分发,离线 / 内网环境也能正常加载控制台。Thanks @gitlayzer (#2816) +- **Bind to localhost by default**: The Web Console `web_host` now defaults to `127.0.0.1`; for server deployments, set it to `0.0.0.0` and configure a password manually. 
Thanks @August829, @yidaozhongqing, @YLChen-007, @icysun +- **Fully bundled frontend assets**: All third-party CSS / JS are now served locally — the console works offline and on intranet deployments. Thanks [@gitlayzer](https://github.com/gitlayzer) (#2816) -## 🛠 体验优化与修复 +## 🛠 UX Improvements & Fixes -- **TTS 适配更多通道**:Web对话、个人微信、飞书、钉钉、企微智能机器人均已支持回复语音,详情查看 [通道概览](https://docs.cowagent.ai/channels) -- **日志面板增强**:根据日志等级差异化高亮展示、支持根据等级筛选。Thanks @yangluxin613 (#2807) -- **Web 控制台自动启动**:程序启动后自动打开 Web 控制台。Thanks @yangluxin613 (#2804) -- **Ctrl+C 干净退出**:不再打印一长串 `KeyboardInterrupt` 堆栈。Thanks @yangluxin613 (#2806) -- **文件夹上传**:Web 端支持目录上传,路径校验适配 Windows。Thanks @TryToMakeUsBetter (#2814) -- 修复定时任务在某些情况下重复执行的问题。Thanks @CNXudiandian (#2820) -- 修复定时任务带时区时单次任务不触发的问题。Thanks @AethericSpace -- 修复执行失败的工具调用在页面刷新后不显示的问题。Thanks @a1094174619 (#2822) -- 修复企微机器人消息中包含非法控制字符导致投递失败的问题。Thanks @Jacques-Zhao (#2810) +- **TTS rolls out to more channels**: Web Console, Personal WeChat, Feishu, DingTalk and WeCom Smart Bot all support voice replies — see the [Channels Overview](https://docs.cowagent.ai/en/channels) +- **Log panel enhancements**: Differentiated highlighting by log level, with level-based filtering. Thanks [@yangluxin613](https://github.com/yangluxin613) (#2807) +- **Auto-launch Web Console**: The Web Console now opens automatically on startup. Thanks [@yangluxin613](https://github.com/yangluxin613) (#2804) +- **Clean Ctrl+C exit**: No more long `KeyboardInterrupt` stack traces. Thanks [@yangluxin613](https://github.com/yangluxin613) (#2806) +- **Folder upload**: Web Console supports directory uploads, with path validation adapted for Windows. Thanks [@TryToMakeUsBetter](https://github.com/TryToMakeUsBetter) (#2814) +- Fixed scheduled tasks executing duplicates under certain conditions. Thanks [@CNXudiandian](https://github.com/CNXudiandian) (#2820) +- Fixed one-shot scheduled tasks with timezone not firing. 
Thanks @AethericSpace +- Fixed failed tool calls not being displayed after page refresh. Thanks [@a1094174619](https://github.com/a1094174619) (#2822) +- Fixed WeCom bot messages with illegal control characters failing to be delivered. Thanks [@Jacques-Zhao](https://github.com/Jacques-Zhao) (#2810) -## 📦 升级方式 +## 📦 Upgrade -源码部署可执行 `cow update` 一键升级,或手动拉取代码后重启。详见 [更新升级文档](https://docs.cowagent.ai/guide/upgrade)。 +Source-code deployments can run `cow update` for a one-click upgrade, or pull the latest code and restart manually. See the [Upgrade Guide](https://docs.cowagent.ai/en/guide/upgrade) for details. -**发布日期**:2026.05.22 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.8...2.0.9) +**Release Date**: 2026.05.22 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.8...2.0.9) diff --git a/docs/skills/create.mdx b/docs/skills/create.mdx index 45cef149..cfdbde02 100644 --- a/docs/skills/create.mdx +++ b/docs/skills/create.mdx @@ -1,32 +1,32 @@ --- -title: 创造技能 -description: 通过对话创建自定义技能 +title: Create Skills +description: Create custom skills through conversation --- -CowAgent 内置了 Skill Creator,可以通过自然语言对话快速创建、安装或更新技能。 +CowAgent includes a built-in Skill Creator that lets you quickly create, install, or update skills through natural language conversation. -## 使用方式 +## Usage -直接在对话中描述你想要的技能,Agent 会自动完成创建: +Simply describe the skill you want in a conversation, and the Agent will handle the creation: -- 将工作流程固化为技能:"帮我把这个部署流程创建为一个技能" -- 对接第三方 API:"根据这个接口文档创建一个技能" -- 安装远程技能:"帮我安装 xxx 技能" +- Codify workflows as skills: "Create a skill from this deployment process" +- Integrate third-party APIs: "Create a skill based on this API documentation" +- Install remote skills: "Install xxx skill for me" -## 创建流程 +## Creation Flow -1. 告诉 Agent 你想创建的技能功能 -2. Agent 自动生成 `SKILL.md` 说明文件和运行脚本 -3. 技能保存到工作空间的 `~/cow/skills/` 目录 -4. 后续对话中 Agent 会自动识别并使用该技能 +1. Tell the Agent what skill you want to create +2. 
Agent automatically generates `SKILL.md` description and execution scripts +3. Skill is saved to the workspace `~/cow/skills/` directory +4. Agent will automatically recognize and use the skill in future conversations -## SKILL.md 格式 +## SKILL.md Format -创建的技能遵循标准的 SKILL.md 格式: +Created skills follow the standard SKILL.md format: ```markdown --- @@ -45,14 +45,14 @@ metadata: Detailed instructions... ``` -| 字段 | 说明 | +| Field | Description | | --- | --- | -| `name` | 技能名称,需与目录名一致 | -| `description` | 技能描述,Agent 据此决定是否调用 | -| `metadata.requires.bins` | 依赖的系统命令 | -| `metadata.requires.env` | 依赖的环境变量 | -| `metadata.always` | 是否始终加载(默认 false) | +| `name` | Skill name, must match directory name | +| `description` | Skill description, Agent decides whether to invoke based on this | +| `metadata.requires.bins` | Required system commands | +| `metadata.requires.env` | Required environment variables | +| `metadata.always` | Always load (default false) | - 详细开发文档可参考 [Skill Creator 说明](https://github.com/zhayujie/CowAgent/blob/master/skills/skill-creator/SKILL.md)。 + See the [Skill Creator documentation](https://github.com/zhayujie/CowAgent/blob/master/skills/skill-creator/SKILL.md) for details. diff --git a/docs/skills/hub.mdx b/docs/skills/hub.mdx index 843dd4d4..e88b4b2f 100644 --- a/docs/skills/hub.mdx +++ b/docs/skills/hub.mdx @@ -1,65 +1,65 @@ --- -title: 技能广场 -description: 浏览、搜索和安装 AI Agent 技能 +title: Skill Hub +description: Browse, search, and install AI Agent skills --- -[Cow Skill Hub](https://skills.cowagent.ai/) 是开源的 AI Agent 技能广场,汇集了官方推荐、社区贡献和第三方平台(GitHub、ClawHub 等)的技能。 +[Cow Skill Hub](https://skills.cowagent.ai/) is an open-source skill marketplace for AI Agents, aggregating official picks, community contributions, and third-party skills from GitHub, ClawHub, and beyond. 
-开源仓库:[github.com/zhayujie/cow-skill-hub](https://github.com/zhayujie/cow-skill-hub) +Source code: [github.com/zhayujie/cow-skill-hub](https://github.com/zhayujie/cow-skill-hub) -## 功能 +## Features -- **浏览技能**:按类别(推荐 / 社区 / 第三方)和标签筛选 -- **搜索技能**:按名称或描述搜索 -- **查看详情**:查看技能文档、文件内容、安装命令和依赖的环境变量 -- **一键安装**:复制安装命令即可在 CowAgent 中使用 +- **Browse skills** — filter by category (Featured / Community / Third-party) and tags +- **Search skills** — find skills by name or description +- **View details** — read the skill manifest, file contents, install command, and required environment variables +- **One-click install** — copy the install command and run it in CowAgent -## 安装技能 +## Installing a skill -在对话中或终端中执行安装命令: +Run the install command in chat or in your terminal: -```text 对话 +```text Chat /skill install ``` -```bash 终端 +```bash Terminal cow skill install ``` -也可以在对话中浏览技能广场: +You can also browse the marketplace directly from chat: ```text /skill list --remote -/skill search <关键词> +/skill search <keyword> ``` -除了在列表中展示的精选技能,还可以通过 **CLI命令 + Skill Hub** 安装各种第三方技能(**GitHub、ClawHub、LinkAI、URL** 等)参考 [安装技能](/skills/install)。 +Beyond the curated list, you can install third-party skills from **GitHub, ClawHub, LinkAI, or any URL** via the CLI. See [Installing skills](/skills/install) for details. -## 贡献技能 +## Contributing a skill -欢迎向技能广场提交你的技能: +To submit your own skill: -1. 访问 [skills.cowagent.ai/submit](https://skills.cowagent.ai/submit) -2. 使用 GitHub 或 Google 账号登录 -3. 上传包含 `SKILL.md` 的文件夹或 zip 包 -4. 自动解析技能名称、显示名称和描述,可按需修改 -5. 提交后将经过安全检查和审核后发布 +1. Visit [skills.cowagent.ai/submit](https://skills.cowagent.ai/submit) +2. Sign in with GitHub or Google +3. Upload a folder or zip file containing `SKILL.md` +4. Skill name, display name, and description are auto-detected — adjust as needed +5. 
Submit for review; skills go live after security and quality checks -技能文件结构: +Skill file layout: ``` your-skill/ -├── SKILL.md # 必须,放在根目录 -├── scripts/ # 可选,运行脚本 -└── resources/ # 可选,其他资源 +├── SKILL.md # required, in the root +├── scripts/ # optional, runtime scripts +└── resources/ # optional, additional assets ``` - 技能基于 `SKILL.md` 文件构建,你也可以在技能详情页下载 SKILL.md,用于任何支持自定义指令的 Agent(如 OpenClaw、Cursor、Claude Code 等)。 + Skills are built around the `SKILL.md` manifest. You can also download `SKILL.md` from a skill's detail page and use it with any Agent that supports custom instructions (OpenClaw, Cursor, Claude Code, and more). diff --git a/docs/skills/image-generation.mdx b/docs/skills/image-generation.mdx index 288fd656..e8de3e0f 100644 --- a/docs/skills/image-generation.mdx +++ b/docs/skills/image-generation.mdx @@ -1,30 +1,30 @@ --- -title: image-generation - 图像生成 -description: 文生图 / 图生图 / 多图融合,支持多家厂商自动路由与回退 +title: image-generation +description: Text-to-image / image-to-image / multi-image fusion with automatic multi-provider routing and fallback --- -通用的图像生成与编辑技能,支持 OpenAI、Gemini、Seedream(火山方舟)、Qwen(百炼)、MiniMax、LinkAI 共六家厂商。配好任意一家的 Key 即可使用,配多家可享受自动回退。 +A general-purpose image generation and editing skill supporting six providers: OpenAI, Gemini, Seedream (Volcengine Ark), Qwen (DashScope), MiniMax, and LinkAI. Configure any one provider's key to start using it; configure multiple to enable automatic fallback. 
-## 支持的模型 +## Supported Models -| 厂商 | 模型 / 别名 | 特点 | +| Provider | Models / Aliases | Notes | | --- | --- | --- | -| OpenAI | `gpt-image-2`、`gpt-image-1` | 通用文生图,高质量,支持 `quality` 控制画质 | -| Gemini Nano Banana | `nano-banana-2`、`nano-banana-pro`、`nano-banana` | 对应 `gemini-3.1-flash`、`gemini-3-pro`、`gemini-2.5-flash` 的图像版本 | -| Seedream(火山方舟) | `seedream-5.0-lite`、`seedream-4.5` | 原生 2K–4K,最多 14 张图融合 | -| Qwen(百炼) | `qwen-image-2.0`、`qwen-image-2.0-pro` | 擅长中文排版和图文融合 | -| MiniMax | `image-01` | 简单快速 | -| LinkAI | 任意模型 | 统一网关,作为兜底 | +| OpenAI | `gpt-image-2`, `gpt-image-1` | General-purpose, high quality, supports `quality` parameter | +| Gemini Nano Banana | `nano-banana-2`, `nano-banana-pro`, `nano-banana` | Corresponds to the image variants of `gemini-3.1-flash`, `gemini-3-pro`, `gemini-2.5-flash` | +| Seedream (Volcengine Ark) | `seedream-5.0-lite`, `seedream-4.5` | Native 2K–4K, up to 14 reference images for fusion | +| Qwen (DashScope) | `qwen-image-2.0`, `qwen-image-2.0-pro` | Strong with Chinese text rendering and text-image layouts | +| MiniMax | `image-01` | Fast and simple | +| LinkAI | Any model | Universal gateway, used as fallback | -## 模型选择 +## Model Selection -默认走「自动路由 + 失败回退」: +By default, "auto routing + automatic fallback" is used: -1. 按 `OpenAI → Gemini → Seedream → Qwen → MiniMax → LinkAI` 顺序选第一个已配置的厂商 -2. 遇到 401、模型未开通、网络异常等错误时,自动切到下一家 -3. 用户在对话里指定模型时(如「用 seedream 画一只猫」),对应厂商会被提到最前优先尝试 +1. Pick the first configured provider in the order `OpenAI → Gemini → Seedream → Qwen → MiniMax → LinkAI` +2. On errors such as 401, model not enabled, or network issues, automatically switch to the next provider +3. If the user specifies a model in the conversation (e.g. 
"use seedream to draw a cat"), the corresponding provider is promoted to the front -如需固定使用某个模型: +To pin a specific model: ```json { @@ -36,63 +36,63 @@ description: 文生图 / 图生图 / 多图融合,支持多家厂商自动路 } ``` -## 配置 API Key +## Configuring API Keys - 推荐通过 [Web 控制台](/channels/web) 的「模型管理」页面配置,配好的对话模型 Key 会被图像生成技能自动复用,无需重复配置。也可手动编辑配置文件或在对话中通过 `env_config` 工具临时设置。 + It is recommended to configure providers from the "Model Management" page in the [Web console](/channels/web). Chat model keys configured there are automatically reused by the image generation skill — no need to set them twice. You can also edit the configuration file manually or temporarily set keys in a conversation using the `env_config` tool. -凭证统一复用主模型厂商的 Key: +Credentials are shared with the main model providers: -| 字段 | 对应厂商 | +| Field | Provider | | --- | --- | | `openai_api_key` | OpenAI | | `gemini_api_key` | Gemini | -| `ark_api_key` | 火山方舟(Seedream) | -| `dashscope_api_key` | 阿里百炼(Qwen) | +| `ark_api_key` | Volcengine Ark (Seedream) | +| `dashscope_api_key` | Alibaba DashScope (Qwen) | | `minimax_api_key` | MiniMax | | `linkai_api_key` | LinkAI | -## 开启和关闭 +## Enabling and Disabling -技能会根据 API Key 自动调整状态: +The skill automatically adjusts its status based on API keys: -- **已配置 Key**:Agent 收到画图请求时直接调用 -- **未配置 Key**:技能仍会出现在上下文中(标记为「需要配置」),Agent 会引导用户去配 Key +- **Key configured**: the Agent calls the skill directly when it receives a drawing request +- **Key not configured**: the skill still appears in context (marked as "needs configuration") — the Agent will guide the user to set up a key -如需手动控制: +To control it manually: ```text -/skill disable image-generation # 关闭 -/skill enable image-generation # 重新开启 +/skill disable image-generation # Disable +/skill enable image-generation # Re-enable ``` -终端等价命令:`cow skill disable image-generation` / `cow skill enable image-generation`。 +Equivalent terminal commands: `cow skill disable image-generation` / `cow skill enable image-generation`. 
-## 参数 +## Parameters -| 参数 | 类型 | 必填 | 默认 | 说明 | +| Parameter | Type | Required | Default | Description | | --- | --- | --- | --- | --- | -| `prompt` | string | 是 | — | 图像描述 | -| `image_url` | string / list | 否 | null | 编辑用的输入图,本地路径或 URL;传列表为多图融合 | -| `quality` | string | 否 | auto | `low` / `medium` / `high`,仅部分厂商支持 | -| `size` | string | 否 | auto | `512` / `1K` / `2K` / `3K` / `4K`,或像素值如 `1024x1024` | -| `aspect_ratio` | string | 否 | null | `1:1` / `3:2` / `2:3` / `16:9` / `9:16` / `21:9`;Gemini 还支持 `1:4` / `4:1` / `1:8` / `8:1` | +| `prompt` | string | Yes | — | Image description | +| `image_url` | string / list | No | null | Input image for editing — local path or URL; pass a list for multi-image fusion | +| `quality` | string | No | auto | `low` / `medium` / `high`, supported only by some providers | +| `size` | string | No | auto | `512` / `1K` / `2K` / `3K` / `4K`, or pixel value like `1024x1024` | +| `aspect_ratio` | string | No | null | `1:1` / `3:2` / `2:3` / `16:9` / `9:16` / `21:9`; Gemini also supports `1:4` / `4:1` / `1:8` / `8:1` | - **质量越高、分辨率越大,耗时和成本越高。** 日常对话用默认(`auto`)或 `quality=low` + `size=1K` 即可,约 20 秒出图;做海报或明确要高清时再上 `high` + `2K/4K`,可能需要 1–5 分钟。 + **Higher quality and larger size cost more and take longer.** For everyday conversations, use the defaults (`auto`) or `quality=low` + `size=1K` — about 20 seconds per image. For posters or when high resolution is explicitly requested, use `quality=high` + `size=2K/4K` — may take 1–5 minutes. -## 常见用法 +## Common Use Cases -- **文生图**:根据描述生成插画、海报、图标、头像、分镜图等 -- **图生图**:在已有图片上改风格、换元素、加装饰、加文字等 -- **多图融合**:把多张参考图合成一张(换装、角色合影等) +- **Text-to-image**: generate illustrations, posters, icons, avatars, storyboards, etc. from a description +- **Image-to-image**: change styles, swap elements, add decorations or text on an existing image +- **Multi-image fusion**: combine multiple reference images into one (outfit swaps, character group photos, etc.) 
-- bash 超时建议设 600 秒:单厂商 HTTP 超时 300 秒,脚本可能依次尝试多家 -- 输入图片自动压缩到 4MB 以内、最长边不超过 4096px -- Gemini / Seedream / Qwen / MiniMax 不支持 `quality` 参数 -- Seedream 默认出 2K 图;`seedream-5.0-lite` 支持到 3K,`seedream-4.5` 支持到 4K +- Bash timeout should be set to 600 seconds: each provider has a 300-second HTTP timeout, and the script may try multiple providers sequentially +- Input images are automatically compressed to ≤ 4 MB with the longest edge ≤ 4096 px +- Gemini / Seedream / Qwen / MiniMax do not support the `quality` parameter +- Seedream defaults to 2K; `seedream-5.0-lite` supports up to 3K; `seedream-4.5` supports up to 4K diff --git a/docs/skills/index.mdx b/docs/skills/index.mdx index 795cebc0..a7058c98 100644 --- a/docs/skills/index.mdx +++ b/docs/skills/index.mdx @@ -1,33 +1,32 @@ --- -title: 技能概览 -description: CowAgent 技能系统介绍 +title: Skills Overview +description: CowAgent skills system introduction --- -技能(Skill)为 Agent 提供无限的扩展性。每个 Skill 由说明文件(`SKILL.md`)、运行脚本(可选)、资源(可选)组成,描述如何完成特定类型的任务。 +Skills provide infinite extensibility for the Agent. Each Skill consists of a description file (`SKILL.md`), execution scripts (optional), and resources (optional), describing how to accomplish specific types of tasks. -Skill 与 Tool 的区别:Tool 是由代码实现的原子操作(如读写文件、执行命令),Skill 则是基于说明文件的高级工作流,可以组合调用多个 Tool 来完成复杂任务。 +The difference between Skills and Tools: Tools are atomic operations implemented in code (e.g., file read/write, command execution), while Skills are high-level workflows based on description files that can combine multiple Tools to complete complex tasks. 
-## 获取技能 +## Getting Skills -CowAgent 提供多种方式获取技能: +CowAgent offers multiple ways to acquire skills: -- [Cow 技能广场](https://skills.cowagent.ai/) — 在线浏览所有可用技能,或通过 `/skill list --remote` 在对话中浏览和安装 -- **GitHub** — 直接从 GitHub 仓库安装,支持批量安装 -- **ClawHub** — 通过 `/skill install clawhub:名称` 安装 ClawHub 上的技能 (4w+个) -- **LinkA** — 通过 `/skill install linkai:编码` 安装 LinkAI 上的公开资源和创建的知识库/数据库/工作流/插件等资源 -- **URL** — 从 zip 压缩包或 SKILL.md 链接安装 -- **对话创建** — 通过自然语言对话让 Agent 自动创建技能 +- **Cow Skill Hub** — Browse and install community skills via `/skill list --remote` +- **GitHub** — Install directly from GitHub repositories, with batch install support +- **ClawHub** — Install ClawHub skills via `/skill install clawhub:name` +- **URL** — Install from zip archives or SKILL.md links +- **Conversational creation** — Let the Agent create skills through natural language conversation -详细安装方式参考 [安装技能](/skills/install) 和 [技能管理命令](/cli/skill)。也可以通过对话 [创建技能](/skills/create),或向 [Skill Hub](https://skills.cowagent.ai/submit) 贡献你的技能。 +See [Install Skills](/skills/install) and [Skill Management Commands](/cli/skill) for details. You can also [create skills](/skills/create) through conversation. -## 技能加载优先级 +## Skill Loading Priority -1. **工作空间技能**(最高):`~/cow/skills/` -2. **项目内置技能**(最低):`skills/` +1. **Workspace skills** (highest): `~/cow/skills/` +2. **Project built-in skills** (lowest): `skills/` -同名技能按优先级覆盖。 +Skills with the same name are overridden by priority. -## 技能文件结构 +## Skill File Structure ``` skills/ @@ -37,7 +36,7 @@ skills/ │ └── resources/ # Additional resources (optional) ``` -### SKILL.md 格式 +### SKILL.md Format ```markdown --- @@ -56,10 +55,10 @@ metadata: Detailed instructions... 
``` -| 字段 | 说明 | +| Field | Description | | --- | --- | -| `name` | 技能名称,需与目录名一致 | -| `description` | 技能描述,Agent 据此决定是否调用 | -| `metadata.requires.bins` | 依赖的系统命令 | -| `metadata.requires.env` | 依赖的环境变量 | -| `metadata.always` | 是否始终加载(默认 false) | +| `name` | Skill name, must match directory name | +| `description` | Skill description, Agent decides whether to invoke based on this | +| `metadata.requires.bins` | Required system commands | +| `metadata.requires.env` | Required environment variables | +| `metadata.always` | Always load (default false) | diff --git a/docs/skills/install.mdx b/docs/skills/install.mdx index 84395d95..0457f7c3 100644 --- a/docs/skills/install.mdx +++ b/docs/skills/install.mdx @@ -1,66 +1,65 @@ --- -title: 安装技能 -description: 通过命令一键安装来自多种来源的技能 +title: Install Skills +description: Install skills from multiple sources with a single command --- -CowAgent 支持通过统一的 `install` 命令安装来自 [Cow 技能广场](https://skills.cowagent.ai/)、GitHub、ClawHub、LinkAI 以及任意 URL 上的技能。在对话中使用 `/skill install`,在终端中使用 `cow skill install`。 +CowAgent supports installing skills from [Cow Skill Hub](https://skills.cowagent.ai/), GitHub, ClawHub, LinkAI, and any URL via a unified `install` command. Use `/skill install` in chat or `cow skill install` in the terminal. -## 从Cow技能广场安装 +## From the Skill Hub -访问 [skills.cowagent.ai](https://skills.cowagent.ai/) 浏览所有可用技能,找到想要的技能后直接安装,例如: +Browse all available skills at [skills.cowagent.ai](https://skills.cowagent.ai/) and install by name: ```text /skill list --remote /skill install pptx ``` -## 从 GitHub 安装 +## From GitHub -> Github上的所有技能都可以直接安装,支持仓库级批量安装和指定子目录安装,例如: +Any GitHub-hosted skill can be installed directly. 
Supports both repository-level batch install and subdirectory-level single install: ```text /skill install larksuite/cli /skill install https://github.com/larksuite/cli/tree/main/skills/lark-im ``` -## 从 ClawHub 安装 - -[ClawHub](https://clawhub.ai/) 上的所有技能 (4w+个) 都可以一键安装,例如: +## From ClawHub +All [ClawHub](https://clawhub.ai/) skills (40k+) can be installed with a single command: ```text /skill install clawhub: ``` -## 从 LinkAI 安装 +## From LinkAI -[LinkAI](https://link-ai.tech/console) 上的所有公开资源 (1w+个应用/工作流/插件) ,以及自己创建的资源 (应用/工作流/知识库/数据库/插件) 都可以通过命令一键安装: +All public resources on [LinkAI](https://link-ai.tech/console) (10k+ apps / workflows / plugins), as well as your own resources (apps, workflows, knowledge bases, databases, plugins), can be installed via: ```text /skill install linkai: ``` -> LinkAI平台上创建的所有应用、工作流、知识库、数据库、插件都有唯一的code,可在[控制台](https://link-ai.tech/console)各资源页面中进行获取并填写到命令中 +> Every resource created on the LinkAI platform has a unique `code`. Find it on each resource's page in the [console](https://link-ai.tech/console). -## 从 URL 安装 +## From URL -支持 zip 压缩包和 SKILL.md 文件链接: +Supports zip archives and SKILL.md file links: ```text /skill install https://cdn.link-ai.tech/skills/pptx.zip /skill install https://example.com/path/to/SKILL.md ``` -## 管理技能 +## Manage Skills ```text -/skill list # 查看已安装技能 -/skill info pptx # 查看技能详情 -/skill enable pptx # 启用技能 -/skill disable pptx # 禁用技能 -/skill uninstall pptx # 卸载技能 +/skill list # View installed skills +/skill info pptx # View skill details +/skill enable pptx # Enable a skill +/skill disable pptx # Disable a skill +/skill uninstall pptx # Uninstall a skill ``` - 以上所有命令在终端中使用时,将 `/skill` 替换为 `cow skill` 即可。完整命令说明参考 [技能管理命令](/cli/skill)。 + All commands above work in the terminal by replacing `/skill` with `cow skill`. See [Skill Management Commands](/cli/skill) for full documentation. 
diff --git a/docs/skills/knowledge-wiki.mdx b/docs/skills/knowledge-wiki.mdx index 40b4d298..14ae9c90 100644 --- a/docs/skills/knowledge-wiki.mdx +++ b/docs/skills/knowledge-wiki.mdx @@ -1,112 +1,112 @@ --- -title: knowledge-wiki - 知识库 -description: 维护本地结构化知识库,自动归档、分类和交叉引用 +title: knowledge-wiki +description: Maintain a local structured knowledge base with automatic archiving, categorisation, and cross-referencing --- -帮你把对话中产生的资料、灵感和零散笔记整理成结构化的本地知识库,自动维护索引和页面之间的交叉引用。 +Organises notes, insights, and reference materials from your conversations into a structured local knowledge base, automatically maintaining an index and cross-references between pages. -`knowledge-wiki` 在工作空间下维护一个 `knowledge/` 目录,相当于 Agent 的「外脑」。技能设置了 `always: true`,会**常驻上下文**,不需要任何外部依赖。 +`knowledge-wiki` maintains a `knowledge/` directory in your workspace — essentially the Agent's "second brain". The skill is marked `always: true`, so it is **always loaded** and requires no external dependencies. -## 什么时候会触发 +## When It Triggers -- 你分享了一篇文章、一份文档或一个 URL,想要沉淀下来 -- 聊天过程中聊出了值得长期保留的结论 -- 你想查一下之前积累过的知识 +- You share an article, document, or URL that you want to keep for future reference +- A conversation produces conclusions worth retaining long-term +- You want to look up something you accumulated earlier -## 目录结构 +## Directory Structure ``` knowledge/ -├── index.md # 全局索引(必须维护) -├── log.md # 操作日志(只追加) -└── / # 分类子目录(按内容自由分组) - └── .md # 知识页(文件名用小写加中划线) +├── index.md # Global index (must be maintained) +├── log.md # Operation log (append-only) +└── / # Category subdirectories (grouped by content) + └── .md # Knowledge page (lowercase-hyphenated filename) ``` -## 三个核心操作 +## Three Core Operations -### 1. 收录(Ingest) +### 1. Ingest -你分享了一段资料时,Agent 会: +When you share some material, the Agent will: -1. 读懂原文,提取关键信息 -2. 按内容决定放到哪个分类下——先看 `index.md` 里有没有合适的分类,没有就新建一个 -3. 生成知识页 `knowledge//.md` -4. 更新索引 `index.md` 和日志 `log.md` +1. Read and understand the original content, extracting key information +2. 
Decide which category it belongs to — check `index.md` first; create a new category if none fits +3. Generate a knowledge page at `knowledge/<category>/<slug>.md` +4. Update the index `index.md` and the log `log.md` -### 2. 综合(Synthesize) +### 2. Synthesize -聊天中产生了新的结论或洞见时: +When a conversation produces new conclusions or insights: -1. 在合适的分类下创建新知识页 -2. 给相关的已有页面加上互相指向的链接 -3. 更新索引和日志 +1. Create a new knowledge page under an appropriate category +2. Add cross-links to and from related existing pages +3. Update the index and log -### 3. 查询(Query) +### 3. Query -你问到以前积累的知识时: +When you ask about previously accumulated knowledge: -1. 先从 `index.md` 里找可能相关的页面 -2. 用 `read` 工具打开具体页面 -3. 需要时再用 `memory_search` 补充检索 -4. 回答里会带上知识页的链接,方便你点过去看原文 +1. Search `index.md` for potentially relevant pages +2. Open specific pages with the `read` tool +3. Supplement with `memory_search` if needed +4. Include links to knowledge pages in the answer so you can click through to the source -## 知识页怎么写 +## Page Format ```markdown -# 页面标题 +# Page Title -> Source: <来源 URL 或简要说明> +> Source: <source URL or brief description> -正文内容。页面之间用相对路径链接: -[相关页](../category/related-page.md) +Body content. Link between pages using relative paths: +[Related Page](../category/related-page.md) -## 要点 +## Key Points - ... -## 相关页面 +## Related Pages -- [页面 A](../category/page-a.md) — 为什么相关 +- [Page A](../category/page-a.md) — why it's related ``` -- `> Source:` 用来记录这条知识的来源。有明确来源时一定要写 -- 交叉引用很重要:创建或更新某页时,记得也去关联页面里补上反向链接 -- **只链接已经存在的页面**。如果某个概念值得单独成页,先建好再加链接 +- `> Source:` records where this knowledge came from. 
Always include it when there is a clear source +- Cross-references are important: when creating or updating a page, remember to add back-links in the related pages too +- **Only link to pages that already exist.** If a concept deserves its own page, create it first, then add the link -## 索引格式 +## Index Format -`knowledge/index.md` 采用扁平列表,按分类分组,每个知识页占一行: +`knowledge/index.md` uses a flat list grouped by category, one knowledge page per line: ```markdown # Knowledge Index -## 分类 A -- [页面标题](category-a/page-slug.md) — 一句话摘要 +## Category A +- [Page Title](category-a/page-slug.md) — one-line summary -## 分类 B -- [页面标题](category-b/page-slug.md) — 一句话摘要 +## Category B +- [Page Title](category-b/page-slug.md) — one-line summary ``` -不用表格,不加 emoji。分类怎么起名、怎么组织都可以灵活调整。 +No tables, no emojis. Category names and organisation can be adjusted freely. -## 日志格式 +## Log Format -`knowledge/log.md` 只追加、不修改,最新的写在最下面: +`knowledge/log.md` is append-only — newest entries go at the bottom: ```markdown -## [YYYY-MM-DD] ingest | 页面标题 -## [YYYY-MM-DD] synthesize | 页面标题 +## [YYYY-MM-DD] ingest | Page Title +## [YYYY-MM-DD] synthesize | Page Title ``` -## 写作约定 +## Writing Guidelines -- **文件名**用小写加中划线,比如 `machine-learning.md` -- **一页只讲一件事**,需要关联的内容通过链接串起来 -- **有了就更新,不要重复建页** -- **每次改完都要更新索引** `knowledge/index.md` -- **写精华别抄全文**,抓住要点就行 -- **对话里引用知识页时用完整路径**,比如 `[标题](knowledge//.md)`。页面之间互相链接才用相对路径 -- **基于知识页回答问题时附上链接**,方便深入查阅 +- **Filenames**: lowercase with hyphens, e.g. `machine-learning.md` +- **One topic per page** — link related content across pages +- **Update, don't duplicate** — if a page already exists, update it rather than creating a new one +- **Always update the index** `knowledge/index.md` after any change +- **Distill, don't copy** — capture the key points, not the entire source +- **Use full paths when referencing knowledge pages in conversations**, e.g. `[Title](knowledge//.md)`. 
Use relative paths only for inter-page links +- **Include links when answering questions based on knowledge pages** so users can dig deeper diff --git a/docs/skills/skill-creator.mdx b/docs/skills/skill-creator.mdx index 623a74f6..58853f52 100644 --- a/docs/skills/skill-creator.mdx +++ b/docs/skills/skill-creator.mdx @@ -1,180 +1,180 @@ --- -title: skill-creator - 技能创建 -description: 创建、安装、更新技能,规范 SKILL.md 写法与目录结构 +title: skill-creator +description: Create, install, and update skills — standardises SKILL.md format and directory structure --- -`skill-creator` 是一个「元技能」,专门用来帮助 Agent 创建、安装和更新其他技能,确保所有技能的 `SKILL.md` 写法和目录结构保持一致。 +`skill-creator` is a "meta-skill" that helps the Agent create, install, and update other skills, ensuring every skill follows a consistent `SKILL.md` format and directory layout. -## 什么时候会触发 +## When It Triggers -- 用户想从 URL 或远程仓库安装一个技能 -- 用户想从头创建一个全新的技能 -- 需要升级或重构已有技能 +- The user wants to install a skill from a URL or remote repository +- The user wants to create a brand-new skill from scratch +- An existing skill needs upgrading or restructuring -## 技能是什么 +## What Is a Skill? -简单来说,技能就是一份「可复用的说明书」加上可选的脚本和资源。它给 Agent 注入了某个领域的专业知识,让 Agent 在遇到对应任务时能像专家一样处理。 +A skill is a reusable instruction set plus optional scripts and assets. It injects domain expertise into the Agent so it can handle specific tasks like a specialist. -一个技能通常包含以下内容: +A skill typically contains: -1. **专项工作流** — 某类任务的完整步骤 -2. **工具用法** — 怎么调某种 API 或处理某种文件 -3. **领域知识** — 团队约定、业务规则、数据结构之类 -4. **附带资源** — 脚本、参考文档、模板等 +1. **Specialised workflow** — step-by-step instructions for a category of tasks +2. **Tool usage** — how to call a particular API or process a particular file format +3. **Domain knowledge** — team conventions, business rules, data schemas, etc. +4. **Attached resources** — scripts, reference docs, templates, etc. -**核心原则:能省则省**。只写 Agent 自己想不到的内容,每加一行都要问自己:值不值得占这些 token? +**Core principle: less is more.** Only write what the Agent wouldn't figure out on its own. 
For every line you add, ask yourself: is it worth the tokens? -## 目录结构 +## Directory Structure ``` skill-name/ -├── SKILL.md # 必需:技能定义 -│ ├── YAML frontmatter(必填 name / description) -│ └── Markdown 正文(说明 + 示例) -└── 可选资源 - ├── scripts/ # 可执行脚本(Python / Bash 等) - ├── references/ # 内容较多的参考文档,Agent 按需读取 - └── assets/ # 模板、图标等,会直接用在输出里 +├── SKILL.md # Required: skill definition +│ ├── YAML frontmatter (name / description are mandatory) +│ └── Markdown body (instructions + examples) +└── Optional resources + ├── scripts/ # Executable scripts (Python / Bash, etc.) + ├── references/ # Large reference docs the Agent reads on demand + └── assets/ # Templates, icons, etc. used directly in output ``` -## SKILL.md 规范定义 +## SKILL.md Specification -SKILL.md 文件头部的 `frontmatter` 字段: +Frontmatter fields in the SKILL.md header: -| 字段 | 说明 | +| Field | Description | | --- | --- | -| `name` | 技能名,小写加中划线,必须和目录名一致 | -| `description` | **最关键的字段**。写清楚「这个技能干什么」和「什么情况下该用它」,Agent 看到这段来决定要不要调它。注意:所有触发相关的描述都放在这里,不要写到正文里 | -| `metadata.cowagent.requires.bins` | 系统里必须装了哪些命令行工具 | -| `metadata.cowagent.requires.env` | 需要哪些环境变量(全部满足才行) | -| `metadata.cowagent.requires.anyEnv` | 多个 API Key 满足一个就行 | -| `metadata.cowagent.requires.anyBins` | 多个工具满足一个就行 | -| `metadata.cowagent.always` | 设为 `true` 会始终加载,不检查依赖 | -| `metadata.cowagent.emoji` | 展示用的 emoji(可选) | -| `metadata.cowagent.os` | 限定系统,如 `["darwin", "linux"]` | +| `name` | Skill name — lowercase with hyphens, must match the directory name | +| `description` | **The most important field.** Clearly state what the skill does and when to use it. The Agent reads this to decide whether to invoke it. 
All trigger-related descriptions go here, not in the body | +| `metadata.cowagent.requires.bins` | System CLI tools that must be installed | +| `metadata.cowagent.requires.env` | Required environment variables (all must be present) | +| `metadata.cowagent.requires.anyEnv` | Multiple API keys — at least one must be set | +| `metadata.cowagent.requires.anyBins` | Multiple tools — at least one must be installed | +| `metadata.cowagent.always` | Set to `true` to always load, skipping dependency checks | +| `metadata.cowagent.emoji` | Display emoji (optional) | +| `metadata.cowagent.os` | OS restriction, e.g. `["darwin", "linux"]` | -`category` 字段不需要手写,系统会自动设成 `skill`。 +The `category` field does not need to be set manually — the system automatically sets it to `skill`. -声明 API Key 依赖有两种写法: +Two ways to declare API key dependencies: ```yaml metadata: cowagent: requires: - env: ["MYAPI_KEY"] # 必须有 + env: ["MYAPI_KEY"] # Must be present ``` ```yaml metadata: cowagent: requires: - anyEnv: ["OPENAI_API_KEY", "LINKAI_API_KEY"] # 有一个就行 + anyEnv: ["OPENAI_API_KEY", "LINKAI_API_KEY"] # At least one ``` -**技能会自动按依赖启禁用**:环境变量齐了就自动启用,缺了就自动禁用,不需要手动 `/skill enable`。 +**Skills are auto-enabled/disabled based on dependencies**: they activate when all required environment variables are present and deactivate when any are missing — no need for manual `/skill enable`. -## 资源目录怎么用 +## Resource Directories -| 目录 | 放什么 | 不要放 | +| Directory | What goes here | What does NOT go here | | --- | --- | --- | -| `scripts/` | 需要反复执行的代码,或需要确定性结果的脚本 | 纯演示用的代码片段 | -| `references/` | **超过 500 行**、SKILL.md 实在塞不下的大文档(比如完整的数据库 Schema) | 普通 API 文档、示例、教程 | -| `assets/` | 会出现在最终产物里的文件(模板、图标、样板代码等) | 说明性文档 | +| `scripts/` | Code that needs to run repeatedly, or scripts that produce deterministic results | Demo-only code snippets | +| `references/` | Documents **over 500 lines** that genuinely won't fit in SKILL.md (e.g. 
a full DB schema) | General API docs, tutorials, examples | +| `assets/` | Files that appear in the final output (templates, icons, boilerplate, etc.) | Explanatory documentation | -**原则上所有内容都写在 `SKILL.md` 里**,只有确实放不下才拆到资源目录。 +**In principle, everything goes in `SKILL.md`** — only split into resource directories when it truly won't fit. -不要给技能加 `README.md`、`CHANGELOG.md`、`INSTALLATION_GUIDE.md` 之类的文件——全部放进 `SKILL.md`。资源目录里只放真正要跑的脚本或真正要用的素材。 +Do not add `README.md`, `CHANGELOG.md`, or `INSTALLATION_GUIDE.md` to a skill — put everything in `SKILL.md`. Resource directories should only contain scripts that actually run or assets that are actually used. -## 安装外部技能 +## Installing External Skills -安装后最终落在 `<workspace>/skills/<name>/` 目录。 +After installation, the skill lands in `<workspace>/skills/<name>/`. -| 来源 | 怎么装 | +| Source | How to install | | --- | --- | -| URL(单文件) | curl / web_fetch 直接拉 | -| URL(zip 包) | 下载解压 | -| 本地 SKILL.md | 直接读 | -| 本地 zip 包 | 解压 | +| URL (single file) | curl / web_fetch | +| URL (zip archive) | Download and extract | +| Local SKILL.md | Read directly | +| Local zip archive | Extract | -安装步骤: +Installation steps: -1. 找到 `SKILL.md`(可能在包的根目录或某个子目录里) -2. 从 frontmatter 里读出 `name` -3. 把**整个技能目录**(包括 `SKILL.md`、`scripts/`、`assets/` 等)复制到 `<workspace>/skills/<name>/` -4. 如果包里有 `INSTALL.md` 之类的安装脚本,照着跑一遍,但最终结果仍然要落在 `<workspace>/skills/<name>/` 下 +1. Locate the `SKILL.md` (may be at the root or in a subdirectory of the archive) +2. Read the `name` from the frontmatter +3. Copy the **entire skill directory** (including `SKILL.md`, `scripts/`, `assets/`, etc.) to `<workspace>/skills/<name>/` +4. If the archive contains an `INSTALL.md` or similar installation instructions, follow them — but the final result must still reside under `<workspace>/skills/<name>/` -## 从头创建技能 +## Creating a Skill from Scratch -推荐按这个顺序来: +Recommended order: -1. **搞清楚需求** — 让用户举几个具体的使用场景,一次别问太多 -2. **想好结构** — 这个技能需要脚本吗?需要参考文档吗?需要模板素材吗? -3. **生成骨架** — 用初始化脚本: +1. **Clarify requirements** — ask the user for a few concrete use cases (don't ask too many at once) +2. 
**Plan the structure** — does this skill need scripts? Reference docs? Template assets? +3. **Scaffold** — use the init script: ```bash scripts/init_skill.py --path /skills [--resources scripts,references,assets] [--examples] ``` -4. **填充内容** — 写好 SKILL.md、补上脚本和资源。脚本写完一定要实际跑一遍 -5. **格式校验**(可选): +4. **Fill in content** — write SKILL.md, add scripts and resources. Always test scripts after writing them +5. **Validate** (optional): ```bash scripts/quick_validate.py /skills/ ``` -6. **迭代完善** — 实际用起来之后根据反馈持续改进 +6. **Iterate** — keep improving based on real-world usage feedback -## 命名规则 +## Naming Conventions -- 只用小写字母、数字和中划线。用户给的名字需要做标准化处理,比如 `Plan Mode` → `plan-mode` -- 长度别超过 64 个字符 -- 尽量短、用动词开头、一看就知道干什么 -- 必要时用工具名做前缀,比如 `gh-address-comments`、`linear-address-issue` -- 目录名和 `name` 字段必须完全一致 +- Use only lowercase letters, digits, and hyphens. Normalise user-given names, e.g. `Plan Mode` → `plan-mode` +- Maximum 64 characters +- Keep it short, start with a verb, make it self-explanatory +- Use tool names as prefixes when appropriate, e.g. `gh-address-comments`, `linear-address-issue` +- The directory name and the `name` field must match exactly -## 三级加载机制 +## Three-Level Loading -技能不会一次性全部塞进上下文,而是分三级按需加载: +Skills are not loaded into context all at once — they use a three-level progressive loading mechanism: -1. **元信息**(`name` + `description`)— 常驻上下文,约 100 词。Agent 靠它判断「要不要用这个技能」 -2. **SKILL.md 正文** — 确定要用了才加载,建议控制在 500 行以内 -3. **资源文件** — Agent 需要的时候再读 +1. **Metadata** (`name` + `description`) — always in context (~100 words). The Agent uses this to decide whether to invoke the skill +2. **SKILL.md body** — loaded only when the skill is activated; keep it under 500 lines +3. **Resource files** — read on demand by the Agent -如果一个技能涉及多个变体(比如多云厂商部署),建议这样组织: +For skills with multiple variants (e.g. 
multi-cloud deployment), organise like this: ``` cloud-deploy/ -├── SKILL.md # 主流程和厂商选择逻辑 +├── SKILL.md # Main workflow and provider selection logic └── references/ ├── aws.md ├── gcp.md └── azure.md ``` -用户选了 AWS,Agent 只需要读 `aws.md`,不用把三家的文档全加载进来。 +When the user picks AWS, the Agent only reads `aws.md` — no need to load all three providers. -## 常见设计模式 +## Common Design Patterns -**步骤式**:按编号列出操作步骤和对应脚本。 +**Step-by-step**: numbered steps with corresponding scripts. ```markdown -1. 分析表单结构(运行 analyze_form.py) -2. 生成字段映射(编辑 fields.json) -3. 自动填充表单(运行 fill_form.py) +1. Analyse form structure (run analyze_form.py) +2. Generate field mappings (edit fields.json) +3. Auto-fill the form (run fill_form.py) ``` -**分支式**:根据用户意图走不同流程。 +**Branching**: different flows based on user intent. ```markdown -1. 判断操作类型: - **新建内容?** → 走「创建流程」 - **编辑已有内容?** → 走「编辑流程」 +1. Determine operation type: + **Creating new content?** → follow the "Create" workflow + **Editing existing content?** → follow the "Edit" workflow ``` -**模板式**:输出格式有严格要求时,在 SKILL.md 里直接给一个样板,让 Agent 照着写。 +**Template-based**: when output format has strict requirements, include a template in SKILL.md for the Agent to follow. diff --git a/docs/tools/bash.mdx b/docs/tools/bash.mdx index 0090fe14..60b20918 100644 --- a/docs/tools/bash.mdx +++ b/docs/tools/bash.mdx @@ -1,27 +1,27 @@ --- -title: bash - 终端 -description: 执行系统命令 +title: bash - Terminal +description: Execute system commands --- -在当前工作目录执行 Bash 命令,返回 stdout 和 stderr。`env_config` 中配置的 API Key 会自动注入到环境变量中。 +Executes Bash commands in the current working directory and returns stdout and stderr. API keys configured via `env_config` are automatically injected into the environment. -## 依赖 +## Dependencies -无额外依赖,默认可用。 +No extra dependencies, available by default. 
-## 参数 +## Parameters -| 参数 | 类型 | 必填 | 说明 | +| Parameter | Type | Required | Description | | --- | --- | --- | --- | -| `command` | string | 是 | 要执行的命令 | -| `timeout` | integer | 否 | 超时时间(秒) | +| `command` | string | Yes | Command to execute | +| `timeout` | integer | No | Timeout in seconds | -## 使用场景 +## Use Cases -- 安装软件包和依赖 -- 运行代码和测试 -- 部署应用和服务(Nginx 配置、进程管理等) -- 系统运维和排查 +- Install packages and dependencies +- Run code and tests +- Deploy applications and services (Nginx config, process management, etc.) +- System administration and troubleshooting diff --git a/docs/tools/browser.mdx b/docs/tools/browser.mdx index 0d3c9197..4c6fda82 100644 --- a/docs/tools/browser.mdx +++ b/docs/tools/browser.mdx @@ -1,42 +1,42 @@ --- -title: browser - 浏览器 -description: 控制浏览器访问和操作网页 +title: browser - Browser +description: Control a browser to access and interact with web pages --- -控制 Chromium 浏览器进行网页导航、元素交互和内容提取。支持 JavaScript 渲染的动态页面,使用精简 DOM 快照让 Agent 高效理解页面结构。 +Control a Chromium browser for web navigation, element interaction and content extraction. Supports JavaScript-rendered pages and uses a compact DOM snapshot so the Agent can efficiently understand page structure. -## 安装 +## Installation - + ```bash cow install-browser ``` - 该命令会自动完成: - - 安装 `playwright` Python 包(旧系统自动降级兼容版本) - - 在 Linux 上安装系统依赖 - - 下载 Chromium 浏览器(Linux 服务器自动使用无头精简版) - - 自动检测国内网络并使用镜像加速 + This command will: + - Install the `playwright` Python package (with auto-fallback for older systems) + - Install system dependencies on Linux + - Download the Chromium browser (Linux servers automatically use the headless build) + - Detect China-mainland networks and use mirror acceleration - + ```bash pip install playwright playwright install chromium ``` - Linux 服务器还需安装系统依赖: + On Linux servers, install system dependencies as well: ```bash sudo playwright install-deps chromium ``` - 如果系统较旧(如 Ubuntu 18.04,glibc < 2.28),需安装兼容版本: + On older systems (e.g. 
Ubuntu 18.04, glibc < 2.28), install a compatible version: ```bash pip install playwright==1.28.0 python -m playwright install chromium ``` - 国内网络下载 Chromium 较慢,可设置镜像加速: + To accelerate the Chromium download from China: ```bash export PLAYWRIGHT_DOWNLOAD_HOST=https://registry.npmmirror.com/-/binary/playwright python -m playwright install chromium @@ -45,55 +45,55 @@ description: 控制浏览器访问和操作网页 - 1. 支持 Ubuntu 20.04+、Debian 10+、macOS、Windows。Ubuntu 18.04 等旧系统会自动降级安装兼容版本。 - 2. 浏览器工具依赖较重(约300MB),为可选安装。轻量的网页内容获取可使用 `web_fetch` 工具。 + 1. Supported on Ubuntu 20.04+, Debian 10+, macOS and Windows. Older systems such as Ubuntu 18.04 will fall back to a compatible version automatically. + 2. The browser tool has heavy dependencies (~300MB) and is optional. For lightweight web content retrieval, use the `web_fetch` tool. -## 工作流程 +## Workflow -Agent 使用浏览器的典型流程: +A typical browser workflow for the Agent: -1. **`navigate`** — 打开目标 URL -2. **`snapshot`** — 获取页面精简 DOM,交互元素自动编号(ref) -3. **`click` / `fill` / `select`** — 通过 ref 编号操作元素 -4. **`snapshot`** — 再次快照验证操作结果 +1. **`navigate`** — Open the target URL +2. **`snapshot`** — Get a compact DOM with auto-numbered interactive elements (`ref`) +3. **`click` / `fill` / `select`** — Operate elements by `ref` +4. 
**`snapshot`** — Snapshot again to verify the result -## 支持的操作 +## Supported Actions -| 操作 | 说明 | 关键参数 | +| Action | Description | Key parameters | | --- | --- | --- | -| `navigate` | 打开 URL | `url` | -| `snapshot` | 获取页面结构化文本(主要方式) | `selector`(可选) | -| `click` | 点击元素 | `ref` 或 `selector` | -| `fill` | 填入文本 | `ref` 或 `selector`,`text` | -| `select` | 下拉选择 | `ref` 或 `selector`,`value` | -| `scroll` | 滚动页面 | `direction`(up/down/left/right) | -| `screenshot` | 截图保存到工作区 | `full_page` | -| `wait` | 等待元素或超时 | `selector`,`timeout` | -| `press` | 按键(Enter、Tab 等) | `key` | -| `back` / `forward` | 浏览器前进/后退 | - | -| `get_text` | 获取元素文本内容 | `selector` | -| `evaluate` | 执行 JavaScript | `script` | +| `navigate` | Open URL | `url` | +| `snapshot` | Get structured page text (primary way) | `selector` (optional) | +| `click` | Click an element | `ref` or `selector` | +| `fill` | Fill text into an input | `ref` or `selector`, `text` | +| `select` | Select a dropdown option | `ref` or `selector`, `value` | +| `scroll` | Scroll the page | `direction` (up/down/left/right) | +| `screenshot` | Save a screenshot to the workspace | `full_page` | +| `wait` | Wait for an element or timeout | `selector`, `timeout` | +| `press` | Press a key (Enter, Tab, etc.) | `key` | +| `back` / `forward` | Browser back / forward | - | +| `get_text` | Get an element's text content | `selector` | +| `evaluate` | Run JavaScript | `script` | -## 使用场景 +## Use Cases -- 访问指定 URL 获取动态页面内容 -- 填写表单、登录操作 -- 操作网页元素(点击按钮、选择选项等) -- 验证部署后的网页效果 -- 抓取需要 JS 渲染的动态内容 +- Access a URL to retrieve dynamic page content +- Fill in forms and log in +- Operate web elements (click buttons, select options, etc.) 
+- Verify the result of a deployed web page +- Scrape content that requires JS rendering -## 运行模式 +## Run Mode -浏览器会根据运行环境自动选择模式: +The browser picks a mode based on the runtime environment: -| 环境 | 模式 | +| Environment | Mode | | --- | --- | -| macOS / Windows | 有头模式(显示浏览器窗口) | -| Linux 桌面(有 DISPLAY) | 有头模式 | -| Linux 服务器(无 DISPLAY) | 无头模式(headless) | +| macOS / Windows | Headed (browser window visible) | +| Linux desktop (with DISPLAY) | Headed | +| Linux server (no DISPLAY) | Headless | -可在 `config.json` 中手动覆盖: +You can override it in `config.json`: ```json { @@ -105,15 +105,15 @@ Agent 使用浏览器的典型流程: } ``` -## 登录态持久化 +## Persistent Login -**只需登录一次目标网站,Agent 后续可直接使用**。提供两种方式: +**Log in to a target site once and the Agent can keep using it.** Two ways are supported: -### 方式一:Persistent 模式(默认) +### Option 1: Persistent mode (default) -开箱即用,登录信息保存在 `~/.cow/browser_profile`。无需任何配置。 +Works out of the box. Login state is saved under `~/.cow/browser_profile`. No configuration needed. -如需关闭持久化模式,每次都用纯净环境: +To disable persistence and start with a clean environment every time: ```json { @@ -125,11 +125,11 @@ Agent 使用浏览器的典型流程: } ``` -### 方式二:CDP 模式(接管真实 Chrome) +### Option 2: CDP mode (attach to real Chrome) -让 Agent 连接独立启动的真实 Chrome(而非 Playwright 自带的 Chromium),获得完整浏览器指纹,适合反爬严格的网站。 +Have the Agent connect to a separately launched real Chrome (instead of the Chromium bundled with Playwright) for full browser fingerprints. Useful for sites with strict bot detection. -启动 Chrome 时加上调试端口和独立用户目录: +Launch Chrome with a debugging port and a dedicated user data directory: @@ -155,7 +155,7 @@ Agent 使用浏览器的典型流程: -在 `config.json` 中配置端点: +Then point the Agent at the endpoint in `config.json`: ```json { @@ -168,5 +168,5 @@ Agent 使用浏览器的典型流程: ``` - Chrome 137+ 限制 `--remote-debugging-port` 必须搭配独立 `--user-data-dir`,因此 CDP 启动的 Chrome **无法直接复用你日常 Chrome 的登录态**,需要在独立目录中重新登录一次。 + Chrome 137+ requires `--remote-debugging-port` to be paired with a dedicated `--user-data-dir`. 
As a result, the CDP-launched Chrome **cannot directly reuse the login state of your daily Chrome**; you'll need to log in once inside this dedicated profile. diff --git a/docs/tools/edit.mdx b/docs/tools/edit.mdx index 717af2ba..f231c6b9 100644 --- a/docs/tools/edit.mdx +++ b/docs/tools/edit.mdx @@ -1,24 +1,24 @@ --- -title: edit - 文件编辑 -description: 通过精确文本替换编辑文件 +title: edit - File Edit +description: Edit files via precise text replacement --- -通过精确文本替换编辑文件。如果 `oldText` 为空则追加到文件末尾。 +Edit files via precise text replacement. If `oldText` is empty, appends to the end of the file. -## 依赖 +## Dependencies -无额外依赖,默认可用。 +No extra dependencies, available by default. -## 参数 +## Parameters -| 参数 | 类型 | 必填 | 说明 | +| Parameter | Type | Required | Description | | --- | --- | --- | --- | -| `path` | string | 是 | 文件路径 | -| `oldText` | string | 是 | 要替换的原始文本(为空时追加到末尾) | -| `newText` | string | 是 | 替换后的文本 | +| `path` | string | Yes | File path | +| `oldText` | string | Yes | Original text to replace (empty to append) | +| `newText` | string | Yes | Replacement text | -## 使用场景 +## Use Cases -- 修改配置文件中的特定参数 -- 修复代码中的 bug -- 在文件指定位置插入内容 +- Modify specific parameters in configuration files +- Fix bugs in code +- Insert content at specific positions in files diff --git a/docs/tools/env-config.mdx b/docs/tools/env-config.mdx index d5d52c68..23f75bf8 100644 --- a/docs/tools/env-config.mdx +++ b/docs/tools/env-config.mdx @@ -1,35 +1,35 @@ --- -title: env_config - 环境变量 -description: 管理 API Key 等秘钥配置 +title: env_config - Environment +description: Manage API keys and secrets --- -管理工作空间 `.env` 文件中的环境变量(API Key 等秘钥),支持通过对话安全地添加和更新。内置安全保护和脱敏策略。 +Manage environment variables (API keys and secrets) in the workspace `.env` file, with secure conversational updates. Built-in security protection and desensitization. 
-## 依赖 +## Dependencies -| 依赖 | 安装命令 | +| Dependency | Install Command | | --- | --- | | `python-dotenv` ≥ 1.0.0 | `pip install python-dotenv>=1.0.0` | -安装扩展依赖时已包含:`pip3 install -r requirements-optional.txt` +Included when installing optional dependencies: `pip3 install -r requirements-optional.txt` -## 参数 +## Parameters -| 参数 | 类型 | 必填 | 说明 | +| Parameter | Type | Required | Description | | --- | --- | --- | --- | -| `action` | string | 是 | 操作类型:`get`、`set`、`list`、`delete` | -| `key` | string | 否 | 环境变量名称 | -| `value` | string | 否 | 环境变量值(仅 `set` 时需要) | +| `action` | string | Yes | Operation type: `get`, `set`, `list`, `delete` | +| `key` | string | No | Environment variable name | +| `value` | string | No | Environment variable value (required only for `set`) | -## 使用方式 +## Usage -直接告诉 Agent 需要配置的秘钥,Agent 会自动调用该工具: +Tell the Agent which key you need to configure, and it will automatically invoke this tool: -- "帮我配置 BOCHA_API_KEY" -- "设置 OPENAI_API_KEY 为 sk-xxx" -- "查看已配置的环境变量" +- "Configure my BOCHA_API_KEY" +- "Set OPENAI_API_KEY to sk-xxx" +- "Show configured environment variables" -配置的秘钥会自动注入到 `bash` 工具的执行环境中。 +Configured keys are automatically injected into the `bash` tool's execution environment. diff --git a/docs/tools/index.mdx b/docs/tools/index.mdx index 0d96923a..fd2a21f3 100644 --- a/docs/tools/index.mdx +++ b/docs/tools/index.mdx @@ -1,69 +1,60 @@ --- -title: 工具概览 -description: CowAgent 内置工具系统 +title: Tools Overview +description: CowAgent built-in tools system --- -工具是 Agent 访问操作系统资源的核心能力。Agent 会根据任务需求智能选择和调用工具,完成文件操作、命令执行、联网搜索、定时任务等各类操作。工具实现在项目的 `agent/tools/` 目录下。 +Tools are the core capability that lets the Agent access operating system resources. The Agent intelligently selects and invokes tools based on task requirements, performing file operations, command execution, web search, scheduled tasks, and more. Tools are implemented in the `agent/tools/` directory. 
-## 内置工具 +## Built-in Tools -以下工具默认可用,无需额外配置: +The following tools are available by default with no extra configuration: - - 读取文件内容,支持文本、图片、PDF + + Read file content, supports text, images, PDF - - 创建或覆盖写入文件 + + Create or overwrite files - - 通过精确文本替换编辑文件 + + Edit files via precise text replacement - - 列出目录内容 + + List directory contents - - 执行系统命令 + + Execute system commands - - 向用户发送文件或图片 + + Send files or images to user - - 搜索和读取长期记忆 - - - 管理 API Key 等秘钥配置 - - - 获取网页或文档内容 - - - 创建和管理定时任务 + + Search and read long-term memory -## 可选工具 +## Optional Tools -以下工具需要安装额外依赖或配置 API Key 后启用: +The following tools require additional dependencies or API key configuration: - - 搜索互联网获取实时信息 + + Manage API keys and secrets - - 分析图片内容(识别、描述、OCR 文字提取等) + + Create and manage scheduled tasks - - 控制浏览器访问和操作网页 + + Search the internet for real-time information -## MCP 工具 +## MCP Tools -通过 [Model Context Protocol](https://modelcontextprotocol.io) 接入社区生态中的各种MCP工具,配置一次 `mcp.json` 即用即得: +Integrate thousands of community tools (maps, GitHub, Notion, etc.) via the [Model Context Protocol](https://modelcontextprotocol.io). Configure `mcp.json` once, ready to use: - - 支持 stdio / SSE 标准协议,热更新,零代码接入 + + Supports standard stdio / SSE transports. Hot-reload, zero code changes. diff --git a/docs/tools/ls.mdx b/docs/tools/ls.mdx index e4d25fc5..e9a5f656 100644 --- a/docs/tools/ls.mdx +++ b/docs/tools/ls.mdx @@ -1,23 +1,23 @@ --- -title: ls - 目录列表 -description: 列出目录内容 +title: ls - Directory List +description: List directory contents --- -列出目录内容,按字母排序,目录名带 `/` 后缀,包含隐藏文件。 +List directory contents, sorted alphabetically, directories suffixed with `/`, includes hidden files. -## 依赖 +## Dependencies -无额外依赖,默认可用。 +No extra dependencies, available by default. 
-## 参数 +## Parameters -| 参数 | 类型 | 必填 | 说明 | +| Parameter | Type | Required | Description | | --- | --- | --- | --- | -| `path` | string | 是 | 目录路径,相对路径基于工作空间目录 | -| `limit` | integer | 否 | 最大返回条目数,默认 500 | +| `path` | string | Yes | Directory path, relative paths are based on workspace directory | +| `limit` | integer | No | Maximum entries to return, default 500 | -## 使用场景 +## Use Cases -- 浏览项目结构 -- 查找特定文件 -- 检查目录是否存在 +- Browse project structure +- Find specific files +- Check if a directory exists diff --git a/docs/tools/mcp.mdx b/docs/tools/mcp.mdx index 8b7670c1..fc320fe0 100644 --- a/docs/tools/mcp.mdx +++ b/docs/tools/mcp.mdx @@ -1,19 +1,19 @@ --- -title: MCP 工具 -description: 通过 Model Context Protocol 接入外部工具生态 +title: MCP Tools +description: Integrate external tool ecosystems via the Model Context Protocol --- -CowAgent 支持 [Model Context Protocol (MCP)](https://modelcontextprotocol.io),让 Agent 能够直接调用社区中数以万计的 MCP 工具。配置一次 `mcp.json`,工具就会以与内置工具完全相同的方式呈现给 LLM,可被自动选择和调用。 +CowAgent supports the [Model Context Protocol (MCP)](https://modelcontextprotocol.io), allowing the Agent to directly invoke tens of thousands of community MCP tools. Configure `mcp.json` once and the tools are exposed to the LLM in exactly the same way as built-in tools — automatically selected and invoked. -## 配置文件 +## Configuration File -CowAgent 读取 `~/cow/mcp.json`。文件不存在时不会启用任何 MCP 工具,也不会报错。 +CowAgent reads `~/cow/mcp.json`. If the file does not exist, no MCP tools are loaded — and no error is raised. -Docker 部署时,官方 `docker-compose.yml` 已经把宿主机 `./cow` 挂载到容器内 `/home/agent/cow`(即容器用户的 `~/cow`),把 `mcp.json` 放进宿主机 `./cow/` 目录即可生效。 +For Docker deployments, the official `docker-compose.yml` already mounts the host's `./cow` directory to `/home/agent/cow` inside the container (i.e. the container user's `~/cow`). Just drop `mcp.json` into the host's `./cow/` directory and it will take effect. 
-### 标准格式 +### Standard Format -完全兼容 MCP 社区标准,同 Claude Desktop / Cursor 一致: +Fully compatible with the MCP community standard, identical to Claude Desktop / Cursor: ```json { @@ -29,17 +29,17 @@ Docker 部署时,官方 `docker-compose.yml` 已经把宿主机 `./cow` 挂载 } ``` -| 字段 | 必填 | 说明 | +| Field | Required | Description | | --- | --- | --- | -| `command` | stdio | 启动 server 的可执行命令(如 `npx`、`python`、`uvx`) | -| `args` | 否 | 传给 command 的参数列表 | -| `env` | 否 | 子进程的环境变量,常用于 API Key | -| `url` | SSE / Streamable HTTP | 远程端点 URL(与 `command` 二选一) | -| `type` | 远程 | 远程传输类型,可选 `sse` 或 `streamable-http`,默认 `sse` | -| `headers` | 否 | 远程请求附加 HTTP 头(如 `Authorization`),仅 Streamable HTTP 使用 | -| `disabled` | 否 | `true` 时跳过该 server,便于临时关闭 | +| `command` | stdio | Executable to launch the server (e.g. `npx`, `python`, `uvx`) | +| `args` | No | Arguments passed to `command` | +| `env` | No | Environment variables for the subprocess, commonly used for API keys | +| `url` | SSE / Streamable HTTP | Remote endpoint URL (alternative to `command`) | +| `type` | Remote | Remote transport type: `sse` or `streamable-http` (defaults to `sse`) | +| `headers` | No | Extra HTTP headers for remote requests (e.g. `Authorization`); Streamable HTTP only | +| `disabled` | No | When `true`, this server is skipped — handy for temporary disabling | -### 完整示例 +### Full Example ```json { @@ -59,54 +59,54 @@ Docker 部署时,官方 `docker-compose.yml` 已经把宿主机 `./cow` 挂载 } ``` -- **fetch**:通用网页抓取,返回页面文本内容,无需 API Key -- **github**:访问 GitHub 仓库、Issue、PR 等,需要 Personal Access Token +- **fetch**: Generic web page fetcher that returns page text content. No API key required. +- **github**: Access GitHub repos, issues, PRs, etc. Requires a Personal Access Token. 
-## 让 Agent 帮你配置 +## Let the Agent Configure It for You -CowAgent 自带 `read` / `write` / `edit` 工具,**直接把要装的 MCP 配置发给 Agent,让它写到配置文件中: +CowAgent ships with `read` / `write` / `edit` tools, so **you can simply send the MCP config to the Agent and ask it to write the file**: -例如: +For example: ```markdown -帮我把这个 MCP 加到 ~/cow/mcp.json 里: +Add this MCP to ~/cow/mcp.json: {"mcpServers":{"fetch":{"command":"uvx","args":["mcp-server-fetch"]}}} ``` -Agent 会: +The Agent will: -1. 访问 MCP 配置文件,合并新 server 配置,保留已有项 -2. 自动重载增量的 MCP Server,下一次对话即可使用相应 Tools +1. Read the existing MCP config and merge the new server entry, preserving existing ones +2. Hot-reload the new MCP server, so the corresponding tools become available on the next message -## 工作方式 +## How It Works -- 启动时**异步加载**:`mcp.json` 中配置的所有 server 会在后台异步加载,不阻塞主流程,对话可以立刻使用 -- **热更新**:用户或 Agent 修改 `mcp.json` 后,消息处理完成时会自动重载变更的 server,无需重启 cow -- **平铺呈现**:每个 MCP server 暴露的多个方法会平铺为独立的工具,LLM 直接选择调用,不需要二次决策 +- **Async loading at startup**: All servers configured in `mcp.json` are loaded asynchronously in the background, never blocking the main loop — chat is usable immediately. +- **Hot reload**: When you or the Agent modifies `mcp.json`, changed servers are automatically reloaded after the current message — no need to restart cow. +- **Flat exposure**: Each method exposed by an MCP server appears as an individual tool. The LLM picks one directly without a second-stage decision. -## 支持的传输协议 +## Supported Transports -| 协议 | 说明 | 配置字段 | +| Transport | Description | Config Field | | --- | --- | --- | -| **stdio** | 子进程通信,最常见,社区生态最丰富 | `command` + `args` | -| **SSE** | HTTP Server-Sent Events,旧版远程协议 | `url`(默认) | -| **Streamable HTTP** | 新版远程协议,单端点收发,逐步取代 SSE | `type: "streamable-http"` + `url` | +| **stdio** | Subprocess communication. The most common option, with the richest community ecosystem. | `command` + `args` | +| **SSE** | HTTP Server-Sent Events. Legacy remote transport. 
| `url` (default) | +| **Streamable HTTP** | New unified remote transport, gradually replacing SSE. | `type: "streamable-http"` + `url` | -## 排错 +## Troubleshooting -| 现象 | 排查方向 | +| Symptom | What to Check | | --- | --- | -| 启动后 Agent 没有 MCP 工具 | 检查 `~/cow/mcp.json` 是否存在、JSON 格式是否合法 | -| 某个 server 加载失败 | 查看启动日志中的 `[MCP] Server 'xxx' load failed`,常见为依赖未装、API Key 缺失 | -| 修改 `mcp.json` 没有生效 | 改动会在**下一条消息**生效;若 server 配置不变(如只改注释),不会触发重启 | -| Docker 部署 | 确认宿主机 `./cow` 已挂载到容器内 `/home/agent/cow`,`mcp.json` 直接放进宿主机 `./cow/` 目录即可,或者直接对话 Agent 安装 | +| Agent has no MCP tools after startup | Verify that `~/cow/mcp.json` exists and contains valid JSON | +| A specific server fails to load | Look for `[MCP] Server 'xxx' load failed` in startup logs — usually missing dependencies or API keys | +| Changes to `mcp.json` aren't applied | Changes take effect on **the next message**. If the server config didn't actually change (e.g. only comments edited), no restart is triggered | +| Docker deployment | Make sure host's `./cow` is mounted to `/home/agent/cow` in the container, then just drop `mcp.json` into host's `./cow/`. Or just ask the Agent to do it | -## MCP 市场推荐 +## Recommended MCP Marketplaces -可以从各个第三方广场寻找现成的 MCP server,复制 JSON 配置即可使用,例如: +You can browse third-party MCP marketplaces and copy a JSON config to use directly, for example: -- [mcp.so](https://mcp.so) — 全球 MCP 服务索引 -- [ModelScope MCP 广场](https://modelscope.cn/mcp) — 魔搭社区 MCP 广场,国内访问更稳定 +- [mcp.so](https://mcp.so) — Global MCP service index +- [ModelScope MCP Hub](https://modelscope.cn/mcp) — ModelScope's MCP hub, more reliable from mainland China -只要遵循 MCP 标准协议(stdio / SSE / Streamable HTTP),都可以直接接入 CowAgent。 +Any MCP server that follows the standard protocol (stdio / SSE / Streamable HTTP) integrates with CowAgent out of the box. 
diff --git a/docs/tools/memory.mdx b/docs/tools/memory.mdx index c3cc6fe4..d272bf9b 100644 --- a/docs/tools/memory.mdx +++ b/docs/tools/memory.mdx @@ -1,43 +1,43 @@ --- -title: memory - 记忆与知识 -description: 搜索和读取长期记忆及知识库文件 +title: memory - Memory & Knowledge +description: Search and read long-term memory and knowledge base files --- -记忆工具包含两个子工具:`memory_search`(搜索记忆)和 `memory_get`(读取记忆或知识文件)。 +The memory tool contains two sub-tools: `memory_search` (search memory) and `memory_get` (read memory or knowledge files). -当 [知识库](/knowledge) 功能开启时,这两个工具同时支持访问 `memory/` 和 `knowledge/` 目录下的文件。 +When the [knowledge base](/knowledge) feature is enabled, both tools also support accessing files under the `knowledge/` directory. -## 依赖 +## Dependencies -无额外依赖,默认可用。由 Agent Core 的记忆系统管理。 +No extra dependencies, available by default. Managed by the Agent Core memory system. ## memory_search -搜索历史记忆和知识库内容,支持关键词和向量混合检索。 +Search historical memory and knowledge base content with hybrid keyword and vector retrieval. -| 参数 | 类型 | 必填 | 说明 | +| Parameter | Type | Required | Description | | --- | --- | --- | --- | -| `query` | string | 是 | 搜索查询 | +| `query` | string | Yes | Search query | ## memory_get -读取特定记忆文件或知识库文件的内容。 +Read the content of a specific memory or knowledge file. -| 参数 | 类型 | 必填 | 说明 | +| Parameter | Type | Required | Description | | --- | --- | --- | --- | -| `path` | string | 是 | 文件的相对路径(如 `MEMORY.md`、`memory/2026-01-01.md`、`knowledge/concepts/rag.md`) | -| `start_line` | integer | 否 | 起始行号 | -| `end_line` | integer | 否 | 结束行号 | +| `path` | string | Yes | Relative path to the file (e.g. 
`MEMORY.md`, `memory/2026-01-01.md`, `knowledge/concepts/rag.md`) | +| `start_line` | integer | No | Start line number | +| `end_line` | integer | No | End line number | -## 工作方式 +## How It Works -Agent 会在以下场景自动调用记忆工具: +The Agent automatically invokes memory tools in these scenarios: -- 用户分享重要信息时 → 存储到记忆 -- 需要参考历史信息时 → 搜索相关记忆 -- 对话达到一定长度时 → 提取摘要存储 -- 讨论到专业知识时 → 检索知识库中的相关页面 +- When the user shares important information → stores to memory +- When historical context is needed → searches relevant memory +- When conversation reaches a certain length → extracts summary for storage +- When discussing domain knowledge → retrieves relevant pages from the knowledge base - 当 `knowledge` 配置为 `false` 时,工具的描述和搜索范围会自动调整为仅包含记忆文件。 + When `knowledge` is set to `false` in config, the tool descriptions and search scope automatically adjust to include only memory files. diff --git a/docs/tools/read.mdx b/docs/tools/read.mdx index 07e08b88..56b56570 100644 --- a/docs/tools/read.mdx +++ b/docs/tools/read.mdx @@ -1,24 +1,24 @@ --- -title: read - 文件读取 -description: 读取文件内容 +title: read - File Read +description: Read file content --- -读取文件内容。支持文本文件、PDF 文件、图片(返回元数据)等格式。 +Read file content. Supports text files, PDF files, images (returns metadata), and more. -## 依赖 +## Dependencies -无额外依赖,默认可用。 +No extra dependencies, available by default. 
-## 参数 +## Parameters -| 参数 | 类型 | 必填 | 说明 | +| Parameter | Type | Required | Description | | --- | --- | --- | --- | -| `path` | string | 是 | 文件路径,相对路径基于工作空间目录 | -| `offset` | integer | 否 | 起始行号(1-indexed),负值表示从末尾读取 | -| `limit` | integer | 否 | 读取行数 | +| `path` | string | Yes | File path, relative paths are based on workspace directory | +| `offset` | integer | No | Start line number (1-indexed), negative values read from the end | +| `limit` | integer | No | Number of lines to read | -## 使用场景 +## Use Cases -- 查看配置文件、日志文件 -- 读取代码文件进行分析 -- 检查图片/视频的文件信息 +- View configuration files, log files +- Read code files for analysis +- Check image/video file info diff --git a/docs/tools/scheduler.mdx b/docs/tools/scheduler.mdx index 2648116f..18c211bf 100644 --- a/docs/tools/scheduler.mdx +++ b/docs/tools/scheduler.mdx @@ -1,61 +1,61 @@ --- -title: scheduler - 定时任务 -description: 创建和管理定时任务 +title: scheduler - Scheduler +description: Create and manage scheduled tasks --- -创建和管理动态定时任务,支持灵活的调度方式和执行模式。 +Create and manage dynamic scheduled tasks with flexible scheduling and execution modes. 
-## 依赖 +## Dependencies -| 依赖 | 安装命令 | +| Dependency | Install Command | | --- | --- | | `croniter` ≥ 2.0.0 | `pip install croniter>=2.0.0` | -安装核心依赖时已包含:`pip3 install -r requirements.txt` +Included in core dependencies: `pip3 install -r requirements.txt` -## 调度方式 +## Scheduling Modes -| 方式 | 说明 | +| Mode | Description | | --- | --- | -| 一次性任务 | 在指定时间执行一次 | -| 固定间隔 | 按固定时间间隔重复执行 | -| Cron 表达式 | 使用 Cron 语法定义复杂调度规则 | +| One-time | Execute once at a specified time | +| Fixed interval | Repeat at fixed time intervals | +| Cron expression | Define complex schedules using Cron syntax | -## 执行模式 +## Execution Modes -- **固定消息发送**:到达触发时间时发送预设消息 -- **Agent 动态任务**:到达触发时间时由 Agent 智能执行任务 +- **Fixed message**: Send a preset message when triggered +- **Agent dynamic task**: Agent intelligently executes the task when triggered -## 使用方式 +## Usage -通过自然语言即可创建和管理定时任务: +Create and manage scheduled tasks with natural language: -- "每天早上 9 点给我发天气预报" -- "每隔 2 小时检查一下服务器状态" -- "明天下午 3 点提醒我开会" -- "查看所有定时任务" +- "Send me a weather report every morning at 9 AM" +- "Check server status every 2 hours" +- "Remind me about the meeting tomorrow at 3 PM" +- "Show all scheduled tasks" -## 结果进入会话上下文 +## Results injected into the conversation -定时任务在隔离 session 中执行(内部规划与 tool 调用不污染用户会话),但**最终输出**会作为一对消息回写到接收者的真实会话,用户可以直接追问"刚才那条第二点展开说说"。 +Scheduled tasks run inside an isolated session (so internal planning and tool calls do not pollute the user's chat), but the **final output** is written back to the user's real session as a message pair. You can directly follow up — e.g. "expand on point 2 from earlier". -**默认策略** +**Default policy** -- Agent 动态任务的输出进入上下文 -- 固定消息类任务默认不进入上下文(可通过配置打开) -- 每个会话最多保留最近 **3 对** scheduler 消息,更早的自动清理;普通用户消息不受影响 +- Output of Agent dynamic tasks is injected into the conversation +- Fixed-message tasks are not injected by default (configurable) +- Each session keeps the most recent **3 pairs** of scheduler messages; older pairs are pruned automatically. 
Regular user messages are unaffected -**配置项** +**Configuration** -| 配置项 | 默认值 | 说明 | +| Key | Default | Description | | --- | --- | --- | -| `scheduler_inject_to_session` | `true` | 总开关 | -| `scheduler_inject_max_per_session` | `3` | 每会话保留 scheduler 消息对数上限 | -| `scheduler_inject_send_message` | `false` | 是否同时注入固定消息类任务 | +| `scheduler_inject_to_session` | `true` | Master switch | +| `scheduler_inject_max_per_session` | `3` | Max scheduler message pairs kept per session | +| `scheduler_inject_send_message` | `false` | Whether to also inject fixed-message tasks | ```json { @@ -65,16 +65,16 @@ description: 创建和管理定时任务 } ``` -## 任务执行时的上下文 +## Context inside scheduled task execution -定时任务的隔离 session 会保留最近几次执行的对话历史,便于做"对比上次"、"延续之前结论"等操作;但为了避免高频任务(如每 5 分钟监控)prompt 越积越长,会按公式自动裁剪: +The isolated session for scheduled tasks retains a few recent runs of conversation history, so requests like "compare with last time" or "continue from the previous conclusion" work naturally. To prevent prompts from growing without bound for high-frequency tasks (e.g. a 5-minute monitor), history is auto-trimmed: ``` scheduler_keep_turns = max(1, agent_max_context_turns / 5) ``` -`agent_max_context_turns` 默认为 `20`,所以定时任务每次执行默认带最近 **4 轮**历史。需要更长记忆可调大 `agent_max_context_turns`。 +`agent_max_context_turns` defaults to `20`, so each scheduled run keeps the most recent **4 turns** of history by default. Increase `agent_max_context_turns` if you need longer memory. -群聊场景(飞书 / 企微群机器人 / 钉钉等)下用户的真实 session_id 形如 `user_id:group_id`,与 receiver 不同。创建任务时会自动记录正确的 session_id;老的 `tasks.json` 缺该字段时回落到 receiver,行为与历史版本一致。 +For group-chat scenarios (Feishu / WeCom group bots / DingTalk, etc.), the user's real `session_id` looks like `user_id:group_id` — different from `receiver`. The scheduler records the correct `session_id` when a task is created. For older `tasks.json` entries missing this field, the runtime falls back to `receiver`, matching legacy behavior.
diff --git a/docs/tools/send.mdx b/docs/tools/send.mdx index 05f73a6b..1cf089ac 100644 --- a/docs/tools/send.mdx +++ b/docs/tools/send.mdx @@ -1,23 +1,23 @@ --- -title: send - 文件发送 -description: 向用户发送文件 +title: send - File Send +description: Send files to user --- -向用户发送文件(图片、视频、音频、文档等),当用户明确要求发送/分享文件时使用。 +Send files to the user (images, videos, audio, documents, etc.), used when the user explicitly requests to send/share a file. -## 依赖 +## Dependencies -无额外依赖,默认可用。 +No extra dependencies, available by default. -## 参数 +## Parameters -| 参数 | 类型 | 必填 | 说明 | +| Parameter | Type | Required | Description | | --- | --- | --- | --- | -| `path` | string | 是 | 文件路径,可以是绝对路径或相对于工作空间的路径 | -| `message` | string | 否 | 附带的消息说明 | +| `path` | string | Yes | File path, can be absolute or relative to workspace | +| `message` | string | No | Accompanying message | -## 使用场景 +## Use Cases -- 将生成的代码或文档发送给用户 -- 发送截图、图表 -- 分享下载的文件 +- Send generated code or documents to the user +- Send screenshots, charts +- Share downloaded files diff --git a/docs/tools/vision.mdx b/docs/tools/vision.mdx index 675afe41..4db6bec0 100644 --- a/docs/tools/vision.mdx +++ b/docs/tools/vision.mdx @@ -1,42 +1,42 @@ --- -title: vision - 图片理解 -description: 分析图片内容(识别、描述、OCR 等) +title: vision - Image Understanding +description: Analyze image content (recognition, description, OCR, etc.) --- -使用 Vision API 分析本地图片或图片 URL,支持内容描述、文字提取(OCR)、物体识别等。 +Analyze local images or image URLs using Vision API. Supports content description, text extraction (OCR), object recognition, and more. -## 模型选择 +## Model Selection -Vision 工具采用多级自动选择 + 自动兜底策略,无需手动配置即可使用: +The vision tool uses a multi-level auto-selection strategy with automatic fallback — no manual configuration required: -1. **主模型** — 优先使用当前配置的主模型进行图像识别(需要是多模态模型) -2. **其他已配置模型** — 自动发现已配置 API Key 的其他多模态模型作为备选 +1. **Main model** — uses the currently configured main model for image recognition (must be a multimodal model) +2. 
**Other configured models** — auto-discovers other multimodal models with configured API keys as alternatives -如果当前 provider 调用失败,会自动尝试下一个,直到成功或全部失败。 +If the current provider fails, the tool automatically tries the next one until it succeeds or all fail. -### 支持的模型 +### Supported Models -| 厂商 | 视觉模型 | 说明 | +| Provider | Vision Model | Notes | | --- | --- | --- | -| OpenAI / 兼容协议 | 使用主模型 | 支持所有 OpenAI 协议兼容的多模态模型 | -| 通义千问 (DashScope) | 使用主模型 | 例如 qwen3.6-plus 等 | -| Claude | 使用主模型 | Anthropic 原生图像格式 | -| Gemini | 使用主模型 | inlineData 格式 | -| 豆包 (Doubao) | 使用主模型 | doubao-seed-2-0 系列原生支持 | -| Kimi (Moonshot) | 使用主模型 | kimi-k2.6、kimi-k2.5 原生支持 | -| 百度千帆 (Qianfan) | 使用主模型 | 默认使用多模态主模型 (如 ernie-5.1),主模型不支持时兜底使用 ernie-4.5-turbo-vl | -| 智谱 AI | glm-5v-turbo | 固定使用视觉专用模型 | -| MiniMax | MiniMax-Text-01 | 固定使用视觉专用模型 | +| OpenAI / Compatible | Main model | All OpenAI-protocol-compatible multimodal models | +| Qwen (DashScope) | Main model | e.g. qwen3.6-plus | +| Claude | Main model | Anthropic native image format | +| Gemini | Main model | inlineData format | +| Doubao | Main model | doubao-seed-2-0 series natively supported | +| Kimi (Moonshot) | Main model | kimi-k2.6, kimi-k2.5 natively supported | +| ERNIE | Main model | Defaults to the multimodal main model (e.g. `ernie-5.1`); falls back to `ernie-4.5-turbo-vl` when the main model is not multimodal | +| ZhipuAI | glm-5v-turbo | Always uses the dedicated vision model | +| MiniMax | MiniMax-Text-01 | Always uses the dedicated vision model | - 智谱和 MiniMax 的文本模型不支持图像理解,因此始终使用对应的视觉专用模型,无需手动指定。 + ZhipuAI and MiniMax text models do not support image understanding, so their dedicated vision models are always used automatically. -> 当 `use_linkai=true` 时,默认使用 LinkAI 的多模态模型进行 +> When `use_linkai=true`, LinkAI's multimodal model is used by default.
-## 自定义配置 +## Custom Configuration -如果希望指定 Vision 使用的模型,可在 `config.json` 中配置,例如: +To specify the model used by Vision, configure it in `config.json`, for example: ```json { @@ -48,28 +48,28 @@ Vision 工具采用多级自动选择 + 自动兜底策略,无需手动配置 } ``` -指定的模型会被**优先使用**,工具会根据模型名自动路由到对应的 provider;若调用失败,会自动 fallback 到其他已配置的 provider。 +The specified model is **used first**, and the tool automatically routes to the corresponding provider based on the model name; on failure, it falls back to other configured providers. -大多数情况下无需配置,主模型支持多模态或配置任意一个支持视觉的 API Key 即可自动工作。 +In most cases no configuration is needed — the tool works automatically as long as the main model supports multimodal input or any vision-capable API key is configured. -## 参数 +## Parameters -| 参数 | 类型 | 必填 | 说明 | +| Parameter | Type | Required | Description | | --- | --- | --- | --- | -| `image` | string | 是 | 本地文件路径或 HTTP(S) 图片 URL | -| `question` | string | 是 | 对图片提出的问题 | +| `image` | string | Yes | Local file path or HTTP(S) image URL | +| `question` | string | Yes | Question to ask about the image | -支持的图片格式:jpg、jpeg、png、gif、webp +Supported image formats: jpg, jpeg, png, gif, webp -## 使用场景 +## Use Cases -- 描述图片中的内容 -- 提取图片中的文字(OCR) -- 识别物体、颜色、场景 -- 分析截图、文档扫描图片等 +- Describe image content +- Extract text from images (OCR) +- Identify objects, colors, scenes +- Analyze screenshots and scanned documents - 超过 1MB 的图片会自动压缩后上传,所有图片(包括远程 URL)会统一转为 base64 传输,确保兼容所有模型后端。 + Images larger than 1MB are automatically compressed before upload. All images (including remote URLs) are converted to base64 for transmission to ensure compatibility with all model backends. 
diff --git a/docs/tools/web-fetch.mdx b/docs/tools/web-fetch.mdx index 12f85953..0a0349b9 100644 --- a/docs/tools/web-fetch.mdx +++ b/docs/tools/web-fetch.mdx @@ -1,32 +1,32 @@ --- -title: web_fetch - 网页获取 -description: 获取网页或文档内容 +title: web_fetch - Web Fetch +description: Fetch web pages and document content --- -获取 HTTP/HTTPS URL 的内容。对网页提取可读文本,对文档文件(PDF、Word、Excel 等)自动下载并解析内容。 +Fetch the content of an HTTP/HTTPS URL. Web pages are extracted as readable text; document files (PDF, Word, Excel, etc.) are downloaded and parsed automatically. -## 参数 +## Parameters -| 参数 | 类型 | 必填 | 说明 | +| Parameter | Type | Required | Description | | --- | --- | --- | --- | -| `url` | string | 是 | HTTP/HTTPS URL(网页或文档链接) | +| `url` | string | Yes | HTTP/HTTPS URL (web page or document) | -## 支持的文件类型 +## Supported file types -| 类型 | 格式 | +| Type | Formats | | --- | --- | | PDF | `.pdf` | | Word | `.docx` | -| 文本 | `.txt`、`.md`、`.csv`、`.log` | -| 表格 | `.xls`、`.xlsx` | -| 演示文稿 | `.ppt`、`.pptx` | +| Text | `.txt`, `.md`, `.csv`, `.log` | +| Spreadsheet | `.xls`, `.xlsx` | +| Presentation | `.ppt`, `.pptx` | -## 使用场景 +## Use cases -- 获取网页的文本内容 -- 下载并解析远程文档 -- 获取 API 响应内容 +- Extract readable text from a web page +- Download and parse remote documents +- Inspect API response bodies - `web_fetch` 只能获取静态 HTML 内容。如果页面需要 JavaScript 渲染(如 SPA 单页应用),请使用 `browser` 工具。 + `web_fetch` only retrieves static HTML. For pages that require JavaScript rendering (such as SPAs), use the `browser` tool instead. 
diff --git a/docs/tools/web-search.mdx b/docs/tools/web-search.mdx index 928eb633..09dff68e 100644 --- a/docs/tools/web-search.mdx +++ b/docs/tools/web-search.mdx @@ -1,26 +1,26 @@ --- -title: web_search - 联网搜索 -description: 搜索互联网获取实时信息,支持多个搜索厂商 +title: web_search - Web Search +description: Search the internet for real-time information, with support for multiple search providers --- -搜索互联网获取实时信息、新闻、研究等内容。支持博查、百度千帆、智谱、LinkAI 四个后端,配置任意一家即可使用。 +Search the internet for real-time information, news, research, and more. Supports four backends — Bocha, ERNIE, GLM, and LinkAI — and works once any one of them is configured. - 推荐通过 [Web 控制台](/channels/web) 的「模型管理 → 搜索」面板可视化配置厂商与策略,无需手动编辑配置文件。 + It is recommended to configure providers and routing strategy visually from the "Model Management → Search" panel in the [Web console](/channels/web), without manually editing the configuration file. -## 厂商 +## Providers -| 厂商 | 凭证 | 申请入口 | +| Provider | Credential | Apply | | --- | --- | --- | -| 博查 Bocha | `tools.web_search.bocha_api_key` | [博查开放平台](https://open.bochaai.com/) | -| 百度千帆 | 复用 `qianfan_api_key` | [千帆控制台](https://cloud.baidu.com/doc/qianfan/s/2mh4su4uy) | -| 智谱 Zhipu | 复用 `zhipu_ai_api_key` | [智谱开放平台](https://docs.bigmodel.cn/cn/guide/tools/web-search) | -| LinkAI | 复用 `linkai_api_key` | [LinkAI 控制台](https://link-ai.tech/console/interface) | +| Bocha | `tools.web_search.bocha_api_key` | [Bocha Open Platform](https://open.bochaai.com/) | +| ERNIE | Reuses `qianfan_api_key` | [Qianfan Console](https://cloud.baidu.com/doc/qianfan/s/2mh4su4uy) | +| Zhipu | Reuses `zhipu_ai_api_key` | [Zhipu Open Platform](https://docs.bigmodel.cn/cn/guide/tools/web-search) | +| LinkAI | Reuses `linkai_api_key` | [LinkAI Console](https://link-ai.tech/console/interface) | -除博查需要单独的 `bocha_api_key` 外,其他三家直接复用对应模型的 API Key,配好模型即同时获得搜索能力。 +Except for Bocha which requires a dedicated `bocha_api_key`, the other three reuse the corresponding model's API key — configuring the model automatically 
grants search capability. -## 路由策略 +## Routing Strategy ```json { @@ -33,19 +33,19 @@ description: 搜索互联网获取实时信息,支持多个搜索厂商 } ``` -- `auto`(默认):由 Agent 在已配置的厂商中智能选择,并可在一次任务中多次调用、切换不同厂商以获取更全面的结果;未指定时按 `bocha → qianfan → zhipu → linkai` 顺序兜底。 -- `fixed`:固定使用 `provider` 指定的厂商;该厂商凭证缺失时自动回落到 auto 顺序。 +- `auto` (default): the Agent intelligently picks among configured providers and may call multiple providers in a single task to gather more comprehensive results; when none is specified, falls back through `bocha → qianfan → zhipu → linkai`. +- `fixed`: always use the provider specified in `provider`; falls back to the auto order if that provider's credentials are missing. -## 工具参数 +## Tool Parameters -| 参数 | 类型 | 必填 | 说明 | +| Parameter | Type | Required | Description | | --- | --- | --- | --- | -| `query` | string | 是 | 搜索关键词 | -| `count` | integer | 否 | 返回结果数量(1–50,默认 10) | -| `freshness` | string | 否 | 时间范围:`noLimit`(默认)、`oneDay`、`oneWeek`、`oneMonth`、`oneYear`,或日期范围如 `2025-01-01..2025-02-01` | -| `summary` | boolean | 否 | 是否返回页面摘要(默认 false) | -| `provider` | string | 否 | `auto` 策略下配置了多个厂商时可见,用于单次切换厂商 | +| `query` | string | Yes | Search keywords | +| `count` | integer | No | Number of results (1–50, default 10) | +| `freshness` | string | No | Time range: `noLimit` (default), `oneDay`, `oneWeek`, `oneMonth`, `oneYear`, or date range like `2025-01-01..2025-02-01` | +| `summary` | boolean | No | Whether to return page summaries (default false) | +| `provider` | string | No | Available when multiple providers are configured under the `auto` strategy; used to switch provider for a single call | - 四家凭证均未配置时,该工具不会注册到 Agent。 + If none of the four credentials are configured, this tool is not registered with the Agent. 
diff --git a/docs/tools/write.mdx b/docs/tools/write.mdx index 51cf66f1..2c0a10f0 100644 --- a/docs/tools/write.mdx +++ b/docs/tools/write.mdx @@ -1,27 +1,27 @@ --- -title: write - 文件写入 -description: 创建或覆盖写入文件 +title: write - File Write +description: Create or overwrite files --- -写入内容到文件。文件不存在则自动创建,已存在则覆盖。自动创建父目录。 +Write content to a file. Creates the file if it doesn't exist, overwrites if it does. Automatically creates parent directories. -## 依赖 +## Dependencies -无额外依赖,默认可用。 +No extra dependencies, available by default. -## 参数 +## Parameters -| 参数 | 类型 | 必填 | 说明 | +| Parameter | Type | Required | Description | | --- | --- | --- | --- | -| `path` | string | 是 | 文件路径 | -| `content` | string | 是 | 要写入的内容 | +| `path` | string | Yes | File path | +| `content` | string | Yes | Content to write | -## 使用场景 +## Use Cases -- 创建新的代码文件或脚本 -- 生成配置文件 -- 保存处理结果 +- Create new code files or scripts +- Generate configuration files +- Save processing results - 单次写入不应超过 10KB。对于大文件,建议先创建骨架,再使用 edit 工具分块添加内容。 + Single writes should not exceed 10KB. For large files, create a skeleton first, then use the edit tool to add content in chunks. diff --git a/docs/zh/README.md b/docs/zh/README.md index fafc2f1e..d37f09ce 100644 --- a/docs/zh/README.md +++ b/docs/zh/README.md @@ -13,8 +13,8 @@ CowAgent 轻量、易部署、可扩展,自由接入主流大模型,覆盖

🌐 官网  ·  - 📖 文档中心  ·  - 🚀 快速开始  ·  + 📖 文档中心  ·  + 🚀 快速开始  ·  🧩 技能广场  ·  ☁️ 在线体验

@@ -25,15 +25,15 @@ CowAgent 轻量、易部署、可扩展,自由接入主流大模型,覆盖 | 能力 | 说明 | | :--- | :--- | -| [任务规划](https://docs.cowagent.ai/intro/architecture) | 理解复杂任务并自主分解执行,循环调用工具直到完成目标 | -| [长期记忆](https://docs.cowagent.ai/memory) | 三层记忆架构(上下文 → 天级 → 核心),梦境蒸馏自动整理,支持关键词与向量混合检索 | -| [知识库](https://docs.cowagent.ai/knowledge) | 自动整理结构化知识为 Markdown Wiki,构建持续增长的知识图谱,可视化浏览 | -| [技能](https://docs.cowagent.ai/skills) | 从 [Skill Hub](https://skills.cowagent.ai/)、GitHub、ClawHub 等一键安装;也可通过对话创造自定义技能 | -| [工具](https://docs.cowagent.ai/tools) | 内置文件读写、终端、浏览器、定时任务、记忆检索、联网搜索等 10+ 工具,支持 MCP 协议 | -| [通道](https://docs.cowagent.ai/channels) | 一个 Agent 同时接入 Web、微信、飞书、钉钉、企微、QQ、公众号、Telegram、Slack 等多个渠道 | +| [任务规划](https://docs.cowagent.ai/zh/intro/architecture) | 理解复杂任务并自主分解执行,循环调用工具直到完成目标 | +| [长期记忆](https://docs.cowagent.ai/zh/memory) | 三层记忆架构(上下文 → 天级 → 核心),梦境蒸馏自动整理,支持关键词与向量混合检索 | +| [知识库](https://docs.cowagent.ai/zh/knowledge) | 自动整理结构化知识为 Markdown Wiki,构建持续增长的知识图谱,可视化浏览 | +| [技能](https://docs.cowagent.ai/zh/skills) | 从 [Skill Hub](https://skills.cowagent.ai/)、GitHub、ClawHub 等一键安装;也可通过对话创造自定义技能 | +| [工具](https://docs.cowagent.ai/zh/tools) | 内置文件读写、终端、浏览器、定时任务、记忆检索、联网搜索等 10+ 工具,支持 MCP 协议 | +| [通道](https://docs.cowagent.ai/zh/channels) | 一个 Agent 同时接入 Web、微信、飞书、钉钉、企微、QQ、公众号、Telegram、Slack 等多个渠道 | | 多模态 | 文本、图片、语音、文件全消息类型支持,覆盖识别、生成、收发 | -| [模型](https://docs.cowagent.ai/models) | DeepSeek、Claude、Gemini、GPT、GLM、Qwen、Kimi、MiniMax、Doubao 等主流厂商,配置一行切换 | -| [部署](https://docs.cowagent.ai/guide/quick-start) | 一键脚本安装,Web 控制台统一管理;本地、Docker、服务器多种部署方式 | +| [模型](https://docs.cowagent.ai/zh/models) | DeepSeek、Claude、Gemini、GPT、GLM、Qwen、Kimi、MiniMax、Doubao 等主流厂商,配置一行切换 | +| [部署](https://docs.cowagent.ai/zh/guide/quick-start) | 一键脚本安装,Web 控制台统一管理;本地、Docker、服务器多种部署方式 |
@@ -43,7 +43,7 @@ CowAgent 轻量、易部署、可扩展,自由接入主流大模型,覆盖 CowAgent 是一个完整的 **Agent Harness**:消息从各类**通道**进入,**Agent Core** 结合记忆、知识库与可用工具/技能进行任务规划与决策,调用**模型**生成结果,再回传至原通道。各模块解耦清晰,按需扩展。 -详见 [项目架构](https://docs.cowagent.ai/intro/architecture)。 +详见 [项目架构](https://docs.cowagent.ai/zh/intro/architecture)。
@@ -74,9 +74,9 @@ docker compose up -d > 服务器部署且需要公网访问控制台时,请在 `config.json` 中将 `web_host` 设为 `0.0.0.0`(同时强烈建议设置 `web_password` 启用鉴权),然后访问 `http://:9899`,并确保防火墙/安全组放行 `9899` 端口。 -> 📖 详细安装指南:[快速开始](https://docs.cowagent.ai/guide/quick-start) · [源码安装](https://docs.cowagent.ai/guide/manual-install) · [升级](https://docs.cowagent.ai/guide/upgrade) +> 📖 详细安装指南:[快速开始](https://docs.cowagent.ai/zh/guide/quick-start) · [源码安装](https://docs.cowagent.ai/zh/guide/manual-install) · [升级](https://docs.cowagent.ai/zh/guide/upgrade) -安装后可使用 `cow` [CLI 命令](https://docs.cowagent.ai/cli) 管理服务: +安装后可使用 `cow` [CLI 命令](https://docs.cowagent.ai/zh/cli) 管理服务: ```bash cow start | stop | restart # 服务管理 @@ -94,21 +94,21 @@ CowAgent 支持国内外主流厂商的大语言模型。**文本对话、图像 | 厂商 | 代表模型 | 文本 | 图像理解 | 图像生成 | 语音识别 | 语音合成 | 向量 | | --- | --- | :-: | :-: | :-: | :-: | :-: | :-: | -| [DeepSeek](https://docs.cowagent.ai/models/deepseek) | deepseek-v4-flash / pro | ✅ | | | | | | -| [MiniMax](https://docs.cowagent.ai/models/minimax) | MiniMax-M2.7 | ✅ | ✅ | ✅ | | ✅ | | -| [Claude](https://docs.cowagent.ai/models/claude) | claude-opus-4-8 | ✅ | ✅ | | | | | -| [Gemini](https://docs.cowagent.ai/models/gemini) | gemini-3.5-flash | ✅ | ✅ | ✅ | | | | -| [OpenAI](https://docs.cowagent.ai/models/openai) | gpt-5.5、o 系列 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| [智谱 GLM](https://docs.cowagent.ai/models/glm) | glm-5.1、glm-5v-turbo | ✅ | ✅ | | ✅ | | ✅ | -| [通义千问](https://docs.cowagent.ai/models/qwen) | qwen3.7-max | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| [豆包 Doubao](https://docs.cowagent.ai/models/doubao) | doubao-seed-2.0 系列 | ✅ | ✅ | ✅ | | | ✅ | -| [Kimi](https://docs.cowagent.ai/models/kimi) | kimi-k2.6 | ✅ | ✅ | | | | | -| [百度ERNIE](https://docs.cowagent.ai/models/qianfan) | ernie-5.1 | ✅ | ✅ | | | | | -| [小米 MiMo](https://docs.cowagent.ai/models/mimo) | mimo-v2.5-pro / v2.5 | ✅ | ✅ | | | ✅ | | -| [LinkAI](https://docs.cowagent.ai/models/linkai) | 一个 Key 接入 100+ 模型 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| [自定义](https://docs.cowagent.ai/models/custom) | 本地模型 / 三方代理 | ✅ 
| | | | | | +| [DeepSeek](https://docs.cowagent.ai/zh/models/deepseek) | deepseek-v4-flash / pro | ✅ | | | | | | +| [MiniMax](https://docs.cowagent.ai/zh/models/minimax) | MiniMax-M2.7 | ✅ | ✅ | ✅ | | ✅ | | +| [Claude](https://docs.cowagent.ai/zh/models/claude) | claude-opus-4-8 | ✅ | ✅ | | | | | +| [Gemini](https://docs.cowagent.ai/zh/models/gemini) | gemini-3.5-flash | ✅ | ✅ | ✅ | | | | +| [OpenAI](https://docs.cowagent.ai/zh/models/openai) | gpt-5.5、o 系列 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [智谱 GLM](https://docs.cowagent.ai/zh/models/glm) | glm-5.1、glm-5v-turbo | ✅ | ✅ | | ✅ | | ✅ | +| [通义千问](https://docs.cowagent.ai/zh/models/qwen) | qwen3.7-max | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [豆包 Doubao](https://docs.cowagent.ai/zh/models/doubao) | doubao-seed-2.0 系列 | ✅ | ✅ | ✅ | | | ✅ | +| [Kimi](https://docs.cowagent.ai/zh/models/kimi) | kimi-k2.6 | ✅ | ✅ | | | | | +| [百度ERNIE](https://docs.cowagent.ai/zh/models/qianfan) | ernie-5.1 | ✅ | ✅ | | | | | +| [小米 MiMo](https://docs.cowagent.ai/zh/models/mimo) | mimo-v2.5-pro / v2.5 | ✅ | ✅ | | | ✅ | | +| [LinkAI](https://docs.cowagent.ai/zh/models/linkai) | 一个 Key 接入 100+ 模型 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [自定义](https://docs.cowagent.ai/zh/models/custom) | 本地模型 / 三方代理 | ✅ | | | | | | -> 推荐通过 Web 控制台在线配置,无需手动编辑文件。手动配置请参考各厂商文档,详见 [模型概览](https://docs.cowagent.ai/models)。 +> 推荐通过 Web 控制台在线配置,无需手动编辑文件。手动配置请参考各厂商文档,详见 [模型概览](https://docs.cowagent.ai/zh/models)。
@@ -118,20 +118,20 @@ CowAgent 支持国内外主流厂商的大语言模型。**文本对话、图像 | 通道 | 文本 | 图片 | 文件 | 语音 | 群聊 | | --- | :-: | :-: | :-: | :-: | :-: | -| [Web 控制台](https://docs.cowagent.ai/channels/web)(默认) | ✅ | ✅ | ✅ | ✅ | | -| [微信](https://docs.cowagent.ai/channels/weixin) | ✅ | ✅ | ✅ | ✅ | | -| [飞书](https://docs.cowagent.ai/channels/feishu) | ✅ | ✅ | ✅ | ✅ | ✅ | -| [钉钉](https://docs.cowagent.ai/channels/dingtalk) | ✅ | ✅ | ✅ | ✅ | ✅ | -| [企微智能机器人](https://docs.cowagent.ai/channels/wecom-bot) | ✅ | ✅ | ✅ | ✅ | ✅ | -| [QQ](https://docs.cowagent.ai/channels/qq) | ✅ | ✅ | ✅ | | ✅ | -| [企业微信应用](https://docs.cowagent.ai/channels/wecom) | ✅ | ✅ | ✅ | ✅ | | -| [微信客服](https://docs.cowagent.ai/channels/wechat-kf) | ✅ | ✅ | ✅ | ✅ | | -| [微信公众号](https://docs.cowagent.ai/channels/wechatmp) | ✅ | ✅ | | ✅ | | -| [Telegram](https://docs.cowagent.ai/channels/telegram) | ✅ | ✅ | ✅ | ✅ | ✅ | -| [Slack](https://docs.cowagent.ai/channels/slack) | ✅ | ✅ | ✅ | | ✅ | -| [Discord](https://docs.cowagent.ai/channels/discord) | ✅ | ✅ | ✅ | | ✅ | +| [Web 控制台](https://docs.cowagent.ai/zh/channels/web)(默认) | ✅ | ✅ | ✅ | ✅ | | +| [微信](https://docs.cowagent.ai/zh/channels/weixin) | ✅ | ✅ | ✅ | ✅ | | +| [飞书](https://docs.cowagent.ai/zh/channels/feishu) | ✅ | ✅ | ✅ | ✅ | ✅ | +| [钉钉](https://docs.cowagent.ai/zh/channels/dingtalk) | ✅ | ✅ | ✅ | ✅ | ✅ | +| [企微智能机器人](https://docs.cowagent.ai/zh/channels/wecom-bot) | ✅ | ✅ | ✅ | ✅ | ✅ | +| [QQ](https://docs.cowagent.ai/zh/channels/qq) | ✅ | ✅ | ✅ | | ✅ | +| [企业微信应用](https://docs.cowagent.ai/zh/channels/wecom) | ✅ | ✅ | ✅ | ✅ | | +| [微信客服](https://docs.cowagent.ai/zh/channels/wechat-kf) | ✅ | ✅ | ✅ | ✅ | | +| [微信公众号](https://docs.cowagent.ai/zh/channels/wechatmp) | ✅ | ✅ | | ✅ | | +| [Telegram](https://docs.cowagent.ai/zh/channels/telegram) | ✅ | ✅ | ✅ | ✅ | ✅ | +| [Slack](https://docs.cowagent.ai/zh/channels/slack) | ✅ | ✅ | ✅ | | ✅ | +| [Discord](https://docs.cowagent.ai/zh/channels/discord) | ✅ | ✅ | ✅ | | ✅ | -> 飞书、企微智能机器人支持在 Web 控制台内**扫码一键接入**,无需公网 IP。详见 
[通道概览](https://docs.cowagent.ai/channels)。 +> 飞书、企微智能机器人支持在 Web 控制台内**扫码一键接入**,无需公网 IP。详见 [通道概览](https://docs.cowagent.ai/zh/channels)。 CowAgent Web 控制台 @@ -141,9 +141,9 @@ CowAgent 支持国内外主流厂商的大语言模型。**文本对话、图像 ## 🧠 记忆与知识库 -**长期记忆**采用三层架构:对话上下文(短期)→ 天级记忆(中期)→ MEMORY.md(长期)。每日自动执行**梦境蒸馏(Deep Dream)**,将分散记忆整合为精炼的长期记忆并生成叙事日记。详见 [长期记忆](https://docs.cowagent.ai/memory) · [梦境蒸馏](https://docs.cowagent.ai/memory/deep-dream)。 +**长期记忆**采用三层架构:对话上下文(短期)→ 天级记忆(中期)→ MEMORY.md(长期)。每日自动执行**梦境蒸馏(Deep Dream)**,将分散记忆整合为精炼的长期记忆并生成叙事日记。详见 [长期记忆](https://docs.cowagent.ai/zh/memory) · [梦境蒸馏](https://docs.cowagent.ai/zh/memory/deep-dream)。 -**个人知识库** 与按时间记录的记忆不同,以**主题为维度**组织结构化知识。Agent 在对话中自动整理有价值信息,维护交叉引用与索引,Web 控制台可可视化浏览知识图谱。详见 [个人知识库](https://docs.cowagent.ai/knowledge)。 +**个人知识库** 与按时间记录的记忆不同,以**主题为维度**组织结构化知识。Agent 在对话中自动整理有价值信息,维护交叉引用与索引,Web 控制台可可视化浏览知识图谱。详见 [个人知识库](https://docs.cowagent.ai/zh/knowledge)。
@@ -171,7 +171,7 @@ CowAgent 支持国内外主流厂商的大语言模型。**文本对话、图像 **MCP 协议** 通过 [Model Context Protocol](https://modelcontextprotocol.io) 接入开放生态中的各种 MCP 服务,配置一次 `mcp.json` 即用即得,支持 stdio / SSE 协议、热更新、零代码接入。 -详见 [工具概览](https://docs.cowagent.ai/tools) · [MCP 集成](https://docs.cowagent.ai/tools/mcp)。 +详见 [工具概览](https://docs.cowagent.ai/zh/tools) · [MCP 集成](https://docs.cowagent.ai/zh/tools/mcp)。 ### 技能系统 @@ -185,7 +185,7 @@ CowAgent 支持国内外主流厂商的大语言模型。**文本对话、图像 /skill install <名称> # 一键安装 ``` -详见 [技能概览](https://docs.cowagent.ai/skills) · [创建技能](https://docs.cowagent.ai/skills/create)。 +详见 [技能概览](https://docs.cowagent.ai/zh/skills) · [创建技能](https://docs.cowagent.ai/zh/skills/create)。
@@ -205,7 +205,7 @@ CowAgent 支持国内外主流厂商的大语言模型。**文本对话、图像 > **2026.02.03:** [v2.0.0](https://github.com/zhayujie/CowAgent/releases/tag/2.0.0) — 正式升级为超级 Agent 助理,支持多轮任务决策、长期记忆、Skills 框架 -完整更新历史:[Release Notes](https://docs.cowagent.ai/releases) +完整更新历史:[Release Notes](https://docs.cowagent.ai/zh/releases)
diff --git a/docs/zh/channels/dingtalk.mdx b/docs/zh/channels/dingtalk.mdx new file mode 100644 index 00000000..df11019e --- /dev/null +++ b/docs/zh/channels/dingtalk.mdx @@ -0,0 +1,56 @@ +--- +title: 钉钉 +description: 将 CowAgent 接入钉钉应用 +--- + +通过钉钉开放平台创建智能机器人应用,将 CowAgent 接入钉钉。 + +## 一、创建应用 + +1. 进入 [钉钉开发者后台](https://open-dev.dingtalk.com/fe/app#/corp/app),登录后点击 **创建应用**,填写应用相关信息: + + + +2. 点击添加应用能力,选择 **机器人** 能力,点击 **添加**: + + + +3. 配置机器人信息后点击 **发布**。发布后,点击 "**点击调试**",会自动创建测试群聊,可在客户端查看: + + + +4. 点击 **版本管理与发布**,创建新版本发布: + + + +## 二、项目配置 + +1. 点击 **凭证与基础信息**,获取 `Client ID` 和 `Client Secret`: + + + +2. 将以下配置加入项目根目录的 `config.json` 文件: + +```json +{ + "channel_type": "dingtalk", + "dingtalk_client_id": "YOUR_CLIENT_ID", + "dingtalk_client_secret": "YOUR_CLIENT_SECRET" +} +``` + +3. 安装依赖: + +```bash +pip3 install dingtalk_stream +``` + +4. 启动项目后,在钉钉开发者后台点击 **事件订阅**,点击 **已完成接入,验证连接通道**,显示 **连接接入成功** 即表示配置完成: + + + +## 三、使用 + +与机器人私聊或将机器人拉入企业群中均可开启对话: + + diff --git a/docs/zh/channels/discord.mdx b/docs/zh/channels/discord.mdx new file mode 100644 index 00000000..0563ab4c --- /dev/null +++ b/docs/zh/channels/discord.mdx @@ -0,0 +1,93 @@ +--- +title: Discord +description: 将 CowAgent 接入 Discord Bot +--- + +> 通过 Discord Bot 的 **Gateway 长连接** 接入 CowAgent,支持私聊(DM)与服务器频道(@机器人 / 回复机器人触发)。Gateway 基于 WebSocket 长连接,无需公网 IP 与回调地址,开箱即用。 + +## 一、接入步骤 + +### 步骤一:创建 Discord 应用与 Bot + +1. 打开 [Discord 开发者后台](https://discord.com/developers/applications),点击 **New Application**,填写名称(如 `CowAgent`)并创建。 +2. 左侧菜单进入 **Bot** 页面,点击 **Reset Token** 生成 Bot Token,复制并妥善保存(仅显示一次)。 + + + 这个 Token 等同于 Bot 的密码,请勿泄露。若意外泄漏,在 Bot 页面再次点击 **Reset Token** 重置即可。 + + +### 步骤二:开启 Message Content Intent + +私聊与频道读取文本均依赖该权限。 + +1. 在 **Bot** 页面下方找到 **Privileged Gateway Intents**。 +2. 打开 **Message Content Intent** 开关并保存。 + + + 未开启该开关时,机器人收到的消息内容会为空,导致无响应。 + + +### 步骤三:邀请 Bot 进入服务器 + +1. 左侧菜单进入 **OAuth2 → URL Generator**。 +2. **Scopes** 勾选 `bot`。 +3. 
**Bot Permissions** 至少勾选:`Send Messages`、`Read Message History`、`Attach Files`、`View Channels`。 +4. 复制底部生成的授权链接,在浏览器打开,选择目标服务器完成授权。 + + + 仅需私聊(DM)可跳过此步,但仍需先在任意共同服务器中与机器人建立 DM 通道,或由用户主动私聊机器人。 + + +### 步骤四:接入 CowAgent + + + + 打开 Web 控制台(本地链接:http://127.0.0.1:9899 ),选择 **通道** 菜单,点击 **接入通道**,选择 **Discord**,填入 Bot Token,点击接入即可。 + + + 在 `config.json` 中添加以下配置后启动: + + ```json + { + "channel_type": "discord", + "discord_token": "your-discord-bot-token", + "discord_group_trigger": "mention_or_reply" + } + ``` + + | 参数 | 说明 | 默认值 | + | --- | --- | --- | + | `discord_token` | 开发者后台 Bot 页面生成的 Bot Token | - | + | `discord_group_trigger` | 频道触发方式:`mention_or_reply`(@或回复机器人)/ `mention_only`(仅@) / `all`(所有消息) | `mention_or_reply` | + + + +启动 Cow 后,日志中出现以下输出即表示接入成功: + +``` +[Discord] Bot logged in as CowAgent#1234 (id=123456789) +[Discord] ✅ Discord bot ready, listening for messages +``` + +## 二、功能说明 + +| 功能 | 支持情况 | +| --- | --- | +| 私聊(DM) | ✅ | +| 服务器频道(@机器人 / 回复机器人) | ✅ | +| 文本消息 | ✅ 收发 | +| 图片消息 | ✅ 收发 | +| 文件消息 | ✅ 收发(PDF / Word / Excel 等) | + + + Discord 单条消息上限为 2000 字符,超长回复会自动按换行拆分为多条发送。 + + +## 三、使用 + +完成接入后: + +- **私聊(DM)**:在服务器成员列表中找到你的机器人,点击头像直接发消息对话。 +- **频道**:在已邀请机器人的频道中,使用 `@你的机器人 你好` 或 **回复机器人的某条消息** 触发对话。 + +发送图片或文件时,可以在附件的输入框中 **添加文字说明**(描述/问题)一并发送,机器人会结合附件回答。也支持先发附件再发问题,两条消息会自动合并提问。 diff --git a/docs/zh/channels/feishu.mdx b/docs/zh/channels/feishu.mdx new file mode 100644 index 00000000..5cb8fe80 --- /dev/null +++ b/docs/zh/channels/feishu.mdx @@ -0,0 +1,111 @@ +--- +title: 飞书 +description: 将 CowAgent 接入飞书应用 +--- + +> 通过飞书自建应用接入 CowAgent,支持单聊与群聊(@机器人),使用 WebSocket 长连接模式,无需公网 IP,支持流式打字机回复、语音消息收发。 + + + 接入需要是飞书企业用户且具有企业管理权限。 + + +## 一、接入方式 + +### 方式一:扫码一键接入(推荐) + +启动 Cow 项目后在终端中即可完成扫码创建。或打开 Web 控制台(本地链接:http://127.0.0.1:9899 ),选择 **通道** 菜单,点击 **接入通道**,选择 **飞书**,点击 **一键创建飞书应用**,使用 **飞书 App** 扫描二维码即可自动完成应用创建并接入: + + + + + + + 1. `lark-oapi` 依赖版本需要 >=1.5.5 + 2. 
扫码创建出的应用会自动预置全部所需权限(消息收发、卡片读写、群聊事件等)和事件订阅,无需到开发者后台手动配置。 + + + +### 方式二:手动创建接入 + +需要先在飞书开放平台创建自建应用并配置权限,再通过 Web 控制台或配置文件接入。 + +**步骤一:创建应用** + +1. 进入 [飞书开放平台](https://open.feishu.cn/app/),点击 **创建企业自建应用**: + + + +2. 在 **添加应用能力** 中,为应用添加 **机器人** 能力: + + + +3. 在 **权限管理** 中,将以下权限粘贴到输入框,全选并 **批量开通**: + +``` +im:message,im:message.group_at_msg,im:message.group_at_msg:readonly,im:message.p2p_msg,im:message.p2p_msg:readonly,im:message:send_as_bot,im:resource,cardkit:card:write +``` + + + +4. 在 **凭证与基础信息** 中获取 `App ID` 和 `App Secret`: + + + +**步骤二:接入 CowAgent** + + + + 打开 Web 控制台,选择 **通道** 菜单,点击 **接入通道**,选择 **飞书**,切换到「手动填写」Tab,输入 App ID 和 App Secret,点击接入即可。 + + + 在 `config.json` 中添加以下配置后启动程序: + + ```json + { + "channel_type": "feishu", + "feishu_app_id": "YOUR_APP_ID", + "feishu_app_secret": "YOUR_APP_SECRET", + "feishu_stream_reply": true + } + ``` + + | 参数 | 说明 | 默认值 | + | --- | --- | --- | + | `feishu_app_id` | 飞书应用 App ID | - | + | `feishu_app_secret` | 飞书应用 App Secret | - | + | `feishu_stream_reply` | 是否开启流式打字机回复 | `true` | + + + +**步骤三:发布应用** + +1. 启动 Cow 项目后,在飞书开放平台点击 **事件与回调**,选择 **长连接** 模式并保存: + + + +2. 点击 **添加事件**,搜索 "接收消息",选择 **接收消息 v2.0** 并确认。 + +3. 
点击 **版本管理与发布**,创建版本并申请 **线上发布**,在飞书客户端审核通过: + + + +## 二、功能说明 + +| 功能 | 支持情况 | +| --- | --- | +| 单聊 | ✅ | +| 群聊(@机器人) | ✅ | +| 文本消息 | ✅ 收发 | +| 图片消息 | ✅ 收发 | +| 语音消息 | ✅ 收发 | +| 流式回复 | ✅(通过 `feishu_stream_reply` 配置控制,默认开启) | + + + 流式回复需要机器人具备 `cardkit:card:write` 权限(一键创建已默认开通),且接收方飞书客户端版本 ≥ 7.20。低版本客户端会显示升级提示,权限或版本不满足时自动降级为普通文本回复。 + + +## 三、使用 + +完成接入后,在飞书中搜索机器人名称即可开始单聊对话。 + +如需在群聊中使用,将机器人添加到群中,@机器人发送消息即可。 diff --git a/docs/zh/channels/index.mdx b/docs/zh/channels/index.mdx new file mode 100644 index 00000000..3a930ce1 --- /dev/null +++ b/docs/zh/channels/index.mdx @@ -0,0 +1,45 @@ +--- +title: 通道概览 +description: CowAgent 支持的通道及能力矩阵 +--- + +CowAgent 支持接入多种聊天通道,启动时通过 `channel_type` 切换。Web 控制台默认开启,可与其他接入通道并行运行。 + +## 能力矩阵 + +下表汇总各通道支持的入站消息类型、机器人回复类型与群聊能力,方便按场景选择。 + +| 通道 | 文本 | 图片 | 文件 | 语音 | 群聊 | +| --- | :-: | :-: | :-: | :-: | :-: | +| [微信](/zh/channels/weixin) | ✅ | ✅ | ✅ | ✅ | | +| [Web 控制台](/zh/channels/web) | ✅ | ✅ | ✅ | ✅ | | +| [飞书](/zh/channels/feishu) | ✅ | ✅ | ✅ | ✅ | ✅ | +| [钉钉](/zh/channels/dingtalk) | ✅ | ✅ | ✅ | ✅ | ✅ | +| [企微智能机器人](/zh/channels/wecom-bot) | ✅ | ✅ | ✅ | ✅ | ✅ | +| [QQ](/zh/channels/qq) | ✅ | ✅ | ✅ | | ✅ | +| [企业微信应用](/zh/channels/wecom) | ✅ | ✅ | ✅ | ✅ | | +| [公众号](/zh/channels/wechatmp) | ✅ | ✅ | | ✅ | | +| [Telegram](/zh/channels/telegram) | ✅ | ✅ | ✅ | ✅ | ✅ | +| [Slack](/zh/channels/slack) | ✅ | ✅ | ✅ | | ✅ | +| [Discord](/zh/channels/discord) | ✅ | ✅ | ✅ | | ✅ | + +- **图片 / 文件 / 语音**列表示通道支持收发对应消息类型,具体细节详见各通道文档 +- **群聊**列指可识别并响应群消息 + + + 每个通道的语音 / 图像能力依赖对应模型厂商的配置,详见 [模型概览](/zh/models)。 + + +## 通道一览 + +- [Web 控制台](/zh/channels/web) — 内置浏览器对话和管理面板,默认开启 +- [微信](/zh/channels/weixin) — 通过个人微信扫码登录 +- [飞书](/zh/channels/feishu) — 飞书自建机器人 +- [钉钉](/zh/channels/dingtalk) — 钉钉自建机器人 +- [企微智能机器人](/zh/channels/wecom-bot) — 企业微信智能机器人 +- [QQ](/zh/channels/qq) — QQ 官方机器人开放平台 +- [企业微信应用](/zh/channels/wecom) — 企业微信自建应用接入 +- [公众号](/zh/channels/wechatmp) — 微信公众号(订阅号 / 服务号) +- [Telegram](/zh/channels/telegram) — 海外 IM,5 分钟接入,无需公网 IP +- 
[Slack](/zh/channels/slack) — 团队协作 IM,Socket Mode 接入,无需公网 IP +- [Discord](/zh/channels/discord) — 社区 IM,Gateway 长连接接入,无需公网 IP diff --git a/docs/zh/channels/qq.mdx b/docs/zh/channels/qq.mdx new file mode 100644 index 00000000..3b7554a9 --- /dev/null +++ b/docs/zh/channels/qq.mdx @@ -0,0 +1,88 @@ +--- +title: QQ 机器人 +description: 将 CowAgent 接入 QQ 机器人(WebSocket 长连接模式) +--- + +> 通过 QQ 开放平台的机器人接口接入 CowAgent,支持 QQ 单聊、QQ 群聊(@机器人)、频道消息和频道私信,无需公网 IP,使用 WebSocket 长连接模式。 + + + QQ 机器人通过 QQ 开放平台创建,使用 WebSocket 长连接接收消息,通过 OpenAPI 发送消息,无需公网 IP 和域名。 + + +## 一、创建 QQ 机器人 + +> 进入[QQ 开放平台](https://q.qq.com),QQ扫码登录,如果未注册开放平台账号,请先完成[账号注册](https://q.qq.com/#/register)。 + +1.在 [QQ开放平台-机器人列表页](https://q.qq.com/#/apps),点击创建机器人: + + + +2.填写机器人名称、头像等基本信息,完成创建: + + + +3.点击进入机器人配置页面,选择**开发管理**菜单,完成以下步骤: + + - 复制并记录 **AppID**(机器人ID) + - 生成并记录 **AppSecret**(机器人密钥) + + + +## 二、配置和运行 + +### 方式一:Web 控制台接入 + +启动 Cow项目后打开 Web 控制台 (本地链接为: http://127.0.0.1:9899/ ),选择 **通道** 菜单,点击 **接入通道**,选择 **QQ 机器人**,填写上一步保存的 AppID 和 AppSecret,点击接入即可。 + + + +### 方式二:配置文件接入 + +在 `config.json` 中添加以下配置: + +```json +{ + "channel_type": "qq", + "qq_app_id": "YOUR_APP_ID", + "qq_app_secret": "YOUR_APP_SECRET" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `qq_app_id` | QQ 机器人的 AppID,在开放平台开发管理中获取 | +| `qq_app_secret` | QQ 机器人的 AppSecret,在开放平台开发管理中获取 | + +配置完成后启动程序,日志显示 `[QQ] ✅ Connected successfully` 即表示连接成功。 + + +## 三、使用 + +在 QQ开放平台 - 管理 - **使用范围和人员** 菜单中,使用QQ客户端扫描 "添加到群和消息列表" 的二维码,即可开始与QQ机器人的聊天: + + + +对话效果: + + +## 四、功能说明 + +> 注意:若需在群聊及频道中使用QQ机器人,需完成发布上架审核,并在使用范围中配置相应权限。 + +| 功能 | 支持情况 | +| --- | --- | +| QQ 单聊 | ✅ | +| QQ 群聊(@机器人) | ✅ | +| 频道消息(@机器人) | ✅ | +| 频道私信 | ✅ | +| 文本消息 | ✅ 收发 | +| 图片消息 | ✅ 收发(群聊和单聊) | +| 文件消息 | ✅ 发送(群聊和单聊) | +| 定时任务 | ✅ 主动推送(每月每用户限 4 条) | + + +## 五、注意事项 + +- **被动消息限制**:QQ 单聊被动消息有效期为 60 分钟,每条消息最多回复 5 次;QQ 群聊被动消息有效期为 5 分钟。 +- **主动消息限制**:单聊和群聊每月主动消息上限为 4 条,在使用定时任务功能时需要注意这个限制。 +- **事件权限**:默认订阅 `GROUP_AND_C2C_EVENT`(QQ群/单聊)和 `PUBLIC_GUILD_MESSAGES`(频道公域消息),如需其他事件类型请在开放平台申请权限。 diff --git
a/docs/zh/channels/slack.mdx b/docs/zh/channels/slack.mdx new file mode 100644 index 00000000..1103f1c0 --- /dev/null +++ b/docs/zh/channels/slack.mdx @@ -0,0 +1,118 @@ +--- +title: Slack +description: 将 CowAgent 接入 Slack App +--- + +> 通过 Slack App 的 **Socket Mode** 接入 CowAgent,支持私聊(DM)与频道(@机器人 / 线程内回复触发)。Socket Mode 基于长连接,无需公网 IP 与回调地址,开箱即用。 + +## 一、接入步骤 + +### 步骤一:创建 Slack App + +1. 打开 [Slack API 应用管理页](https://api.slack.com/apps),点击 **Create New App** → **From scratch**。 +2. 填写 **App Name**(如 `CowAgent`),选择要安装的 **Workspace**,点击创建。 + +### 步骤二:开启 Socket Mode 并获取 App Token + +1. 左侧菜单进入 **Settings → Socket Mode**,打开 **Enable Socket Mode**。 +2. 系统会提示生成一个 **App-Level Token**,作用域勾选 `connections:write`,生成后保存这串以 `xapp-` 开头的 Token。 + + + Socket Mode 通过 WebSocket 长连接接收事件,无需在公网暴露回调 URL,适合本地或内网部署。 + + +### 步骤三:配置 Bot 权限并安装 + +1. 进入 **Features → OAuth & Permissions**,在 **Bot Token Scopes** 中点击 **Add an OAuth Scope**,逐项添加以下权限: + + ``` + app_mentions:read + channels:history + chat:write + commands + files:read + files:write + groups:history + im:history + mpim:history + users:read + ``` + + + `files:read` / `files:write` 用于图片、文件的收发;若仅需文本对话可省略。 + + +2. 进入 **Features → Event Subscriptions**,打开 **Enable Events**,在 **Subscribe to bot events** 中点击 **Add Bot User Event** 添加以下事件: + + ``` + app_mention + message.im + message.channels + ``` + + + 如需在私有频道使用,再添加 `message.groups`。 + +3. 进入 **Features → App Home**,在 **Show Tabs** 区域勾选 **Messages Tab**,并勾选下方的 **Allow users to send Slash commands and messages from the messages tab**(允许用户从消息标签页发送消息),否则私聊输入框会被关闭、无法给机器人发消息。 +4. 
回到 **OAuth & Permissions**,点击 **Install to Workspace** 完成安装,安装后获取以 `xoxb-` 开头的 **Bot User OAuth Token**。 + + + 若 Slack 客户端仍提示「向此应用发送消息的功能已关闭」,请确认已完成上一步的 App Home 设置,并刷新或重启 Slack 客户端(必要时把 App 从对话列表移除后重新打开)。 + + +### 步骤四:接入 CowAgent + + + + 打开 Web 控制台(本地链接:http://127.0.0.1:9899 ),选择 **通道** 菜单,点击 **接入通道**,选择 **Slack**,分别填入 Bot Token(`xoxb-`)和 App Token(`xapp-`),点击接入即可。 + + + 在 `config.json` 中添加以下配置后启动: + + ```json + { + "channel_type": "slack", + "slack_bot_token": "xoxb-xxxxxxxxxxxx", + "slack_app_token": "xapp-xxxxxxxxxxxx", + "slack_group_trigger": "mention_or_reply" + } + ``` + + | 参数 | 说明 | 默认值 | + | --- | --- | --- | + | `slack_bot_token` | Bot User OAuth Token,形如 `xoxb-...` | - | + | `slack_app_token` | App-Level Token(开启 Socket Mode 后生成),形如 `xapp-...` | - | + | `slack_group_trigger` | 频道触发方式:`mention_or_reply`(@或线程内回复)/ `mention_only`(仅@) / `all`(所有消息) | `mention_or_reply` | + + + +启动 Cow 后,日志中出现以下输出即表示接入成功: + +``` +[Slack] Bot logged in as user_id=U0XXXXXXX, team=Txxxxxxxx +[Slack] ✅ Slack bot ready, listening for events +``` + +## 二、功能说明 + +| 功能 | 支持情况 | +| --- | --- | +| 私聊(DM) | ✅ | +| 频道(@机器人 / 线程内回复) | ✅ | +| 文本消息 | ✅ 收发 | +| 图片消息 | ✅ 收发 | +| 文件消息 | ✅ 收发(PDF / Word / Excel 等) | +| 线程回复 | ✅ 回复发送至触发消息所在线程 | + + + Slack 通过线程(Thread)组织对话。机器人会把回复发送到触发消息所在的线程,频道内更整洁。 + + +## 三、使用 + +完成接入后: + +- **私聊(DM)**:在 Slack 左侧 **Apps** 中找到你的 App,直接发消息对话。 +- **频道**:把 App 邀请进频道(`/invite @你的App`),使用 `@你的App 你好` 触发对话;后续在同一线程内直接回复即可继续对话。 + +发送图片或文件时,可以在附件的输入框中 **添加文字说明**(描述/问题)一并发送,机器人会结合附件回答。也支持先发附件再发问题,两条消息会自动合并提问。 diff --git a/docs/zh/channels/telegram.mdx b/docs/zh/channels/telegram.mdx new file mode 100644 index 00000000..d7ab7a44 --- /dev/null +++ b/docs/zh/channels/telegram.mdx @@ -0,0 +1,112 @@ +--- +title: Telegram +description: 将 CowAgent 接入 Telegram Bot +--- + +> 通过 Telegram Bot API 接入 CowAgent,支持单聊与群聊(@机器人 / 回复机器人触发),使用 Long Polling 模式无需公网 IP,开箱即用。 + + +## 一、接入步骤 + +### 步骤一:通过 BotFather 创建 Bot + +1. 在 Telegram 中搜索并打开官方账号 [@BotFather](https://t.me/BotFather)。 +2. 
发送 `/newbot` 命令,按提示输入: + - **Bot 名称**(显示名,可中文,例如 `My CowAgent Bot`) + - **Bot 用户名**(必须以 `bot` 结尾,例如 `my_cowagent_bot`) +3. 创建成功后,BotFather 会返回一段 **HTTP API Token**(形如 `123456789:ABCdefGhIJKlmNoPQRsTUVwxyZ`),妥善保存。 + + + 这个 Token 等同于 Bot 的密码,请勿泄露。如果意外泄漏可向 `@BotFather` 发送 `/revoke` 重置。 + + +### 步骤二:(群聊使用)关闭 Privacy Mode + +仅使用单聊可跳过此步。Telegram Bot 默认开启 **Privacy Mode**,群聊中只能收到带 `@bot` 的命令(如 `/start@your_bot`)以及对 bot 消息的 reply;**普通的 `@bot 你好` 文字消息收不到**,会导致群聊无响应。 + +向 `@BotFather` 发送: + +1. `/setprivacy` +2. 选择刚才创建的 bot +3. 选择 `Disable` + + + 若设置后群聊仍无响应,可尝试把 Bot 从群里移除并重新拉入。 + + +### 步骤三:接入 CowAgent + + + + 打开 Web 控制台(本地链接:http://127.0.0.1:9899 ),选择 **通道** 菜单,点击 **接入通道**,选择 **Telegram**,填入 Bot Token,点击接入即可。 + + + 在 `config.json` 中添加以下配置后启动: + + ```json + { + "channel_type": "telegram", + "telegram_token": "123456789:ABCdefGhIJKlmNoPQRsTUVwxyZ", + "telegram_group_trigger": "mention_or_reply" + } + ``` + + | 参数 | 说明 | 默认值 | + | --- | --- | --- | + | `telegram_token` | BotFather 返回的 HTTP API Token | - | + | `telegram_group_trigger` | 群聊触发方式:`mention_or_reply`(@或回复机器人)/ `mention_only`(仅@) / `all`(所有消息) | `mention_or_reply` | + | `telegram_register_commands` | 启动时是否自动向 BotFather 注册命令菜单 | `true` | + | `telegram_proxy` | (可选)代理地址,如 `http://127.0.0.1:7890`、`socks5://127.0.0.1:1080`;运行环境无法直连 `api.telegram.org` 时配置,留空则使用环境变量 `HTTPS_PROXY` | `""` | + + + +启动 Cow 后,日志中出现以下输出即表示接入成功: + +``` +[Telegram] Bot logged in as @my_cowagent_bot (id=123456789) +[Telegram] Registered 10 bot commands +[Telegram] ✅ Telegram bot ready, polling for updates +``` + +## 二、功能说明 + +| 功能 | 支持情况 | +| --- | --- | +| 单聊 | ✅ | +| 群聊(@机器人 / 回复机器人) | ✅ | +| 文本消息 | ✅ 收发 | +| 图片消息 | ✅ 收发 | +| 语音消息 | ✅ 收发(接收 OGG/Opus,发送 OGG/Opus) | +| 视频消息 | ✅ 收发 | +| 文件消息 | ✅ 收发(PDF / Word / Excel 等) | +| 命令菜单 | ✅ 与 Web 控制台 slash 命令一致 | + +### 命令菜单 + +启动时会自动向 BotFather 注册命令菜单,用户在 Telegram 输入框输入 `/` 会出现下拉提示: + +| 命令 | 说明 | +| --- | --- | +| `/help` | 显示命令帮助 | +| `/status` | 查看运行状态 | +| `/context` | 查看对话上下文(`/context clear` 清除) | 
+| `/skill` | 技能管理(`/skill list`、`/skill install` 等) | +| `/memory` | 记忆管理(`/memory dream`) | +| `/knowledge` | 知识库管理(`/knowledge list` / `on` / `off`) | +| `/config` | 查看当前配置 | +| `/cancel` | 中止当前正在运行的 Agent 任务 | +| `/logs` | 查看最近日志 | +| `/version` | 查看版本 | + + + Telegram 命令菜单只能展示一级命令,子命令通过空格输入即可,例如 `/skill list`、`/context clear`。 + + +## 三、使用 + +完成接入后: + +- **单聊**:在 Telegram 中搜索你创建的 Bot 用户名(如 `@my_cowagent_bot`),点击 `Start` 即可开始对话。 +- **群聊**:把 Bot 拉进群,使用 `@bot 你好` 或 **回复 Bot 的某条消息** 触发对话。若群聊无响应,请检查 Privacy Mode 是否已按 [步骤二](#步骤二-群聊使用-关闭-privacy-mode) 关闭。 + +发送图片或文件时,可以直接在附件上方的输入框中 **添加 Caption**(描述/问题)一并发送,机器人会结合附件回答。也支持先发附件再发问题,两条消息会自动合并提问。 diff --git a/docs/zh/channels/web.mdx b/docs/zh/channels/web.mdx new file mode 100644 index 00000000..810da3da --- /dev/null +++ b/docs/zh/channels/web.mdx @@ -0,0 +1,95 @@ +--- +title: Web 控制台 +description: 通过 Web 控制台使用 CowAgent +--- + +Web 控制台是 CowAgent 的默认通道,启动后会自动运行,通过浏览器即可与 Agent 对话,并支持在线管理模型、技能、记忆、通道等配置。 + +## 配置 + +```json +{ + "channel_type": "web", + "web_host": "0.0.0.0", + "web_port": 9899, + "web_password": "", + "enable_thinking": false +} +``` + +| 参数 | 说明 | 默认值 | +| --- | --- | --- | +| `channel_type` | 设为 `web` | `web` | +| `web_host` | Web 服务监听地址,默认监听 `127.0.0.1`(仅本机),如需公网访问请改为 `0.0.0.0` 并设置密码 | `""` | +| `web_port` | Web 服务监听端口 | `9899` | +| `web_password` | 访问密码,留空表示不启用密码保护;监听 `0.0.0.0` 时建议设置 | `""` | +| `web_session_expire_days` | 登录会话有效天数 | `30` | +| `web_file_serve_root` | web 端可直接读取/发送的文件根目录,默认仅限用户主目录及 agent 工作空间;设为 `/` 可放开整个文件系统 | `"~"` | +| `enable_thinking` | 是否启用深度思考模式 | `false` | + +配置密码后,访问控制台时需先输入密码完成登录。登录状态默认保持 30 天,期间重启服务也无需重新登录。密码也支持在控制台的「配置」页面中在线修改。 + +## 访问地址 + +启动项目后访问: + +- 本地运行:`http://localhost:9899` +- 服务器运行:`http://:9899` + + + 请确保服务器防火墙和安全组已放行对应端口。 + + +## 功能介绍 + +### 对话界面 + +支持流式输出,可实时展示 Agent 的思考过程(Reasoning)和工具调用过程(Tool Calls),更直观地观察 Agent 的决策过程。深度思考功能可通过配置或控制台的「Agent 配置」开关控制。 + + + +#### 多会话管理 + +对话界面支持多会话(Session)管理,所有会话记录持久化存储在数据库中: + +- **会话列表**:点击左侧历史会话图标可展开/收起会话列表面板,支持滚动加载全部历史会话 
+- **AI 生成标题**:新会话在首轮对话完成后,自动调用模型生成简短的会话摘要标题 +- **新建会话**:点击会话列表顶部的「新对话」按钮或输入区的 `+` 按钮创建新会话 +- **删除会话**:点击会话项的删除按钮,确认后永久删除该会话及其所有消息 +- **清除上下文**:点击输入区的清除按钮,在当前会话中插入一条分隔线,分隔线以上的消息仍然展示但不再作为模型的上下文输入 + +### 模型管理 + +支持在线管理不同模型厂商的文本、图像、语音、向量模型配置,无需手动编辑配置文件: + + + +### 技能管理 + +支持在线查看和管理 Agent 技能(Skills): + + + +### 记忆管理 + +支持在线查看和管理 Agent 记忆: + + + +### 通道管理 + +支持在线管理接入通道,支持实时连接/断开操作: + + + +### 定时任务 + +支持在线查看和管理定时任务,包括一次性任务、固定间隔、Cron 表达式等多种调度方式的可视化管理: + + + +### 日志 + +支持在线实时查看 Agent 运行日志,便于监控运行状态和排查问题: + + diff --git a/docs/zh/channels/wechat-kf.mdx b/docs/zh/channels/wechat-kf.mdx new file mode 100644 index 00000000..ca83aaed --- /dev/null +++ b/docs/zh/channels/wechat-kf.mdx @@ -0,0 +1,130 @@ +--- +title: 微信客服 +description: 将 CowAgent 接入微信客服(WeCom Customer Service) +--- + +通过把企业微信自建应用绑定到「微信客服」账号,CowAgent 可以接管来自外部微信用户的客服咨询,并可在小程序、公众号、视频号及视频号小店等场景中通过链接或二维码触达微信用户。 + + + 微信客服只能使用 Docker 部署或服务器 Python 部署,需要公网可达的回调地址,不支持本地运行模式。 + + +## 一、准备 + +需要的资源: + +1. 一台服务器(有公网 IP) +2. 注册并已认证的企业微信 +3. 已开通「微信客服」能力 + + + 建议**单独再创建一个企微自建应用**用于微信客服,不要复用已有的 `wechatcom_app` 应用,否则两个通道会争抢同一个回调地址。 + + +## 二、创建企业微信自建应用 + +1. 在 [企业微信管理后台](https://work.weixin.qq.com/wework_admin/frame#apps),点击 **应用管理 → 创建应用**: + + + +2. 点击 **我的企业**,在最下方获取 **企业ID**(后续填写到 `wechat_kf_corp_id`): + + + +3. 进入上一步创建的应用,点击 Secret 旁的「**查看**」,Secret 会推送到管理员手机端的企业微信里查看: + + + +4. 
进入应用 **接收消息 → 设置API接收**,点击右侧「**随机获取**」生成 **Token** 和 **EncodingAESKey** 并保存: + + + + + 此时保存 API 接收配置会失败,因为程序还未启动,等项目运行后再回来保存。 + + +## 三、配置和运行 + +填入上一步收集到的 4 个字段(Corp ID / Secret / Token / EncodingAESKey): + + + + 启动 Cow 项目后打开 Web 控制台,选择 **通道** 菜单,点击 **接入通道**,选择 **微信客服**,依次填入 Corp ID / Secret / Token / AES Key(端口默认 9888,可改),点击接入即可。 + + + + + 在 `config.json` 中添加以下配置(各参数与企业微信后台的对应关系见上方截图): + + ```json + { + "channel_type": "wechat_kf", + "wechat_kf_corp_id": "YOUR_CORP_ID", + "wechat_kf_secret": "YOUR_SECRET", + "wechat_kf_token": "YOUR_TOKEN", + "wechat_kf_aes_key": "YOUR_AES_KEY", + "wechat_kf_port": 9888 + } + ``` + + | 参数 | 说明 | + | --- | --- | + | `wechat_kf_corp_id` | 企业 ID | + | `wechat_kf_secret` | 绑定到微信客服的那个企微自建应用的 Secret | + | `wechat_kf_token` | API 接收配置中的 Token | + | `wechat_kf_aes_key` | API 接收配置中的 EncodingAESKey | + | `wechat_kf_port` | 监听端口,默认 9888 | + + + +接入完成后启动程序(Web 控制台方式会自动重启),日志中出现 `Listening on http://0.0.0.0:9888/wxkf/` 表示运行成功,需要将该端口对外开放(如在云服务器安全组中放行)。 + +接着回到企业微信「接收消息 → 设置API接收」,将回调 URL 填为 `http://:9888/wxkf/`,点击保存。保存成功后还需将服务器 IP 添加到 **企业可信IP** 中,否则无法收发消息: + + + + + + + 如遇到 URL 配置回调不通过或配置失败: + 1. 确保服务器防火墙关闭且安全组放行监听端口(默认 9888) + 2. 仔细检查 Token、Secret、EncodingAESKey 等参数配置是否一致,URL 格式是否正确 + 3. 
认证企业微信需要配置与主体一致的备案域名 + + +## 四、绑定微信客服账号 + +进入企业微信管理后台 **微信客服** 页面,创建客服账号并与上一步的企微自建应用绑定: + + + + + + + +绑定完成后,进入 **微信客服 → 微信客服账号详情**,在「**接入链接**」一栏: + +- 点击「**复制链接**」可获得形如 `https://work.weixin.qq.com/kfid/kfcd83e5896b9ba07be` 的访问链接 +- 点击「**生成二维码**」可获得对应的二维码 + +把链接或二维码推给微信客户使用即可: + + + +## 五、使用 + +微信用户通过链接或二维码进入客服对话后,即可与 AI 进行多轮对话,支持文本、图片、语音等消息: + + + +除此之外,基于微信生态官方能力,还可将微信客服应用在公众号、小程序、视频号及视频号小店等场景,详情可查看企业微信管理后台 [微信客服 → 接入场景](https://work.weixin.qq.com/wework_admin/frame#/app/servicer) 的相关说明: + + + +## 常见问题 + +需要确保已安装以下依赖: + +```bash +pip install websocket-client pycryptodome +``` diff --git a/docs/zh/channels/wechatmp.mdx b/docs/zh/channels/wechatmp.mdx new file mode 100644 index 00000000..917df41d --- /dev/null +++ b/docs/zh/channels/wechatmp.mdx @@ -0,0 +1,72 @@ +--- +title: 微信公众号 +description: 将 CowAgent 接入微信公众号 +--- + +CowAgent 支持接入个人订阅号和企业服务号两种公众号类型。 + +| 类型 | 要求 | 特点 | +| --- | --- | --- | +| **个人订阅号** | 个人可申请 | 收到消息时会回复一条提示,回复生成后需用户主动发消息获取 | +| **企业服务号** | 企业申请,需通过微信认证开通客服接口 | 回复生成后可主动推送给用户 | + + + 公众号仅支持服务器和 Docker 部署,不支持本地运行。需额外安装扩展依赖:`pip3 install -r requirements-optional.txt` + + +## 一、个人订阅号 + +在 `config.json` 中添加以下配置: + +```json +{ + "channel_type": "wechatmp", + "single_chat_prefix": [""], + "wechatmp_app_id": "wx73f9******d1e48", + "wechatmp_app_secret": "YOUR_APP_SECRET", + "wechatmp_aes_key": "", + "wechatmp_token": "YOUR_TOKEN", + "wechatmp_port": 80 +} +``` + +### 配置步骤 + +这些配置需要和 [微信公众号后台](https://mp.weixin.qq.com/advanced/advanced?action=dev&t=advanced/dev) 中的保持一致,进入页面后,在左侧菜单选择 **设置与开发 → 基本配置 → 服务器配置**,按下图进行配置: + + + +1. 在公众平台启用开发者密码(对应配置 `wechatmp_app_secret`),并将服务器 IP 填入白名单 +2. 按上图填写 `config.json` 中与公众号相关的配置,要与公众号后台的配置一致 +3. 启动程序,启动后会监听 80 端口(若无权限监听,则在启动命令前加上 `sudo`;若 80 端口已被占用,则关闭该占用进程) +4. 
在公众号后台 **启用服务器配置** 并提交,保存成功则表示已成功配置。注意 **"服务器地址(URL)"** 需要配置为 `http://{HOST}/wx` 的格式,其中 `{HOST}` 可以是服务器的 IP 或域名 + +随后关注公众号并发送消息即可看到以下效果: + + + +由于受订阅号限制,回复内容较短的情况下(15s 内),可以立即完成回复,但耗时较长的回复则会先回复一句 "正在思考中",后续需要用户输入任意文字主动获取答案,而服务号则可以通过客服接口解决这一问题。 + + + **语音识别**:可利用微信自带的语音识别功能,需要在公众号管理页面的 "设置与开发 → 接口权限" 页面开启 "接收语音识别结果"。 + + +## 二、企业服务号 + +企业服务号与上述个人订阅号的接入过程基本相同,差异如下: + +1. 在公众平台申请企业服务号并完成微信认证,在接口权限中确认已获得 **客服接口** 的权限 +2. 在 `config.json` 中设置 `"channel_type": "wechatmp_service"`,其他配置与上述订阅号相同 +3. 交互效果上,即使是较长耗时的回复,也可以主动推送给用户,无需用户手动获取 + +```json +{ + "channel_type": "wechatmp_service", + "single_chat_prefix": [""], + "wechatmp_app_id": "YOUR_APP_ID", + "wechatmp_app_secret": "YOUR_APP_SECRET", + "wechatmp_aes_key": "", + "wechatmp_token": "YOUR_TOKEN", + "wechatmp_port": 80 +} +``` diff --git a/docs/zh/channels/wecom-bot.mdx b/docs/zh/channels/wecom-bot.mdx new file mode 100644 index 00000000..7275639f --- /dev/null +++ b/docs/zh/channels/wecom-bot.mdx @@ -0,0 +1,90 @@ +--- +title: 企微智能机器人 +description: 将 CowAgent 接入企业微信智能机器人(长连接模式) +--- + +> 通过企业微信智能机器人接入CowAgent,支持企业内部单聊和内部群聊,无需公网 IP,使用 WebSocket 长连接模式,支持Markdown渲染和流式输出。 + + + 智能机器人与企业微信自建应用是两种不同的接入方式。智能机器人使用 WebSocket 长连接,无需服务器公网 IP 和域名,配置更简单。 + + +## 一、接入方式 + +### 方式一:扫码一键接入(推荐) + +无需提前创建机器人,启动 Cow 项目后打开 Web 控制台(本地链接:http://127.0.0.1:9899/),选择 **通道** 菜单,点击**接入通道**,选择**企微智能机器人**,切换到「扫码接入」模式,使用**企业微信**扫码即可自动完成机器人创建和接入。 + + + + + 扫码成功后,可在企业微信工作台 - **智能机器人**页面对机器人进行进一步配置,包括修改名称、头像、可见范围等。 + + +### 方式二:手动创建接入 + +需要先在企业微信中创建智能机器人并获取 Bot ID 和 Secret,再通过 Web 控制台或配置文件接入。 + +**步骤一:创建智能机器人** + +1. 打开企业微信客户端,进入工作台,点击**智能机器人**: + + + +2. 点击创建机器人 - 手动创建: + + + +3. 右侧窗口拖到最下方,选择**API模式创建**: + + + +4. 
设置机器人名称、头像、可见范围,并选择**长连接模式**,记录下 **Bot ID** 和 **Secret** 信息后点击保存。 + +**步骤二:接入 CowAgent** + + + + 打开 Web 控制台,选择**通道**菜单,点击**接入通道**,选择**企微智能机器人**,切换到「手动填写」模式,输入 Bot ID 和 Secret,点击接入即可。 + + + + + 在 `config.json` 中添加以下配置后启动程序: + + ```json + { + "channel_type": "wecom_bot", + "wecom_bot_id": "YOUR_BOT_ID", + "wecom_bot_secret": "YOUR_SECRET" + } + ``` + + | 参数 | 说明 | + | --- | --- | + | `wecom_bot_id` | 智能机器人的 BotID | + | `wecom_bot_secret` | 智能机器人的 Secret | + + + +日志显示 `[WecomBot] Subscribe success` 即表示连接成功。 + +## 二、功能说明 + +| 功能 | 支持情况 | +| --- | --- | +| 单聊 | ✅ | +| 群聊(@机器人) | ✅ | +| 文本消息 | ✅ 收发 | +| 图片消息 | ✅ 收发 | +| 文件消息 | ✅ 收发 | +| 流式回复 | ✅ | +| 定时任务主动推送 | ✅ | + +## 三、使用 + +在企业微信中搜索创建的机器人名称,即可开始单聊对话。 + +如需在企微内部群聊中使用,将机器人添加到群中,@机器人发送消息即可。 + + diff --git a/docs/zh/channels/wecom.mdx b/docs/zh/channels/wecom.mdx new file mode 100644 index 00000000..e0ed6fbc --- /dev/null +++ b/docs/zh/channels/wecom.mdx @@ -0,0 +1,98 @@ +--- +title: 企微自建应用 +description: 将 CowAgent 接入企业微信自建应用 +--- + +通过企业微信自建应用接入 CowAgent,支持企业内部人员单聊使用。 + + + 企业微信只能使用 Docker 部署或服务器 Python 部署,不支持本地运行模式。 + + +## 一、准备 + +需要的资源: + +1. 一台服务器(有公网 IP) +2. 注册一个企业微信(个人也可注册,但无法认证) +3. 认证企业微信还需要对应主体备案的域名 + +## 二、创建企业微信应用 + +1. 在 [企业微信管理后台](https://work.weixin.qq.com/wework_admin/frame#profile) 点击 **我的企业**,在最下方获取 **企业ID**(后续填写到 `wechatcom_corp_id` 字段中)。 + +2. 切换到 **应用管理**,点击创建应用: + + + +3. 进入应用创建页面,记录 `AgentId` 和 `Secret`: + + + +4. 
点击 **设置API接收**,配置应用接口: + + + +- URL 格式为 `http://ip:port/wxcomapp`(认证企业需使用备案域名) +- 随机获取 `Token` 和 `EncodingAESKey` 并保存 + + + 此时保存 API 接收配置会失败,因为程序还未启动,等项目运行后再回来保存。 + + +## 三、配置和运行 + +在 `config.json` 中添加以下配置(各参数与企业微信后台的对应关系见上方截图): + +```json +{ + "channel_type": "wechatcom_app", + "single_chat_prefix": [""], + "wechatcom_corp_id": "YOUR_CORP_ID", + "wechatcomapp_token": "YOUR_TOKEN", + "wechatcomapp_secret": "YOUR_SECRET", + "wechatcomapp_agent_id": "YOUR_AGENT_ID", + "wechatcomapp_aes_key": "YOUR_AES_KEY", + "wechatcomapp_port": 9898 +} +``` + +| 参数 | 说明 | +| --- | --- | +| `wechatcom_corp_id` | 企业 ID | +| `wechatcomapp_token` | API 接收配置中的 Token | +| `wechatcomapp_secret` | 应用的 Secret | +| `wechatcomapp_agent_id` | 应用的 AgentId | +| `wechatcomapp_aes_key` | API 接收配置中的 EncodingAESKey | +| `wechatcomapp_port` | 监听端口,默认 9898 | + +配置完成后启动程序。当后台日志显示 `http://0.0.0.0:9898/` 时说明程序运行成功,需要将该端口对外开放(如在云服务器安全组中放行)。 + +程序启动后,回到企业微信后台保存 **消息服务器配置**,保存成功后还需将服务器 IP 添加到 **企业可信IP** 中,否则无法收发消息: + + + + + 如遇到 URL 配置回调不通过或配置失败: + 1. 确保服务器防火墙关闭且安全组放行监听端口 + 2. 仔细检查 Token、Secret Key 等参数配置是否一致,URL 格式是否正确 + 3. 认证企业微信需要配置与主体一致的备案域名 + + +## 四、使用 + +在企业微信中搜索刚创建的应用名称,即可直接对话: + + + +如需让外部个人微信用户使用,可在 **我的企业 → 微信插件** 中分享邀请关注二维码,个人微信扫码关注后即可与应用对话: + + + +## 常见问题 + +需要确保已安装以下依赖: + +```bash +pip install websocket-client pycryptodome +``` diff --git a/docs/zh/channels/weixin.mdx b/docs/zh/channels/weixin.mdx new file mode 100644 index 00000000..c19974a4 --- /dev/null +++ b/docs/zh/channels/weixin.mdx @@ -0,0 +1,74 @@ +--- +title: 微信 +description: 将 CowAgent 接入个人微信(基于官方接口) +--- + +> 接入个人微信,扫码登录即可使用,支持文本、图片、语音、文件、视频等消息的私聊收发。通过微信官方API进行接入,无安全风险,接入后会在会话中新增一个机器人助手,不影响当前账号的使用。 + +## 一、配置和运行 + +### 方式一:Web 控制台接入 + +启动 Cow 项目后打开 Web 控制台 (本地链接为: http://127.0.0.1:9899/ ),选择 **通道** 菜单,点击 **接入通道**,选择 **微信**,点击接入后按照提示扫码登录。 + + + +### 方式二:配置文件接入 + +在 `config.json` 中设置 `channel_type` 为 `weixin`: + +```json +{ + "channel_type": "weixin" +} +``` + +启动程序后,终端会显示二维码,使用微信扫码授权即可完成登录。 + + + + + + 1. 
兼容历史配置:`channel_type` 设为 `wx` 同样可以启用微信通道。 + 2. 注意微信客户端需要更新至 8.0.69 版本或以上 + + +## 二、使用说明 + +微信扫码并进行授权确认后,即可完成接入并开始对话。接入微信后会在对话中创建出一个机器人助理,不会对已有账号的正常使用有任何影响。 + +> 你可以通过搜索"微信ClawBot"随时找到这个机器人,还可以修改这个机器人的头像、备注等信息,将机器人置顶在消息列表等。 + + + + + +## 三、登录说明 + +### 扫码登录 + +首次启动时,终端会显示一个二维码(有效期约 2 分钟)。使用微信扫描二维码并在手机上确认后即可完成登录。 + +- 二维码过期后会自动刷新并重新显示 +- `requirements.txt` 中已默认包含 `qrcode` 依赖,安装后可在终端直接渲染二维码图案 + +### 凭证保存 + +登录成功后,凭证会自动保存至 `~/.weixin_cow_credentials.json`,下次启动时无需重新扫码。 + +如需重新登录,删除该凭证文件后重启程序即可。 + +### Session 过期 + +当微信 session 过期时(errcode -14),程序会自动清除旧凭证并重新发起扫码登录,无需手动干预。 + +## 四、功能说明 + +| 功能 | 支持情况 | +| --- | --- | +| 单聊 | ✅ | +| 文本消息 | ✅ 收发 | +| 图片消息 | ✅ 收发 | +| 文件消息 | ✅ 收发 | +| 视频消息 | ✅ 收发 | +| 语音消息 | ✅ 接收 (自带语音识别) | diff --git a/docs/zh/cli/general.mdx b/docs/zh/cli/general.mdx new file mode 100644 index 00000000..36af1783 --- /dev/null +++ b/docs/zh/cli/general.mdx @@ -0,0 +1,124 @@ +--- +title: 常用命令 +description: 查看状态、管理配置和上下文等常用命令 +--- + +以下命令支持在对话中使用 `/` 前缀,也支持在终端中使用 `cow` 前缀(部分命令仅对话可用)。 + + + 在 Web 控制台中输入 `/` 会自动弹出命令提示,支持键盘上下选择和 Tab 补全。 + + +## help + +显示所有可用命令的帮助信息。 + +```text +/help +``` + +## status + +查看当前会话和服务的运行状态,包括进程信息、模型配置、会话消息数量和已加载技能数量。 + +```text +/status +``` + +输出示例: + +``` +🐮 CowAgent Status + +Process: PID 12345 | Running 2h 15m +Version: 2.0.4 +Channel: web +Model: MiniMax-M2.5 +Mode: agent + +Session: 12 messages | 8 skills loaded +``` + +## cancel + +中止当前会话正在运行的 Agent 任务。在 Agent 执行长时间任务(例如多轮工具调用、长流式输出)时,可随时发送 `/cancel`,Agent 会在下一次工具执行前停止。Web 端、微信、企业微信、飞书等各通道均可使用。 + +```text +/cancel +``` + +## config + +查看或修改运行时配置。修改后立即生效,无需重启服务。 + +**查看所有可配置项:** + +```text +/config +``` + +**查看单个配置项:** + +```text +/config model +``` + +**修改配置项:** + +```text +/config model deepseek-v4-flash +``` + +**支持修改的配置项:** + +| 配置项 | 说明 | 示例值 | +| --- | --- | --- | +| `model` | AI 模型名称 | `deepseek-v4-flash` | +| `agent_max_context_tokens` | 最大上下文 tokens | `40000` | +| `agent_max_context_turns` | 最大上下文记忆轮次 | `30` | +| `agent_max_steps` | 单次任务最大决策步数 | `15` | +| 
`enable_thinking` | 是否启用深度思考模式 | `true` / `false` | + + + 修改 `model` 时,系统会自动匹配对应的模型调用方式。配置会写入 `config.json` 并持久保存。 + + +## context + +查看当前会话的上下文信息,包括消息数量、内容长度等统计。 + +```text +/context +``` + +**清空当前会话上下文:** + +```text +/context clear +``` + + + 清空上下文后,Agent 会"忘记"之前的对话内容,适用于切换话题或释放上下文空间。 + + +## logs + +查看最近的服务日志,默认显示最近 20 行,最多 50 行。 + +```text +/logs +``` + +**指定行数:** + +```text +/logs 50 +``` + +## version + +显示当前 CowAgent 版本号。 + +```text +/version +``` diff --git a/docs/zh/cli/index.mdx b/docs/zh/cli/index.mdx new file mode 100644 index 00000000..f6462ecb --- /dev/null +++ b/docs/zh/cli/index.mdx @@ -0,0 +1,98 @@ +--- +title: 命令总览 +description: CowAgent 命令系统 — 终端 CLI 和对话命令 +--- + +CowAgent 提供两种命令交互方式: + +- **终端CLI** — 在系统终端中执行 `cow <命令>`,用于服务管理、技能管理等运维操作 +- **对话命令** — 在对话中输入 `/<命令>` 或 `cow <命令>`,用于查看状态、管理技能、调整配置等 + +## 终端命令 + +通过一键安装脚本部署后,`cow` 命令会自动可用。手动安装的用户需要在项目根目录下额外执行: + +```bash +pip install -e . +``` + +安装后即可在任意位置使用 `cow` 命令: + +```bash +cow help +``` + +输出示例: + +``` +CowAgent CLI + +Usage: cow + +Service: + start Start the CowAgent service + stop Stop the CowAgent service + restart Restart the CowAgent service + update Update code and restart service + status Show service status + logs View service logs + +Skills: + skill Manage skills (list / search / install / uninstall ...) 
+ +Memory & Knowledge: + memory Memory distillation (dream) + knowledge View knowledge base stats and structure + +Others: + help Show this help message + version Show version +``` + +## 对话命令 + +在 Web 控制台或任意接入渠道的对话中,支持输入以 `/` 开头的命令: + +| 命令 | 说明 | +| --- | --- | +| `/help` | 显示命令帮助 | +| `/status` | 查看服务状态和配置 | +| `/cancel` | 中止当前正在运行的 Agent 任务 | +| `/config` | 查看或修改运行时配置 | +| `/skill` | 管理技能(安装、卸载、启用、禁用等) | +| `/memory dream [N]` | 手动触发记忆蒸馏(默认 3 天,最大 30) | +| `/knowledge` | 查看知识库统计信息 | +| `/knowledge list` | 查看知识库目录结构 | +| `/knowledge on\|off` | 开启或关闭知识库 | +| `/context` | 查看当前会话上下文信息 | +| `/context clear` | 清空当前会话上下文 | +| `/logs` | 查看最近日志 | +| `/version` | 显示版本号 | + + + 对话命令中 `/start`、`/stop`、`/restart` 等服务管理命令会提示到终端中执行,因为它们涉及进程操作。 + + +## 命令对照表 + +以下是各命令在终端和对话中的可用性: + +| 命令 | 终端 (`cow`) | 对话 (`/`) | +| --- | :---: | :---: | +| help | ✓ | ✓ | +| version | ✓ | ✓ | +| status | ✓ | ✓ | +| logs | ✓ | ✓ | +| cancel | ✗ | ✓ | +| config | ✗ | ✓ | +| context | — | ✓ | +| memory (子命令) | ✗ | ✓ | +| knowledge (子命令) | ✓ | ✓ | +| skill (子命令) | ✓ | ✓ | +| start / stop / restart | ✓ | ✗ | +| update | ✓ | ✗ | +| install-browser | ✓ | ✗ | + + + `context` 在终端中仅提示到对话中使用。`config` 仅支持在对话中修改。 + diff --git a/docs/zh/cli/memory-knowledge.mdx b/docs/zh/cli/memory-knowledge.mdx new file mode 100644 index 00000000..3dc6713d --- /dev/null +++ b/docs/zh/cli/memory-knowledge.mdx @@ -0,0 +1,77 @@ +--- +title: 记忆与知识库 +description: 记忆蒸馏和知识库管理命令 +--- + +## memory + +管理 Agent 的长期记忆系统。 + +### memory dream + +手动触发记忆蒸馏(Deep Dream),整理近期的天级记忆,蒸馏合并到 MEMORY.md,并生成梦境日记。 + +```text +/memory dream [N] +``` + +- `N`:整理近 N 天的记忆,默认 3 天,最大 30 天 +- 蒸馏在后台异步执行,完成后会在对话中通知结果 +- 无需等待 Agent 初始化,首次对话前即可使用 + +**示例:** + +```text +/memory dream # 整理近 3 天 +/memory dream 7 # 整理近 7 天 +/memory dream 30 # 整理近 30 天(全量) +``` + +蒸馏完成后,Web 端会收到带有跳转链接的通知,可直接查看更新后的 MEMORY.md 和梦境日记。 + + + 系统每天 23:55 会自动执行一次蒸馏(lookback 1 天)。手动触发适用于首次部署后的历史整理,或需要立即更新记忆时使用。 + + +## knowledge + +查看和管理个人知识库。默认显示知识库统计信息。 + +```text +/knowledge +``` + +输出示例: 
+ +``` +📚 知识库 + +- 状态:已开启 +- 页面数:12 +- 总大小:45.2 KB +- 分类明细: + - concepts/: 5 篇 + - entities/: 4 篇 + - sources/: 3 篇 +``` + +### knowledge list + +查看知识库目录树结构。 + +```text +/knowledge list +``` + +### knowledge on / off + +开启或关闭知识库。关闭后不再注入知识提示词和索引知识文件。 + +```text +/knowledge on +/knowledge off +``` + + + 终端 CLI 中 `cow knowledge` 和 `cow knowledge list` 可用,但 `on|off` 仅支持在对话中使用(需实时生效)。 + diff --git a/docs/zh/cli/process.mdx b/docs/zh/cli/process.mdx new file mode 100644 index 00000000..0fc38539 --- /dev/null +++ b/docs/zh/cli/process.mdx @@ -0,0 +1,134 @@ +--- +title: 进程管理 +description: 使用 cow 命令管理 CowAgent 进程的启动、停止、重启、更新等操作 +--- + +进程管理命令用于控制 CowAgent 后台进程的生命周期。这些命令仅在终端中可用。 + +## start + +启动 CowAgent 服务。默认以后台进程方式运行,并自动跟踪日志输出。 + +```bash +cow start +``` + +**选项:** + +| 选项 | 说明 | +| --- | --- | +| `-f`, `--foreground` | 前台运行,不以后台守护进程方式启动 | +| `--no-logs` | 启动后不自动跟踪日志 | + +## stop + +停止正在运行的 CowAgent 服务。 + +```bash +cow stop +``` + +## restart + +重启 CowAgent 服务(先停止再启动)。 + +```bash +cow restart +``` + +**选项:** + +| 选项 | 说明 | +| --- | --- | +| `--no-logs` | 重启后不自动跟踪日志 | + +## update + +更新代码并重启服务。自动执行以下流程: + +1. 拉取最新代码(`git pull`) +2. 停止当前服务 +3. 更新 Python 依赖 +4. 重新安装 CLI +5. 
启动服务 + +```bash +cow update +``` + + + 如果 `git pull` 失败(如存在本地未提交的修改),更新会中止,服务不受影响。 + + +## status + +查看 CowAgent 服务运行状态,包括进程信息、版本号、当前配置的模型和通道。 + +```bash +cow status +``` + +输出示例: + +``` +🐮 CowAgent Status + Status: ● Running (PID: 12345) + Version: 2.0.4 + Channel: web + Model: MiniMax-M2.5 + Mode: agent +``` + +## logs + +查看服务日志。 + +```bash +cow logs +``` + +**选项:** + +| 选项 | 说明 | 默认值 | +| --- | --- | --- | +| `-f`, `--follow` | 持续跟踪日志输出 | 否 | +| `-n`, `--lines` | 显示最近 N 行 | 50 | + +示例: + +```bash +# 查看最近 100 行日志 +cow logs -n 100 + +# 持续跟踪日志 +cow logs -f +``` + +## install-browser + +安装 Playwright 和 Chromium 浏览器,用于启用 [浏览器工具](/zh/tools/browser)。 + +```bash +cow install-browser +``` + + + 仅在需要使用浏览器工具(如网页浏览、截图等)时才需要安装。 + + +## run.sh 兼容 + +如果未安装 Cow CLI,也可以使用 `run.sh` 脚本管理服务: + +| cow 命令 | run.sh 等效命令 | +| --- | --- | +| `cow start` | `./run.sh start` | +| `cow stop` | `./run.sh stop` | +| `cow restart` | `./run.sh restart` | +| `cow update` | `./run.sh update` | +| `cow status` | `./run.sh status` | +| `cow logs` | `./run.sh logs` | + + + 推荐使用 `cow` 命令,它提供更简洁的语法和更丰富的功能。通过一键安装脚本部署时 `cow` 命令会自动安装。 + diff --git a/docs/zh/cli/skill.mdx b/docs/zh/cli/skill.mdx new file mode 100644 index 00000000..3b4a8aee --- /dev/null +++ b/docs/zh/cli/skill.mdx @@ -0,0 +1,218 @@ +--- +title: 技能管理 +description: 通过命令安装、卸载、启用、禁用和管理技能 +--- + +技能管理命令用于安装、查询和管理 CowAgent 的技能。在对话中使用 `/skill <子命令>`,在终端中使用 `cow skill <子命令>`。 + +## list + +列出已安装的技能及其状态。 + + +```text 对话 +/skill list +``` + +```bash 终端 +cow skill list +``` + + +输出示例: + +``` +📦 已安装的技能 (3/4) + +✅ pptx + Use this skill any time a .pptx file is involved… + 来源: cowhub + +✅ skill-creator + Create, install, or update skills… + 来源: builtin + +⏸️ image-vision (已禁用) + 图片理解和视觉分析 + 来源: builtin +``` + +**浏览技能广场**(查看 Hub 上所有可安装的技能): + + +```text 对话 +/skill list --remote +``` + +```bash 终端 +cow skill list --remote +``` + + +**选项:** + +| 选项 | 说明 | 默认值 | +| --- | --- | --- | +| `--remote`, `-r` | 浏览 Skill Hub 远程技能列表 | 否 | +| `--page` | 远程列表分页页码 | 
1 | + +## search + +在技能广场中搜索技能。 + + +```text 对话 +/skill search pptx +``` + +```bash 终端 +cow skill search pptx +``` + + +## install + +安装技能。通过统一的 `install` 命令,可一键安装来自 **Cow 技能广场、GitHub、ClawHub** 以及任意 URL(zip 压缩包、SKILL.md 链接)上的技能,无需手动下载和配置。 + +**从 Cow 技能广场安装(推荐):** + + +```text 对话 +/skill install pptx +``` + +```bash 终端 +cow skill install pptx +``` + + +**从 GitHub 安装:** + + +```text 对话 +# 安装仓库中的所有技能(自动扫描包含 SKILL.md 的子目录) +/skill install larksuite/cli + +# 指定子目录,只安装单个技能 +/skill install https://github.com/larksuite/cli/tree/main/skills/lark-im + +# 使用 # 指定子目录 +/skill install larksuite/cli#skills/lark-minutes +``` + +```bash 终端 +# 安装仓库中的所有技能(自动扫描包含 SKILL.md 的子目录) +cow skill install larksuite/cli + +# 指定子目录,只安装单个技能 +cow skill install https://github.com/larksuite/cli/tree/main/skills/lark-im + +# 使用 # 指定子目录 +cow skill install larksuite/cli#skills/lark-minutes +``` + + +支持完整的 GitHub URL 和 `owner/repo` 简写。对于 mono-repo(一个仓库中包含多个技能),不指定子目录时会自动发现并批量安装所有技能;指定子目录时只安装该目录下的技能。 + +**从 ClawHub 安装:** + + +```text 对话 +/skill install clawhub:baidu-search +``` + +```bash 终端 +cow skill install clawhub:baidu-search +``` + + +**从 URL 安装:** + + +```text 对话 +# 从 zip 压缩包安装(支持单个或批量) +/skill install https://cdn.link-ai.tech/skills/pptx.zip + +# 从 SKILL.md 链接安装 +/skill install https://example.com/path/to/SKILL.md +``` + +```bash 终端 +# 从 zip 压缩包安装(支持单个或批量) +cow skill install https://cdn.link-ai.tech/skills/pptx.zip + +# 从 SKILL.md 链接安装 +cow skill install https://example.com/path/to/SKILL.md +``` + + +支持从 zip / tar.gz 压缩包 URL 安装,解压后自动扫描包含 `SKILL.md` 的目录,支持单个或批量安装。也支持直接从 `SKILL.md` 文件链接安装,会自动解析技能名称和描述。 + +安装成功后会显示技能名称、描述和来源,例如: + +``` +✅ baidu-search + 百度搜索:使用百度搜索引擎检索信息… + 来源: clawhub +``` + +## uninstall + +卸载已安装的技能。 + + +```text 对话 +/skill uninstall pptx +``` + +```bash 终端 +cow skill uninstall pptx +``` + + + + 卸载操作会删除技能目录下的所有文件,此操作不可恢复。 + + +## enable / disable + +启用或禁用技能,禁用后技能不会被 Agent 调用。 + + +```text 对话 +/skill enable pptx +/skill disable pptx +``` + +```bash 终端 +cow skill enable pptx +cow 
skill disable pptx +``` + + +## info + +查看已安装技能的详细信息,包括 `SKILL.md` 内容预览。 + + +```text 对话 +/skill info pptx +``` + +```bash 终端 +cow skill info pptx +``` + + +## 技能来源 + +安装的技能会记录来源信息,可通过 `/skill list` 查看: + +| 来源标识 | 说明 | +| --- | --- | +| `builtin` | 项目内置技能 | +| `cowhub` | 从 CowAgent Skill Hub 安装 | +| `github` | 从 GitHub URL 直接安装 | +| `clawhub` | 从 ClawHub 安装 | +| `url` | 从 SKILL.md URL 安装 | +| `local` | 本地创建的技能 | diff --git a/docs/zh/guide/manual-install.mdx b/docs/zh/guide/manual-install.mdx new file mode 100644 index 00000000..e6b81b42 --- /dev/null +++ b/docs/zh/guide/manual-install.mdx @@ -0,0 +1,182 @@ +--- +title: 手动安装 +description: 手动部署 CowAgent(源码 / Docker) +--- + +## 源码部署 + +### 1. 克隆项目代码 + +```bash +git clone https://github.com/zhayujie/CowAgent +cd CowAgent/ +``` + + + 若遇到网络问题可使用国内仓库地址:https://gitee.com/zhayujie/CowAgent + + +### 2. 安装依赖 + +核心依赖(必选): + +```bash +pip3 install -r requirements.txt +``` + +扩展依赖(可选,建议安装): + +```bash +pip3 install -r requirements-optional.txt +``` + +> 国内网络可使用镜像源加速:`pip3 install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple` + +### 3. 安装 Cow CLI + +安装命令行工具,用于管理服务和技能: + +```bash +pip3 install -e . +``` + +安装后即可使用 `cow` 命令: + +```bash +cow help +``` + + + 此步骤为推荐操作。安装后可以使用 `cow start`、`cow stop`、`cow update` 等命令管理服务,也可以使用 `cow skill` 管理技能。如果不安装 CLI,可以使用 `./run.sh` 或 `python3 app.py` 运行。 + + +### 3.1 安装浏览器工具(可选) + +如需使用浏览器工具(控制浏览器访问网页、填写表单等),运行: + +```bash +cow install-browser +``` + +该命令会自动安装 Playwright 和 Chromium 浏览器。详细说明参考 [浏览器工具文档](/zh/tools/browser)。 + + + 浏览器工具依赖较重(~300MB),如不需要可跳过,不影响其他功能正常使用。 + + +### 4. 配置 + +复制配置文件模板并编辑: + +```bash +cp config-template.json config.json +``` + +在 `config.json` 中填写模型 API Key 和通道类型等配置,详细说明参考各 [模型文档](/zh/models/minimax)。 + +### 5. 
运行 + +**使用 Cow CLI 运行(推荐):** + +```bash +cow start +``` + +**或者本地前台运行:** + +```bash +python3 app.py +``` + +运行后默认启动 Web 控制台,访问 `http://localhost:9899` 开始对话和管理Agent。 + +**服务器后台运行(不使用 CLI 时):** + +```bash +nohup python3 app.py & tail -f nohup.out +``` + + + **服务器公网访问 Web 控制台**:默认 `web_host` 仅监听 `127.0.0.1`(本机访问),需公网访问时请在 `config.json` 中将 `web_host` 设为 `0.0.0.0`,同时强烈建议设置 `web_password` 启用鉴权。此外还需在防火墙/安全组中放行 `9899` 端口,建议仅对指定 IP 开放以保证安全。 + + +## Docker 部署 + +使用 Docker 部署无需下载源码和安装依赖。Agent模式下更推荐使用源码部署以获得更多系统访问能力。 + + + 需要安装 [Docker](https://docs.docker.com/engine/install/) 和 docker-compose。 + + +**1. 下载配置文件** + +```bash +curl -O https://cdn.link-ai.tech/code/cow/docker-compose.yml +``` + +打开 `docker-compose.yml` 填写所需配置。 + +**2. 启动容器** + +```bash +sudo docker compose up -d +``` + +**3. 查看日志** + +```bash +sudo docker logs -f chatgpt-on-wechat +``` + + + **Docker 公网访问 Web 控制台**:在 `docker-compose.yml` 中将 `WEB_HOST` 设为 `0.0.0.0`(容器内默认绑定 `127.0.0.1` 无法从宿主机外访问),同时强烈建议设置 `WEB_PASSWORD` 启用鉴权。此外需确保 `9899` 端口正确映射到宿主机,并在防火墙/安全组放行该端口。 + + +## 核心配置项 + + + + ```json + { + "channel_type": "web", + "model": "deepseek-v4-flash", + "deepseek_api_key": "", + "agent": true, + "agent_workspace": "~/cow", + "agent_max_context_tokens": 40000, + "agent_max_context_turns": 30, + "agent_max_steps": 15, + "cow_lang": "auto" + } + ``` + + + ```yaml + environment: + CHANNEL_TYPE: 'web' + MODEL: 'deepseek-v4-flash' + DEEPSEEK_API_KEY: 'your-api-key' + DEEPSEEK_API_BASE: 'https://api.deepseek.com/v1' + AGENT: 'True' + AGENT_MAX_CONTEXT_TOKENS: 40000 + AGENT_MAX_CONTEXT_TURNS: 30 + AGENT_MAX_STEPS: 15 + COW_LANG: 'auto' + ``` + + + +| 参数 | 环境变量 | 说明 | 默认值 | +| --- | --- | --- | --- | +| `channel_type` | `CHANNEL_TYPE` | 接入渠道类型 | `web` | +| `model` | `MODEL` | 模型名称 | `deepseek-v4-flash` | +| `agent` | `AGENT` | 是否启用 Agent 模式 | `true` | +| `agent_workspace` | - | Agent 工作空间路径 | `~/cow` | +| `agent_max_context_tokens` | `AGENT_MAX_CONTEXT_TOKENS` | 最大上下文 tokens | `40000` | +| `agent_max_context_turns` | 
`AGENT_MAX_CONTEXT_TURNS` | 最大上下文记忆轮次 | `30` | +| `agent_max_steps` | `AGENT_MAX_STEPS` | 单次任务最大决策步数 | `15` | +| `cow_lang` | `COW_LANG` | 界面、命令文案、系统提示词等的语言,`auto` 自动检测,可设为 `zh` / `en` | `auto` | + + + 全部配置项可在项目 [`config.py`](https://github.com/zhayujie/CowAgent/blob/master/config.py) 文件中查看。Docker 部署时,配置项名称需转为大写环境变量格式。 + diff --git a/docs/zh/guide/quick-start.mdx b/docs/zh/guide/quick-start.mdx new file mode 100644 index 00000000..964ffe74 --- /dev/null +++ b/docs/zh/guide/quick-start.mdx @@ -0,0 +1,58 @@ +--- +title: 一键安装 +description: 使用脚本一键安装和管理 CowAgent +--- + +项目提供了一键安装、配置、启动、管理程序的脚本,推荐使用脚本快速运行。 + +支持 Linux、macOS、Windows 操作系统,需安装 Python 3.7 ~ 3.12(推荐 3.9)。 + +## 安装命令 + + + + ```bash + bash <(curl -fsSL https://cdn.link-ai.tech/code/cow/run.sh) + ``` + + + ```powershell + irm https://cdn.link-ai.tech/code/cow/run.ps1 | iex + ``` + + + +脚本自动执行以下流程: + +1. 检查 Python 环境(需要 Python 3.7+) +2. 安装必要工具(git、curl 等) +3. 克隆项目代码到 `~/CowAgent` +4. 安装 Python 依赖和 Cow CLI +5. 引导配置 AI 模型和通信渠道 +6. 启动服务 + +运行后默认启动 Web 控制台,访问 `http://localhost:9899` 开始对话和管理 Agent。 + + + **服务器部署需要公网访问控制台时**,请在 `config.json` 中将 `web_host` 设为 `0.0.0.0`(默认仅监听 `127.0.0.1` 本机访问),同时强烈建议设置 `web_password` 启用鉴权。然后通过 `http://<服务器IP>:9899` 访问,并确保防火墙/安全组放行 `9899` 端口。 + + +## 管理命令 + +安装完成后,使用 `cow` CLI 管理服务: + +| 命令 | 说明 | +| --- | --- | +| `cow start` | 启动服务 | +| `cow stop` | 停止服务 | +| `cow restart` | 重启服务 | +| `cow status` | 查看运行状态 | +| `cow logs` | 查看实时日志 | +| `cow update` | 更新代码并重启 | +| `cow install-browser` | 安装浏览器工具依赖 | + +更多命令和用法参考 [命令文档](/zh/cli/index)。 + + + 如果 `cow` 命令不可用,也可以使用 `./run.sh <命令>`(Linux/macOS)或 `.\scripts\run.ps1 <命令>`(Windows)作为替代,功能等效。 + diff --git a/docs/zh/guide/upgrade.mdx b/docs/zh/guide/upgrade.mdx new file mode 100644 index 00000000..7a36d706 --- /dev/null +++ b/docs/zh/guide/upgrade.mdx @@ -0,0 +1,61 @@ +--- +title: 更新升级 +description: CowAgent 的升级方式说明 +--- + +## 命令升级(推荐) + +使用 `cow update` 一键完成代码更新和服务重启: + +```bash +cow update +``` + +该命令会自动完成以下流程: + +1. 拉取最新代码(`git pull`) +2. 停止当前服务 +3. 
更新 Python 依赖 +4. 重新安装 CLI +5. 启动服务 + + + 如果未安装 Cow CLI,也可以使用 `./run.sh update` 完成相同操作。 + + +## 手动升级 + +在项目根目录下执行: + +```bash +git pull +pip3 install -r requirements.txt +pip3 install -e . +``` + +更新完成后重启服务: + +```bash +# 使用 Cow CLI (推荐) +cow restart + +# 或使用 run.sh +./run.sh restart + +# 或使用 nohup 直接运行 +kill $(ps -ef | grep app.py | grep -v grep | awk '{print $2}') +nohup python3 app.py & tail -f nohup.out +``` + +## Docker 升级 + +在 `docker-compose.yml` 所在目录下执行: + +```bash +sudo docker compose pull +sudo docker compose up -d +``` + + + 升级前建议备份 `config.json` 配置文件。Docker 环境下如需保留数据,可通过 volume 挂载持久化工作空间目录。 + diff --git a/docs/zh/intro/architecture.mdx b/docs/zh/intro/architecture.mdx new file mode 100644 index 00000000..9d8c3da2 --- /dev/null +++ b/docs/zh/intro/architecture.mdx @@ -0,0 +1,87 @@ +--- +title: 项目架构 +description: CowAgent 2.0 的系统架构和核心设计 +--- + +CowAgent 2.0 从简单的聊天机器人全面升级为超级智能助理,采用 Agent 架构设计,具备自主思考、规划任务、长期记忆和技能扩展等能力。 + +## 系统架构 + +CowAgent 的整体架构由以下核心模块组成: + +CowAgent Architecture + +| 模块 | 说明 | +| --- | --- | +| **Plan** | 理解用户意图,将复杂任务分解为多步骤计划,循环调用工具直到完成目标 | +| **Memory** | 自动将重要信息持久化为核心记忆和日级记忆,支持关键词和向量混合检索,跨会话保持上下文连续性 | +| **Knowledge** | 以主题维度组织结构化知识,Agent 自主整理有价值信息为 Markdown 页面,维护索引和交叉引用,构建持续增长的知识网络 | +| **Tools** | Agent 访问操作系统资源的核心能力,内置文件读写、终端执行、浏览器操作、定时调度、记忆检索、联网搜索等 10+ 种工具 | +| **Skills** | 加载和管理 Skills,支持从 Skill Hub、GitHub 等一键安装,或通过对话创建自定义技能 | +| **Models** | 模型层,统一接入 OpenAI、Claude、Gemini、DeepSeek、MiniMax、GLM、Qwen 等国内外主流大语言模型 | +| **Channels** | 消息通道层,负责接收和发送消息,支持 Web 控制台、微信、飞书、钉钉、企微、公众号等,统一消息协议 | +| **CLI** | 命令行系统,提供终端命令(`cow`)和对话命令(`/`),支持进程管理、技能安装、配置修改、知识库管理等操作 | + +## Agent 模式 + +启用 Agent 模式后,CowAgent 会以自主智能体的方式运行,核心工作流如下: + +1. **接收消息** — 通过通道接收用户输入 +2. **理解意图** — 分析任务需求和上下文 +3. **规划任务** — 将复杂任务分解为多个步骤 +4. **调用工具** — 选择合适的工具执行每个步骤 +5. **记忆与知识更新** — 将重要信息存入长期记忆,将结构化知识整理至知识库 +6. 
**返回结果** — 将执行结果发送回用户 + +## 工作空间 + +Agent 的工作空间默认位于 `~/cow` 目录,用于存储系统提示词、记忆文件、技能文件等: + +``` +~/cow/ +├── SYSTEM.md # Agent system prompt +├── USER.md # User profile +├── MEMORY.md # Core memory +├── memory/ # Long-term memory storage +│ └── YYYY-MM-DD.md # Daily memory +├── knowledge/ # Personal knowledge base +│ ├── index.md # Knowledge index +│ └── <topic>/ # Topic-based pages +└── skills/ # Custom skills + ├── skill-1/ + └── skill-2/ +``` + +密钥文件单独存储在 `~/.cow` 目录(出于安全考虑): + +``` +~/.cow/ +└── .env # Secret keys for skills +``` + +## 核心配置 + +在 `config.json` 中配置 Agent 模式的核心参数: + +```json +{ + "agent": true, + "agent_workspace": "~/cow", + "agent_max_context_tokens": 40000, + "agent_max_context_turns": 30, + "agent_max_steps": 15, + "enable_thinking": false, + "cow_lang": "auto" +} +``` + +| 参数 | 说明 | 默认值 | +| --- | --- | --- | +| `agent` | 是否启用 Agent 模式 | `true` | +| `agent_workspace` | 工作空间路径 | `~/cow` | +| `agent_max_context_tokens` | 最大上下文 token 数 | `50000` | +| `agent_max_context_turns` | 最大上下文记忆轮次 | `20` | +| `agent_max_steps` | 单次任务最大决策步数 | `20` | +| `enable_thinking` | 是否启用深度思考模式 | `false` | +| `knowledge` | 是否启用个人知识库 | `true` | +| `cow_lang` | 界面、命令文案、系统提示词等的语言,`auto` 自动检测,可设为 `zh` / `en` | `auto` | diff --git a/docs/zh/intro/features.mdx b/docs/zh/intro/features.mdx new file mode 100644 index 00000000..ca54b39e --- /dev/null +++ b/docs/zh/intro/features.mdx @@ -0,0 +1,142 @@ +--- +title: 功能介绍 +description: CowAgent 长期记忆、个人知识库、任务规划、技能系统、CLI 命令、浏览器工具详细说明 +--- + +## 1. 长期记忆 + +> 记忆系统让 Agent 能够长期记住重要信息,采用三层记忆流转架构:对话上下文(短期)→ 天级记忆(中期)→ MEMORY.md(长期),形成完整的记忆生命周期。 + +第一次启动 Agent 时,Agent 会主动询问关键信息,并记录至工作空间(默认 `~/cow`)中的智能体设定、用户身份、记忆文件中。 + +在后续的长期对话中,Agent 会在需要时智能记录或检索记忆,并对自身设定、用户偏好、记忆文件等进行不断更新。每日自动执行 **梦境蒸馏(Deep Dream)**,将分散的天级记忆整合为精炼的长期记忆,同时生成叙事风格的梦境日记。 + + + + + +详细说明请参考 [长期记忆](/zh/memory) 和 [梦境蒸馏](/zh/memory/deep-dream)。 + +## 2. 
个人知识库 + +> 知识库系统让 Agent 能够持续积累和组织结构化知识。与按时间线记录的记忆不同,知识库以主题为维度,将文章、对话洞察、学习材料等整理为互相关联的 Markdown 页面,形成持续增长的知识网络。 + +Agent 会在对话中自动将有价值的信息整理为知识页面,维护交叉引用和索引,通过 Web 控制台可浏览文档和查看知识图谱。知识库存储在工作空间的 `~/cow/knowledge/` 目录下。 + +- **自动整理**:Agent 在对话中自主提取和整理结构化知识,维护索引和交叉引用 +- **知识图谱**:基于页面间的交叉引用自动构建知识图谱,Web 控制台提供可视化关系图浏览 +- **对话联动**:Agent 回复中引用的知识文档链接可在 Web 控制台中直接点击跳转查看 +- **CLI 管理**:通过 `/knowledge` 命令查看统计、浏览目录,通过 `/knowledge on|off` 开关功能 + + + + + +详细说明请参考 [个人知识库](/zh/knowledge)。 + +## 3. 任务规划和工具调用 + +工具是 Agent 访问操作系统资源的核心,Agent 会根据任务需求智能选择和调用工具,完成文件读写、命令执行、定时任务等各类操作。内置工具的实现在项目的 `agent/tools/` 目录下。 + +**主要工具:** 文件读写编辑、Bash 终端、浏览器操作、文件发送、定时调度、记忆搜索、联网搜索、环境配置等。 + +### 3.1 终端和文件访问 + +针对操作系统的终端和文件的访问能力,是最基础和核心的工具,其他很多工具或技能都是基于此进行扩展。用户可通过手机端与 Agent 交互,操作个人电脑或服务器上的资源: + + + + + +### 3.2 编程能力 + +基于编程能力和系统访问能力,Agent 可以实现从信息搜索、图片等素材生成、编码、测试、部署、Nginx 配置修改、发布的 **Vibecoding 全流程**,通过手机端简单的一句命令完成应用的快速 demo: + + + + + +### 3.3 定时任务 + +基于 `scheduler` 工具实现动态定时任务,支持**一次性任务、固定时间间隔、Cron 表达式**三种形式,任务触发可选择**固定消息发送**或 **Agent 动态任务**执行两种模式: + + + + + +### 3.4 浏览器操作 + +内置 `browser` 工具,Agent 可控制浏览器访问网页、填写表单、点击元素、截图,支持动态 JS 渲染页面。运行 `cow install-browser` 一键安装,自动适配服务器(无头模式)和桌面环境: + + + + + +### 3.5 环境变量管理 + +技能所需的密钥存储在环境变量文件中,由 `env_config` 工具进行管理,你可以通过对话的方式更新密钥,工具内置安全保护和脱敏策略: + + + + + +## 4. 
技能系统 + +技能系统为 Agent 提供无限的扩展性,每个 Skill 由说明文件、运行脚本(可选)、资源(可选)组成,描述如何完成特定类型的任务。通过 Skill 可以让 Agent 遵循说明完成复杂流程、调用各类工具或对接第三方系统。 + +- [Skill Hub](https://skills.cowagent.ai/):开放的技能广场,汇集官方推荐、社区贡献和第三方技能,支持一键安装。 +- **内置技能:** 在项目的 `skills/` 目录下,包含技能创造器、图像识别、LinkAI 智能体、网页抓取等。内置 Skill 根据依赖条件(API Key、系统命令等)自动判断是否启用。 +- **自定义技能:** 由用户通过对话创建,存放在工作空间中(`~/cow/skills/`),可实现任何复杂的业务流程和第三方系统对接。 + +安装技能:`/skill install <名称>` 或 `cow skill install <名称>`,支持从 Skill Hub、GitHub、ClawHub、URL 等来源安装。 + +### 4.1 创建技能 + +通过 `skill-creator` 技能可以通过对话的方式快速创建技能。你可以让 Agent 将某个工作流程固化为技能,或者把任意接口文档和示例发送给 Agent,让它直接完成对接: + + + + + +### 4.2 搜索和图像识别 + +- **联网搜索:** 内置 `web_search` 工具,支持多种搜索引擎,配置 `BOCHA_API_KEY` 或 `LINKAI_API_KEY` 后启用。 +- **图像识别:** 内置 `openai-image-vision` 技能,可使用 `gpt-4.1-mini`、`gpt-4.1` 等模型,依赖 `OPENAI_API_KEY`。 + + + + + +### 4.3 技能广场 + +访问 [skills.cowagent.ai](https://skills.cowagent.ai/) 浏览所有可用技能,或在对话中执行: + +```text +/skill list --remote # 浏览技能广场 +/skill search <关键词> # 搜索技能 +/skill install <名称> # 一键安装 +``` + +同时还支持安装 GitHub、ClawHub、LinkAI 等第三方平台上的所有技能,详情查看 [技能安装](/zh/skills/install)。 + + + + +## 5. CLI 命令系统 + +CowAgent 提供两种命令交互方式,覆盖服务管理、技能安装、配置调整等日常运维操作: + +- **终端 CLI:** 在系统终端执行 `cow <命令>`,支持 `start`、`stop`、`restart`、`update`、`status`、`logs`、`skill` 等 +- **对话命令:** 在对话中输入 `/<命令>`,Web 控制台输入 `/` 可弹出指令菜单快速选择 + +```bash +cow start # 启动服务 +cow stop # 停止服务 +cow update # 更新并重启 +cow skill install pptx # 安装技能 +cow install-browser # 安装浏览器工具 +``` + +详细命令参考 [命令总览](/zh/cli/index)。 + + diff --git a/docs/zh/intro/index.mdx b/docs/zh/intro/index.mdx new file mode 100644 index 00000000..4701b754 --- /dev/null +++ b/docs/zh/intro/index.mdx @@ -0,0 +1,84 @@ +--- +title: 项目介绍 +description: CowAgent - 基于大模型的超级AI助理 +--- + +
+ CowAgent +
+ +**CowAgent** 是基于大模型的超级AI助理,能够主动思考和任务规划、操作计算机和外部资源、创造和执行Skills、拥有长期记忆和知识库并不断成长。 + +CowAgent 支持灵活切换多种模型,能处理文本、语音、图片、文件等多模态消息,可接入微信、飞书、钉钉、企业微信应用、微信公众号、网页中使用,7×24小时运行于你的个人电脑或服务器中。 + + + + 开源代码仓库,欢迎 Star 和贡献 + + + 无需安装,立即在线体验 CowAgent + + + +## 核心能力 + + + + 能够理解复杂任务并自主规划执行,持续思考和调用各类工具和技能直到完成目标。 + + + 三层记忆流转(上下文→天级记忆→全局记忆),每日梦境蒸馏整理,支持关键词及向量检索。 + + + 自动整理结构化知识,支持知识图谱可视化,通过交叉引用构建持续增长的知识网络。 + + + 实现了Skills创建和运行的引擎,内置多种技能,并支持通过自然语言对话完成自定义Skills开发。 + + + 内置文件读写、终端执行、浏览器操作、定时任务、消息发送等工具,Agent 可自主调用工具完成复杂任务。 + + + 提供终端 CLI 和对话中的命令,支持进程管理、技能安装、配置修改、上下文查看等常用操作。 + + + 支持 OpenAI, Claude, Gemini, DeepSeek, MiniMax, GLM, Qwen, Kimi, Doubao 等国内外主流模型厂商。 + + + 支持运行在本地计算机或服务器,可集成到微信、网页、飞书、钉钉、微信公众号、企业微信应用中使用。 + + + +## 快速体验 + +在终端执行以下命令,即可一键安装、配置、启动 CowAgent: + + + + ```bash + bash <(curl -fsSL https://cdn.link-ai.tech/code/cow/run.sh) + ``` + + + ```powershell + irm https://cdn.link-ai.tech/code/cow/run.ps1 | iex + ``` + + + +运行后默认会启动 Web 控制台,通过访问 `http://localhost:9899` 可以在网页端进行对话、配置、应用通道接入等操作。 + + + + 查看完整的安装和运行指南 + + + 了解 CowAgent 的系统架构设计 + + + +## 社区 + +添加小助手微信加入开源项目交流群: + + diff --git a/docs/zh/knowledge/index.mdx b/docs/zh/knowledge/index.mdx new file mode 100644 index 00000000..4f9aa797 --- /dev/null +++ b/docs/zh/knowledge/index.mdx @@ -0,0 +1,96 @@ +--- +title: 个人知识库 +description: CowAgent 的个人知识库系统 — 结构化知识沉淀、自动整理与知识图谱 +--- + +个人知识库是 Agent 的长期结构化知识存储,保存在工作空间的 `knowledge/` 目录下。与按时间线组织的记忆不同,知识库以主题为维度,将用户分享的文章、对话中的洞察、学习材料等整理为互相关联的 Markdown 页面,形成可持续增长的知识网络。 + + + + + +## 核心概念 + +### 知识 vs 记忆 + +| 维度 | 知识库(knowledge/) | 长期记忆(memory/) | +| --- | --- | --- | +| 组织方式 | 按主题分类、互相关联 | 按时间线、日期文件 | +| 写入方式 | Agent 主动整理结构化内容 | 上下文裁剪时自动摘要 | +| 内容特点 | 提炼后的结构化知识 | 原始对话摘要 | +| 典型用途 | 学习笔记、技术文档、项目知识 | 对话历史、事件记录 | + +### 目录结构 + +``` +~/cow/knowledge/ +├── index.md # 知识索引,所有页面的入口 +├── log.md # 变更日志,记录每次写入 +├── concepts/ # 概念类知识 +│ └── machine-learning.md +├── entities/ # 实体类知识(人物、组织、工具) +│ └── openai.md +└── sources/ # 来源类知识(文章、论文) + └── llm-wiki.md +``` + +目录结构是灵活的 — Agent 
会根据实际内容自动创建合适的分类目录。用户也可以通过对话的方式自定义目录组织方式。 + + + + + + +## 自动整理 + +知识库的写入是 Agent 的自主行为,在以下场景中触发: + +- **用户分享文章或文档** — Agent 自动提取关键信息,创建结构化知识页面 +- **对话产生有价值的结论** — Agent 将洞察整理为知识页面,并与已有知识建立关联 +- **用户主动要求整理** — 用户可以通过对话指导 Agent 组织和更新知识 + + + + + + +每个知识页面都包含与其他页面的交叉引用链接,逐步构建起一个知识图谱。 + +## 知识检索 + +Agent 在对话中可以通过以下方式检索知识: + +- **索引查阅** — 通过 `knowledge/index.md` 快速定位相关知识页面 +- **语义搜索** — 通过 `memory_search` 工具对知识库内容进行语义检索 +- **直接读取** — 通过 `memory_get` 工具读取特定知识文件 + +## Web 控制台 + +Web 控制台提供了专用的「知识」模块,支持: + +- **文档浏览** — 树状目录结构,可搜索、可折叠,点击查看文档内容 +- **知识图谱** — 可视化展示知识之间的关联关系,节点可直接跳转至文档 +- **对话联动** — Agent 回复中引用的知识文档链接可直接点击跳转查看 + + + + + + +## CLI 命令 + +通过 `/knowledge` 命令管理知识库: + +| 命令 | 说明 | +| --- | --- | +| `/knowledge` | 显示知识库统计信息 | +| `/knowledge list` | 以树状结构显示文件目录 | +| `/knowledge on` | 开启知识库功能 | +| `/knowledge off` | 关闭知识库功能 | + +## 相关配置 + +| 参数 | 说明 | 默认值 | +| --- | --- | --- | +| `knowledge` | 是否启用个人知识库功能 | `true` | +| `agent_workspace` | 工作空间路径,知识库存储在此目录的 `knowledge/` 子目录下 | `~/cow` | diff --git a/docs/zh/memory/context.mdx b/docs/zh/memory/context.mdx new file mode 100644 index 00000000..3d358f9d --- /dev/null +++ b/docs/zh/memory/context.mdx @@ -0,0 +1,81 @@ +--- +title: 短期记忆 +description: 对话上下文 — 消息管理、压缩策略和上下文操作 +--- + +对话上下文是 Agent 的短期记忆,包含当前会话中的所有消息(用户输入、Agent 回复、工具调用及结果)。合理管理上下文对于 Agent 的推理质量和成本控制至关重要。 + +## 上下文结构 + +每一轮对话由以下消息组成: + +``` +用户消息 → Agent 思考 → 工具调用 → 工具结果 → ... → Agent 最终回复 +``` + +一轮中可能包含多次工具调用(Agent 的决策步数由 `agent_max_steps` 控制),所有工具调用和结果都会保留在上下文中,直到被压缩或裁剪。 + +## 关键配置 + +| 参数 | 说明 | 默认值 | +| --- | --- | --- | +| `agent_max_context_tokens` | 上下文最大 token 预算 | `50000` | +| `agent_max_context_turns` | 上下文最大对话轮次 | `20` | +| `agent_max_steps` | 单轮对话最大决策步数(工具调用次数) | `15` | + +可通过 `config.json` 或对话中的 `/config` 命令修改。 + +## 压缩策略 + +当上下文超出限制时,系统会自动执行压缩以释放空间。整个过程分为多个阶段: + +### 1. 工具结果截断 + +在每次决策循环开始前,系统会检查历史轮次中的工具调用结果。超过 **20000 字符** 的工具结果会被截断,仅保留首尾内容和截断说明。当前轮次的工具结果不受影响。 + +### 2. 
轮次裁剪 + +当对话轮次超过 `agent_max_context_turns` 时: + +- 裁剪 **最早一半** 的完整轮次(保证工具调用链的完整性) +- 被裁剪的消息会通过 LLM 总结后**写入当天的日级记忆文件** +- LLM 摘要完成后,同时将摘要**注入到保留消息的第一条用户消息开头**,帮助模型在后续对话中保持上下文连贯性 +- 摘要注入在后台异步完成,不阻塞当前回复;注入的摘要在下一轮对话时生效 + +### 3. Token 预算裁剪 + +裁剪轮次后,如果 token 数仍超出预算: + +- **轮次 < 5 时**:对所有轮次进行**文本压缩** — 每轮只保留第一条用户文本和最后一条 Agent 回复,去掉中间的工具调用链 +- **轮次 ≥ 5 时**:再次裁剪**前半轮次**,被丢弃内容同样写入记忆并注入上下文摘要 + +### 4. 溢出应急处理 + +当模型 API 返回上下文溢出错误时: + +1. 先将当前所有消息总结写入记忆 +2. 执行激进裁剪(工具结果限制 10K 字符、用户文本限制 10K、最多保留 5 轮) +3. 如果仍然溢出,清空整个对话上下文 + +## 会话持久化 + +对话消息会持久化到本地数据库,服务重启后自动恢复。恢复策略: + +- 恢复最近的 **`max(3, max_context_turns / 6)`** 轮对话 +- 只保留每轮的**用户文本和 Agent 最终回复**,不恢复中间工具调用链 +- 超过 **30 天**的历史会话自动清理 + +## 操作命令 + +在对话中可以使用以下命令管理上下文: + +| 命令 | 说明 | +| --- | --- | +| `/context` | 查看当前上下文统计(消息数、角色分布、总字符数) | +| `/context clear` | 清空当前会话上下文 | +| `/config agent_max_context_tokens 80000` | 调整上下文 token 预算 | +| `/config agent_max_context_turns 30` | 调整上下文轮次上限 | + + + 清空上下文后,Agent 会"忘记"之前的对话内容。被裁剪和清空的内容如果已经写入长期记忆,仍可通过记忆检索找回。 + diff --git a/docs/zh/memory/deep-dream.mdx b/docs/zh/memory/deep-dream.mdx new file mode 100644 index 00000000..726ec5e2 --- /dev/null +++ b/docs/zh/memory/deep-dream.mdx @@ -0,0 +1,94 @@ +--- +title: 梦境蒸馏 +description: Deep Dream — 从对话到永久记忆的自动蒸馏机制 +--- + +梦境蒸馏(Deep Dream)是 CowAgent 记忆系统的核心整理机制,负责将分散的天级记忆蒸馏为精炼的长期记忆,并生成梦境日记。 + +## 记忆流转 + +CowAgent 的记忆从短期到长期经历三个阶段: + +``` +对话上下文(短期)→ 天级记忆(中期)→ MEMORY.md(长期) +``` + +### 1. 对话 → 天级记忆 + +当对话上下文被裁剪或每日定时总结时,系统使用 LLM 将对话内容摘要为关键事件,写入当天的天级记忆文件 `memory/YYYY-MM-DD.md`。 + +触发时机: +- **上下文裁剪** — 轮次或 token 超限时,裁剪的内容被总结写入 +- **每日定时** — 23:55 自动触发全量总结 +- **API 溢出** — 紧急保存当前对话摘要 + +### 2. 天级记忆 → MEMORY.md(蒸馏) + +每日总结完成后,Deep Dream 自动执行蒸馏: + +1. **读取材料** — 当前 `MEMORY.md` + 当天的天级记忆 +2. **LLM 蒸馏** — 去重、合并、修剪、提取新信息 +3. **覆写 MEMORY.md** — 输出精炼后的长期记忆 +4. **生成梦境日记** — 记录整理过程的发现和洞察 + +### 3. 
MEMORY.md 的作用 + +`MEMORY.md` 会被注入到每次对话的系统提示词中,让 Agent 始终了解用户的偏好、决策和关键事实。因此它必须保持精炼——Deep Dream 会控制在约 30 条以内。 + +## 蒸馏规则 + +Deep Dream 遵循以下整理规则: + +| 操作 | 说明 | +| --- | --- | +| **合并提炼** | 含义相近的多条合并为一条高密度表述 | +| **新增萃取** | 从天级记忆中提取偏好、决策、人物、经验等 | +| **冲突更新** | 新信息与旧条目矛盾时,以新信息为准 | +| **清理无效** | 删除临时性记录、空白条目、格式残留 | +| **删除冗余** | 已被更精炼表述涵盖的旧条目删除 | + +## 梦境日记 + +每次蒸馏会生成一篇梦境日记,保存在 `memory/dreams/YYYY-MM-DD.md`,用叙事风格记录: + +- 发现了哪些重复或矛盾 +- 从天级记忆中提取了什么新洞察 +- 做了哪些清理和优化 +- 整体感受和观察 + +梦境日记可在 Web 控制台的「记忆管理 → 梦境日记」tab 中查看。 + + + + + +## 手动触发 + +除了每日自动执行外,也可以在对话中手动触发: + +```text +/memory dream [N] +``` + +- `N`:整理近 N 天的记忆(默认 3 天,最大 30 天) +- 蒸馏在后台异步执行,完成后在对话中通知结果 +- Web 端通知包含可点击链接,直接跳转查看 MEMORY.md 和梦境日记 +- 无需 Agent 初始化,首次对话前即可使用 + + + + + + + 首次部署后可以手动执行一次 `/memory dream 30`,将历史天级记忆全量蒸馏到 MEMORY.md。 + + +## 安全机制 + +| 机制 | 说明 | +| --- | --- | +| **无新内容跳过** | 没有天级记忆时不执行蒸馏,避免空覆写 | +| **输入去重** | 定时任务中,输入材料未变化时自动跳过 | +| **异步执行** | 蒸馏在后台线程运行,不阻塞对话 | +| **顺序保证** | 定时任务中,天级 flush 全部完成后才启动蒸馏 | +| **禁止编造** | 提示词明确约束只能基于已有材料整理,不得推测或添加 | diff --git a/docs/zh/memory/index.mdx b/docs/zh/memory/index.mdx new file mode 100644 index 00000000..c547aab0 --- /dev/null +++ b/docs/zh/memory/index.mdx @@ -0,0 +1,71 @@ +--- +title: 长期记忆 +description: CowAgent 的长期记忆系统 — 文件持久化、自动写入与混合检索 +--- + +长期记忆保存在工作空间文件中,跨会话持久存在。Agent 在对话中通过检索工具按需加载历史记忆,也会在上下文裁剪时自动将对话摘要写入长期记忆。 + +Memory Architecture + +## 记忆类型 + +### 核心记忆(MEMORY.md) + +存储在 `~/cow/MEMORY.md` 中,包含用户的长期偏好、重要决策、关键事实等不会随时间淡化的信息。Agent 可通过工具读写此文件来维护长期知识。 + +### 日级记忆(memory/YYYY-MM-DD.md) + +存储在 `~/cow/memory/` 目录下,按日期命名(如 `2026-03-08.md`),记录每天的对话摘要和关键事件。仅在首次写入时创建,避免生成空文件。 + +### 梦境日记(memory/dreams/YYYY-MM-DD.md) + +Deep Dream(记忆蒸馏)过程的副产物,记录每次整理的发现、去重合并操作和新洞察。存储在 `~/cow/memory/dreams/` 目录下,按日期命名。 + +## 自动写入 + +Agent 通过以下机制自动将对话内容持久化为长期记忆: + +- **上下文裁剪时** — 当对话轮次或 token 超出配置上限时,裁剪最早一半的上下文,使用 LLM 将被裁剪的内容总结为关键信息写入当天记忆文件,并将摘要异步注入到保留的上下文中,帮助模型保持对话连贯性 +- **每日定时总结** — 每天 23:55 自动触发一次全量总结,防止低活跃日无记忆留存(内容无变化时自动跳过) +- [梦境蒸馏(Deep Dream)](/zh/memory/deep-dream) — 
每日总结完成后自动执行,将天级记忆蒸馏合并到 MEMORY.md,并生成梦境日记 +- **API 上下文溢出时** — 当模型 API 返回上下文溢出错误时,紧急保存当前对话摘要 + +所有记忆写入均在后台异步执行(LLM 总结 + 文件写入),不阻塞正常对话回复。 + +## 记忆检索 + +记忆系统支持混合检索模式: + +- **关键词检索** — 基于 FTS5 全文索引匹配历史记忆,支持 BM25 排序 +- **向量检索** — 基于 embedding 语义相似度搜索,即使表述不同也能找到相关记忆 + +Agent 会在对话中根据需要自动触发记忆检索,将相关历史信息纳入上下文。检索结果按混合评分排序(默认向量权重 0.7、关键词权重 0.3),日级记忆会随时间衰减(半衰期 30 天),核心记忆不衰减。 + +## 相关文件 + +工作空间(默认 `~/cow`)中与记忆相关的文件: + +| 文件 | 说明 | +| --- | --- | +| `AGENT.md` | Agent 的人格和行为设定 | +| `USER.md` | 用户身份信息和偏好 | +| `RULE.md` | 自定义规则和约束 | +| `MEMORY.md` | 核心记忆(长期) | +| `memory/YYYY-MM-DD.md` | 日级记忆(按需创建) | +| `memory/dreams/YYYY-MM-DD.md` | 梦境日记(Deep Dream 自动生成) | + +## Web 控制台 + +在 Web 控制台的记忆管理页面中,可浏览记忆文件和梦境日记,支持通过 Tab 切换查看: + + + + + +## 相关配置 + +| 参数 | 说明 | 默认值 | +| --- | --- | --- | +| `agent_workspace` | 工作空间路径,记忆文件存储在此目录下 | `~/cow` | +| `agent_max_context_tokens` | 最大上下文 token 数,超出时裁剪并总结写入记忆 | `50000` | +| `agent_max_context_turns` | 最大上下文轮次,超出时裁剪并总结写入记忆 | `20` | diff --git a/docs/zh/models/claude.mdx b/docs/zh/models/claude.mdx new file mode 100644 index 00000000..ee1809d6 --- /dev/null +++ b/docs/zh/models/claude.mdx @@ -0,0 +1,50 @@ +--- +title: Claude +description: Anthropic Claude 模型配置(文本对话 + 图像理解) +--- + +Claude 由 Anthropic 提供,支持文本对话与图像理解,主流 Sonnet / Opus 模型均原生支持视觉,无需额外指定 Vision 模型。 + + + 通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。 + + +## 文本对话 + +```json +{ + "model": "claude-opus-4-8", + "claude_api_key": "YOUR_API_KEY" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `model` | 支持 `claude-opus-4-8`、`claude-opus-4-7`、`claude-sonnet-4-6`、`claude-opus-4-6`、`claude-sonnet-4-5`、`claude-sonnet-4-0`、`claude-3-5-sonnet-latest` 等,参考 [官方模型](https://docs.anthropic.com/en/docs/about-claude/models/overview) | +| `claude_api_key` | 在 [Claude 控制台](https://console.anthropic.com/settings/keys) 创建 | +| `claude_api_base` | 可选,默认为 `https://api.anthropic.com/v1`,可改为第三方代理 | + +### 模型选择 + +| 模型 | 适用场景 | +| --- | --- | +| `claude-opus-4-8` | 默认推荐,最新旗舰,复杂推理与长链路任务效果最佳 | +| `claude-opus-4-7` | 上一代 Opus 旗舰 | +| 
`claude-sonnet-4-6` | 性价比与速度平衡,成本更低 | +| `claude-opus-4-6` / `claude-sonnet-4-5` / `claude-sonnet-4-0` | 更早的旗舰,价格更低 | + +## 图像理解 + +配置 `claude_api_key` 后 Agent 的 Vision 工具会自动使用 Claude 主模型识别图像,无需额外配置。 + +如需手动指定 Vision 模型,可在配置文件中显式配置: + +```json +{ + "tools": { + "vision": { + "model": "claude-sonnet-4-6" + } + } +} +``` diff --git a/docs/zh/models/coding-plan.mdx b/docs/zh/models/coding-plan.mdx new file mode 100644 index 00000000..a8341638 --- /dev/null +++ b/docs/zh/models/coding-plan.mdx @@ -0,0 +1,140 @@ +--- +title: Coding Plan +description: Coding Plan 模式模型配置 +--- + +> Coding Plan 是各厂商推出的编程包月套餐,适合高频使用 Agent 的场景。CowAgent 支持通过 OpenAI 兼容方式接入各厂商的 Coding Plan 接口。 + + + Coding Plan 的 API Base 和 API Key 通常与普通按量计费接口不通用,请在各厂商平台单独获取。 + + +## 通用配置格式 + +所有厂商均可使用 OpenAI 兼容协议接入,可在web控制台快速配置。设置模型厂商为**OpenAI**,选择自定义模型并填入模型编码,最后填写对应厂商的API Base 和 API Key: + + + +也可通过 `config.json` 配置文件直接修改: + +```json +{ + "bot_type": "openai", + "model": "模型名称", + "open_ai_api_base": "厂商 Coding Plan API Base", + "open_ai_api_key": "YOUR_API_KEY" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `bot_type` | 固定为 `openai`(OpenAI 兼容方式) | +| `model` | 各厂商支持的模型名称 | +| `open_ai_api_base` | 各厂商 Coding Plan 专用 API Base | +| `open_ai_api_key` | 各厂商 Coding Plan 专用 API Key | + +--- + +## 阿里云 + +```json +{ + "bot_type": "openai", + "model": "qwen3.5-plus", + "open_ai_api_base": "https://coding.dashscope.aliyuncs.com/v1", + "open_ai_api_key": "YOUR_API_KEY" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `model` | `qwen3.5-plus`、`qwen3-max-2026-01-23`、`qwen3-coder-next`、`qwen3-coder-plus`、`glm-5`、`glm-4.7`、`kimi-k2.5`、`MiniMax-M2.5` | +| `open_ai_api_base` | `https://coding.dashscope.aliyuncs.com/v1` | +| `open_ai_api_key` | Coding Plan 专用 Key(与按量计费接口不通用) | + +官方文档:[快速开始](https://help.aliyun.com/zh/model-studio/coding-plan-quickstart?spm=a2c4g.11186623.help-menu-2400256.d_0_2_1.70115203zi5Igc)、[模型列表](https://help.aliyun.com/zh/model-studio/coding-plan) + +--- + +## MiniMax + +```json +{ + "bot_type": "openai", + "model": 
"MiniMax-M2.5", + "open_ai_api_base": "https://api.minimaxi.com/v1", + "open_ai_api_key": "YOUR_API_KEY" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `model` | `MiniMax-M2.5`、`MiniMax-M2.5-highspeed`、`MiniMax-M2.1`、`MiniMax-M2` | +| `open_ai_api_base` | 国内:`https://api.minimaxi.com/v1`;海外:`https://api.minimax.io/v1` | +| `open_ai_api_key` | Coding Plan 专用 Key(与按量计费接口不通用) | + +官方文档:[国内 Key 获取](https://platform.minimaxi.com/docs/coding-plan/quickstart)、[模型列表](https://platform.minimaxi.com/docs/guides/pricing-coding-plan)、[国际 Key 获取](https://platform.minimax.io/docs/coding-plan/quickstart) + +--- + + +## 智谱 GLM + +```json +{ + "bot_type": "openai", + "model": "glm-4.7", + "open_ai_api_base": "https://open.bigmodel.cn/api/coding/paas/v4", + "open_ai_api_key": "YOUR_API_KEY" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `model` | `glm-5`、`glm-4.7`、`glm-4.6`、`glm-4.5`、`glm-4.5-air` | +| `open_ai_api_base` | 中国区:`https://open.bigmodel.cn/api/coding/paas/v4`;全球区:`https://api.z.ai/api/coding/paas/v4` | +| `open_ai_api_key` | API Key 与普通接口通用 | + +官方文档:[国内版快速开始](https://docs.bigmodel.cn/cn/coding-plan/quick-start)、[国际版快速开始](https://docs.z.ai/devpack/quick-start) + +--- + +## 火山引擎 + +```json +{ + "bot_type": "openai", + "model": "Doubao-Seed-2.0-Code", + "open_ai_api_base": "https://ark.cn-beijing.volces.com/api/coding/v3", + "open_ai_api_key": "YOUR_API_KEY" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `model` | `Doubao-Seed-2.0-Code`、`Doubao-Seed-2.0-pro`、`Doubao-Seed-2.0-lite`、`Doubao-Seed-Code`、`MiniMax-M2.5`、`Kimi-K2.5`、`GLM-4.7`、`DeepSeek-V3.2` | +| `open_ai_api_base` | `https://ark.cn-beijing.volces.com/api/coding/v3` | +| `open_ai_api_key` | API Key 与普通接口通用 | + +官方文档:[快速开始](https://www.volcengine.com/docs/82379/1928261?lang=zh) + +--- + +## Kimi + +```json +{ + "bot_type": "moonshot", + "model": "kimi-for-coding", + "moonshot_base_url": "https://api.kimi.com/coding/v1", + "moonshot_api_key": "YOUR_API_KEY" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `model` | 填写 `kimi-for-coding` 
会自动更新模型,或指定模型例如 `kimi-k2.6` | +| `moonshot_base_url` | `https://api.kimi.com/coding/v1` | +| `moonshot_api_key` | Coding Plan 专用 Key(与按量计费接口不通用) | + +官方文档:[Key 获取](https://www.kimi.com/code/docs/) diff --git a/docs/zh/models/custom.mdx b/docs/zh/models/custom.mdx new file mode 100644 index 00000000..2673a8de --- /dev/null +++ b/docs/zh/models/custom.mdx @@ -0,0 +1,62 @@ +--- +title: 自定义 +description: 自定义厂商配置,适用于第三方 API 代理和本地模型 +--- + +适用于通过 OpenAI 兼容协议接入的第三方模型服务或本地部署的模型,例如: + +- **第三方 API 代理**:使用统一的 API Base 调用多种模型 +- **本地模型**:通过 Ollama、vLLM、LocalAI 等工具在本地部署的模型 +- **私有化部署**:企业内部部署的模型服务 + + + 与 `openai` 厂商的区别:选择自定义厂商后,通过 `/config model` 切换模型时,不会自动切换厂商类型,始终使用自定义的 API 地址。 + + +## 文本对话 + +### 第三方 API 代理 + +```json +{ + "bot_type": "custom", + "model": "", + "custom_api_key": "YOUR_API_KEY", + "custom_api_base": "https://{your-proxy.com}/v1" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `bot_type` | 必须设为 `custom` | +| `model` | 模型名称,填写代理服务支持的任意模型名 | +| `custom_api_key` | API 密钥,由代理服务提供 | +| `custom_api_base` | API 地址,由代理服务提供,需兼容 OpenAI 协议 | + +### 本地模型 + +本地模型通常不需要 API Key,只需填写 API Base: + +```json +{ + "bot_type": "custom", + "model": "qwen3.5:27b", + "custom_api_base": "http://localhost:11434/v1" +} +``` + +常见的本地部署工具及默认地址: + +| 工具 | 默认 API Base | +| --- | --- | +| [Ollama](https://ollama.com) | `http://localhost:11434/v1` | +| [vLLM](https://docs.vllm.ai) | `http://localhost:8000/v1` | +| [LocalAI](https://localai.io) | `http://localhost:8080/v1` | + +### 切换模型 + +自定义厂商下切换模型时,只会修改 `model`,不会改变 `bot_type` 和 API 地址: + +``` +/config model qwen3.5:27b +``` diff --git a/docs/zh/models/deepseek.mdx b/docs/zh/models/deepseek.mdx new file mode 100644 index 00000000..57b96d55 --- /dev/null +++ b/docs/zh/models/deepseek.mdx @@ -0,0 +1,72 @@ +--- +title: DeepSeek +description: DeepSeek 模型配置(文本对话 + 思考模式) +--- + +DeepSeek 是当前 Agent 模式默认推荐的厂商之一,主打高性价比的文本对话和任务规划能力。 + +## 文本对话 + +```json +{ + "model": "deepseek-v4-flash", + "deepseek_api_key": "YOUR_API_KEY" +} +``` + +| 参数 | 说明 | +| --- | 
--- | +| `model` | 支持 `deepseek-v4-flash`(默认)、`deepseek-v4-pro` | +| `deepseek_api_key` | 在 [DeepSeek 平台](https://platform.deepseek.com/api_keys) 创建 | +| `deepseek_api_base` | 可选,默认为 `https://api.deepseek.com/v1`,可修改为第三方代理地址 | + +### 模型选择 + +| 模型 | 适用场景 | +| --- | --- | +| `deepseek-v4-flash` | 默认推荐,速度快、成本低 | +| `deepseek-v4-pro` | 更智能,复杂任务效果更强 | + +## 思考模式 + +V4 系列(`deepseek-v4-flash` / `deepseek-v4-pro`)支持显式的「思考模式」:模型在输出最终回答前,先输出一段思维链(`reasoning_content`),从而提升答案质量。 + +### 开关 + +通过全局配置 `enable_thinking` 控制,也可在 Web 控制台 - 配置页面中进行切换: + +```json +{ + "enable_thinking": true +} +``` + +- `true`:所有渠道下模型都会先思考再作答。Web 控制台会展示思考过程,IM 渠道(微信 / 企微 / 钉钉 / 飞书)虽不展示但同样获得更好答案。 +- `false`:关闭思考,响应更快,首字延迟更低。 + +### 推理强度 + +思考模式下可通过 `reasoning_effort` 控制推理强度: + +```json +{ + "enable_thinking": true, + "reasoning_effort": "high" +} +``` + +| 取值 | 适用场景 | +| --- | --- | +| `high`(默认) | 日常 Agent 任务,思考与速度的平衡 | +| `max` | 复杂编码、长链路规划、严格约束的任务,推理更深但耗时与输出 token 更多 | + +`reasoning_effort` 仅在 `enable_thinking` 为 `true` 时生效;模型不支持思考模式时该字段自动忽略。 + +### 行为说明 + +- **采样参数**:思考模式下 `temperature`、`top_p`、`presence_penalty`、`frequency_penalty` 会被服务端忽略(不会报错),CowAgent 会自动跳过传入。 +- **多轮工具调用**:当历史中包含工具调用时,DeepSeek 要求所有 assistant 消息必须回传 `reasoning_content`。CowAgent 会自动处理回传逻辑,跨轮次切换思考开关也不会出错。 + + + 默认使用 `deepseek-v4-flash`;复杂任务可使用 `deepseek-v4-pro`;需要深度推理可开启 `enable_thinking`。 + diff --git a/docs/zh/models/doubao.mdx b/docs/zh/models/doubao.mdx new file mode 100644 index 00000000..cfdc5670 --- /dev/null +++ b/docs/zh/models/doubao.mdx @@ -0,0 +1,66 @@ +--- +title: 豆包 Doubao +description: 豆包(火山方舟)模型配置(文本 / 图像理解 / 图像生成 / 向量) +--- + +豆包(火山方舟)支持文本对话、图像理解、图像生成(Seedream)和向量能力,一份 `ark_api_key` 即可启用全部能力。 + + + 通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。 + + +## 文本对话 + +```json +{ + "model": "doubao-seed-2-0-pro-260215", + "ark_api_key": "YOUR_API_KEY" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `model` | 可填 `doubao-seed-2-0-pro-260215`、`doubao-seed-2-0-code-preview-260215`、`doubao-seed-2-0-lite-260215` 等 | +| `ark_api_key` | 在 
[火山方舟控制台](https://console.volcengine.com/ark/region:ark+cn-beijing/apikey) 创建 | +| `ark_base_url` | 可选,默认为 `https://ark.cn-beijing.volces.com/api/v3` | + +## 图像理解 + +配置 `ark_api_key` 后 Agent 的 Vision 工具会自动使用 `doubao-seed-2-0-pro-260215` 识别图像,无需额外配置。 + +如需手动指定 Vision 模型: + +```json +{ + "tools": { + "vision": { + "model": "doubao-seed-2-0-pro-260215" + } + } +} +``` + +## 图像生成 + +```json +{ + "skills": { + "image-generation": { + "model": "seedream-5.0-lite" + } + } +} +``` + +可选模型:`seedream-5.0-lite`、`seedream-4.5`。 + +## 向量 + +```json +{ + "embedding_provider": "doubao", + "embedding_model": "doubao-embedding-vision-251215" +} +``` + +默认模型 `doubao-embedding-vision-251215`(多模态 embedding),可在配置文件中通过 `embedding_dimensions` 指定 1024 或 2048 维。修改 embedding 后需执行 `/memory rebuild-index` 命令重建索引。 diff --git a/docs/zh/models/gemini.mdx b/docs/zh/models/gemini.mdx new file mode 100644 index 00000000..f1c8991a --- /dev/null +++ b/docs/zh/models/gemini.mdx @@ -0,0 +1,59 @@ +--- +title: Gemini +description: Google Gemini 模型配置(文本对话 + 图像理解 + 图像生成) +--- + +Google Gemini 支持文本对话、图像理解和图像生成(Nano Banana 系列),一个 `gemini_api_key` 即可启用全部能力。 + + + 通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。 + + +## 文本对话 + +```json +{ + "model": "gemini-3.5-flash", + "gemini_api_key": "YOUR_API_KEY" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `model` | 推荐 `gemini-3.5-flash`,亦支持 `gemini-3.1-pro-preview`、`gemini-3.1-flash-lite-preview`、`gemini-3-flash-preview`、`gemini-3-pro-preview` 等,参考 [官方文档](https://ai.google.dev/gemini-api/docs/models) | +| `gemini_api_key` | 在 [Google AI Studio](https://aistudio.google.com/app/apikey) 创建 | +| `gemini_api_base` | 可选,默认为 `https://generativelanguage.googleapis.com`,可改为第三方代理 | + +## 图像理解 + +Gemini 全系列模型均原生支持视觉,配置 `gemini_api_key` 后 Agent 的 Vision 工具会自动使用主模型识别图像,无需额外配置。 + +如需手动指定 Vision 模型: + +```json +{ + "tools": { + "vision": { + "model": "gemini-3.1-flash-lite-preview" + } + } +} +``` + +## 图像生成 + +```json +{ + "skills": { + "image-generation": { + "model": 
"gemini-3.1-flash-image-preview" + } + } +} +``` + +| 模型 ID | 别名 | +| --- | --- | +| `gemini-3.1-flash-image-preview` | Nano Banana 2 | +| `gemini-3-pro-image-preview` | Nano Banana Pro | +| `gemini-2.5-flash-image` | Nano Banana | diff --git a/docs/zh/models/glm.mdx b/docs/zh/models/glm.mdx new file mode 100644 index 00000000..ad5f8fd3 --- /dev/null +++ b/docs/zh/models/glm.mdx @@ -0,0 +1,56 @@ +--- +title: 智谱 GLM +description: 智谱 AI GLM 模型配置(文本 / 图像理解 / 语音识别 / 向量) +--- + +智谱 AI 支持文本对话、图像理解、语音识别(ASR)和向量(Embedding),一份 `zhipu_ai_api_key` 即可启用全部能力。 + + + 通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。 + + +## 文本对话 + +```json +{ + "model": "glm-5.1", + "zhipu_ai_api_key": "YOUR_API_KEY" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `model` | 可填 `glm-5.1`、`glm-5-turbo`、`glm-5`、`glm-4.7`、`glm-4-plus`、`glm-4-flash`、`glm-4-air` 等,参考 [模型编码](https://bigmodel.cn/dev/api/normal-model/glm-4) | +| `zhipu_ai_api_key` | 在 [智谱 AI 控制台](https://www.bigmodel.cn/usercenter/proj-mgmt/apikeys) 创建 | +| `zhipu_ai_api_base` | 可选,默认为 `https://open.bigmodel.cn/api/paas/v4` | + +## 图像理解 + +智谱 chat 系列模型(`glm-5.1`、`glm-5-turbo` 等)不支持视觉,视觉调用统一路由到 `glm-5v-turbo`。配置 `zhipu_ai_api_key` 后 Agent 的 Vision 工具会自动使用该模型,无需在配置文件中显式指定。 + +## 语音识别 + +```json +{ + "voice_to_text": "zhipu", + "voice_to_text_model": "glm-asr-2512" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `voice_to_text` | 设为 `zhipu` 启用智谱 ASR | +| `voice_to_text_model` | 可选,默认 `glm-asr-2512` | + +凭证自动复用 `zhipu_ai_api_key`。语音文件建议小于 25MB,超大文件可能被服务端拒绝。 + +## 向量 + +```json +{ + "embedding_provider": "zhipu", + "embedding_model": "embedding-3" +} +``` + +可选模型:`embedding-3`、`embedding-2`。修改 embedding 后需执行 `/memory rebuild-index` 命令重建索引。 diff --git a/docs/zh/models/index.mdx b/docs/zh/models/index.mdx new file mode 100644 index 00000000..4169cd4a --- /dev/null +++ b/docs/zh/models/index.mdx @@ -0,0 +1,40 @@ +--- +title: 模型概览 +description: CowAgent 支持的模型厂商及能力矩阵 +--- + +CowAgent 支持国内外主流厂商的大语言模型,模型接口实现在项目的 `models/` 目录下。除文本对话外,部分厂商还提供视觉理解、图像生成、语音识别、语音合成、向量等能力,可在 
Agent 流程中按需调用。 + + +## 模型能力总览 + +各厂商提供的能力一览。「文本」指主对话模型,其余列表示该厂商可承担对应 Agent 能力。 + +| 厂商 | 代表模型 | 文本 | 图像理解 | 图像生成 | 语音识别 | 语音合成 | 向量 | +| --- | --- | :-: | :-: | :-: | :-: | :-: | :-: | +| [DeepSeek](/zh/models/deepseek) | deepseek-v4-flash / pro | ✅ | | | | | | +| [MiniMax](/zh/models/minimax) | MiniMax-M2.7 | ✅ | ✅ | ✅ | | ✅ | | +| [Claude](/zh/models/claude) | claude-opus-4-8 | ✅ | ✅ | | | | | +| [Gemini](/zh/models/gemini) | gemini-3.5-flash | ✅ | ✅ | ✅ | | | | +| [OpenAI](/zh/models/openai) | gpt-5.5、o 系列 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [智谱 GLM](/zh/models/glm) | glm-5.1、glm-5v-turbo | ✅ | ✅ | | ✅ | | ✅ | +| [通义千问](/zh/models/qwen) | qwen3.7-max | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [豆包 Doubao](/zh/models/doubao) | doubao-seed-2.0 系列 | ✅ | ✅ | ✅ | | | ✅ | +| [Kimi](/zh/models/kimi) | kimi-k2.6 | ✅ | ✅ | | | | | +| [百度千帆](/zh/models/qianfan) | ernie-5.1 | ✅ | ✅ | | | | | +| [小米 MiMo](/zh/models/mimo) | mimo-v2.5-pro / v2.5 | ✅ | ✅ | | | ✅ | | +| [LinkAI](/zh/models/linkai) | 多厂商 100+ 模型统一接入 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [自定义](/zh/models/custom) |本地模型 / 三方代理 | ✅ | | | | | | + + + Web 控制台中各项能力(视觉 / 图像 / 语音识别 / 语音合成 / 向量 / 网络搜索)均可独立配置厂商与模型,互相之间不强制绑定。 + + + +## 配置方式 + +**方式一(推荐):** 通过 [Web 控制台](/zh/channels/web) 在线管理模型与各项能力,无需手动编辑配置文件: + + + +**方式二:** 手动编辑 `config.json`,根据所选模型填写对应的模型名称和 API Key。每个模型也支持 OpenAI 兼容方式接入,将 `bot_type` 设为 `openai`,配置 `open_ai_api_base` 和 `open_ai_api_key` 即可。 diff --git a/docs/zh/models/kimi.mdx b/docs/zh/models/kimi.mdx new file mode 100644 index 00000000..beb5beaf --- /dev/null +++ b/docs/zh/models/kimi.mdx @@ -0,0 +1,41 @@ +--- +title: Kimi +description: Kimi(Moonshot)模型配置(文本对话 + 图像理解) +--- + +Kimi 由 Moonshot 提供,支持文本对话与图像理解,`kimi-k2.x` 系列原生支持视觉。 + + + 通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。 + + +## 文本对话 + +```json +{ + "model": "kimi-k2.6", + "moonshot_api_key": "YOUR_API_KEY" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `model` | 可填 `kimi-k2.6`、`kimi-k2.5`、`kimi-k2`、`moonshot-v1-8k`、`moonshot-v1-32k`、`moonshot-v1-128k` | +| `moonshot_api_key` | 在 
[Moonshot 控制台](https://platform.moonshot.cn/console/api-keys) 创建 | +| `moonshot_base_url` | 可选,默认为 `https://api.moonshot.cn/v1` | + +## 图像理解 + +配置 `moonshot_api_key` 后 Agent 的 Vision 工具会自动使用 `kimi-k2.6` 识别图像,无需额外配置。 + +如需手动指定 Vision 模型: + +```json +{ + "tools": { + "vision": { + "model": "kimi-k2.6" + } + } +} +``` diff --git a/docs/zh/models/linkai.mdx b/docs/zh/models/linkai.mdx new file mode 100644 index 00000000..68647ebc --- /dev/null +++ b/docs/zh/models/linkai.mdx @@ -0,0 +1,103 @@ +--- +title: LinkAI +description: 通过 LinkAI 平台统一接入文本、视觉、图像、语音与向量能力 +--- + +通过一份 `linkai_api_key` 即可访问 OpenAI、Claude、Gemini、DeepSeek、MiniMax、Qwen、Kimi、豆包 等主流厂商的全部能力。 + + + 通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。 + + +## 文本对话 + +```json +{ + "use_linkai": true, + "linkai_api_key": "YOUR_API_KEY" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `use_linkai` | 设为 `true` 启用 | +| `linkai_api_key` | 在 [控制台](https://link-ai.tech/console/interface) 创建 | +| `model` | 可填写 [模型列表](https://link-ai.tech/console/models) 中任意编码 | + +前往 [模型服务](https://link-ai.tech/console/models) 了解更多。 + +## 图像理解 + +配置完成后 Agent 的 Vision 工具会自动调用网关上的多模态模型,无需额外配置。如需手动指定 Vision 模型: + +```json +{ + "tools": { + "vision": { + "model": "gpt-5.4-mini" + } + } +} +``` + +可选模型:`gpt-4.1-mini`、`gpt-5.4-mini`、`qwen3.6-plus`、`doubao-seed-2-0-pro-260215`、`kimi-k2.6`、`claude-sonnet-4-6`、`gemini-3.1-flash-lite-preview` 等。 + +## 图像生成 + +```json +{ + "skills": { + "image-generation": { + "model": "gpt-image-2" + } + } +} +``` + +| 模型 ID | 别名 | +| --- | --- | +| `gpt-image-2` | OpenAI | +| `gemini-3.1-flash-image-preview` | Nano Banana 2 | +| `gemini-3-pro-image-preview` | Nano Banana Pro | +| `seedream-5.0-lite` | 字节豆包 Seedream | + +## 语音识别 + +```json +{ + "voice_to_text": "linkai" +} +``` + +ASR 固定使用 Whisper,凭证自动复用 `linkai_api_key`。 + +## 语音合成 + +语音合成网关下支持多个底层 TTS 引擎,按 `text_to_voice_model` 选择引擎,音色随引擎切换。 + +```json +{ + "text_to_voice": "linkai", + "text_to_voice_model": "doubao", + "tts_voice_id": "BV001_streaming" +} +``` + +| 
`text_to_voice_model` | 引擎说明 | +| --- | --- | +| `tts-1` | OpenAI · 多语种通用(音色 `alloy` / `nova` / `echo` 等) | +| `doubao` | 字节豆包 · 中文音色丰富 | +| `baidu` | 百度 · 中文主播音色 | + +不同引擎对应的音色不同,建议在 Web 控制台「模型管理 → 语音合成」中可视化选择。 + +## 向量 + +```json +{ + "embedding_provider": "linkai", + "embedding_model": "text-embedding-3-small" +} +``` + +默认模型 `text-embedding-3-small`(OpenAI 兼容)。修改 embedding 后需执行 `/memory rebuild-index` 命令重建索引。 diff --git a/docs/zh/models/mimo.mdx b/docs/zh/models/mimo.mdx new file mode 100644 index 00000000..ea445df9 --- /dev/null +++ b/docs/zh/models/mimo.mdx @@ -0,0 +1,135 @@ +--- +title: 小米 MiMo +description: 小米 MiMo 模型配置(文本对话 + 图像理解 + 语音合成) +--- + +小米 MiMo 是原生全模态大模型,单 `mimo_api_key` 即可同时启用文本对话、图像理解与语音合成。 + + + 通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。 + + +## 文本对话 + +```json +{ + "model": "mimo-v2.5-pro", + "mimo_api_key": "YOUR_API_KEY", + "mimo_api_base": "https://api.xiaomimimo.com/v1" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `model` | 默认推荐 `mimo-v2.5-pro`,也可使用 `mimo-v2.5` | +| `mimo_api_key` | 在 [MiMo 开放平台](https://platform.xiaomimimo.com/console/api-keys) 创建 | +| `mimo_api_base` | 可选,默认为 `https://api.xiaomimimo.com/v1` | + +### 模型选择 + +| 模型 | 适用场景 | +| --- | --- | +| `mimo-v2.5-pro` | 旗舰,原生全模态 + Agent 能力,最高 100 万 tokens 上下文 | +| `mimo-v2.5` | 综合版,原生全模态(文本 / 图像 / 视频 / 音频) | + +## 思考模式 + +MiMo V2.5 系列默认开启「思考模式」:模型在输出最终回答前会先输出 `reasoning_content`(思维链),提升复杂任务表现。 + +通过全局配置 `enable_thinking` 控制是否展示(也可在 Web 控制台 - 配置页面切换): + +```json +{ + "enable_thinking": true +} +``` + +## 图像理解 + +配置 `mimo_api_key` 后,Agent 的 Vision 工具可以自动使用 MiMo 视觉模型: + +- 当主模型本身是多模态时(`mimo-v2.5-pro` / `mimo-v2.5`),直接由主模型识别图像,无需额外配置 +- 当主模型是其他厂商时,Vision 工具会根据顺序自动 fallback 到 `mimo-v2.5-pro` + +如需手动指定 Vision 模型,可在配置文件中显式配置: + +```json +{ + "tools": { + "vision": { + "provider": "mimo", + "model": "mimo-v2.5-pro" + } + } +} +``` + +## 语音合成 + +```json +{ + "text_to_voice": "mimo", + "text_to_voice_model": "mimo-v2.5-tts", + "tts_voice_id": "冰糖" +} +``` + +| 参数 | 说明 | +| --- | --- | +| 
`text_to_voice_model` | 当前仅支持 `mimo-v2.5-tts`(预置音色 + 唱歌模式) | +| `tts_voice_id` | 预置音色名(中文音色直接使用中文名作为 ID) | + +### 预置音色 + +| 音色 ID | 说明 | +| --- | --- | +| `冰糖` | 中文 · 女声(默认) | +| `茉莉` | 中文 · 女声 | +| `苏打` | 中文 · 男声 | +| `白桦` | 中文 · 男声 | +| `Mia` | 英文 · 女声 | +| `Chloe` | 英文 · 女声 | +| `Milo` | 英文 · 男声 | +| `Dean` | 英文 · 男声 | + +也可在 Web 控制台的「模型管理 → 语音合成」下拉框中可视化选择。 + +### 风格控制 + +MiMo TTS 支持在合成文本中嵌入 **音频标签** 来控制情绪、语调、方言、角色甚至唱歌。标签需出现在 **最终被合成为语音的文本(即 Agent 回复内容)** 中,整体风格标签写在开头: + +``` +(风格)待合成内容 +``` + +支持半角 `()`、全角 `()` 或 `[]` 三种括号。常见风格示例: + +| 类型 | 示例标签 | +| --- | --- | +| 基础情绪 | `开心` `悲伤` `愤怒` `恐惧` `惊讶` `兴奋` `委屈` `平静` `冷漠` | +| 复合情绪 | `怅然` `欣慰` `无奈` `愧疚` `释然` `忐忑` `动情` | +| 整体语调 | `温柔` `高冷` `活泼` `严肃` `慵懒` `俏皮` `深沉` `干练` `凌厉` | +| 音色定位 | `磁性` `醇厚` `清亮` `空灵` `稚嫩` `苍老` `甜美` `沙哑` | +| 人设腔调 | `夹子音` `御姐音` `正太音` `大叔音` `台湾腔` | +| 方言 | `东北话` `四川话` `河南话` `粤语` | +| 角色扮演 | `孙悟空` `林黛玉` | +| 唱歌 | `唱歌`(等价于 `sing` / `singing`) | + +示例: + +- (磁性)夜已经深了,城市还在呼吸。 +- (东北话)哎呀妈呀,这天儿也忒冷了吧! +- (粤语)呢个真係好正啊! +- (唱歌)原谅我这一生不羁放纵爱自由… + +也可以在文本任意位置插入细粒度音频标签来控制呼吸、笑声、停顿等,例如: + +``` +(紧张,深呼吸)呼……冷静,冷静。(语速加快)自我介绍我背了五十遍了,应该没问题。 +``` + +完整标签列表参见 [MiMo 语音合成文档](https://platform.xiaomimimo.com/docs/zh-CN/usage-guide/speech-synthesis-v2.5)。 + + + CowAgent 在调用 TTS 时会将 Agent 的回复原文(含 `(...)` 标签)直接送入 MiMo 合成。你可以在人设 / 系统提示词里要求模型「在回复开头用 `(风格)` 标签控制语气」,即可让 IM 渠道(微信 / 飞书 / 钉钉 / 企微)的语音回复带上情绪、方言、唱歌等效果。 + diff --git a/docs/zh/models/minimax.mdx b/docs/zh/models/minimax.mdx new file mode 100644 index 00000000..8282f88b --- /dev/null +++ b/docs/zh/models/minimax.mdx @@ -0,0 +1,71 @@ +--- +title: MiniMax +description: MiniMax 模型配置(文本 / 图像理解 / 图像生成 / 语音合成) +--- + +MiniMax 支持文本对话、图像理解、图像生成与语音合成,一份 `minimax_api_key` 即可启用全部能力。 + + + 通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。 + + +## 文本对话 + +```json +{ + "model": "MiniMax-M2.7", + "minimax_api_key": "YOUR_API_KEY" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `model` | 可填 `MiniMax-M2.7`、`MiniMax-M2.7-highspeed`、`MiniMax-M2.5`、`MiniMax-M2.1`、`MiniMax-M2.1-lightning`、`MiniMax-M2` 等 | +| 
`minimax_api_key` | 在 [MiniMax 控制台](https://platform.minimaxi.com/user-center/basic-information/interface-key) 创建 | + +## 图像理解 + +MiniMax 的 M2.x 系列 chat 模型本身不支持视觉,视觉调用统一路由到 `MiniMax-Text-01`。配置 `minimax_api_key` 后 Agent 的 Vision 工具会自动使用该模型,无需在配置文件中显式指定。 + +## 图像生成 + +```json +{ + "skills": { + "image-generation": { + "model": "image-01" + } + } +} +``` + +可选模型:`image-01`。 + +## 语音合成 + +```json +{ + "text_to_voice": "minimax", + "text_to_voice_model": "speech-2.8-hd", + "tts_voice_id": "female-shaonv" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `text_to_voice_model` | `speech-2.8-hd`(情绪渲染、自然听感)、`speech-2.8-turbo`(极速)、`speech-2.6-hd`、`speech-2.6-turbo` | +| `tts_voice_id` | 音色 ID,支持中文 / 粤语 / 英 / 日 / 韩,共 70+ 种 | + +常用音色示例: + +| 音色 ID | 说明 | +| --- | --- | +| `female-shaonv` | 中文 · 少女(女) | +| `female-yujie` | 中文 · 御姐(女) | +| `female-tianmei` | 中文 · 甜美女性(女) | +| `male-qn-jingying` | 中文 · 精英青年(男) | +| `male-qn-badao` | 中文 · 霸道青年(男) | +| `Cantonese_GentleLady` | 粤语 · 温柔女声 | +| `English_Graceful_Lady` | 英文 · Graceful Lady | + +完整音色(中文 / 粤语 / 英 / 日 / 韩共 70+ 种)可参考 [系统音色列表](https://platform.minimaxi.com/docs/faq/system-voice-id),也可在 Web 控制台的「模型管理 → 语音合成」下拉框中可视化选择。 diff --git a/docs/zh/models/openai.mdx b/docs/zh/models/openai.mdx new file mode 100644 index 00000000..aad83c8f --- /dev/null +++ b/docs/zh/models/openai.mdx @@ -0,0 +1,103 @@ +--- +title: OpenAI +description: OpenAI 模型配置(文本 / 视觉 / 图像 / 语音 / 向量) +--- + +OpenAI 是覆盖最完整的厂商,可同时承担文本对话、视觉理解、图像生成、语音识别(ASR)、语音合成(TTS)和向量(Embedding)能力。一份 `open_ai_api_key` 即可让 Agent 用到全部能力。 + + + 通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。 + + + +## 文本对话 + +```json +{ + "model": "gpt-5.5", + "open_ai_api_key": "YOUR_API_KEY", + "open_ai_api_base": "https://api.openai.com/v1" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `model` | 与 OpenAI 接口的 [model 参数](https://platform.openai.com/docs/models) 一致,支持 `gpt-5.5`、`gpt-5.4`、`gpt-5.4-mini`、`gpt-5.4-nano`、`gpt-5` 系列、`gpt-4.1`、o 系列等;Agent 模式默认 `gpt-5.5`,追求性价比可改为 `gpt-5.4` | +| `open_ai_api_key` | 在 [OpenAI 
平台](https://platform.openai.com/api-keys) 创建 | +| `open_ai_api_base` | 可选,修改可接入第三方代理 | +| `bot_type` | 使用 OpenAI 官方模型时无需填写;通过兼容协议接入厂商模型时需设为 `openai` | + +## 图像理解 + +`gpt-5.5`、`gpt-5.4`、`gpt-4o`、`gpt-4.1` 等 OpenAI 模型均原生支持视觉,配置 `open_ai_api_key` 后 Agent 的 Vision 工具会自动使用主模型识别图像。若主模型不支持视觉或希望显式指定,可在配置文件中配置: + +```json +{ + "tools": { + "vision": { + "model": "gpt-5.4-mini" + } + } +} +``` + +支持的 Vision 模型:`gpt-5.5`、`gpt-5.4`、`gpt-5.4-mini`、`gpt-5.4-nano`、`gpt-5`、`gpt-4.1`、`gpt-4.1-mini`、`gpt-4o`。 + +## 图像生成 + +在配置文件中指定图像生成模型,Agent 调用图像生成技能时会自动路由到 OpenAI: + +```json +{ + "skills": { + "image-generation": { + "model": "gpt-image-2" + } + } +} +``` + +支持的图像生成模型:`gpt-image-2`、`gpt-image-1`。 + +## 语音识别 + +```json +{ + "voice_to_text": "openai", + "voice_to_text_model": "gpt-4o-mini-transcribe" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `voice_to_text` | 设为 `openai` 启用 OpenAI 语音识别 | +| `voice_to_text_model` | 可选,默认 `gpt-4o-mini-transcribe`;也可填 `gpt-4o-transcribe`、`whisper-1` | + +凭证自动复用 `open_ai_api_key`。 + +## 语音合成 + +```json +{ + "text_to_voice": "openai", + "text_to_voice_model": "tts-1", + "tts_voice_id": "alloy" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `text_to_voice_model` | `tts-1`、`tts-1-hd`、`gpt-4o-mini-tts` | +| `tts_voice_id` | 音色:`alloy`、`echo`、`fable`、`onyx`、`nova`、`shimmer`、`ash`、`ballad`、`coral`、`sage`、`verse` | + +## 向量 + +```json +{ + "embedding_provider": "openai", + "embedding_model": "text-embedding-3-small" +} +``` + +可选模型:`text-embedding-3-small`、`text-embedding-3-large`、`text-embedding-ada-002`。修改 embedding 后需执行 `/memory rebuild-index` 命令重建索引。 + diff --git a/docs/zh/models/qianfan.mdx b/docs/zh/models/qianfan.mdx new file mode 100644 index 00000000..bdd87214 --- /dev/null +++ b/docs/zh/models/qianfan.mdx @@ -0,0 +1,59 @@ +--- +title: 百度千帆 +description: 百度千帆 ERNIE 模型配置(文本对话 + 图像理解) +--- + +百度千帆提供 ERNIE 系列模型,支持文本对话与图像理解。 + + + 通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。 + + +## 文本对话 + +```json +{ + "model": "ernie-5.1", + "qianfan_api_key": "YOUR_API_KEY", + 
"qianfan_api_base": "https://qianfan.baidubce.com/v2" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `model` | 默认推荐使用 `ernie-5.1`;也可使用 `ernie-5.0`、`ernie-x1.1`、`ernie-4.5-turbo-128k`、`ernie-4.5-turbo-32k` | +| `qianfan_api_key` | 千帆 API Key,格式通常以 `bce-v3/` 开头 | +| `qianfan_api_base` | 可选,默认为 `https://qianfan.baidubce.com/v2` | + +### 模型选择 + +| 模型 | 适用场景 | +| --- | --- | +| `ernie-5.1` | 默认推荐,文心新一代旗舰模型,综合能力最强 | +| `ernie-5.0` | 上一代旗舰模型,综合能力优异 | +| `ernie-x1.1` | 深度思考推理模型,幻觉更低、指令遵循与工具调用更强 | +| `ernie-4.5-turbo-128k` | 长上下文和通用对话 | +| `ernie-4.5-turbo-32k` | 通用对话,成本和上下文更均衡 | + +## 图像理解 + +配置 `qianfan_api_key` 后,Agent 的 Vision 工具可以自动使用千帆视觉模型: + +- 当主模型本身是多模态时(如 `ernie-5.1`、`ernie-5.0`、`ernie-x1.1`、`ernie-4.5-turbo-vl`),直接由主模型识别图像,无需额外配置 +- 当主模型是纯文本时(如 `ernie-4.5-turbo-128k`),Vision 工具会自动 fallback 到 `ernie-4.5-turbo-vl` + +如需手动指定 Vision 模型,可在配置文件中显式配置: + +```json +{ + "tools": { + "vision": { + "model": "ernie-4.5-turbo-vl" + } + } +} +``` + + + 新配置推荐使用 `qianfan_api_key`。旧的 `wenxin`、`wenxin-4`、`baidu_wenxin_api_key`、`baidu_wenxin_secret_key` 配置仍保持兼容。 + diff --git a/docs/zh/models/qwen.mdx b/docs/zh/models/qwen.mdx new file mode 100644 index 00000000..765bae64 --- /dev/null +++ b/docs/zh/models/qwen.mdx @@ -0,0 +1,112 @@ +--- +title: 通义千问 Qwen +description: 通义千问模型配置(文本 / 图像理解 / 图像生成 / 语音识别 / 语音合成 / 向量) +--- + +通义千问(DashScope / 百炼)是国内覆盖最完整的厂商之一,文本、图像理解、图像生成、语音识别、语音合成与向量能力均可用一份 `dashscope_api_key` 启用。 + + + 通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。 + + +## 文本对话 + +```json +{ + "model": "qwen3.6-plus", + "dashscope_api_key": "YOUR_API_KEY" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `model` | 可填 `qwen3.6-plus`、`qwen3.7-max`、`qwen3.5-plus`、`qwen3-max`、`qwen-max`、`qwen-plus`、`qwen-turbo`、`qwq-plus` 等 | +| `dashscope_api_key` | 在 [百炼控制台](https://bailian.console.aliyun.com/?tab=model#/api-key) 创建,参考 [官方文档](https://bailian.console.aliyun.com/?tab=api#/api) | + +## 图像理解 + +配置 `dashscope_api_key` 后 Agent 的 Vision 工具会自动调用千问的视觉模型识别图像。`qwen3-max` / `qwen3.5-plus` / `qwen3.6-plus` 
等模型本身就是多模态;若主模型是纯文本(如 `qwen-turbo`),会自动回落到 `qwen-vl-max`。 + +如需手动指定 Vision 模型: + +```json +{ + "tools": { + "vision": { + "model": "qwen3.6-plus" + } + } +} +``` + +支持模型:`qwen3.6-plus`、`qwen3.5-plus`、`qwen3-max`。 + +## 图像生成 + +```json +{ + "skills": { + "image-generation": { + "model": "qwen-image-2.0" + } + } +} +``` + +可选模型:`qwen-image-2.0`、`qwen-image-2.0-pro`。 + +## 语音识别 + +```json +{ + "voice_to_text": "dashscope", + "voice_to_text_model": "qwen3-asr-flash" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `voice_to_text` | 设为 `dashscope` 启用通义千问 ASR | +| `voice_to_text_model` | 可选,默认 `qwen3-asr-flash` | + +凭证自动复用 `dashscope_api_key`。单段音频建议小于 10MB、时长不超过 300 秒。 + +## 语音合成 + +```json +{ + "text_to_voice": "dashscope", + "text_to_voice_model": "qwen3-tts-flash", + "tts_voice_id": "Cherry" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `text_to_voice_model` | 可选,默认 `qwen3-tts-flash`,覆盖普通话、方言与主流外语 | +| `tts_voice_id` | 音色 ID,详见下方常用列表 | + +常用音色示例: + +| 音色 ID | 说明 | +| --- | --- | +| `Cherry` | 芊悦 · 阳光女声 | +| `Serena` | 苏瑶 · 温柔女声 | +| `Ethan` | 晨煦 · 阳光男声 | +| `Chelsie` | 千雪 · 二次元少女 | +| `Dylan` | 北京话 · 晓东 | +| `Rocky` | 粤语 · 阿强 | +| `Sunny` | 四川话 · 晴儿 | + +完整音色(普通话 / 各地方言 / 双语等)可在 Web 控制台的「模型管理 → 语音合成」下拉框中可视化选择。 + +## 向量 + +```json +{ + "embedding_provider": "dashscope", + "embedding_model": "text-embedding-v4" +} +``` + +默认模型 `text-embedding-v4`。修改 embedding 后需执行 `/memory rebuild-index` 命令重建索引。 diff --git a/docs/zh/releases/overview.mdx b/docs/zh/releases/overview.mdx new file mode 100644 index 00000000..9a426982 --- /dev/null +++ b/docs/zh/releases/overview.mdx @@ -0,0 +1,32 @@ +--- +title: 更新日志 +description: CowAgent 版本更新历史 +--- + +| 版本 | 日期 | 说明 | +| --- | --- | --- | +| [2.0.9](/zh/releases/v2.0.9) | 2026.05.22 | 新增模型管理、MCP 协议支持、浏览器登录态持久化、新模型接入(gpt-5.5、gemini-3.5-flash、qwen3.7-max 等)、部署安全加固 | +| [2.0.8](/zh/releases/v2.0.8) | 2026.05.06 | 飞书渠道全面升级(语音、流式输出和Markdown、扫码一键接入)、DeepSeek V4和百度模型新增、定时任务工具增强 | +| [2.0.7](/zh/releases/v2.0.7) | 2026.04.22 | 图像生成技能(六厂商自动路由)、新模型支持(Kimi 
K2.6、Claude Opus 4.7、GLM 5.1)、知识库增强、Web 控制台优化 | +| [2.0.6](/zh/releases/v2.0.6) | 2026.04.14 | 项目更名、知识库系统、梦境记忆蒸馏、上下文智能压缩、Web 控制台多会话及多项优化 | +| [2.0.5](/zh/releases/v2.0.5) | 2026.04.01 | Cow CLI、Skill Hub 开源、浏览器工具、企微扫码创建、多项优化和修复 | +| [2.0.4](/zh/releases/v2.0.4) | 2026.03.22 | 新增个人微信通道、新模型支持、日文文档、脚本重构及多项修复 | +| [2.0.3](/zh/releases/v2.0.3) | 2026.03.18 | 新增企微智能机器人和 QQ 通道、支持Coding Plan、新增多个模型、Web端文件处理、记忆系统升级 | +| [2.0.2](/zh/releases/v2.0.2) | 2026.02.27 | Web 控制台升级、多通道同时运行、会话持久化 | +| [2.0.1](/zh/releases/v2.0.1) | 2026.02.13 | 内置 Web Search 工具、智能上下文管理、多项修复 | +| [2.0.0](/zh/releases/v2.0.0) | 2026.02.03 | 全面升级为超级 Agent 助理 | +| 1.7.6 | 2025.05.23 | Web Channel 优化、AgentMesh 多智能体插件 | +| 1.7.5 | 2025.04.11 | DeepSeek 模型 | +| 1.7.4 | 2024.12.13 | Gemini 2.0 模型、Web Channel | +| 1.7.3 | 2024.10.31 | 稳定性提升、数据库功能 | +| 1.7.2 | 2024.09.26 | 一键安装脚本、o1 模型 | +| 1.7.0 | 2024.08.02 | 讯飞 4.0 模型、知识库引用 | +| 1.6.9 | 2024.07.19 | gpt-4o-mini、阿里语音识别 | +| 1.6.8 | 2024.07.05 | Claude 3.5、Gemini 1.5 Pro | +| 1.6.0 | 2024.04.26 | Kimi 接入、gpt-4-turbo 升级 | +| 1.5.8 | 2024.03.26 | GLM-4、Claude-3、edge-tts | +| 1.5.2 | 2023.11.10 | 飞书通道、图像识别对话 | +| 1.5.0 | 2023.11.10 | gpt-4-turbo、dall-e-3、tts 多模态 | +| 1.0.0 | 2022.12.12 | 项目创建,首次接入 ChatGPT 模型 | + +更多历史版本请查看 [GitHub Releases](https://github.com/zhayujie/CowAgent/releases)。 diff --git a/docs/zh/releases/v2.0.0.mdx b/docs/zh/releases/v2.0.0.mdx new file mode 100644 index 00000000..3436bc19 --- /dev/null +++ b/docs/zh/releases/v2.0.0.mdx @@ -0,0 +1,105 @@ +--- +title: v2.0.0 +description: CowAgent 2.0 - 从聊天机器人到超级智能助理的全面升级 +--- + +CowAgent 2.0 实现了从聊天机器人到**超级智能助理**的全面升级!现在它能够主动思考和规划任务、拥有长期记忆、操作计算机和外部资源、创造和执行技能,真正理解你并和你一起成长。 + +**发布日期**:2026.02.03 | [GitHub Release](https://github.com/zhayujie/CowAgent/releases/tag/2.0.0) + +## 重点更新 + +### Agent 核心能力 + +- **复杂任务规划**:能够理解复杂任务并自主规划执行,持续思考和调用工具直到完成目标,支持多轮推理和上下文理解 +- **长期记忆**:自动将对话记忆持久化至本地文件和数据库中,包括全局记忆和天级记忆,支持关键词及向量检索 +- **内置系统工具**:内置实现 10+ 种工具,包括文件操作、Bash 终端、浏览器、文件发送、定时任务、记忆管理等 +- **Skills**:新增 Skill 
运行引擎,内置多种技能,并支持通过自然语言对话完成自定义 Skills 开发 +- **安全和成本**:通过密钥管理工具、提示词控制、系统权限等手段控制 Agent 的访问安全;通过最大记忆轮次、最大上下文 token、工具执行步数对 token 成本进行限制 + +### 其他更新 + +- **渠道优化**:飞书及钉钉接入渠道支持长连接接入(无需公网 IP)、支持图片/文件消息的接收和发送 +- **模型更新**:新增 claude-sonnet-4-5、gemini-3-pro-preview、glm-4.7、MiniMax-M2.1、qwen3-max 等最新模型 +- **部署优化**:增加一键安装、配置、运行、管理的脚本,简化部署流程 + +## 长期记忆系统 + +Agent 会在用户分享重要信息时主动存储,也会在对话达到一定长度时自动提取摘要。支持语义搜索和向量检索的混合检索模式。 + +**首次启动**时,Agent 会主动询问关键信息,并记录至工作空间(默认 `~/cow`)中的智能体设定、用户身份、记忆文件中。 + +**长期对话**中,Agent 会智能记录或检索记忆,不断更新自身设定、用户偏好,总结经验和教训,真正实现自主思考和持续成长。 + + + + + +## 任务规划与工具调用 + +Agent 根据任务需求智能选择和调用工具,完成各类复杂操作。 + +### 终端和文件访问 + +最基础和核心的工具能力,用户可通过手机端与 Agent 交互,操作个人电脑或服务器上的资源: + + + + + +### 应用编程能力 + +基于编程能力和系统访问能力,Agent 可实现从信息搜索、素材生成、编码、测试、部署、Nginx 配置、发布的 **Vibecoding 全流程**,通过手机端一句命令完成应用快速 demo。 + + + + + +### 定时任务 + +支持 **一次性任务、固定时间间隔、Cron 表达式** 三种形式,任务触发可选择 **固定消息发送** 或 **Agent 动态任务执行** 两种模式: + + + + + +### 环境变量管理 + +通过 `env_config` 工具管理技能所需密钥,支持对话式更新,内置安全保护和脱敏策略: + + + + + +## 技能系统 + +每个 Skill 由说明文件、运行脚本(可选)、资源(可选)组成,为 Agent 提供无限扩展性。 + +### 技能创造器 + +通过对话方式快速创建技能,将工作流程固化或对接任意第三方接口: + + + + + +### 网页搜索和图像识别 + +- **网页搜索**:内置 `web_search` 工具,支持多种搜索引擎,配置对应 API Key 即可使用 +- **图像识别**:支持 `gpt-4.1-mini`、`gpt-4.1` 等模型,配置 `OPENAI_API_KEY` 即可使用 + + + + + +### 三方知识库和插件 + +`linkai-agent` 技能可将 [LinkAI](https://link-ai.tech/) 上的所有智能体作为 Skill 使用,实现多智能体决策: + + + + + +## 参与共建 + +2.0 版本后,项目将持续升级 Agent 能力、拓展接入渠道、内置工具、技能系统,降低模型成本和提升安全性。欢迎 [提出反馈](https://github.com/zhayujie/CowAgent/issues) 和 [贡献代码](https://github.com/zhayujie/CowAgent/pulls)。 diff --git a/docs/zh/releases/v2.0.1.mdx b/docs/zh/releases/v2.0.1.mdx new file mode 100644 index 00000000..da7b6745 --- /dev/null +++ b/docs/zh/releases/v2.0.1.mdx @@ -0,0 +1,36 @@ +--- +title: v2.0.1 +description: CowAgent 2.0.1 - 内置 Web Search、智能上下文管理、多项修复 +--- + +**发布日期**:2026.02 | [GitHub Release](https://github.com/zhayujie/CowAgent/releases/tag/2.0.1) | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.0..2.0.1) + +## 新特性 + +- **内置 Web Search 
工具**:将网络搜索作为 Agent 内置工具集成,降低决策成本 ([4f0ea5d](https://github.com/zhayujie/CowAgent/commit/4f0ea5d7568d61db91ff69c91c429e785fd1b1c2)) +- **Claude Opus 4.6 模型支持**:新增对 Claude Opus 4.6 模型的支持 ([#2661](https://github.com/zhayujie/CowAgent/pull/2661)) +- **企业微信图片消息识别**:支持企业微信渠道的图片消息识别功能 ([#2667](https://github.com/zhayujie/CowAgent/pull/2667)) + +## 优化 + +- **智能上下文管理**:解决聊天上下文溢出问题,新增智能上下文裁剪策略,防止 token 超限 ([cea7fb7](https://github.com/zhayujie/CowAgent/commit/cea7fb7490c53454602bf05955a0e9f059bcf0fd), [8acf2db](https://github.com/zhayujie/CowAgent/commit/8acf2dbdfe713b84ad74b761b7f86674b1c1904d)) [#2663](https://github.com/zhayujie/CowAgent/issues/2663) +- **运行时信息动态更新**:通过动态函数方案实现系统提示词中时间戳等运行时信息的自动更新 ([#2655](https://github.com/zhayujie/CowAgent/pull/2655), [#2657](https://github.com/zhayujie/CowAgent/pull/2657)) +- **Skill 提示词优化**:改进 Skill 系统提示词生成逻辑,简化工具描述,提升 Agent 表现 ([6c21833](https://github.com/zhayujie/CowAgent/commit/6c218331b1f1208ea8be6bf226936d3b556ade3e)) +- **智谱 AI 自定义 API Base URL**:支持智谱 AI 配置自定义 API Base URL ([#2660](https://github.com/zhayujie/CowAgent/pull/2660)) +- **启动脚本优化**:改进 `run.sh` 脚本的交互体验和配置流程 ([#2656](https://github.com/zhayujie/CowAgent/pull/2656)) +- **决策轮次日志**:新增 Agent 决策轮次的日志记录,便于调试 ([cb303e6](https://github.com/zhayujie/CowAgent/commit/cb303e6109c50c8dfef1f5e6c1ec47223bf3cd11)) + +## 问题修复 + +- **定时任务记忆丢失**:修复 Scheduler 调度器导致的记忆丢失问题 ([a77a874](https://github.com/zhayujie/CowAgent/commit/a77a8741b500a408c6f5c8868856fb4b018fe9db)) +- **空工具调用与超长结果**:修复空 tool calls 及过长工具返回结果的异常处理 ([0542700](https://github.com/zhayujie/CowAgent/commit/0542700f9091ebb08c1a56103b0f0f45f24aa621)) +- **OpenAI Function Call**:修复 OpenAI 模型的 function call 调用兼容性问题 ([158c87a](https://github.com/zhayujie/CowAgent/commit/158c87ab8b05bae054cc1b4eacdbb64fc1062ba9)) +- **Claude 工具名字段**:移除 Claude 模型响应中多余的 tool name 字段 ([eec10cb](https://github.com/zhayujie/CowAgent/commit/eec10cb5db6a3d5bc12ef606606532237d2c5f6e)) +- **MiniMax 推理优化**:优化 MiniMax 模型 reasoning content 处理,隐藏思考过程输出 
([c72cda3](https://github.com/zhayujie/CowAgent/commit/c72cda33864bd1542012ee6e0a8bd8c6c88cb5ed), [72b1cac](https://github.com/zhayujie/CowAgent/commit/72b1cacea1ba0d1f3dedacbab2e088e98fd7e172)) +- **智谱 AI 思考过程**:隐藏智谱 AI 模型的思考过程展示 ([72b1cac](https://github.com/zhayujie/CowAgent/commit/72b1cacea1ba0d1f3dedacbab2e088e98fd7e172)) +- **飞书连接与证书**:修复飞书渠道的 SSL 证书错误和连接异常问题 ([229b14b](https://github.com/zhayujie/CowAgent/commit/229b14b6fcabe7123d53cab1dea39f38dab26d6d), [8674421](https://github.com/zhayujie/CowAgent/commit/867442155e7f095b4f38b0856f8c1d8312b5fcf7)) +- **model_type 类型校验**:修复非字符串 `model_type` 导致的 `AttributeError` ([#2666](https://github.com/zhayujie/CowAgent/pull/2666)) + +## 平台兼容 + +- **Windows 兼容性适配**:修复 Windows 平台下路径处理、文件编码及 `os.getuid()` 不可用等问题,涉及多个工具模块 ([051ffd7](https://github.com/zhayujie/CowAgent/commit/051ffd78a372f71a967fd3259e37fe19131f83cf), [5264f7c](https://github.com/zhayujie/CowAgent/commit/5264f7ce18360ee4db5dcb4ebe67307977d40014)) diff --git a/docs/zh/releases/v2.0.2.mdx b/docs/zh/releases/v2.0.2.mdx new file mode 100644 index 00000000..ad17bcba --- /dev/null +++ b/docs/zh/releases/v2.0.2.mdx @@ -0,0 +1,98 @@ +--- +title: v2.0.2 +description: CowAgent 2.0.2 - Web 控制台升级、多通道同时运行、会话持久化 +--- + +## ✨ 重点更新 + +### 🖥️ Web 控制台升级 + +本次对 Web 控制台进行了全面升级,支持流式对话输出、工具执行过程和思考过程的可视化展示,并支持对模型、技能、记忆、通道、Agent 配置的在线查看和管理。 + +#### 对话界面 + +支持流式输出,可实时展示 Agent 的思考过程(Reasoning)和工具调用过程(Tool Calls),更直观地观察 Agent 的决策过程: + + + +#### 模型管理 + +支持在线管理模型配置,无需手动编辑配置文件: + + + +#### 技能管理 + +支持在线查看和管理 Agent 技能(Skills): + + + +#### 记忆管理 + +支持在线查看和管理 Agent 记忆: + + + +#### 通道管理 + +支持在线管理接入通道,支持实时连接/断开操作: + + + +#### 定时任务 + +支持在线查看和管理定时任务,包括一次性任务、固定间隔、Cron 表达式等多种调度方式的可视化管理: + + + +#### 日志 + +支持在线实时查看 Agent 运行日志,便于监控运行状态和排查问题: + + + +相关提交:[f1a1413](https://github.com/zhayujie/CowAgent/commit/f1a1413), [c0702c8](https://github.com/zhayujie/CowAgent/commit/c0702c8), [394853c](https://github.com/zhayujie/CowAgent/commit/394853c), 
[1c71c4e](https://github.com/zhayujie/CowAgent/commit/1c71c4e), [5e3eccb](https://github.com/zhayujie/CowAgent/commit/5e3eccb), [e1dc037](https://github.com/zhayujie/CowAgent/commit/e1dc037), [5edbf4c](https://github.com/zhayujie/CowAgent/commit/5edbf4c), [7d258b5](https://github.com/zhayujie/CowAgent/commit/7d258b5) + +### 🔀 多通道同时运行 + +支持多个接入通道(如飞书、钉钉、企微应用、Web 等)同时运行,每个通道在独立子线程中启动,互不干扰。 + +配置方式:在 `config.json` 中通过 `channel_type` 配置多个通道,以逗号分隔,也可在 Web 控制台的通道管理页面中实时连接或断开各通道。 + +```json +{ + "channel_type": "web,feishu,dingtalk" +} +``` + +相关提交:[4694594](https://github.com/zhayujie/CowAgent/commit/4694594), [7cce224](https://github.com/zhayujie/CowAgent/commit/7cce224), [7d258b5](https://github.com/zhayujie/CowAgent/commit/7d258b5), [c9adddb](https://github.com/zhayujie/CowAgent/commit/c9adddb) + +### 💾 会话持久化 + +会话历史支持持久化存储至本地 SQLite 数据库,服务重启后会话上下文自动恢复,不再丢失。Web 控制台中的历史对话记录也会同步恢复展示。 + +相关提交:[29bfbec](https://github.com/zhayujie/CowAgent/commit/29bfbec), [9917552](https://github.com/zhayujie/CowAgent/commit/9917552), [925d728](https://github.com/zhayujie/CowAgent/commit/925d728) + +### 🤖 新增模型 + +- **Gemini 3.1 Pro Preview**:新增 `gemini-3.1-pro-preview` 模型支持 ([52d7cad](https://github.com/zhayujie/CowAgent/commit/52d7cad)) +- **Claude 4.6 Sonnet**:新增 `claude-4.6-sonnet` 模型支持 ([52d7cad](https://github.com/zhayujie/CowAgent/commit/52d7cad)) +- **Qwen3.5 Plus**:新增 `qwen3.5-plus` 模型支持 ([e59a289](https://github.com/zhayujie/CowAgent/commit/e59a289)) +- **MiniMax M2.5**:新增 `Minimax-M2.5` 模型支持 ([48db538](https://github.com/zhayujie/CowAgent/commit/48db538)) +- **GLM-5**:新增 `glm-5` 模型支持 ([48db538](https://github.com/zhayujie/CowAgent/commit/48db538)) +- **Kimi K2.5**:新增 `kimi-k2.5` 模型支持 ([48db538](https://github.com/zhayujie/CowAgent/commit/48db538)) +- **Doubao 2.0 Code**:新增 `doubao-2.0-code` 编程专用模型 ([ab28ee5](https://github.com/zhayujie/CowAgent/commit/ab28ee5)) +- **DashScope 模型**:新增阿里云 DashScope 模型名称支持 ([ce58f23](https://github.com/zhayujie/CowAgent/commit/ce58f23)) + +### 🌐 
新增官网和文档中心 + +- **官网上线**:[cowagent.ai](https://cowagent.ai/) +- **文档中心上线**:[docs.cowagent.ai](https://docs.cowagent.ai/) + +### 🐛 问题修复 + +- **Gemini 钉钉图片识别**:修复 Gemini 在钉钉通道中无法处理图片标记的问题 ([05a3304](https://github.com/zhayujie/CowAgent/commit/05a3304)) ([#2670](https://github.com/zhayujie/CowAgent/pull/2670)) Thanks [@SgtPepper114](https://github.com/SgtPepper114) +- **启动脚本依赖**:修复 `run.sh` 脚本的依赖安装问题 ([b6fc9fa](https://github.com/zhayujie/CowAgent/commit/b6fc9fa)) +- **裸异常捕获**:将代码中的 `bare except` 替换为 `except Exception`,提升异常处理规范性 ([adca89b](https://github.com/zhayujie/CowAgent/commit/adca89b)) ([#2674](https://github.com/zhayujie/CowAgent/pull/2674)) Thanks [@haosenwang1018](https://github.com/haosenwang1018) + +**发布日期**:2026.02.27 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.1...master) diff --git a/docs/zh/releases/v2.0.3.mdx b/docs/zh/releases/v2.0.3.mdx new file mode 100644 index 00000000..22d2d1f6 --- /dev/null +++ b/docs/zh/releases/v2.0.3.mdx @@ -0,0 +1,91 @@ +--- +title: v2.0.3 +description: CowAgent 2.0.3 - 新增企微智能机器人和 QQ 通道、Web 控制台文件处理、记忆系统升级 +--- + +## 🔌 新增接入通道 + +### 企业微信智能机器人 + +新增企业微信智能机器人(`wecom_bot`)通道,支持流式卡片消息输出,支持文本和图片消息的接收与回复,可在 Web 控制台中进行通道配置和管理。 + +接入文档:[企微智能机器人接入](https://docs.cowagent.ai/channels/wecom-bot)。 + +相关提交:[d4480b6](https://github.com/zhayujie/CowAgent/commit/d4480b6), [a42f31f](https://github.com/zhayujie/CowAgent/commit/a42f31f), [4ecd4df](https://github.com/zhayujie/CowAgent/commit/4ecd4df), [8b45d6c](https://github.com/zhayujie/CowAgent/commit/8b45d6c) + +### QQ 通道 + +新增 QQ 官方机器人(`qq`)通道,支持文本和图片消息的接收与回复,支持私聊和群聊场景。 + +接入文档参考:[QQ机器人接入](https://docs.cowagent.ai/channels/qq)。 + +相关提交:[005a0e1](https://github.com/zhayujie/CowAgent/commit/005a0e1), [a4d54f5](https://github.com/zhayujie/CowAgent/commit/a4d54f5) + +## 🖥️ Web 控制台支持文件输入和处理 + +Web 控制台对话界面支持文件和图片上传,可直接发送文件给 Agent 进行处理。同时 Read 工具新增对 Office 文档(Word、Excel、PPT)的解析能力。 + +相关提交:[30c6d9b](https://github.com/zhayujie/CowAgent/commit/30c6d9b) + +## 🤖 新增模型 + +- 
**GPT-5.4 系列**:新增 `gpt-5.4`、`gpt-5.4-mini`、`gpt-5.4-nano` 模型支持 ([1623deb](https://github.com/zhayujie/CowAgent/commit/1623deb)) +- **Gemini 3.1 Flash Lite Preview**:新增 `gemini-3.1-flash-lite-preview` 模型支持 ([ba915f2](https://github.com/zhayujie/CowAgent/commit/ba915f2)) + +## 💰 Coding Plan 支持 + +新增各厂商 Coding Plan(编程包月套餐)的接入支持,通过 OpenAI 兼容方式统一接入。目前已支持阿里云、MiniMax、智谱 GLM、Kimi、火山引擎等厂商。 + +详细配置参考 [Coding Plan 文档](https://docs.cowagent.ai/models/coding-plan)。 + +## 🧠 记忆系统升级 + +记忆写入(Memory Flush)升级: + +- 使用 LLM 对超出上下文窗口的对话内容进行智能摘要,生成精炼的每日记忆条目 +- 摘要在后台线程异步执行,不阻塞回复 +- 优化上下文批量裁剪策略,降低冲刷频率 +- 新增每日定时冲刷兜底机制,避免低活跃场景下记忆丢失 +- 修复上下文记忆丢失问题 + +相关提交:[022c13f](https://github.com/zhayujie/CowAgent/commit/022c13f), [c116235](https://github.com/zhayujie/CowAgent/commit/c116235) + +## 🔧 工具重构 + +- **图片识别**:将图片识别(Image Vision)从 Skill 重构为内置 Tool,新增独立的图片视觉提供方(Vision Provider)配置,提升稳定性和可维护性 ([a50fafa](https://github.com/zhayujie/CowAgent/commit/a50fafa), [3b8b562](https://github.com/zhayujie/CowAgent/commit/3b8b562)) +- **网页抓取**:将网页抓取(Web Fetch)从 Skill 重构为内置 Tool,支持远程文档文件(PDF、Word、Excel、PPT)的下载和解析 ([ccb9030](https://github.com/zhayujie/CowAgent/commit/ccb9030), [fa61744](https://github.com/zhayujie/CowAgent/commit/fa61744)) + +## 🐳 Docker 部署优化 + +- **配置模板对齐**:`docker-compose.yml` 环境变量与 `config-template.json` 对齐,补充完整的模型 API Key 和 Agent 等配置项 +- **Web 控制台端口映射**:新增 `9899` 端口映射,Docker 部署后可通过浏览器访问 Web 控制台 +- **配置热更新**:各模型 Bot 的 API Key 和 API Base 改为实时读取,通过 Web 控制台修改配置后无需重启即可生效 +- **工作空间持久化**:新增 `./cow` Volume 挂载,Agent 工作空间数据(记忆、人格、技能等)持久化到宿主机,容器重建或升级不丢失 + +## ⚡ 性能优化 + +- **启动加速**:飞书通道采用懒加载方式导入依赖,避免 4-10 秒的启动延迟 ([924dc79](https://github.com/zhayujie/CowAgent/commit/924dc79)) +- **通道稳定性**:优化通道连接稳定性,支持通道配置通过环境变量设置 ([f1c04bc](https://github.com/zhayujie/CowAgent/commit/f1c04bc), [46d97fd](https://github.com/zhayujie/CowAgent/commit/46d97fd)) + +## 🐛 问题修复 + +- **bot_type 配置**:修复 Agent 模式下 `bot_type` 配置传递问题 ([#2691](https://github.com/zhayujie/CowAgent/pull/2691)) Thanks 
[@Weikjssss](https://github.com/Weikjssss) +- **bot_type 优先级**:调整 Agent 模式下 `bot_type` 的解析优先级 ([#2692](https://github.com/zhayujie/CowAgent/pull/2692)) Thanks [@6vision](https://github.com/6vision) +- **智谱模型配置**:修复智谱 `bot_type` 命名、Web 控制台持久化及正则转义问题 ([#2693](https://github.com/zhayujie/CowAgent/pull/2693)) Thanks [@6vision](https://github.com/6vision) +- **OpenAI 兼容层**:使用 `openai_compat` 层统一错误处理 ([#2688](https://github.com/zhayujie/CowAgent/pull/2688)) Thanks [@JasonOA888](https://github.com/JasonOA888) +- **OpenAI 兼容迁移**:完成所有模型 Bot 的 `openai_compat` 迁移 ([#2689](https://github.com/zhayujie/CowAgent/pull/2689)) +- **Gemini 工具调用**:修复 Gemini 模型的工具调用匹配问题 ([eda82ba](https://github.com/zhayujie/CowAgent/commit/eda82ba)) +- **会话并发**:修复会话并发场景下的竞态条件问题 ([9879878](https://github.com/zhayujie/CowAgent/commit/9879878)) +- **历史消息恢复**:修复历史会话消息不完整问题,仅恢复 user/assistant 文本消息,剥离工具调用 ([b788a3d](https://github.com/zhayujie/CowAgent/commit/b788a3d), [a33ce97](https://github.com/zhayujie/CowAgent/commit/a33ce97)) +- **飞书群聊**:移除飞书群聊场景下对 `bot_name` 的依赖 ([b641bff](https://github.com/zhayujie/CowAgent/commit/b641bff)) +- **Safari 兼容**:修复 Safari 浏览器 IME 回车键误触发消息发送问题 ([0687916](https://github.com/zhayujie/CowAgent/commit/0687916)) +- **Windows 兼容**:修复 Windows 下 bash 风格 `$VAR` 环境变量转换为 `%VAR%` 的问题 ([7c67513](https://github.com/zhayujie/CowAgent/commit/7c67513)) +- **MiniMax 参数**:增加 MiniMax 模型的 `max_tokens` 限制 ([1767413](https://github.com/zhayujie/CowAgent/commit/1767413)) +- **.gitignore 更新**:添加 Python 目录忽略规则 ([#2683](https://github.com/zhayujie/CowAgent/pull/2683)) Thanks [@pelioo](https://github.com/pelioo) +- **AGENT.md 主动演进**:优化系统提示词中对 AGENT.md 的更新引导,从被动的"用户修改时更新"改为主动识别对话中的性格、风格变化并自动更新 + +## 📦 升级方式 + +源码部署可执行 `./run.sh update` 一键升级,或手动拉取代码后重启。详见 [更新升级文档](https://docs.cowagent.ai/guide/upgrade)。 + +**发布日期**:2026.03.18 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.2...master) diff --git a/docs/zh/releases/v2.0.4.mdx b/docs/zh/releases/v2.0.4.mdx new file mode 100644 index 
00000000..55264a4b --- /dev/null +++ b/docs/zh/releases/v2.0.4.mdx @@ -0,0 +1,51 @@ +--- +title: v2.0.4 +description: CowAgent 2.0.4 - 新增个人微信通道、新模型支持、日文文档、脚本重构及多项修复 +--- + +## 🔌 新增个人微信通道 + +新增个人微信(`weixin`)通道,微信扫描二维码即可将 CowAgent 接入个人微信,支持以下功能: + +- **消息收发**:支持文本、图片、文件、视频消息的接收与回复,支持语音消息接收和识别 +- **扫码登录**:终端显示二维码,微信扫码确认即可登录,二维码过期自动刷新 +- **凭证持久化**:登录凭证自动保存至 `~/.weixin_cow_credentials.json`,重启无需重新扫码 +- **Session 自动重连**:Session 过期后自动清除旧凭证并重新发起扫码登录 +- **Web 控制台接入**:支持在 Web 控制台中添加微信通道,扫码登录流程同步展示 +- **Docker 和脚本支持**:`run.sh` 和 `docker-compose.yml` 均已适配微信通道 + +接入文档:[微信接入](https://docs.cowagent.ai/channels/weixin)。 + +相关提交:[ce89869](https://github.com/zhayujie/CowAgent/commit/ce89869) + +## 🤖 新增模型 + +- **MiniMax-M2.7**:新增 MiniMax-M2.7 模型支持 +- **GLM-5-Turbo**:新增智谱 glm-5-turbo 模型支持 + +相关提交:[9192f6f](https://github.com/zhayujie/CowAgent/commit/9192f6f) + +## 🔧 脚本重构 + +- **run.sh 重构**:提取公共逻辑,精简脚本代码([49d8707](https://github.com/zhayujie/CowAgent/commit/49d8707)) +- **可执行权限**:修复 `run.sh` 文件权限问题 ([652156e](https://github.com/zhayujie/CowAgent/commit/652156e)) +- **PID 获取**:修复 `run.sh` 中进程 PID 获取错误的问题 ([9febb07](https://github.com/zhayujie/CowAgent/commit/9febb07)) + +## 🌍 文档更新 + +新增完整的日文文档,覆盖入门指南、通道接入、模型配置等主要章节。Thanks [@Ikko Ashimine](https://github.com/ikoamu) + +相关提交:[5487c0b](https://github.com/zhayujie/CowAgent/commit/5487c0b) + +## 🐛 问题修复 + +- **企微机器人兼容**:修复旧版 `websocket-client` 的兼容性问题,新增统一的 WebSocket 兼容层 ([bc7f627](https://github.com/zhayujie/CowAgent/commit/bc7f627)) +- **消息自动修复**:增强消息协议的容错能力,自动修复格式异常的消息序列 ([b8b57e3](https://github.com/zhayujie/CowAgent/commit/b8b57e3)) +- **飞书编码**:修复飞书通道消息和日志的编码问题 ([7d0e156](https://github.com/zhayujie/CowAgent/commit/7d0e156)) +- **飞书配置**:移除 `run.sh` 中对 `feishu_bot_name` 的冗余依赖 ([1b5be1b](https://github.com/zhayujie/CowAgent/commit/1b5be1b)) + +## 📦 升级方式 + +源码部署可执行 `./run.sh update` 一键升级,或手动拉取代码后重启。详见 [更新升级文档](https://docs.cowagent.ai/guide/upgrade)。 + +**发布日期**:2026.03.22 | [Full 
Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.3...master) diff --git a/docs/zh/releases/v2.0.5.mdx b/docs/zh/releases/v2.0.5.mdx new file mode 100644 index 00000000..bf0c0e53 --- /dev/null +++ b/docs/zh/releases/v2.0.5.mdx @@ -0,0 +1,84 @@ +--- +title: v2.0.5 +description: CowAgent 2.0.5 - Cow CLI、Skill Hub 开源、浏览器工具、企微扫码创建、DeepSeek 独立模块及多项优化 +--- + +## 🖥️ Cow CLI 命令系统 + +新增 Cow CLI 命令系统,支持在终端和对话中执行命令,实现对 CowAgent 的全方位管理: + +- **终端命令**:在系统终端中执行 `cow <命令>`,支持 `start`、`stop`、`restart`、`update`、`status`、`logs` 等服务管理操作 +- **对话命令**:在对话中输入 `/<命令>` 或 `cow <命令>`,支持 `/help`、`/status`、`/config`、`/skill`、`/context`、`/logs`、`/version` 等 +- **Web 控制台**:Web 控制台输入框输入 `/` 即可弹出指令菜单,支持方向键回溯历史输入 +- **Windows 支持**:新增 PowerShell 一键安装脚本 `scripts/run.ps1`,同时支持 `cow` 命令 + +相关文档:[命令总览](https://docs.cowagent.ai/cli) + + + +## 🧩 Cow Skill Hub 开源 + +[Cow Skill Hub](https://skills.cowagent.ai)(技能广场)正式开源并上线,提供 AI Agent 技能的浏览、搜索、安装和发布,汇集精选技能、社区贡献技能、三方技能: + +- **一键安装**:在对话中 `/skill install <名称>` 或终端 `cow skill install <名称>` 一键安装 +- **多来源支持**:支持安装 Skill Hub、GitHub、ClawHub、LinkAI 上的全部技能,支持 GitHub 批量安装和子目录指定 +- **技能搜索**:`/skill search` 和 `/skill list --remote` 浏览和搜索技能广场 +- **技能发布**:通过 [skills.cowagent.ai/submit](https://skills.cowagent.ai/submit) 提交自己的技能 +- **镜像加速**:支持 Skill Hub 镜像加速,国内环境下载更流畅 + +Skill Hub 开源仓库:[cow-skill-hub](https://github.com/zhayujie/cow-skill-hub)。 + +相关文档:[技能广场](https://docs.cowagent.ai/skills/hub)、[安装技能](https://docs.cowagent.ai/skills/install) + + + + +## 🌐 新增浏览器工具 + +新增 Browser 工具,Agent 可控制浏览器访问和操作网页: + +- **网页导航与交互**:支持 `navigate`、`click`、`fill`、`select`、`scroll`、`press` 等操作 +- **页面快照**:使用精简 DOM 快照技术,让 Agent 高效理解页面结构,导航后自动快照 +- **截图能力**:支持页面截图保存到工作区 +- **JavaScript 执行**:支持在页面中执行自定义脚本 +- **CLI 安装**:通过 `cow install-browser` 一键安装浏览器及依赖,自动适配系统环境 +- **Docker 支持**:Docker 镜像已内置浏览器安装支持 + +相关文档:[浏览器工具](https://docs.cowagent.ai/tools/browser)。 + + + + +## 🤖 企微智能机器人扫码创建 + +企业微信智能机器人通道新增扫码一键创建功能: + +- **Web 控制台扫码**:在 Web 
控制台通道页面,选择「扫码接入」模式,使用企业微信扫码即可自动创建并接入智能机器人,无需手动到企业微信后台配置 +- **手动模式保留**:同时保留「手动填写」模式,可输入已有的 Bot ID 和 Secret 接入 +- **流式推送优化**:增加推送节流,避免 WebSocket 拥塞 + +相关文档:[企微智能机器人接入](https://docs.cowagent.ai/channels/wecom-bot)。 + +相关提交:[#2735](https://github.com/zhayujie/CowAgent/pull/2735) + +Thanks [@WecomTeam](https://github.com/WecomTeam) + +## 🐛 其他优化与修复 + +- **DeepSeek 独立模块**:新增独立的 DeepSeek Bot 模块,支持 `deepseek_api_key` 专属配置,无需再通过 OpenAI 兼容方式接入([#2719](https://github.com/zhayujie/CowAgent/pull/2719))。Thanks [@6vision](https://github.com/6vision) +- **Web 控制台优化**:新增斜杠指令菜单和输入历史回溯,新增模型选项,优化移动端适配([#2731](https://github.com/zhayujie/CowAgent/pull/2731))。Thanks [@zkjqd](https://github.com/zkjqd) +- **上下文丢失**:修复上下文裁剪后丢失的问题 ([393f0c0](https://github.com/zhayujie/CowAgent/commit/393f0c0)) +- **系统提示词**:修复系统提示词未在每轮重建的问题 ([13f5fde](https://github.com/zhayujie/CowAgent/commit/13f5fde)) +- **Agent 响应**:去除 Agent 响应首尾空白字符 ([f890318](https://github.com/zhayujie/CowAgent/commit/f890318)) +- **视觉压缩**:优化视觉图片压缩策略 ([22b8ca0](https://github.com/zhayujie/CowAgent/commit/22b8ca0)) +- **Gemini 模型**:修复 GoogleGeminiBot 缺少 model 属性的问题([#2716](https://github.com/zhayujie/CowAgent/pull/2716))。Thanks [@cowagent](https://github.com/cowagent) +- **微信通道**:修复文件发送失败、文件名丢失等问题 ([6d9b7ba](https://github.com/zhayujie/CowAgent/commit/6d9b7ba)、[baf66a1](https://github.com/zhayujie/CowAgent/commit/baf66a1)、[45faa9c](https://github.com/zhayujie/CowAgent/commit/45faa9c)) +- **Docker 优化**:修复卷权限问题,精简镜像体积 ([3eb8348](https://github.com/zhayujie/CowAgent/commit/3eb8348)、[4470d4c](https://github.com/zhayujie/CowAgent/commit/4470d4c)) +- **README 排版**:优化中英文排版空格([#2723](https://github.com/zhayujie/CowAgent/pull/2723))。Thanks [@Xiaozhou345](https://github.com/Xiaozhou345) +- **安全修复**:修复 Memory Content路径遍历风险,Thanks [@August829](https://github.com/August829) + +## 📦 升级方式 + +源码部署可执行 `cow update` 或 `./run.sh update` 一键升级,或手动拉取代码后重启。详见 [更新升级文档](https://docs.cowagent.ai/guide/upgrade)。 + +**发布日期**:2026.04.01 | [Full 
Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.4...master) diff --git a/docs/zh/releases/v2.0.6.mdx b/docs/zh/releases/v2.0.6.mdx new file mode 100644 index 00000000..299be80b --- /dev/null +++ b/docs/zh/releases/v2.0.6.mdx @@ -0,0 +1,83 @@ +--- +title: v2.0.6 +description: CowAgent 2.0.6 - 知识库系统、梦境记忆蒸馏、上下文智能压缩、Web 控制台多会话及多项优化 +--- + +## 项目正式更名为 CowAgent + +项目仓库正式从 `chatgpt-on-wechat` 更名为 **CowAgent**,演进为功能完备的 AI Agent 助理。 + +- 新地址:[github.com/zhayujie/CowAgent](https://github.com/zhayujie/CowAgent),旧地址 GitHub 会自动重定向 +- CLI 命令、配置文件、文档链接均保持兼容,无需额外操作 + +## 📚 知识库系统 + +新增个人知识库系统,Agent 可自主构建和维护结构化知识,并在对话中按需检索引用。 + +- **索引驱动的自组织结构**:知识库采用 `knowledge/` 目录,按分类自动组织,每个知识页面为独立的 Markdown 文件 +- **自动写入**:向 Agent 发送文件、链接等知识,或在讨论中识别到有价值的知识时,自动创建或更新知识页面 +- **混合检索**:支持关键词全文搜索和向量语义检索,在对话中按需加载相关知识 +- **可视化**:支持文件树浏览和知识图谱可视化,文档内链接可直接跳转查看 +- **命令管理**:`/knowledge` 查看统计、`/knowledge list` 查看目录结构、`/knowledge on|off` 开关知识库 + + + +相关文档:[知识库](https://docs.cowagent.ai/knowledge) + +Inspired by Karpathy's [LLM Wiki](https://gist.github.com/karpathy/442a6bf555914893e9891c11519de94f). 
+ +## 🌙 梦境记忆蒸馏(Deep Dream) + +全新的记忆整理机制,每日自动将分散的对话记忆蒸馏为精炼的长期记忆: + +- **三层记忆流转**:对话上下文(短期)→ 天级记忆(中期)→ MEMORY.md(长期),形成完整的记忆生命周期 +- **自动蒸馏**:每日 23:55 定时执行,读取当天天级记忆和 MEMORY.md,通过 LLM 进行去重、合并、修剪,输出精炼的新版 MEMORY.md +- **梦境日记**:每次蒸馏生成一篇叙事风格的梦境日记,记录整理过程的发现和洞察,存储在 `memory/dreams/` 目录 +- **手动触发**:支持 `/memory dream [N]` 手动触发,可指定整理天数(默认 3 天,最大 30 天),完成后在对话中通知结果 +- **Web 控制台**:记忆管理页面新增「梦境日记」tab,可浏览和查看所有梦境日记 + +相关文档:[梦境蒸馏](https://docs.cowagent.ai/memory/deep-dream) + + + +## 🧠 上下文智能压缩 + +上下文超出限制时将裁剪的部分通过 LLM 总结后异步注入,保持对话连贯性: + +- **LLM 异步摘要**:裁剪的消息由 LLM 总结为关键信息,同时写入天级记忆文件和注入保留的上下文 +- **多模型兼容**:优先使用主模型进行摘要,兼容 Claude、OpenAI、MiniMax 等不同模型的消息格式要求 + +相关文档:[短期记忆](https://docs.cowagent.ai/memory/context) + +## 💬 Web 控制台升级 + +Web 控制台多项功能增强: + +- **多会话管理**:支持创建和切换多个独立会话,侧边栏展示会话列表,支持会话标题自动生成和手动编辑 +- **密码保护**:支持为控制台设置登录密码,可通过 `web_console_password` 配置项控制 +- **深度思考**:支持在 Web 端展示模型的思考过程,可通过`enable_thinking` 配置项控制 +- **定时推送**:支持定时任务结果推送到 Web 控制台 +- **消息复制**:AI 回复支持一键复制原始 Markdown 内容 + +## 🤖 模型相关 + +- **视觉识别优化**:图片识别工具优先使用主模型,支持多模型厂商自动降级。相关文档:[视觉工具](https://docs.cowagent.ai/tools/vision) +- **MiniMax 新模型**:新增 MiniMax-M2.7-highspeed 模型和 MiniMax TTS 语音合成支持。Thanks @octo-patch +- **通义千问**:新增 qwen3.6-plus 模型支持 + +## 🐛 其他优化与修复 + +- **记忆提示词优化**:`MEMORY.md` 默认注入系统提示词,精细化记忆检索和写入的触发条件,增强主动写入能力 +- **系统提示词**:优化系统提示词的风格和语气引导 +- **浏览器工具**:增强隐式交互元素检测 +- **文件发送**:修复通用文件类型(tar.gz、zip 等)未能正确发送的问题。Thanks @6vision +- **macOS 兼容**:修复网络预检超时兼容性问题。Thanks @Moliang Zhou +- **Windows 兼容**:修复 Windows 下 PowerShell 兼容性、进程更新、终端编码等多项问题 +- **Python 3.13+**:修复 Python 3.13 及以上版本缺少 `legacy-cgi` 依赖的问题 +- **个人微信**:更新个人微信通道版本 + +## 📦 升级方式 + +源码部署可执行 `cow update` 或 `./run.sh update` 一键升级,或手动拉取代码后重启。详见 [更新升级文档](https://docs.cowagent.ai/guide/upgrade)。 + +**发布日期**:2026.04.14 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.5...master) diff --git a/docs/zh/releases/v2.0.7.mdx b/docs/zh/releases/v2.0.7.mdx new file mode 100644 index 00000000..b4b6e27b --- /dev/null +++ b/docs/zh/releases/v2.0.7.mdx @@ -0,0 +1,64 @@ 
+--- +title: v2.0.7 +description: CowAgent 2.0.7 - 图像生成技能(六厂商自动路由)、新模型支持、知识库增强、Web 控制台优化及多项修复 +--- + +## 🎨 图像生成技能 + +新增图像生成内置技能,支持文生图、图生图、多图融合,支持 `GPT-Image-2`、`Nano Banana` 等多种模型: + +- **自动路由**:支持六种模型厂商自动切换,OpenAI (GPT-Image-2) → Gemini (Nano Banana) → Seedream (火山方舟) → Qwen (百炼) → MiniMax → LinkAI +- **开箱即用**:配置 API Key 即可使用,无需手动指定模型。也支持在对话中指定特定模型 +- **灵活控制**:支持 `quality`(画质)、`size`(分辨率,512/1K~4K)、`aspect_ratio`(宽高比)等参数,各厂商自动适配有效值 +- **图片编辑**:传入已有图片即可进行编辑、风格迁移、多图融合 +- **Skill 级配置**:支持通过 `config.json` 中的 `skills.image-generation.model` 固定默认模型 + +相关文档:[图像生成技能](https://docs.cowagent.ai/skills/image-generation) + +## 🤖 新模型支持 + +- **Kimi K2.6**:新增 `kimi-k2.6` 模型支持 +- **Claude Opus 4.7**:新增 `claude-opus-4-7` 模型支持 +- **GLM 5.1**:新增 `glm-5.1` 模型支持 +- **Kimi Coding Plan**:支持 Kimi Coding Plan 模式 +- **自定义模型厂商**:新增[自定义模型](https://docs.cowagent.ai/models/custom)提供方配置,方便接入本地模型及更多厂商 + +## 📚 知识库增强 + +- **嵌套目录支持**:知识库列表和展示支持多级嵌套目录 +- **根级文件展示**:知识树中显示根目录下的 `index.md`、`log.md` 等文件 +- **空状态统计修复**:排除根级文件对知识库统计的干扰,正确保持空状态 + +## 🌙 梦境记忆优化 + +- **结构化组织**:梦境记忆文件按日期自动归档,目录结构更清晰 +- **定时抖动**:每日定时触发增加随机抖动,避免集群场景下的并发冲突 + +## 🛠 技能系统改进 + +- **技能管理刷新**:`/skill` 命令执行后自动加载最新技能,确保状态同步 +- **安装来源扩展**:技能安装支持多种来源格式(URL、zip、本地文件等) + +## 💬 Web 控制台优化 + +- **智能自动滚动**:优化聊天窗口滚动逻辑,用户手动翻阅时不再强制跳到底部。Thanks @colin2060 +- **移动端适配**:侧边栏默认隐藏,支持点击遮罩关闭 +- **图片预览去重**:修复同一消息中图片重复渲染的问题 +- **推理内容截断**:深度思考内容过长时自动截断,解决前端卡顿问题 +- **会话标题修复**:修复标题自动生成的回退逻辑 + + +## 🐛 其他修复 + +- **Gemini 修复**:修复 Gemini tool call 不返回结果的问题 +- **Agent 重试**:空响应重试时不再丢弃 tool_calls +- **Docker 环境变量**:修复 Docker 环境下更新配置后环境变量未同步的问题。Thanks @sunboy0523 +- **Python 3.7 兼容**:延迟导入 `Literal` 以兼容 Python 3.7 +- **模型切换通知**:修复切换模型后 bot_type 变更通知未显示的问题。Thanks @6vision +- **配置命令增强**:`/config` 支持设置 `enable_thinking` + +## 📦 升级方式 + +源码部署可执行 `cow update` 或 `./run.sh update` 一键升级,或手动拉取代码后重启。详见 [更新升级文档](https://docs.cowagent.ai/guide/upgrade)。 + +**发布日期**:2026.04.22 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.6...2.0.7) diff --git 
a/docs/zh/releases/v2.0.8.mdx b/docs/zh/releases/v2.0.8.mdx new file mode 100644 index 00000000..ced1b967 --- /dev/null +++ b/docs/zh/releases/v2.0.8.mdx @@ -0,0 +1,63 @@ +--- +title: v2.0.8 +description: CowAgent 2.0.8 - 飞书渠道全面升级(语音、流式打字机、一键扫码接入)、DeepSeek V4 / 百度千帆支持、定时任务工具优化 +--- + +## 🪶 飞书渠道全面升级 + +### 1. 一键扫码创建飞书应用 + +不再需要手动到飞书开放平台建应用、填权限和事件订阅。Web 控制台和命令行启动时若未配置 `feishu_app_id`,会自动展示扫码入口,飞书扫码授权后自动创建机器人并回填配置,开箱即用。 + +相关文档:[飞书渠道](https://docs.cowagent.ai/channels/feishu) + +### 2. 语音消息收发 + +支持接收用户发送的飞书语音消息并自动转文本,回复也可走 TTS 以语音形式发出。同时优化了中文短语音的识别准确度。 + +### 3. 流式打字机回复 + +接入飞书 CardKit 流式卡片,**默认开启**,体验对齐 Web 端: + +- 多轮 Agent 场景下中间过场消息与最终回复分卡呈现 +- 针对 DeepSeek 等高频输出模型做了专门优化,速度与 Web 端持平 +- 不支持时自动回退为普通文本回复,无需手动配置 +- 要求飞书客户端 ≥ 7.20 + +飞书语音消息收发与流式打字机的基础能力来自社区贡献 #2791 Thanks @yangluxin613 + +## 🤖 新模型支持 + +- **DeepSeek V4 系列**:新增 `deepseek-v4-pro` / `deepseek-v4-flash`,并将默认模型切换为 `deepseek-v4-flash` +- **思考模型开关统一**:DeepSeek V4、Qwen3 等思考模型的开关行为对齐到 `enable_thinking` +- **百度千帆模型接入**:新增百度千帆厂商,支持 `ernie-5.0`、`ernie-4.5-turbo-128k` 等模型,并支持图像识别工具,相关文档查看 [百度千帆](https://docs.cowagent.ai/models/qianfan)。#2790 Thanks @jimmyzhuu +- **新增有道翻译**:`translate` 模块新增有道翻译支持 #2797 Thanks @Zmjjeff7 + +## 🛠 OpenAI 客户端重构 + +- **去 SDK 依赖**:OpenAI Bot 改为原生 HTTP 实现,启动更轻、依赖冲突更少 +- **Web 控制台提示**:模型配置 API Base 输入框加入版本路径占位提示 + +## ⏰ 定时任务记忆增强 + +- **任务结果可被追问**:定时任务的执行结果自动注入到接收方的会话历史中,下一轮对话可直接追问,无需重新交代上下文 Thanks @huangrichao2020 +- **不污染长期记忆**:注入的调度对话不会被纳入每日梦境记忆汇总,避免高频任务把记忆刷满 +- **避免越跑越慢**:调度任务自己的上下文长度自动控制在合理范围内,长期反复执行也不会越积越大、拖慢响应 + +## 🔧 工具与安全 + +- **图像识别模型**:让 `tools.vision.model` 配置真正生效,未配置时自动 fallback #2792 Thanks CNXudiandian +- **Bash 安全确认**:仅对工作区外的破坏性删除做二次确认,工作区内常规操作不再打扰 + +## 🐛 其他修复 + +- 修复 Deep Dream 在多实例场景下重复触发 +- 修复 DeepSeek 多轮对话中部分历史轮次缺失 `reasoning_content` + +## 📦 升级方式 + +源码部署可执行 `cow update` 或 `./run.sh update` 一键升级,或手动拉取代码后重启。详见 [更新升级文档](https://docs.cowagent.ai/guide/upgrade)。 + +> ⚠️ 飞书一键创建应用依赖 `lark-oapi>=1.5.5`,`cow update` 会自动拉取;手动部署请确保依赖已更新。 + +**发布日期**:2026.05.06 | [Full 
Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.7...2.0.8) diff --git a/docs/zh/releases/v2.0.9.mdx b/docs/zh/releases/v2.0.9.mdx new file mode 100644 index 00000000..957e0ced --- /dev/null +++ b/docs/zh/releases/v2.0.9.mdx @@ -0,0 +1,65 @@ +--- +title: v2.0.9 +description: CowAgent 2.0.9 - 新增模型管理、MCP 协议支持、浏览器登录态持久化、新模型接入 +--- + +## 🖥️ 新增模型管理 + +Web 控制台新增「模型」页面,按 **模型厂商 + 模型能力** 进行管理,支持对话、图像、语音、向量模型和搜索能力的配置: + +- **多厂商配置**:所有厂商的 API Key / API Base 在顶部统一维护,下方所有能力立即生效,无需重复填写 +- **图像模型**:图像理解与图像生成均可独立选择厂商和模型,未指定时跟随主模型自动选择 +- **语音模型**:语音识别和合成可独立配置,新增千问、智谱 ASR/TTS 模型 +- **向量模型**:支持配置 Embedding 模型(用于记忆及知识库检索),新增支持 OpenAI、通义、豆包、智谱等;切换模型后需执行 `/memory rebuild-index` 在线重建索引 +- **搜索能力**:联网搜索能力升级,支持博查、百度、智谱等多个厂商,自动模式下 Agent 可综合多来源搜索结果进行深度研究 + +相关文档:[模型概览](https://docs.cowagent.ai/models) + +20260522113305 + + +## 🧩 MCP 协议支持 + +支持 **MCP(Model Context Protocol)** 协议,从固定工具集扩展为开放可插拔的工具生态,任何兼容 MCP 协议的服务均可作为工具直接接入 Agent。 + +- 原生 JSON-RPC 实现,零额外依赖,同时支持 `stdio` 和 `sse` 两种传输 +- 兼容 Claude Desktop / Cursor 等主流风格的 `mcpServers` 配置,优先读取 `~/cow/mcp.json` + +相关文档:[MCP 工具](https://docs.cowagent.ai/tools/mcp)。Thanks @yangluxin613 (#2801) + +## 🌐 浏览器登录态持久化 + +针对需要登录、有反爬机制的网站,浏览器工具支持登录一次后长期复用登录态,并允许接入用户自己的真实 Chrome 以通过指纹检测: + +- **持久化用户配置(默认)**:默认使用 `~/.cow/browser_profile` 作为浏览器用户目录,登录一次后下次自动复用登录态 +- **CDP 模式**:通过 `tools.browser.cdp_endpoint` 接管真实 Chrome 浏览器,享有完整浏览器权限 + +相关文档:[浏览器工具](https://docs.cowagent.ai/tools/browser)。Thanks @leafmove (#2809) + +## 🤖 模型新增与优化 + +- **模型新增**:`gpt-5.5`、`gemini-3.5-flash`、`qwen3.7-max`、`ernie-5.1` +- **模型优化**:DeepSeek V4 支持 `reasoning_effort` 思考深度参数;修复 MiMo 等思考模型通过 OpenAI 兼容协议接入的问题 + +## 🔒 部署与安全 + +- **默认本机访问**:Web 控制台 `web_host` 配置默认绑定 `127.0.0.1`,服务器部署时可手动设置为 `0.0.0.0` 并设置密码。Thanks @August829、@yidaozhongqing、@YLChen-007、@icysun +- **前端资源完全本地化**:第三方 CSS / JS 全部本地分发,离线 / 内网环境也能正常加载控制台。Thanks @gitlayzer (#2816) + +## 🛠 体验优化与修复 + +- **TTS 适配更多通道**:Web对话、个人微信、飞书、钉钉、企微智能机器人均已支持回复语音,详情查看 [通道概览](https://docs.cowagent.ai/channels) +- 
**日志面板增强**:根据日志等级差异化高亮展示、支持根据等级筛选。Thanks @yangluxin613 (#2807) +- **Web 控制台自动启动**:程序启动后自动打开 Web 控制台。Thanks @yangluxin613 (#2804) +- **Ctrl+C 干净退出**:不再打印一长串 `KeyboardInterrupt` 堆栈。Thanks @yangluxin613 (#2806) +- **文件夹上传**:Web 端支持目录上传,路径校验适配 Windows。Thanks @TryToMakeUsBetter (#2814) +- 修复定时任务在某些情况下重复执行的问题。Thanks @CNXudiandian (#2820) +- 修复定时任务带时区时单次任务不触发的问题。Thanks @AethericSpace +- 修复执行失败的工具调用在页面刷新后不显示的问题。Thanks @a1094174619 (#2822) +- 修复企微机器人消息中包含非法控制字符导致投递失败的问题。Thanks @Jacques-Zhao (#2810) + +## 📦 升级方式 + +源码部署可执行 `cow update` 一键升级,或手动拉取代码后重启。详见 [更新升级文档](https://docs.cowagent.ai/guide/upgrade)。 + +**发布日期**:2026.05.22 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.8...2.0.9) diff --git a/docs/zh/skills/create.mdx b/docs/zh/skills/create.mdx new file mode 100644 index 00000000..45cef149 --- /dev/null +++ b/docs/zh/skills/create.mdx @@ -0,0 +1,58 @@ +--- +title: 创造技能 +description: 通过对话创建自定义技能 +--- + +CowAgent 内置了 Skill Creator,可以通过自然语言对话快速创建、安装或更新技能。 + +## 使用方式 + +直接在对话中描述你想要的技能,Agent 会自动完成创建: + +- 将工作流程固化为技能:"帮我把这个部署流程创建为一个技能" +- 对接第三方 API:"根据这个接口文档创建一个技能" +- 安装远程技能:"帮我安装 xxx 技能" + +## 创建流程 + +1. 告诉 Agent 你想创建的技能功能 +2. Agent 自动生成 `SKILL.md` 说明文件和运行脚本 +3. 技能保存到工作空间的 `~/cow/skills/` 目录 +4. 后续对话中 Agent 会自动识别并使用该技能 + + + + + +## SKILL.md 格式 + +创建的技能遵循标准的 SKILL.md 格式: + +```markdown +--- +name: my-skill +description: Brief description of the skill +metadata: + emoji: 🔧 + requires: + bins: ["curl"] + env: ["MY_API_KEY"] + primaryEnv: "MY_API_KEY" +--- + +# My Skill + +Detailed instructions... 
+``` + +| 字段 | 说明 | +| --- | --- | +| `name` | 技能名称,需与目录名一致 | +| `description` | 技能描述,Agent 据此决定是否调用 | +| `metadata.requires.bins` | 依赖的系统命令 | +| `metadata.requires.env` | 依赖的环境变量 | +| `metadata.always` | 是否始终加载(默认 false) | + + + 详细开发文档可参考 [Skill Creator 说明](https://github.com/zhayujie/CowAgent/blob/master/skills/skill-creator/SKILL.md)。 + diff --git a/docs/zh/skills/hub.mdx b/docs/zh/skills/hub.mdx new file mode 100644 index 00000000..65c8a216 --- /dev/null +++ b/docs/zh/skills/hub.mdx @@ -0,0 +1,65 @@ +--- +title: 技能广场 +description: 浏览、搜索和安装 AI Agent 技能 +--- + +[Cow Skill Hub](https://skills.cowagent.ai/) 是开源的 AI Agent 技能广场,汇集了官方推荐、社区贡献和第三方平台(GitHub、ClawHub 等)的技能。 + +开源仓库:[github.com/zhayujie/cow-skill-hub](https://github.com/zhayujie/cow-skill-hub) + + + +## 功能 + +- **浏览技能**:按类别(推荐 / 社区 / 第三方)和标签筛选 +- **搜索技能**:按名称或描述搜索 +- **查看详情**:查看技能文档、文件内容、安装命令和依赖的环境变量 +- **一键安装**:复制安装命令即可在 CowAgent 中使用 + +## 安装技能 + +在对话中或终端中执行安装命令: + + +```text 对话 +/skill install +``` + +```bash 终端 +cow skill install +``` + + +也可以在对话中浏览技能广场: + +```text +/skill list --remote +/skill search <关键词> +``` + +除了在列表中展示的精选技能,还可以通过 **CLI命令 + Skill Hub** 安装各种第三方技能(**GitHub、ClawHub、LinkAI、URL** 等)参考 [安装技能](/zh/skills/install)。 + +## 贡献技能 + +欢迎向技能广场提交你的技能: + +1. 访问 [skills.cowagent.ai/submit](https://skills.cowagent.ai/submit) +2. 使用 GitHub 或 Google 账号登录 +3. 上传包含 `SKILL.md` 的文件夹或 zip 包 +4. 自动解析技能名称、显示名称和描述,可按需修改 +5. 
提交后将经过安全检查和审核后发布 + + + +技能文件结构: + +``` +your-skill/ +├── SKILL.md # 必须,放在根目录 +├── scripts/ # 可选,运行脚本 +└── resources/ # 可选,其他资源 +``` + + + 技能基于 `SKILL.md` 文件构建,你也可以在技能详情页下载 SKILL.md,用于任何支持自定义指令的 Agent(如 OpenClaw、Cursor、Claude Code 等)。 + diff --git a/docs/zh/skills/image-generation.mdx b/docs/zh/skills/image-generation.mdx new file mode 100644 index 00000000..5a174a8a --- /dev/null +++ b/docs/zh/skills/image-generation.mdx @@ -0,0 +1,98 @@ +--- +title: image-generation - 图像生成 +description: 文生图 / 图生图 / 多图融合,支持多家厂商自动路由与回退 +--- + +通用的图像生成与编辑技能,支持 OpenAI、Gemini、Seedream(火山方舟)、Qwen(百炼)、MiniMax、LinkAI 共六家厂商。配好任意一家的 Key 即可使用,配多家可享受自动回退。 + +## 支持的模型 + +| 厂商 | 模型 / 别名 | 特点 | +| --- | --- | --- | +| OpenAI | `gpt-image-2`、`gpt-image-1` | 通用文生图,高质量,支持 `quality` 控制画质 | +| Gemini Nano Banana | `nano-banana-2`、`nano-banana-pro`、`nano-banana` | 对应 `gemini-3.1-flash`、`gemini-3-pro`、`gemini-2.5-flash` 的图像版本 | +| Seedream(火山方舟) | `seedream-5.0-lite`、`seedream-4.5` | 原生 2K–4K,最多 14 张图融合 | +| Qwen(百炼) | `qwen-image-2.0`、`qwen-image-2.0-pro` | 擅长中文排版和图文融合 | +| MiniMax | `image-01` | 简单快速 | +| LinkAI | 任意模型 | 统一网关,作为兜底 | + +## 模型选择 + +默认走「自动路由 + 失败回退」: + +1. 按 `OpenAI → Gemini → Seedream → Qwen → MiniMax → LinkAI` 顺序选第一个已配置的厂商 +2. 遇到 401、模型未开通、网络异常等错误时,自动切到下一家 +3. 
用户在对话里指定模型时(如「用 seedream 画一只猫」),对应厂商会被提到最前优先尝试 + +如需固定使用某个模型: + +```json +{ + "skills": { + "image-generation": { + "model": "seedream-5.0-lite" + } + } +} +``` + +## 配置 API Key + + + 推荐通过 [Web 控制台](/zh/channels/web) 的「模型管理」页面配置,配好的对话模型 Key 会被图像生成技能自动复用,无需重复配置。也可手动编辑配置文件或在对话中通过 `env_config` 工具临时设置。 + + +凭证统一复用主模型厂商的 Key: + +| 字段 | 对应厂商 | +| --- | --- | +| `openai_api_key` | OpenAI | +| `gemini_api_key` | Gemini | +| `ark_api_key` | 火山方舟(Seedream) | +| `dashscope_api_key` | 阿里百炼(Qwen) | +| `minimax_api_key` | MiniMax | +| `linkai_api_key` | LinkAI | + + +## 开启和关闭 + +技能会根据 API Key 自动调整状态: + +- **已配置 Key**:Agent 收到画图请求时直接调用 +- **未配置 Key**:技能仍会出现在上下文中(标记为「需要配置」),Agent 会引导用户去配 Key + +如需手动控制: + +```text +/skill disable image-generation # 关闭 +/skill enable image-generation # 重新开启 +``` + +终端等价命令:`cow skill disable image-generation` / `cow skill enable image-generation`。 + +## 参数 + +| 参数 | 类型 | 必填 | 默认 | 说明 | +| --- | --- | --- | --- | --- | +| `prompt` | string | 是 | — | 图像描述 | +| `image_url` | string / list | 否 | null | 编辑用的输入图,本地路径或 URL;传列表为多图融合 | +| `quality` | string | 否 | auto | `low` / `medium` / `high`,仅部分厂商支持 | +| `size` | string | 否 | auto | `512` / `1K` / `2K` / `3K` / `4K`,或像素值如 `1024x1024` | +| `aspect_ratio` | string | 否 | null | `1:1` / `3:2` / `2:3` / `16:9` / `9:16` / `21:9`;Gemini 还支持 `1:4` / `4:1` / `1:8` / `8:1` | + + + **质量越高、分辨率越大,耗时和成本越高。** 日常对话用默认(`auto`)或 `quality=low` + `size=1K` 即可,约 20 秒出图;做海报或明确要高清时再上 `high` + `2K/4K`,可能需要 1–5 分钟。 + + +## 常见用法 + +- **文生图**:根据描述生成插画、海报、图标、头像、分镜图等 +- **图生图**:在已有图片上改风格、换元素、加装饰、加文字等 +- **多图融合**:把多张参考图合成一张(换装、角色合影等) + + +- bash 超时建议设 600 秒:单厂商 HTTP 超时 300 秒,脚本可能依次尝试多家 +- 输入图片自动压缩到 4MB 以内、最长边不超过 4096px +- Gemini / Seedream / Qwen / MiniMax 不支持 `quality` 参数 +- Seedream 默认出 2K 图;`seedream-5.0-lite` 支持到 3K,`seedream-4.5` 支持到 4K + diff --git a/docs/zh/skills/index.mdx b/docs/zh/skills/index.mdx new file mode 100644 index 00000000..d30a2efa --- /dev/null +++ b/docs/zh/skills/index.mdx @@ -0,0 +1,65 @@ +--- +title: 技能概览 
+description: CowAgent 技能系统介绍 +--- + +技能(Skill)为 Agent 提供无限的扩展性。每个 Skill 由说明文件(`SKILL.md`)、运行脚本(可选)、资源(可选)组成,描述如何完成特定类型的任务。 + +Skill 与 Tool 的区别:Tool 是由代码实现的原子操作(如读写文件、执行命令),Skill 则是基于说明文件的高级工作流,可以组合调用多个 Tool 来完成复杂任务。 + +## 获取技能 + +CowAgent 提供多种方式获取技能: + +- [Cow 技能广场](https://skills.cowagent.ai/) — 在线浏览所有可用技能,或通过 `/skill list --remote` 在对话中浏览和安装 +- **GitHub** — 直接从 GitHub 仓库安装,支持批量安装 +- **ClawHub** — 通过 `/skill install clawhub:名称` 安装 ClawHub 上的技能 (4w+个) +- **LinkAI** — 通过 `/skill install linkai:编码` 安装 LinkAI 上的公开资源和创建的知识库/数据库/工作流/插件等资源 +- **URL** — 从 zip 压缩包或 SKILL.md 链接安装 +- **对话创建** — 通过自然语言对话让 Agent 自动创建技能 + +详细安装方式参考 [安装技能](/zh/skills/install) 和 [技能管理命令](/zh/cli/skill)。也可以通过对话 [创建技能](/zh/skills/create),或向 [Skill Hub](https://skills.cowagent.ai/submit) 贡献你的技能。 + +## 技能加载优先级 + +1. **工作空间技能**(最高):`~/cow/skills/` +2. **项目内置技能**(最低):`skills/` + +同名技能按优先级覆盖。 + +## 技能文件结构 + +``` +skills/ +├── my-skill/ +│ ├── SKILL.md # Skill description (frontmatter + instructions) +│ ├── scripts/ # Execution scripts (optional) +│ └── resources/ # Additional resources (optional) +``` + +### SKILL.md 格式 + +```markdown +--- +name: my-skill +description: Brief description of the skill +metadata: + emoji: 🔧 + requires: + bins: ["curl"] + env: ["MY_API_KEY"] + primaryEnv: "MY_API_KEY" +--- + +# My Skill + +Detailed instructions... 
+``` + +| 字段 | 说明 | +| --- | --- | +| `name` | 技能名称,需与目录名一致 | +| `description` | 技能描述,Agent 据此决定是否调用 | +| `metadata.requires.bins` | 依赖的系统命令 | +| `metadata.requires.env` | 依赖的环境变量 | +| `metadata.always` | 是否始终加载(默认 false) | diff --git a/docs/zh/skills/install.mdx b/docs/zh/skills/install.mdx new file mode 100644 index 00000000..bbec9d99 --- /dev/null +++ b/docs/zh/skills/install.mdx @@ -0,0 +1,66 @@ +--- +title: 安装技能 +description: 通过命令一键安装来自多种来源的技能 +--- + +CowAgent 支持通过统一的 `install` 命令安装来自 [Cow 技能广场](https://skills.cowagent.ai/)、GitHub、ClawHub、LinkAI 以及任意 URL 上的技能。在对话中使用 `/skill install`,在终端中使用 `cow skill install`。 + +## 从 Cow 技能广场安装 + +访问 [skills.cowagent.ai](https://skills.cowagent.ai/) 浏览所有可用技能,找到想要的技能后直接安装,例如: + +```text +/skill list --remote +/skill install pptx +``` + +## 从 GitHub 安装 + +> GitHub 上的所有技能都可以直接安装,支持仓库级批量安装和指定子目录安装,例如: + +```text +/skill install larksuite/cli +/skill install https://github.com/larksuite/cli/tree/main/skills/lark-im +``` + +## 从 ClawHub 安装 + +[ClawHub](https://clawhub.ai/) 上的所有技能 (4w+个) 都可以一键安装,例如: + + +```text +/skill install clawhub:<名称> +``` + +## 从 LinkAI 安装 + +[LinkAI](https://link-ai.tech/console) 上的所有公开资源 (1w+个应用/工作流/插件),以及自己创建的资源 (应用/工作流/知识库/数据库/插件) 都可以通过命令一键安装: + +```text +/skill install linkai:<编码> +``` + +> LinkAI 平台上创建的所有应用、工作流、知识库、数据库、插件都有唯一的 code,可在[控制台](https://link-ai.tech/console)各资源页面中进行获取并填写到命令中 + +## 从 URL 安装 + +支持 zip 压缩包和 SKILL.md 文件链接: + +```text +/skill install https://cdn.link-ai.tech/skills/pptx.zip +/skill install https://example.com/path/to/SKILL.md +``` + +## 管理技能 + +```text +/skill list # 查看已安装技能 +/skill info pptx # 查看技能详情 +/skill enable pptx # 启用技能 +/skill disable pptx # 禁用技能 +/skill uninstall pptx # 卸载技能 +``` + + + 以上所有命令在终端中使用时,将 `/skill` 替换为 `cow skill` 即可。完整命令说明参考 [技能管理命令](/zh/cli/skill)。 + diff --git a/docs/zh/skills/knowledge-wiki.mdx b/docs/zh/skills/knowledge-wiki.mdx new file mode 100644 index 00000000..40b4d298 --- /dev/null +++ b/docs/zh/skills/knowledge-wiki.mdx @@ -0,0 +1,112 @@ +--- +title: 
knowledge-wiki - 知识库 +description: 维护本地结构化知识库,自动归档、分类和交叉引用 +--- + +帮你把对话中产生的资料、灵感和零散笔记整理成结构化的本地知识库,自动维护索引和页面之间的交叉引用。 + +`knowledge-wiki` 在工作空间下维护一个 `knowledge/` 目录,相当于 Agent 的「外脑」。技能设置了 `always: true`,会**常驻上下文**,不需要任何外部依赖。 + +## 什么时候会触发 + +- 你分享了一篇文章、一份文档或一个 URL,想要沉淀下来 +- 聊天过程中聊出了值得长期保留的结论 +- 你想查一下之前积累过的知识 + +## 目录结构 + +``` +knowledge/ +├── index.md # 全局索引(必须维护) +├── log.md # 操作日志(只追加) +└── / # 分类子目录(按内容自由分组) + └── .md # 知识页(文件名用小写加中划线) +``` + +## 三个核心操作 + +### 1. 收录(Ingest) + +你分享了一段资料时,Agent 会: + +1. 读懂原文,提取关键信息 +2. 按内容决定放到哪个分类下——先看 `index.md` 里有没有合适的分类,没有就新建一个 +3. 生成知识页 `knowledge//.md` +4. 更新索引 `index.md` 和日志 `log.md` + +### 2. 综合(Synthesize) + +聊天中产生了新的结论或洞见时: + +1. 在合适的分类下创建新知识页 +2. 给相关的已有页面加上互相指向的链接 +3. 更新索引和日志 + +### 3. 查询(Query) + +你问到以前积累的知识时: + +1. 先从 `index.md` 里找可能相关的页面 +2. 用 `read` 工具打开具体页面 +3. 需要时再用 `memory_search` 补充检索 +4. 回答里会带上知识页的链接,方便你点过去看原文 + +## 知识页怎么写 + +```markdown +# 页面标题 + +> Source: <来源 URL 或简要说明> + +正文内容。页面之间用相对路径链接: +[相关页](../category/related-page.md) + +## 要点 + +- ... 
+ +## 相关页面 + +- [页面 A](../category/page-a.md) — 为什么相关 +``` + + +- `> Source:` 用来记录这条知识的来源。有明确来源时一定要写 +- 交叉引用很重要:创建或更新某页时,记得也去关联页面里补上反向链接 +- **只链接已经存在的页面**。如果某个概念值得单独成页,先建好再加链接 + + +## 索引格式 + +`knowledge/index.md` 采用扁平列表,按分类分组,每个知识页占一行: + +```markdown +# Knowledge Index + +## 分类 A +- [页面标题](category-a/page-slug.md) — 一句话摘要 + +## 分类 B +- [页面标题](category-b/page-slug.md) — 一句话摘要 +``` + +不用表格,不加 emoji。分类怎么起名、怎么组织都可以灵活调整。 + +## 日志格式 + +`knowledge/log.md` 只追加、不修改,最新的写在最下面: + +```markdown +## [YYYY-MM-DD] ingest | 页面标题 +## [YYYY-MM-DD] synthesize | 页面标题 +``` + +## 写作约定 + +- **文件名**用小写加中划线,比如 `machine-learning.md` +- **一页只讲一件事**,需要关联的内容通过链接串起来 +- **有了就更新,不要重复建页** +- **每次改完都要更新索引** `knowledge/index.md` +- **写精华别抄全文**,抓住要点就行 +- **对话里引用知识页时用完整路径**,比如 `[标题](knowledge//.md)`。页面之间互相链接才用相对路径 +- **基于知识页回答问题时附上链接**,方便深入查阅 diff --git a/docs/zh/skills/skill-creator.mdx b/docs/zh/skills/skill-creator.mdx new file mode 100644 index 00000000..623a74f6 --- /dev/null +++ b/docs/zh/skills/skill-creator.mdx @@ -0,0 +1,180 @@ +--- +title: skill-creator - 技能创建 +description: 创建、安装、更新技能,规范 SKILL.md 写法与目录结构 +--- + +`skill-creator` 是一个「元技能」,专门用来帮助 Agent 创建、安装和更新其他技能,确保所有技能的 `SKILL.md` 写法和目录结构保持一致。 + +## 什么时候会触发 + +- 用户想从 URL 或远程仓库安装一个技能 +- 用户想从头创建一个全新的技能 +- 需要升级或重构已有技能 + +## 技能是什么 + +简单来说,技能就是一份「可复用的说明书」加上可选的脚本和资源。它给 Agent 注入了某个领域的专业知识,让 Agent 在遇到对应任务时能像专家一样处理。 + +一个技能通常包含以下内容: + +1. **专项工作流** — 某类任务的完整步骤 +2. **工具用法** — 怎么调某种 API 或处理某种文件 +3. **领域知识** — 团队约定、业务规则、数据结构之类 +4. **附带资源** — 脚本、参考文档、模板等 + + +**核心原则:能省则省**。只写 Agent 自己想不到的内容,每加一行都要问自己:值不值得占这些 token? 
+ + +## 目录结构 + +``` +skill-name/ +├── SKILL.md # 必需:技能定义 +│ ├── YAML frontmatter(必填 name / description) +│ └── Markdown 正文(说明 + 示例) +└── 可选资源 + ├── scripts/ # 可执行脚本(Python / Bash 等) + ├── references/ # 内容较多的参考文档,Agent 按需读取 + └── assets/ # 模板、图标等,会直接用在输出里 +``` + +## SKILL.md 规范定义 + +SKILL.md 文件头部的 `frontmatter` 字段: + +| 字段 | 说明 | +| --- | --- | +| `name` | 技能名,小写加中划线,必须和目录名一致 | +| `description` | **最关键的字段**。写清楚「这个技能干什么」和「什么情况下该用它」,Agent 看到这段来决定要不要调它。注意:所有触发相关的描述都放在这里,不要写到正文里 | +| `metadata.cowagent.requires.bins` | 系统里必须装了哪些命令行工具 | +| `metadata.cowagent.requires.env` | 需要哪些环境变量(全部满足才行) | +| `metadata.cowagent.requires.anyEnv` | 多个 API Key 满足一个就行 | +| `metadata.cowagent.requires.anyBins` | 多个工具满足一个就行 | +| `metadata.cowagent.always` | 设为 `true` 会始终加载,不检查依赖 | +| `metadata.cowagent.emoji` | 展示用的 emoji(可选) | +| `metadata.cowagent.os` | 限定系统,如 `["darwin", "linux"]` | + + +`category` 字段不需要手写,系统会自动设成 `skill`。 + + +声明 API Key 依赖有两种写法: + +```yaml +metadata: + cowagent: + requires: + env: ["MYAPI_KEY"] # 必须有 +``` + +```yaml +metadata: + cowagent: + requires: + anyEnv: ["OPENAI_API_KEY", "LINKAI_API_KEY"] # 有一个就行 +``` + +**技能会自动按依赖启禁用**:环境变量齐了就自动启用,缺了就自动禁用,不需要手动 `/skill enable`。 + +## 资源目录怎么用 + +| 目录 | 放什么 | 不要放 | +| --- | --- | --- | +| `scripts/` | 需要反复执行的代码,或需要确定性结果的脚本 | 纯演示用的代码片段 | +| `references/` | **超过 500 行**、SKILL.md 实在塞不下的大文档(比如完整的数据库 Schema) | 普通 API 文档、示例、教程 | +| `assets/` | 会出现在最终产物里的文件(模板、图标、样板代码等) | 说明性文档 | + + +**原则上所有内容都写在 `SKILL.md` 里**,只有确实放不下才拆到资源目录。 + +不要给技能加 `README.md`、`CHANGELOG.md`、`INSTALLATION_GUIDE.md` 之类的文件——全部放进 `SKILL.md`。资源目录里只放真正要跑的脚本或真正要用的素材。 + + +## 安装外部技能 + +安装后最终落在 `/skills//` 目录。 + +| 来源 | 怎么装 | +| --- | --- | +| URL(单文件) | curl / web_fetch 直接拉 | +| URL(zip 包) | 下载解压 | +| 本地 SKILL.md | 直接读 | +| 本地 zip 包 | 解压 | + +安装步骤: + +1. 找到 `SKILL.md`(可能在包的根目录或某个子目录里) +2. 从 frontmatter 里读出 `name` +3. 把**整个技能目录**(包括 `SKILL.md`、`scripts/`、`assets/` 等)复制到 `/skills//` +4. 
如果包里有 `INSTALL.md` 之类的安装脚本,照着跑一遍,但最终结果仍然要落在 `/skills//` 下 + +## 从头创建技能 + +推荐按这个顺序来: + +1. **搞清楚需求** — 让用户举几个具体的使用场景,一次别问太多 +2. **想好结构** — 这个技能需要脚本吗?需要参考文档吗?需要模板素材吗? +3. **生成骨架** — 用初始化脚本: + + ```bash + scripts/init_skill.py --path /skills [--resources scripts,references,assets] [--examples] + ``` + +4. **填充内容** — 写好 SKILL.md、补上脚本和资源。脚本写完一定要实际跑一遍 +5. **格式校验**(可选): + + ```bash + scripts/quick_validate.py /skills/ + ``` + +6. **迭代完善** — 实际用起来之后根据反馈持续改进 + +## 命名规则 + +- 只用小写字母、数字和中划线。用户给的名字需要做标准化处理,比如 `Plan Mode` → `plan-mode` +- 长度别超过 64 个字符 +- 尽量短、用动词开头、一看就知道干什么 +- 必要时用工具名做前缀,比如 `gh-address-comments`、`linear-address-issue` +- 目录名和 `name` 字段必须完全一致 + +## 三级加载机制 + +技能不会一次性全部塞进上下文,而是分三级按需加载: + +1. **元信息**(`name` + `description`)— 常驻上下文,约 100 词。Agent 靠它判断「要不要用这个技能」 +2. **SKILL.md 正文** — 确定要用了才加载,建议控制在 500 行以内 +3. **资源文件** — Agent 需要的时候再读 + +如果一个技能涉及多个变体(比如多云厂商部署),建议这样组织: + +``` +cloud-deploy/ +├── SKILL.md # 主流程和厂商选择逻辑 +└── references/ + ├── aws.md + ├── gcp.md + └── azure.md +``` + +用户选了 AWS,Agent 只需要读 `aws.md`,不用把三家的文档全加载进来。 + +## 常见设计模式 + +**步骤式**:按编号列出操作步骤和对应脚本。 + +```markdown +1. 分析表单结构(运行 analyze_form.py) +2. 生成字段映射(编辑 fields.json) +3. 自动填充表单(运行 fill_form.py) +``` + +**分支式**:根据用户意图走不同流程。 + +```markdown +1. 
判断操作类型: + **新建内容?** → 走「创建流程」 + **编辑已有内容?** → 走「编辑流程」 +``` + +**模板式**:输出格式有严格要求时,在 SKILL.md 里直接给一个样板,让 Agent 照着写。 diff --git a/docs/zh/tools/bash.mdx b/docs/zh/tools/bash.mdx new file mode 100644 index 00000000..0090fe14 --- /dev/null +++ b/docs/zh/tools/bash.mdx @@ -0,0 +1,28 @@ +--- +title: bash - 终端 +description: 执行系统命令 +--- + +在当前工作目录执行 Bash 命令,返回 stdout 和 stderr。`env_config` 中配置的 API Key 会自动注入到环境变量中。 + +## 依赖 + +无额外依赖,默认可用。 + +## 参数 + +| 参数 | 类型 | 必填 | 说明 | +| --- | --- | --- | --- | +| `command` | string | 是 | 要执行的命令 | +| `timeout` | integer | 否 | 超时时间(秒) | + +## 使用场景 + +- 安装软件包和依赖 +- 运行代码和测试 +- 部署应用和服务(Nginx 配置、进程管理等) +- 系统运维和排查 + + + + diff --git a/docs/zh/tools/browser.mdx b/docs/zh/tools/browser.mdx new file mode 100644 index 00000000..0d3c9197 --- /dev/null +++ b/docs/zh/tools/browser.mdx @@ -0,0 +1,172 @@ +--- +title: browser - 浏览器 +description: 控制浏览器访问和操作网页 +--- + +控制 Chromium 浏览器进行网页导航、元素交互和内容提取。支持 JavaScript 渲染的动态页面,使用精简 DOM 快照让 Agent 高效理解页面结构。 + +## 安装 + + + + ```bash + cow install-browser + ``` + + 该命令会自动完成: + - 安装 `playwright` Python 包(旧系统自动降级兼容版本) + - 在 Linux 上安装系统依赖 + - 下载 Chromium 浏览器(Linux 服务器自动使用无头精简版) + - 自动检测国内网络并使用镜像加速 + + + ```bash + pip install playwright + playwright install chromium + ``` + + Linux 服务器还需安装系统依赖: + ```bash + sudo playwright install-deps chromium + ``` + + 如果系统较旧(如 Ubuntu 18.04,glibc < 2.28),需安装兼容版本: + ```bash + pip install playwright==1.28.0 + python -m playwright install chromium + ``` + + 国内网络下载 Chromium 较慢,可设置镜像加速: + ```bash + export PLAYWRIGHT_DOWNLOAD_HOST=https://registry.npmmirror.com/-/binary/playwright + python -m playwright install chromium + ``` + + + + + 1. 支持 Ubuntu 20.04+、Debian 10+、macOS、Windows。Ubuntu 18.04 等旧系统会自动降级安装兼容版本。 + 2. 浏览器工具依赖较重(约300MB),为可选安装。轻量的网页内容获取可使用 `web_fetch` 工具。 + + +## 工作流程 + +Agent 使用浏览器的典型流程: + +1. **`navigate`** — 打开目标 URL +2. **`snapshot`** — 获取页面精简 DOM,交互元素自动编号(ref) +3. **`click` / `fill` / `select`** — 通过 ref 编号操作元素 +4. 
**`snapshot`** — 再次快照验证操作结果 + +## 支持的操作 + +| 操作 | 说明 | 关键参数 | +| --- | --- | --- | +| `navigate` | 打开 URL | `url` | +| `snapshot` | 获取页面结构化文本(主要方式) | `selector`(可选) | +| `click` | 点击元素 | `ref` 或 `selector` | +| `fill` | 填入文本 | `ref` 或 `selector`,`text` | +| `select` | 下拉选择 | `ref` 或 `selector`,`value` | +| `scroll` | 滚动页面 | `direction`(up/down/left/right) | +| `screenshot` | 截图保存到工作区 | `full_page` | +| `wait` | 等待元素或超时 | `selector`,`timeout` | +| `press` | 按键(Enter、Tab 等) | `key` | +| `back` / `forward` | 浏览器后退/前进 | - | +| `get_text` | 获取元素文本内容 | `selector` | +| `evaluate` | 执行 JavaScript | `script` | + +## 使用场景 + +- 访问指定 URL 获取动态页面内容 +- 填写表单、登录操作 +- 操作网页元素(点击按钮、选择选项等) +- 验证部署后的网页效果 +- 抓取需要 JS 渲染的动态内容 + +## 运行模式 + +浏览器会根据运行环境自动选择模式: + +| 环境 | 模式 | +| --- | --- | +| macOS / Windows | 有头模式(显示浏览器窗口) | +| Linux 桌面(有 DISPLAY) | 有头模式 | +| Linux 服务器(无 DISPLAY) | 无头模式(headless) | + +可在 `config.json` 中手动覆盖: + +```json +{ + "tools": { + "browser": { + "headless": true + } + } +} +``` + +## 登录态持久化 + +**只需登录一次目标网站,Agent 后续可直接使用**。提供两种方式: + +### 方式一:Persistent 模式(默认) + +开箱即用,登录信息保存在 `~/.cow/browser_profile`。无需任何配置。 + +如需关闭持久化模式,每次都用纯净环境: + +```json +{ + "tools": { + "browser": { + "persistent": false + } + } +} +``` + +### 方式二:CDP 模式(接管真实 Chrome) + +让 Agent 连接独立启动的真实 Chrome(而非 Playwright 自带的 Chromium),获得完整浏览器指纹,适合反爬严格的网站。 + +启动 Chrome 时加上调试端口和独立用户目录: + + + + ```bash + "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" \ + --remote-debugging-port=9222 \ + --user-data-dir="$HOME/.cow/chrome-cdp" + ``` + + + ```bash + google-chrome \ + --remote-debugging-port=9222 \ + --user-data-dir="$HOME/.cow/chrome-cdp" + ``` + + + ```powershell + & "C:\Program Files\Google\Chrome\Application\chrome.exe" ` + --remote-debugging-port=9222 ` + --user-data-dir="$env:USERPROFILE\.cow\chrome-cdp" + ``` + + + +在 `config.json` 中配置端点: + +```json +{ + "tools": { + "browser": { + "cdp_endpoint": "http://localhost:9222" + } + } +} +``` + + + Chrome 137+ 限制 `--remote-debugging-port` 必须搭配独立 
`--user-data-dir`,因此 CDP 启动的 Chrome **无法直接复用你日常 Chrome 的登录态**,需要在独立目录中重新登录一次。 + diff --git a/docs/zh/tools/edit.mdx b/docs/zh/tools/edit.mdx new file mode 100644 index 00000000..717af2ba --- /dev/null +++ b/docs/zh/tools/edit.mdx @@ -0,0 +1,24 @@ +--- +title: edit - 文件编辑 +description: 通过精确文本替换编辑文件 +--- + +通过精确文本替换编辑文件。如果 `oldText` 为空则追加到文件末尾。 + +## 依赖 + +无额外依赖,默认可用。 + +## 参数 + +| 参数 | 类型 | 必填 | 说明 | +| --- | --- | --- | --- | +| `path` | string | 是 | 文件路径 | +| `oldText` | string | 是 | 要替换的原始文本(为空时追加到末尾) | +| `newText` | string | 是 | 替换后的文本 | + +## 使用场景 + +- 修改配置文件中的特定参数 +- 修复代码中的 bug +- 在文件指定位置插入内容 diff --git a/docs/zh/tools/env-config.mdx b/docs/zh/tools/env-config.mdx new file mode 100644 index 00000000..d5d52c68 --- /dev/null +++ b/docs/zh/tools/env-config.mdx @@ -0,0 +1,36 @@ +--- +title: env_config - 环境变量 +description: 管理 API Key 等秘钥配置 +--- + +管理工作空间 `.env` 文件中的环境变量(API Key 等秘钥),支持通过对话安全地添加和更新。内置安全保护和脱敏策略。 + +## 依赖 + +| 依赖 | 安装命令 | +| --- | --- | +| `python-dotenv` ≥ 1.0.0 | `pip install "python-dotenv>=1.0.0"` | + +安装扩展依赖时已包含:`pip3 install -r requirements-optional.txt` + +## 参数 + +| 参数 | 类型 | 必填 | 说明 | +| --- | --- | --- | --- | +| `action` | string | 是 | 操作类型:`get`、`set`、`list`、`delete` | +| `key` | string | 否 | 环境变量名称 | +| `value` | string | 否 | 环境变量值(仅 `set` 时需要) | + +## 使用方式 + +直接告诉 Agent 需要配置的秘钥,Agent 会自动调用该工具: + +- "帮我配置 BOCHA_API_KEY" +- "设置 OPENAI_API_KEY 为 sk-xxx" +- "查看已配置的环境变量" + +配置的秘钥会自动注入到 `bash` 工具的执行环境中。 + + + + diff --git a/docs/zh/tools/index.mdx b/docs/zh/tools/index.mdx new file mode 100644 index 00000000..088c0731 --- /dev/null +++ b/docs/zh/tools/index.mdx @@ -0,0 +1,69 @@ +--- +title: 工具概览 +description: CowAgent 内置工具系统 +--- + +工具是 Agent 访问操作系统资源的核心能力。Agent 会根据任务需求智能选择和调用工具,完成文件操作、命令执行、联网搜索、定时任务等各类操作。工具实现在项目的 `agent/tools/` 目录下。 + +## 内置工具 + +以下工具默认可用,无需额外配置: + + + + 读取文件内容,支持文本、图片、PDF + + + 创建或覆盖写入文件 + + + 通过精确文本替换编辑文件 + + + 列出目录内容 + + + 执行系统命令 + + + 向用户发送文件或图片 + + + 搜索和读取长期记忆 + + + 管理 API Key 等秘钥配置 + + + 获取网页或文档内容 + + + 创建和管理定时任务 + + + 
+## 可选工具 + +以下工具需要安装额外依赖或配置 API Key 后启用: + + + + 搜索互联网获取实时信息 + + + 分析图片内容(识别、描述、OCR 文字提取等) + + + 控制浏览器访问和操作网页 + + + +## MCP 工具 + +通过 [Model Context Protocol](https://modelcontextprotocol.io) 接入社区生态中的各种MCP工具,配置一次 `mcp.json` 即用即得: + + + + 支持 stdio / SSE 标准协议,热更新,零代码接入 + + diff --git a/docs/zh/tools/ls.mdx b/docs/zh/tools/ls.mdx new file mode 100644 index 00000000..e4d25fc5 --- /dev/null +++ b/docs/zh/tools/ls.mdx @@ -0,0 +1,23 @@ +--- +title: ls - 目录列表 +description: 列出目录内容 +--- + +列出目录内容,按字母排序,目录名带 `/` 后缀,包含隐藏文件。 + +## 依赖 + +无额外依赖,默认可用。 + +## 参数 + +| 参数 | 类型 | 必填 | 说明 | +| --- | --- | --- | --- | +| `path` | string | 是 | 目录路径,相对路径基于工作空间目录 | +| `limit` | integer | 否 | 最大返回条目数,默认 500 | + +## 使用场景 + +- 浏览项目结构 +- 查找特定文件 +- 检查目录是否存在 diff --git a/docs/zh/tools/mcp.mdx b/docs/zh/tools/mcp.mdx new file mode 100644 index 00000000..8b7670c1 --- /dev/null +++ b/docs/zh/tools/mcp.mdx @@ -0,0 +1,112 @@ +--- +title: MCP 工具 +description: 通过 Model Context Protocol 接入外部工具生态 +--- + +CowAgent 支持 [Model Context Protocol (MCP)](https://modelcontextprotocol.io),让 Agent 能够直接调用社区中数以万计的 MCP 工具。配置一次 `mcp.json`,工具就会以与内置工具完全相同的方式呈现给 LLM,可被自动选择和调用。 + +## 配置文件 + +CowAgent 读取 `~/cow/mcp.json`。文件不存在时不会启用任何 MCP 工具,也不会报错。 + +Docker 部署时,官方 `docker-compose.yml` 已经把宿主机 `./cow` 挂载到容器内 `/home/agent/cow`(即容器用户的 `~/cow`),把 `mcp.json` 放进宿主机 `./cow/` 目录即可生效。 + +### 标准格式 + +完全兼容 MCP 社区标准,同 Claude Desktop / Cursor 一致: + +```json +{ + "mcpServers": { + "": { + "command": "npx", + "args": ["-y", "some-mcp-package"], + "env": { + "API_KEY": "your-key-here" + } + } + } +} +``` + +| 字段 | 必填 | 说明 | +| --- | --- | --- | +| `command` | stdio | 启动 server 的可执行命令(如 `npx`、`python`、`uvx`) | +| `args` | 否 | 传给 command 的参数列表 | +| `env` | 否 | 子进程的环境变量,常用于 API Key | +| `url` | SSE / Streamable HTTP | 远程端点 URL(与 `command` 二选一) | +| `type` | 远程 | 远程传输类型,可选 `sse` 或 `streamable-http`,默认 `sse` | +| `headers` | 否 | 远程请求附加 HTTP 头(如 `Authorization`),仅 Streamable HTTP 使用 | +| `disabled` | 否 | `true` 时跳过该 server,便于临时关闭 | + +### 完整示例 + 
+```json +{ + "mcpServers": { + "fetch": { + "command": "uvx", + "args": ["mcp-server-fetch"] + }, + "github": { + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-github"], + "env": { + "GITHUB_PERSONAL_ACCESS_TOKEN": "" + } + } + } +} +``` + +- **fetch**:通用网页抓取,返回页面文本内容,无需 API Key +- **github**:访问 GitHub 仓库、Issue、PR 等,需要 Personal Access Token + +## 让 Agent 帮你配置 + +CowAgent 自带 `read` / `write` / `edit` 工具,**直接把要装的 MCP 配置发给 Agent**,让它写到配置文件中: + +例如: + +```markdown +帮我把这个 MCP 加到 ~/cow/mcp.json 里: + +{"mcpServers":{"fetch":{"command":"uvx","args":["mcp-server-fetch"]}}} +``` + +Agent 会: + +1. 访问 MCP 配置文件,合并新 server 配置,保留已有项 +2. 自动重载增量的 MCP Server,下一次对话即可使用相应 Tools + +## 工作方式 + +- 启动时**异步加载**:`mcp.json` 中配置的所有 server 会在后台异步加载,不阻塞主流程,对话可以立刻使用 +- **热更新**:用户或 Agent 修改 `mcp.json` 后,消息处理完成时会自动重载变更的 server,无需重启 cow +- **平铺呈现**:每个 MCP server 暴露的多个方法会平铺为独立的工具,LLM 直接选择调用,不需要二次决策 + +## 支持的传输协议 + +| 协议 | 说明 | 配置字段 | +| --- | --- | --- | +| **stdio** | 子进程通信,最常见,社区生态最丰富 | `command` + `args` | +| **SSE** | HTTP Server-Sent Events,旧版远程协议 | `url`(默认) | +| **Streamable HTTP** | 新版远程协议,单端点收发,逐步取代 SSE | `type: "streamable-http"` + `url` | + +## 排错 + +| 现象 | 排查方向 | +| --- | --- | +| 启动后 Agent 没有 MCP 工具 | 检查 `~/cow/mcp.json` 是否存在、JSON 格式是否合法 | +| 某个 server 加载失败 | 查看启动日志中的 `[MCP] Server 'xxx' load failed`,常见为依赖未装、API Key 缺失 | +| 修改 `mcp.json` 没有生效 | 改动会在**下一条消息**生效;若 server 配置不变(如只改注释),不会触发重启 | +| Docker 部署 | 确认宿主机 `./cow` 已挂载到容器内 `/home/agent/cow`,`mcp.json` 直接放进宿主机 `./cow/` 目录即可,或者直接对话 Agent 安装 | + +## MCP 市场推荐 + +可以从各个第三方广场寻找现成的 MCP server,复制 JSON 配置即可使用,例如: + +- [mcp.so](https://mcp.so) — 全球 MCP 服务索引 +- [ModelScope MCP 广场](https://modelscope.cn/mcp) — 魔搭社区 MCP 广场,国内访问更稳定 + +只要遵循 MCP 标准协议(stdio / SSE / Streamable HTTP),都可以直接接入 CowAgent。 diff --git a/docs/zh/tools/memory.mdx b/docs/zh/tools/memory.mdx new file mode 100644 index 00000000..c3cc6fe4 --- /dev/null +++ b/docs/zh/tools/memory.mdx @@ -0,0 +1,43 @@ +--- +title: memory - 记忆与知识 +description: 搜索和读取长期记忆及知识库文件 +--- + 
+记忆工具包含两个子工具:`memory_search`(搜索记忆)和 `memory_get`(读取记忆或知识文件)。 + +当 [知识库](/knowledge) 功能开启时,这两个工具同时支持访问 `memory/` 和 `knowledge/` 目录下的文件。 + +## 依赖 + +无额外依赖,默认可用。由 Agent Core 的记忆系统管理。 + +## memory_search + +搜索历史记忆和知识库内容,支持关键词和向量混合检索。 + +| 参数 | 类型 | 必填 | 说明 | +| --- | --- | --- | --- | +| `query` | string | 是 | 搜索查询 | + +## memory_get + +读取特定记忆文件或知识库文件的内容。 + +| 参数 | 类型 | 必填 | 说明 | +| --- | --- | --- | --- | +| `path` | string | 是 | 文件的相对路径(如 `MEMORY.md`、`memory/2026-01-01.md`、`knowledge/concepts/rag.md`) | +| `start_line` | integer | 否 | 起始行号 | +| `end_line` | integer | 否 | 结束行号 | + +## 工作方式 + +Agent 会在以下场景自动调用记忆工具: + +- 用户分享重要信息时 → 存储到记忆 +- 需要参考历史信息时 → 搜索相关记忆 +- 对话达到一定长度时 → 提取摘要存储 +- 讨论到专业知识时 → 检索知识库中的相关页面 + + + 当 `knowledge` 配置为 `false` 时,工具的描述和搜索范围会自动调整为仅包含记忆文件。 + diff --git a/docs/zh/tools/read.mdx b/docs/zh/tools/read.mdx new file mode 100644 index 00000000..07e08b88 --- /dev/null +++ b/docs/zh/tools/read.mdx @@ -0,0 +1,24 @@ +--- +title: read - 文件读取 +description: 读取文件内容 +--- + +读取文件内容。支持文本文件、PDF 文件、图片(返回元数据)等格式。 + +## 依赖 + +无额外依赖,默认可用。 + +## 参数 + +| 参数 | 类型 | 必填 | 说明 | +| --- | --- | --- | --- | +| `path` | string | 是 | 文件路径,相对路径基于工作空间目录 | +| `offset` | integer | 否 | 起始行号(1-indexed),负值表示从末尾读取 | +| `limit` | integer | 否 | 读取行数 | + +## 使用场景 + +- 查看配置文件、日志文件 +- 读取代码文件进行分析 +- 检查图片/视频的文件信息 diff --git a/docs/zh/tools/scheduler.mdx b/docs/zh/tools/scheduler.mdx new file mode 100644 index 00000000..2648116f --- /dev/null +++ b/docs/zh/tools/scheduler.mdx @@ -0,0 +1,80 @@ +--- +title: scheduler - 定时任务 +description: 创建和管理定时任务 +--- + +创建和管理动态定时任务,支持灵活的调度方式和执行模式。 + +## 依赖 + +| 依赖 | 安装命令 | +| --- | --- | +| `croniter` ≥ 2.0.0 | `pip install "croniter>=2.0.0"` | + +安装核心依赖时已包含:`pip3 install -r requirements.txt` + +## 调度方式 + +| 方式 | 说明 | +| --- | --- | +| 一次性任务 | 在指定时间执行一次 | +| 固定间隔 | 按固定时间间隔重复执行 | +| Cron 表达式 | 使用 Cron 语法定义复杂调度规则 | + +## 执行模式 + +- **固定消息发送**:到达触发时间时发送预设消息 +- **Agent 动态任务**:到达触发时间时由 Agent 智能执行任务 + +## 使用方式 + +通过自然语言即可创建和管理定时任务: + +- "每天早上 9 点给我发天气预报" +- "每隔 2 
小时检查一下服务器状态" +- "明天下午 3 点提醒我开会" +- "查看所有定时任务" + + + + + +## 结果进入会话上下文 + +定时任务在隔离 session 中执行(内部规划与 tool 调用不污染用户会话),但**最终输出**会作为一对消息回写到接收者的真实会话,用户可以直接追问"刚才那条第二点展开说说"。 + +**默认策略** + +- Agent 动态任务的输出进入上下文 +- 固定消息类任务默认不进入上下文(可通过配置打开) +- 每个会话最多保留最近 **3 对** scheduler 消息,更早的自动清理;普通用户消息不受影响 + +**配置项** + +| 配置项 | 默认值 | 说明 | +| --- | --- | --- | +| `scheduler_inject_to_session` | `true` | 总开关 | +| `scheduler_inject_max_per_session` | `3` | 每会话保留 scheduler 消息对数上限 | +| `scheduler_inject_send_message` | `false` | 是否同时注入固定消息类任务 | + +```json +{ + "scheduler_inject_to_session": true, + "scheduler_inject_max_per_session": 3, + "scheduler_inject_send_message": false +} +``` + +## 任务执行时的上下文 + +定时任务的隔离 session 会保留最近几次执行的对话历史,便于做"对比上次"、"延续之前结论"等操作;但为了避免高频任务(如每 5 分钟监控)prompt 越积越长,会按公式自动裁剪: + +``` +scheduler_keep_turns = max(1, agent_max_context_turns / 5) +``` + +`agent_max_context_turns` 默认为 `20`,所以定时任务每次执行默认带最近 **4 轮**历史。需要更长记忆可调大 `agent_max_context_turns`。 + + +群聊场景(飞书 / 企微群机器人 / 钉钉等)下用户的真实 session_id 形如 `user_id:group_id`,与 receiver 不同。创建任务时会自动记录正确的 session_id;老的 `tasks.json` 缺该字段时回落到 receiver,行为与历史版本一致。 + diff --git a/docs/zh/tools/send.mdx b/docs/zh/tools/send.mdx new file mode 100644 index 00000000..05f73a6b --- /dev/null +++ b/docs/zh/tools/send.mdx @@ -0,0 +1,23 @@ +--- +title: send - 文件发送 +description: 向用户发送文件 +--- + +向用户发送文件(图片、视频、音频、文档等),当用户明确要求发送/分享文件时使用。 + +## 依赖 + +无额外依赖,默认可用。 + +## 参数 + +| 参数 | 类型 | 必填 | 说明 | +| --- | --- | --- | --- | +| `path` | string | 是 | 文件路径,可以是绝对路径或相对于工作空间的路径 | +| `message` | string | 否 | 附带的消息说明 | + +## 使用场景 + +- 将生成的代码或文档发送给用户 +- 发送截图、图表 +- 分享下载的文件 diff --git a/docs/zh/tools/vision.mdx b/docs/zh/tools/vision.mdx new file mode 100644 index 00000000..675afe41 --- /dev/null +++ b/docs/zh/tools/vision.mdx @@ -0,0 +1,75 @@ +--- +title: vision - 图片理解 +description: 分析图片内容(识别、描述、OCR 等) +--- + +使用 Vision API 分析本地图片或图片 URL,支持内容描述、文字提取(OCR)、物体识别等。 + +## 模型选择 + +Vision 工具采用多级自动选择 + 自动兜底策略,无需手动配置即可使用: + +1. **主模型** — 优先使用当前配置的主模型进行图像识别(需要是多模态模型) +2. 
**其他已配置模型** — 自动发现已配置 API Key 的其他多模态模型作为备选 + +如果当前 provider 调用失败,会自动尝试下一个,直到成功或全部失败。 + +### 支持的模型 + +| 厂商 | 视觉模型 | 说明 | +| --- | --- | --- | +| OpenAI / 兼容协议 | 使用主模型 | 支持所有 OpenAI 协议兼容的多模态模型 | +| 通义千问 (DashScope) | 使用主模型 | 例如 qwen3.6-plus 等 | +| Claude | 使用主模型 | Anthropic 原生图像格式 | +| Gemini | 使用主模型 | inlineData 格式 | +| 豆包 (Doubao) | 使用主模型 | doubao-seed-2-0 系列原生支持 | +| Kimi (Moonshot) | 使用主模型 | kimi-k2.6、kimi-k2.5 原生支持 | +| 百度千帆 (Qianfan) | 使用主模型 | 默认使用多模态主模型 (如 ernie-5.1),主模型不支持时兜底使用 ernie-4.5-turbo-vl | +| 智谱 AI | glm-5v-turbo | 固定使用视觉专用模型 | +| MiniMax | MiniMax-Text-01 | 固定使用视觉专用模型 | + + + 智谱和 MiniMax 的文本模型不支持图像理解,因此始终使用对应的视觉专用模型,无需手动指定。 + + +> 当 `use_linkai=true` 时,默认使用 LinkAI 的多模态模型进行图像识别 + +## 自定义配置 + +如果希望指定 Vision 使用的模型,可在 `config.json` 中配置,例如: + +```json +{ + "tools": { + "vision": { + "model": "gpt-4.1" + } + } +} +``` + +指定的模型会被**优先使用**,工具会根据模型名自动路由到对应的 provider;若调用失败,会自动 fallback 到其他已配置的 provider。 + +大多数情况下无需配置,主模型支持多模态或配置任意一个支持视觉的 API Key 即可自动工作。 + +## 参数 + +| 参数 | 类型 | 必填 | 说明 | +| --- | --- | --- | --- | +| `image` | string | 是 | 本地文件路径或 HTTP(S) 图片 URL | +| `question` | string | 是 | 对图片提出的问题 | + +支持的图片格式:jpg、jpeg、png、gif、webp + + + +## 使用场景 + +- 描述图片中的内容 +- 提取图片中的文字(OCR) +- 识别物体、颜色、场景 +- 分析截图、文档扫描图片等 + + + 超过 1MB 的图片会自动压缩后上传,所有图片(包括远程 URL)会统一转为 base64 传输,确保兼容所有模型后端。 + diff --git a/docs/zh/tools/web-fetch.mdx b/docs/zh/tools/web-fetch.mdx new file mode 100644 index 00000000..12f85953 --- /dev/null +++ b/docs/zh/tools/web-fetch.mdx @@ -0,0 +1,32 @@ +--- +title: web_fetch - 网页获取 +description: 获取网页或文档内容 +--- + +获取 HTTP/HTTPS URL 的内容。对网页提取可读文本,对文档文件(PDF、Word、Excel 等)自动下载并解析内容。 + +## 参数 + +| 参数 | 类型 | 必填 | 说明 | +| --- | --- | --- | --- | +| `url` | string | 是 | HTTP/HTTPS URL(网页或文档链接) | + +## 支持的文件类型 + +| 类型 | 格式 | +| --- | --- | +| PDF | `.pdf` | +| Word | `.docx` | +| 文本 | `.txt`、`.md`、`.csv`、`.log` | +| 表格 | `.xls`、`.xlsx` | +| 演示文稿 | `.ppt`、`.pptx` | + +## 使用场景 + +- 获取网页的文本内容 +- 下载并解析远程文档 +- 获取 API 响应内容 + + + `web_fetch` 只能获取静态 HTML 内容。如果页面需要 JavaScript 
渲染(如 SPA 单页应用),请使用 `browser` 工具。 + diff --git a/docs/zh/tools/web-search.mdx b/docs/zh/tools/web-search.mdx new file mode 100644 index 00000000..86c170c0 --- /dev/null +++ b/docs/zh/tools/web-search.mdx @@ -0,0 +1,51 @@ +--- +title: web_search - 联网搜索 +description: 搜索互联网获取实时信息,支持多个搜索厂商 +--- + +搜索互联网获取实时信息、新闻、研究等内容。支持博查、百度千帆、智谱、LinkAI 四个后端,配置任意一家即可使用。 + + + 推荐通过 [Web 控制台](/zh/channels/web) 的「模型管理 → 搜索」面板可视化配置厂商与策略,无需手动编辑配置文件。 + + +## 厂商 + +| 厂商 | 凭证 | 申请入口 | +| --- | --- | --- | +| 博查 Bocha | `tools.web_search.bocha_api_key` | [博查开放平台](https://open.bochaai.com/) | +| 百度千帆 | 复用 `qianfan_api_key` | [千帆控制台](https://cloud.baidu.com/doc/qianfan/s/2mh4su4uy) | +| 智谱 Zhipu | 复用 `zhipu_ai_api_key` | [智谱开放平台](https://docs.bigmodel.cn/cn/guide/tools/web-search) | +| LinkAI | 复用 `linkai_api_key` | [LinkAI 控制台](https://link-ai.tech/console/interface) | + +除博查需要单独的 `bocha_api_key` 外,其他三家直接复用对应模型的 API Key,配好模型即同时获得搜索能力。 + +## 路由策略 + +```json +{ + "tools": { + "web_search": { + "strategy": "auto", + "provider": "" + } + } +} +``` + +- `auto`(默认):由 Agent 在已配置的厂商中智能选择,并可在一次任务中多次调用、切换不同厂商以获取更全面的结果;未指定时按 `bocha → qianfan → zhipu → linkai` 顺序兜底。 +- `fixed`:固定使用 `provider` 指定的厂商;该厂商凭证缺失时自动回落到 auto 顺序。 + +## 工具参数 + +| 参数 | 类型 | 必填 | 说明 | +| --- | --- | --- | --- | +| `query` | string | 是 | 搜索关键词 | +| `count` | integer | 否 | 返回结果数量(1–50,默认 10) | +| `freshness` | string | 否 | 时间范围:`noLimit`(默认)、`oneDay`、`oneWeek`、`oneMonth`、`oneYear`,或日期范围如 `2025-01-01..2025-02-01` | +| `summary` | boolean | 否 | 是否返回页面摘要(默认 false) | +| `provider` | string | 否 | `auto` 策略下配置了多个厂商时可见,用于单次切换厂商 | + + + 四家凭证均未配置时,该工具不会注册到 Agent。 + diff --git a/docs/zh/tools/write.mdx b/docs/zh/tools/write.mdx new file mode 100644 index 00000000..51cf66f1 --- /dev/null +++ b/docs/zh/tools/write.mdx @@ -0,0 +1,27 @@ +--- +title: write - 文件写入 +description: 创建或覆盖写入文件 +--- + +写入内容到文件。文件不存在则自动创建,已存在则覆盖。自动创建父目录。 + +## 依赖 + +无额外依赖,默认可用。 + +## 参数 + +| 参数 | 类型 | 必填 | 说明 | +| --- | --- | --- | --- | +| `path` | string | 是 | 文件路径 | +| 
`content` | string | 是 | 要写入的内容 | + +## 使用场景 + +- 创建新的代码文件或脚本 +- 生成配置文件 +- 保存处理结果 + + + 单次写入不应超过 10KB。对于大文件,建议先创建骨架,再使用 edit 工具分块添加内容。 + From 9e6a2cc2c059df3c0dd90f3e91fd197361a0eba8 Mon Sep 17 00:00:00 2001 From: zhayujie Date: Sun, 31 May 2026 20:11:23 +0800 Subject: [PATCH 5/5] feat(installer): revamp install flow with i18n --- cli/commands/install.py | 4 +- cli/commands/process.py | 5 +- cli/commands/skill.py | 4 +- config-template.json | 2 +- run.sh | 851 +++++++++++++++++++++++++++++----------- scripts/run.ps1 | 644 ++++++++++++++++++++---------- 6 files changed, 1066 insertions(+), 444 deletions(-) diff --git a/cli/commands/install.py b/cli/commands/install.py index ed22f296..b72bcba1 100644 --- a/cli/commands/install.py +++ b/cli/commands/install.py @@ -118,9 +118,11 @@ def run_install_browser( 0 on success, 1 on fatal failure (pip or chromium install failed). """ from cli.utils import get_cli_language - from common import i18n + # Import `common` only after get_cli_language() runs ensure_sys_path(), + # so it works when `cow` is invoked from outside the project directory. get_cli_language() # resolve cow_lang so i18n.t reflects config + from common import i18n _t = i18n.t stream = stream or _default_stream diff --git a/cli/commands/process.py b/cli/commands/process.py index 9d22b67f..94e8126b 100644 --- a/cli/commands/process.py +++ b/cli/commands/process.py @@ -276,9 +276,12 @@ def status(): """Show CowAgent running status.""" from cli import __version__ from cli.utils import load_config_json, get_cli_language - from common import i18n + # get_cli_language() calls ensure_sys_path(), which adds the project root + # to sys.path. Import `common` only AFTER that, otherwise it fails with + # ModuleNotFoundError when `cow` runs from outside the project dir. 
get_cli_language() # resolve cow_lang so i18n.t reflects config + from common import i18n _t = i18n.t pid = _read_pid() diff --git a/cli/commands/skill.py b/cli/commands/skill.py index fa5a3167..61f92fb7 100644 --- a/cli/commands/skill.py +++ b/cli/commands/skill.py @@ -518,9 +518,11 @@ def _install_targz_bytes(content: bytes, name: str, skills_dir: str, result: Ins def _print_install_success(name: str, source: str): """Print a unified install success message with description and source.""" from cli.utils import get_cli_language - from common import i18n + # Import `common` only after get_cli_language() runs ensure_sys_path(), + # so it works when `cow` is invoked from outside the project directory. get_cli_language() # resolve cow_lang so i18n.t reflects config + from common import i18n _t = i18n.t skills_dir = get_skills_dir() diff --git a/config-template.json b/config-template.json index dcd19774..8be82885 100644 --- a/config-template.json +++ b/config-template.json @@ -1,6 +1,6 @@ { "cow_lang": "auto", - "channel_type": "weixin", + "channel_type": "web", "model": "deepseek-v4-flash", "deepseek_api_key": "", "deepseek_api_base": "https://api.deepseek.com/v1", diff --git a/run.sh b/run.sh index 5dff9785..21bb5a9f 100755 --- a/run.sh +++ b/run.sh @@ -28,6 +28,261 @@ if [ -z "$BASH_VERSION" ]; then exit 1 fi +# ============================ +# i18n: install-flow language +# ============================ +# UI_LANG controls the language of install prompts/menus. Detected on first run +# (or chosen by the user), defaults to auto-detection. "zh" or "en". +UI_LANG="" + +# A terminal we can read from. When the script runs via `curl | bash`, stdin is +# the script pipe (EOF on read), so interactive prompts must read from the tty. +TTY_DEV="/dev/tty" +HAS_TTY=false +if [ -r /dev/tty ] && [ -w /dev/tty ]; then + HAS_TTY=true +fi + +# Detect default UI language from environment (best-effort, mirrors common/i18n). 
+detect_ui_lang() { + local loc="" + # macOS: prefer AppleLocale, which reflects the real UI language + if [ "$(uname)" = "Darwin" ] && command -v defaults &> /dev/null; then + loc=$(defaults read -g AppleLocale 2>/dev/null || true) + fi + [ -z "$loc" ] && loc="${LC_ALL:-${LC_MESSAGES:-${LANG:-}}}" + case "$loc" in + zh* | *zh_* | *_CN* | *_TW* | *_HK* | *Hans* | *Hant*) echo "zh" ;; + *) echo "en" ;; + esac +} + +# Translation helper: t +t() { + if [ "$UI_LANG" = "en" ]; then + printf '%s' "$2" + else + printf '%s' "$1" + fi +} + +# Read a line from the controlling terminal (works under `curl | bash`). +# Usage: tty_read VAR "prompt" +tty_read() { + local __var=$1 __prompt=$2 __input="" + if [ "$HAS_TTY" = true ]; then + # Ensure the tty is in normal line mode. A preceding arrow-key menu + # may have left it in cbreak/-echo mode; without this, `read` could + # return immediately or not echo typed characters. + stty sane < "$TTY_DEV" 2>/dev/null || true + # Print the prompt explicitly (not via read -p, whose prompt can be + # swallowed right after an arrow-key menu) and read from the tty. + # `|| true` so a non-zero read (EOF) does NOT trip `set -e`. + printf '%s' "$__prompt" > /dev/tty + read -r __input < "$TTY_DEV" || true + else + read -r -p "$__prompt" __input || true + fi + printf -v "$__var" '%s' "$__input" +} + +# Arrow-key selectable menu with number fallback. +# Usage: select_menu OUT_VAR "Title" "opt1" "opt2" ... +# Result: OUT_VAR is set to the selected index (1-based). +select_menu() { + # Interactive function: never let a non-zero command (read EOF, arithmetic + # evaluating to 0, etc.) abort the caller under `set -e`. + set +e + local __out=$1; shift + local title=$1; shift + local options=("$@") + local count=${#options[@]} + # Initial highlight: MENU_DEFAULT (1-based) if set, else first option. 
+ local cur=0 + if [[ "${MENU_DEFAULT:-}" =~ ^[0-9]+$ ]] && (( MENU_DEFAULT >= 1 && MENU_DEFAULT <= count )); then + cur=$((MENU_DEFAULT - 1)) + fi + MENU_DEFAULT="" + + # Fallback to numbered input when no interactive terminal is available + # (e.g. CI, non-tty pipe). Arrow-key rendering needs a real tty. + if [ "$HAS_TTY" != true ] || [ ! -t 1 ]; then + local def=$((cur + 1)) + echo -e "${CYAN}${BOLD}${title}${NC}" + local i=1 + for opt in "${options[@]}"; do + echo -e " ${YELLOW}${i})${NC} ${opt}" + i=$((i + 1)) + done + local choice="" + while true; do + tty_read choice "$(t "请输入序号" "Enter number") [1-${count}, $(t "默认" "default") ${def}]: " + choice=${choice:-$def} + if [[ "$choice" =~ ^[0-9]+$ ]] && (( choice >= 1 && choice <= count )); then + break + fi + echo -e "${RED}$(t "无效选择,请输入" "Invalid choice, enter") 1-${count}${NC}" + done + printf -v "$__out" '%s' "$choice" + return + fi + + # Interactive arrow-key menu. + # Use literal escape characters (via $'...') and printf instead of + # `echo -e`, because `echo`'s backslash handling is not portable and + # leaks raw "\e[K" text on some shells/terminals. + local ESC=$'\033' + local UP="${ESC}[A" # move cursor up one line + local CLR="${ESC}[K" # clear to end of line + + # fd 3 is a long-lived (read) handle to the controlling terminal, opened + # once by menu_session_begin() before the install flow. Reusing one fd + # across all menus avoids the bash 3.2 bug where re-opening /dev/tty per + # menu makes the second menu read EOF and auto-select the default. + # Detect whether fd 3 is already open using a READ redirection (fd 3 is + # read-only; testing with `>&3` would wrongly report it as closed). + local _own_fd3=false + if ! { : <&3; } 2>/dev/null; then + exec 3<"$TTY_DEV" + _own_fd3=true + fi + + # Put the terminal into cbreak/raw input mode so single keystrokes arrive + # immediately and are not echoed. 
+ # -echo : don't echo keystrokes (otherwise arrow keys leak as ^[[A) + # -icanon : disable line buffering + # min 1 time 0 : read returns as soon as 1 byte is available + local _restore="tput cnorm 2>/dev/null; stty echo icanon <${TTY_DEV} 2>/dev/null" + trap "$_restore" EXIT INT TERM + tput civis 2>/dev/null || true + stty -echo -icanon min 1 time 0 <&3 2>/dev/null || true + + printf '%b\n' "${CYAN}${BOLD}${title}${NC}" + printf '%b\n' "${CYAN}$(t "↑/↓ 选择,Enter 确认" "Use ↑/↓ to move, Enter to select")${NC}" + + local first_draw=true + while true; do + # Move cursor up to the top of the option block to redraw it. + if [ "$first_draw" = false ]; then + local i=0 + while [ $i -lt $count ]; do + printf '%s' "$UP" + i=$((i + 1)) + done + fi + first_draw=false + + local idx=0 + for opt in "${options[@]}"; do + if [ $idx -eq $cur ]; then + printf '%s%b\n' "$CLR" " ${GREEN}${BOLD}❯ ${opt}${NC}" + else + printf '%s%b\n' "$CLR" " ${opt}" + fi + idx=$((idx + 1)) + done + + # Read one key from the shared terminal fd 3. + local key="" + IFS= read -rsn1 key <&3 + local rc=$? + if [ $rc -ne 0 ]; then + # No usable terminal: restore and fall back to numbered input. + eval "$_restore"; trap - EXIT INT TERM + [ "${_own_fd3:-}" = true ] && exec 3<&- 2>/dev/null + local choice="" + while true; do + tty_read choice "$(t "请输入序号" "Enter number") [1-${count}]: " + choice=${choice:-$((cur + 1))} + if [[ "$choice" =~ ^[0-9]+$ ]] && (( choice >= 1 && choice <= count )); then + break + fi + done + printf -v "$__out" '%s' "$choice" + return + fi + + # Empty key means Enter/Return (read -n1 strips the newline delimiter). + if [ -z "$key" ]; then + break + fi + + case "$key" in + "$ESC") + # Arrow key: ESC [ A/B (or ESC O A/B). Read the two trailing + # bytes one at a time, no timeout (bash 3.2 has no fractional + # read -t; in cbreak mode the bytes are already buffered). 
+ local b2="" b3="" + IFS= read -rsn1 b2 <&3 2>/dev/null || b2="" + IFS= read -rsn1 b3 <&3 2>/dev/null || b3="" + case "${b2}${b3}" in + "[A" | "OA") cur=$(( (cur - 1 + count) % count )) ;; # up + "[B" | "OB") cur=$(( (cur + 1) % count )) ;; # down + esac + ;; + $'\n' | $'\r') + break + ;; + [0-9]) + if (( key >= 1 && key <= count )); then + cur=$((key - 1)) + break + fi + ;; + $'\003') + # Ctrl-C: restore and abort. + eval "$_restore"; trap - EXIT INT TERM + [ "${_own_fd3:-}" = true ] && exec 3<&- 2>/dev/null + printf '\n%b\n' "${RED}$(t "已取消安装" "Installation cancelled")${NC}" + exit 130 + ;; + esac + done + + eval "$_restore" + trap - EXIT INT TERM + [ "${_own_fd3:-}" = true ] && exec 3<&- 2>/dev/null + printf -v "$__out" '%s' "$((cur + 1))" +} + +# Open/close a long-lived terminal handle (fd 3) shared by all menus in an +# install/config session. Opening fd 3 once avoids per-menu re-open issues on +# bash 3.2 (second menu reading EOF). Safe no-ops when there is no tty. +menu_session_begin() { + [ "$HAS_TTY" = true ] || return 0 + exec 3<"$TTY_DEV" 2>/dev/null || true +} +menu_session_end() { + exec 3<&- 2>/dev/null || true +} + +# Ask the user to choose the install/UI language (first step of install). +select_language() { + # Order is fixed (English first, Chinese second). The default highlight + # follows detection, but conservatively: only a confident "zh" signal + # (macOS AppleLocale / Linux zh_* locale) preselects Chinese; everything + # else (English, empty/C/POSIX locale, server images) defaults to English. 
+ local detected + detected=$(detect_ui_lang) + if [ "$detected" = "zh" ]; then + MENU_DEFAULT=2 + UI_LANG="zh" + else + MENU_DEFAULT=1 + UI_LANG="en" + fi + + local lang_choice + select_menu lang_choice "Select Language / 选择语言" "English" "中文 (Chinese)" + case "$lang_choice" in + 1) UI_LANG="en" ;; + 2) UI_LANG="zh" ;; + *) UI_LANG="en" ;; + esac + # Remember for the rest of the flow (config write happens later) + INSTALL_LANG="$UI_LANG" +} + # Cross-platform timeout: prefer GNU timeout/gtimeout, fallback to a pure-bash implementation # that uses background process + sleep to enforce a hard time limit. if command -v timeout &> /dev/null; then @@ -49,8 +304,20 @@ else } fi -# Get current script directory -export BASE_DIR=$(cd "$(dirname "$0")"; pwd) +# Get current script directory. +# When launched via process substitution (`bash <(curl ...)`) or a pipe, +# $0 points at /dev/fd/* or "bash", so dirname is meaningless. Fall back to +# the current working directory in that case (remote install will cd into +# the cloned project dir and reset BASE_DIR afterwards). +_script_src="$0" +case "$_script_src" in + /dev/fd/* | /proc/self/fd/* | bash | sh | -* | "") + export BASE_DIR="$(pwd)" + ;; + *) + export BASE_DIR=$(cd "$(dirname "$_script_src")" 2>/dev/null && pwd || pwd) + ;; +esac # Detect if in project directory IS_PROJECT_DIR=false @@ -151,28 +418,11 @@ clone_project() { echo -e "${GREEN}🔍 Cloning CowAgent project...${NC}" if [ -d "CowAgent" ]; then - echo -e "${YELLOW}⚠️ Directory 'CowAgent' already exists.${NC}" - read -p "Choose action: overwrite(o), backup(b), or quit(q)? 
[press Enter for default: b]: " choice - choice=${choice:-b} - case "$choice" in - o|O) - echo -e "${YELLOW}🗑️ Overwriting 'CowAgent' directory...${NC}" - rm -rf CowAgent - ;; - b|B) - backup_dir="CowAgent_backup_$(date +%s)" - echo -e "${YELLOW}🔀 Backing up to '$backup_dir'...${NC}" - mv CowAgent "$backup_dir" - ;; - q|Q) - echo -e "${RED}❌ Installation cancelled.${NC}" - exit 1 - ;; - *) - echo -e "${RED}❌ Invalid choice. Exiting.${NC}" - exit 1 - ;; - esac + # An existing directory is automatically backed up (no prompt) so the + # installer stays one-shot / hands-off. + local backup_dir="CowAgent_backup_$(date +%s)" + echo -e "${YELLOW}⚠️ $(t "目录 'CowAgent' 已存在,自动备份到" "Directory 'CowAgent' exists, backing up to") '$backup_dir'...${NC}" + mv CowAgent "$backup_dir" fi check_and_install_tool git @@ -188,9 +438,25 @@ clone_project() { echo -e "${RED}❌ Cannot download project. Please install Git, wget, or curl.${NC}" exit 1 fi - unzip CowAgent.zip - mv CowAgent-master CowAgent - rm CowAgent.zip + # Unzip: prefer `unzip`, otherwise fall back to Python's zipfile (no + # extra dependency) so minimal environments without unzip still work. + if command -v unzip &> /dev/null; then + unzip CowAgent.zip + elif command -v python3 &> /dev/null; then + python3 -m zipfile -e CowAgent.zip . + elif command -v python &> /dev/null; then + python -m zipfile -e CowAgent.zip . + else + echo -e "${RED}❌ Cannot extract archive. Please install 'unzip' or Python.${NC}" + exit 1 + fi + # Archive top-level dir name may vary (CowAgent-master, etc.); detect it. + local _extracted="CowAgent-master" + if [ ! 
-d "$_extracted" ]; then + _extracted=$(ls -d CowAgent-*/ 2>/dev/null | head -1 | sed 's:/*$::') + fi + [ -n "$_extracted" ] && [ -d "$_extracted" ] && mv "$_extracted" CowAgent + rm -f CowAgent.zip else local clone_ok=false # Detect and temporarily disable invalid git proxy settings @@ -240,15 +506,37 @@ clone_project() { # Install dependencies install_dependencies() { echo -e "${GREEN}📦 Installing dependencies...${NC}" + # Pick the pip index by install language, then fall back to the other if the + # preferred one is unreachable: + # - zh users: Tsinghua mirror first (fast in China), official PyPI fallback + # - others : official PyPI first, Tsinghua mirror fallback local PIP_MIRROR="" - if curl -s --connect-timeout 5 https://pypi.tuna.tsinghua.edu.cn/simple/ > /dev/null 2>&1; then - PIP_MIRROR="-i https://pypi.tuna.tsinghua.edu.cn/simple" + local _tuna="https://pypi.tuna.tsinghua.edu.cn/simple" + local _pypi="https://pypi.org/simple" + if [ "$UI_LANG" = "zh" ]; then + # Prefer Tsinghua; if it's down, fall back to official PyPI (pip default). + if curl -s --connect-timeout 5 "${_tuna}/" > /dev/null 2>&1; then + PIP_MIRROR="-i $_tuna" + fi + else + # Prefer official PyPI; only use Tsinghua if PyPI is unreachable. + if ! curl -s --connect-timeout 5 "${_pypi}/" > /dev/null 2>&1 \ + && curl -s --connect-timeout 5 "${_tuna}/" > /dev/null 2>&1; then + PIP_MIRROR="-i $_tuna" + fi + fi + if [ -n "$PIP_MIRROR" ]; then + echo -e "${YELLOW}Using pip mirror: ${_tuna}${NC}" fi + # Only pass --break-system-packages if this pip actually supports it + # (pip >= 23.x). Older pip versions error out with "no such option", + # which previously dumped a confusing usage message and failed the install. 
PIP_EXTRA_ARGS="" - if $PYTHON_CMD -c "import sys; exit(0 if sys.version_info >= (3, 11) else 1)" 2>/dev/null; then + if $PYTHON_CMD -c "import sys; exit(0 if sys.version_info >= (3, 11) else 1)" 2>/dev/null \ + && $PYTHON_CMD -m pip install --help 2>/dev/null | grep -q -- "--break-system-packages"; then PIP_EXTRA_ARGS="--break-system-packages" - echo -e "${YELLOW}Python 3.11+ detected, using --break-system-packages for pip installations${NC}" + echo -e "${YELLOW}Python 3.11+ with break-system-packages support detected${NC}" fi echo -e "${YELLOW}Upgrading pip and basic tools...${NC}" @@ -306,199 +594,221 @@ install_dependencies() { # Select model select_model() { echo "" - echo -e "${CYAN}${BOLD}=========================================${NC}" - echo -e "${CYAN}${BOLD} Select AI Model${NC}" - echo -e "${CYAN}${BOLD}=========================================${NC}" - echo -e "${YELLOW}1) DeepSeek (deepseek-v4-flash, deepseek-v4-pro, etc.)${NC}" - echo -e "${YELLOW}2) MiniMax (MiniMax-M2.7, MiniMax-M2.5, etc.)${NC}" - echo -e "${YELLOW}3) Claude (claude-opus-4-8, claude-opus-4-7, claude-sonnet-4-6, etc.)${NC}" - echo -e "${YELLOW}4) Gemini (gemini-3.1-flash-lite-preview, gemini-3.1-pro-preview, etc.)${NC}" - echo -e "${YELLOW}5) OpenAI GPT (gpt-5.4, gpt-5.2, gpt-4.1, etc.)${NC}" - echo -e "${YELLOW}6) Zhipu AI (glm-5.1, glm-5-turbo, glm-5, etc.)${NC}" - echo -e "${YELLOW}7) Qwen (qwen3.6-plus, qwen3.5-plus, qwen3-max, qwq-plus, etc.)${NC}" - echo -e "${YELLOW}8) Doubao (doubao-seed-2-0-code-preview-260215, etc.)${NC}" - echo -e "${YELLOW}9) Kimi (kimi-k2.6, kimi-k2.5, kimi-k2, etc.)${NC}" - echo -e "${YELLOW}10) LinkAI (access multiple models via one API)${NC}" - echo "" - - while true; do - read -p "Enter your choice [press Enter for default: 1 - DeepSeek]: " model_choice - model_choice=${model_choice:-1} - case "$model_choice" in - 1|2|3|4|5|6|7|8|9|10) - break - ;; - *) - echo -e "${RED}Invalid choice. 
Please enter 1-10.${NC}" - ;; - esac - done + local title sel + title="$(t "选择 AI 模型" "Select AI Model")" + # The 11th option is "skip" -> configure later in the web console. + select_menu sel "$title" \ + "DeepSeek (deepseek-v4-flash, deepseek-v4-pro, etc.)" \ + "Claude (claude-opus-4-8, claude-opus-4-7, claude-sonnet-4-6, etc.)" \ + "Gemini (gemini-3.1-flash-lite-preview, gemini-3.1-pro-preview, etc.)" \ + "OpenAI GPT (gpt-5.4, gpt-5.2, gpt-4.1, etc.)" \ + "MiniMax (MiniMax-M2.7, MiniMax-M2.5, etc.)" \ + "Zhipu AI (glm-5.1, glm-5-turbo, glm-5, etc.)" \ + "Qwen (qwen3.6-plus, qwen3.5-plus, qwen3-max, qwq-plus, etc.)" \ + "Doubao (doubao-seed-2-0-code-preview-260215, etc.)" \ + "Kimi (kimi-k2.6, kimi-k2.5, kimi-k2, etc.)" \ + "LinkAI ($(t "一个 Key 接入所有模型" "access all models via one API"))" \ + "$(t "⏭ 跳过(稍后在 Web 控制台配置)" "⏭ Skip (configure later in the web console)")" + model_choice="$sel" } # Read model config: provider, default_model, key_variable_name read_model_config() { local provider=$1 default_model=$2 key_var=$3 - echo -e "${GREEN}Configuring ${provider}...${NC}" - read -p "Enter ${provider} API Key: " _api_key - read -p "Enter model name [press Enter for default: ${default_model}]: " model_name - model_name=${model_name:-$default_model} - MODEL_NAME="$model_name" - eval "${key_var}=\"\$_api_key\"" + echo -e "${GREEN}$(t "正在配置" "Configuring") ${provider}...${NC}" + # Only ask for the API key here; the model name and API base default to + # sensible values and can be changed later in the web console. + local _api_key + tty_read _api_key "$(t "请输入" "Enter") ${provider} API Key ($(t "回车跳过,稍后在 Web 控制台填写" "press Enter to skip, set later in web console")): " + MODEL_NAME="$default_model" + # printf -v (not eval) so keys containing quotes/backticks/$() are safe. 
+ printf -v "${key_var}" '%s' "$_api_key" } -# Read optional API base URL -read_api_base() { - local base_var=$1 default_url=$2 - read -p "Enter API Base URL [press Enter for default: ${default_url}]: " api_base - api_base=${api_base:-$default_url} - eval "${base_var}=\"\$api_base\"" -} - -# Configure model +# Configure model. The "skip" choice leaves the model empty so the user can +# finish configuration in the web console after first start. configure_model() { case "$model_choice" in 1) read_model_config "DeepSeek" "deepseek-v4-flash" "DEEPSEEK_KEY" ;; - 2) read_model_config "MiniMax" "MiniMax-M2.7" "MINIMAX_KEY" ;; - 3) - read_model_config "Claude" "claude-opus-4-8" "CLAUDE_KEY" - read_api_base "CLAUDE_BASE" "https://api.anthropic.com/v1" - ;; - 4) - read_model_config "Gemini" "gemini-3.1-pro-preview" "GEMINI_KEY" - read_api_base "GEMINI_BASE" "https://generativelanguage.googleapis.com" - ;; - 5) - read_model_config "OpenAI GPT" "gpt-5.4" "OPENAI_KEY" - read_api_base "OPENAI_BASE" "https://api.openai.com/v1" - ;; + 2) read_model_config "Claude" "claude-opus-4-8" "CLAUDE_KEY" ;; + 3) read_model_config "Gemini" "gemini-3.1-pro-preview" "GEMINI_KEY" ;; + 4) read_model_config "OpenAI GPT" "gpt-5.4" "OPENAI_KEY" ;; + 5) read_model_config "MiniMax" "MiniMax-M2.7" "MINIMAX_KEY" ;; 6) read_model_config "Zhipu AI" "glm-5.1" "ZHIPU_KEY" ;; 7) read_model_config "Qwen (DashScope)" "qwen3.6-plus" "DASHSCOPE_KEY" ;; 8) read_model_config "Doubao (Volcengine Ark)" "doubao-seed-2-0-code-preview-260215" "ARK_KEY" ;; 9) read_model_config "Kimi (Moonshot)" "kimi-k2.6" "MOONSHOT_KEY" ;; 10) + # Show where to obtain a LinkAI key (zh users -> console page). 
+ echo -e "${CYAN}$(t "获取 LinkAI Key" "Get your LinkAI Key"): https://link-ai.tech/console/interface${NC}" read_model_config "LinkAI" "deepseek-v4-flash" "LINKAI_KEY" USE_LINKAI="true" ;; + 11) + # Skip: leave model unset, will be configured in web console + MODEL_SKIPPED="true" + MODEL_NAME="" + echo -e "${YELLOW}$(t "已跳过模型配置,稍后可在 Web 控制台填写" "Model configuration skipped, you can set it later in the web console")${NC}" + ;; esac } -# Select channel -select_channel() { - echo "" - echo -e "${CYAN}${BOLD}=========================================${NC}" - echo -e "${CYAN}${BOLD} Select Communication Channel${NC}" - echo -e "${CYAN}${BOLD}=========================================${NC}" - echo -e "${YELLOW}1) Weixin (微信)${NC}" - echo -e "${YELLOW}2) Feishu (飞书)${NC}" - echo -e "${YELLOW}3) DingTalk (钉钉)${NC}" - echo -e "${YELLOW}4) WeCom Bot (企微智能机器人)${NC}" - echo -e "${YELLOW}5) QQ (QQ 机器人)${NC}" - echo -e "${YELLOW}6) WeCom App (企微自建应用)${NC}" - echo -e "${YELLOW}7) Web (网页)${NC}" - echo "" - - while true; do - read -p "Enter your choice [press Enter for default: 1 - Weixin]: " channel_choice - channel_choice=${channel_choice:-1} - case "$channel_choice" in - 1|2|3|4|5|6|7) - break - ;; - *) - echo -e "${RED}Invalid choice. Please enter 1-7.${NC}" - ;; - esac - done +# Channel label by stable key (independent of menu order). +channel_label() { + case "$1" in + web) t "Web 网页控制台(推荐,开箱即用)" "Web Console (recommended, ready to use)" ;; + weixin) t "微信" "WeChat (Weixin)" ;; + feishu) t "飞书" "Feishu / Lark" ;; + dingtalk) t "钉钉" "DingTalk" ;; + wecom_bot) t "企微智能机器人" "WeCom Bot" ;; + qq) printf '%s' "QQ" ;; + wechatcom_app) t "企微自建应用" "WeCom App" ;; + telegram) printf '%s' "Telegram" ;; + slack) printf '%s' "Slack" ;; + discord) printf '%s' "Discord" ;; + skip) t "⏭ 跳过(稍后在 Web 控制台配置)" "⏭ Skip (configure later in the web console)" ;; + esac } -# Configure channel +# Select channel. 
The display order depends on the install language: +# - English: Web first, then the global IM channels (Telegram/Discord/Slack), +# then the China-focused channels. +# - Chinese: Web first, then China-focused channels, then global ones. +# A stable key list (CHANNEL_KEYS) decouples the menu order from the config +# logic, so reordering the menu never breaks configure_channel(). +select_channel() { + echo "" + local title sel + title="$(t "选择接入渠道" "Select Communication Channel")" + if [ "$UI_LANG" = "en" ]; then + CHANNEL_KEYS=(web telegram discord slack weixin feishu dingtalk wecom_bot qq wechatcom_app skip) + else + CHANNEL_KEYS=(web weixin feishu dingtalk wecom_bot qq wechatcom_app telegram slack discord skip) + fi + local labels=() k + for k in "${CHANNEL_KEYS[@]}"; do + labels+=("$(channel_label "$k")") + done + select_menu sel "$title" "${labels[@]}" + # Map the 1-based menu position back to the stable channel key. + channel_choice="${CHANNEL_KEYS[$((sel - 1))]}" +} + +# Configure channel, dispatched by stable channel key (not menu position). configure_channel() { case "$channel_choice" in - 1) + web|skip) + # Web (also the default when skipped). Use the default port with + # no prompt; it can be changed later in the web console / config. + CHANNEL_TYPE="web" + WEB_PORT="9899" + ACCESS_INFO="$(t "Web 控制台地址" "Web console") : http://localhost:9899/chat" + ;; + weixin) # Weixin CHANNEL_TYPE="weixin" - ACCESS_INFO="Weixin channel configured. Scan QR code in terminal or web console to login." + ACCESS_INFO="$(t "微信渠道已配置,请在终端或 Web 控制台扫码登录" "Weixin channel configured. 
Scan QR code in terminal or web console to login.")" ;; - 2) + feishu) # Feishu (WebSocket mode) CHANNEL_TYPE="feishu" - echo -e "${GREEN}Configure Feishu (WebSocket mode)...${NC}" - read -p "Enter Feishu App ID: " fs_app_id - read -p "Enter Feishu App Secret: " fs_app_secret - + echo -e "${GREEN}$(t "配置飞书(WebSocket 模式)" "Configure Feishu (WebSocket mode)")...${NC}" + local fs_app_id fs_app_secret + tty_read fs_app_id "$(t "请输入飞书 App ID" "Enter Feishu App ID"): " + tty_read fs_app_secret "$(t "请输入飞书 App Secret" "Enter Feishu App Secret"): " FEISHU_APP_ID="$fs_app_id" FEISHU_APP_SECRET="$fs_app_secret" FEISHU_EVENT_MODE="websocket" - ACCESS_INFO="Feishu channel configured (WebSocket mode)" + ACCESS_INFO="$(t "飞书渠道已配置(WebSocket 模式)" "Feishu channel configured (WebSocket mode)")" ;; - 3) + dingtalk) # DingTalk CHANNEL_TYPE="dingtalk" - echo -e "${GREEN}Configure DingTalk...${NC}" - read -p "Enter DingTalk Client ID: " dt_client_id - read -p "Enter DingTalk Client Secret: " dt_client_secret - + echo -e "${GREEN}$(t "配置钉钉" "Configure DingTalk")...${NC}" + local dt_client_id dt_client_secret + tty_read dt_client_id "$(t "请输入钉钉 Client ID" "Enter DingTalk Client ID"): " + tty_read dt_client_secret "$(t "请输入钉钉 Client Secret" "Enter DingTalk Client Secret"): " DT_CLIENT_ID="$dt_client_id" DT_CLIENT_SECRET="$dt_client_secret" - ACCESS_INFO="DingTalk channel configured" + ACCESS_INFO="$(t "钉钉渠道已配置" "DingTalk channel configured")" ;; - 4) + wecom_bot) # WeCom Bot CHANNEL_TYPE="wecom_bot" - echo -e "${GREEN}Configure WeCom Bot...${NC}" - read -p "Enter WeCom Bot ID: " wecom_bot_id - read -p "Enter WeCom Bot Secret: " wecom_bot_secret - + echo -e "${GREEN}$(t "配置企微智能机器人" "Configure WeCom Bot")...${NC}" + local wecom_bot_id wecom_bot_secret + tty_read wecom_bot_id "$(t "请输入 WeCom Bot ID" "Enter WeCom Bot ID"): " + tty_read wecom_bot_secret "$(t "请输入 WeCom Bot Secret" "Enter WeCom Bot Secret"): " WECOM_BOT_ID="$wecom_bot_id" WECOM_BOT_SECRET="$wecom_bot_secret" - ACCESS_INFO="WeCom 
Bot channel configured" + ACCESS_INFO="$(t "企微智能机器人渠道已配置" "WeCom Bot channel configured")" ;; - 5) + qq) # QQ CHANNEL_TYPE="qq" - echo -e "${GREEN}Configure QQ Bot...${NC}" - read -p "Enter QQ App ID: " qq_app_id - read -p "Enter QQ App Secret: " qq_app_secret - + echo -e "${GREEN}$(t "配置 QQ 机器人" "Configure QQ Bot")...${NC}" + local qq_app_id qq_app_secret + tty_read qq_app_id "$(t "请输入 QQ App ID" "Enter QQ App ID"): " + tty_read qq_app_secret "$(t "请输入 QQ App Secret" "Enter QQ App Secret"): " QQ_APP_ID="$qq_app_id" QQ_APP_SECRET="$qq_app_secret" - ACCESS_INFO="QQ Bot channel configured" + ACCESS_INFO="$(t "QQ 机器人渠道已配置" "QQ Bot channel configured")" ;; - 6) + wechatcom_app) # WeCom App CHANNEL_TYPE="wechatcom_app" - echo -e "${GREEN}Configure WeCom App...${NC}" - read -p "Enter WeChat Corp ID: " corp_id - read -p "Enter WeChat Com App Token: " com_token - read -p "Enter WeChat Com App Secret: " com_secret - read -p "Enter WeChat Com App Agent ID: " com_agent_id - read -p "Enter WeChat Com App AES Key: " com_aes_key - read -p "Enter WeChat Com App Port [press Enter for default: 9898]: " com_port + echo -e "${GREEN}$(t "配置企微自建应用" "Configure WeCom App")...${NC}" + local corp_id com_token com_secret com_agent_id com_aes_key com_port + tty_read corp_id "$(t "请输入企业 Corp ID" "Enter WeChat Corp ID"): " + tty_read com_token "$(t "请输入应用 Token" "Enter WeChat Com App Token"): " + tty_read com_secret "$(t "请输入应用 Secret" "Enter WeChat Com App Secret"): " + tty_read com_agent_id "$(t "请输入应用 Agent ID" "Enter WeChat Com App Agent ID"): " + tty_read com_aes_key "$(t "请输入应用 AES Key" "Enter WeChat Com App AES Key"): " + tty_read com_port "$(t "请输入应用端口" "Enter WeChat Com App Port") [$(t "默认" "default"): 9898]: " com_port=${com_port:-9898} - WECHATCOM_CORP_ID="$corp_id" WECHATCOM_TOKEN="$com_token" WECHATCOM_SECRET="$com_secret" WECHATCOM_AGENT_ID="$com_agent_id" WECHATCOM_AES_KEY="$com_aes_key" WECHATCOM_PORT="$com_port" - ACCESS_INFO="WeCom App channel configured on port ${com_port}" 
+ ACCESS_INFO="$(t "企微自建应用渠道已配置,端口" "WeCom App channel configured on port") ${com_port}" ;; - 7) - # Web - CHANNEL_TYPE="web" - read -p "Enter web port [press Enter for default: 9899]: " web_port - web_port=${web_port:-9899} - - WEB_PORT="$web_port" - ACCESS_INFO="Web interface will be available at: http://localhost:${web_port}/chat" + telegram) + # Telegram + CHANNEL_TYPE="telegram" + echo -e "${GREEN}$(t "配置 Telegram" "Configure Telegram")...${NC}" + local tg_token + tty_read tg_token "$(t "请输入 Telegram Bot Token" "Enter Telegram Bot Token"): " + TELEGRAM_TOKEN="$tg_token" + ACCESS_INFO="$(t "Telegram 渠道已配置" "Telegram channel configured")" + ;; + slack) + # Slack + CHANNEL_TYPE="slack" + echo -e "${GREEN}$(t "配置 Slack" "Configure Slack")...${NC}" + local slack_bot slack_app + tty_read slack_bot "$(t "请输入 Slack Bot Token" "Enter Slack Bot Token") (xoxb-...): " + tty_read slack_app "$(t "请输入 Slack App Token" "Enter Slack App Token") (xapp-...): " + SLACK_BOT_TOKEN="$slack_bot" + SLACK_APP_TOKEN="$slack_app" + ACCESS_INFO="$(t "Slack 渠道已配置" "Slack channel configured")" + ;; + discord) + # Discord + CHANNEL_TYPE="discord" + echo -e "${GREEN}$(t "配置 Discord" "Configure Discord")...${NC}" + local discord_token + tty_read discord_token "$(t "请输入 Discord Bot Token" "Enter Discord Bot Token"): " + DISCORD_TOKEN="$discord_token" + ACCESS_INFO="$(t "Discord 渠道已配置" "Discord channel configured")" ;; esac } # Generate config file create_config_file() { - echo -e "${GREEN}📝 Generating config.json...${NC}" + echo -e "${GREEN}📝 $(t "正在生成 config.json" "Generating config.json")...${NC}" CHANNEL_TYPE="$CHANNEL_TYPE" \ MODEL_NAME="$MODEL_NAME" \ @@ -532,12 +842,18 @@ create_config_file() { WECHATCOM_AGENT_ID="${WECHATCOM_AGENT_ID:-}" \ WECHATCOM_AES_KEY="${WECHATCOM_AES_KEY:-}" \ WECHATCOM_PORT="${WECHATCOM_PORT:-}" \ + TELEGRAM_TOKEN="${TELEGRAM_TOKEN:-}" \ + SLACK_BOT_TOKEN="${SLACK_BOT_TOKEN:-}" \ + SLACK_APP_TOKEN="${SLACK_APP_TOKEN:-}" \ + DISCORD_TOKEN="${DISCORD_TOKEN:-}" \ + 
COW_LANG="${INSTALL_LANG:-auto}" \ $PYTHON_CMD -c " import json, os e = os.environ.get base = { - 'channel_type': e('CHANNEL_TYPE'), - 'model': e('MODEL_NAME'), + 'channel_type': e('CHANNEL_TYPE') or 'web', + 'model': e('MODEL_NAME') or '', + 'cow_lang': e('COW_LANG', 'auto'), 'open_ai_api_key': e('OPENAI_KEY', ''), 'open_ai_api_base': e('OPENAI_BASE'), 'claude_api_key': e('CLAUDE_KEY', ''), @@ -571,19 +887,28 @@ channel_map = { 'wecom_bot': {'wecom_bot_id': 'WECOM_BOT_ID', 'wecom_bot_secret': 'WECOM_BOT_SECRET'}, 'qq': {'qq_app_id': 'QQ_APP_ID', 'qq_app_secret': 'QQ_APP_SECRET'}, 'wechatcom_app': {'wechatcom_corp_id': 'WECHATCOM_CORP_ID', 'wechatcomapp_token': 'WECHATCOM_TOKEN', 'wechatcomapp_secret': 'WECHATCOM_SECRET', 'wechatcomapp_agent_id': 'WECHATCOM_AGENT_ID', 'wechatcomapp_aes_key': 'WECHATCOM_AES_KEY', 'wechatcomapp_port': ('WECHATCOM_PORT', int)}, + 'telegram': {'telegram_token': 'TELEGRAM_TOKEN'}, + 'slack': {'slack_bot_token': 'SLACK_BOT_TOKEN', 'slack_app_token': 'SLACK_APP_TOKEN'}, + 'discord': {'discord_token': 'DISCORD_TOKEN'}, } -ch = e('CHANNEL_TYPE') +def _to_int(val, default): + try: + return int(val) + except (TypeError, ValueError): + return default +ch = e('CHANNEL_TYPE') or 'web' for key, spec in channel_map.get(ch, {}).items(): if isinstance(spec, tuple): env_name, conv = spec - base[key] = conv(e(env_name)) + # Guard int() against non-numeric input; fall back to a sane port. 
+ base[key] = _to_int(e(env_name), 9899 if key == 'web_port' else 9898) if conv is int else conv(e(env_name)) else: base[key] = e(spec, '') with open('config.json', 'w') as f: json.dump(base, f, indent=2, ensure_ascii=False) " - echo -e "${GREEN}✅ Configuration file created successfully.${NC}" + echo -e "${GREEN}✅ $(t "配置文件创建成功" "Configuration file created successfully").${NC}" } # Start project @@ -622,29 +947,37 @@ start_project() { sleep 2 echo "" echo -e "${CYAN}${BOLD}=========================================${NC}" - echo -e "${GREEN}${EMOJI_CHECK} CowAgent is now running in background!${NC}" - echo -e "${GREEN}${EMOJI_CHECK} Process will continue after closing terminal.${NC}" + echo -e "${GREEN}${EMOJI_CHECK} $(t "CowAgent 已在后台运行" "CowAgent is now running in background")!${NC}" + echo -e "${GREEN}${EMOJI_CHECK} $(t "关闭终端后进程仍会继续运行" "Process will continue after closing terminal").${NC}" echo -e "${CYAN}$ACCESS_INFO${NC}" + + # If the model was skipped, guide the user to finish setup in the web console. 
+ if [ "${MODEL_SKIPPED:-}" = "true" ]; then + local _port="${WEB_PORT:-9899}" + echo "" + echo -e "${YELLOW}${EMOJI_WARN} $(t "尚未配置模型,请在 Web 控制台完成配置" "Model not configured yet, please finish setup in the web console"):${NC}" + echo -e "${CYAN} http://localhost:${_port}/chat${NC}" + fi echo "" - echo -e "${CYAN}${BOLD}Management Commands:${NC}" + echo -e "${CYAN}${BOLD}$(t "管理命令" "Management Commands"):${NC}" if $USE_COW; then - echo -e " ${GREEN}cow stop${NC} Stop the service" - echo -e " ${GREEN}cow restart${NC} Restart the service" - echo -e " ${GREEN}cow status${NC} Check status" - echo -e " ${GREEN}cow logs${NC} View logs" - echo -e " ${GREEN}cow update${NC} Update and restart" - echo -e " ${GREEN}cow install-browser${NC} Install browser tool" + echo -e " ${GREEN}cow stop${NC} $(t "停止服务" "Stop the service")" + echo -e " ${GREEN}cow restart${NC} $(t "重启服务" "Restart the service")" + echo -e " ${GREEN}cow status${NC} $(t "查看状态" "Check status")" + echo -e " ${GREEN}cow logs${NC} $(t "查看日志" "View logs")" + echo -e " ${GREEN}cow update${NC} $(t "更新并重启" "Update and restart")" + echo -e " ${GREEN}cow install-browser${NC} $(t "安装浏览器工具" "Install browser tool")" else - echo -e " ${GREEN}./run.sh stop${NC} Stop the service" - echo -e " ${GREEN}./run.sh restart${NC} Restart the service" - echo -e " ${GREEN}./run.sh status${NC} Check status" - echo -e " ${GREEN}./run.sh logs${NC} View logs" - echo -e " ${GREEN}./run.sh update${NC} Update and restart" + echo -e " ${GREEN}./run.sh stop${NC} $(t "停止服务" "Stop the service")" + echo -e " ${GREEN}./run.sh restart${NC} $(t "重启服务" "Restart the service")" + echo -e " ${GREEN}./run.sh status${NC} $(t "查看状态" "Check status")" + echo -e " ${GREEN}./run.sh logs${NC} $(t "查看日志" "View logs")" + echo -e " ${GREEN}./run.sh update${NC} $(t "更新并重启" "Update and restart")" fi echo -e "${CYAN}${BOLD}=========================================${NC}" echo "" - echo -e "${YELLOW}Showing recent logs (Ctrl+C to exit, agent keeps running):${NC}" + echo -e 
"${YELLOW}$(t "显示最近日志(Ctrl+C 退出,Agent 继续运行)" "Showing recent logs (Ctrl+C to exit, agent keeps running)"):${NC}" sleep 2 tail -n 30 -f "${BASE_DIR}/nohup.out" } @@ -655,20 +988,20 @@ show_usage() { echo -e "${CYAN}${BOLD} ${EMOJI_COW} CowAgent Management Script${NC}" echo -e "${CYAN}${BOLD}=========================================${NC}" echo "" - echo -e "${YELLOW}Usage:${NC}" - echo -e " ${GREEN}./run.sh${NC} ${CYAN}# Install/Configure project${NC}" - echo -e " ${GREEN}./run.sh ${NC} ${CYAN}# Execute management command${NC}" + echo -e "${YELLOW}$(t "用法" "Usage"):${NC}" + echo -e " ${GREEN}./run.sh${NC} ${CYAN}# $(t "安装/配置项目" "Install/Configure project")${NC}" + echo -e " ${GREEN}./run.sh ${NC} ${CYAN}# $(t "执行管理命令" "Execute management command")${NC}" echo "" - echo -e "${YELLOW}Commands:${NC}" - echo -e " ${GREEN}start${NC} Start the service" - echo -e " ${GREEN}stop${NC} Stop the service" - echo -e " ${GREEN}restart${NC} Restart the service" - echo -e " ${GREEN}status${NC} Check service status" - echo -e " ${GREEN}logs${NC} View logs (tail -f)" - echo -e " ${GREEN}config${NC} Reconfigure project" - echo -e " ${GREEN}update${NC} Update and restart" + echo -e "${YELLOW}$(t "命令" "Commands"):${NC}" + echo -e " ${GREEN}start${NC} $(t "启动服务" "Start the service")" + echo -e " ${GREEN}stop${NC} $(t "停止服务" "Stop the service")" + echo -e " ${GREEN}restart${NC} $(t "重启服务" "Restart the service")" + echo -e " ${GREEN}status${NC} $(t "查看服务状态" "Check service status")" + echo -e " ${GREEN}logs${NC} $(t "查看日志 (tail -f)" "View logs (tail -f)")" + echo -e " ${GREEN}config${NC} $(t "重新配置项目" "Reconfigure project")" + echo -e " ${GREEN}update${NC} $(t "更新并重启" "Update and restart")" echo "" - echo -e "${YELLOW}Examples:${NC}" + echo -e "${YELLOW}$(t "示例" "Examples"):${NC}" echo -e " ${GREEN}./run.sh start${NC}" echo -e " ${GREEN}./run.sh logs${NC}" echo -e " ${GREEN}./run.sh status${NC}" @@ -701,8 +1034,8 @@ has_cow() { # Start service cmd_start() { if [ ! 
-f "${BASE_DIR}/config.json" ]; then - echo -e "${RED}${EMOJI_CROSS} config.json not found${NC}" - echo -e "${YELLOW}Please run './run.sh' to configure first${NC}" + echo -e "${RED}${EMOJI_CROSS} $(t "未找到 config.json" "config.json not found")${NC}" + echo -e "${YELLOW}$(t "请先运行 './run.sh' 进行配置" "Please run './run.sh' to configure first")${NC}" exit 1 fi @@ -711,8 +1044,8 @@ cmd_start() { cow start else if is_running; then - echo -e "${YELLOW}${EMOJI_WARN} CowAgent is already running (PID: $(get_pid))${NC}" - echo -e "${YELLOW}Use './run.sh restart' to restart${NC}" + echo -e "${YELLOW}${EMOJI_WARN} $(t "CowAgent 已在运行中" "CowAgent is already running") (PID: $(get_pid))${NC}" + echo -e "${YELLOW}$(t "使用 './run.sh restart' 重启" "Use './run.sh restart' to restart")${NC}" return fi check_python_version @@ -722,34 +1055,37 @@ cmd_start() { # Stop service cmd_stop() { + # Don't let kill/return non-zero (e.g. process already gone) abort the + # caller (cmd_restart) under `set -e`. + set +e if has_cow; then cd "${BASE_DIR}" cow stop else - echo -e "${GREEN}${EMOJI_STOP} Stopping CowAgent...${NC}" + echo -e "${GREEN}${EMOJI_STOP} $(t "正在停止 CowAgent" "Stopping CowAgent")...${NC}" if ! is_running; then - echo -e "${YELLOW}${EMOJI_WARN} CowAgent is not running${NC}" - return + echo -e "${YELLOW}${EMOJI_WARN} $(t "CowAgent 未在运行" "CowAgent is not running")${NC}" + return 0 fi pid=$(get_pid) if [ -z "$pid" ] || ! 
echo "$pid" | grep -qE '^[0-9]+$'; then - echo -e "${RED}❌ Failed to get valid PID (got: ${pid})${NC}" - return 1 + echo -e "${RED}❌ $(t "获取有效 PID 失败" "Failed to get valid PID") (${pid})${NC}" + return 0 fi - echo -e "${GREEN}Found running process (PID: ${pid})${NC}" + echo -e "${GREEN}$(t "找到运行中的进程" "Found running process") (PID: ${pid})${NC}" - kill ${pid} + kill ${pid} 2>/dev/null || true sleep 3 if ps -p ${pid} > /dev/null 2>&1; then - echo -e "${YELLOW}⚠️ Process not stopped, forcing termination...${NC}" - kill -9 ${pid} + echo -e "${YELLOW}⚠️ $(t "进程未停止,强制终止" "Process not stopped, forcing termination")...${NC}" + kill -9 ${pid} 2>/dev/null || true fi - echo -e "${GREEN}${EMOJI_CHECK} CowAgent stopped${NC}" + echo -e "${GREEN}${EMOJI_CHECK} $(t "CowAgent 已停止" "CowAgent stopped")${NC}" fi } @@ -777,20 +1113,21 @@ cmd_status() { if is_running; then pid=$(get_pid) - echo -e "${GREEN}Status:${NC} ✅ Running" + echo -e "${GREEN}$(t "状态" "Status"):${NC} ✅ $(t "运行中" "Running")" echo -e "${GREEN}PID:${NC} ${pid}" if [ -f "${BASE_DIR}/nohup.out" ]; then - echo -e "${GREEN}Logs:${NC} ${BASE_DIR}/nohup.out" + echo -e "${GREEN}$(t "日志" "Logs"):${NC} ${BASE_DIR}/nohup.out" fi else - echo -e "${YELLOW}Status:${NC} ⭐ Stopped" + echo -e "${YELLOW}$(t "状态" "Status"):${NC} ⭐ $(t "已停止" "Stopped")" fi if [ -f "${BASE_DIR}/config.json" ]; then - model=$(grep -o '"model"[[:space:]]*:[[:space:]]*"[^"]*"' "${BASE_DIR}/config.json" | cut -d'"' -f4) - channel=$(grep -o '"channel_type"[[:space:]]*:[[:space:]]*"[^"]*"' "${BASE_DIR}/config.json" | cut -d'"' -f4) - echo -e "${GREEN}Model:${NC} ${model}" - echo -e "${GREEN}Channel:${NC} ${channel}" + # `|| true`: grep returns 1 when the key is absent (set -e safe). 
+ model=$(grep -o '"model"[[:space:]]*:[[:space:]]*"[^"]*"' "${BASE_DIR}/config.json" 2>/dev/null | cut -d'"' -f4 || true) + channel=$(grep -o '"channel_type"[[:space:]]*:[[:space:]]*"[^"]*"' "${BASE_DIR}/config.json" 2>/dev/null | cut -d'"' -f4 || true) + echo -e "${GREEN}$(t "模型" "Model"):${NC} ${model:-$(t "(未配置)" "(not set)")}" + echo -e "${GREEN}$(t "渠道" "Channel"):${NC} ${channel:-$(t "(未配置)" "(not set)")}" fi echo -e "${CYAN}${BOLD}=========================================${NC}" @@ -804,22 +1141,30 @@ cmd_logs() { cow logs -f else if [ -f "${BASE_DIR}/nohup.out" ]; then - echo -e "${YELLOW}Viewing logs (Ctrl+C to exit):${NC}" + echo -e "${YELLOW}$(t "查看日志(Ctrl+C 退出)" "Viewing logs (Ctrl+C to exit)"):${NC}" tail -f "${BASE_DIR}/nohup.out" else - echo -e "${RED}❌ Log file not found: ${BASE_DIR}/nohup.out${NC}" + echo -e "${RED}❌ $(t "日志文件未找到" "Log file not found"): ${BASE_DIR}/nohup.out${NC}" fi fi } # Reconfigure cmd_config() { - echo -e "${YELLOW}${EMOJI_WRENCH} Reconfiguring CowAgent...${NC}" + # Interactive flow: disable `set -e` (see install_mode for rationale). + set +e + # One shared terminal handle for all menus in this session. + menu_session_begin + + # Choose language first so the rest of the flow is localized. + select_language + echo "" + echo -e "${YELLOW}${EMOJI_WRENCH} $(t "正在重新配置 CowAgent" "Reconfiguring CowAgent")...${NC}" if [ -f "${BASE_DIR}/config.json" ]; then backup_file="${BASE_DIR}/config.json.backup.$(date +%s)" cp "${BASE_DIR}/config.json" "${backup_file}" - echo -e "${GREEN}✅ Backed up config to: ${backup_file}${NC}" + echo -e "${GREEN}✅ $(t "已备份配置到" "Backed up config to"): ${backup_file}${NC}" fi check_python_version @@ -828,10 +1173,12 @@ cmd_config() { configure_model select_channel configure_channel + menu_session_end create_config_file echo "" - read -p "Restart service now? [Y/n]: " restart_now + local restart_now + tty_read restart_now "$(t "现在重启服务" "Restart service now")? [Y/n]: " if [[ ! 
$restart_now == [Nn]* ]]; then cmd_restart fi @@ -839,27 +1186,27 @@ cmd_config() { # Update project cmd_update() { - echo -e "${GREEN}${EMOJI_WRENCH} Updating CowAgent...${NC}" + echo -e "${GREEN}${EMOJI_WRENCH} $(t "正在更新 CowAgent" "Updating CowAgent")...${NC}" cd "${BASE_DIR}" # Pull latest code first (service still running) local pull_ok=false if [ -d .git ]; then - echo -e "${GREEN}🔄 Pulling latest code...${NC}" + echo -e "${GREEN}🔄 $(t "正在拉取最新代码" "Pulling latest code")...${NC}" if git pull; then pull_ok=true else - echo -e "${YELLOW}⚠️ git pull failed, trying Gitee mirror...${NC}" + echo -e "${YELLOW}⚠️ $(t "git pull 失败,尝试 Gitee 镜像" "git pull failed, trying Gitee mirror")...${NC}" git remote set-url origin https://gitee.com/zhayujie/CowAgent.git if git pull; then pull_ok=true else - echo -e "${RED}❌ Failed to pull code. Update aborted.${NC}" + echo -e "${RED}❌ $(t "拉取代码失败,更新已中止" "Failed to pull code. Update aborted").${NC}" exit 1 fi fi else - echo -e "${YELLOW}⚠️ Not a git repository, skipping code update${NC}" + echo -e "${YELLOW}⚠️ $(t "非 git 仓库,跳过代码更新" "Not a git repository, skipping code update")${NC}" fi # Re-exec with the updated run.sh to pick up new logic @@ -885,24 +1232,38 @@ cmd_post_update() { # Installation mode install_mode() { + # Interactive flow: disable `set -e` so a single non-zero command (e.g. an + # arithmetic `(( ))` evaluating to 0, a `read` hitting EOF, or an optional + # step failing) does not silently abort the whole installer. + set +e clear echo -e "${CYAN}${BOLD}=========================================${NC}" echo -e "${CYAN}${BOLD} ${EMOJI_COW} CowAgent Installation${NC}" echo -e "${CYAN}${BOLD}=========================================${NC}" echo "" + + # Open one shared terminal handle for ALL menus in this session (language, + # model, channel). One long-lived fd 3 avoids per-menu re-open issues on + # bash 3.2. Closed on early return and before config generation. 
+ menu_session_begin + + # Step 0: choose the install/UI language. Everything after this is localized. + select_language + echo "" sleep 1 if [ "$IS_PROJECT_DIR" = true ]; then - echo -e "${GREEN}✅ Detected existing project directory.${NC}" + echo -e "${GREEN}✅ $(t "检测到已有项目目录" "Detected existing project directory").${NC}" if [ -f "${BASE_DIR}/config.json" ]; then - echo -e "${GREEN}✅ Project already configured${NC}" + menu_session_end + echo -e "${GREEN}✅ $(t "项目已配置" "Project already configured")${NC}" echo "" show_usage return fi - echo -e "${YELLOW}📝 No config.json found. Let's configure your project!${NC}" + echo -e "${YELLOW}📝 $(t "未找到 config.json,开始配置项目" "No config.json found. Let's configure your project")!${NC}" echo "" # Project directory already exists, skip clone @@ -919,34 +1280,44 @@ install_mode() { configure_model select_channel configure_channel + menu_session_end create_config_file + # Auto-start after configuration for a true out-of-the-box experience. echo "" - read -p "Start CowAgent now? [Y/n]: " start_now - if [[ ! $start_now == [Nn]* ]]; then - start_project - else - echo -e "${GREEN}✅ Installation complete!${NC}" - echo "" - echo -e "${CYAN}${BOLD}To start manually:${NC}" - echo -e "${YELLOW} cd ${BASE_DIR}${NC}" - echo -e "${YELLOW} ./run.sh start${NC}" - echo "" - echo -e "${CYAN}Or use nohup directly:${NC}" - echo -e "${YELLOW} nohup $PYTHON_CMD app.py > nohup.out 2>&1 & tail -f nohup.out${NC}" - fi + start_project } # Require running inside the project directory require_project_dir() { if [ "$IS_PROJECT_DIR" = false ]; then - echo -e "${RED}${EMOJI_CROSS} Must run in project directory${NC}" + echo -e "${RED}${EMOJI_CROSS} $(t "必须在项目目录下运行" "Must run in project directory")${NC}" exit 1 fi } +# Initialize UI_LANG for management commands: prefer cow_lang from an existing +# config.json, otherwise fall back to environment detection. The install flow +# overrides this later via select_language(). 
+init_ui_lang() { + [ -n "$UI_LANG" ] && return + local cfg_lang="" + if [ -f "${BASE_DIR}/config.json" ]; then + # `|| true`: grep returns 1 when cow_lang is absent, which would abort + # the whole script under `set -e` at the very first management command. + cfg_lang=$(grep -o '"cow_lang"[[:space:]]*:[[:space:]]*"[^"]*"' "${BASE_DIR}/config.json" 2>/dev/null | cut -d'"' -f4 || true) + fi + case "$cfg_lang" in + zh) UI_LANG="zh" ;; + en) UI_LANG="en" ;; + *) UI_LANG=$(detect_ui_lang) ;; + esac +} + # Main function main() { + init_ui_lang + case "$1" in start|stop|restart|status|logs|config|update|_post_update) require_project_dir @@ -969,7 +1340,7 @@ main() { install_mode ;; *) - echo -e "${RED}${EMOJI_CROSS} Unknown command: $1${NC}" + echo -e "${RED}${EMOJI_CROSS} $(t "未知命令" "Unknown command"): $1${NC}" echo "" show_usage exit 1 diff --git a/scripts/run.ps1 b/scripts/run.ps1 index 7c5f0b06..f2171258 100644 --- a/scripts/run.ps1 +++ b/scripts/run.ps1 @@ -18,10 +18,19 @@ param( $ErrorActionPreference = "Stop" -# ── ensure UTF-8 console encoding on Windows ───────────────────── -[Console]::OutputEncoding = [System.Text.Encoding]::UTF8 +# ── ensure UTF-8 everywhere on Windows ─────────────────────────── +# Without this, Chinese text renders as mojibake (e.g. "éæ©") on Windows +# PowerShell 5.1, whose console defaults to the system ANSI code page (GBK on +# Chinese systems). Set the active code page AND the console encodings so both +# our output and any child process (git/python) speak UTF-8. +try { chcp 65001 | Out-Null } catch {} +try { + [Console]::OutputEncoding = [System.Text.Encoding]::UTF8 + [Console]::InputEncoding = [System.Text.Encoding]::UTF8 +} catch {} +# $OutputEncoding controls how strings are piped to external programs. 
+$OutputEncoding = [System.Text.Encoding]::UTF8 $env:PYTHONIOENCODING = "utf-8" -chcp 65001 | Out-Null # ── colours ────────────────────────────────────────────────────── function Write-Cow { param([string]$M) Write-Host $M -ForegroundColor Green } @@ -29,6 +38,31 @@ function Write-Warn { param([string]$M) Write-Host $M -ForegroundColor Yellow } function Write-Err { param([string]$M) Write-Host $M -ForegroundColor Red } function Write-Info { param([string]$M) Write-Host $M -ForegroundColor Cyan } +# ── i18n: install-flow language ────────────────────────────────── +# $UiLang controls the language of install prompts/menus ("zh" or "en"). +# Chosen by the user at the first step; defaults to environment detection +# for management commands (start/stop/...). +$script:UiLang = "" + +# Detect default UI language from the OS culture (best-effort). Checks the +# display/UI culture first (closest to the user's chosen Windows language), +# then the regional format culture as a fallback. Any zh-* signal -> "zh". +function Get-DefaultUiLang { + foreach ($getter in @({ (Get-UICulture).Name }, { (Get-Culture).Name })) { + try { + $name = & $getter + if ($name -match '^zh') { return "zh" } + } catch {} + } + return "en" +} + +# Translation helper: T -> string in the active UI language. +function T { + param([string]$Zh, [string]$En) + if ($script:UiLang -eq "en") { return $En } else { return $Zh } +} + # ── detect project directory ───────────────────────────────────── $ScriptDir = if ($PSScriptRoot) { $PSScriptRoot } else { $PWD.Path } $BaseDir = Split-Path $ScriptDir -Parent @@ -39,6 +73,125 @@ if (-not $IsProjectDir) { $IsProjectDir = (Test-Path "$BaseDir\app.py") -and (Test-Path "$BaseDir\config-template.json") } +# Initialize $UiLang for management commands: prefer cow_lang from an existing +# config.json, otherwise fall back to environment detection. 
+function Initialize-UiLang { + if ($script:UiLang) { return } + $cfgLang = "" + if (Test-Path "$BaseDir\config.json") { + try { + $cfg = Get-Content "$BaseDir\config.json" -Raw | ConvertFrom-Json + if ($cfg.cow_lang) { $cfgLang = "$($cfg.cow_lang)" } + } catch {} + } + switch ($cfgLang) { + "zh" { $script:UiLang = "zh" } + "en" { $script:UiLang = "en" } + default { $script:UiLang = Get-DefaultUiLang } + } +} + +# ── arrow-key selectable menu with number fallback ─────────────── +# Usage: $idx = Select-Menu -Title "..." -Options @("a","b") [-Default 1] +# Returns the selected 1-based index. +function Select-Menu { + param( + [string]$Title, + [string[]]$Options, + [int]$Default = 1 + ) + $count = $Options.Count + $cur = [Math]::Max(0, [Math]::Min($Default - 1, $count - 1)) + + # Fallback to numbered input when there is no interactive console + # (e.g. piped input, redirected host). + $interactive = $true + try { + if ([Console]::IsInputRedirected) { $interactive = $false } + } catch { $interactive = $false } + + if (-not $interactive) { + Write-Info $Title + for ($i = 0; $i -lt $count; $i++) { + Write-Host (" {0}) {1}" -f ($i + 1), $Options[$i]) + } + do { + $sel = Read-Host (T "请输入序号" "Enter number") + if (-not $sel) { $sel = "$($cur + 1)" } + } while ($sel -notmatch '^\d+$' -or [int]$sel -lt 1 -or [int]$sel -gt $count) + return [int]$sel + } + + Write-Info $Title + Write-Host (T "↑/↓ 选择,Enter 确认" "Use ↑/↓ to move, Enter to select") -ForegroundColor Cyan + + [Console]::CursorVisible = $false + $firstDraw = $true + try { + while ($true) { + if (-not $firstDraw) { + # Move cursor up to the top of the option block to redraw it. + $top = [Console]::CursorTop - $count + if ($top -lt 0) { $top = 0 } + [Console]::SetCursorPosition(0, $top) + } + $firstDraw = $false + + for ($i = 0; $i -lt $count; $i++) { + # Clear the line first to avoid leftover characters. 
+ Write-Host (" " * ([Console]::WindowWidth - 1)) -NoNewline + [Console]::SetCursorPosition(0, [Console]::CursorTop) + if ($i -eq $cur) { + Write-Host (" > " + $Options[$i]) -ForegroundColor Green + } else { + Write-Host (" " + $Options[$i]) + } + } + + $key = [Console]::ReadKey($true) + switch ($key.Key) { + "UpArrow" { $cur = (($cur - 1 + $count) % $count) } + "DownArrow" { $cur = (($cur + 1) % $count) } + "Enter" { return ($cur + 1) } + default { + # Number shortcut (1-9) jumps to that option and confirms. + $ch = $key.KeyChar + if ($ch -match '^[1-9]$') { + $n = [int]"$ch" + if ($n -ge 1 -and $n -le $count) { return $n } + } + } + } + } + } finally { + [Console]::CursorVisible = $true + } +} + +# ── language selection (first step of install) ─────────────────── +function Select-Language { + # Order is fixed (English first, Chinese second). The default highlight + # follows detection, but conservatively: only a confident "zh" signal + # (a zh-* system culture) preselects Chinese; everything else defaults to + # English. The menu hint shows in the detected language for familiarity. + $detected = Get-DefaultUiLang + if ($detected -eq "zh") { + $default = 2 + $script:UiLang = "zh" + } else { + $default = 1 + $script:UiLang = "en" + } + + $idx = Select-Menu -Title "Select Language / 选择语言" -Options @("English", "中文 (Chinese)") -Default $default + switch ($idx) { + 1 { $script:UiLang = "en" } + 2 { $script:UiLang = "zh" } + default { $script:UiLang = "en" } + } + $script:InstallLang = $script:UiLang +} + # ── Python detection ───────────────────────────────────────────── function Find-Python { foreach ($cmd in @("python3", "python")) { @@ -59,45 +212,36 @@ function Find-Python { $PythonCmd = Find-Python function Assert-Python { if (-not $PythonCmd) { - Write-Err "Python 3.9-3.13 not found. 
Please install from https://www.python.org/downloads/" - Read-Host "Press Enter to exit" + Write-Err (T "未找到 Python 3.9-3.13,请从 https://www.python.org/downloads/ 安装" "Python 3.9-3.13 not found. Please install from https://www.python.org/downloads/") + Read-Host (T "按回车退出" "Press Enter to exit") exit 1 } - Write-Cow "Found Python: $PythonCmd" + Write-Cow ((T "找到 Python" "Found Python") + ": $PythonCmd") } # ── clone project ──────────────────────────────────────────────── function Install-Project { if (Test-Path "CowAgent") { - Write-Warn "Directory 'CowAgent' already exists." - $choice = Read-Host "Overwrite(o), backup(b), or quit(q)? [default: b]" - if (-not $choice) { $choice = "b" } - switch ($choice.ToLower()) { - "o" { Remove-Item -Recurse -Force "CowAgent" } - "b" { - $backup = "CowAgent_backup_$(Get-Date -Format 'yyyyMMddHHmmss')" - Rename-Item "CowAgent" $backup - Write-Cow "Backed up to '$backup'" - } - "q" { Write-Err "Installation cancelled."; exit 1 } - default { Write-Err "Invalid choice."; exit 1 } - } + # Auto-backup the existing directory without prompting. + $backup = "CowAgent_backup_$(Get-Date -Format 'yyyyMMddHHmmss')" + Rename-Item "CowAgent" $backup + Write-Warn ((T "已存在 CowAgent 目录,已自动备份为" "Existing 'CowAgent' directory backed up to") + " '$backup'") } $gitBin = Get-Command git -ErrorAction SilentlyContinue if (-not $gitBin) { - Write-Err "Git not found. Please install from https://git-scm.com/download/win" - Read-Host "Press Enter to exit" + Write-Err (T "未找到 Git,请从 https://git-scm.com/download/win 安装" "Git not found. Please install from https://git-scm.com/download/win") + Read-Host (T "按回车退出" "Press Enter to exit") exit 1 } - Write-Cow "Cloning CowAgent project..." + Write-Cow (T "正在克隆 CowAgent 项目..." 
"Cloning CowAgent project...") $cloneOk = $false # Test GitHub connectivity before attempting clone try { $null = Invoke-WebRequest -Uri "https://github.com" -UseBasicParsing -TimeoutSec 5 -ErrorAction Stop - Write-Cow "GitHub is reachable, cloning from GitHub..." + Write-Cow (T "GitHub 可达,正在从 GitHub 克隆..." "GitHub is reachable, cloning from GitHub...") $prevEAP = $ErrorActionPreference; $ErrorActionPreference = "Continue" git clone --depth 10 --progress "https://github.com/zhayujie/CowAgent.git" 2>&1 | ForEach-Object { Write-Host $_ } if ($LASTEXITCODE -eq 0) { $cloneOk = $true } @@ -108,7 +252,7 @@ function Install-Project { } catch {} if (-not $cloneOk) { - Write-Warn "GitHub clone failed or timed out, switching to Gitee mirror..." + Write-Warn (T "GitHub 克隆失败或超时,切换到 Gitee 镜像..." "GitHub clone failed or timed out, switching to Gitee mirror...") $prevEAP = $ErrorActionPreference; $ErrorActionPreference = "Continue" git clone --depth 10 --progress "https://gitee.com/zhayujie/CowAgent.git" 2>&1 | ForEach-Object { Write-Host $_ } if ($LASTEXITCODE -eq 0) { $cloneOk = $true } @@ -119,35 +263,83 @@ function Install-Project { } if (-not $cloneOk) { - Write-Err "Clone failed from both GitHub and Gitee. Please check your network connection." - Write-Err "You can also manually clone: git clone https://gitee.com/zhayujie/CowAgent.git" - Read-Host "Press Enter to exit" + Write-Err (T "GitHub 和 Gitee 均克隆失败,请检查网络连接。" "Clone failed from both GitHub and Gitee. Please check your network connection.") + Write-Err (T "你也可以手动克隆: git clone https://gitee.com/zhayujie/CowAgent.git" "You can also manually clone: git clone https://gitee.com/zhayujie/CowAgent.git") + Read-Host (T "按回车退出" "Press Enter to exit") exit 1 } Set-Location "CowAgent" $script:BaseDir = $PWD.Path $script:IsProjectDir = $true - Write-Cow "Project cloned: $BaseDir" + Write-Cow ((T "项目已克隆" "Project cloned") + ": $BaseDir") +} + +# Test whether a URL is reachable within a short timeout. 
Uses a HEAD request +# and hides progress so it never blocks the UI for long. Any failure (DNS, TLS, +# timeout) just returns $false so the caller falls back gracefully. +function Test-UrlReachable { + param([string]$Url, [int]$TimeoutSec = 4) + $oldPP = $ProgressPreference; $ProgressPreference = "SilentlyContinue" + try { + $null = Invoke-WebRequest -Uri $Url -Method Head -UseBasicParsing -TimeoutSec $TimeoutSec -ErrorAction Stop + return $true + } catch { + return $false + } finally { + $ProgressPreference = $oldPP + } +} + +# Pick the pip index by install language, with the other as fallback: +# - zh users: Tsinghua mirror first, official PyPI fallback +# - others : official PyPI first, Tsinghua mirror fallback +# Returns an args array to splat into pip (empty = pip default / official PyPI). +function Get-PipMirrorArgs { + $tuna = "https://pypi.tuna.tsinghua.edu.cn/simple" + $pypi = "https://pypi.org/simple" + if ($script:UiLang -eq "zh") { + if (Test-UrlReachable "$tuna/") { + Write-Warn ((T "使用 pip 镜像" "Using pip mirror") + ": $tuna") + return @("-i", $tuna) + } + } else { + if ((-not (Test-UrlReachable "$pypi/")) -and (Test-UrlReachable "$tuna/")) { + Write-Warn ((T "使用 pip 镜像" "Using pip mirror") + ": $tuna") + return @("-i", $tuna) + } + } + return @() } # ── install dependencies ───────────────────────────────────────── function Install-Dependencies { - Write-Cow "Installing dependencies..." + Write-Cow (T "正在安装依赖..." "Installing dependencies...") + # Probe the mirror first (with progress hidden so the slow IWR call doesn't + # leave the screen blank for too long). + $oldPP = $ProgressPreference; $ProgressPreference = "SilentlyContinue" + $pipMirror = Get-PipMirrorArgs + $ProgressPreference = $oldPP + + # Keep pip output VISIBLE (do not pipe to Out-Null): on slow networks the + # download can take minutes, and a silent step looks like a hang. 
$prevEAP = $ErrorActionPreference; $ErrorActionPreference = "Continue" - & $PythonCmd -m pip install --upgrade pip setuptools wheel 2>&1 | Out-Null - & $PythonCmd -m pip install -r "$BaseDir\requirements.txt" 2>&1 | ForEach-Object { Write-Host $_ } + Write-Info (T "正在升级 pip 等基础工具..." "Upgrading pip and basic tools...") + & $PythonCmd -m pip install --upgrade pip setuptools wheel @pipMirror + + Write-Info (T "正在安装项目依赖(可能需要几分钟)..." "Installing project dependencies (may take a few minutes)...") + & $PythonCmd -m pip install -r "$BaseDir\requirements.txt" @pipMirror $pipExit = $LASTEXITCODE $ErrorActionPreference = $prevEAP if ($pipExit -ne 0) { - Write-Warn "Some dependencies may have issues, but continuing..." + Write-Warn (T "部分依赖可能有问题,但继续安装..." "Some dependencies may have issues, but continuing...") } - Write-Cow "Registering cow CLI..." + Write-Cow (T "正在注册 cow CLI..." "Registering cow CLI...") $prevEAP = $ErrorActionPreference; $ErrorActionPreference = "Continue" - & $PythonCmd -m pip install -e $BaseDir 2>&1 | Out-Null + & $PythonCmd -m pip install -e $BaseDir @pipMirror 2>&1 | Out-Null $ErrorActionPreference = $prevEAP # Ensure Python Scripts dir is in PATH for this session @@ -160,139 +352,206 @@ function Install-Dependencies { $cowBin = Get-Command cow -ErrorAction SilentlyContinue if ($cowBin) { - Write-Cow "cow CLI registered: $($cowBin.Source)" + Write-Cow ((T "cow CLI 注册成功" "cow CLI registered") + ": $($cowBin.Source)") } else { - Write-Warn "cow CLI not in PATH. You can use: $PythonCmd -m cli.cli" - Write-Warn "To fix permanently, add Python Scripts directory to your system PATH." + Write-Warn ((T "cow CLI 不在 PATH 中,你可以使用" "cow CLI not in PATH. 
You can use") + ": $PythonCmd -m cli.cli") + Write-Warn (T "如需永久修复,请将 Python Scripts 目录加入系统 PATH。" "To fix permanently, add Python Scripts directory to your system PATH.") } } # ── model selection ────────────────────────────────────────────── +# Order mirrors run.sh: DeepSeek, Claude, Gemini, OpenAI, MiniMax, Zhipu, +# Qwen, Doubao, Kimi, LinkAI, then Skip (11th option). +# Each entry: Provider / default model name / config key field / optional base. $ModelChoices = @{ - "1" = @{ Provider = "DeepSeek"; Default = "deepseek-v4-flash"; Key = "DEEPSEEK_KEY" } - "2" = @{ Provider = "MiniMax"; Default = "MiniMax-M2.7"; Key = "MINIMAX_KEY" } - "3" = @{ Provider = "Zhipu AI"; Default = "glm-5.1"; Key = "ZHIPU_KEY" } - "4" = @{ Provider = "Kimi (Moonshot)"; Default = "kimi-k2.6"; Key = "MOONSHOT_KEY" } - "5" = @{ Provider = "Doubao (Volcengine Ark)"; Default = "doubao-seed-2-0-code-preview-260215"; Key = "ARK_KEY" } - "6" = @{ Provider = "Qwen (DashScope)"; Default = "qwen3.6-plus"; Key = "DASHSCOPE_KEY" } - "7" = @{ Provider = "Claude"; Default = "claude-opus-4-8"; Key = "CLAUDE_KEY"; Base = "https://api.anthropic.com/v1" } - "8" = @{ Provider = "Gemini"; Default = "gemini-3.1-pro-preview"; Key = "GEMINI_KEY"; Base = "https://generativelanguage.googleapis.com" } - "9" = @{ Provider = "OpenAI GPT"; Default = "gpt-5.4"; Key = "OPENAI_KEY"; Base = "https://api.openai.com/v1" } - "10" = @{ Provider = "LinkAI"; Default = "deepseek-v4-flash"; Key = "LINKAI_KEY" } + 1 = @{ Provider = "DeepSeek"; Default = "deepseek-v4-flash"; Field = "deepseek_api_key" } + 2 = @{ Provider = "Claude"; Default = "claude-opus-4-8"; Field = "claude_api_key"; BaseField = "claude_api_base" } + 3 = @{ Provider = "Gemini"; Default = "gemini-3.1-pro-preview"; Field = "gemini_api_key"; BaseField = "gemini_api_base" } + 4 = @{ Provider = "OpenAI GPT"; Default = "gpt-5.4"; Field = "open_ai_api_key"; BaseField = "open_ai_api_base" } + 5 = @{ Provider = "MiniMax"; Default = "MiniMax-M2.7"; Field = 
"minimax_api_key" } + 6 = @{ Provider = "Zhipu AI"; Default = "glm-5.1"; Field = "zhipu_ai_api_key" } + 7 = @{ Provider = "Qwen (DashScope)"; Default = "qwen3.6-plus"; Field = "dashscope_api_key" } + 8 = @{ Provider = "Doubao (Volcengine Ark)"; Default = "doubao-seed-2-0-code-preview-260215"; Field = "ark_api_key" } + 9 = @{ Provider = "Kimi (Moonshot)"; Default = "kimi-k2.6"; Field = "moonshot_api_key" } + 10 = @{ Provider = "LinkAI"; Default = "deepseek-v4-flash"; Field = "linkai_api_key"; Linkai = $true } } function Select-Model { - Write-Info "=========================================" - Write-Info " Select AI Model" - Write-Info "=========================================" - Write-Host "1) DeepSeek (deepseek-v4-flash, deepseek-v4-pro, etc.)" - Write-Host "2) MiniMax (MiniMax-M2.7, MiniMax-M2.5, etc.)" - Write-Host "3) Zhipu AI (glm-5.1, glm-5-turbo, glm-5, etc.)" - Write-Host "4) Kimi (kimi-k2.6, kimi-k2.5, kimi-k2, etc.)" - Write-Host "5) Doubao (doubao-seed-2-0-code-preview-260215, etc.)" - Write-Host "6) Qwen (qwen3.6-plus, qwen3.5-plus, qwen3-max, qwq-plus, etc.)" - Write-Host "7) Claude (claude-opus-4-8, claude-opus-4-7, claude-sonnet-4-6, etc.)" - Write-Host "8) Gemini (gemini-3.1-flash-lite-preview, gemini-3.1-pro-preview, etc.)" - Write-Host "9) OpenAI GPT (gpt-5.4, gpt-5.2, gpt-4.1, etc.)" - Write-Host "10) LinkAI (access multiple models via one API)" Write-Host "" + $title = T "选择 AI 模型" "Select AI Model" + $options = @( + "DeepSeek (deepseek-v4-flash, deepseek-v4-pro, etc.)", + "Claude (claude-opus-4-8, claude-opus-4-7, claude-sonnet-4-6, etc.)", + "Gemini (gemini-3.1-flash-lite-preview, gemini-3.1-pro-preview, etc.)", + "OpenAI GPT (gpt-5.4, gpt-5.2, gpt-4.1, etc.)", + "MiniMax (MiniMax-M2.7, MiniMax-M2.5, etc.)", + "Zhipu AI (glm-5.1, glm-5-turbo, glm-5, etc.)", + "Qwen (qwen3.6-plus, qwen3.5-plus, qwen3-max, qwq-plus, etc.)", + "Doubao (doubao-seed-2-0-code-preview-260215, etc.)", + "Kimi (kimi-k2.6, kimi-k2.5, kimi-k2, etc.)", + ("LinkAI (" + (T 
"一个 Key 接入所有模型" "access all models via one API") + ")"), + (T "⏭ 跳过(稍后在 Web 控制台配置)" "⏭ Skip (configure later in the web console)") + ) + $script:ModelChoice = Select-Menu -Title $title -Options $options -Default 1 +} - do { - $choice = Read-Host "Enter your choice [default: 1 - DeepSeek]" - if (-not $choice) { $choice = "1" } - } while ($choice -notmatch '^([1-9]|10)$') +# Configure model. Only ask for the API key; model name and base default to +# sensible values and can be changed later in the web console. +function Configure-Model { + # Reset model-related state + $script:ModelName = "" + $script:ModelField = "" + $script:ApiKey = "" + $script:ApiBase = "" + $script:ApiBaseField = "" + $script:UseLinkai = $false - $m = $ModelChoices[$choice] - Write-Cow "Configuring $($m.Provider)..." - - $script:ApiKey = Read-Host "Enter $($m.Provider) API Key" - $model = Read-Host "Enter model name [default: $($m.Default)]" - if (-not $model) { $model = $m.Default } - $script:ModelName = $model - $script:KeyName = $m.Key - $script:UseLinkai = ($choice -eq "10") - - if ($m.Base) { - $base = Read-Host "Enter API Base URL [default: $($m.Base)]" - if (-not $base) { $base = $m.Base } - $script:ApiBase = $base - } else { - $script:ApiBase = "" + if ($script:ModelChoice -eq 11) { + # Skip: leave model unset, will be configured in the web console. + Write-Warn (T "已跳过模型配置,稍后可在 Web 控制台填写" "Model configuration skipped, you can set it later in the web console") + return } - $script:ModelChoice = $choice + + $m = $ModelChoices[$script:ModelChoice] + Write-Cow ((T "正在配置" "Configuring") + " $($m.Provider)...") + # Show where to obtain a LinkAI key. 
+ if ($m.Linkai) { + Write-Info ((T "获取 LinkAI Key" "Get your LinkAI Key") + ": https://link-ai.tech/console/interface") + } + $hint = T "回车跳过,稍后在 Web 控制台填写" "press Enter to skip, set later in web console" + $script:ApiKey = Read-Host ((T "请输入" "Enter") + " $($m.Provider) API Key ($hint)") + $script:ModelName = $m.Default + $script:ModelField = $m.Field + if ($m.BaseField) { $script:ApiBaseField = $m.BaseField } + if ($m.Linkai) { $script:UseLinkai = $true } } # ── channel selection ──────────────────────────────────────────── +# Channel label by stable key (independent of menu order). +function Get-ChannelLabel { + param([string]$Key) + switch ($Key) { + "web" { return (T "Web 网页控制台(推荐,开箱即用)" "Web Console (recommended, ready to use)") } + "weixin" { return (T "微信 Weixin" "WeChat (Weixin)") } + "feishu" { return (T "飞书 Feishu" "Feishu / Lark") } + "dingtalk" { return (T "钉钉 DingTalk" "DingTalk") } + "wecom_bot" { return (T "企微智能机器人 WeCom Bot" "WeCom Bot") } + "qq" { return "QQ" } + "wechatcom_app" { return (T "企微自建应用 WeCom App" "WeCom App") } + "telegram" { return "Telegram" } + "slack" { return "Slack" } + "discord" { return "Discord" } + "skip" { return (T "⏭ 跳过(稍后在 Web 控制台配置)" "⏭ Skip (configure later in the web console)") } + } +} + +# Select channel. The display order depends on the install language: +# - English: Web first, then the global IM channels (Telegram/Discord/Slack), +# then the China-focused channels. +# - Chinese: Web first, then China-focused channels, then global ones. +# A stable key list decouples the menu order from the config logic. 
function Select-Channel { Write-Host "" - Write-Info "=========================================" - Write-Info " Select Communication Channel" - Write-Info "=========================================" - Write-Host "1) Weixin" - Write-Host "2) Feishu" - Write-Host "3) DingTalk" - Write-Host "4) WeCom Bot" - Write-Host "5) QQ" - Write-Host "6) WeCom App" - Write-Host "7) Web" - Write-Host "" - - do { - $choice = Read-Host "Enter your choice [default: 1 - Weixin]" - if (-not $choice) { $choice = "1" } - } while ($choice -notmatch '^[1-7]$') + $title = T "选择接入渠道" "Select Communication Channel" + if ($script:UiLang -eq "en") { + $script:ChannelKeys = @("web", "telegram", "discord", "slack", "weixin", "feishu", "dingtalk", "wecom_bot", "qq", "wechatcom_app", "skip") + } else { + $script:ChannelKeys = @("web", "weixin", "feishu", "dingtalk", "wecom_bot", "qq", "wechatcom_app", "telegram", "slack", "discord", "skip") + } + $options = @($script:ChannelKeys | ForEach-Object { Get-ChannelLabel $_ }) + $idx = Select-Menu -Title $title -Options $options -Default 1 + # Map the 1-based menu position back to the stable channel key. + $script:ChannelChoice = $script:ChannelKeys[$idx - 1] +} +# Configure channel, dispatched by stable channel key (not menu position). 
+function Configure-Channel { $script:ChannelExtra = @{} + $script:AccessInfo = "" - switch ($choice) { - "1" { $script:ChannelType = "weixin" } - "2" { - $script:ChannelType = "feishu" - $script:ChannelExtra["feishu_app_id"] = Read-Host "Enter Feishu App ID" - $script:ChannelExtra["feishu_app_secret"] = Read-Host "Enter Feishu App Secret" - } - "3" { - $script:ChannelType = "dingtalk" - $script:ChannelExtra["dingtalk_client_id"] = Read-Host "Enter DingTalk Client ID" - $script:ChannelExtra["dingtalk_client_secret"] = Read-Host "Enter DingTalk Client Secret" - } - "4" { - $script:ChannelType = "wecom_bot" - $script:ChannelExtra["wecom_bot_id"] = Read-Host "Enter WeCom Bot ID" - $script:ChannelExtra["wecom_bot_secret"] = Read-Host "Enter WeCom Bot Secret" - } - "5" { - $script:ChannelType = "qq" - $script:ChannelExtra["qq_app_id"] = Read-Host "Enter QQ App ID" - $script:ChannelExtra["qq_app_secret"] = Read-Host "Enter QQ App Secret" - } - "6" { - $script:ChannelType = "wechatcom_app" - $script:ChannelExtra["wechatcom_corp_id"] = Read-Host "Enter WeChat Corp ID" - $script:ChannelExtra["wechatcomapp_token"] = Read-Host "Enter WeChat Com App Token" - $script:ChannelExtra["wechatcomapp_secret"] = Read-Host "Enter WeChat Com App Secret" - $script:ChannelExtra["wechatcomapp_agent_id"] = Read-Host "Enter WeChat Com App Agent ID" - $script:ChannelExtra["wechatcomapp_aes_key"] = Read-Host "Enter WeChat Com App AES Key" - $port = Read-Host "Enter port [default: 9898]" - if (-not $port) { $port = "9898" } - $script:ChannelExtra["wechatcomapp_port"] = [int]$port - } - "7" { + switch ($script:ChannelChoice) { + { $_ -eq "web" -or $_ -eq "skip" } { + # Web (also the default when skipped). Default port, no prompt. 
$script:ChannelType = "web" - $port = Read-Host "Enter web port [default: 9899]" - if (-not $port) { $port = "9899" } - $script:ChannelExtra["web_port"] = [int]$port + $script:ChannelExtra["web_port"] = 9899 + $script:AccessInfo = (T "Web 控制台地址" "Web console") + " : http://localhost:9899/chat" + } + "weixin" { + $script:ChannelType = "weixin" + $script:AccessInfo = T "微信渠道已配置,请在终端或 Web 控制台扫码登录" "Weixin channel configured. Scan QR code in terminal or web console to login." + } + "feishu" { + $script:ChannelType = "feishu" + Write-Cow (T "配置飞书(WebSocket 模式)..." "Configure Feishu (WebSocket mode)...") + $script:ChannelExtra["feishu_app_id"] = Read-Host (T "请输入飞书 App ID" "Enter Feishu App ID") + $script:ChannelExtra["feishu_app_secret"] = Read-Host (T "请输入飞书 App Secret" "Enter Feishu App Secret") + $script:ChannelExtra["feishu_event_mode"] = "websocket" + $script:AccessInfo = T "飞书渠道已配置(WebSocket 模式)" "Feishu channel configured (WebSocket mode)" + } + "dingtalk" { + $script:ChannelType = "dingtalk" + Write-Cow (T "配置钉钉..." "Configure DingTalk...") + $script:ChannelExtra["dingtalk_client_id"] = Read-Host (T "请输入钉钉 Client ID" "Enter DingTalk Client ID") + $script:ChannelExtra["dingtalk_client_secret"] = Read-Host (T "请输入钉钉 Client Secret" "Enter DingTalk Client Secret") + $script:AccessInfo = T "钉钉渠道已配置" "DingTalk channel configured" + } + "wecom_bot" { + $script:ChannelType = "wecom_bot" + Write-Cow (T "配置企微智能机器人..." "Configure WeCom Bot...") + $script:ChannelExtra["wecom_bot_id"] = Read-Host (T "请输入 WeCom Bot ID" "Enter WeCom Bot ID") + $script:ChannelExtra["wecom_bot_secret"] = Read-Host (T "请输入 WeCom Bot Secret" "Enter WeCom Bot Secret") + $script:AccessInfo = T "企微智能机器人渠道已配置" "WeCom Bot channel configured" + } + "qq" { + $script:ChannelType = "qq" + Write-Cow (T "配置 QQ 机器人..." 
"Configure QQ Bot...") + $script:ChannelExtra["qq_app_id"] = Read-Host (T "请输入 QQ App ID" "Enter QQ App ID") + $script:ChannelExtra["qq_app_secret"] = Read-Host (T "请输入 QQ App Secret" "Enter QQ App Secret") + $script:AccessInfo = T "QQ 机器人渠道已配置" "QQ Bot channel configured" + } + "wechatcom_app" { + $script:ChannelType = "wechatcom_app" + Write-Cow (T "配置企微自建应用..." "Configure WeCom App...") + $script:ChannelExtra["wechatcom_corp_id"] = Read-Host (T "请输入企业 Corp ID" "Enter WeChat Corp ID") + $script:ChannelExtra["wechatcomapp_token"] = Read-Host (T "请输入应用 Token" "Enter WeChat Com App Token") + $script:ChannelExtra["wechatcomapp_secret"] = Read-Host (T "请输入应用 Secret" "Enter WeChat Com App Secret") + $script:ChannelExtra["wechatcomapp_agent_id"] = Read-Host (T "请输入应用 Agent ID" "Enter WeChat Com App Agent ID") + $script:ChannelExtra["wechatcomapp_aes_key"] = Read-Host (T "请输入应用 AES Key" "Enter WeChat Com App AES Key") + $port = Read-Host ((T "请输入应用端口" "Enter port") + " [" + (T "默认" "default") + ": 9898]") + if (-not ($port -match '^\d+$')) { $port = "9898" } + $script:ChannelExtra["wechatcomapp_port"] = [int]$port + $script:AccessInfo = (T "企微自建应用渠道已配置,端口" "WeCom App channel configured on port") + " $port" + } + "telegram" { + $script:ChannelType = "telegram" + Write-Cow (T "配置 Telegram..." "Configure Telegram...") + $script:ChannelExtra["telegram_token"] = Read-Host (T "请输入 Telegram Bot Token" "Enter Telegram Bot Token") + $script:AccessInfo = T "Telegram 渠道已配置" "Telegram channel configured" + } + "slack" { + $script:ChannelType = "slack" + Write-Cow (T "配置 Slack..." 
"Configure Slack...") + $script:ChannelExtra["slack_bot_token"] = Read-Host ((T "请输入 Slack Bot Token" "Enter Slack Bot Token") + " (xoxb-...)") + $script:ChannelExtra["slack_app_token"] = Read-Host ((T "请输入 Slack App Token" "Enter Slack App Token") + " (xapp-...)") + $script:AccessInfo = T "Slack 渠道已配置" "Slack channel configured" + } + "discord" { + $script:ChannelType = "discord" + Write-Cow (T "配置 Discord..." "Configure Discord...") + $script:ChannelExtra["discord_token"] = Read-Host (T "请输入 Discord Bot Token" "Enter Discord Bot Token") + $script:AccessInfo = T "Discord 渠道已配置" "Discord channel configured" } } } # ── generate config.json ───────────────────────────────────────── function New-ConfigFile { - Write-Cow "Generating config.json..." + Write-Cow (T "正在生成 config.json..." "Generating config.json...") $config = [ordered]@{ - channel_type = $ChannelType - model = $ModelName + channel_type = if ($script:ChannelType) { $script:ChannelType } else { "web" } + model = if ($script:ModelName) { $script:ModelName } else { "" } + cow_lang = if ($script:InstallLang) { $script:InstallLang } else { "auto" } open_ai_api_key = "" open_ai_api_base = "https://api.openai.com/v1" claude_api_key = "" @@ -311,7 +570,7 @@ function New-ConfigFile { voice_reply_voice = $false speech_recognition = $true group_speech_recognition = $false - use_linkai = $UseLinkai + use_linkai = [bool]$script:UseLinkai linkai_api_key = "" linkai_app_code = "" agent = $true @@ -320,51 +579,33 @@ function New-ConfigFile { agent_max_steps = 15 } - # Set the correct API key field - $keyMap = @{ - OPENAI_KEY = "open_ai_api_key" - CLAUDE_KEY = "claude_api_key" - GEMINI_KEY = "gemini_api_key" - ZHIPU_KEY = "zhipu_ai_api_key" - MOONSHOT_KEY = "moonshot_api_key" - ARK_KEY = "ark_api_key" - DASHSCOPE_KEY = "dashscope_api_key" - MINIMAX_KEY = "minimax_api_key" - DEEPSEEK_KEY = "deepseek_api_key" - LINKAI_KEY = "linkai_api_key" + # Set the API key into the right field (skipped models leave it empty). 
+ if ($script:ModelField -and $config.Contains($script:ModelField)) { + $config[$script:ModelField] = $script:ApiKey } - if ($keyMap.ContainsKey($KeyName)) { - $config[$keyMap[$KeyName]] = $ApiKey - } - - # Set API base if provided - $baseMap = @{ - "7" = "claude_api_base" - "8" = "gemini_api_base" - "9" = "open_ai_api_base" - } - if ($ApiBase -and $baseMap.ContainsKey($ModelChoice)) { - $config[$baseMap[$ModelChoice]] = $ApiBase + # Set API base if the model has a configurable base and the user changed it. + if ($script:ApiBase -and $script:ApiBaseField -and $config.Contains($script:ApiBaseField)) { + $config[$script:ApiBaseField] = $script:ApiBase } # Merge channel-specific fields - foreach ($k in $ChannelExtra.Keys) { - $config[$k] = $ChannelExtra[$k] + foreach ($k in $script:ChannelExtra.Keys) { + $config[$k] = $script:ChannelExtra[$k] } $jsonText = $config | ConvertTo-Json -Depth 5 [System.IO.File]::WriteAllText("$BaseDir\config.json", $jsonText, (New-Object System.Text.UTF8Encoding $false)) - Write-Cow "Configuration file created." + Write-Cow (T "配置文件创建成功。" "Configuration file created.") } # ── start via cow CLI ───────────────────────────────────────────── function Start-CowAgent { - Write-Cow "Starting CowAgent..." + Write-Cow (T "正在启动 CowAgent..." "Starting CowAgent...") $cowBin = Get-Command cow -ErrorAction SilentlyContinue if ($cowBin) { & cow start } else { - Write-Warn "cow CLI not found, starting directly..." + Write-Warn (T "未找到 cow CLI,直接启动..." "cow CLI not found, starting directly...") & $PythonCmd "$BaseDir\app.py" } } @@ -376,7 +617,7 @@ function Invoke-CowCommand { if ($cowBin) { & cow $Cmd } else { - Write-Err "cow CLI not found. Run this script without arguments first to install." + Write-Err (T "未找到 cow CLI,请先不带参数运行本脚本进行安装。" "cow CLI not found. 
Run this script without arguments first to install.") exit 1 } } @@ -387,19 +628,19 @@ function Show-Usage { Write-Info " CowAgent Management Script (Windows)" Write-Info "=========================================" Write-Host "" - Write-Host "Usage:" - Write-Host " .\run.ps1 # Install / Configure" - Write-Host " .\run.ps1 # Management command" + Write-Host (T "用法:" "Usage:") + Write-Host (" .\run.ps1 # " + (T "安装 / 配置" "Install / Configure")) + Write-Host (" .\run.ps1 # " + (T "管理命令" "Management command")) Write-Host "" - Write-Host "Commands:" - Write-Host " start Start the service" - Write-Host " stop Stop the service" - Write-Host " restart Restart the service" - Write-Host " status Check service status" - Write-Host " logs View logs" - Write-Host " config Reconfigure project" - Write-Host " update Update and restart" - Write-Host " help Show this message" + Write-Host (T "命令:" "Commands:") + Write-Host (" start " + (T "启动服务" "Start the service")) + Write-Host (" stop " + (T "停止服务" "Stop the service")) + Write-Host (" restart " + (T "重启服务" "Restart the service")) + Write-Host (" status " + (T "查看状态" "Check service status")) + Write-Host (" logs " + (T "查看日志" "View logs")) + Write-Host (" config " + (T "重新配置项目" "Reconfigure project")) + Write-Host (" update " + (T "更新并重启" "Update and restart")) + Write-Host (" help " + (T "显示本帮助" "Show this message")) Write-Host "" } @@ -411,15 +652,19 @@ function Install-Mode { Write-Info "=========================================" Write-Host "" + # Step 0: choose the install/UI language. Everything after this is localized. + Select-Language + Write-Host "" + if ($IsProjectDir) { - Write-Cow "Detected existing project directory." + Write-Cow (T "检测到已有项目目录。" "Detected existing project directory.") if (Test-Path "$BaseDir\config.json") { - Write-Cow "Project already configured." + Write-Cow (T "项目已配置。" "Project already configured.") Write-Host "" Show-Usage return } - Write-Warn "No config.json found. 
Let's configure your project!" + Write-Warn (T "未找到 config.json,开始配置项目!" "No config.json found. Let's configure your project!") Write-Host "" Assert-Python } else { @@ -429,25 +674,20 @@ function Install-Mode { Install-Dependencies Select-Model + Configure-Model Select-Channel + Configure-Channel New-ConfigFile + # Auto-start after configuration for a true out-of-the-box experience. Write-Host "" - $startNow = Read-Host "Start CowAgent now? [Y/n]" - if ($startNow -ne "n" -and $startNow -ne "N") { - Start-CowAgent - } else { - Write-Cow "Installation complete!" - Write-Host "" - Write-Host "To start manually:" - Write-Host " cd $BaseDir" - Write-Host " cow start" - } + if ($script:AccessInfo) { Write-Cow $script:AccessInfo } + Start-CowAgent } # ── update ──────────────────────────────────────────────────────── function Update-Project { - Write-Cow "Updating CowAgent..." + Write-Cow (T "正在更新 CowAgent..." "Updating CowAgent...") Set-Location $BaseDir # Stop if running @@ -459,20 +699,20 @@ function Update-Project { } if (Test-Path "$BaseDir\.git") { - Write-Cow "Pulling latest code..." + Write-Cow (T "正在拉取最新代码..." "Pulling latest code...") $prevEAP = $ErrorActionPreference; $ErrorActionPreference = "Continue" git pull 2>&1 | Out-Null $pullExit = $LASTEXITCODE $ErrorActionPreference = $prevEAP if ($pullExit -ne 0) { - Write-Warn "GitHub failed, trying Gitee..." + Write-Warn (T "GitHub 拉取失败,尝试 Gitee..." "GitHub failed, trying Gitee...") $ErrorActionPreference = "Continue" git remote set-url origin https://gitee.com/zhayujie/CowAgent.git 2>&1 | Out-Null git pull 2>&1 | Out-Null $ErrorActionPreference = $prevEAP } } else { - Write-Warn "Not a git repository, skipping code update." + Write-Warn (T "非 git 仓库,跳过代码更新。" "Not a git repository, skipping code update.") } Assert-Python @@ -480,11 +720,13 @@ function Update-Project { # Start via python -m cli.cli instead of cow.exe, because the exe may # still be cached/locked from the previous installation on Windows. 
- Write-Cow "Starting CowAgent..." + Write-Cow (T "正在启动 CowAgent..." "Starting CowAgent...") & $PythonCmd -m cli.cli start } # ── main ────────────────────────────────────────────────────────── +Initialize-UiLang + switch ($Command.ToLower()) { "" { Install-Mode } "start" { Invoke-CowCommand "start" } @@ -496,15 +738,17 @@ switch ($Command.ToLower()) { Assert-Python Install-Dependencies Select-Model + Configure-Model Select-Channel + Configure-Channel New-ConfigFile - $r = Read-Host "Restart service now? [Y/n]" + $r = Read-Host (T "现在重启服务吗?[Y/n]" "Restart service now? [Y/n]") if ($r -ne "n" -and $r -ne "N") { Invoke-CowCommand "restart" } } "update" { Update-Project } "help" { Show-Usage } default { - Write-Err "Unknown command: $Command" + Write-Err ((T "未知命令" "Unknown command") + ": $Command") Show-Usage exit 1 }