feat: add skills and upgrade feishu/dingtalk channel

2026-07-19 04:37:28 +08:00 · 2026-02-02 00:42:39 +08:00
parent 77c2bfcc1e
commit a8d5309c90
32 changed files with 2931 additions and 200 deletions
--- a/agent/prompt/builder.py
+++ b/agent/prompt/builder.py
@@ -237,8 +237,8 @@ def _build_tooling_section(tools: List[Any], language: str) -> List[str]:
        "叙述要求: 保持简洁、信息密度高，避免重复显而易见的步骤。",
        "",
        "完成标准:",
-        "- 确保用户的需求得到实际解决，而不仅仅是制定计划",
+        "- 确保用户的需求得到实际解决，而不仅仅是制定计划。",
-        "- 当任务需要多次工具调用时，持续推进直到完成",
+        "- 当任务需要多次工具调用时，持续推进直到完成, 解决完后向用户报告结果或回复用户的问题",
        "- 每次工具调用后，评估是否已获得足够信息来推进或完成任务",
        "- 避免重复调用相同的工具和相同参数获取相同的信息，除非用户明确要求",
        "",
--- a/agent/protocol/agent.py
+++ b/agent/protocol/agent.py
@@ -360,6 +360,9 @@ class Agent:
        # Update agent's message history from executor
        self.messages = executor.messages
        # Store executor reference for agent_bridge to access files_to_send
        self.stream_executor = executor
        # Execute all post-process tools
        self._execute_post_process_tools()
--- a/agent/protocol/agent_stream.py
+++ b/agent/protocol/agent_stream.py
@@ -58,6 +58,9 @@ class AgentStreamExecutor:
        # Tool failure tracking for retry protection
        self.tool_failure_history = []  # List of (tool_name, args_hash, success) tuples
        # Track files to send (populated by read tool)
        self.files_to_send = []  # List of file metadata dicts
    def _emit_event(self, event_type: str, data: dict = None):
        """Emit event"""
@@ -191,21 +194,47 @@ class AgentStreamExecutor:
                            logger.info(
                                f"Memory flush recommended: tokens={current_tokens}, turns={self.agent.memory_manager.flush_manager.turn_count}")
-                # Call LLM
+                # Call LLM (enable retry_on_empty for better reliability)
-                assistant_msg, tool_calls = self._call_llm_stream()
+                assistant_msg, tool_calls = self._call_llm_stream(retry_on_empty=True)
                final_response = assistant_msg
                # No tool calls, end loop
                if not tool_calls:
                    # 检查是否返回了空响应
                    if not assistant_msg:
-                        logger.warning(f"[Agent] LLM returned empty response (no content and no tool calls)")
+                        logger.warning(f"[Agent] LLM returned empty response after retry (no content and no tool calls)")
                        logger.info(f"[Agent] This usually happens when LLM thinks the task is complete after tool execution")
-                        # 生成通用的友好提示
+                        # 如果之前有工具调用，强制要求 LLM 生成文本回复
-                        final_response = (
+                        if turn > 1:
-                            "抱歉，我暂时无法生成回复。请尝试换一种方式描述你的需求，或稍后再试。"
+                            logger.info(f"[Agent] Requesting explicit response from LLM...")
-                        )
+                            
-                        logger.info(f"Generated fallback response for empty LLM output")
+                            # 添加一条消息，明确要求回复用户
                            self.messages.append({
                                "role": "user",
                                "content": [{
                                    "type": "text",
                                    "text": "请向用户说明刚才工具执行的结果或回答用户的问题。"
                                }]
                            })
                            # 再调用一次 LLM
                            assistant_msg, tool_calls = self._call_llm_stream(retry_on_empty=False)
                            final_response = assistant_msg
                            # 如果还是空，才使用 fallback
                            if not assistant_msg and not tool_calls:
                                logger.warning(f"[Agent] Still empty after explicit request")
                                final_response = (
                                    "抱歉，我暂时无法生成回复。请尝试换一种方式描述你的需求，或稍后再试。"
                                )
                                logger.info(f"Generated fallback response for empty LLM output")
                        else:
                            # 第一轮就空回复，直接 fallback
                            final_response = (
                                "抱歉，我暂时无法生成回复。请尝试换一种方式描述你的需求，或稍后再试。"
                            )
                            logger.info(f"Generated fallback response for empty LLM output")
                    else:
                        logger.info(f"💭 {assistant_msg[:150]}{'...' if len(assistant_msg) > 150 else ''}")
@@ -235,6 +264,14 @@ class AgentStreamExecutor:
                        result = self._execute_tool(tool_call)
                        tool_results.append(result)
                        # Check if this is a file to send (from read tool)
                        if result.get("status") == "success" and isinstance(result.get("result"), dict):
                            result_data = result.get("result")
                            if result_data.get("type") == "file_to_send":
                                # Store file metadata for later sending
                                self.files_to_send.append(result_data)
                                logger.info(f"📎 检测到待发送文件: {result_data.get('file_name', result_data.get('path'))}")
                        # Check for critical error - abort entire conversation
                        if result.get("status") == "critical_error":
                            logger.error(f"💥 检测到严重错误，终止对话")
@@ -392,6 +429,7 @@ class AgentStreamExecutor:
        # Streaming response
        full_content = ""
        tool_calls_buffer = {}  # {index: {id, name, arguments}}
        stop_reason = None  # Track why the stream stopped
        try:
            stream = self.model.call_stream(request)
@@ -404,21 +442,47 @@ class AgentStreamExecutor:
                    if isinstance(error_data, dict):
                        error_msg = error_data.get("message", chunk.get("message", "Unknown error"))
                        error_code = error_data.get("code", "")
                        error_type = error_data.get("type", "")
                    else:
                        error_msg = chunk.get("message", str(error_data))
                        error_code = ""
                        error_type = ""
                    status_code = chunk.get("status_code", "N/A")
                    logger.error(f"API Error: {error_msg} (Status: {status_code}, Code: {error_code})")
                    logger.error(f"Full error chunk: {chunk}")
-                    # Raise exception with full error message for retry logic
+                    # Log error with all available information
-                    raise Exception(f"{error_msg} (Status: {status_code})")
+                    logger.error(f"🔴 Stream API Error:")
                    logger.error(f"   Message: {error_msg}")
                    logger.error(f"   Status Code: {status_code}")
                    logger.error(f"   Error Code: {error_code}")
                    logger.error(f"   Error Type: {error_type}")
                    logger.error(f"   Full chunk: {chunk}")
                    # Check if this is a context overflow error (keyword-based, works for all models)
                    # Don't rely on specific status codes as different providers use different codes
                    error_msg_lower = error_msg.lower()
                    is_overflow = any(keyword in error_msg_lower for keyword in [
                        'context length exceeded', 'maximum context length', 'prompt is too long',
                        'context overflow', 'context window', 'too large', 'exceeds model context',
                        'request_too_large', 'request exceeds the maximum size', 'tokens exceed'
                    ])
                    if is_overflow:
                        # Mark as context overflow for special handling
                        raise Exception(f"[CONTEXT_OVERFLOW] {error_msg} (Status: {status_code})")
                    else:
                        # Raise exception with full error message for retry logic
                        raise Exception(f"{error_msg} (Status: {status_code}, Code: {error_code}, Type: {error_type})")
                # Parse chunk
                if isinstance(chunk, dict) and "choices" in chunk:
                    choice = chunk["choices"][0]
                    delta = choice.get("delta", {})
                    # Capture finish_reason if present
                    finish_reason = choice.get("finish_reason")
                    if finish_reason:
                        stop_reason = finish_reason
                    # Handle text content
                    if "content" in delta and delta["content"]:
@@ -449,9 +513,46 @@ class AgentStreamExecutor:
                                    tool_calls_buffer[index]["arguments"] += func["arguments"]
        except Exception as e:
-            error_str = str(e).lower()
+            error_str = str(e)
            error_str_lower = error_str.lower()
            # Check if error is context overflow (non-retryable, needs session reset)
            # Method 1: Check for special marker (set in stream error handling above)
            is_context_overflow = '[context_overflow]' in error_str_lower
            # Method 2: Fallback to keyword matching for non-stream errors
            if not is_context_overflow:
                is_context_overflow = any(keyword in error_str_lower for keyword in [
                    'context length exceeded', 'maximum context length', 'prompt is too long',
                    'context overflow', 'context window', 'too large', 'exceeds model context',
                    'request_too_large', 'request exceeds the maximum size'
                ])
            # Check if error is message format error (incomplete tool_use/tool_result pairs)
            # This happens when previous conversation had tool failures
            is_message_format_error = any(keyword in error_str_lower for keyword in [
                'tool_use', 'tool_result', 'without', 'immediately after',
                'corresponding', 'must have', 'each'
            ]) and 'status: 400' in error_str_lower
            if is_context_overflow or is_message_format_error:
                error_type = "context overflow" if is_context_overflow else "message format error"
                logger.error(f"💥 {error_type} detected: {e}")
                # Clear message history to recover
                logger.warning("🔄 Clearing conversation history to recover")
                self.messages.clear()
                # Raise special exception with user-friendly message
                if is_context_overflow:
                    raise Exception(
                        "抱歉，对话历史过长导致上下文溢出。我已清空历史记录，请重新描述你的需求。"
                    )
                else:
                    raise Exception(
                        "抱歉，之前的对话出现了问题。我已清空历史记录，请重新发送你的消息。"
                    )
            # Check if error is retryable (timeout, connection, rate limit, server busy, etc.)
-            is_retryable = any(keyword in error_str for keyword in [
+            is_retryable = any(keyword in error_str_lower for keyword in [
                'timeout', 'timed out', 'connection', 'network', 
                'rate limit', 'overloaded', 'unavailable', 'busy', 'retry',
                '429', '500', '502', '503', '504', '512'
@@ -505,11 +606,12 @@ class AgentStreamExecutor:
        # Check for empty response and retry once if enabled
        if retry_on_empty and not full_content and not tool_calls:
-            logger.warning(f"⚠️  LLM returned empty response, retrying once...")
+            logger.warning(f"⚠️  LLM returned empty response (stop_reason: {stop_reason}), retrying once...")
            self._emit_event("message_end", {
                "content": "",
                "tool_calls": [],
-                "empty_retry": True
+                "empty_retry": True,
                "stop_reason": stop_reason
            })
            # Retry without retry flag to avoid infinite loop
            return self._call_llm_stream(
--- a/agent/skills/loader.py
+++ b/agent/skills/loader.py
@@ -137,6 +137,10 @@ class SkillLoader:
        name = frontmatter.get('name', parent_dir_name)
        description = frontmatter.get('description', '')
        # Special handling for linkai-agent: dynamically load apps from config.json
        if name == 'linkai-agent':
            description = self._load_linkai_agent_description(skill_dir, description)
        if not description or not description.strip():
            diagnostics.append(f"Skill {name} has no description: {file_path}")
            return LoadSkillsResult(skills=[], diagnostics=diagnostics)
@@ -161,6 +165,45 @@ class SkillLoader:
        return LoadSkillsResult(skills=[skill], diagnostics=diagnostics)
    def _load_linkai_agent_description(self, skill_dir: str, default_description: str) -> str:
        """
        Dynamically load LinkAI agent description from config.json
        :param skill_dir: Skill directory
        :param default_description: Default description from SKILL.md
        :return: Dynamic description with app list
        """
        import json
        config_path = os.path.join(skill_dir, "config.json")
        template_path = os.path.join(skill_dir, "config.json.template")
        # Try to load config.json or fallback to template
        config_file = config_path if os.path.exists(config_path) else template_path
        if not os.path.exists(config_file):
            return default_description
        try:
            with open(config_file, 'r', encoding='utf-8') as f:
                config = json.load(f)
            apps = config.get("apps", [])
            if not apps:
                return default_description
            # Build dynamic description with app details
            app_descriptions = "; ".join([
                f"{app['app_name']}({app['app_code']}: {app['app_description']})"
                for app in apps
            ])
            return f"Call LinkAI apps/workflows. {app_descriptions}"
        except Exception as e:
            logger.warning(f"[SkillLoader] Failed to load linkai-agent config: {e}")
            return default_description
    def load_all_skills(
        self,
        managed_dir: Optional[str] = None,
--- a/agent/tools/init.py
+++ b/agent/tools/init.py
@@ -8,6 +8,7 @@ from agent.tools.write.write import Write
 from agent.tools.edit.edit import Edit
 from agent.tools.bash.bash import Bash
 from agent.tools.ls.ls import Ls
 from agent.tools.send.send import Send
 # Import memory tools
 from agent.tools.memory.memory_search import MemorySearchTool
@@ -112,6 +113,7 @@ __all__ = [
    'Edit',
    'Bash',
    'Ls',
    'Send',
    'MemorySearchTool',
    'MemoryGetTool',
    'EnvConfig',
--- a/agent/tools/bash/bash.py
+++ b/agent/tools/bash/bash.py
@@ -3,12 +3,14 @@ Bash tool - Execute bash commands
 """
 import os
 import sys
 import subprocess
 import tempfile
 from typing import Dict, Any
 from agent.tools.base_tool import BaseTool, ToolResult
 from agent.tools.utils.truncate import truncate_tail, format_size, DEFAULT_MAX_LINES, DEFAULT_MAX_BYTES
 from common.log import logger
 class Bash(BaseTool):
@@ -60,6 +62,12 @@ IMPORTANT SAFETY GUIDELINES:
        if not command:
            return ToolResult.fail("Error: command parameter is required")
        # Security check: Prevent accessing sensitive config files
        if "~/.cow/.env" in command or "~/.cow" in command:
            return ToolResult.fail(
                "Error: Access denied. API keys and credentials must be accessed through the env_config tool only."
            )
        # Optional safety check - only warn about extremely dangerous commands
        if self.safety_mode:
            warning = self._get_safety_warning(command)
@@ -68,7 +76,31 @@ IMPORTANT SAFETY GUIDELINES:
                    f"Safety Warning: {warning}\n\nIf you believe this command is safe and necessary, please ask the user for confirmation first, explaining what the command does and why it's needed.")
        try:
-            # Execute command
+            # Prepare environment with .env file variables
            env = os.environ.copy()
            # Load environment variables from ~/.cow/.env if it exists
            env_file = os.path.expanduser("~/.cow/.env")
            if os.path.exists(env_file):
                try:
                    from dotenv import dotenv_values
                    env_vars = dotenv_values(env_file)
                    env.update(env_vars)
                    logger.debug(f"[Bash] Loaded {len(env_vars)} variables from {env_file}")
                except ImportError:
                    logger.debug("[Bash] python-dotenv not installed, skipping .env loading")
                except Exception as e:
                    logger.debug(f"[Bash] Failed to load .env: {e}")
            # Debug logging
            logger.debug(f"[Bash] CWD: {self.cwd}")
            logger.debug(f"[Bash] Command: {command[:500]}")
            logger.debug(f"[Bash] OPENAI_API_KEY in env: {'OPENAI_API_KEY' in env}")
            logger.debug(f"[Bash] SHELL: {env.get('SHELL', 'not set')}")
            logger.debug(f"[Bash] Python executable: {sys.executable}")
            logger.debug(f"[Bash] Process UID: {os.getuid()}")
            # Execute command with inherited environment variables
            result = subprocess.run(
                command,
                shell=True,
@@ -76,8 +108,50 @@ IMPORTANT SAFETY GUIDELINES:
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
-                timeout=timeout
+                timeout=timeout,
                env=env
            )
            logger.debug(f"[Bash] Exit code: {result.returncode}")
            logger.debug(f"[Bash] Stdout length: {len(result.stdout)}")
            logger.debug(f"[Bash] Stderr length: {len(result.stderr)}")
            # Workaround for exit code 126 with no output
            if result.returncode == 126 and not result.stdout and not result.stderr:
                logger.warning(f"[Bash] Exit 126 with no output - trying alternative execution method")
                # Try using argument list instead of shell=True
                import shlex
                try:
                    parts = shlex.split(command)
                    if len(parts) > 0:
                        logger.info(f"[Bash] Retrying with argument list: {parts[:3]}...")
                        retry_result = subprocess.run(
                            parts,
                            cwd=self.cwd,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            text=True,
                            timeout=timeout,
                            env=env
                        )
                        logger.debug(f"[Bash] Retry exit code: {retry_result.returncode}, stdout: {len(retry_result.stdout)}, stderr: {len(retry_result.stderr)}")
                        # If retry succeeded, use retry result
                        if retry_result.returncode == 0 or retry_result.stdout or retry_result.stderr:
                            result = retry_result
                        else:
                            # Both attempts failed - check if this is openai-image-vision skill
                            if 'openai-image-vision' in command or 'vision.sh' in command:
                                # Create a mock result with helpful error message
                                from types import SimpleNamespace
                                result = SimpleNamespace(
                                    returncode=1,
                                    stdout='{"error": "图片无法解析", "reason": "该图片格式可能不受支持，或图片文件存在问题", "suggestion": "请尝试其他图片"}',
                                    stderr=''
                                )
                                logger.info(f"[Bash] Converted exit 126 to user-friendly image error message for vision skill")
                except Exception as retry_err:
                    logger.warning(f"[Bash] Retry failed: {retry_err}")
            # Combine stdout and stderr
            output = result.stdout
--- a/agent/tools/env_config/env_config.py
+++ b/agent/tools/env_config/env_config.py
@@ -27,7 +27,7 @@ class EnvConfig(BaseTool):
    name: str = "env_config"
    description: str = (
-        "Manage API keys and skill configurations stored in the workspace .env file. "
+        "Manage API keys and skill configurations securely. "
        "Use this tool when user wants to configure API keys (like BOCHA_API_KEY, OPENAI_API_KEY), "
        "view configured keys, or manage skill settings. "
        "Actions: 'set' (add/update key), 'get' (view specific key), 'list' (show all configured keys), 'delete' (remove key). "
@@ -65,16 +65,17 @@ class EnvConfig(BaseTool):
    def __init__(self, config: dict = None):
        self.config = config or {}
-        self.workspace_dir = self.config.get("workspace_dir", os.path.expanduser("~/cow"))
+        # Store env config in ~/.cow directory (outside workspace for security)
-        self.env_path = os.path.join(self.workspace_dir, '.env')
+        self.env_dir = os.path.expanduser("~/.cow")
        self.env_path = os.path.join(self.env_dir, '.env')
        self.agent_bridge = self.config.get("agent_bridge")  # Reference to AgentBridge for hot reload
        # Don't create .env file in __init__ to avoid issues during tool discovery
        # It will be created on first use in execute()
    def _ensure_env_file(self):
        """Ensure the .env file exists"""
-        # Create workspace directory if it doesn't exist
+        # Create ~/.cow directory if it doesn't exist
-        os.makedirs(self.workspace_dir, exist_ok=True)
+        os.makedirs(self.env_dir, exist_ok=True)
        if not os.path.exists(self.env_path):
            Path(self.env_path).touch()
--- a/agent/tools/ls/ls.py
+++ b/agent/tools/ls/ls.py
@@ -50,6 +50,13 @@ class Ls(BaseTool):
        # Resolve path
        absolute_path = self._resolve_path(path)
        # Security check: Prevent accessing sensitive config directory
        env_config_dir = os.path.expanduser("~/.cow")
        if os.path.abspath(absolute_path) == os.path.abspath(env_config_dir):
            return ToolResult.fail(
                "Error: Access denied. API keys and credentials must be accessed through the env_config tool only."
            )
        if not os.path.exists(absolute_path):
            # Provide helpful hint if using relative path
            if not os.path.isabs(path) and not path.startswith('~'):
--- a/agent/tools/read/read.py
+++ b/agent/tools/read/read.py
@@ -15,7 +15,7 @@ class Read(BaseTool):
    """Tool for reading file contents"""
    name: str = "read"
-    description: str = f"Read the contents of a file. Supports text files, PDF files, and images (jpg, png, gif, webp). For text files, output is truncated to {DEFAULT_MAX_LINES} lines or {DEFAULT_MAX_BYTES // 1024}KB (whichever is hit first). Use offset/limit for large files."
+    description: str = f"Read or inspect file contents. For text/PDF files, returns content (truncated to {DEFAULT_MAX_LINES} lines or {DEFAULT_MAX_BYTES // 1024}KB). For images/videos/audio, returns metadata only (file info, size, type). Use offset/limit for large text files."
    params: dict = {
        "type": "object",
@@ -39,10 +39,25 @@ class Read(BaseTool):
    def __init__(self, config: dict = None):
        self.config = config or {}
        self.cwd = self.config.get("cwd", os.getcwd())
-        # Supported image formats
+        
-        self.image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.webp'}
+        # File type categories
-        # Supported PDF format
+        self.image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.svg', '.ico'}
        self.video_extensions = {'.mp4', '.avi', '.mov', '.mkv', '.flv', '.wmv', '.webm', '.m4v'}
        self.audio_extensions = {'.mp3', '.wav', '.ogg', '.m4a', '.flac', '.aac', '.wma'}
        self.binary_extensions = {'.exe', '.dll', '.so', '.dylib', '.bin', '.dat', '.db', '.sqlite'}
        self.archive_extensions = {'.zip', '.tar', '.gz', '.rar', '.7z', '.bz2', '.xz'}
        self.pdf_extensions = {'.pdf'}
        # Readable text formats (will be read with truncation)
        self.text_extensions = {
            '.txt', '.md', '.markdown', '.rst', '.log', '.csv', '.tsv', '.json', '.xml', '.yaml', '.yml',
            '.py', '.js', '.ts', '.java', '.c', '.cpp', '.h', '.hpp', '.go', '.rs', '.rb', '.php',
            '.html', '.css', '.scss', '.sass', '.less', '.vue', '.jsx', '.tsx',
            '.sh', '.bash', '.zsh', '.fish', '.ps1', '.bat', '.cmd',
            '.sql', '.r', '.m', '.swift', '.kt', '.scala', '.clj', '.erl', '.ex',
            '.dockerfile', '.makefile', '.cmake', '.gradle', '.properties', '.ini', '.conf', '.cfg',
            '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx'  # Office documents
        }
    def execute(self, args: Dict[str, Any]) -> ToolResult:
        """
@@ -61,6 +76,13 @@ class Read(BaseTool):
        # Resolve path
        absolute_path = self._resolve_path(path)
        # Security check: Prevent reading sensitive config files
        env_config_path = os.path.expanduser("~/.cow/.env")
        if os.path.abspath(absolute_path) == os.path.abspath(env_config_path):
            return ToolResult.fail(
                "Error: Access denied. API keys and credentials must be accessed through the env_config tool only."
            )
        # Check if file exists
        if not os.path.exists(absolute_path):
            # Provide helpful hint if using relative path
@@ -78,16 +100,25 @@ class Read(BaseTool):
        # Check file type
        file_ext = Path(absolute_path).suffix.lower()
        file_size = os.path.getsize(absolute_path)
-        # Check if image
+        # Check if image - return metadata for sending
        if file_ext in self.image_extensions:
            return self._read_image(absolute_path, file_ext)
        # Check if video/audio/binary/archive - return metadata only
        if file_ext in self.video_extensions:
            return self._return_file_metadata(absolute_path, "video", file_size)
        if file_ext in self.audio_extensions:
            return self._return_file_metadata(absolute_path, "audio", file_size)
        if file_ext in self.binary_extensions or file_ext in self.archive_extensions:
            return self._return_file_metadata(absolute_path, "binary", file_size)
        # Check if PDF
        if file_ext in self.pdf_extensions:
            return self._read_pdf(absolute_path, path, offset, limit)
-        # Read text file
+        # Read text file (with truncation for large files)
        return self._read_text(absolute_path, path, offset, limit)
    def _resolve_path(self, path: str) -> str:
@@ -103,25 +134,56 @@ class Read(BaseTool):
            return path
        return os.path.abspath(os.path.join(self.cwd, path))
    def _return_file_metadata(self, absolute_path: str, file_type: str, file_size: int) -> ToolResult:
        """
        Return file metadata for files whose content is not read (video, audio, binary, etc.)

        :param absolute_path: Absolute path to the file
        :param file_type: Category label (e.g. "video", "audio", "binary"); it is used in
                          the result's "type" ("<file_type>_metadata") and "file_type" fields
        :param file_size: File size in bytes
        :return: Successful ToolResult carrying a metadata dict (type, file_type, path,
                 file_name, mime_type, size, size_formatted, message)
        """
        file_name = Path(absolute_path).name
        file_ext = Path(absolute_path).suffix.lower()
        # Extension -> MIME type. Covers the video/audio/archive extensions this tool
        # routes here; anything unlisted (e.g. .exe, .dll, .db) is reported as a
        # generic byte stream.
        mime_types = {
            # Video
            '.mp4': 'video/mp4', '.avi': 'video/x-msvideo', '.mov': 'video/quicktime',
            '.mkv': 'video/x-matroska', '.webm': 'video/webm', '.flv': 'video/x-flv',
            '.wmv': 'video/x-ms-wmv', '.m4v': 'video/x-m4v',
            # Audio
            '.mp3': 'audio/mpeg', '.wav': 'audio/wav', '.ogg': 'audio/ogg',
            '.m4a': 'audio/mp4', '.flac': 'audio/flac', '.aac': 'audio/aac',
            '.wma': 'audio/x-ms-wma',
            # Archives
            '.zip': 'application/zip', '.tar': 'application/x-tar',
            '.gz': 'application/gzip', '.rar': 'application/x-rar-compressed',
            '.7z': 'application/x-7z-compressed', '.bz2': 'application/x-bzip2',
            '.xz': 'application/x-xz',
        }
        mime_type = mime_types.get(file_ext, 'application/octet-stream')
        size_formatted = format_size(file_size)
        result = {
            "type": f"{file_type}_metadata",
            "file_type": file_type,
            "path": absolute_path,
            "file_name": file_name,
            "mime_type": mime_type,
            "size": file_size,
            "size_formatted": size_formatted,
            "message": f"{file_type.capitalize()} 文件: {file_name} ({size_formatted})\n提示: 如果需要发送此文件，请使用 send 工具。"
        }
        return ToolResult.success(result)
    def _read_image(self, absolute_path: str, file_ext: str) -> ToolResult:
        """
-        Read image file
+        Read image file - always return metadata only (images should be sent, not read into context)
        :param absolute_path: Absolute path to the image file
        :param file_ext: File extension
-        :return: Result containing image information
+        :return: Result containing image metadata for sending
        """
        try:
            # Read image file
            with open(absolute_path, 'rb') as f:
                image_data = f.read()
            # Get file size
-            file_size = len(image_data)
+            file_size = os.path.getsize(absolute_path)
            # Return image information (actual image data can be base64 encoded when needed)
            import base64
            base64_data = base64.b64encode(image_data).decode('utf-8')
            # Determine MIME type
            mime_type_map = {
@@ -133,12 +195,15 @@ class Read(BaseTool):
            }
            mime_type = mime_type_map.get(file_ext, 'image/jpeg')
            # Return metadata for images (NOT file_to_send - use send tool to actually send)
            result = {
-                "type": "image",
+                "type": "image_metadata",
                "file_type": "image",
                "path": absolute_path,
                "mime_type": mime_type,
                "size": file_size,
                "size_formatted": format_size(file_size),
-                "data": base64_data  # Base64 encoded image data
+                "message": f"图片文件: {Path(absolute_path).name} ({format_size(file_size)})\n提示: 如果需要发送此图片，请使用 send 工具。"
            }
            return ToolResult.success(result)
@@ -157,10 +222,32 @@ class Read(BaseTool):
        :return: File content or error message
        """
        try:
            # Check file size first
            file_size = os.path.getsize(absolute_path)
            MAX_FILE_SIZE = 50 * 1024 * 1024  # 50MB
            if file_size > MAX_FILE_SIZE:
                # File too large, return metadata only
                return ToolResult.success({
                    "type": "file_to_send",
                    "file_type": "document",
                    "path": absolute_path,
                    "size": file_size,
                    "size_formatted": format_size(file_size),
                    "message": f"文件过大 ({format_size(file_size)} > 50MB)，无法读取内容。文件路径: {absolute_path}"
                })
            # Read file
            with open(absolute_path, 'r', encoding='utf-8') as f:
                content = f.read()
            # Truncate content if too long (20K characters max for model context)
            MAX_CONTENT_CHARS = 20 * 1024  # 20K characters
            content_truncated = False
            if len(content) > MAX_CONTENT_CHARS:
                content = content[:MAX_CONTENT_CHARS]
                content_truncated = True
            all_lines = content.split('\n')
            total_file_lines = len(all_lines)
@@ -197,6 +284,10 @@ class Read(BaseTool):
            output_text = ""
            details = {}
            # Add truncation warning if content was truncated
            if content_truncated:
                output_text = f"[文件内容已截断到前 {format_size(MAX_CONTENT_CHARS)}，完整文件大小: {format_size(file_size)}]\n\n"
            if truncation.first_line_exceeds_limit:
                # First line exceeds 30KB limit
                first_line_size = format_size(len(all_lines[start_line].encode('utf-8')))
--- a/agent/tools/scheduler/README.md
+++ b/agent/tools/scheduler/README.md
@@ -42,24 +42,26 @@ Agent: [调用 scheduler 工具]
 **示例对话：**
 ```
-用户: 每天早上8点帮我搜索一下当前新闻
+用户: 每天早上8点帮我读取一下今日日程
 Agent: [调用 scheduler 工具]
      action: create
-      name: 每日新闻
+      name: 每日日程
      tool_call:
-        tool_name: bocha_search
+        tool_name: read
        tool_params:
-          query: 今日新闻
+          file_path: ~/cow/schedule.txt
-        result_prefix: 📰 今日新闻播报
+        result_prefix: 📅 今日日程
      schedule_type: cron
      schedule_value: 0 8 * * *
 ```
 **工具调用参数说明：**
- `tool_name`: 要调用的工具名称（如 `bocha_search`、`web_fetch` 等）
+- `tool_name`: 要调用的工具名称（如 `bash`、`read`、`write` 等内置工具）
 - `tool_params`: 工具的参数（字典格式）
 - `result_prefix`: 可选，在结果前添加的前缀文本
 **注意：** 如果要使用 skills（如 bocha-search），需要通过 `bash` 工具调用 skill 脚本
 ### 2. 支持的调度类型
 #### Cron 表达式 (`cron`)
@@ -167,7 +169,7 @@ Agent: [调用 scheduler 工具]
 ```json
 {
  "id": "def456",
-  "name": "每日新闻",
+  "name": "每日日程",
  "enabled": true,
  "created_at": "2024-01-01T10:00:00",
  "updated_at": "2024-01-01T10:00:00",
@@ -177,11 +179,11 @@ Agent: [调用 scheduler 工具]
  },
  "action": {
    "type": "tool_call",
-    "tool_name": "bocha_search",
+    "tool_name": "read",
    "tool_params": {
-      "query": "今日新闻"
+      "file_path": "~/cow/schedule.txt"
    },
-    "result_prefix": "📰 今日新闻播报",
+    "result_prefix": "📅 今日日程",
    "receiver": "wxid_xxx",
    "receiver_name": "张三",
    "is_group": false,
@@ -234,30 +236,29 @@ Agent: [创建 cron: 0 18 * * 1-5]
 Agent: [创建 interval: 3600]
 ```
-### 4. 每日新闻推送（动态工具调用）
+### 4. 每日日程推送（动态工具调用）
 ```
-用户: 每天早上8点帮我搜索一下当前新闻
+用户: 每天早上8点帮我读取今日日程
 Agent: ✅ 定时任务创建成功
-       任务ID: news001
+       任务ID: schedule001
       调度: 每天 8:00
-       工具: bocha_search(query='今日新闻')
+       工具: read(file_path='~/cow/schedule.txt')
-       前缀: 📰 今日新闻播报
+       前缀: 📅 今日日程
 ```
-### 5. 定时天气查询（动态工具调用）
+### 5. 定时文件备份（动态工具调用）
 ```
-用户: 每天早上7点查询今天的天气
+用户: 每天晚上11点备份工作文件
-Agent: [创建 cron: 0 7 * * *]
+Agent: [创建 cron: 0 23 * * *]
-       工具: bocha_search(query='今日天气')
+       工具: bash(command='cp ~/cow/work.txt ~/cow/backup/work_$(date +%Y%m%d).txt')
-       前缀: 🌤️ 今日天气预报
+       前缀: ✅ 文件已备份
 ```
-### 6. 周报提醒（动态工具调用）
+### 6. 周报提醒（静态消息）
 ```
-用户: 每周五下午5点搜索本周热点
+用户: 每周五下午5点提醒我写周报
 Agent: [创建 cron: 0 17 * * 5]
-       工具: bocha_search(query='本周热点新闻')
+       消息: 📊 该写周报了！
       前缀: 📊 本周热点回顾
 ```
 ### 7. 特定日期提醒
--- a/agent/tools/scheduler/integration.py
+++ b/agent/tools/scheduler/integration.py
@@ -45,10 +45,17 @@ def init_scheduler(agent_bridge) -> bool:
                action = task.get("action", {})
                action_type = action.get("type")
-                if action_type == "send_message":
+                if action_type == "agent_task":
                    _execute_agent_task(task, agent_bridge)
                elif action_type == "send_message":
                    # Legacy support for old tasks
                    _execute_send_message(task, agent_bridge)
                elif action_type == "tool_call":
                    # Legacy support for old tasks
                    _execute_tool_call(task, agent_bridge)
                elif action_type == "skill_call":
                    # Legacy support for old tasks
                    _execute_skill_call(task, agent_bridge)
                else:
                    logger.warning(f"[Scheduler] Unknown action type: {action_type}")
            except Exception as e:
@@ -76,6 +83,100 @@ def get_scheduler_service():
    return _scheduler_service
 def _execute_agent_task(task: dict, agent_bridge):
    """
    Run an ``agent_task`` action by handing its description to the Agent.

    The action's ``task_description`` is used as the query. When the Agent
    returns a reply that has content, a channel is created for the action's
    ``channel_type`` and the reply is sent through it. Every failure is
    logged and swallowed; nothing propagates to the caller.

    Args:
        task: Task dictionary. Its "action" entry supplies task_description,
            receiver, is_group, channel_type and, optionally,
            dingtalk_sender_staff_id.
        agent_bridge: AgentBridge instance whose agent_reply() runs the task
    """
    import traceback

    try:
        action = task.get("action", {})
        description = action.get("task_description")
        target = action.get("receiver")
        group_chat = action.get("is_group", False)
        channel_name = action.get("channel_type", "unknown")

        # Both the description and the receiver are mandatory
        if not description:
            logger.error(f"[Scheduler] Task {task['id']}: No task_description specified")
            return
        if not target:
            logger.error(f"[Scheduler] Task {task['id']}: No receiver specified")
            return

        # DingTalk tasks still run, but the operator is warned up front
        if channel_name == "dingtalk":
            logger.warning(f"[Scheduler] Task {task['id']}: DingTalk channel does not support scheduled messages (Stream mode limitation). Task will execute but message cannot be sent.")

        logger.info(f"[Scheduler] Task {task['id']}: Executing agent task '{description}'")

        # Build the context handed to the Agent
        context = Context(ContextType.TEXT, description)
        context["receiver"] = target
        context["isgroup"] = group_chat
        context["session_id"] = target

        # Per-channel context fields
        if channel_name == "web":
            import uuid
            context["request_id"] = f"scheduler_{task['id']}_{uuid.uuid4().hex[:8]}"
        elif channel_name in ("feishu", "dingtalk"):
            if channel_name == "feishu":
                context["receive_id_type"] = "chat_id" if group_chat else "open_id"
            # Scheduled runs have no incoming message object
            context["msg"] = None
            if channel_name == "dingtalk" and not group_chat:
                # Single chats need the stored sender_staff_id, when available
                staff_id = action.get("dingtalk_sender_staff_id")
                if staff_id:
                    context["dingtalk_sender_staff_id"] = staff_id

        # Mark this as a scheduled run to prevent recursive task creation
        context["is_scheduled_task"] = True

        try:
            reply = agent_bridge.agent_reply(description, context=context, on_event=None, clear_history=True)
            if not (reply and reply.content):
                logger.error(f"[Scheduler] Task {task['id']}: No result from agent execution")
                return

            # Deliver the reply through the task's channel
            from channel.channel_factory import create_channel
            try:
                channel = create_channel(channel_name)
                if not channel:
                    logger.error(f"[Scheduler] Failed to create channel: {channel_name}")
                    return

                # The web channel needs request_id mapped to the session first
                if channel_name == "web" and hasattr(channel, 'request_to_session'):
                    request_id = context.get("request_id")
                    if request_id:
                        channel.request_to_session[request_id] = target
                        logger.debug(f"[Scheduler] Registered request_id {request_id} -> session {target}")

                channel.send(reply, context)
                logger.info(f"[Scheduler] Task {task['id']} executed successfully, result sent to {target}")
            except Exception as send_error:
                logger.error(f"[Scheduler] Failed to send result: {send_error}")
        except Exception as run_error:
            logger.error(f"[Scheduler] Failed to execute task via Agent: {run_error}")
            logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
    except Exception as outer_error:
        logger.error(f"[Scheduler] Error in _execute_agent_task: {outer_error}")
        logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
 def _execute_send_message(task: dict, agent_bridge):
    """
    Execute a send_message action
@@ -116,6 +217,17 @@ def _execute_send_message(task: dict, agent_bridge):
            # Feishu channel will detect this and send as new message instead of reply
            context["msg"] = None
            logger.debug(f"[Scheduler] Feishu: receive_id_type={context['receive_id_type']}, is_group={is_group}, receiver={receiver}")
        elif channel_type == "dingtalk":
            # DingTalk channel setup
            context["msg"] = None
            # 如果是单聊，需要传递 sender_staff_id
            if not is_group:
                sender_staff_id = action.get("dingtalk_sender_staff_id")
                if sender_staff_id:
                    context["dingtalk_sender_staff_id"] = sender_staff_id
                    logger.debug(f"[Scheduler] DingTalk single chat: sender_staff_id={sender_staff_id}")
                else:
                    logger.warning(f"[Scheduler] Task {task['id']}: DingTalk single chat message missing sender_staff_id")
        # Create reply
        reply = Reply(ReplyType.TEXT, content)
@@ -156,8 +268,9 @@ def _execute_tool_call(task: dict, agent_bridge):
    """
    try:
        action = task.get("action", {})
-        tool_name = action.get("tool_name")
+        # Support both old and new field names
-        tool_params = action.get("tool_params", {})
+        tool_name = action.get("call_name") or action.get("tool_name")
        tool_params = action.get("call_params") or action.get("tool_params", {})
        result_prefix = action.get("result_prefix", "")
        receiver = action.get("receiver")
        is_group = action.get("is_group", False)
@@ -237,6 +350,82 @@ def _execute_tool_call(task: dict, agent_bridge):
        logger.error(f"[Scheduler] Error in _execute_tool_call: {e}")
 def _execute_skill_call(task: dict, agent_bridge):
    """
    Execute a legacy ``skill_call`` action by asking the Agent to run the skill.

    A natural-language query ("Use <skill> skill with k=v, ...") is built from
    the action and passed to the Agent. If the Agent returns a reply with
    content, the optional ``result_prefix`` is prepended and the reply is sent
    through the action's channel. Every failure is logged and swallowed;
    nothing propagates to the caller.

    Args:
        task: Task dictionary. Its "action" entry supplies call_name/skill_name,
            call_params/skill_params, result_prefix, receiver, is_group and
            channel_type.
        agent_bridge: AgentBridge instance whose agent_reply() runs the skill
    """
    try:
        action = task.get("action", {})
        # Support both old and new field names
        skill_name = action.get("call_name") or action.get("skill_name")
        # "or {}" also covers a stored None, which would break .items() below
        skill_params = action.get("call_params") or action.get("skill_params") or {}
        result_prefix = action.get("result_prefix", "")
        receiver = action.get("receiver")
        # Actions are stored under "is_group"; the old "isgroup" spelling is
        # kept only as a fallback so the group flag is no longer always False.
        is_group = action.get("is_group", action.get("isgroup", False))
        channel_type = action.get("channel_type", "unknown")

        if not skill_name:
            logger.error(f"[Scheduler] Task {task['id']}: No skill_name specified")
            return
        if not receiver:
            logger.error(f"[Scheduler] Task {task['id']}: No receiver specified")
            return

        logger.info(f"[Scheduler] Task {task['id']}: Executing skill '{skill_name}' with params {skill_params}")

        # Build a natural language query for the Agent to execute the skill
        # Format: "Use skill-name skill with k1=v1, k2=v2"
        param_str = ", ".join(f"{k}={v}" for k, v in skill_params.items())
        query = f"Use {skill_name} skill"
        if param_str:
            query += f" with {param_str}"

        # Create context for Agent
        context = Context(ContextType.TEXT, query)
        context["receiver"] = receiver
        context["isgroup"] = is_group
        context["session_id"] = receiver

        # Channel-specific setup
        if channel_type == "web":
            import uuid
            context["request_id"] = f"scheduler_{task['id']}_{uuid.uuid4().hex[:8]}"
        elif channel_type == "feishu":
            context["receive_id_type"] = "chat_id" if is_group else "open_id"
            context["msg"] = None

        # Use Agent to execute the skill
        try:
            reply = agent_bridge.agent_reply(query, context=context, on_event=None, clear_history=True)
            if not (reply and reply.content):
                logger.error(f"[Scheduler] Task {task['id']}: No result from skill execution")
                return

            # Add prefix if specified (applied to the reply that is sent)
            if result_prefix:
                reply.content = f"{result_prefix}\n\n{reply.content}"

            # Deliver the result; previously it was computed but never sent
            from channel.channel_factory import create_channel
            try:
                channel = create_channel(channel_type)
                if not channel:
                    logger.error(f"[Scheduler] Failed to create channel: {channel_type}")
                    return

                # The web channel needs request_id mapped to the session first
                if channel_type == "web" and hasattr(channel, 'request_to_session'):
                    request_id = context.get("request_id")
                    if request_id:
                        channel.request_to_session[request_id] = receiver
                        logger.debug(f"[Scheduler] Registered request_id {request_id} -> session {receiver}")

                channel.send(reply, context)
                logger.info(f"[Scheduler] Task {task['id']} executed: skill result sent to {receiver}")
            except Exception as e:
                logger.error(f"[Scheduler] Failed to send result: {e}")
        except Exception as e:
            logger.error(f"[Scheduler] Failed to execute skill via Agent: {e}")
            import traceback
            logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
    except Exception as e:
        logger.error(f"[Scheduler] Error in _execute_skill_call: {e}")
        import traceback
        logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
 def attach_scheduler_to_tool(tool, context: Context = None):
    """
    Attach scheduler components to a SchedulerTool instance
--- a/agent/tools/scheduler/scheduler_service.py
+++ b/agent/tools/scheduler/scheduler_service.py
@@ -118,6 +118,34 @@ class SchedulerService:
        try:
            next_run = datetime.fromisoformat(next_run_str)
            # Check if task is overdue (e.g., service restart)
            if next_run < now:
                time_diff = (now - next_run).total_seconds()
                # If overdue by more than 5 minutes, skip this run and schedule next
                if time_diff > 300:  # 5 minutes
                    logger.warning(f"[Scheduler] Task {task['id']} is overdue by {int(time_diff)}s, skipping and scheduling next run")
                    # For one-time tasks, disable them
                    schedule = task.get("schedule", {})
                    if schedule.get("type") == "once":
                        self.task_store.update_task(task['id'], {
                            "enabled": False,
                            "last_run_at": now.isoformat()
                        })
                        logger.info(f"[Scheduler] One-time task {task['id']} expired, disabled")
                        return False
                    # For recurring tasks, calculate next run from now
                    next_next_run = self._calculate_next_run(task, now)
                    if next_next_run:
                        self.task_store.update_task(task['id'], {
                            "next_run_at": next_next_run.isoformat()
                        })
                        logger.info(f"[Scheduler] Rescheduled task {task['id']} to {next_next_run}")
                    return False
            return now >= next_run
        except:
            return False
--- a/agent/tools/scheduler/scheduler_tool.py
+++ b/agent/tools/scheduler/scheduler_tool.py
@@ -20,23 +20,16 @@ class SchedulerTool(BaseTool):
    name: str = "scheduler"
    description: str = (
-        "创建、查询和管理定时任务。支持两种任务类型：\n"
+        "创建、查询和管理定时任务。支持固定消息和AI任务两种类型。\n\n"
        "1. 静态消息任务：定时发送预定义的消息\n"
        "2. 动态工具任务：定时执行工具调用并发送结果（如搜索新闻、查询天气等）\n\n"
        "使用方法：\n"
-        "- 创建静态消息任务：action='create', name='任务名', message='消息内容', schedule_type='interval'/'cron'/'once', schedule_value='间隔秒数/cron表达式/时间'\n"
+        "- 创建：action='create', name='任务名', message/ai_task='内容', schedule_type='once/interval/cron', schedule_value='...'\n"
-        "- 创建动态工具任务：action='create', name='任务名', tool_call={'tool_name': '工具名', 'tool_params': {...}, 'result_prefix': '前缀'}, schedule_type='interval'/'cron'/'once', schedule_value='值'\n"
+        "- 查询：action='list' / action='get', task_id='任务ID'\n"
-        "- 查询列表：action='list'\n"
+        "- 管理：action='delete/enable/disable', task_id='任务ID'\n\n"
-        "- 查看详情：action='get', task_id='任务ID'\n"
+        "调度类型：\n"
-        "- 删除任务：action='delete', task_id='任务ID'\n"
+        "- once: 一次性任务，支持相对时间(+5s,+10m,+1h,+1d)或ISO时间\n"
-        "- 启用任务：action='enable', task_id='任务ID'\n"
+        "- interval: 固定间隔(秒)，如3600表示每小时\n"
-        "- 禁用任务：action='disable', task_id='任务ID'\n\n"
+        "- cron: cron表达式，如'0 8 * * *'表示每天8点\n\n"
-        "调度类型说明：\n"
+        "注意：'X秒后'用once+相对时间，'每X秒'用interval"
        "- interval: 固定间隔秒数（如3600表示每小时）\n"
        "- cron: cron表达式（如'0 9 * * *'表示每天9点，'*/10 * * * *'表示每10分钟）\n"
        "- once: 一次性任务，ISO时间格式（如'2024-12-25T09:00:00'）\n\n"
        "示例：每天早上8点搜索新闻\n"
        "action='create', name='每日新闻', tool_call={'tool_name': 'bocha_search', 'tool_params': {'query': '今日新闻'}, 'result_prefix': '📰 今日新闻播报'}, schedule_type='cron', schedule_value='0 8 * * *'"
    )
    params: dict = {
        "type": "object",
@@ -56,26 +49,11 @@ class SchedulerTool(BaseTool):
            },
            "message": {
                "type": "string",
-                "description": "要发送的静态消息内容 (用于 create 操作，与tool_call二选一)"
+                "description": "固定消息内容 (与ai_task二选一)"
            },
-            "tool_call": {
+            "ai_task": {
-                "type": "object",
+                "type": "string",
-                "description": "要执行的工具调用 (用于 create 操作，与message二选一)",
+                "description": "AI任务描述 (与message二选一)，如'搜索今日新闻'、'查询天气'"
                "properties": {
                    "tool_name": {
                        "type": "string",
                        "description": "工具名称，如 'bocha_search'"
                    },
                    "tool_params": {
                        "type": "object",
                        "description": "工具参数"
                    },
                    "result_prefix": {
                        "type": "string",
                        "description": "结果前缀，如 '今日新闻：'"
                    }
                },
                "required": ["tool_name"]
            },
            "schedule_type": {
                "type": "string",
@@ -84,12 +62,7 @@ class SchedulerTool(BaseTool):
            },
            "schedule_value": {
                "type": "string",
-                "description": (
+                "description": "调度值: cron表达式/间隔秒数/时间(+5s,+10m,+1h或ISO格式)"
                    "调度值 (用于 create 操作):\n"
                    "- cron类型: cron表达式，如 '0 9 * * *' (每天9点)，'*/10 * * * *' (每10分钟)\n"
                    "- interval类型: 间隔秒数，如 '3600' (每小时)，'10' (每10秒)\n"
                    "- once类型: ISO时间，如 '2024-12-25T09:00:00'"
                )
            }
        },
        "required": ["action"]
@@ -151,17 +124,20 @@ class SchedulerTool(BaseTool):
        """Create a new scheduled task"""
        name = kwargs.get("name")
        message = kwargs.get("message")
-        tool_call = kwargs.get("tool_call")
+        ai_task = kwargs.get("ai_task")
        schedule_type = kwargs.get("schedule_type")
        schedule_value = kwargs.get("schedule_value")
        # Validate required fields
        if not name:
            return "错误: 缺少任务名称 (name)"
-        if not message and not tool_call:
+        
-            return "错误: 必须提供 message 或 tool_call 之一"
+        # Check that exactly one of message/ai_task is provided
-        if message and tool_call:
+        if not message and not ai_task:
-            return "错误: message 和 tool_call 不能同时提供，请选择其一"
+            return "错误: 必须提供 message（固定消息）或 ai_task（AI任务）之一"
        if message and ai_task:
            return "错误: message 和 ai_task 只能提供其中一个"
        if not schedule_type:
            return "错误: 缺少调度类型 (schedule_type)"
        if not schedule_value:
@@ -181,7 +157,7 @@ class SchedulerTool(BaseTool):
        # Create task
        task_id = str(uuid.uuid4())[:8]
-        # Build action based on message or tool_call
+        # Build action based on message or ai_task
        if message:
            action = {
                "type": "send_message",
@@ -191,19 +167,22 @@ class SchedulerTool(BaseTool):
                "is_group": context.get("isgroup", False),
                "channel_type": self.config.get("channel_type", "unknown")
            }
-        else:  # tool_call
+        else:  # ai_task
            action = {
-                "type": "tool_call",
+                "type": "agent_task",
-                "tool_name": tool_call.get("tool_name"),
+                "task_description": ai_task,
                "tool_params": tool_call.get("tool_params", {}),
                "result_prefix": tool_call.get("result_prefix", ""),
                "receiver": context.get("receiver"),
                "receiver_name": self._get_receiver_name(context),
                "is_group": context.get("isgroup", False),
                "channel_type": self.config.get("channel_type", "unknown")
            }
-        task = {
+        # 针对钉钉单聊，额外存储 sender_staff_id
        msg = context.kwargs.get("msg")
        if msg and hasattr(msg, 'sender_staff_id') and not context.get("isgroup", False):
            action["dingtalk_sender_staff_id"] = msg.sender_staff_id
        task_data = {
            "id": task_id,
            "name": name,
            "enabled": True,
@@ -214,26 +193,21 @@ class SchedulerTool(BaseTool):
        }
        # Calculate initial next_run_at
-        next_run = self._calculate_next_run(task)
+        next_run = self._calculate_next_run(task_data)
        if next_run:
-            task["next_run_at"] = next_run.isoformat()
+            task_data["next_run_at"] = next_run.isoformat()
        # Save task
-        self.task_store.add_task(task)
+        self.task_store.add_task(task_data)
        # Format response
        schedule_desc = self._format_schedule_description(schedule)
-        receiver_desc = task["action"]["receiver_name"] or task["action"]["receiver"]
+        receiver_desc = task_data["action"]["receiver_name"] or task_data["action"]["receiver"]
        if message:
-            content_desc = f"💬 消息: {message}"
+            content_desc = f"💬 固定消息: {message}"
        else:
-            tool_name = tool_call.get("tool_name")
+            content_desc = f"🤖 AI任务: {ai_task}"
            tool_params_str = str(tool_call.get("tool_params", {}))
            prefix = tool_call.get("result_prefix", "")
            content_desc = f"🔧 工具调用: {tool_name}({tool_params_str})"
            if prefix:
                content_desc += f"\n📝 结果前缀: {prefix}"
        return (
            f"✅ 定时任务创建成功\n\n"
@@ -353,9 +327,38 @@ class SchedulerTool(BaseTool):
                return {"type": "interval", "seconds": seconds}
            elif schedule_type == "once":
-                # Parse datetime
+                # Parse datetime - support both relative and absolute time
-                datetime.fromisoformat(schedule_value)
+                
-                return {"type": "once", "run_at": schedule_value}
+                # Check if it's relative time (e.g., "+5s", "+10m", "+1h", "+1d")
                if schedule_value.startswith("+"):
                    import re
                    match = re.match(r'\+(\d+)([smhd])', schedule_value)
                    if match:
                        amount = int(match.group(1))
                        unit = match.group(2)
                        from datetime import timedelta
                        now = datetime.now()
                        if unit == 's':  # seconds
                            target_time = now + timedelta(seconds=amount)
                        elif unit == 'm':  # minutes
                            target_time = now + timedelta(minutes=amount)
                        elif unit == 'h':  # hours
                            target_time = now + timedelta(hours=amount)
                        elif unit == 'd':  # days
                            target_time = now + timedelta(days=amount)
                        else:
                            return None
                        return {"type": "once", "run_at": target_time.isoformat()}
                    else:
                        logger.error(f"[SchedulerTool] Invalid relative time format: {schedule_value}")
                        return None
                else:
                    # Absolute time in ISO format
                    datetime.fromisoformat(schedule_value)
                    return {"type": "once", "run_at": schedule_value}
        except Exception as e:
            logger.error(f"[SchedulerTool] Invalid schedule: {e}")
--- a/agent/tools/send/init.py
+++ b/agent/tools/send/init.py
@@ -0,0 +1,3 @@
 from .send import Send
 __all__ = ['Send']
--- a/agent/tools/send/send.py
+++ b/agent/tools/send/send.py
@@ -0,0 +1,159 @@
 """
 Send tool - Send files to the user
 """
 import os
 from typing import Dict, Any
 from pathlib import Path
 from agent.tools.base_tool import BaseTool, ToolResult
 class Send(BaseTool):
    """
    Tool for sending files to the user.

    The tool does not transmit anything itself: ``execute`` validates the path
    and returns a ``file_to_send`` metadata dict (type, path, name, MIME type,
    size, message) wrapped in a successful ToolResult.
    """

    name: str = "send"
    description: str = "Send a file (image, video, audio, document) to the user. Use this when the user explicitly asks to send/share a file."
    params: dict = {
        "type": "object",
        "properties": {
            "path": {
                "type": "string",
                "description": "Path to the file to send. Can be absolute path or relative to workspace."
            },
            "message": {
                "type": "string",
                "description": "Optional message to accompany the file"
            }
        },
        "required": ["path"]
    }

    def __init__(self, config: dict = None):
        """
        :param config: Optional settings; ``cwd`` is the base directory for
            relative paths (defaults to the process working directory)
        """
        self.config = config or {}
        self.cwd = self.config.get("cwd", os.getcwd())
        # Supported file types, keyed by lower-case extension
        self.image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.svg', '.ico'}
        self.video_extensions = {'.mp4', '.avi', '.mov', '.mkv', '.flv', '.wmv', '.webm', '.m4v'}
        self.audio_extensions = {'.mp3', '.wav', '.ogg', '.m4a', '.flac', '.aac', '.wma'}
        self.document_extensions = {'.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', '.txt', '.md'}

    def execute(self, args: Dict[str, Any]) -> ToolResult:
        """
        Execute file send operation

        :param args: Contains file path and optional message
        :return: File metadata for channel to send, or a failure result when
            the path is missing, does not exist, is not a regular file or is
            not readable
        """
        # Tolerate a missing / None / non-string path instead of raising
        path = str(args.get("path") or "").strip()
        message = args.get("message", "")
        if not path:
            return ToolResult.fail("Error: path parameter is required")

        # Resolve path
        absolute_path = self._resolve_path(path)

        # Check if file exists
        if not os.path.exists(absolute_path):
            return ToolResult.fail(f"Error: File not found: {path}")
        # A directory exists and may be readable, but cannot be sent as a file
        if not os.path.isfile(absolute_path):
            return ToolResult.fail(f"Error: Not a file: {path}")
        # Check if readable
        if not os.access(absolute_path, os.R_OK):
            return ToolResult.fail(f"Error: File is not readable: {path}")

        # Get file info
        resolved = Path(absolute_path)
        file_ext = resolved.suffix.lower()
        file_size = os.path.getsize(absolute_path)
        file_name = resolved.name

        # Determine file type
        if file_ext in self.image_extensions:
            file_type = "image"
            mime_type = self._get_image_mime_type(file_ext)
        elif file_ext in self.video_extensions:
            file_type = "video"
            mime_type = self._get_video_mime_type(file_ext)
        elif file_ext in self.audio_extensions:
            file_type = "audio"
            mime_type = self._get_audio_mime_type(file_ext)
        elif file_ext in self.document_extensions:
            file_type = "document"
            mime_type = self._get_document_mime_type(file_ext)
        else:
            file_type = "file"
            mime_type = "application/octet-stream"

        # Return file_to_send metadata
        result = {
            "type": "file_to_send",
            "file_type": file_type,
            "path": absolute_path,
            "file_name": file_name,
            "mime_type": mime_type,
            "size": file_size,
            "size_formatted": self._format_size(file_size),
            "message": message or f"正在发送 {file_name}"
        }
        return ToolResult.success(result)

    def _resolve_path(self, path: str) -> str:
        """Resolve path to absolute path (expands ``~``; relative paths are joined to ``self.cwd``)"""
        path = os.path.expanduser(path)
        if os.path.isabs(path):
            return path
        return os.path.abspath(os.path.join(self.cwd, path))

    def _get_image_mime_type(self, ext: str) -> str:
        """Get MIME type for image (falls back to image/jpeg)"""
        mime_map = {
            '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg',
            '.png': 'image/png', '.gif': 'image/gif',
            '.webp': 'image/webp', '.bmp': 'image/bmp',
            '.svg': 'image/svg+xml', '.ico': 'image/x-icon'
        }
        return mime_map.get(ext, 'image/jpeg')

    def _get_video_mime_type(self, ext: str) -> str:
        """Get MIME type for video (falls back to video/mp4)"""
        mime_map = {
            '.mp4': 'video/mp4', '.avi': 'video/x-msvideo',
            '.mov': 'video/quicktime', '.mkv': 'video/x-matroska',
            '.webm': 'video/webm', '.flv': 'video/x-flv',
            # Previously missing: these supported extensions fell back to video/mp4
            '.wmv': 'video/x-ms-wmv', '.m4v': 'video/x-m4v'
        }
        return mime_map.get(ext, 'video/mp4')

    def _get_audio_mime_type(self, ext: str) -> str:
        """Get MIME type for audio (falls back to audio/mpeg)"""
        mime_map = {
            '.mp3': 'audio/mpeg', '.wav': 'audio/wav',
            '.ogg': 'audio/ogg', '.m4a': 'audio/mp4',
            '.flac': 'audio/flac', '.aac': 'audio/aac',
            # Previously missing: this supported extension fell back to audio/mpeg
            '.wma': 'audio/x-ms-wma'
        }
        return mime_map.get(ext, 'audio/mpeg')

    def _get_document_mime_type(self, ext: str) -> str:
        """Get MIME type for document (falls back to application/octet-stream)"""
        mime_map = {
            '.pdf': 'application/pdf',
            '.doc': 'application/msword',
            '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
            '.xls': 'application/vnd.ms-excel',
            '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
            '.ppt': 'application/vnd.ms-powerpoint',
            '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
            '.txt': 'text/plain',
            '.md': 'text/markdown'
        }
        return mime_map.get(ext, 'application/octet-stream')

    def _format_size(self, size_bytes: int) -> str:
        """Format file size in human-readable format (1024-based, one decimal, e.g. ``1.5KB``)"""
        for unit in ['B', 'KB', 'MB', 'GB']:
            if size_bytes < 1024.0:
                return f"{size_bytes:.1f}{unit}"
            size_bytes /= 1024.0
        return f"{size_bytes:.1f}TB"
--- a/bridge/agent_bridge.py
+++ b/bridge/agent_bridge.py
@@ -2,6 +2,7 @@
 Agent Bridge - Integrates Agent system with existing COW bridge
 """
 import os
 from typing import Optional, List
 from agent.protocol import Agent, LLMModel, LLMRequest
@@ -269,8 +270,11 @@ class AgentBridge:
        # Get workspace from config
        workspace_root = os.path.expanduser(conf().get("agent_workspace", "~/cow"))
-        # Load environment variables from workspace .env file
+        # Migrate API keys from config.json to environment variables (if not already set)
-        env_file = os.path.join(workspace_root, '.env')
+        self._migrate_config_to_env(workspace_root)
        # Load environment variables from secure .env file location
        env_file = os.path.expanduser("~/.cow/.env")
        if os.path.exists(env_file):
            try:
                from dotenv import load_dotenv
@@ -280,9 +284,6 @@ class AgentBridge:
                logger.warning("[AgentBridge] python-dotenv not installed, skipping .env file loading")
            except Exception as e:
                logger.warning(f"[AgentBridge] Failed to load .env file: {e}")
        # Migrate API keys from config.json to environment variables (if not already set)
        self._migrate_config_to_env(workspace_root)
        # Initialize workspace and create template files
        from agent.prompt import ensure_workspace, load_context_files, PromptBuilder
@@ -377,7 +378,6 @@ class AgentBridge:
                if tool_name == "env_config":
                    from agent.tools import EnvConfig
                    tool = EnvConfig({
                        "workspace_dir": workspace_root,
                        "agent_bridge": self  # Pass self reference for hot reload
                    })
                else:
@@ -390,12 +390,6 @@ class AgentBridge:
                        tool.cwd = file_config.get("cwd", tool.cwd if hasattr(tool, 'cwd') else None)
                        if 'memory_manager' in file_config:
                            tool.memory_manager = file_config['memory_manager']
                    # Apply API key for bocha_search tool
                    elif tool_name == 'bocha_search':
                        bocha_api_key = conf().get("bocha_api_key", "")
                        if bocha_api_key:
                            tool.config = {"bocha_api_key": bocha_api_key}
                            tool.api_key = bocha_api_key
                    tools.append(tool)
                    logger.debug(f"[AgentBridge] Loaded tool: {tool_name}")
            except Exception as e:
@@ -504,8 +498,11 @@ class AgentBridge:
        # Get workspace from config
        workspace_root = os.path.expanduser(conf().get("agent_workspace", "~/cow"))
-        # Load environment variables from workspace .env file
+        # Migrate API keys from config.json to environment variables (if not already set)
-        env_file = os.path.join(workspace_root, '.env')
+        self._migrate_config_to_env(workspace_root)
        # Load environment variables from secure .env file location
        env_file = os.path.expanduser("~/.cow/.env")
        if os.path.exists(env_file):
            try:
                from dotenv import load_dotenv
@@ -609,11 +606,6 @@ class AgentBridge:
                        tool.cwd = file_config.get("cwd", tool.cwd if hasattr(tool, 'cwd') else None)
                        if 'memory_manager' in file_config:
                            tool.memory_manager = file_config['memory_manager']
                    elif tool_name == 'bocha_search':
                        bocha_api_key = conf().get("bocha_api_key", "")
                        if bocha_api_key:
                            tool.config = {"bocha_api_key": bocha_api_key}
                            tool.api_key = bocha_api_key
                    tools.append(tool)
            except Exception as e:
                logger.warning(f"[AgentBridge] Failed to load tool {tool_name} for session {session_id}: {e}")
@@ -767,23 +759,52 @@ class AgentBridge:
            if not agent:
                return Reply(ReplyType.ERROR, "Failed to initialize super agent")
-            # Attach context to scheduler tool if present
+            # Filter tools based on context
-            if context and agent.tools:
+            original_tools = agent.tools
-                for tool in agent.tools:
+            filtered_tools = original_tools
                    if tool.name == "scheduler":
                        try:
                            from agent.tools.scheduler.integration import attach_scheduler_to_tool
                            attach_scheduler_to_tool(tool, context)
                        except Exception as e:
                            logger.warning(f"[AgentBridge] Failed to attach context to scheduler: {e}")
                        break
-            # Use agent's run_stream method
+            # If this is a scheduled task execution, exclude scheduler tool to prevent recursion
-            response = agent.run_stream(
+            if context and context.get("is_scheduled_task"):
-                user_message=query,
+                filtered_tools = [tool for tool in agent.tools if tool.name != "scheduler"]
-                on_event=on_event,
+                agent.tools = filtered_tools
-                clear_history=clear_history
+                logger.info(f"[AgentBridge] Scheduled task execution: excluded scheduler tool ({len(filtered_tools)}/{len(original_tools)} tools)")
-            )
+            else:
                # Attach context to scheduler tool if present
                if context and agent.tools:
                    for tool in agent.tools:
                        if tool.name == "scheduler":
                            try:
                                from agent.tools.scheduler.integration import attach_scheduler_to_tool
                                attach_scheduler_to_tool(tool, context)
                            except Exception as e:
                                logger.warning(f"[AgentBridge] Failed to attach context to scheduler: {e}")
                            break
            try:
                # Use agent's run_stream method
                response = agent.run_stream(
                    user_message=query,
                    on_event=on_event,
                    clear_history=clear_history
                )
            finally:
                # Restore original tools
                if context and context.get("is_scheduled_task"):
                    agent.tools = original_tools
            # Check if there are files to send (from read tool)
            if hasattr(agent, 'stream_executor') and hasattr(agent.stream_executor, 'files_to_send'):
                files_to_send = agent.stream_executor.files_to_send
                if files_to_send:
                    # Send the first file (for now, handle one file at a time)
                    file_info = files_to_send[0]
                    logger.info(f"[AgentBridge] Sending file: {file_info.get('path')}")
                    # Clear files_to_send for next request
                    agent.stream_executor.files_to_send = []
                    # Return file reply based on file type
                    return self._create_file_reply(file_info, response, context)
            return Reply(ReplyType.TEXT, response)
@@ -791,12 +812,53 @@ class AgentBridge:
            logger.error(f"Agent reply error: {e}")
            return Reply(ReplyType.ERROR, f"Agent error: {str(e)}")
    def _create_file_reply(self, file_info: dict, text_response: str, context: Context = None) -> Reply:
        """
        Create a reply for sending files

        Images become an IMAGE_URL reply and documents a FILE reply, both
        carrying a ``file://`` URL built from the local path. Every other
        file type is reported as plain text with a file marker appended.

        Args:
            file_info: File metadata from read tool; the keys read here are
                "file_type", "path", "file_name" and "message"
            text_response: Text response from agent
            context: Context object (not used by this method)

        Returns:
            Reply object for file sending
        """
        file_type = file_info.get("file_type", "file")
        file_path = file_info.get("path")

        # Without a path there is nothing to upload: the channel would receive
        # "file://None", and os.path.basename(None) raises TypeError. Degrade to
        # a plain text reply instead of failing the whole request.
        if file_type in ("image", "document") and not file_path:
            logger.warning(f"[AgentBridge] File metadata has no path, sending text only: {file_info}")
            return Reply(ReplyType.TEXT, text_response or file_info.get("message", "文件已准备"))

        # For images, use IMAGE_URL type (channel will handle upload)
        if file_type == "image":
            # Convert local path to file:// URL for channel processing
            file_url = f"file://{file_path}"
            logger.info(f"[AgentBridge] Sending image: {file_url}")
            reply = Reply(ReplyType.IMAGE_URL, file_url)
            # Attach text message if present (for channels that support text+image)
            if text_response:
                reply.text_content = text_response  # Store accompanying text
            return reply

        # For documents (PDF, Excel, Word, PPT), use FILE type
        if file_type == "document":
            file_url = f"file://{file_path}"
            logger.info(f"[AgentBridge] Sending document: {file_url}")
            reply = Reply(ReplyType.FILE, file_url)
            # Derive the name from the path only when the metadata has no usable name
            reply.file_name = file_info.get("file_name") or os.path.basename(file_path)
            return reply

        # For other files (video, audio), we need channel-specific handling
        # For now, return text with file info
        # TODO: Implement video/audio sending when channel supports it
        message = text_response or file_info.get("message", "文件已准备")
        message += f"\n\n[文件: {file_info.get('file_name', file_path)}]"
        return Reply(ReplyType.TEXT, message)
    def _migrate_config_to_env(self, workspace_root: str):
        """
        Migrate API keys from config.json to .env file if not already set
        Args:
-            workspace_root: Workspace directory path
+            workspace_root: Workspace directory path (not used, kept for compatibility)
        """
        from config import conf
        import os
@@ -810,7 +872,8 @@ class AgentBridge:
            "linkai_api_key": "LINKAI_API_KEY",
        }
-        env_file = os.path.join(workspace_root, '.env')
+        # Use fixed secure location for .env file
        env_file = os.path.expanduser("~/.cow/.env")
        # Read existing env vars from .env file
        existing_env_vars = {}
@@ -830,19 +893,25 @@ class AgentBridge:
        for config_key, env_key in key_mapping.items():
            # Skip if already in .env file
            if env_key in existing_env_vars:
                logger.debug(f"[AgentBridge] Skipping {env_key} - already in .env")
                continue
            # Get value from config.json
            value = conf().get(config_key, "")
            if value and value.strip():  # Only migrate non-empty values
                keys_to_migrate[env_key] = value.strip()
                logger.debug(f"[AgentBridge] Will migrate {env_key} from config.json")
            else:
                logger.debug(f"[AgentBridge] Skipping {env_key} - no value in config.json")
        # Write new keys to .env file
        if keys_to_migrate:
            try:
-                # Ensure .env file exists
+                # Ensure ~/.cow directory and .env file exist
                env_dir = os.path.dirname(env_file)
                if not os.path.exists(env_dir):
                    os.makedirs(env_dir, exist_ok=True)
                if not os.path.exists(env_file):
                    os.makedirs(os.path.dirname(env_file), exist_ok=True)
                    open(env_file, 'a').close()
                # Append new keys
--- a/channel/chat_channel.py
+++ b/channel/chat_channel.py
@@ -64,15 +64,22 @@ class ChatChannel(Channel):
                        check_contain(group_name, group_name_keyword_white_list),
                    ]
                ):
-                    group_chat_in_one_session = conf().get("group_chat_in_one_session", [])
+                    # Check global group_shared_session config first
-                    session_id = cmsg.actual_user_id
+                    group_shared_session = conf().get("group_shared_session", True)
-                    if any(
+                    if group_shared_session:
-                        [
+                        # All users in the group share the same session
                            group_name in group_chat_in_one_session,
                            "ALL_GROUP" in group_chat_in_one_session,
                        ]
                    ):
                        session_id = group_id
                    else:
                        # Check group-specific whitelist (legacy behavior)
                        group_chat_in_one_session = conf().get("group_chat_in_one_session", [])
                        session_id = cmsg.actual_user_id
                        if any(
                            [
                                group_name in group_chat_in_one_session,
                                "ALL_GROUP" in group_chat_in_one_session,
                            ]
                        ):
                            session_id = group_id
                else:
                    logger.debug(f"No need reply, groupName not in whitelist, group_name={group_name}")
                    return None
@@ -283,7 +290,98 @@ class ChatChannel(Channel):
            reply = e_context["reply"]
            if not e_context.is_pass() and reply and reply.type:
                logger.debug("[chat_channel] ready to send reply: {}, context: {}".format(reply, context))
-                self._send(reply, context)
+                
                # 如果是文本回复，尝试提取并发送图片
                if reply.type == ReplyType.TEXT:
                    self._extract_and_send_images(reply, context)
                # 如果是图片回复但带有文本内容，先发文本再发图片
                elif reply.type == ReplyType.IMAGE_URL and hasattr(reply, 'text_content') and reply.text_content:
                    # 先发送文本
                    text_reply = Reply(ReplyType.TEXT, reply.text_content)
                    self._send(text_reply, context)
                    # 短暂延迟后发送图片
                    time.sleep(0.3)
                    self._send(reply, context)
                else:
                    self._send(reply, context)
    def _extract_and_send_images(self, reply: Reply, context: Context):
        """
        Extract image/video references from a text reply and send them separately.

        Supported formats: [图片: /path/to/image.png], [视频: /path/to/video.mp4],
        ![](url), <img src="url">, <video src="url">, and bare http(s) URLs that
        end in a known image or video extension.

        The text reply itself is always sent first and unchanged; at most 5
        media items are sent after it.

        Args:
            reply: Text reply whose ``content`` is scanned
            context: Context passed through to ``_send``
        """
        content = reply.content
        if not isinstance(content, str):
            # re.findall would raise on non-string content; send the reply as-is.
            self._send(reply, context)
            return

        # (regex, media type) pairs; each regex yields the URL or local path
        patterns = [
            (r'\[图片:\s*([^\]]+)\]', 'image'),   # [图片: /path/to/image.png]
            (r'\[视频:\s*([^\]]+)\]', 'video'),   # [视频: /path/to/video.mp4]
            (r'!\[.*?\]\(([^\)]+)\)', 'image'),   # ![alt](url) - treated as image
            (r'<img[^>]+src=["\']([^"\']+)["\']', 'image'),  # <img src="url">
            (r'<video[^>]+src=["\']([^"\']+)["\']', 'video'),  # <video src="url">
            (r'https?://[^\s]+\.(?:jpg|jpeg|png|gif|webp)', 'image'),  # bare image URL
            (r'https?://[^\s]+\.(?:mp4|avi|mov|wmv|flv)', 'video'),  # bare video URL
        ]

        # Collect candidates pattern by pattern, dropping duplicates while
        # keeping first-seen order.
        seen = set()
        media_items = []  # [(url, type), ...]
        for pattern, media_type in patterns:
            for match in re.findall(pattern, content, re.IGNORECASE):
                # Captures such as "[图片: /tmp/a.png ]" keep surrounding
                # whitespace, which would break both the os.path.exists check
                # and the de-duplication below.
                url = match.strip()
                if not url or url in seen:
                    continue
                seen.add(url)
                media_items.append((url, media_type))
        media_items = media_items[:5]

        if not media_items:
            # No media found, send the text normally
            self._send(reply, context)
            return

        logger.info(f"[chat_channel] Extracted {len(media_items)} media item(s) from reply")
        # Send the text first (original text is left untouched)
        self._send(reply, context)

        # NOTE(review): local paths come straight from the reply text, so any
        # existing file mentioned there is sent to the chat - consider limiting
        # this to an allowed directory.
        for i, (url, media_type) in enumerate(media_items):
            try:
                is_remote = url.startswith(('http://', 'https://'))
                if not is_remote and not os.path.exists(url):
                    logger.warning(f"[chat_channel] Media file not found or invalid URL: {url}")
                    continue

                # Remote resources are passed through; local files become file:// URLs
                target = url if is_remote else f"file://{url}"
                if media_type == 'video':
                    # Videos are sent as FILE replies
                    media_reply = Reply(ReplyType.FILE, target)
                    media_reply.file_name = os.path.basename(url)
                else:
                    # Images are sent as IMAGE_URL replies
                    media_reply = Reply(ReplyType.IMAGE_URL, target)

                # Small delay between media sends to avoid rate limiting
                if i > 0:
                    time.sleep(0.5)
                self._send(media_reply, context)
                logger.info(f"[chat_channel] Sent {media_type} {i+1}/{len(media_items)}: {url[:50]}...")
            except Exception as e:
                # Best effort: one failed item must not stop the remaining ones
                logger.error(f"[chat_channel] Failed to send {media_type} {url}: {e}")
    def _send(self, reply: Reply, context: Context, retry_cnt=0):
        try:
--- a/channel/dingtalk/dingtalk_channel.py
+++ b/channel/dingtalk/dingtalk_channel.py
@@ -9,6 +9,7 @@ import json
 # -*- coding=utf-8 -*-
 import logging
 import time
 import requests
 import dingtalk_stream
 from dingtalk_stream import AckMessage
@@ -107,16 +108,156 @@ class DingTalkChanel(ChatChannel, dingtalk_stream.ChatbotHandler):
        conf()["group_name_white_list"] = ["ALL_GROUP"]
        # 单聊无需前缀
        conf()["single_chat_prefix"] = [""]
        # Access token cache
        self._access_token = None
        self._access_token_expires_at = 0
        # Robot code cache (extracted from incoming messages)
        self._robot_code = None
    def startup(self):
        """Create a DingTalk stream client, register this channel as the chatbot message handler, and start it."""
        stream_client = dingtalk_stream.DingTalkStreamClient(
            dingtalk_stream.Credential(self.dingtalk_client_id, self.dingtalk_client_secret)
        )
        # Incoming chatbot messages on this topic are dispatched to this instance
        chatbot_topic = dingtalk_stream.chatbot.ChatbotMessage.TOPIC
        stream_client.register_callback_handler(chatbot_topic, self)
        stream_client.start_forever()
    def get_access_token(self):
        """
        Return the access_token of the enterprise internal app, reusing the cached one while it is valid.
        Docs: https://open.dingtalk.com/document/orgapp/obtain-orgapp-token

        Returns:
            The access token string, or None when the request fails or the
            response carries no "accessToken".
        """
        now = time.time()

        # Serve from cache until the recorded expiry time is reached
        cached_token = self._access_token
        if cached_token and now < self._access_token_expires_at:
            return cached_token

        # Cache miss or expired: request a fresh token
        payload = {
            "appKey": self.dingtalk_client_id,
            "appSecret": self.dingtalk_client_secret
        }
        try:
            response = requests.post(
                "https://api.dingtalk.com/v1.0/oauth2/accessToken",
                headers={"Content-Type": "application/json"},
                json=payload,
                timeout=10,
            )
            result = response.json()
            if response.status_code != 200 or "accessToken" not in result:
                logger.error(f"[DingTalk] Failed to get access token: {result}")
                return None

            self._access_token = result["accessToken"]
            # Expire the cache 5 minutes early; 7200s is assumed when "expireIn" is absent
            lifetime = result.get("expireIn", 7200)
            self._access_token_expires_at = now + lifetime - 300
            logger.info("[DingTalk] Access token refreshed successfully")
            return self._access_token
        except Exception as e:
            logger.error(f"[DingTalk] Error getting access token: {e}")
            return None
    def send_single_message(self, user_id: str, content: str, robot_code: str) -> bool:
        """
        Send a text message to a single user (private chat).
        API: https://open.dingtalk.com/document/orgapp/chatbots-send-one-on-one-chat-messages-in-batches

        Args:
            user_id: Recipient user id, sent as the only entry of "userIds"
            content: Text content of the message
            robot_code: Robot code; required

        Returns:
            True when the API answers 200 with a truthy "processQueryKey", otherwise False.
        """
        token = self.get_access_token()
        if not token:
            logger.error("[DingTalk] Failed to send single message: Access token not available.")
            return False
        if not robot_code:
            logger.error("[DingTalk] Cannot send single message: robot_code is required")
            return False

        endpoint = "https://api.dingtalk.com/v1.0/robot/oToMessages/batchSend"
        request_headers = {
            "x-acs-dingtalk-access-token": token,
            "Content-Type": "application/json"
        }
        # msgParam is a JSON-encoded string nested inside the JSON body
        payload = {
            "msgParam": json.dumps({"content": content}),
            "msgKey": "sampleText",
            "userIds": [user_id],
            "robotCode": robot_code
        }
        logger.info(f"[DingTalk] Sending single message to user {user_id} with robot_code {robot_code}")
        try:
            response = requests.post(endpoint, headers=request_headers, json=payload, timeout=10)
            result = response.json()
            if response.status_code != 200 or not result.get("processQueryKey"):
                logger.error(f"[DingTalk] Failed to send single message: {result}")
                return False
            logger.info(f"[DingTalk] Single message sent successfully to {user_id}")
            return True
        except Exception as e:
            logger.error(f"[DingTalk] Error sending single message: {e}")
            return False
    def send_group_message(self, conversation_id: str, content: str, robot_code: str = None):
        """
        Proactively send a text message to a group chat.
        Docs: https://open.dingtalk.com/document/orgapp/the-robot-sends-a-group-message

        Args:
            conversation_id: Conversation id (openConversationId)
            content: Message content
            robot_code: Robot code; required despite the None default - a
                missing value is logged as an error and the send is skipped

        Returns:
            True when the API answers with HTTP 200, otherwise False.
        """
        token = self.get_access_token()
        if not token:
            logger.error("[DingTalk] Cannot send group message: no access token")
            return False
        # Validate robot_code
        if not robot_code:
            logger.error("[DingTalk] Cannot send group message: robot_code is required")
            return False

        endpoint = "https://api.dingtalk.com/v1.0/robot/groupMessages/send"
        request_headers = {
            "x-acs-dingtalk-access-token": token,
            "Content-Type": "application/json"
        }
        # msgParam is a JSON-encoded string nested inside the JSON body
        payload = {
            "msgParam": json.dumps({"content": content}),
            "msgKey": "sampleText",
            "openConversationId": conversation_id,
            "robotCode": robot_code
        }
        try:
            response = requests.post(endpoint, headers=request_headers, json=payload, timeout=10)
            result = response.json()
            if response.status_code != 200:
                logger.error(f"[DingTalk] Failed to send group message: {result}")
                return False
            logger.info(f"[DingTalk] Group message sent successfully to {conversation_id}")
            return True
        except Exception as e:
            logger.error(f"[DingTalk] Error sending group message: {e}")
            return False
    async def process(self, callback: dingtalk_stream.CallbackMessage):
        try:
            incoming_message = dingtalk_stream.ChatbotMessage.from_dict(callback.data)
            # Debug: 打印完整的 event 数据
            logger.info(f"[DingTalk] ===== Incoming Message Debug =====")
            logger.info(f"[DingTalk] callback.data keys: {callback.data.keys() if hasattr(callback.data, 'keys') else 'N/A'}")
            logger.info(f"[DingTalk] incoming_message attributes: {dir(incoming_message)}")
            logger.info(f"[DingTalk] robot_code: {getattr(incoming_message, 'robot_code', 'N/A')}")
            logger.info(f"[DingTalk] chatbot_corp_id: {getattr(incoming_message, 'chatbot_corp_id', 'N/A')}")
            logger.info(f"[DingTalk] chatbot_user_id: {getattr(incoming_message, 'chatbot_user_id', 'N/A')}")
            logger.info(f"[DingTalk] conversation_id: {getattr(incoming_message, 'conversation_id', 'N/A')}")
            logger.info(f"[DingTalk] Raw callback.data: {callback.data}")
            logger.info(f"[DingTalk] =====================================")
            image_download_handler = self  # 传入方法所在的类实例
            dingtalk_msg = DingTalkMessage(incoming_message, image_download_handler)
@@ -174,8 +315,48 @@ class DingTalkChanel(ChatChannel, dingtalk_stream.ChatbotHandler):
    def send(self, reply: Reply, context: Context):
        receiver = context["receiver"]
-        isgroup = context.kwargs['msg'].is_group
+        
-        incoming_message = context.kwargs['msg'].incoming_message
+        # Check if msg exists (for scheduled tasks, msg might be None)
        msg = context.kwargs.get('msg')
        if msg is None:
            # 定时任务场景：使用主动发送 API
            is_group = context.get("isgroup", False)
            logger.info(f"[DingTalk] Sending scheduled task message to {receiver} (is_group={is_group})")
            # 使用缓存的 robot_code 或配置的值
            robot_code = self._robot_code or conf().get("dingtalk_robot_code")
            logger.info(f"[DingTalk] Using robot_code: {robot_code}, cached: {self._robot_code}, config: {conf().get('dingtalk_robot_code')}")
            if not robot_code:
                logger.error(f"[DingTalk] Cannot send scheduled task: robot_code not available. Please send at least one message to the bot first, or configure dingtalk_robot_code in config.json")
                return
            # 根据是否群聊选择不同的 API
            if is_group:
                success = self.send_group_message(receiver, reply.content, robot_code)
            else:
                # 单聊场景：尝试从 context 中获取 dingtalk_sender_staff_id
                sender_staff_id = context.get("dingtalk_sender_staff_id")
                if not sender_staff_id:
                    logger.error(f"[DingTalk] Cannot send single chat scheduled message: sender_staff_id not available in context")
                    return
                logger.info(f"[DingTalk] Sending single message to staff_id: {sender_staff_id}")
                success = self.send_single_message(sender_staff_id, reply.content, robot_code)
            if not success:
                logger.error(f"[DingTalk] Failed to send scheduled task message")
            return
        # 从正常消息中提取并缓存 robot_code
        if hasattr(msg, 'robot_code'):
            robot_code = msg.robot_code
            if robot_code and robot_code != self._robot_code:
                self._robot_code = robot_code
                logger.info(f"[DingTalk] Cached robot_code: {robot_code}")
        isgroup = msg.is_group
        incoming_message = msg.incoming_message
        if conf().get("dingtalk_card_enabled"):
            logger.info("[Dingtalk] sendMsg={}, receiver={}".format(reply, receiver))
--- a/channel/dingtalk/dingtalk_message.py
+++ b/channel/dingtalk/dingtalk_message.py
@@ -22,6 +22,7 @@ class DingTalkMessage(ChatMessage):
        self.create_time = event.create_at
        self.image_content = event.image_content
        self.rich_text_content = event.rich_text_content
        self.robot_code = event.robot_code  # 机器人编码
        if event.conversation_type == "1":
            self.is_group = False
        else:
--- a/channel/feishu/feishu_channel.py
+++ b/channel/feishu/feishu_channel.py
@@ -204,10 +204,36 @@ class FeiShuChanel(ChatChannel):
            # 图片上传
            reply_content = self._upload_image_url(reply.content, access_token)
            if not reply_content:
-                logger.warning("[FeiShu] upload file failed")
+                logger.warning("[FeiShu] upload image failed")
                return
            msg_type = "image"
            content_key = "image_key"
        elif reply.type == ReplyType.FILE:
            # 判断是否为视频文件
            file_path = reply.content
            if file_path.startswith("file://"):
                file_path = file_path[7:]
            is_video = file_path.lower().endswith(('.mp4', '.avi', '.mov', '.wmv', '.flv'))
            if is_video:
                # 视频使用 media 类型
                file_key = self._upload_video_url(reply.content, access_token)
                if not file_key:
                    logger.warning("[FeiShu] upload video failed")
                    return
                reply_content = file_key
                msg_type = "media"
                content_key = "file_key"
            else:
                # 其他文件使用 file 类型
                file_key = self._upload_file_url(reply.content, access_token)
                if not file_key:
                    logger.warning("[FeiShu] upload file failed")
                    return
                reply_content = file_key
                msg_type = "file"
                content_key = "file_key"
        # Check if we can reply to an existing message (need msg_id)
        can_reply = is_group and msg and hasattr(msg, 'msg_id') and msg.msg_id
@@ -260,7 +286,34 @@ class FeiShuChanel(ChatChannel):
    def _upload_image_url(self, img_url, access_token):
-        logger.debug(f"[WX] start download image, img_url={img_url}")
+        logger.debug(f"[FeiShu] start process image, img_url={img_url}")
        # Check if it's a local file path (file:// protocol)
        if img_url.startswith("file://"):
            local_path = img_url[7:]  # Remove "file://" prefix
            logger.info(f"[FeiShu] uploading local file: {local_path}")
            if not os.path.exists(local_path):
                logger.error(f"[FeiShu] local file not found: {local_path}")
                return None
            # Upload directly from local file
            upload_url = "https://open.feishu.cn/open-apis/im/v1/images"
            data = {'image_type': 'message'}
            headers = {'Authorization': f'Bearer {access_token}'}
            with open(local_path, "rb") as file:
                upload_response = requests.post(upload_url, files={"image": file}, data=data, headers=headers)
                logger.info(f"[FeiShu] upload file, res={upload_response.content}")
                response_data = upload_response.json()
                if response_data.get("code") == 0:
                    return response_data.get("data").get("image_key")
                else:
                    logger.error(f"[FeiShu] upload failed: {response_data}")
                    return None
        # Original logic for HTTP URLs
        response = requests.get(img_url)
        suffix = utils.get_path_suffix(img_url)
        temp_name = str(uuid.uuid4()) + "." + suffix
@@ -283,6 +336,207 @@ class FeiShuChanel(ChatChannel):
            os.remove(temp_name)
            return upload_response.json().get("data").get("image_key")
    def _upload_video_url(self, video_url, access_token):
        """
        Upload a video to Feishu and return its file_key (for media type messages).

        Supports:
        - file:// URLs: the local file is uploaded directly
        - any other URL: handled as http(s), i.e. downloaded to a temporary file
          in the current working directory, uploaded, and the temporary file is
          removed again on every exit path

        video_url: file:// or http(s):// location of the video
        access_token: bearer token placed in the Authorization header

        Returns the file_key from the upload response, or None on any failure
        (missing local file, non-200 download, request/IO exception, or an
        upload response whose "code" is not 0).
        """
        # Upload video to Feishu (use file upload API, but send as media type)
        upload_url = "https://open.feishu.cn/open-apis/im/v1/files"
        headers = {'Authorization': f'Bearer {access_token}'}
        # Media type only supports mp4, so every upload is declared as mp4
        # regardless of the file extension.
        file_type = 'mp4'

        # For file:// URLs (local files), upload directly
        if video_url.startswith("file://"):
            local_path = video_url[7:]  # Remove file:// prefix
            if not os.path.exists(local_path):
                logger.error(f"[FeiShu] local video file not found: {local_path}")
                return None
            file_name = os.path.basename(local_path)
            data = {'file_type': file_type, 'file_name': file_name}
            try:
                with open(local_path, "rb") as file:
                    upload_response = requests.post(
                        upload_url,
                        files={"file": file},
                        data=data,
                        headers=headers,
                        timeout=(5, 60)  # 5s connect, 60s read timeout (videos are larger)
                    )
                logger.info(f"[FeiShu] upload video response, status={upload_response.status_code}, res={upload_response.content}")
                response_data = upload_response.json()
                if response_data.get("code") == 0:
                    return response_data.get("data").get("file_key")
                logger.error(f"[FeiShu] upload video failed: {response_data}")
                return None
            except Exception as e:
                logger.error(f"[FeiShu] upload video exception: {e}")
                return None

        # For HTTP URLs, download first then upload
        import uuid
        from urllib.parse import urlparse

        temp_name = None  # set once the temporary download file is about to exist
        try:
            logger.info(f"[FeiShu] Downloading video from URL: {video_url}")
            response = requests.get(video_url, timeout=(5, 60))
            if response.status_code != 200:
                logger.error(f"[FeiShu] download video failed, status={response.status_code}")
                return None
            # Use only the URL path for the name so that a query string or
            # fragment (e.g. "?token=...") never ends up in the file name.
            file_name = os.path.basename(urlparse(video_url).path) or "video.mp4"
            temp_name = str(uuid.uuid4()) + "_" + file_name
            with open(temp_name, "wb") as file:
                file.write(response.content)
            logger.info(f"[FeiShu] Video downloaded, size={len(response.content)} bytes, uploading...")
            data = {'file_type': file_type, 'file_name': file_name}
            with open(temp_name, "rb") as file:
                upload_response = requests.post(
                    upload_url,
                    files={"file": file},
                    data=data,
                    headers=headers,
                    timeout=(5, 60)
                )
            logger.info(f"[FeiShu] upload video, res={upload_response.content}")
            response_data = upload_response.json()
            if response_data.get("code") == 0:
                return response_data.get("data").get("file_key")
            logger.error(f"[FeiShu] upload video failed: {response_data}")
            return None
        except Exception as e:
            logger.error(f"[FeiShu] upload video from URL exception: {e}")
            return None
        finally:
            # The file handle is closed by now (required for removal on Windows);
            # this runs for success, API failure and exceptions alike.
            if temp_name and os.path.exists(temp_name):
                try:
                    os.remove(temp_name)
                except OSError as e:
                    logger.warning(f"[FeiShu] failed to remove temp video file {temp_name}: {e}")
    def _upload_file_url(self, file_url, access_token):
        """
        Upload a file to Feishu and return its file_key.

        Supports both local files (file://) and HTTP URLs. Anything that does
        not start with "file://" is downloaded to a temporary file in the
        current working directory, uploaded, and the temporary file is removed
        again on every exit path.

        file_url: file:// or http(s):// location of the file
        access_token: bearer token placed in the Authorization header

        Returns the file_key from the upload response, or None on any failure
        (missing local file, non-200 download, request/IO exception, or an
        upload response whose "code" is not 0).
        """
        logger.debug(f"[FeiShu] start process file, file_url={file_url}")

        upload_url = "https://open.feishu.cn/open-apis/im/v1/files"
        headers = {'Authorization': f'Bearer {access_token}'}
        # Determine file type for Feishu API
        # Feishu supports: opus, mp4, pdf, doc, xls, ppt, stream (other types)
        file_type_map = {
            '.opus': 'opus',
            '.mp4': 'mp4',
            '.pdf': 'pdf',
            '.doc': 'doc', '.docx': 'doc',
            '.xls': 'xls', '.xlsx': 'xls',
            '.ppt': 'ppt', '.pptx': 'ppt',
        }

        # Check if it's a local file path (file:// protocol)
        if file_url.startswith("file://"):
            local_path = file_url[7:]  # Remove "file://" prefix
            logger.info(f"[FeiShu] uploading local file: {local_path}")
            if not os.path.exists(local_path):
                logger.error(f"[FeiShu] local file not found: {local_path}")
                return None
            file_name = os.path.basename(local_path)
            file_ext = os.path.splitext(file_name)[1].lower()
            file_type = file_type_map.get(file_ext, 'stream')  # Default to stream for other types
            data = {'file_type': file_type, 'file_name': file_name}
            try:
                with open(local_path, "rb") as file:
                    upload_response = requests.post(
                        upload_url,
                        files={"file": file},
                        data=data,
                        headers=headers,
                        timeout=(5, 30)  # 5s connect, 30s read timeout
                    )
                logger.info(f"[FeiShu] upload file response, status={upload_response.status_code}, res={upload_response.content}")
                response_data = upload_response.json()
                if response_data.get("code") == 0:
                    return response_data.get("data").get("file_key")
                logger.error(f"[FeiShu] upload file failed: {response_data}")
                return None
            except Exception as e:
                logger.error(f"[FeiShu] upload file exception: {e}")
                return None

        # For HTTP URLs, download first then upload
        import uuid
        from urllib.parse import urlparse

        temp_name = None  # set once the temporary download file is about to exist
        try:
            response = requests.get(file_url, timeout=(5, 30))
            if response.status_code != 200:
                logger.error(f"[FeiShu] download file failed, status={response.status_code}")
                return None
            # Use only the URL path for the name: a query string would otherwise
            # leak into the file name and hide the real extension from the
            # file_type lookup. Fall back to a generic name for paths ending in "/".
            file_name = os.path.basename(urlparse(file_url).path) or "file"
            temp_name = str(uuid.uuid4()) + "_" + file_name
            with open(temp_name, "wb") as file:
                file.write(response.content)
            file_ext = os.path.splitext(file_name)[1].lower()
            file_type = file_type_map.get(file_ext, 'stream')
            data = {'file_type': file_type, 'file_name': file_name}
            with open(temp_name, "rb") as file:
                upload_response = requests.post(
                    upload_url,
                    files={"file": file},
                    data=data,
                    headers=headers,
                    timeout=(5, 30)  # same limits as the local-file branch; never block forever
                )
            logger.info(f"[FeiShu] upload file, res={upload_response.content}")
            response_data = upload_response.json()
            if response_data.get("code") == 0:
                return response_data.get("data").get("file_key")
            logger.error(f"[FeiShu] upload file failed: {response_data}")
            return None
        except Exception as e:
            logger.error(f"[FeiShu] upload file from URL exception: {e}")
            return None
        finally:
            # The file handle is closed by now (required for removal on Windows);
            # this runs for success, API failure and exceptions alike, so the
            # temporary download is never left behind.
            if temp_name and os.path.exists(temp_name):
                try:
                    os.remove(temp_name)
                except OSError as e:
                    logger.warning(f"[FeiShu] failed to remove temp file {temp_name}: {e}")
    def _compose_context(self, ctype: ContextType, content, **kwargs):
        context = Context(ctype, content)
        context.kwargs = kwargs
@@ -291,13 +545,18 @@ class FeiShuChanel(ChatChannel):
        cmsg = context["msg"]
-        # Set session_id based on chat type to ensure proper session isolation
+        # Set session_id based on chat type
        if cmsg.is_group:
-            # Group chat: combine user_id and group_id to create unique session per user per group
+            # Group chat: check if group_shared_session is enabled
-            # This ensures:
+            if conf().get("group_shared_session", True):
-            # - Same user in different groups have separate conversation histories
+                # All users in the group share the same session context
-            # - Same user in private chat and group chat have separate histories
+                context["session_id"] = cmsg.other_user_id  # group_id
-            context["session_id"] = f"{cmsg.from_user_id}:{cmsg.other_user_id}"
+            else:
                # Each user has their own session within the group
                # This ensures:
                # - Same user in different groups have separate conversation histories
                # - Same user in private chat and group chat have separate histories
                context["session_id"] = f"{cmsg.from_user_id}:{cmsg.other_user_id}"
        else:
            # Private chat: use user_id only
            context["session_id"] = cmsg.from_user_id
--- a/channel/feishu/feishu_message.py
+++ b/channel/feishu/feishu_message.py
@@ -1,10 +1,12 @@
 from bridge.context import ContextType
 from channel.chat_message import ChatMessage
 import json
 import os
 import requests
 from common.log import logger
 from common.tmp_dir import TmpDir
 from common import utils
 from config import conf
 class FeishuMessage(ChatMessage):
@@ -22,6 +24,99 @@ class FeishuMessage(ChatMessage):
            self.ctype = ContextType.TEXT
            content = json.loads(msg.get('content'))
            self.content = content.get("text").strip()
        elif msg_type == "image":
            # 单张图片消息，不处理和存储
            self.ctype = ContextType.IMAGE
            content = json.loads(msg.get("content"))
            image_key = content.get("image_key")
            # 仅记录图片key，不下载
            self.content = f"[图片: {image_key}]"
            logger.info(f"[FeiShu] Received single image message, key={image_key}, skipped download")
        elif msg_type == "post":
            # 富文本消息，可能包含图片、文本等多种元素
            content = json.loads(msg.get("content"))
            # 飞书富文本消息结构：content 直接包含 title 和 content 数组
            # 不是嵌套在 post 字段下
            title = content.get("title", "")
            content_list = content.get("content", [])
            logger.info(f"[FeiShu] Post message - title: '{title}', content_list length: {len(content_list)}")
            # 收集所有图片和文本
            image_keys = []
            text_parts = []
            if title:
                text_parts.append(title)
            for block in content_list:
                logger.debug(f"[FeiShu] Processing block: {block}")
                # block 本身就是元素列表
                if not isinstance(block, list):
                    continue
                for element in block:
                    element_tag = element.get("tag")
                    logger.debug(f"[FeiShu] Element tag: {element_tag}, element: {element}")
                    if element_tag == "img":
                        # 找到图片元素
                        image_key = element.get("image_key")
                        if image_key:
                            image_keys.append(image_key)
                    elif element_tag == "text":
                        # 文本元素
                        text_content = element.get("text", "")
                        if text_content:
                            text_parts.append(text_content)
            logger.info(f"[FeiShu] Parsed - images: {len(image_keys)}, text_parts: {text_parts}")
            # 富文本消息统一作为文本消息处理
            self.ctype = ContextType.TEXT
            if image_keys:
                # 如果包含图片，下载并在文本中引用本地路径
                workspace_root = os.path.expanduser(conf().get("agent_workspace", "~/cow"))
                tmp_dir = os.path.join(workspace_root, "tmp")
                os.makedirs(tmp_dir, exist_ok=True)
                # 保存图片路径映射
                self.image_paths = {}
                for image_key in image_keys:
                    image_path = os.path.join(tmp_dir, f"{image_key}.png")
                    self.image_paths[image_key] = image_path
                def _download_images():
                    for image_key, image_path in self.image_paths.items():
                        url = f"https://open.feishu.cn/open-apis/im/v1/messages/{self.msg_id}/resources/{image_key}"
                        headers = {"Authorization": "Bearer " + access_token}
                        params = {"type": "image"}
                        response = requests.get(url=url, headers=headers, params=params)
                        if response.status_code == 200:
                            with open(image_path, "wb") as f:
                                f.write(response.content)
                            logger.info(f"[FeiShu] Image downloaded from post message, key={image_key}, path={image_path}")
                        else:
                            logger.error(f"[FeiShu] Failed to download image from post, key={image_key}, status={response.status_code}")
                # 立即下载图片，不使用延迟下载
                # 因为 TEXT 类型消息不会调用 prepare()
                _download_images()
                # 构建消息内容：文本 + 图片路径
                content_parts = []
                if text_parts:
                    content_parts.append("\n".join(text_parts).strip())
                for image_key, image_path in self.image_paths.items():
                    content_parts.append(f"[图片: {image_path}]")
                self.content = "\n".join(content_parts)
                logger.info(f"[FeiShu] Received post message with {len(image_keys)} image(s) and text: {self.content}")
            else:
                # 纯文本富文本消息
                self.content = "\n".join(text_parts).strip() if text_parts else "[富文本消息]"
                logger.info(f"[FeiShu] Received post message (text only): {self.content}")
        elif msg_type == "file":
            self.ctype = ContextType.FILE
            content = json.loads(msg.get("content"))
--- a/config-template.json
+++ b/config-template.json
@@ -20,9 +20,7 @@
    "Agent测试群",
    "ChatGPT测试群2"
  ],
-  "image_create_prefix": [
+  "image_create_prefix": [""],
    "画"
  ],
  "speech_recognition": true,
  "group_speech_recognition": false,
  "voice_reply_voice": false,
--- a/config.py
+++ b/config.py
@@ -35,6 +35,7 @@ available_setting = {
    "group_name_white_list": ["ChatGPT测试群", "ChatGPT测试群2"],  # 开启自动回复的群名称列表
    "group_name_keyword_white_list": [],  # 开启自动回复的群名称关键词列表
    "group_chat_in_one_session": ["ChatGPT测试群"],  # 支持会话上下文共享的群名称
    "group_shared_session": True,  # 群聊是否共享会话上下文（所有成员共享），默认为True。False时每个用户在群内有独立会话
    "nick_name_black_list": [],  # 用户昵称黑名单
    "group_welcome_msg": "",  # 配置新人进群固定欢迎语，不配置则使用随机风格欢迎
    "trigger_by_self": False,  # 是否允许机器人触发
--- a/models/claudeapi/claude_api_bot.py
+++ b/models/claudeapi/claude_api_bot.py
@@ -365,6 +365,7 @@ class ClaudeAPIBot(Bot, OpenAIImage):
        # Track tool use state
        tool_uses_map = {}  # {index: {id, name, input}}
        current_tool_use_index = -1
        stop_reason = None  # Track stop reason from Claude
        try:
            # Make streaming HTTP request
@@ -440,6 +441,12 @@ class ClaudeAPIBot(Bot, OpenAIImage):
                                        tool_uses_map[current_tool_use_index]["input"] += delta.get("partial_json", "")
                            elif event_type == "message_delta":
                                # Extract stop_reason from delta
                                delta = event.get("delta", {})
                                if "stop_reason" in delta:
                                    stop_reason = delta.get("stop_reason")
                                    logger.info(f"[Claude] Stream stop_reason: {stop_reason}")
                                # Message complete - yield tool calls if any
                                if tool_uses_map:
                                    for idx in sorted(tool_uses_map.keys()):
@@ -462,9 +469,13 @@ class ClaudeAPIBot(Bot, OpenAIImage):
                                                        }
                                                    }]
                                                },
-                                                "finish_reason": None
+                                                "finish_reason": stop_reason
                                            }]
                                        }
                            elif event_type == "message_stop":
                                # Final event - log completion
                                logger.debug(f"[Claude] Stream completed with stop_reason: {stop_reason}")
                        except json.JSONDecodeError:
                            continue
--- a/skills/linkai-agent/README.md
+++ b/skills/linkai-agent/README.md
@@ -0,0 +1,297 @@
 # LinkAI Agent Skill
 这个 skill 允许你调用 LinkAI 平台上的多个应用(App)和工作流(Workflow)，通过简单的配置即可集成多个智能体能力。
 ## 特性
 - ✅ **多应用支持** - 在一个配置文件中管理多个 LinkAI 应用/工作流
 - ✅ **动态加载** - skill 系统加载时自动从 `config.json` 读取应用列表
 - ✅ **自动技能描述** - 所有配置的应用会自动添加到技能描述中
 - ✅ **模型切换** - 可以为每个请求指定不同的模型
 - ✅ **知识库集成** - 支持应用绑定的知识库
 - ✅ **插件能力** - 支持应用启用的各类插件
 - ✅ **工作流执行** - 支持执行复杂的多步骤工作流
 ## 快速开始
 ### 1. 配置 API Key
 ```bash
 env_config(action="set", key="LINKAI_API_KEY", value="your-linkai-api-key")
 ```
 获取 API Key: https://link-ai.tech/console/interface
 ### 2. 配置应用列表
 将 `config.json.template` 复制为 `config.json`：
 ```bash
 cp config.json.template config.json
 ```
 编辑 `config.json`，添加你的应用/工作流：
 ```json
 {
  "apps": [
    {
      "app_code": "G7z6vKwp",
      "app_name": "通用助手",
      "app_description": "通用AI助手，可以回答各类问题"
    },
    {
      "app_code": "your_kb_app",
      "app_name": "产品文档助手",
      "app_description": "基于产品文档知识库的问答助手"
    },
    {
      "app_code": "your_workflow",
      "app_name": "数据分析工作流",
      "app_description": "执行数据清洗、分析和可视化的完整工作流"
    }
  ]
 }
 ```
 **注意：** 修改 `config.json` 后，Agent 在下次加载技能时会自动读取新配置。
 ### 3. 调用应用
 ```bash
 bash scripts/call.sh "G7z6vKwp" "What is artificial intelligence?"
 ```
 ## 使用示例
 ### 基础调用
 ```bash
 # 调用默认模型
 bash scripts/call.sh "G7z6vKwp" "解释一下量子计算"
 ```
 ### 指定模型
 ```bash
 # 使用 GPT-4.1 模型
 bash scripts/call.sh "G7z6vKwp" "写一篇关于AI的文章" "LinkAI-4.1"
 # 使用 DeepSeek 模型
 bash scripts/call.sh "G7z6vKwp" "帮我写代码" "deepseek-chat"
 # 使用 Claude 模型
 bash scripts/call.sh "G7z6vKwp" "分析这段文本" "claude-4-sonnet"
 ```
 ### 调用工作流
 ```bash
 # 工作流会按照配置的节点顺序执行
 bash scripts/call.sh "workflow_code" "输入数据或问题"
 ```
 ## ⚠️ 重要提示
 ### 超时配置
 LinkAI 应用（特别是视频/图片生成、复杂工作流）可能需要较长时间处理。
 **脚本内置超时**：
 - 默认：120 秒（适合大多数场景）
 - 可通过第 5 个参数自定义：`bash scripts/call.sh <app_code> <question> "" "false" "180"`
 **推荐超时时间**：
 - **文本问答**：120 秒（默认）
 - **图片生成**：120-180 秒
 - **视频生成**：180-300 秒
 Agent 调用时会自动设置合适的超时时间。
 ## 配置说明
 ### config.json 字段
 | 字段 | 类型 | 说明 |
 |------|------|------|
 | `app_code` | string | 应用或工作流的唯一标识码，从 LinkAI 控制台获取 |
 | `app_name` | string | 应用名称，会显示在技能描述中 |
 | `app_description` | string | 应用功能描述，帮助 Agent 理解何时使用该应用 |
 ### 获取 app_code
 1. 登录 [LinkAI 控制台](https://link-ai.tech/console)
 2. 进入「应用管理」或「工作流管理」
 3. 选择要集成的应用/工作流
 4. 在应用详情页找到 `app_code`
 ## 支持的模型
 LinkAI 支持多种主流 AI 模型：
 **OpenAI 系列：**
 - `LinkAI-4.1` - GPT-4.1 (1000K 上下文)
 - `LinkAI-4.1-mini` - GPT-4.1 mini (1000K)
 - `LinkAI-4.1-nano` - GPT-4.1 nano (1000K)
 - `LinkAI-4o` - GPT-4o (128K)
 - `LinkAI-4o-mini` - GPT-4o mini (128K)
 **DeepSeek 系列：**
 - `deepseek-chat` - DeepSeek-V3 对话模型 (64K)
 - `deepseek-reasoner` - DeepSeek-R1 推理模型 (64K)
 **Claude 系列：**
 - `claude-4-sonnet` - Claude 4 Sonnet (200K)
 - `claude-3-7-sonnet` - Claude 3.7 (200K)
 - `claude-3-5-sonnet` - Claude 3.5 (200K)
 **Google 系列：**
 - `gemini-2.5-pro` - Gemini 2.5 Pro (1000K)
 - `gemini-2.0-flash` - Gemini 2.0 Flash (1000K)
 **国产模型：**
 - `qwen3` - 通义千问3 (128K)
 - `wenxin-4.5` - 文心一言4.5 (8K)
 - `doubao-1.5-pro-256k` - 豆包1.5 (256K)
 - `glm-4-plus` - 智谱GLM-4-Plus (4K)
 完整模型列表：https://link-ai.tech/console/models
 ## 应用类型
 ### 1. 普通应用
 配置了系统提示词和参数的标准对话应用，可以：
 - 设置角色和性格
 - 绑定知识库
 - 启用插件（图像识别、网页搜索、代码执行等）
 ### 2. 知识库应用
 基于特定知识库的问答应用，适合：
 - 企业内部知识库
 - 产品文档问答
 - 客户支持
 ### 3. 工作流
 多步骤的自动化流程，可以：
 - 串联多个处理节点
 - 条件分支
 - 循环处理
 - 调用外部 API
 ## 响应格式
 ### 成功响应
 ```json
 {
  "app_code": "G7z6vKwp",
  "content": "人工智能（AI）是计算机科学的一个分支...",
  "usage": {
    "prompt_tokens": 10,
    "completion_tokens": 150,
    "total_tokens": 160
  }
 }
 ```
 ### 错误响应
 ```json
 {
  "error": "LinkAI API error",
  "message": "应用不存在",
  "response": { ... }
 }
 ```
 ## 常见错误
 ### LINKAI_API_KEY environment variable is not set
 **原因：** 未配置 API Key  
 **解决：** 使用 `env_config` 工具设置 LINKAI_API_KEY
 ### 应用不存在 (402)
 **原因：** app_code 不正确或应用已删除  
 **解决：** 检查 app_code 是否正确，确认应用存在
 ### 无访问权限 (403)
 **原因：** 尝试访问他人的私有应用  
 **解决：** 确保应用是公开的或你是创建者
 ### 账号积分额度不足 (406)
 **原因：** LinkAI 账户余额不足  
 **解决：** 前往控制台充值
 ### 内容审核不通过 (409)
 **原因：** 请求或响应包含敏感内容  
 **解决：** 修改输入内容，避免敏感词
 ## 技术实现
 ### 自动技能描述生成
 当 skill 系统加载 `linkai-agent` 时，会自动：
 1. 读取 `config.json` 中的应用列表
 2. 将每个应用的 name 和 description 动态添加到技能描述中
 3. Agent 加载时会看到完整的应用列表
 这是在 `agent/skills/loader.py` 中实现的特殊处理。
 ### 工作流程
 ```
 用户配置 config.json
  ↓
 Agent 启动/重新加载技能
  ↓
 SkillLoader 检测到 linkai-agent
  ↓
 动态读取 config.json
  ↓
 生成包含所有应用描述的 description
  ↓
 Agent 看到所有可用应用的完整信息
  ↓
 用户请求触发
  ↓
 Agent 根据描述选择合适的应用
  ↓
 调用 call.sh <app_code> <question>
  ↓
 LinkAI API 处理并返回结果
 ```
 ## 最佳实践
 1. **清晰的描述** - 为每个应用写清晰、具体的描述，帮助 Agent 理解应用用途
 2. **合理分工** - 不同应用负责不同领域，避免功能重叠
 3. **无需重启** - 修改 config.json 后，Agent 下次加载技能时会自动更新
 4. **模型选择** - 根据任务复杂度选择合适的模型
 5. **知识库优化** - 为专业领域的应用绑定相关知识库
 ## 扩展用法
 ### 在 Agent 系统中使用
 当 Agent 系统加载这个 skill 时，会自动从 `config.json` 读取应用列表并生成描述：
 ```
 Call LinkAI apps/workflows. 通用助手(G7z6vKwp: 通用AI助手，可以回答各类问题); 产品文档助手(your_kb_app: 基于产品文档知识库的问答助手); 数据分析工作流(your_workflow: 执行数据清洗、分析和可视化的完整工作流)
 ```
 Agent 会根据用户问题自动选择最合适的应用进行调用。
 ## 相关链接
 - LinkAI 平台: https://link-ai.tech
 - API 文档: https://docs.link-ai.tech
 - 控制台: https://link-ai.tech/console
 - 模型列表: https://link-ai.tech/console/models
 - 应用广场: https://link-ai.tech/square
 ## License
 Part of the chatgpt-on-wechat project.
--- a/skills/linkai-agent/SKILL.md
+++ b/skills/linkai-agent/SKILL.md
@@ -0,0 +1,165 @@
 ---
 name: linkai-agent
 description: Call LinkAI applications and workflows. Use bash command to execute like 'bash <base_dir>/scripts/call.sh <app_code> <question>'.
 homepage: https://link-ai.tech
 metadata:
  emoji: 🤖
  requires:
    bins: ["curl"]
    env: ["LINKAI_API_KEY"]
  primaryEnv: "LINKAI_API_KEY"
 ---
 # LinkAI Agent Caller
 Call LinkAI applications and workflows through API. Supports multiple apps/workflows configured in config.json.
 The available apps are dynamically loaded from `config.json` at skill loading time.
 ## Setup
 This skill requires a LinkAI API key. If not configured:
 1. Get your API key from https://link-ai.tech/console/api-keys
 2. Set the key using: `env_config(action="set", key="LINKAI_API_KEY", value="your-key")`
 ## Configuration
 1. Copy `config.json.template` to `config.json`
 2. Configure your apps/workflows:
 ```json
 {
  "apps": [
    {
      "app_code": "your_app_code",
      "app_name": "App Name",
      "app_description": "What this app does"
    }
  ]
 }
 ```
 3. The skill description will be automatically updated when the agent loads this skill
 ## Usage
 **Important**: Scripts are located relative to this skill's base directory.
 When you see this skill in `<available_skills>`, note the `<base_dir>` path.
 **CRITICAL**: Always use `bash` command to execute the script:
 ```bash
 # General pattern (MUST start with bash):
 bash "<base_dir>/scripts/call.sh" "<app_code>" "<question>" [model] [stream] [timeout]
 # DO NOT execute the script directly like this (WRONG):
 # "<base_dir>/scripts/call.sh" ...
 # Parameters:
 # - app_code: LinkAI app or workflow code (required)
 # - question: User question (required)
 # - model: Override model (optional, uses app default if not specified)
 # - stream: Enable streaming (true/false, default: false)
 # - timeout: curl timeout in seconds (default: 120; pass a larger value for video/image generation)
 ```
 **IMPORTANT - Timeout Configuration**:
 - The script has a **default timeout of 120 seconds** (suitable for most cases)
 - For complex tasks (video generation, large workflows), pass a longer timeout as the 5th parameter
 - The bash tool also needs sufficient timeout - set its `timeout` parameter accordingly
 - Example: `bash(command="bash <script> <app_code> <question> '' 'false' 180", timeout=200)`
 ## Examples
 ### Call an app (script default timeout: 120s)
 ```bash
 bash(command='bash "<base_dir>/scripts/call.sh" "G7z6vKwp" "What is AI?"', timeout=130)
 ```
 ### Call an app with specific model
 ```bash
 bash(command='bash "<base_dir>/scripts/call.sh" "G7z6vKwp" "Explain machine learning" "LinkAI-4.1"', timeout=130)
 ```
 ### Call a workflow with custom timeout (video generation)
 ```bash
 # Pass the timeout as the 5th parameter to the script, and set the bash tool timeout slightly longer
 bash(command='bash "<base_dir>/scripts/call.sh" "workflow_code" "Generate a sunset video" "" "false" "180"', timeout=200)
 ```
 ```bash
 bash "<base_dir>/scripts/call.sh" "workflow_code" "Analyze this data: ..."
 ```
 ## Supported Models
 You can specify any LinkAI supported model:
 - `LinkAI-4.1` - Latest GPT-4.1 model (1000K context)
 - `LinkAI-4.1-mini` - GPT-4.1 mini (1000K context)
 - `LinkAI-4o` - GPT-4o model (128K context)
 - `LinkAI-4o-mini` - GPT-4o mini (128K context)
 - `deepseek-chat` - DeepSeek-V3 (64K context)
 - `deepseek-reasoner` - DeepSeek-R1 reasoning model
 - `claude-4-sonnet` - Claude 4 Sonnet (200K context)
 - `gemini-2.5-pro` - Gemini 2.5 Pro (1000K context)
 - And many more...
 Full model list: https://link-ai.tech/console/models
 ## Response Format
 Success response:
 ```json
 {
  "app_code": "G7z6vKwp",
  "content": "AI stands for Artificial Intelligence...",
  "usage": {
    "prompt_tokens": 10,
    "completion_tokens": 50,
    "total_tokens": 60
  }
 }
 ```
 Error response:
 ```json
 {
  "error": "Error description",
  "message": "Detailed error message"
 }
 ```
 ## Features
 - ✅ **Multiple Apps**: Configure and call multiple LinkAI apps/workflows
 - ✅ **Dynamic Loading**: Apps are loaded from config.json at runtime
 - ✅ **Model Override**: Optionally specify model per request
 - ✅ **Streaming Support**: Enable streaming output
 - ✅ **Knowledge Base**: Apps can use configured knowledge bases
 - ✅ **Plugins**: Apps can use enabled plugins (image recognition, web search, etc.)
 - ✅ **Workflows**: Execute complex multi-step workflows
 ## Notes
 - Each app/workflow maintains its own configuration (prompt, model, temperature, etc.)
 - Apps can have knowledge bases attached for domain-specific Q&A
 - Workflows execute from start node to end node and return final output
 - Token usage and costs depend on the model used
 - See LinkAI documentation for pricing: https://link-ai.tech/console/funds
 - The skill description is automatically generated from config.json when loaded
 ## Troubleshooting
 **"LINKAI_API_KEY environment variable is not set"**
 - Use env_config tool to set the API key
 **"app_code is required"**
 - Make sure you're passing the app_code as the first parameter
 **"应用不存在" (App not found)**
 - Check that the app_code is correct
 - Ensure you have access to the app
 **"账号积分额度不足" (Insufficient credits)**
 - Top up your LinkAI account credits
--- a/skills/linkai-agent/config.json.template
+++ b/skills/linkai-agent/config.json.template
@@ -0,0 +1,14 @@
 {
  "apps": [
    {
      "app_code": "your_app_code_2",
      "app_name": "知识库助手",
      "app_description": "基于特定领域知识库提供智能问答的知识助手"
    },
    {
      "app_code": "your_workflow_code",
      "app_name": "数据分析工作流",
      "app_description": "用于数据分析任务的工作流程"
    }
  ]
 }
--- a/skills/linkai-agent/scripts/call.sh
+++ b/skills/linkai-agent/scripts/call.sh
@@ -0,0 +1,138 @@
 #!/usr/bin/env bash
 # LinkAI Agent Caller
 # API Docs: https://api.link-ai.tech/v1/chat/completions
 #
 # Usage: bash call.sh <app_code> <question> [model] [stream] [timeout]
 #   app_code  LinkAI app or workflow code (required)
 #   question  user question (required)
 #   model     model override (optional; omitted from the request when empty)
 #   stream    "true" or "false" (default: false)
 #   timeout   curl --max-time in seconds (default: 120)
 #
 # Requires LINKAI_API_KEY in the environment.
 # Prints a JSON document on stdout. Exits 1 after printing an {"error": ...}
 # object when arguments are invalid, curl fails, or the API reports an error.
 set -euo pipefail
 app_code="${1:-}"
 question="${2:-}"
 model="${3:-}"
 stream="${4:-false}"
 timeout="${5:-120}"  # Default 120 seconds for video/image generation
 usage='bash call.sh <app_code> <question> [model] [stream] [timeout]'

 # Escape $1 so it can be placed between the quotes of a JSON string literal.
 # Backslash must be handled first so later escapes are not doubled.
 json_escape() {
     local s=$1
     local nl=$'\n' cr=$'\r' tab=$'\t'
     s=${s//\\/\\\\}
     s=${s//\"/\\\"}
     s=${s//"$nl"/\\n}
     s=${s//"$cr"/\\r}
     s=${s//"$tab"/\\t}
     printf '%s' "$s"
 }

 # Print the body of the first JSON string value stored under key $1 in
 # $response, exactly as it appears there (JSON escapes left intact, so it can
 # be re-embedded in JSON output verbatim). Prints nothing when the key is
 # absent. Never returns non-zero: under `set -e -o pipefail` a grep without a
 # match would otherwise abort the whole script.
 extract_string() {
     grep -oE '"'"$1"'"[[:space:]]*:[[:space:]]*"([^"\\]|\\.)*"' <<< "$response" \
         | head -1 \
         | sed -E 's/^[^:]*:[[:space:]]*"(.*)"$/\1/' || true
 }

 # Print the first unsigned integer stored under key $1 in $response, or
 # nothing when absent. Never returns non-zero (see extract_string).
 extract_number() {
     grep -oE '"'"$1"'"[[:space:]]*:[[:space:]]*[0-9]+' <<< "$response" \
         | head -1 \
         | grep -oE '[0-9]+$' || true
 }

 if [ -z "$app_code" ]; then
     echo "{\"error\": \"app_code is required\", \"usage\": \"$usage\"}"
     exit 1
 fi
 if [ -z "$question" ]; then
     echo "{\"error\": \"question is required\", \"usage\": \"$usage\"}"
     exit 1
 fi
 # stream is written into the request body as a bare JSON literal, so only the
 # two JSON booleans are acceptable.
 if [ "$stream" != "true" ] && [ "$stream" != "false" ]; then
     echo "{\"error\": \"stream must be true or false\", \"usage\": \"$usage\"}"
     exit 1
 fi
 if [ -z "${LINKAI_API_KEY:-}" ]; then
     echo '{"error": "LINKAI_API_KEY environment variable is not set", "help": "Use env_config to set LINKAI_API_KEY"}'
     exit 1
 fi

 # API endpoint
 api_url="https://api.link-ai.tech/v1/chat/completions"

 # Build JSON request body. Every user-supplied value is escaped so that quotes,
 # backslashes or newlines in the question cannot break or alter the JSON.
 model_field=""
 if [ -n "$model" ]; then
     model_field=$(printf '"model": "%s", ' "$(json_escape "$model")")
 fi
 request_body=$(printf '{"app_code": "%s", %s"messages": [{"role": "user", "content": "%s"}], "stream": %s}' \
         "$(json_escape "$app_code")" "$model_field" "$(json_escape "$question")" "$stream")

 # Call LinkAI API. The call sits inside `if` because with `set -e` a failing
 # plain assignment would terminate the script before any error could be printed.
 if ! response=$(curl -sS --max-time "$timeout" \
         -X POST \
         -H "Authorization: Bearer $LINKAI_API_KEY" \
         -H "Content-Type: application/json" \
         -d "$request_body" \
         "$api_url" 2>&1); then
     echo "{\"error\": \"Failed to call LinkAI API\", \"details\": \"$(json_escape "$response")\"}"
     exit 1
 fi

 # Simple JSON validation
 if [[ ! "$response" =~ ^[[:space:]]*[\{\[] ]]; then
     echo "{\"error\": \"Invalid JSON response from API\", \"response\": \"$(json_escape "$response")\"}"
     exit 1
 fi

 # Check for API error (top-level error only, not content_filter_result).
 # Here-strings are used instead of `echo | grep -q`: with pipefail, grep -q
 # exiting early can make the pipeline report failure even though it matched.
 if grep -q '^[[:space:]]*{[[:space:]]*"error"[[:space:]]*:' <<< "$response" \
         || grep -q '"error"[[:space:]]*:[[:space:]]*{[^}]*"code"[[:space:]]*:[[:space:]]*"[^"]*"[^}]*"message"' <<< "$response"; then
     # Make sure it's not just content_filter_result inside choices
     if ! grep -q '"choices"[[:space:]]*:[[:space:]]*\[' <<< "$response"; then
         # Extract error message and (string-valued) error code
         error_msg=$(extract_string message)
         error_code=$(extract_string code)
         if [ -z "$error_msg" ]; then
             error_msg="Unknown API error"
         fi
         # Provide friendly error message for content filter
         if [ "$error_code" = "content_filter_error" ] || grep -qi "content.*filter" <<< "$error_msg"; then
             echo "{\"error\": \"内容安全审核\", \"message\": \"您的问题或应用返回的内容触发了LinkAI的安全审核机制，请换一种方式提问或检查应用配置\", \"details\": \"$error_msg\"}"
         else
             echo "{\"error\": \"LinkAI API error\", \"message\": \"$error_msg\", \"code\": \"$error_code\"}"
         fi
         exit 1
     fi
 fi

 # For non-stream mode, extract and format the response
 if [ "$stream" = "false" ]; then
     # First "content" string in the response, kept JSON-escaped
     content=$(extract_string content)
     # Usage information
     prompt_tokens=$(extract_number prompt_tokens)
     completion_tokens=$(extract_number completion_tokens)
     total_tokens=$(extract_number total_tokens)
     if [ -n "$content" ]; then
         # content is emitted with its JSON escapes intact so the output below
         # stays valid JSON even when the answer contains quotes or newlines.
         printf '{\n  "app_code": "%s",\n  "content": "%s",\n  "usage": {\n    "prompt_tokens": %s,\n    "completion_tokens": %s,\n    "total_tokens": %s\n  }\n}\n' \
                 "$(json_escape "$app_code")" "$content" \
                 "${prompt_tokens:-0}" "${completion_tokens:-0}" "${total_tokens:-0}"
     else
         # Return full response if we can't extract content
         printf '%s\n' "$response"
     fi
 else
     # For stream mode, return raw response (caller needs to handle streaming)
     printf '%s\n' "$response"
 fi
--- a/skills/openai-image-vision/EXAMPLE.md
+++ b/skills/openai-image-vision/EXAMPLE.md
@@ -0,0 +1,168 @@
 # OpenAI Image Vision - Usage Examples
 ## Setup
 Set up your API credentials using the agent's env_config tool:
 ```bash
 # Set your OpenAI API key
 env_config(action="set", key="OPENAI_API_KEY", value="sk-your-api-key-here")
 # Optional: Set custom API base URL (for proxy or compatible services)
 env_config(action="set", key="OPENAI_API_BASE", value="https://api.openai.com/v1")
 ```
 ## Example 1: Analyze a Local Image
 ```bash
 bash scripts/vision.sh "/path/to/photo.jpg" "What's in this image?"
 ```
 **Expected Output:**
 ```json
 {
  "model": "gpt-4.1-mini",
  "content": "The image shows a beautiful landscape with mountains in the background and a lake in the foreground. The sky is clear with some clouds, and there are trees along the shoreline.",
  "usage": {
    "prompt_tokens": 1234,
    "completion_tokens": 45,
    "total_tokens": 1279
  }
 }
 ```
 ## Example 2: Analyze an Image from URL
 ```bash
 bash scripts/vision.sh "https://example.com/image.jpg" "Describe this image in detail"
 ```
 ## Example 3: Extract Text (OCR)
 ```bash
 bash scripts/vision.sh "document.png" "Extract all text from this image"
 ```
 **Use Case:** Extract text from screenshots, scanned documents, or photos of text.
 ## Example 4: Identify Objects
 ```bash
 bash scripts/vision.sh "scene.jpg" "List all objects you can identify in this image"
 ```
 ## Example 5: Analyze Colors and Composition
 ```bash
 bash scripts/vision.sh "artwork.jpg" "Describe the color palette and composition of this image"
 ```
 ## Example 6: Count Items
 ```bash
 bash scripts/vision.sh "crowd.jpg" "How many people are in this image?"
 ```
 ## Example 7: Use Different Models
 ```bash
 # Use gpt-4.1-mini (default, latest mini model)
 bash scripts/vision.sh "image.jpg" "Analyze this" "gpt-4.1-mini"
 # Use gpt-4.1 (most capable, best for complex analysis)
 bash scripts/vision.sh "image.jpg" "Analyze this" "gpt-4.1"
 # Use gpt-4o-mini (previous mini model)
 bash scripts/vision.sh "image.jpg" "Analyze this" "gpt-4o-mini"
 ```
 ## Example 8: Complex Analysis
 ```bash
 bash scripts/vision.sh "product.jpg" "Analyze this product image. Describe the product, its features, colors, and suggest what kind of marketing copy would work well for it."
 ```
 ## Example 9: Safety and Content Moderation
 ```bash
 bash scripts/vision.sh "content.jpg" "Is there any inappropriate or unsafe content in this image?"
 ```
 ## Example 10: Technical Analysis
 ```bash
 bash scripts/vision.sh "diagram.png" "Explain what this technical diagram represents and how it works"
 ```
 ## Integration with Agent
 When the agent loads this skill, it will be available in the `<available_skills>` section. The agent can use it like:
 ```bash
 bash "<base_dir>/scripts/vision.sh" "user_uploaded_image.jpg" "What's in this image?"
 ```
 The `<base_dir>` will be automatically provided by the skill system.
 ## Error Handling Examples
 ### Missing API Key
 ```bash
 $ bash scripts/vision.sh "image.jpg" "What is this?"
 {"error": "OPENAI_API_KEY environment variable is not set", "help": "Visit https://platform.openai.com/api-keys to get an API key"}
 ```
 ### File Not Found
 ```bash
 $ bash scripts/vision.sh "nonexistent.jpg" "What is this?"
 {"error": "Image file not found", "path": "nonexistent.jpg"}
 ```
 ### Unsupported Format
 ```bash
 $ bash scripts/vision.sh "file.bmp" "What is this?"
 {"error": "Unsupported image format", "extension": "bmp", "supported": ["jpg", "jpeg", "png", "gif", "webp"]}
 ```
 ### Missing Parameters
 ```bash
 $ bash scripts/vision.sh
 {"error": "Image path or URL is required", "usage": "bash vision.sh <image_path_or_url> <question> [model]"}
 ```
 ## Tips for Best Results
 1. **Be Specific**: Ask clear, specific questions about what you want to know
 2. **Image Quality**: Higher quality images generally produce better results
 3. **Model Selection**: 
   - Use `gpt-4.1` for complex analysis requiring highest accuracy
   - Use `gpt-4.1-mini` (default) for most tasks - latest mini model with good balance
 4. **Text Extraction**: For OCR tasks, ensure text is clearly visible and not too small
 5. **Multiple Aspects**: You can ask about multiple things in one question
 6. **Context**: Provide context in your question if needed (e.g., "This is a medical scan, what do you see?")
 ## Performance Notes
 - **Local Files**: Automatically base64-encoded, adds ~33% size overhead
 - **URLs**: Passed directly to API, no encoding overhead
 - **Timeout**: 60 seconds for API calls
 - **Max Tokens**: 1000 tokens for responses (configurable in script)
 - **Rate Limits**: Subject to your OpenAI API plan
 ## Supported Image Formats
 ✅ JPEG (`.jpg`, `.jpeg`)  
 ✅ PNG (`.png`)  
 ✅ GIF (`.gif`)  
 ✅ WebP (`.webp`)  
 ❌ BMP, TIFF, SVG, and other formats are not supported
 ## Cost Considerations
 Vision API calls cost more than text-only calls because they include image tokens. Costs vary by:
 - Model used (gpt-4.1 vs gpt-4.1-mini)
 - Image size and resolution
 - Length of response
 Check OpenAI's pricing page for current rates: https://openai.com/pricing
--- a/skills/openai-image-vision/README.md
+++ b/skills/openai-image-vision/README.md
@@ -0,0 +1,178 @@
 # OpenAI Image Vision Skill
 This skill enables image analysis using OpenAI's Vision API (GPT-4 Vision models).
 ## Features
 - ✅ Analyze images from local files or URLs
 - ✅ Support for multiple image formats (JPEG, PNG, GIF, WebP)
 - ✅ Automatic base64 encoding for local files
 - ✅ Direct URL passing for remote images
 - ✅ Configurable model selection
 - ✅ Custom API base URL support
 - ✅ Pure bash/curl implementation (no Python dependencies)
 ## Quick Start
 1. **Set up API credentials using env_config:**
   ```bash
   env_config(action="set", key="OPENAI_API_KEY", value="sk-your-api-key-here")
   # Optional: custom API base
   env_config(action="set", key="OPENAI_API_BASE", value="https://api.openai.com/v1")
   ```
 2. **Analyze an image:**
   ```bash
   bash scripts/vision.sh "/path/to/photo.jpg" "What's in this image?"
   ```
 3. **Analyze from URL:**
   ```bash
   bash scripts/vision.sh "https://example.com/image.jpg" "Describe this image"
   ```
 ## Usage Examples
 ### Basic image analysis
 ```bash
 bash scripts/vision.sh "photo.jpg" "What objects can you see?"
 ```
 ### Text extraction (OCR)
 ```bash
 bash scripts/vision.sh "document.png" "Extract all text from this image"
 ```
 ### Detailed description
 ```bash
 bash scripts/vision.sh "scene.jpg" "Describe this scene in detail, including colors, mood, and composition"
 ```
 ### Using different models
 ```bash
 # Use gpt-4.1-mini (default, latest mini model)
 bash scripts/vision.sh "image.jpg" "Analyze this" "gpt-4.1-mini"
 # Use gpt-4.1 (most capable, latest model)
 bash scripts/vision.sh "image.jpg" "Analyze this" "gpt-4.1"
 # Use gpt-4o-mini (previous mini model)
 bash scripts/vision.sh "image.jpg" "Analyze this" "gpt-4o-mini"
 ```
 ## Environment Variables
 | Variable | Required | Default | Description |
 |----------|----------|---------|-------------|
 | `OPENAI_API_KEY` | Yes | - | Your OpenAI API key |
 | `OPENAI_API_BASE` | No | `https://api.openai.com/v1` | Custom API base URL |
 ## Response Format
 Success response:
 ```json
 {
  "model": "gpt-4.1-mini",
  "content": "The image shows a beautiful sunset over mountains...",
  "usage": {
    "prompt_tokens": 1234,
    "completion_tokens": 567,
    "total_tokens": 1801
  }
 }
 ```
 Error response:
 ```json
 {
  "error": "Error description",
  "details": "Additional information"
 }
 ```
 ## Supported Models
 - `gpt-4.1-mini` (default) - Latest mini model, fast and cost-effective
 - `gpt-4.1` - Latest GPT-4 variant, most capable
 - `gpt-4o-mini` - Previous generation mini model
 - `gpt-4-turbo` - Previous generation turbo model
 ## Supported Image Formats
 - JPEG (`.jpg`, `.jpeg`)
 - PNG (`.png`)
 - GIF (`.gif`)
 - WebP (`.webp`)
 ## Technical Details
 - **Implementation**: Pure bash script using curl and base64
 - **Timeout**: 60 seconds for API calls
 - **Max tokens**: 1000 tokens for responses
 - **Image handling**: 
  - Local files are automatically base64-encoded
  - URLs are passed directly to the API
  - MIME types are auto-detected from file extensions
 ## Error Handling
 The script handles various error cases:
 - Missing required parameters
 - Missing API key
 - File not found
 - Unsupported image formats
 - API errors
 - Network timeouts
 - Invalid JSON responses
 ## Integration with Agent System
 When loaded by the agent system, this skill will appear in `<available_skills>` with a `<base_dir>` path. Use it like:
 ```bash
 bash "<base_dir>/scripts/vision.sh" "image.jpg" "What's in this image?"
 ```
 The agent will automatically:
 - Load environment variables from `~/.cow/.env`
 - Provide the correct `<base_dir>` path
 - Handle skill discovery and registration
 ## Notes
 - Images are sent to OpenAI's servers for processing
 - Large images may be automatically resized by the API
 - Rate limits depend on your OpenAI API plan
 - Token usage includes both the image and text in the prompt
 - Base64 encoding increases the size of local images by ~33%
 ## Troubleshooting
 **"OPENAI_API_KEY environment variable is not set"**
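 - Set the key with the agent's env_config tool: `env_config(action="set", key="OPENAI_API_KEY", value="sk-your-api-key-here")`
 - Or, when running the script by hand, export `OPENAI_API_KEY` in your shell before calling it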
 **"Image file not found"**
 - Check the file path is correct
 - Use absolute paths or paths relative to current directory
 **"Unsupported image format"**
 - Only JPEG, PNG, GIF, and WebP are supported
 - Check the file extension matches the actual format
 **"Failed to call OpenAI API"**
 - Check your internet connection
 - Verify the API key is valid
 - Check if custom API base URL is correct
 ## License
 Part of the chatgpt-on-wechat project.
--- a/skills/openai-image-vision/SKILL.md
+++ b/skills/openai-image-vision/SKILL.md
@@ -0,0 +1,119 @@
 ---
 name: openai-image-vision
 description: Analyze images using OpenAI's Vision API. Use bash command to execute the vision script like 'bash <base_dir>/scripts/vision.sh <image> <question>'. Can understand image content, objects, text, colors, and answer questions about images.
 homepage: https://platform.openai.com/docs/guides/vision
 metadata:
  emoji: 👁️
  requires:
    bins: ["curl", "base64"]
    env: ["OPENAI_API_KEY"]
  primaryEnv: "OPENAI_API_KEY"
 ---
 # OpenAI Image Vision
 Analyze images using OpenAI's GPT-4 Vision API. The model can understand visual elements including objects, shapes, colors, textures, and text within images.
 ## Setup
 This skill requires an OpenAI API key. If not configured:
 1. Get your API key from https://platform.openai.com/api-keys
 2. Set the key using: `env_config(action="set", key="OPENAI_API_KEY", value="your-key")`
 Optional: Set custom API base URL (default: https://api.openai.com/v1):
 ```bash
 env_config(action="set", key="OPENAI_API_BASE", value="your-base-url")
 ```
 ## Usage
 **Important**: Scripts are located relative to this skill's base directory.
 When you see this skill in `<available_skills>`, note the `<base_dir>` path.
 **CRITICAL**: Always use `bash` command to execute the script:
 ```bash
 # General pattern (MUST start with bash):
 bash "<base_dir>/scripts/vision.sh" "<image_path_or_url>" "<question>" [model]
 # DO NOT execute the script directly like this (WRONG):
 # "<base_dir>/scripts/vision.sh" ...
 # Parameters:
 # - image_path_or_url: Local image file path or HTTP(S) URL (required)
 # - question: Question to ask about the image (required)
 # - model: OpenAI model to use (default: gpt-4.1-mini)
 #   Options: gpt-4.1-mini, gpt-4.1, gpt-4o-mini, gpt-4-turbo
 ```
 ## Examples
 ### Analyze a local image
 ```bash
 bash "<base_dir>/scripts/vision.sh" "/path/to/image.jpg" "What's in this image?"
 ```
 ### Analyze an image from URL
 ```bash
 bash "<base_dir>/scripts/vision.sh" "https://example.com/image.jpg" "Describe this image in detail"
 ```
 ### Use specific model
 ```bash
 bash "<base_dir>/scripts/vision.sh" "/path/to/photo.png" "What colors are prominent?" "gpt-4o-mini"
 ```
 ### Extract text from image
 ```bash
 bash "<base_dir>/scripts/vision.sh" "/path/to/document.jpg" "Extract all text from this image"
 ```
 ### Analyze multiple aspects
 ```bash
 bash "<base_dir>/scripts/vision.sh" "image.jpg" "List all objects you can see and describe the overall scene"
 ```
 ## Supported Image Formats
 - JPEG (.jpg, .jpeg)
 - PNG (.png)
 - GIF (.gif)
 - WebP (.webp)
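 **Performance Optimization**: Local files larger than 1MB are downscaled to at most 800px on the longest side before being encoded, provided `sips` (macOS) or ImageMagick's `convert` is installed; if neither tool is available the original file is sent unchanged. This keeps the request payload small. Downscaling can lose fine detail such as small text, so pass a URL instead when full resolution matters.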
 ## Response Format
 The script returns a JSON response:
 ```json
 {
  "model": "gpt-4.1-mini",
  "content": "The image shows...",
  "usage": {
    "prompt_tokens": 1234,
    "completion_tokens": 567,
    "total_tokens": 1801
  }
 }
 ```
 Or in case of error:
 ```json
 {
  "error": "Error description",
  "details": "Additional error information"
 }
 ```
 ## Notes
 - **Image size**: Local files over 1MB are downscaled before upload when `sips` or ImageMagick is available; images given as URLs are sent unchanged
 - **Timeout**: 60 seconds for API calls
 - **Rate limits**: Subject to your OpenAI API plan limits
 - **Privacy**: Images are sent to OpenAI's servers for processing
 - **Local files**: Automatically converted to base64 for API submission
 - **URLs**: Can be passed directly to the API without downloading
--- a/skills/openai-image-vision/scripts/vision.sh
+++ b/skills/openai-image-vision/scripts/vision.sh
@@ -0,0 +1,233 @@
 #!/usr/bin/env bash
 # vision.sh - ask an OpenAI vision-capable chat model a question about an image.
 # Usage: bash vision.sh <image_path_or_url> <question> [model]
 # API Docs: https://platform.openai.com/docs/guides/vision
 set -euo pipefail
 # Positional arguments. Absent ones expand to "" so that `set -u` does not
 # abort before the JSON error messages below can be printed.
 image_input="${1:-}"
 question="${2:-}"
 model="${3:-gpt-4.1-mini}"
 # Validate in order: image, question, API key. The first failing check prints
 # a single-line JSON object on stdout and exits with status 1.
 if [[ -z "$image_input" ]]; then
    printf '%s\n' '{"error": "Image path or URL is required", "usage": "bash vision.sh <image_path_or_url> <question> [model]"}'
    exit 1
 elif [[ -z "$question" ]]; then
    printf '%s\n' '{"error": "Question is required", "usage": "bash vision.sh <image_path_or_url> <question> [model]"}'
    exit 1
 elif [[ -z "${OPENAI_API_KEY:-}" ]]; then
    printf '%s\n' '{"error": "OPENAI_API_KEY environment variable is not set", "help": "Visit https://platform.openai.com/api-keys to get an API key"}'
    exit 1
 fi
 # Endpoint root: OPENAI_API_BASE when set and non-empty, otherwise the official
 # endpoint. One trailing "/" is trimmed so the path appended later is not
 # preceded by a double slash.
 api_base="${OPENAI_API_BASE:-https://api.openai.com/v1}"
 api_base="${api_base%/}"
 # Escape a string so it can be embedded inside a JSON string literal.
 # Covers backslash, double quote, newline, carriage return and tab, which is
 # enough to keep caller-supplied text from corrupting the request body.
 json_escape() {
    local s="$1"
    s=${s//\\/\\\\}
    s=${s//\"/\\\"}
    s=${s//$'\n'/\\n}
    s=${s//$'\r'/\\r}
    s=${s//$'\t'/\\t}
    printf '%s' "$s"
 }
 # Remove the scratch directory used for the downscaled copy, if one was made.
 temp_dir=""
 cleanup_temp() {
    if [ -n "$temp_dir" ]; then
        rm -rf "$temp_dir"
    fi
 }
 # Determine if input is a URL or local file.
 # Either way the result is $image_url, already safe to place in a JSON string:
 # a remote URL is passed through (escaped), a local file becomes a data URI.
 if [[ "$image_input" =~ ^https?:// ]]; then
    # It's a URL - use it directly
    image_url=$(json_escape "$image_input")
 else
    # It's a local file - need to encode as base64
    if [ ! -f "$image_input" ]; then
        echo "{\"error\": \"Image file not found\", \"path\": \"$(json_escape "$image_input")\"}"
        exit 1
    fi
    # Files larger than 1MB are downscaled first to keep the request small
    file_size=$(wc -c < "$image_input" | tr -d ' ')
    max_size=1048576  # 1MB
    image_to_encode="$image_input"
    if [ "$file_size" -gt "$max_size" ]; then
        # A private temp directory is used instead of a suffixed mktemp
        # template: BSD/macOS mktemp only substitutes X's that end the template.
        temp_dir=$(mktemp -d "${TMPDIR:-/tmp}/vision_compressed_XXXXXX")
        temp_compressed="$temp_dir/compressed.jpg"
        # The tools are run directly as `if` conditions so that a failed
        # conversion falls back to the original file instead of tripping
        # `set -e` and leaving the temp files behind.
        if command -v sips &> /dev/null; then
            # macOS: resize to max 800px on longest side. JPEG output is forced
            # so the data matches the .jpg name used for MIME detection below.
            if sips -s format jpeg -Z 800 "$image_input" --out "$temp_compressed" &> /dev/null; then
                image_to_encode="$temp_compressed"
                >&2 echo "[vision.sh] Compressed large image ($(($file_size / 1024))KB) to avoid parameter limit"
            fi
        elif command -v convert &> /dev/null; then
            # Linux: use ImageMagick ("800x800>" only shrinks, never enlarges)
            if convert "$image_input" -resize '800x800>' "$temp_compressed" 2>/dev/null; then
                image_to_encode="$temp_compressed"
                >&2 echo "[vision.sh] Compressed large image ($(($file_size / 1024))KB) to avoid parameter limit"
            fi
        fi
    fi
    # Detect image format from file extension
    extension="${image_to_encode##*.}"
    extension_lower=$(echo "$extension" | tr '[:upper:]' '[:lower:]')
    case "$extension_lower" in
        jpg|jpeg)
            mime_type="image/jpeg"
            ;;
        png)
            mime_type="image/png"
            ;;
        gif)
            mime_type="image/gif"
            ;;
        webp)
            mime_type="image/webp"
            ;;
        *)
            echo "{\"error\": \"Unsupported image format\", \"extension\": \"$(json_escape "$extension")\", \"supported\": [\"jpg\", \"jpeg\", \"png\", \"gif\", \"webp\"]}"
            cleanup_temp
            exit 1
            ;;
    esac
    # Encode image to base64
    if ! command -v base64 &> /dev/null; then
        echo '{"error": "base64 command not found", "help": "Please install base64 utility"}'
        cleanup_temp
        exit 1
    fi
    # Reading from stdin works with both GNU and BSD base64. GNU base64 wraps
    # its output, and a raw newline inside a JSON string is invalid, so line
    # breaks are stripped. A failure yields "" and is reported just below
    # rather than aborting silently under `set -e`.
    base64_image=$(base64 < "$image_to_encode" 2>/dev/null | tr -d '\r\n') || base64_image=""
    # Clean up temp compressed file
    cleanup_temp
    if [ -z "$base64_image" ]; then
        echo "{\"error\": \"Failed to encode image to base64\", \"path\": \"$(json_escape "$image_input")\"}"
        exit 1
    fi
    # base64 text and the fixed MIME strings need no JSON escaping
    image_url="data:$mime_type;base64,$base64_image"
 fi
 # Build JSON request body.
 # printf is a shell builtin, so even a multi-megabyte data URI is not subject
 # to the operating system's argument-length limit at this point.
 printf -v request_body '{"model": "%s", "messages": [{"role": "user", "content": [{"type": "text", "text": "%s"}, {"type": "image_url", "image_url": {"url": "%s"}}]}], "max_tokens": 1000}' \
    "$(json_escape "$model")" "$(json_escape "$question")" "$image_url"
 # Call OpenAI API
 # - The body is streamed on stdin (--data-binary @-) instead of being passed
 #   as an argument, so a large base64 image cannot hit "Argument list too long".
 # - The exit status is captured with `||`: under `set -e` a plain assignment
 #   from a failing command substitution would abort the script before the
 #   error could be reported.
 # - stderr is merged into the captured output so curl's own message (-S) is
 #   available as the error detail.
 curl_exit_code=0
 response=$(printf '%s' "$request_body" | curl -sS --max-time 60 \
    -X POST \
    -H "Authorization: Bearer $OPENAI_API_KEY" \
    -H "Content-Type: application/json" \
    --data-binary @- \
    "$api_base/chat/completions" 2>&1) || curl_exit_code=$?
 if [ "$curl_exit_code" -ne 0 ]; then
    # Escape the captured text so the error report itself is valid JSON
    details=${response//\\/\\\\}
    details=${details//\"/\\\"}
    details=${details//$'\r'/}
    details=${details//$'\n'/ }
    echo "{\"error\": \"Failed to call OpenAI API\", \"details\": \"$details\"}"
    exit 1
 fi
 # Simple JSON validation - check if response starts with { or [
 if [[ ! "$response" =~ ^[[:space:]]*[\{\[] ]]; then
    # The body is arbitrary text here (an HTML error page, for example), so it
    # is escaped before being embedded in the JSON error report.
    raw_response=${response//\\/\\\\}
    raw_response=${raw_response//\"/\\\"}
    raw_response=${raw_response//$'\n'/\\n}
    raw_response=${raw_response//$'\r'/\\r}
    raw_response=${raw_response//$'\t'/\\t}
    echo "{\"error\": \"Invalid JSON response from API\", \"response\": \"$raw_response\"}"
    exit 1
 fi
 # Check for API error (look for "error" field in response)
 # A here-string is used instead of `echo | grep -q`: with pipefail, grep -q
 # exiting early can make the pipeline report failure on a large response.
 if grep -q '"error"[[:space:]]*:[[:space:]]*{' <<< "$response"; then
    # Extract error message if possible. The pattern accepts backslash escapes,
    # so a message containing \" is captured whole and stays a valid JSON
    # string fragment. `|| true` keeps a non-matching grep from aborting the
    # script (set -e + pipefail) before the error is printed.
    error_msg=$(grep -oE '"message"[[:space:]]*:[[:space:]]*"([^"\\]|\\.)*"' <<< "$response" | sed 's/"message"[[:space:]]*:[[:space:]]*"\(.*\)"/\1/' | head -1) || true
    if [ -z "$error_msg" ]; then
        error_msg="Unknown API error"
    fi
    # $response passed the JSON-shape check above and is embedded as a value
    echo "{\"error\": \"OpenAI API error\", \"message\": \"$error_msg\", \"response\": $response}"
    exit 1
 fi
 # Extract the content from the response
 # The response structure is: choices[0].message.content
 # The first "content": "..." pair in the response text is taken. The pattern
 # accepts backslash escapes so an answer containing \" is not cut short.
 # `|| true` matters: under `set -e` + pipefail a grep with no match would
 # otherwise abort the script and the raw-response fallback below could never
 # run.
 content=$(grep -oE '"content"[[:space:]]*:[[:space:]]*"([^"\\]|\\.)*"' <<< "$response" | sed 's/"content"[[:space:]]*:[[:space:]]*"\(.*\)"/\1/' | head -1) || true
 # Print the first integer that follows the JSON key "$1" in $response
 # (prints nothing when the key is absent).
 usage_field() {
    grep -o "\"$1\"[[:space:]]*:[[:space:]]*[0-9]*" <<< "$response" | grep -o '[0-9]*' | head -1
 }
 # Extract usage information (empty when missing; the template defaults to 0)
 prompt_tokens=$(usage_field prompt_tokens) || true
 completion_tokens=$(usage_field completion_tokens) || true
 total_tokens=$(usage_field total_tokens) || true
 # Build simplified response
 if [ -n "$content" ]; then
    # Unescape JSON content (basic unescaping: \n and \" only).
    # Done with parameter expansion rather than sed because "\n" in a sed
    # replacement is not portable between GNU and BSD sed.
    content=${content//\\n/$'\n'}
    content=${content//\\\"/\"}
    cat <<EOF
 {
  "model": "$model",
  "content": "$content",
  "usage": {
    "prompt_tokens": ${prompt_tokens:-0},
    "completion_tokens": ${completion_tokens:-0},
    "total_tokens": ${total_tokens:-0}
  }
 }
 EOF
 else
    # If we can't extract content, return the full response
    echo "$response"
 fi
		`@@ -0,0 +1,3 @@`
							`from .send import Send`

							`__all__ = ['Send']`