feat: optimize agent configuration and memory

2026-07-19 21:07:28 +08:00 · 2026-02-02 11:48:53 +08:00
parent a8d5309c90
commit 46fa07e4a9
36 changed files with 1245 additions and 355 deletions
--- a/channel/chat_channel.py
+++ b/channel/chat_channel.py
@@ -173,11 +173,11 @@ class ChatChannel(Channel):
    def _handle(self, context: Context):
        if context is None or not context.content:
            return
-        logger.debug("[chat_channel] ready to handle context: {}".format(context))
+        logger.debug("[chat_channel] handling context: {}".format(context))
        # reply的构建步骤
        reply = self._generate_reply(context)

-        logger.debug("[chat_channel] ready to decorate reply: {}".format(reply))
+        logger.debug("[chat_channel] decorating reply: {}".format(reply))

        # reply的包装步骤
        if reply and reply.content:
@@ -195,7 +195,7 @@ class ChatChannel(Channel):
        )
        reply = e_context["reply"]
        if not e_context.is_pass():
-            logger.debug("[chat_channel] ready to handle context: type={}, content={}".format(context.type, context.content))
+            logger.debug("[chat_channel] type={}, content={}".format(context.type, context.content))
            if context.type == ContextType.TEXT or context.type == ContextType.IMAGE_CREATE:  # 文字和图片消息
                context["channel"] = e_context["channel"]
                reply = super().build_reply_content(context.content, context)
@@ -289,7 +289,7 @@ class ChatChannel(Channel):
            )
            reply = e_context["reply"]
            if not e_context.is_pass() and reply and reply.type:
-                logger.debug("[chat_channel] ready to send reply: {}, context: {}".format(reply, context))
+                logger.debug("[chat_channel] sending reply: {}, context: {}".format(reply, context))
                
                # 如果是文本回复，尝试提取并发送图片
                if reply.type == ReplyType.TEXT:
@@ -343,7 +343,9 @@ class ChatChannel(Channel):
            logger.info(f"[chat_channel] Extracted {len(media_items)} media item(s) from reply")
            
            # 先发送文本（保持原文本不变）
+            logger.info(f"[chat_channel] Sending text content before media: {reply.content[:100]}...")
            self._send(reply, context)
+            logger.info(f"[chat_channel] Text sent, now sending {len(media_items)} media item(s)")
            
            # 然后逐个发送媒体文件
            for i, (url, media_type) in enumerate(media_items):
@@ -381,7 +383,7 @@ class ChatChannel(Channel):
                    logger.error(f"[chat_channel] Failed to send {media_type} {url}: {e}")
        else:
            # 没有媒体文件，正常发送文本
-            self._send(reply, context)
+                self._send(reply, context)

    def _send(self, reply: Reply, context: Context, retry_cnt=0):
        try:
--- a/channel/dingtalk/dingtalk_channel.py
+++ b/channel/dingtalk/dingtalk_channel.py
@@ -8,6 +8,7 @@ import copy
 import json
 # -*- coding=utf-8 -*-
 import logging
+import os
 import time
 import requests

@@ -102,7 +103,7 @@ class DingTalkChanel(ChatChannel, dingtalk_stream.ChatbotHandler):
        self.logger = self.setup_logger()
        # 历史消息id暂存，用于幂等控制
        self.receivedMsgs = ExpiredDict(conf().get("expires_in_seconds", 3600))
-        logger.info("[DingTalk] client_id={}, client_secret={} ".format(
+        logger.debug("[DingTalk] client_id={}, client_secret={} ".format(
            self.dingtalk_client_id, self.dingtalk_client_secret))
        # 无需群校验和前缀
        conf()["group_name_white_list"] = ["ALL_GROUP"]
@@ -118,6 +119,7 @@ class DingTalkChanel(ChatChannel, dingtalk_stream.ChatbotHandler):
        credential = dingtalk_stream.Credential(self.dingtalk_client_id, self.dingtalk_client_secret)
        client = dingtalk_stream.DingTalkStreamClient(credential)
        client.register_callback_handler(dingtalk_stream.chatbot.ChatbotMessage.TOPIC, self)
+        logger.info("[DingTalk] ✅ Stream connected, ready to receive messages")
        client.start_forever()
    
    def get_access_token(self):
@@ -242,21 +244,241 @@ class DingTalkChanel(ChatChannel, dingtalk_stream.ChatbotHandler):
        except Exception as e:
            logger.error(f"[DingTalk] Error sending group message: {e}")
            return False
+    
+    def upload_media(self, file_path: str, media_type: str = "image") -> str:
+        """
+        Upload a media file to DingTalk and return its media_id.
+
+        Args:
+            file_path: Local file path, ``file://`` URL, or HTTP(S) URL. An HTTP(S)
+                URL is downloaded into ``<agent_workspace>/tmp`` before uploading.
+            media_type: Media type sent to the upload API (image, video, voice, file).
+
+        Returns:
+            The media_id on success, or None when no access token is available,
+            the download fails, the file does not exist, or the upload fails.
+        """
+        access_token = self.get_access_token()
+        if not access_token:
+            logger.error("[DingTalk] Cannot upload media: no access token")
+            return None
+
+        # Strip the file:// scheme so the remainder is a plain local path
+        if file_path.startswith("file://"):
+            file_path = file_path[7:]
+
+        # Remote URL: download it to a temporary file first
+        if file_path.startswith(("http://", "https://")):
+            try:
+                import uuid
+                from urllib.parse import urlparse
+
+                response = requests.get(file_path, timeout=(5, 60))
+                if response.status_code != 200:
+                    logger.error(f"[DingTalk] Failed to download file from URL: {file_path}")
+                    return None
+
+                # Name the temp file after the URL *path* only, so a query string
+                # or fragment ("?token=...", "#x") never ends up in the file name
+                # and extension; fall back to a random name when the path is empty.
+                file_name = os.path.basename(urlparse(file_path).path) or f"media_{uuid.uuid4()}"
+                workspace_root = os.path.expanduser(conf().get("agent_workspace", "~/cow"))
+                tmp_dir = os.path.join(workspace_root, "tmp")
+                os.makedirs(tmp_dir, exist_ok=True)
+                temp_file = os.path.join(tmp_dir, file_name)
+
+                with open(temp_file, "wb") as f:
+                    f.write(response.content)
+
+                file_path = temp_file
+                logger.info(f"[DingTalk] Downloaded file to {file_path}")
+            except Exception as e:
+                logger.error(f"[DingTalk] Error downloading file: {e}")
+                return None
+
+        if not os.path.exists(file_path):
+            logger.error(f"[DingTalk] File not found: {file_path}")
+            return None
+
+        # Upload to DingTalk
+        # Media upload API: https://open.dingtalk.com/document/orgapp/upload-media-files
+        url = "https://oapi.dingtalk.com/media/upload"
+        params = {
+            "access_token": access_token,
+            "type": media_type
+        }
+
+        try:
+            with open(file_path, "rb") as f:
+                files = {"media": (os.path.basename(file_path), f)}
+                response = requests.post(url, params=params, files=files, timeout=(5, 60))
+                result = response.json()
+
+                # errcode == 0 is treated as success; anything else is logged as a failure
+                if result.get("errcode") == 0:
+                    media_id = result.get("media_id")
+                    logger.info(f"[DingTalk] Media uploaded successfully, media_id={media_id}")
+                    return media_id
+                else:
+                    logger.error(f"[DingTalk] Failed to upload media: {result}")
+                    return None
+        except Exception as e:
+            logger.error(f"[DingTalk] Error uploading media: {e}")
+            return None
+    
+    def send_image_with_media_id(self, access_token: str, media_id: str, incoming_message, is_group: bool) -> bool:
+        """
+        Send an image message identified by a media_id.
+
+        Args:
+            access_token: Access token sent in the x-acs-dingtalk-access-token header
+            media_id: Media ID, passed as the photoURL value of the message
+            incoming_message: DingTalk message object; its robot_code and either
+                conversation_id (group) or sender_staff_id (single chat) are used
+            is_group: Whether the target is a group chat
+
+        Returns:
+            True when the API answers with HTTP 200, False otherwise
+            (including when the request raises).
+        """
+        headers = {
+            "x-acs-dingtalk-access-token": access_token,
+            'Content-Type': 'application/json'
+        }
+
+        msg_param = {
+            "photoURL": media_id  # the image message carries the media_id in the photoURL field
+        }
+
+        body = {
+            "robotCode": incoming_message.robot_code,
+            "msgKey": "sampleImageMsg",
+            "msgParam": json.dumps(msg_param),
+        }
+
+        if is_group:
+            # Group chat
+            url = "https://api.dingtalk.com/v1.0/robot/groupMessages/send"
+            body["openConversationId"] = incoming_message.conversation_id
+        else:
+            # Single chat
+            url = "https://api.dingtalk.com/v1.0/robot/oToMessages/batchSend"
+            body["userIds"] = [incoming_message.sender_staff_id]
+
+        try:
+            response = requests.post(url=url, headers=headers, json=body, timeout=10)
+
+            # The body is only logged, never inspected, so it is not parsed as
+            # JSON: a parse error must not turn an HTTP 200 into a failure.
+            logger.info(f"[DingTalk] Image send result: {response.text}")
+
+            if response.status_code == 200:
+                return True
+            else:
+                logger.error(f"[DingTalk] Send image error: {response.text}")
+                return False
+        except Exception as e:
+            logger.error(f"[DingTalk] Send image exception: {e}")
+            return False
+
+    def send_image_message(self, receiver: str, media_id: str, is_group: bool, robot_code: str) -> bool:
+        """
+        Send an image message to a user or a group.
+
+        Args:
+            receiver: Receiver ID (user_id for single chat, conversation_id for group chat)
+            media_id: Media ID of the image
+            is_group: Whether the target is a group chat
+            robot_code: Robot code; required
+
+        Returns:
+            True when the API answers with HTTP 200, False otherwise
+            (missing token, missing robot_code, non-200 status, or request error).
+        """
+        access_token = self.get_access_token()
+        if not access_token:
+            logger.error("[DingTalk] Cannot send image: no access token")
+            return False
+
+        if not robot_code:
+            logger.error("[DingTalk] Cannot send image: robot_code is required")
+            return False
+
+        # Headers and the common part of the body are identical for both chat kinds
+        headers = {
+            "x-acs-dingtalk-access-token": access_token,
+            "Content-Type": "application/json"
+        }
+        data = {
+            # sampleImageMsg carries the image in the photoURL field, matching
+            # send_image_with_media_id in this class
+            "msgParam": json.dumps({"photoURL": media_id}),
+            "msgKey": "sampleImageMsg",
+            "robotCode": robot_code
+        }
+
+        if is_group:
+            # Group chat image
+            url = "https://api.dingtalk.com/v1.0/robot/groupMessages/send"
+            data["openConversationId"] = receiver
+        else:
+            # Single chat image
+            url = "https://api.dingtalk.com/v1.0/robot/oToMessages/batchSend"
+            data["userIds"] = [receiver]
+
+        try:
+            response = requests.post(url, headers=headers, json=data, timeout=10)
+
+            if response.status_code == 200:
+                logger.info("[DingTalk] Image message sent successfully")
+                return True
+            else:
+                # Log the raw body: it may not be valid JSON on error responses
+                logger.error(f"[DingTalk] Failed to send image message: {response.text}")
+                return False
+        except Exception as e:
+            logger.error(f"[DingTalk] Error sending image message: {e}")
+            return False
+    
+    def get_image_download_url(self, download_code: str) -> str:
+        """
+        Build the pseudo download URL for an image received by the robot.
+
+        Calls the DingTalk download endpoint with the given download_code and,
+        on HTTP 200, returns ``dingtalk://download/{download_code}``.
+        API: https://open.dingtalk.com/document/orgapp/download-the-robot-to-receive-the-file
+
+        Returns:
+            The ``dingtalk://download/...`` string on success; None when no access
+            token is available, the status is not 200, or the request raises.
+        """
+        token = self.get_access_token()
+        if not token:
+            logger.error("[DingTalk] Cannot get access token for image download")
+            return None
+
+        endpoint = "https://oapi.dingtalk.com/robot/messageFiles/download"
+        query = {"access_token": token, "downloadCode": download_code}
+
+        try:
+            resp = requests.get(endpoint, params=query, timeout=10)
+            if resp.status_code != 200:
+                logger.error(f"[DingTalk] Failed to get image download URL: {resp.text}")
+                return None
+
+            # NOTE(review): the response body is discarded here; only the status
+            # code is checked. The returned value is a special URL that embeds
+            # the download_code so a later step can perform the real download.
+            logger.info(f"[DingTalk] Successfully got image download URL for code: {download_code}")
+            return f"dingtalk://download/{download_code}"
+        except Exception as err:
+            logger.error(f"[DingTalk] Exception getting image download URL: {err}")
+            return None

    async def process(self, callback: dingtalk_stream.CallbackMessage):
        try:
            incoming_message = dingtalk_stream.ChatbotMessage.from_dict(callback.data)
            
            # Debug: 打印完整的 event 数据
-            logger.info(f"[DingTalk] ===== Incoming Message Debug =====")
-            logger.info(f"[DingTalk] callback.data keys: {callback.data.keys() if hasattr(callback.data, 'keys') else 'N/A'}")
-            logger.info(f"[DingTalk] incoming_message attributes: {dir(incoming_message)}")
-            logger.info(f"[DingTalk] robot_code: {getattr(incoming_message, 'robot_code', 'N/A')}")
-            logger.info(f"[DingTalk] chatbot_corp_id: {getattr(incoming_message, 'chatbot_corp_id', 'N/A')}")
-            logger.info(f"[DingTalk] chatbot_user_id: {getattr(incoming_message, 'chatbot_user_id', 'N/A')}")
-            logger.info(f"[DingTalk] conversation_id: {getattr(incoming_message, 'conversation_id', 'N/A')}")
-            logger.info(f"[DingTalk] Raw callback.data: {callback.data}")
-            logger.info(f"[DingTalk] =====================================")
+            logger.debug(f"[DingTalk] ===== Incoming Message Debug =====")
+            logger.debug(f"[DingTalk] callback.data keys: {callback.data.keys() if hasattr(callback.data, 'keys') else 'N/A'}")
+            logger.debug(f"[DingTalk] incoming_message attributes: {dir(incoming_message)}")
+            logger.debug(f"[DingTalk] robot_code: {getattr(incoming_message, 'robot_code', 'N/A')}")
+            logger.debug(f"[DingTalk] chatbot_corp_id: {getattr(incoming_message, 'chatbot_corp_id', 'N/A')}")
+            logger.debug(f"[DingTalk] chatbot_user_id: {getattr(incoming_message, 'chatbot_user_id', 'N/A')}")
+            logger.debug(f"[DingTalk] conversation_id: {getattr(incoming_message, 'conversation_id', 'N/A')}")
+            logger.debug(f"[DingTalk] Raw callback.data: {callback.data}")
+            logger.debug(f"[DingTalk] =====================================")
            
            image_download_handler = self  # 传入方法所在的类实例
            dingtalk_msg = DingTalkMessage(incoming_message, image_download_handler)
@@ -267,7 +489,8 @@ class DingTalkChanel(ChatChannel, dingtalk_stream.ChatbotHandler):
                self.handle_single(dingtalk_msg)
            return AckMessage.STATUS_OK, 'OK'
        except Exception as e:
-            logger.error(f"dingtalk process error={e}")
+            logger.error(f"[DingTalk] process error: {e}")
+            logger.exception(e)  # 打印完整堆栈跟踪
            return AckMessage.STATUS_SYSTEM_EXCEPTION, 'ERROR'

    @time_checker
@@ -286,6 +509,43 @@ class DingTalkChanel(ChatChannel, dingtalk_stream.ChatbotHandler):
            logger.debug("[DingTalk]receive text msg: {}".format(cmsg.content))
        else:
            logger.debug("[DingTalk]receive other msg: {}".format(cmsg.content))
+        
+        # 处理文件缓存逻辑
+        from channel.file_cache import get_file_cache
+        file_cache = get_file_cache()
+        
+        # 单聊的 session_id 就是 sender_id
+        session_id = cmsg.from_user_id
+        
+        # 如果是单张图片消息，缓存起来
+        if cmsg.ctype == ContextType.IMAGE:
+            if hasattr(cmsg, 'image_path') and cmsg.image_path:
+                file_cache.add(session_id, cmsg.image_path, file_type='image')
+                logger.info(f"[DingTalk] Image cached for session {session_id}, waiting for user query...")
+            # 单张图片不直接处理，等待用户提问
+            return
+        
+        # 如果是文本消息，检查是否有缓存的文件
+        if cmsg.ctype == ContextType.TEXT:
+            cached_files = file_cache.get(session_id)
+            if cached_files:
+                # 将缓存的文件附加到文本消息中
+                file_refs = []
+                for file_info in cached_files:
+                    file_path = file_info['path']
+                    file_type = file_info['type']
+                    if file_type == 'image':
+                        file_refs.append(f"[图片: {file_path}]")
+                    elif file_type == 'video':
+                        file_refs.append(f"[视频: {file_path}]")
+                    else:
+                        file_refs.append(f"[文件: {file_path}]")
+                
+                cmsg.content = cmsg.content + "\n" + "\n".join(file_refs)
+                logger.info(f"[DingTalk] Attached {len(cached_files)} cached file(s) to user query")
+                # 清除缓存
+                file_cache.clear(session_id)
+        
        context = self._compose_context(cmsg.ctype, cmsg.content, isgroup=False, msg=cmsg)
        if context:
            self.produce(context)
@@ -307,6 +567,46 @@ class DingTalkChanel(ChatChannel, dingtalk_stream.ChatbotHandler):
            logger.debug("[DingTalk]receive text msg: {}".format(cmsg.content))
        else:
            logger.debug("[DingTalk]receive other msg: {}".format(cmsg.content))
+        
+        # 处理文件缓存逻辑
+        from channel.file_cache import get_file_cache
+        file_cache = get_file_cache()
+        
+        # 群聊的 session_id
+        if conf().get("group_shared_session", True):
+            session_id = cmsg.other_user_id  # conversation_id
+        else:
+            session_id = cmsg.from_user_id + "_" + cmsg.other_user_id
+        
+        # 如果是单张图片消息，缓存起来
+        if cmsg.ctype == ContextType.IMAGE:
+            if hasattr(cmsg, 'image_path') and cmsg.image_path:
+                file_cache.add(session_id, cmsg.image_path, file_type='image')
+                logger.info(f"[DingTalk] Image cached for session {session_id}, waiting for user query...")
+            # 单张图片不直接处理，等待用户提问
+            return
+        
+        # 如果是文本消息，检查是否有缓存的文件
+        if cmsg.ctype == ContextType.TEXT:
+            cached_files = file_cache.get(session_id)
+            if cached_files:
+                # 将缓存的文件附加到文本消息中
+                file_refs = []
+                for file_info in cached_files:
+                    file_path = file_info['path']
+                    file_type = file_info['type']
+                    if file_type == 'image':
+                        file_refs.append(f"[图片: {file_path}]")
+                    elif file_type == 'video':
+                        file_refs.append(f"[视频: {file_path}]")
+                    else:
+                        file_refs.append(f"[文件: {file_path}]")
+                
+                cmsg.content = cmsg.content + "\n" + "\n".join(file_refs)
+                logger.info(f"[DingTalk] Attached {len(cached_files)} cached file(s) to user query")
+                # 清除缓存
+                file_cache.clear(session_id)
+        
        context = self._compose_context(cmsg.ctype, cmsg.content, isgroup=True, msg=cmsg)
        context['no_need_at'] = True
        if context:
@@ -314,6 +614,7 @@ class DingTalkChanel(ChatChannel, dingtalk_stream.ChatbotHandler):


    def send(self, reply: Reply, context: Context):
+        logger.info(f"[DingTalk] send() called with reply.type={reply.type}, content_length={len(str(reply.content))}")
        receiver = context["receiver"]
        
        # Check if msg exists (for scheduled tasks, msg might be None)
@@ -357,29 +658,184 @@ class DingTalkChanel(ChatChannel, dingtalk_stream.ChatbotHandler):
        
        isgroup = msg.is_group
        incoming_message = msg.incoming_message
-
-        if conf().get("dingtalk_card_enabled"):
-            logger.info("[Dingtalk] sendMsg={}, receiver={}".format(reply, receiver))
-            def reply_with_text():
-                self.reply_text(reply.content, incoming_message)
-            def reply_with_at_text():
-                self.reply_text("📢 您有一条新的消息，请查看。", incoming_message)
-            def reply_with_ai_markdown():
-                button_list, markdown_content = self.generate_button_markdown_content(context, reply)
-                self.reply_ai_markdown_button(incoming_message, markdown_content, button_list, "", "📌 内容由AI生成", "",[incoming_message.sender_staff_id])
-
-            if reply.type in [ReplyType.IMAGE_URL, ReplyType.IMAGE, ReplyType.TEXT]:
-                if isgroup:
-                    reply_with_ai_markdown()
-                    reply_with_at_text()
+        robot_code = self._robot_code or conf().get("dingtalk_robot_code")
+        
+        # 处理图片和视频发送
+        if reply.type == ReplyType.IMAGE_URL:
+            logger.info(f"[DingTalk] Sending image: {reply.content}")
+            
+            # 如果有附加的文本内容，先发送文本
+            if hasattr(reply, 'text_content') and reply.text_content:
+                self.reply_text(reply.text_content, incoming_message)
+                import time
+                time.sleep(0.3)  # 短暂延迟，确保文本先到达
+            
+            media_id = self.upload_media(reply.content, media_type="image")
+            if media_id:
+                # 使用主动发送 API 发送图片
+                access_token = self.get_access_token()
+                if access_token:
+                    success = self.send_image_with_media_id(
+                        access_token,
+                        media_id,
+                        incoming_message,
+                        isgroup
+                    )
+                    if not success:
+                        logger.error("[DingTalk] Failed to send image message")
+                        self.reply_text("抱歉，图片发送失败", incoming_message)
                else:
-                    reply_with_ai_markdown()
+                    logger.error("[DingTalk] Cannot get access token")
+                    self.reply_text("抱歉，图片发送失败（无法获取token）", incoming_message)
            else:
-                # 暂不支持其它类型消息回复
-                reply_with_text()
-        else:
-            self.reply_text(reply.content, incoming_message)
+                logger.error("[DingTalk] Failed to upload image")
+                self.reply_text("抱歉，图片上传失败", incoming_message)
+            return
+        
+        elif reply.type == ReplyType.FILE:
+            # 如果有附加的文本内容，先发送文本
+            if hasattr(reply, 'text_content') and reply.text_content:
+                self.reply_text(reply.text_content, incoming_message)
+                import time
+                time.sleep(0.3)  # 短暂延迟，确保文本先到达
+            
+            # 判断是否为视频文件
+            file_path = reply.content
+            if file_path.startswith("file://"):
+                file_path = file_path[7:]
+            
+            is_video = file_path.lower().endswith(('.mp4', '.avi', '.mov', '.wmv', '.flv'))
+            
+            access_token = self.get_access_token()
+            if not access_token:
+                logger.error("[DingTalk] Cannot get access token")
+                self.reply_text("抱歉，文件发送失败（无法获取token）", incoming_message)
+                return
+            
+            if is_video:
+                logger.info(f"[DingTalk] Sending video: {reply.content}")
+                media_id = self.upload_media(reply.content, media_type="video")
+                if media_id:
+                    # 发送视频消息
+                    msg_param = {
+                        "duration": "30",  # TODO: 获取实际视频时长
+                        "videoMediaId": media_id,
+                        "videoType": "mp4",
+                        "height": "400",
+                        "width": "600",
+                    }
+                    success = self._send_file_message(
+                        access_token,
+                        incoming_message,
+                        "sampleVideo",
+                        msg_param,
+                        isgroup
+                    )
+                    if not success:
+                        self.reply_text("抱歉，视频发送失败", incoming_message)
+                else:
+                    logger.error("[DingTalk] Failed to upload video")
+                    self.reply_text("抱歉，视频上传失败", incoming_message)
+            else:
+                # 其他文件类型
+                logger.info(f"[DingTalk] Sending file: {reply.content}")
+                media_id = self.upload_media(reply.content, media_type="file")
+                if media_id:
+                    file_name = os.path.basename(file_path)
+                    file_base, file_extension = os.path.splitext(file_name)
+                    msg_param = {
+                        "mediaId": media_id,
+                        "fileName": file_name,
+                        "fileType": file_extension[1:] if file_extension else "file"
+                    }
+                    success = self._send_file_message(
+                        access_token,
+                        incoming_message,
+                        "sampleFile",
+                        msg_param,
+                        isgroup
+                    )
+                    if not success:
+                        self.reply_text("抱歉，文件发送失败", incoming_message)
+                else:
+                    logger.error("[DingTalk] Failed to upload file")
+                    self.reply_text("抱歉，文件上传失败", incoming_message)
+            return
+        
+        # 处理文本消息
+        elif reply.type == ReplyType.TEXT:
+            logger.info(f"[DingTalk] Sending text message, length={len(reply.content)}")
+            if conf().get("dingtalk_card_enabled"):
+                logger.info("[Dingtalk] sendMsg={}, receiver={}".format(reply, receiver))
+                def reply_with_text():
+                    self.reply_text(reply.content, incoming_message)
+                def reply_with_at_text():
+                    self.reply_text("📢 您有一条新的消息，请查看。", incoming_message)
+                def reply_with_ai_markdown():
+                    button_list, markdown_content = self.generate_button_markdown_content(context, reply)
+                    self.reply_ai_markdown_button(incoming_message, markdown_content, button_list, "", "📌 内容由AI生成", "",[incoming_message.sender_staff_id])

+                if reply.type in [ReplyType.IMAGE_URL, ReplyType.IMAGE, ReplyType.TEXT]:
+                    if isgroup:
+                        reply_with_ai_markdown()
+                        reply_with_at_text()
+                    else:
+                        reply_with_ai_markdown()
+                else:
+                    # 暂不支持其它类型消息回复
+                    reply_with_text()
+            else:
+                self.reply_text(reply.content, incoming_message)
+            return
+    
+    def _send_file_message(self, access_token: str, incoming_message, msg_key: str, msg_param: dict, is_group: bool) -> bool:
+        """
+        Shared helper for sending file / video messages.
+
+        Args:
+            access_token: Access token sent in the x-acs-dingtalk-access-token header
+            incoming_message: DingTalk message object; its robot_code and either
+                conversation_id (group) or sender_staff_id (single chat) are used
+            msg_key: Message type (sampleFile, sampleVideo, sampleAudio)
+            msg_param: Message parameters; serialized to JSON for the msgParam field
+            is_group: Whether the target is a group chat
+
+        Returns:
+            True when the API answers with HTTP 200, False otherwise
+            (including when the request raises).
+        """
+        headers = {
+            "x-acs-dingtalk-access-token": access_token,
+            'Content-Type': 'application/json'
+        }
+
+        body = {
+            "robotCode": incoming_message.robot_code,
+            "msgKey": msg_key,
+            "msgParam": json.dumps(msg_param),
+        }
+
+        if is_group:
+            # Group chat
+            url = "https://api.dingtalk.com/v1.0/robot/groupMessages/send"
+            body["openConversationId"] = incoming_message.conversation_id
+        else:
+            # Single chat
+            url = "https://api.dingtalk.com/v1.0/robot/oToMessages/batchSend"
+            body["userIds"] = [incoming_message.sender_staff_id]
+
+        try:
+            response = requests.post(url=url, headers=headers, json=body, timeout=10)
+
+            # The body is only logged, never inspected, so it is not parsed as
+            # JSON: a parse error must not turn an HTTP 200 into a failure.
+            logger.info(f"[DingTalk] File send result: {response.text}")
+
+            if response.status_code == 200:
+                return True
+            else:
+                logger.error(f"[DingTalk] Send file error: {response.text}")
+                return False
+        except Exception as e:
+            logger.error(f"[DingTalk] Send file exception: {e}")
+            return False

    def generate_button_markdown_content(self, context, reply):
        image_url = context.kwargs.get("image_url")
--- a/channel/dingtalk/dingtalk_message.py
+++ b/channel/dingtalk/dingtalk_message.py
@@ -1,4 +1,5 @@
 import os
+import re

 import requests
 from dingtalk_stream import ChatbotMessage
@@ -8,6 +9,7 @@ from channel.chat_message import ChatMessage
 # -*- coding=utf-8 -*-
 from common.log import logger
 from common.tmp_dir import TmpDir
+from config import conf


 class DingTalkMessage(ChatMessage):
@@ -37,15 +39,67 @@ class DingTalkMessage(ChatMessage):
            self.content = event.extensions['content']['recognition'].strip()
            self.ctype = ContextType.TEXT
        elif (self.message_type == 'picture') or (self.message_type == 'richText'):
-            self.ctype = ContextType.IMAGE
            # 钉钉图片类型或富文本类型消息处理
            image_list = event.get_image_list()
-            if len(image_list) > 0:
+            
+            if self.message_type == 'picture' and len(image_list) > 0:
+                # 单张图片消息：下载到工作空间，用于文件缓存
+                self.ctype = ContextType.IMAGE
                download_code = image_list[0]
                download_url = image_download_handler.get_image_download_url(download_code)
-                self.content = download_image_file(download_url, TmpDir().path())
+                
+                # 下载到工作空间 tmp 目录
+                workspace_root = os.path.expanduser(conf().get("agent_workspace", "~/cow"))
+                tmp_dir = os.path.join(workspace_root, "tmp")
+                os.makedirs(tmp_dir, exist_ok=True)
+                
+                image_path = download_image_file(download_url, tmp_dir)
+                if image_path:
+                    self.content = image_path
+                    self.image_path = image_path  # 保存图片路径用于缓存
+                    logger.info(f"[DingTalk] Downloaded single image to {image_path}")
+                else:
+                    self.content = "[图片下载失败]"
+                    self.image_path = None
+            
+            elif self.message_type == 'richText' and len(image_list) > 0:
+                # 富文本消息：下载所有图片并附加到文本中
+                self.ctype = ContextType.TEXT
+                
+                # 下载到工作空间 tmp 目录
+                workspace_root = os.path.expanduser(conf().get("agent_workspace", "~/cow"))
+                tmp_dir = os.path.join(workspace_root, "tmp")
+                os.makedirs(tmp_dir, exist_ok=True)
+                
+                # 提取富文本中的文本内容
+                text_content = ""
+                if self.rich_text_content:
+                    # rich_text_content 是一个 RichTextContent 对象，需要从中提取文本
+                    text_list = event.get_text_list()
+                    if text_list:
+                        text_content = "".join(text_list).strip()
+                
+                # 下载所有图片
+                image_paths = []
+                for download_code in image_list:
+                    download_url = image_download_handler.get_image_download_url(download_code)
+                    image_path = download_image_file(download_url, tmp_dir)
+                    if image_path:
+                        image_paths.append(image_path)
+                
+                # 构建消息内容：文本 + 图片路径
+                content_parts = []
+                if text_content:
+                    content_parts.append(text_content)
+                for img_path in image_paths:
+                    content_parts.append(f"[图片: {img_path}]")
+                
+                self.content = "\n".join(content_parts) if content_parts else "[富文本消息]"
+                logger.info(f"[DingTalk] Received richText with {len(image_paths)} image(s): {self.content}")
            else:
-                logger.debug(f"[Dingtalk] messageType :{self.message_type} , imageList isEmpty")
+                self.ctype = ContextType.IMAGE
+                self.content = "[未找到图片]"
+                logger.debug(f"[DingTalk] messageType: {self.message_type}, imageList isEmpty")

        if self.is_group:
            self.from_user_id = event.conversation_id
@@ -59,27 +113,95 @@ class DingTalkMessage(ChatMessage):


 def download_image_file(image_url, temp_dir):
-    headers = {
-        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36'
-    }
-    # 设置代理
-    # self.proxies
-    # , proxies=self.proxies
-    response = requests.get(image_url, headers=headers, stream=True, timeout=60 * 5)
-    if response.status_code == 200:
-
-        # 生成文件名
-        file_name = image_url.split("/")[-1].split("?")[0]
-
-        # 检查临时目录是否存在，如果不存在则创建
-        if not os.path.exists(temp_dir):
-            os.makedirs(temp_dir)
-
-        # 将文件保存到临时目录
-        file_path = os.path.join(temp_dir, file_name)
-        with open(file_path, 'wb') as file:
-            file.write(response.content)
-        return file_path
+    """
+    Download an image into temp_dir and return its local path (None on failure).
+    Two kinds of image_url are supported:
+    1. A plain HTTP(S) URL
+    2. A DingTalk downloadCode: dingtalk://download/{download_code}
+    """
+    # Create the target directory if it does not exist yet
+    if not os.path.exists(temp_dir):
+        os.makedirs(temp_dir)
+    
+    # DingTalk downloadCode branch
+    if image_url.startswith("dingtalk://download/"):
+        download_code = image_url.replace("dingtalk://download/", "")
+        logger.info(f"[DingTalk] Downloading image with downloadCode: {download_code[:20]}...")
+        
+        # The access_token should really be passed in by the caller; this is a stopgap
+        # Read dingtalk_client_id and dingtalk_client_secret from config
+        from config import conf
+        client_id = conf().get("dingtalk_client_id")
+        client_secret = conf().get("dingtalk_client_secret")
+        
+        if not client_id or not client_secret:
+            logger.error("[DingTalk] Missing dingtalk_client_id or dingtalk_client_secret")
+            return None
+        
+        # Fetch an access_token
+        token_url = "https://oapi.dingtalk.com/gettoken"
+        token_params = {
+            "appkey": client_id,
+            "appsecret": client_secret
+        }
+        
+        try:
+            token_response = requests.get(token_url, params=token_params, timeout=10)
+            token_data = token_response.json()
+            
+            if token_data.get("errcode") == 0:
+                access_token = token_data.get("access_token")
+                
+                # Download the image
+                download_url = f"https://oapi.dingtalk.com/robot/messageFiles/download"
+                download_params = {
+                    "access_token": access_token,
+                    "downloadCode": download_code
+                }
+                
+                response = requests.get(download_url, params=download_params, stream=True, timeout=60)
+                if response.status_code == 200:
+                    # File name = MD5 prefix of download_code (avoids special characters); always saved as .png
+                    import hashlib
+                    file_hash = hashlib.md5(download_code.encode()).hexdigest()[:16]
+                    file_name = f"{file_hash}.png"
+                    file_path = os.path.join(temp_dir, file_name)
+                    
+                    with open(file_path, 'wb') as file:
+                        file.write(response.content)
+                    
+                    logger.info(f"[DingTalk] Image downloaded successfully: {file_path}")
+                    return file_path
+                else:
+                    logger.error(f"[DingTalk] Failed to download image: {response.status_code}")
+                    return None
+            else:
+                logger.error(f"[DingTalk] Failed to get access token: {token_data}")
+                return None
+        except Exception as e:
+            logger.error(f"[DingTalk] Exception downloading image: {e}")
+            return None
+    
+    # Plain HTTP(S) URL
    else:
-        logger.info(f"[Dingtalk] Failed to download image file, {response.content}")
-        return None
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36'
+        }
+        
+        try:
+            response = requests.get(image_url, headers=headers, stream=True, timeout=60 * 5)
+            if response.status_code == 200:
+                # File name = last URL path segment with the query string stripped
+                file_name = image_url.split("/")[-1].split("?")[0]
+                
+                # Save the file into the target directory
+                file_path = os.path.join(temp_dir, file_name)
+                with open(file_path, 'wb') as file:
+                    file.write(response.content)
+                return file_path
+            else:
+                logger.info(f"[Dingtalk] Failed to download image file, {response.content}")
+                return None
+        except Exception as e:
+            logger.error(f"[Dingtalk] Exception downloading image: {e}")
+            return None
--- a/channel/feishu/feishu_channel.py
+++ b/channel/feishu/feishu_channel.py
@@ -55,7 +55,7 @@ class FeiShuChanel(ChatChannel):
        super().__init__()
        # 历史消息id暂存，用于幂等控制
        self.receivedMsgs = ExpiredDict(60 * 60 * 7.1)
-        logger.info("[FeiShu] app_id={}, app_secret={}, verification_token={}, event_mode={}".format(
+        logger.debug("[FeiShu] app_id={}, app_secret={}, verification_token={}, event_mode={}".format(
            self.feishu_app_id, self.feishu_app_secret, self.feishu_token, self.feishu_event_mode))
        # 无需群校验和前缀
        conf()["group_name_white_list"] = ["ALL_GROUP"]
@@ -74,7 +74,7 @@ class FeiShuChanel(ChatChannel):

    def _startup_webhook(self):
        """启动HTTP服务器接收事件(webhook模式)"""
-        logger.info("[FeiShu] Starting in webhook mode...")
+        logger.debug("[FeiShu] Starting in webhook mode...")
        urls = (
            '/', 'channel.feishu.feishu_channel.FeishuController'
        )
@@ -84,7 +84,7 @@ class FeiShuChanel(ChatChannel):

    def _startup_websocket(self):
        """启动长连接接收事件(websocket模式)"""
-        logger.info("[FeiShu] Starting in websocket mode...")
+        logger.debug("[FeiShu] Starting in websocket mode...")

        # 创建事件处理器
        def handle_message_event(data: lark.im.v1.P2ImMessageReceiveV1) -> None:
@@ -118,7 +118,7 @@ class FeiShuChanel(ChatChannel):
        # 在新线程中启动客户端，避免阻塞主线程
        def start_client():
            try:
-                logger.info("[FeiShu] Websocket client starting...")
+                logger.debug("[FeiShu] Websocket client starting...")
                ws_client.start()
            except Exception as e:
                logger.error(f"[FeiShu] Websocket client error: {e}", exc_info=True)
@@ -127,7 +127,7 @@ class FeiShuChanel(ChatChannel):
        ws_thread.start()

        # 保持主线程运行
-        logger.info("[FeiShu] Websocket mode started, waiting for events...")
+        logger.info("[FeiShu] ✅ Websocket connected, ready to receive messages")
        ws_thread.join()

    def _handle_message_event(self, event: dict):
@@ -173,6 +173,48 @@ class FeiShuChanel(ChatChannel):
        if not feishu_msg:
            return

+        # 处理文件缓存逻辑
+        from channel.file_cache import get_file_cache
+        file_cache = get_file_cache()
+        
+        # 获取 session_id（用于缓存关联）
+        if is_group:
+            if conf().get("group_shared_session", True):
+                session_id = msg.get("chat_id")  # 群共享会话
+            else:
+                session_id = feishu_msg.from_user_id + "_" + msg.get("chat_id")
+        else:
+            session_id = feishu_msg.from_user_id
+        
+        # 如果是单张图片消息，缓存起来
+        if feishu_msg.ctype == ContextType.IMAGE:
+            if hasattr(feishu_msg, 'image_path') and feishu_msg.image_path:
+                file_cache.add(session_id, feishu_msg.image_path, file_type='image')
+                logger.info(f"[FeiShu] Image cached for session {session_id}, waiting for user query...")
+            # 单张图片不直接处理，等待用户提问
+            return
+        
+        # 如果是文本消息，检查是否有缓存的文件
+        if feishu_msg.ctype == ContextType.TEXT:
+            cached_files = file_cache.get(session_id)
+            if cached_files:
+                # 将缓存的文件附加到文本消息中
+                file_refs = []
+                for file_info in cached_files:
+                    file_path = file_info['path']
+                    file_type = file_info['type']
+                    if file_type == 'image':
+                        file_refs.append(f"[图片: {file_path}]")
+                    elif file_type == 'video':
+                        file_refs.append(f"[视频: {file_path}]")
+                    else:
+                        file_refs.append(f"[文件: {file_path}]")
+                
+                feishu_msg.content = feishu_msg.content + "\n" + "\n".join(file_refs)
+                logger.info(f"[FeiShu] Attached {len(cached_files)} cached file(s) to user query")
+                # 清除缓存
+                file_cache.clear(session_id)
+
        context = self._compose_context(
            feishu_msg.ctype,
            feishu_msg.content,
@@ -183,7 +225,7 @@ class FeiShuChanel(ChatChannel):
        )
        if context:
            self.produce(context)
-        logger.info(f"[FeiShu] query={feishu_msg.content}, type={feishu_msg.ctype}")
+        logger.debug(f"[FeiShu] query={feishu_msg.content}, type={feishu_msg.ctype}")

    def send(self, reply: Reply, context: Context):
        msg = context.get("msg")
@@ -197,7 +239,7 @@ class FeiShuChanel(ChatChannel):
            "Content-Type": "application/json",
        }
        msg_type = "text"
-        logger.info(f"[FeiShu] start send reply message, type={context.type}, content={reply.content}")
+        logger.debug(f"[FeiShu] sending reply, type={context.type}, content={reply.content[:100]}...")
        reply_content = reply.content
        content_key = "text"
        if reply.type == ReplyType.IMAGE_URL:
@@ -217,14 +259,20 @@ class FeiShuChanel(ChatChannel):
            is_video = file_path.lower().endswith(('.mp4', '.avi', '.mov', '.wmv', '.flv'))
            
            if is_video:
-                # 视频使用 media 类型
-                file_key = self._upload_video_url(reply.content, access_token)
-                if not file_key:
+                # 视频使用 media 类型，需要上传并获取 file_key 和 duration
+                video_info = self._upload_video_url(reply.content, access_token)
+                if not video_info or not video_info.get('file_key'):
                    logger.warning("[FeiShu] upload video failed")
                    return
-                reply_content = file_key
+                
+                # media 类型需要特殊的 content 格式
                msg_type = "media"
-                content_key = "file_key"
+                # 注意：media 类型的 content 不使用 content_key，而是完整的 JSON 对象
+                reply_content = {
+                    "file_key": video_info['file_key'],
+                    "duration": video_info.get('duration', 0)  # 视频时长（毫秒）
+                }
+                content_key = None  # media 类型不使用单一的 key
            else:
                # 其他文件使用 file 类型
                file_key = self._upload_file_url(reply.content, access_token)
@@ -243,7 +291,7 @@ class FeiShuChanel(ChatChannel):
            url = f"https://open.feishu.cn/open-apis/im/v1/messages/{msg.msg_id}/reply"
            data = {
                "msg_type": msg_type,
-                "content": json.dumps({content_key: reply_content})
+                "content": json.dumps(reply_content) if content_key is None else json.dumps({content_key: reply_content})
            }
            res = requests.post(url=url, headers=headers, json=data, timeout=(5, 10))
        else:
@@ -253,7 +301,7 @@ class FeiShuChanel(ChatChannel):
            data = {
                "receive_id": context.get("receiver"),
                "msg_type": msg_type,
-                "content": json.dumps({content_key: reply_content})
+                "content": json.dumps(reply_content) if content_key is None else json.dumps({content_key: reply_content})
            }
            res = requests.post(url=url, headers=headers, params=params, json=data, timeout=(5, 10))
        res = res.json()
@@ -336,103 +384,128 @@ class FeiShuChanel(ChatChannel):
            os.remove(temp_name)
            return upload_response.json().get("data").get("image_key")

+    def _get_video_duration(self, file_path: str) -> int:
+        """
+        Get the duration of a video in milliseconds.
+        
+        Args:
+            file_path: Path of the video file
+        
+        Returns:
+            Duration in milliseconds, or 0 if it could not be determined
+        """
+        try:
+            import subprocess
+            
+            # Ask ffprobe for the container duration (printed in seconds)
+            cmd = [
+                'ffprobe',
+                '-v', 'error',
+                '-show_entries', 'format=duration',
+                '-of', 'default=noprint_wrappers=1:nokey=1',
+                file_path
+            ]
+            
+            result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
+            if result.returncode == 0:
+                duration_seconds = float(result.stdout.strip())
+                duration_ms = int(duration_seconds * 1000)
+                logger.info(f"[FeiShu] Video duration: {duration_seconds:.2f}s ({duration_ms}ms)")
+                return duration_ms
+            else:
+                logger.warning(f"[FeiShu] Failed to get video duration via ffprobe: {result.stderr}")
+                return 0
+        except FileNotFoundError:
+            logger.warning("[FeiShu] ffprobe not found, video duration will be 0. Install ffmpeg to fix this.")
+            return 0
+        except Exception as e:
+            logger.warning(f"[FeiShu] Failed to get video duration: {e}")
+            return 0
+
    def _upload_video_url(self, video_url, access_token):
        """
-        Upload video to Feishu and return file_key (for media type messages)
+        Upload video to Feishu and return video info (file_key and duration)
        Supports:
        - file:// URLs for local files
        - http(s):// URLs (download then upload)
+        
+        Returns:
+            dict with 'file_key' and 'duration' (milliseconds), or None if failed
        """
-        # For file:// URLs (local files), upload directly
-        if video_url.startswith("file://"):
-            local_path = video_url[7:]  # Remove file:// prefix
-            if not os.path.exists(local_path):
-                logger.error(f"[FeiShu] local video file not found: {local_path}")
-                return None
+        local_path = None
+        temp_file = None
+        
+        try:
+            # For file:// URLs (local files), upload directly
+            if video_url.startswith("file://"):
+                local_path = video_url[7:]  # Remove file:// prefix
+                if not os.path.exists(local_path):
+                    logger.error(f"[FeiShu] local video file not found: {local_path}")
+                    return None
+            else:
+                # For HTTP URLs, download first
+                logger.info(f"[FeiShu] Downloading video from URL: {video_url}")
+                response = requests.get(video_url, timeout=(5, 60))
+                if response.status_code != 200:
+                    logger.error(f"[FeiShu] download video failed, status={response.status_code}")
+                    return None
+                
+                # Save to a uniquely named temp file (relative path, i.e. the current working directory)
+                import uuid
+                file_name = os.path.basename(video_url) or "video.mp4"  # NOTE(review): a ?query suffix in the URL stays in file_name
+                temp_file = str(uuid.uuid4()) + "_" + file_name
+                
+                with open(temp_file, "wb") as file:
+                    file.write(response.content)
+                
+                logger.info(f"[FeiShu] Video downloaded, size={len(response.content)} bytes")
+                local_path = temp_file
            
+            # Get video duration in ms (0 when ffprobe is missing or fails)
+            duration = self._get_video_duration(local_path)
+            
+            # Upload to Feishu
            file_name = os.path.basename(local_path)
            file_ext = os.path.splitext(file_name)[1].lower()
-            
-            # Determine file type for Feishu API (for media messages)
-            # Media type only supports mp4
-            file_type_map = {
-                '.mp4': 'mp4',
-            }
-            file_type = file_type_map.get(file_ext, 'mp4')  # Default to mp4
-            
-            # Upload video to Feishu (use file upload API, but send as media type)
-            upload_url = "https://open.feishu.cn/open-apis/im/v1/files"
-            data = {'file_type': file_type, 'file_name': file_name}
-            headers = {'Authorization': f'Bearer {access_token}'}
-            
-            try:
-                with open(local_path, "rb") as file:
-                    upload_response = requests.post(
-                        upload_url, 
-                        files={"file": file}, 
-                        data=data, 
-                        headers=headers,
-                        timeout=(5, 60)  # 5s connect, 60s read timeout (videos are larger)
-                    )
-                    logger.info(f"[FeiShu] upload video response, status={upload_response.status_code}, res={upload_response.content}")
-                    
-                    response_data = upload_response.json()
-                    if response_data.get("code") == 0:
-                        return response_data.get("data").get("file_key")
-                    else:
-                        logger.error(f"[FeiShu] upload video failed: {response_data}")
-                        return None
-            except Exception as e:
-                logger.error(f"[FeiShu] upload video exception: {e}")
-                return None
-        
-        # For HTTP URLs, download first then upload
-        try:
-            logger.info(f"[FeiShu] Downloading video from URL: {video_url}")
-            response = requests.get(video_url, timeout=(5, 60))
-            if response.status_code != 200:
-                logger.error(f"[FeiShu] download video failed, status={response.status_code}")
-                return None
-            
-            # Save to temp file
-            import uuid
-            file_name = os.path.basename(video_url) or "video.mp4"
-            temp_name = str(uuid.uuid4()) + "_" + file_name
-            
-            with open(temp_name, "wb") as file:
-                file.write(response.content)
-            
-            logger.info(f"[FeiShu] Video downloaded, size={len(response.content)} bytes, uploading...")
-            
-            # Upload
-            file_ext = os.path.splitext(file_name)[1].lower()
-            file_type_map = {
-                '.mp4': 'mp4',
-            }
+            file_type_map = {'.mp4': 'mp4'}  # any other extension also falls back to 'mp4' below
            file_type = file_type_map.get(file_ext, 'mp4')
            
            upload_url = "https://open.feishu.cn/open-apis/im/v1/files"
            data = {'file_type': file_type, 'file_name': file_name}
            headers = {'Authorization': f'Bearer {access_token}'}
            
-            with open(temp_name, "rb") as file:
-                upload_response = requests.post(upload_url, files={"file": file}, data=data, headers=headers, timeout=(5, 60))
-                logger.info(f"[FeiShu] upload video, res={upload_response.content}")
+            with open(local_path, "rb") as file:
+                upload_response = requests.post(
+                    upload_url, 
+                    files={"file": file}, 
+                    data=data, 
+                    headers=headers, 
+                    timeout=(5, 60)
+                )
+                logger.info(f"[FeiShu] upload video response, status={upload_response.status_code}, res={upload_response.content}")
                
                response_data = upload_response.json()
-                os.remove(temp_name)  # Clean up temp file
-                
                if response_data.get("code") == 0:
-                    return response_data.get("data").get("file_key")
+                    file_key = response_data.get("data").get("file_key")
+                    return {
+                        'file_key': file_key,
+                        'duration': duration
+                    }
                else:
                    logger.error(f"[FeiShu] upload video failed: {response_data}")
                    return None
+        
        except Exception as e:
-            logger.error(f"[FeiShu] upload video from URL exception: {e}")
-            # Clean up temp file if exists
-            if 'temp_name' in locals() and os.path.exists(temp_name):
-                os.remove(temp_name)
+            logger.error(f"[FeiShu] upload video exception: {e}")
            return None
+        
+        finally:
+            # Remove the downloaded temp file; local file:// inputs never set temp_file
+            if temp_file and os.path.exists(temp_file):
+                try:
+                    os.remove(temp_file)
+                except Exception as e:
+                    logger.warning(f"[FeiShu] Failed to remove temp file {temp_file}: {e}")

    def _upload_file_url(self, file_url, access_token):
        """
--- a/channel/feishu/feishu_message.py
+++ b/channel/feishu/feishu_message.py
@@ -25,13 +25,33 @@ class FeishuMessage(ChatMessage):
            content = json.loads(msg.get('content'))
            self.content = content.get("text").strip()
        elif msg_type == "image":
-            # 单张图片消息，不处理和存储
+            # 单张图片消息：下载并缓存，等待用户提问时一起发送
            self.ctype = ContextType.IMAGE
            content = json.loads(msg.get("content"))
            image_key = content.get("image_key")
-            # 仅记录图片key，不下载
-            self.content = f"[图片: {image_key}]"
-            logger.info(f"[FeiShu] Received single image message, key={image_key}, skipped download")
+            
+            # 下载图片到工作空间临时目录
+            workspace_root = os.path.expanduser(conf().get("agent_workspace", "~/cow"))
+            tmp_dir = os.path.join(workspace_root, "tmp")
+            os.makedirs(tmp_dir, exist_ok=True)
+            image_path = os.path.join(tmp_dir, f"{image_key}.png")
+            
+            # 下载图片
+            url = f"https://open.feishu.cn/open-apis/im/v1/messages/{msg.get('message_id')}/resources/{image_key}"
+            headers = {"Authorization": "Bearer " + access_token}
+            params = {"type": "image"}
+            response = requests.get(url=url, headers=headers, params=params)
+            
+            if response.status_code == 200:
+                with open(image_path, "wb") as f:
+                    f.write(response.content)
+                logger.info(f"[FeiShu] Downloaded single image, key={image_key}, path={image_path}")
+                self.content = image_path
+                self.image_path = image_path  # 保存图片路径
+            else:
+                logger.error(f"[FeiShu] Failed to download single image, key={image_key}, status={response.status_code}")
+                self.content = f"[图片下载失败: {image_key}]"
+                self.image_path = None
        elif msg_type == "post":
            # 富文本消息，可能包含图片、文本等多种元素
            content = json.loads(msg.get("content"))
--- a/channel/file_cache.py
+++ b/channel/file_cache.py
@@ -0,0 +1,100 @@
+"""
+文件缓存管理器
+用于缓存单独发送的文件消息（图片、视频、文档等），在用户提问时自动附加
+"""
+import time
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class FileCache:
+    """Per-session cache of files sent on their own; an entry expires `ttl` seconds after its last add."""
+    
+    def __init__(self, ttl=120):
+        """
+        Args:
+            ttl: Entry lifetime in seconds (default 120, i.e. two minutes)
+        """
+        self.cache = {}
+        self.ttl = ttl
+    
+    def add(self, session_id: str, file_path: str, file_type: str = "image"):
+        """
+        Add a file to the session's cache entry and restart that entry's TTL.
+        
+        Args:
+            session_id: Session ID
+            file_path: Local path of the file
+            file_type: File type (image, video, file, ...)
+        """
+        now = time.time()
+        entry = self.cache.get(session_id)
+        if entry is None or now - entry['timestamp'] > self.ttl:
+            # Never append to a stale entry: get() would expire the new file along with it
+            entry = self.cache[session_id] = {'files': [], 'timestamp': now}
+        entry['timestamp'] = now  # TTL counts from the most recently added file
+        # Append the file unless an identical record is already cached
+        file_info = {'path': file_path, 'type': file_type}
+        if file_info not in entry['files']:
+            entry['files'].append(file_info)
+            logger.info(f"[FileCache] Added {file_type} to cache for session {session_id}: {file_path}")
+    
+    def get(self, session_id: str) -> list:
+        """
+        Return the files cached for a session.
+        
+        Args:
+            session_id: Session ID
+        
+        Returns:
+            List of file records [{'path': '...', 'type': 'image'}, ...]; empty if none or expired
+        """
+        if session_id not in self.cache:
+            return []
+        
+        item = self.cache[session_id]
+        
+        # Expired entries are dropped on access
+        if time.time() - item['timestamp'] > self.ttl:
+            logger.info(f"[FileCache] Cache expired for session {session_id}, clearing...")
+            del self.cache[session_id]
+            return []
+        
+        return item['files']
+    
+    def clear(self, session_id: str):
+        """
+        Remove the cache entry of the given session, if any.
+        
+        Args:
+            session_id: Session ID
+        """
+        if session_id in self.cache:
+            logger.info(f"[FileCache] Cleared cache for session {session_id}")
+            del self.cache[session_id]
+    
+    def cleanup_expired(self):
+        """Remove every entry whose TTL has elapsed."""
+        current_time = time.time()
+        expired_sessions = []
+        
+        for session_id, item in self.cache.items():
+            if current_time - item['timestamp'] > self.ttl:
+                expired_sessions.append(session_id)
+        
+        for session_id in expired_sessions:
+            del self.cache[session_id]
+            logger.debug(f"[FileCache] Cleaned up expired cache for session {session_id}")
+        
+        if expired_sessions:
+            logger.info(f"[FileCache] Cleaned up {len(expired_sessions)} expired cache(s)")
+
+
+# Module-level singleton instance
+_file_cache = FileCache()
+
+
+def get_file_cache() -> FileCache:
+    """Return the module-level FileCache instance."""
+    return _file_cache
--- a/channel/web/web_channel.py
+++ b/channel/web/web_channel.py
@@ -200,12 +200,12 @@ class WebChannel(ChatChannel):
        logger.info("""[WebChannel] 当前channel为web，可修改 config.json 配置文件中的 channel_type 字段进行切换。全部可用类型为：
        1. web: 网页
        2. terminal: 终端
-        3. wechatmp: 个人公众号
-        4. wechatmp_service: 企业公众号
+        3. feishu: 飞书
+        4. dingtalk: 钉钉
        5. wechatcom_app: 企微自建应用
-        6. dingtalk: 钉钉
-        7. feishu: 飞书""")
-        logger.info(f"Web对话网页已运行, 请使用浏览器访问 http://localhost:{port}/chat (本地运行) 或 http://ip:{port}/chat (服务器运行)")
+        6. wechatmp: 个人公众号
+        7. wechatmp_service: 企业公众号""")
+        logger.info(f"✅ Web对话网页已运行, 请使用浏览器访问 http://localhost:{port}/chat (本地运行) 或 http://ip:{port}/chat (服务器运行)")
        
        # 确保静态文件目录存在
        static_dir = os.path.join(os.path.dirname(__file__), 'static')