feat(wechat_kf): cache images/files and merge into next text turn

Adopt the same channel-level pattern as weixin/wecom_bot/feishu so the agent actually sees attachments the user sent: - IMAGE: agent mode never reads memory.USER_IMAGE_CACHE, so a photo sent before a question (e.g. "image" then 30s later "what's this?") used to be lost. Now lone images go into channel.file_cache and the next TEXT turn appends "[图片: <path>]" to the query before producing the context. Cross-batch image+text combinations now work as users expect. - FILE: previously dropped at the sync_msg filter and unsupported by WechatKfMessage. Add msgtype="file" parsing, download via the WeCom media API, preserve the original filename from Content-Disposition (RFC 5987 + plain forms), and route through the same file_cache pipeline as images, surfacing as "[文件: <path>]" in the next text turn.
2026-07-17 11:07:11 +08:00 · 2026-05-28 18:11:41 +08:00
parent 37661daf40
commit c605b0b080
2 changed files with 79 additions and 2 deletions
--- a/channel/wechat_kf/wechat_kf_channel.py
+++ b/channel/wechat_kf/wechat_kf_channel.py
@@ -30,9 +30,10 @@ from wechatpy.enterprise.crypto import WeChatCrypto
 from wechatpy.enterprise.exceptions import InvalidCorpIdException
 from wechatpy.exceptions import InvalidSignatureException, WeChatClientException

-from bridge.context import Context
+from bridge.context import Context, ContextType
 from bridge.reply import Reply, ReplyType
 from channel.chat_channel import ChatChannel
+from channel.file_cache import get_file_cache
 from channel.wechat_kf.wechat_kf_cursor_store import CursorStore
 from channel.wechat_kf.wechat_kf_message import WechatKfMessage
 from common.log import logger
@@ -314,12 +315,48 @@ class WechatKfChannel(ChatChannel):
        msgs = self._pull_messages(token, open_kfid, existing_cursor)
        if not msgs:
            return
+        file_cache = get_file_cache()
        for raw in msgs:
            try:
                kf_msg = WechatKfMessage(msg=raw, client=self.client)
            except NotImplementedError as e:
                logger.debug("[wechat_kf] {}".format(e))
                continue
+
+            session_id = kf_msg.from_user_id
+
+            # Cache lone images/files and wait for the user's follow-up
+            # text. Agent mode never reads memory.USER_IMAGE_CACHE, so
+            # without this the attachment is effectively lost.
+            if kf_msg.ctype in (ContextType.IMAGE, ContextType.FILE):
+                ftype = "image" if kf_msg.ctype == ContextType.IMAGE else "file"
+                try:
+                    kf_msg.prepare()  # download to local tmp path
+                    file_cache.add(session_id, kf_msg.content, file_type=ftype)
+                    logger.info(
+                        "[wechat_kf] {} cached for session {}: {}".format(
+                            ftype, session_id, kf_msg.content
+                        )
+                    )
+                except Exception as e:
+                    logger.warning(f"[wechat_kf] cache {ftype} failed: {e}")
+                continue
+
+            # On a text turn, attach any pending images/files as references
+            # so the downstream agent can pick them up via the text content.
+            if kf_msg.ctype == ContextType.TEXT:
+                cached_files = file_cache.get(session_id)
+                if cached_files:
+                    refs = []
+                    for fi in cached_files:
+                        ftype, fpath = fi["type"], fi["path"]
+                        if ftype == "image":
+                            refs.append(f"[图片: {fpath}]")
+                        else:
+                            refs.append(f"[文件: {fpath}]")
+                    kf_msg.content = kf_msg.content + "\n" + "\n".join(refs)
+                    file_cache.clear(session_id)
+
            context = self._compose_context(
                kf_msg.ctype,
                kf_msg.content,
@@ -371,7 +408,7 @@ class WechatKfChannel(ChatChannel):
                # back into ourselves.
                if not item.get("external_userid"):
                    continue
-                if item.get("msgtype") in ("text", "image", "voice"):
+                if item.get("msgtype") in ("text", "image", "voice", "file"):
                    collected.append(item)
            cursor_after = data.get("next_cursor") or ""
            if cursor_after:
--- a/channel/wechat_kf/wechat_kf_message.py
+++ b/channel/wechat_kf/wechat_kf_message.py
@@ -3,6 +3,9 @@
 Adapter that turns a single `sync_msg` item from WeCom customer-service
 into a CoW `ChatMessage` object.
 """
+import os
+import re
+
 from wechatpy.enterprise import WeChatClient

 from bridge.context import ContextType
@@ -11,6 +14,23 @@ from common.log import logger
 from common.tmp_dir import TmpDir


+def _extract_filename(content_disposition: str) -> str:
+    """Best-effort parse of `filename` / `filename*` from a Content-Disposition
+    header. Returns '' when nothing usable is found."""
+    if not content_disposition:
+        return ""
+    # RFC 5987 form: filename*=UTF-8''xxx
+    m = re.search(r"filename\*=(?:[^'\"]*'[^']*'\s*)?([^;]+)", content_disposition)
+    if m:
+        try:
+            from urllib.parse import unquote
+            return unquote(m.group(1).strip().strip('"'))
+        except Exception:
+            return m.group(1).strip().strip('"')
+    m = re.search(r'filename\s*=\s*"?([^";]+)"?', content_disposition)
+    return m.group(1).strip() if m else ""
+
+
 class WechatKfMessage(ChatMessage):
    """
    msg structure (from cgi-bin/kf/sync_msg):
@@ -72,6 +92,26 @@ class WechatKfMessage(ChatMessage):
                    logger.info(f"[wechat_kf] Failed to download voice, {response.content}")

            self._prepare_fn = download_voice
+        elif self.msgtype == "file":
+            self.ctype = ContextType.FILE
+            media_id = msg.get("file", {}).get("media_id", "")
+            # Provisional path; rewritten in download_file() once we have
+            # the original filename from Content-Disposition.
+            self.content = TmpDir().path() + media_id
+
+            def download_file():
+                response = client.media.download(media_id)
+                if response.status_code == 200:
+                    filename = _extract_filename(
+                        response.headers.get("Content-Disposition", "")
+                    ) or media_id
+                    self.content = os.path.join(TmpDir().path(), filename)
+                    with open(self.content, "wb") as f:
+                        f.write(response.content)
+                else:
+                    logger.info(f"[wechat_kf] Failed to download file, {response.content}")
+
+            self._prepare_fn = download_file
        else:
            raise NotImplementedError(
                f"[wechat_kf] Unsupported message type: {self.msgtype}"