feat(wechat_kf): cache images/files and merge into next text turn

Adopt the same channel-level pattern as weixin/wecom_bot/feishu so
the agent actually sees attachments the user sent:
- IMAGE: agent mode never reads memory.USER_IMAGE_CACHE, so a photo
  sent before a question (e.g. "image" then 30s later "what's this?")
  used to be lost. Now lone images go into channel.file_cache and
  the next TEXT turn appends "[图片: <path>]" to the query before
  producing the context. Cross-batch image+text combinations now
  work as users expect.
- FILE: previously dropped at the sync_msg filter and unsupported
  by WechatKfMessage. Add msgtype="file" parsing, download via the
  WeCom media API, preserve the original filename from
  Content-Disposition (RFC 5987 + plain forms), and route through
  the same file_cache pipeline as images, surfacing as
  "[文件: <path>]" in the next text turn.
This commit is contained in:
6vision
2026-05-28 18:11:41 +08:00
parent 37661daf40
commit c605b0b080
2 changed files with 79 additions and 2 deletions

View File

@@ -30,9 +30,10 @@ from wechatpy.enterprise.crypto import WeChatCrypto
from wechatpy.enterprise.exceptions import InvalidCorpIdException
from wechatpy.exceptions import InvalidSignatureException, WeChatClientException
from bridge.context import Context
from bridge.context import Context, ContextType
from bridge.reply import Reply, ReplyType
from channel.chat_channel import ChatChannel
from channel.file_cache import get_file_cache
from channel.wechat_kf.wechat_kf_cursor_store import CursorStore
from channel.wechat_kf.wechat_kf_message import WechatKfMessage
from common.log import logger
@@ -314,12 +315,48 @@ class WechatKfChannel(ChatChannel):
msgs = self._pull_messages(token, open_kfid, existing_cursor)
if not msgs:
return
file_cache = get_file_cache()
for raw in msgs:
try:
kf_msg = WechatKfMessage(msg=raw, client=self.client)
except NotImplementedError as e:
logger.debug("[wechat_kf] {}".format(e))
continue
session_id = kf_msg.from_user_id
# Cache lone images/files and wait for the user's follow-up
# text. Agent mode never reads memory.USER_IMAGE_CACHE, so
# without this the attachment is effectively lost.
if kf_msg.ctype in (ContextType.IMAGE, ContextType.FILE):
ftype = "image" if kf_msg.ctype == ContextType.IMAGE else "file"
try:
kf_msg.prepare() # download to local tmp path
file_cache.add(session_id, kf_msg.content, file_type=ftype)
logger.info(
"[wechat_kf] {} cached for session {}: {}".format(
ftype, session_id, kf_msg.content
)
)
except Exception as e:
logger.warning(f"[wechat_kf] cache {ftype} failed: {e}")
continue
# On a text turn, attach any pending images/files as references
# so the downstream agent can pick them up via the text content.
if kf_msg.ctype == ContextType.TEXT:
cached_files = file_cache.get(session_id)
if cached_files:
refs = []
for fi in cached_files:
ftype, fpath = fi["type"], fi["path"]
if ftype == "image":
refs.append(f"[图片: {fpath}]")
else:
refs.append(f"[文件: {fpath}]")
kf_msg.content = kf_msg.content + "\n" + "\n".join(refs)
file_cache.clear(session_id)
context = self._compose_context(
kf_msg.ctype,
kf_msg.content,
@@ -371,7 +408,7 @@ class WechatKfChannel(ChatChannel):
# back into ourselves.
if not item.get("external_userid"):
continue
if item.get("msgtype") in ("text", "image", "voice"):
if item.get("msgtype") in ("text", "image", "voice", "file"):
collected.append(item)
cursor_after = data.get("next_cursor") or ""
if cursor_after:

View File

@@ -3,6 +3,9 @@
Adapter that turns a single `sync_msg` item from WeCom customer-service
into a CoW `ChatMessage` object.
"""
import os
import re
from wechatpy.enterprise import WeChatClient
from bridge.context import ContextType
@@ -11,6 +14,23 @@ from common.log import logger
from common.tmp_dir import TmpDir
def _extract_filename(content_disposition: str) -> str:
"""Best-effort parse of `filename` / `filename*` from a Content-Disposition
header. Returns '' when nothing usable is found."""
if not content_disposition:
return ""
# RFC 5987 form: filename*=UTF-8''xxx
m = re.search(r"filename\*=(?:[^'\"]*'[^']*'\s*)?([^;]+)", content_disposition)
if m:
try:
from urllib.parse import unquote
return unquote(m.group(1).strip().strip('"'))
except Exception:
return m.group(1).strip().strip('"')
m = re.search(r'filename\s*=\s*"?([^";]+)"?', content_disposition)
return m.group(1).strip() if m else ""
class WechatKfMessage(ChatMessage):
"""
msg structure (from cgi-bin/kf/sync_msg):
@@ -72,6 +92,26 @@ class WechatKfMessage(ChatMessage):
logger.info(f"[wechat_kf] Failed to download voice, {response.content}")
self._prepare_fn = download_voice
elif self.msgtype == "file":
self.ctype = ContextType.FILE
media_id = msg.get("file", {}).get("media_id", "")
# Provisional path; rewritten in download_file() once we have
# the original filename from Content-Disposition.
self.content = TmpDir().path() + media_id
def download_file():
    """Download the file attachment and store it under the tmp dir.

    Calls ``client.media.download(media_id)``. On HTTP 200 the body is
    written to ``<tmp dir>/<filename>`` and ``self.content`` is rewritten to
    that path; the filename comes from the Content-Disposition header and
    falls back to ``media_id``. On any other status the failure is logged
    and ``self.content`` keeps its provisional value.
    """
    response = client.media.download(media_id)
    if response.status_code != 200:
        logger.info(f"[wechat_kf] Failed to download file, {response.content}")
        return

    header_name = _extract_filename(
        response.headers.get("Content-Disposition", "")
    )
    # The header is remote-controlled: keep only the final path component
    # (treating "\\" as a separator too) so the write cannot escape the
    # tmp dir through "../" or an absolute path.
    filename = os.path.basename(header_name.replace("\\", "/"))
    if filename in ("", ".", ".."):
        filename = media_id

    self.content = os.path.join(TmpDir().path(), filename)
    with open(self.content, "wb") as f:
        f.write(response.content)
self._prepare_fn = download_file
else:
raise NotImplementedError(
f"[wechat_kf] Unsupported message type: {self.msgtype}"