mirror of
https://github.com/zhayujie/chatgpt-on-wechat.git
synced 2026-06-02 00:57:41 +08:00
Adopt the same channel-level pattern as weixin/wecom_bot/feishu so the agent actually sees attachments the user sent: - IMAGE: agent mode never reads memory.USER_IMAGE_CACHE, so a photo sent before a question (e.g. "image" then 30s later "what's this?") used to be lost. Now lone images go into channel.file_cache and the next TEXT turn appends "[图片: <path>]" to the query before producing the context. Cross-batch image+text combinations now work as users expect. - FILE: previously dropped at the sync_msg filter and unsupported by WechatKfMessage. Add msgtype="file" parsing, download via the WeCom media API, preserve the original filename from Content-Disposition (RFC 5987 + plain forms), and route through the same file_cache pipeline as images, surfacing as "[文件: <path>]" in the next text turn.
123 lines
4.7 KiB
Python
123 lines
4.7 KiB
Python
# -*- coding=utf-8 -*-
|
|
"""
|
|
Adapter that turns a single `sync_msg` item from WeCom customer-service
|
|
into a CoW `ChatMessage` object.
|
|
"""
|
|
import os
|
|
import re
|
|
|
|
from wechatpy.enterprise import WeChatClient
|
|
|
|
from bridge.context import ContextType
|
|
from channel.chat_message import ChatMessage
|
|
from common.log import logger
|
|
from common.tmp_dir import TmpDir
|
|
|
|
|
|
def _extract_filename(content_disposition: str) -> str:
|
|
"""Best-effort parse of `filename` / `filename*` from a Content-Disposition
|
|
header. Returns '' when nothing usable is found."""
|
|
if not content_disposition:
|
|
return ""
|
|
# RFC 5987 form: filename*=UTF-8''xxx
|
|
m = re.search(r"filename\*=(?:[^'\"]*'[^']*'\s*)?([^;]+)", content_disposition)
|
|
if m:
|
|
try:
|
|
from urllib.parse import unquote
|
|
return unquote(m.group(1).strip().strip('"'))
|
|
except Exception:
|
|
return m.group(1).strip().strip('"')
|
|
m = re.search(r'filename\s*=\s*"?([^";]+)"?', content_disposition)
|
|
return m.group(1).strip() if m else ""
|
|
|
|
|
|
class WechatKfMessage(ChatMessage):
    """
    Adapter for one message item returned by WeCom customer-service sync_msg.

    msg structure (from cgi-bin/kf/sync_msg):
    {
        "msgid": "...",
        "send_time": 1700000000,
        "origin": 3,
        "msgtype": "text" | "image" | "voice" | "file" | ...,
        "open_kfid": "wkxxxx",
        "external_userid": "wmxxxx",
        "text": {"content": "..."},
        "image": {"media_id": "..."},
        "voice": {"media_id": "..."},
        "file": {"media_id": "..."},
        ...
    }

    For text messages `content` holds the text itself. For image / voice /
    file messages `content` holds a local path under TmpDir and a download
    callback is stored in `_prepare_fn`; nothing is downloaded in the
    constructor. (Presumably the base class invokes `_prepare_fn` before the
    content is consumed -- confirm against ChatMessage.)
    """

    def __init__(self, msg: dict, client: WeChatClient = None, is_group: bool = False):
        """Populate the message fields from a raw sync_msg dict.

        Args:
            msg: One raw item from the sync_msg response.
            client: WeCom client whose `media.download` is called by the
                download callbacks; needed for image / voice / file messages.
            is_group: Stored as-is on the instance.

        Raises:
            NotImplementedError: if `msgtype` is not text/image/voice/file.
        """
        # NOTE: the parent constructor is handed the raw dict from sync_msg
        # here, not a wechatpy parsed message object.
        super().__init__(msg)
        self.is_group = is_group
        self.msg_id = msg.get("msgid")
        self.create_time = msg.get("send_time")
        self.origin = msg.get("origin")
        self.msgtype = msg.get("msgtype")
        self.open_kfid = msg.get("open_kfid")
        self.external_userid = msg.get("external_userid")

        if self.msgtype == "text":
            self.ctype = ContextType.TEXT
            self.content = msg.get("text", {}).get("content", "")
        elif self.msgtype == "image":
            self.ctype = ContextType.IMAGE
            media_id = msg.get("image", {}).get("media_id", "")
            self.content = TmpDir().path() + media_id + ".jpg"

            def download_image():
                response = client.media.download(media_id)
                if response.status_code == 200:
                    with open(self.content, "wb") as f:
                        f.write(response.content)
                else:
                    logger.info(f"[wechat_kf] Failed to download image, {response.content}")

            self._prepare_fn = download_image
        elif self.msgtype == "voice":
            self.ctype = ContextType.VOICE
            media_id = msg.get("voice", {}).get("media_id", "")
            # WeCom returns amr by default; downstream voice pipeline will convert.
            self.content = TmpDir().path() + media_id + ".amr"

            def download_voice():
                response = client.media.download(media_id)
                if response.status_code == 200:
                    with open(self.content, "wb") as f:
                        f.write(response.content)
                else:
                    logger.info(f"[wechat_kf] Failed to download voice, {response.content}")

            self._prepare_fn = download_voice
        elif self.msgtype == "file":
            self.ctype = ContextType.FILE
            media_id = msg.get("file", {}).get("media_id", "")
            # Provisional path; rewritten in download_file() once we have
            # the original filename from Content-Disposition.
            self.content = TmpDir().path() + media_id

            def download_file():
                response = client.media.download(media_id)
                if response.status_code == 200:
                    filename = _extract_filename(
                        response.headers.get("Content-Disposition", "")
                    )
                    # The header is remote-controlled: keep only the final
                    # path component so the write cannot escape the tmp dir
                    # (e.g. "../../x" or an absolute path).
                    filename = os.path.basename(filename.replace("\\", "/"))
                    if filename in ("", ".", ".."):
                        filename = media_id
                    self.content = os.path.join(TmpDir().path(), filename)
                    with open(self.content, "wb") as f:
                        f.write(response.content)
                else:
                    logger.info(f"[wechat_kf] Failed to download file, {response.content}")

            self._prepare_fn = download_file
        else:
            raise NotImplementedError(
                f"[wechat_kf] Unsupported message type: {self.msgtype}"
            )

        # The external user is the peer on both "from" and "other" sides;
        # the customer-service account (open_kfid) is the recipient.
        self.from_user_id = self.external_userid
        self.to_user_id = self.open_kfid
        self.other_user_id = self.external_userid
|