Merge pull request #2848 from 6vision/fix/wechatmp-passive-merge-replies

fix(wechatmp): improve passive reply multi-turn output and local image sending
2026-07-20 13:47:15 +08:00 · 2026-05-30 17:12:36 +08:00
parent aa3f48e93c fe8b8fe831
commit 3c161df526
2 changed files with 45 additions and 12 deletions
--- a/channel/wechatmp/passive_reply.py
+++ b/channel/wechatmp/passive_reply.py
@@ -103,14 +103,21 @@ class Query:
                task_running = True
                waiting_until = request_time + 4
                while time.time() < waiting_until:
-                    if from_user in channel.running:
-                        time.sleep(0.1)
-                    else:
+                    if from_user not in channel.running:
                        task_running = False
                        break
+                    # Task still running, but if it has already produced cached
+                    # segments (e.g. multi-turn thinking output), return them now
+                    # instead of forcing the user to wait for the whole task. The
+                    # remaining segments are fetched by the user's next message.
+                    if channel.cache_dict.get(from_user):
+                        break
+                    time.sleep(0.1)

                reply_text = ""
-                if task_running:
+                # Only fall back to retry / "thinking" hint when the task is still
+                # running AND there is nothing cached to send yet.
+                if task_running and not channel.cache_dict.get(from_user):
                    if request_cnt < 3:
                        # waiting for timeout (the POST request will be closed by Wechat official server)
                        time.sleep(2)
@@ -131,8 +138,22 @@ class Query:

                # Only one request can access to the cached data
                try:
-                    (reply_type, reply_content) = channel.cache_dict[from_user].pop(0)
-                    if not channel.cache_dict[from_user]:  # If popping the message makes the list empty, delete the user entry from cache
+                    # WeChat passive reply allows only a single reply per request.
+                    # To avoid forcing the user to send an extra message for every
+                    # segment of multi-turn agent output, drain all consecutive
+                    # cached text segments at once and merge them into one reply.
+                    # Media (voice/image) can only be returned one item per reply, so a
+                    # non-text entry ends the text merge and is returned on its own.
+                    cached = channel.cache_dict[from_user]
+                    if cached[0][0] == "text":
+                        reply_type = "text"
+                        merged_parts = []
+                        while cached and cached[0][0] == "text":
+                            merged_parts.append(cached.pop(0)[1])
+                        reply_content = "\n\n".join(merged_parts)
+                    else:
+                        (reply_type, reply_content) = cached.pop(0)
+                    if not channel.cache_dict[from_user]:  # If draining empties the list, delete the user entry from cache
                        del channel.cache_dict[from_user]
                except IndexError:
                    return "success"
--- a/channel/wechatmp/wechatmp_channel.py
+++ b/channel/wechatmp/wechatmp_channel.py
@@ -134,10 +134,16 @@ class WechatMPChannel(ChatChannel):

            elif reply.type == ReplyType.IMAGE_URL:  # 从网络下载图片
                img_url = reply.content
-                pic_res = requests.get(img_url, stream=True)
                image_storage = io.BytesIO()
-                for block in pic_res.iter_content(1024):
-                    image_storage.write(block)
+                if img_url.startswith("file://") or os.path.isfile(img_url):
+                    # Local file produced by the agent (e.g. a generated image)
+                    local_path = img_url[len("file://"):] if img_url.startswith("file://") else img_url
+                    with open(local_path, "rb") as f:
+                        image_storage.write(f.read())
+                else:
+                    pic_res = requests.get(img_url, stream=True)
+                    for block in pic_res.iter_content(1024):
+                        image_storage.write(block)
                image_storage.seek(0)
                image_type = imghdr.what(image_storage)
                filename = receiver + "-" + str(context["msg"].msg_id) + "." + image_type
@@ -258,10 +264,16 @@ class WechatMPChannel(ChatChannel):
                logger.info("[wechatmp] Do send voice to {}".format(receiver))
            elif reply.type == ReplyType.IMAGE_URL:  # 从网络下载图片
                img_url = reply.content
-                pic_res = requests.get(img_url, stream=True)
                image_storage = io.BytesIO()
-                for block in pic_res.iter_content(1024):
-                    image_storage.write(block)
+                if img_url.startswith("file://") or os.path.isfile(img_url):
+                    # Local file produced by the agent (e.g. a generated image)
+                    local_path = img_url[len("file://"):] if img_url.startswith("file://") else img_url
+                    with open(local_path, "rb") as f:
+                        image_storage.write(f.read())
+                else:
+                    pic_res = requests.get(img_url, stream=True)
+                    for block in pic_res.iter_content(1024):
+                        image_storage.write(block)
                image_storage.seek(0)
                image_type = imghdr.what(image_storage)
                filename = receiver + "-" + str(context["msg"].msg_id) + "." + image_type