From c82515a927c4bb30bd7b7e9362c4f6b1f79ec4e6 Mon Sep 17 00:00:00 2001
From: zhayujie <yjzha1996@163.com>
Date: Sat, 18 Apr 2026 20:50:40 +0800
Subject: [PATCH] fix(agent): don't drop tool_calls from empty-response retry

---
 agent/protocol/agent_stream.py     | 26 ++++++++++++++------
 channel/web/web_channel.py         | 19 +++++++++++++++
 models/gemini/google_gemini_bot.py | 39 ++++++++++++++++++++++++++----
 plugins/cow_cli/cow_cli.py         | 17 ++++++++++---
 4 files changed, 84 insertions(+), 17 deletions(-)

diff --git a/agent/protocol/agent_stream.py b/agent/protocol/agent_stream.py
index dd830457..d7fc5066 100644
--- a/agent/protocol/agent_stream.py
+++ b/agent/protocol/agent_stream.py
@@ -254,8 +254,15 @@ class AgentStreamExecutor:
                             assistant_msg, tool_calls = self._call_llm_stream(retry_on_empty=False)
                             final_response = assistant_msg
                             
-                            # 如果还是空，才使用 fallback
-                            if not assistant_msg and not tool_calls:
+                            # If LLM responded with tool_calls instead of text, fall through
+                            # to the tool execution path below (don't break the loop).
+                            if tool_calls:
+                                logger.info(
+                                    f"[Agent] LLM returned tool_calls in explicit-response retry, "
+                                    f"continuing to execute tools instead of breaking"
+                                )
+                            elif not assistant_msg:
+                                # Still empty (no text and no tool_calls): use fallback
                                 logger.warning(f"[Agent] Still empty after explicit request")
                                 final_response = (
                                     "抱歉，我暂时无法生成回复。请尝试换一种方式描述你的需求，或稍后再试。"
@@ -270,12 +277,15 @@ class AgentStreamExecutor:
                     else:
                         logger.info(f"💭 {assistant_msg[:150]}{'...' if len(assistant_msg) > 150 else ''}")
                     
-                    logger.debug(f"✅ 完成 (无工具调用)")
-                    self._emit_event("turn_end", {
-                        "turn": turn,
-                        "has_tool_calls": False
-                    })
-                    break
+                    # If the explicit-response retry produced tool_calls, skip the break
+                    # and continue down to the tool execution branch in this same iteration.
+                    if not tool_calls:
+                        logger.debug(f"✅ 完成 (无工具调用)")
+                        self._emit_event("turn_end", {
+                            "turn": turn,
+                            "has_tool_calls": False
+                        })
+                        break
 
                 # Log tool calls with arguments
                 tool_calls_str = []
diff --git a/channel/web/web_channel.py b/channel/web/web_channel.py
index 0560c202..637cd7f6 100644
--- a/channel/web/web_channel.py
+++ b/channel/web/web_channel.py
@@ -269,6 +269,25 @@ class WebChannel(ChatChannel):
                 if tool_calls:
                     q.put({"type": "message_end", "has_tool_calls": True})
 
+            elif event_type == "agent_end":
+                # Safety net: if the agent finishes with an empty final_response,
+                # chat_channel skips _send_reply (because reply.content is empty),
+                # which means no "done" event is ever emitted and the SSE stream
+                # would hang until the 10-min idle timeout. Push a fallback "done"
+                # here so the frontend always gets closure.
+                final_response = data.get("final_response", "")
+                if not final_response or not str(final_response).strip():
+                    logger.warning(
+                        f"[WebChannel] agent_end with empty final_response for "
+                        f"request {request_id}, sending fallback done"
+                    )
+                    q.put({
+                        "type": "done",
+                        "content": "(模型未返回任何内容，请重试或换一种方式描述你的需求)",
+                        "request_id": request_id,
+                        "timestamp": time.time(),
+                    })
+
             elif event_type == "file_to_send":
                 file_path = data.get("path", "")
                 file_name = data.get("file_name", os.path.basename(file_path))
diff --git a/models/gemini/google_gemini_bot.py b/models/gemini/google_gemini_bot.py
index aa7199ca..e379d912 100644
--- a/models/gemini/google_gemini_bot.py
+++ b/models/gemini/google_gemini_bot.py
@@ -653,6 +653,8 @@ class GoogleGeminiBot(Bot):
             chunk_count = 0
             last_finish_reason = None
             last_safety_ratings = None
+            raw_chunks = []  # Buffer raw chunks for diagnostics on empty response
+            non_text_part_keys = []  # Track non-text/functionCall part keys (e.g. thoughtSignature)
             
             for line in response.iter_lines():
                 if not line:
@@ -670,10 +672,16 @@ class GoogleGeminiBot(Bot):
                 try:
                     chunk_data = json.loads(line)
                     chunk_count += 1
+                    raw_chunks.append(chunk_data)
                     
                     candidates = chunk_data.get("candidates", [])
                     if not candidates:
-                        logger.debug("[Gemini] No candidates in chunk")
+                        # Could be a chunk with only usageMetadata / promptFeedback
+                        prompt_feedback = chunk_data.get("promptFeedback")
+                        if prompt_feedback:
+                            logger.warning(f"[Gemini] promptFeedback in chunk: {prompt_feedback}")
+                        else:
+                            logger.debug(f"[Gemini] No candidates in chunk: {chunk_data}")
                         continue
                     
                     candidate = candidates[0]
@@ -688,10 +696,16 @@ class GoogleGeminiBot(Bot):
                     parts = content.get("parts", [])
                     
                     if not parts:
-                        logger.debug("[Gemini] No parts in candidate content")
+                        logger.debug(f"[Gemini] No parts in candidate content, candidate={candidate}")
                     
                     # Stream text content
                     for part in parts:
+                        # Track unknown part types for diagnostics
+                        if "text" not in part and "functionCall" not in part:
+                            for k in part.keys():
+                                if k not in non_text_part_keys:
+                                    non_text_part_keys.append(k)
+
                         if "text" in part and part["text"]:
                             has_content = True
                             yield {
@@ -721,7 +735,7 @@ class GoogleGeminiBot(Bot):
                             })
                     
                 except json.JSONDecodeError as je:
-                    logger.debug(f"[Gemini] JSON decode error: {je}")
+                    logger.debug(f"[Gemini] JSON decode error: {je}, line={line[:500]}")
                     continue
             
             # Send tool calls if any were collected
@@ -739,9 +753,24 @@ class GoogleGeminiBot(Bot):
                 }
                 has_sent_tool_calls = True
             
-            # 如果返回空响应，记录详细警告
+            # On an empty response, log the buffered raw chunks (each truncated) to aid diagnosis
             if not has_content and not all_tool_calls:
-                logger.warning(f"[Gemini] ⚠️  Empty response detected!")
+                logger.warning(
+                    f"[Gemini] ⚠️  Empty response detected! "
+                    f"chunks={chunk_count}, finish_reason={last_finish_reason}, "
+                    f"non_text_part_keys={non_text_part_keys}"
+                )
+                if last_safety_ratings:
+                    logger.warning(f"[Gemini] safetyRatings: {last_safety_ratings}")
+                # Dump raw chunks (truncate each to avoid huge logs)
+                try:
+                    for i, ch in enumerate(raw_chunks):
+                        ch_str = json.dumps(ch, ensure_ascii=False)
+                        if len(ch_str) > 2000:
+                            ch_str = ch_str[:2000] + f"...[truncated, total {len(ch_str)} chars]"
+                        logger.warning(f"[Gemini] raw chunk[{i}]: {ch_str}")
+                except Exception as dump_err:
+                    logger.warning(f"[Gemini] Failed to dump raw chunks: {dump_err}")
             
             # Final chunk
             yield {
diff --git a/plugins/cow_cli/cow_cli.py b/plugins/cow_cli/cow_cli.py
index b7c97d46..e553a3ef 100644
--- a/plugins/cow_cli/cow_cli.py
+++ b/plugins/cow_cli/cow_cli.py
@@ -358,7 +358,7 @@ class CowCliPlugin(Plugin):
         return f"⚙️ {key}: {val}"
 
     def _config_set(self, key: str, value_str: str) -> str:
-        from config import conf, load_config
+        from config import conf, load_config, available_setting
         import json as _json
 
         if key not in self._CONFIG_WRITABLE:
@@ -402,10 +402,19 @@ class CowCliPlugin(Plugin):
 
         # Sync updated values to environment variables so that load_config()
         # won't overwrite the new value with a stale env var (common in Docker).
-        from config import available_setting
+        # Match env var keys case-insensitively (Docker compose typically uses
+        # upper-case like MODEL, but lower-case is also possible).
+        synced_envs = {}
         for k, v in updates.items():
-            if k in available_setting and k.upper() in os.environ:
-                os.environ[k.upper()] = str(v)
+            if k not in available_setting:
+                continue
+            str_val = str(v)
+            k_lower = k.lower()
+            for env_key in list(os.environ):
+                if env_key.lower() == k_lower:
+                    os.environ[env_key] = str_val
+                    synced_envs[env_key] = str_val
+        logger.info(f"[CowCli] config update: {updates}, synced envs: {synced_envs}")
 
         try:
             load_config()