fix(agent): don't drop tool_calls from empty-response retry

2026-07-17 11:07:11 +08:00 · 2026-04-18 20:50:40 +08:00
parent 26e630c2dd
commit c82515a927
4 changed files with 84 additions and 17 deletions
--- a/agent/protocol/agent_stream.py
+++ b/agent/protocol/agent_stream.py
@@ -254,8 +254,15 @@ class AgentStreamExecutor:
                            assistant_msg, tool_calls = self._call_llm_stream(retry_on_empty=False)
                            final_response = assistant_msg
                            
-                            # 如果还是空，才使用 fallback
-                            if not assistant_msg and not tool_calls:
+                            # If the LLM responded with tool_calls instead of text, fall through
+                            # to the tool execution path below (don't break the loop).
+                            if tool_calls:
+                                logger.info(
+                                    f"[Agent] LLM returned tool_calls in explicit-response retry, "
+                                    f"continuing to execute tools instead of breaking"
+                                )
+                            elif not assistant_msg:
+                                # Still empty (no text and no tool_calls): use fallback
                                logger.warning(f"[Agent] Still empty after explicit request")
                                final_response = (
                                    "抱歉，我暂时无法生成回复。请尝试换一种方式描述你的需求，或稍后再试。"
@@ -270,6 +277,9 @@ class AgentStreamExecutor:
                    else:
                        logger.info(f"💭 {assistant_msg[:150]}{'...' if len(assistant_msg) > 150 else ''}")
                    
+                    # If the explicit-response retry produced tool_calls, skip the break
+                    # and continue down to the tool execution branch in this same iteration.
+                    if not tool_calls:
                        logger.debug(f"✅ 完成 (无工具调用)")
                        self._emit_event("turn_end", {
                            "turn": turn,
--- a/channel/web/web_channel.py
+++ b/channel/web/web_channel.py
@@ -269,6 +269,25 @@ class WebChannel(ChatChannel):
                if tool_calls:
                    q.put({"type": "message_end", "has_tool_calls": True})

+            elif event_type == "agent_end":
+                # Safety net: if the agent finishes with an empty final_response,
+                # chat_channel skips _send_reply (because reply.content is empty),
+                # which means no "done" event is ever emitted and the SSE stream
+                # would hang until the 10-min idle timeout. Push a fallback "done"
+                # here so the frontend always gets closure.
+                final_response = data.get("final_response", "")
+                if not final_response or not str(final_response).strip():
+                    logger.warning(
+                        f"[WebChannel] agent_end with empty final_response for "
+                        f"request {request_id}, sending fallback done"
+                    )
+                    q.put({
+                        "type": "done",
+                        "content": "(模型未返回任何内容，请重试或换一种方式描述你的需求)",
+                        "request_id": request_id,
+                        "timestamp": time.time(),
+                    })
+
            elif event_type == "file_to_send":
                file_path = data.get("path", "")
                file_name = data.get("file_name", os.path.basename(file_path))
--- a/models/gemini/google_gemini_bot.py
+++ b/models/gemini/google_gemini_bot.py
@@ -653,6 +653,8 @@ class GoogleGeminiBot(Bot):
            chunk_count = 0
            last_finish_reason = None
            last_safety_ratings = None
+            raw_chunks = []  # Buffer raw chunks for diagnostics on empty response
+            non_text_part_keys = []  # Keys of parts that are neither text nor functionCall (e.g. thoughtSignature)
            
            for line in response.iter_lines():
                if not line:
@@ -670,10 +672,16 @@ class GoogleGeminiBot(Bot):
                try:
                    chunk_data = json.loads(line)
                    chunk_count += 1
+                    raw_chunks.append(chunk_data)
                    
                    candidates = chunk_data.get("candidates", [])
                    if not candidates:
-                        logger.debug("[Gemini] No candidates in chunk")
+                        # Could be a chunk with only usageMetadata / promptFeedback
+                        prompt_feedback = chunk_data.get("promptFeedback")
+                        if prompt_feedback:
+                            logger.warning(f"[Gemini] promptFeedback in chunk: {prompt_feedback}")
+                        else:
+                            logger.debug(f"[Gemini] No candidates in chunk: {chunk_data}")
                        continue
                    
                    candidate = candidates[0]
@@ -688,10 +696,16 @@ class GoogleGeminiBot(Bot):
                    parts = content.get("parts", [])
                    
                    if not parts:
-                        logger.debug("[Gemini] No parts in candidate content")
+                        logger.debug(f"[Gemini] No parts in candidate content, candidate={candidate}")
                    
                    # Stream text content
                    for part in parts:
+                        # Record the keys of parts that carry neither text nor functionCall, for diagnostics
+                        if "text" not in part and "functionCall" not in part:
+                            for k in part.keys():
+                                if k not in non_text_part_keys:
+                                    non_text_part_keys.append(k)
+
                        if "text" in part and part["text"]:
                            has_content = True
                            yield {
@@ -721,7 +735,7 @@ class GoogleGeminiBot(Bot):
                            })
                    
                except json.JSONDecodeError as je:
-                    logger.debug(f"[Gemini] JSON decode error: {je}")
+                    logger.debug(f"[Gemini] JSON decode error: {je}, line={line[:500]}")
                    continue
            
            # Send tool calls if any were collected
@@ -739,9 +753,24 @@ class GoogleGeminiBot(Bot):
                }
                has_sent_tool_calls = True
            
-            # 如果返回空响应，记录详细警告
+            # If the response is empty, dump the full raw chunks for diagnostics
            if not has_content and not all_tool_calls:
-                logger.warning(f"[Gemini] ⚠️  Empty response detected!")
+                logger.warning(
+                    f"[Gemini] ⚠️  Empty response detected! "
+                    f"chunks={chunk_count}, finish_reason={last_finish_reason}, "
+                    f"non_text_part_keys={non_text_part_keys}"
+                )
+                if last_safety_ratings:
+                    logger.warning(f"[Gemini] safetyRatings: {last_safety_ratings}")
+                # Dump raw chunks (truncate each to avoid huge logs)
+                try:
+                    for i, ch in enumerate(raw_chunks):
+                        ch_str = json.dumps(ch, ensure_ascii=False)
+                        if len(ch_str) > 2000:
+                            ch_str = ch_str[:2000] + f"...[truncated, total {len(ch_str)} chars]"
+                        logger.warning(f"[Gemini] raw chunk[{i}]: {ch_str}")
+                except Exception as dump_err:
+                    logger.warning(f"[Gemini] Failed to dump raw chunks: {dump_err}")
            
            # Final chunk
            yield {
--- a/plugins/cow_cli/cow_cli.py
+++ b/plugins/cow_cli/cow_cli.py
@@ -358,7 +358,7 @@ class CowCliPlugin(Plugin):
        return f"⚙️ {key}: {val}"

    def _config_set(self, key: str, value_str: str) -> str:
-        from config import conf, load_config
+        from config import conf, load_config, available_setting
        import json as _json

        if key not in self._CONFIG_WRITABLE:
@@ -402,10 +402,19 @@ class CowCliPlugin(Plugin):

        # Sync updated values to environment variables so that load_config()
        # won't overwrite the new value with a stale env var (common in Docker).
-        from config import available_setting
+        # Match env var keys case-insensitively (Docker compose typically uses
+        # upper-case like MODEL, but lower-case is also possible).
+        synced_envs = {}
        for k, v in updates.items():
-            if k in available_setting and k.upper() in os.environ:
-                os.environ[k.upper()] = str(v)
+            if k not in available_setting:
+                continue
+            str_val = str(v)
+            k_lower = k.lower()
+            for env_key in list(os.environ):
+                if env_key.lower() == k_lower:
+                    os.environ[env_key] = str_val
+                    synced_envs[env_key] = str_val
+        logger.info(f"[CowCli] config update: {updates}, synced envs: {synced_envs}")

        try:
            load_config()