From 14a119c48ce9bc4c602a228eee3fb18d60971a1d Mon Sep 17 00:00:00 2001 From: zhayujie Date: Sat, 18 Apr 2026 21:18:27 +0800 Subject: [PATCH] fix(gemini): solving the problem of tool call not returning --- agent/protocol/agent_stream.py | 12 ++++++++ channel/web/static/js/console.js | 34 +++++++++++++++++++++- models/gemini/google_gemini_bot.py | 46 +++++++++++++++++++++++++++++- 3 files changed, 90 insertions(+), 2 deletions(-) diff --git a/agent/protocol/agent_stream.py b/agent/protocol/agent_stream.py index d7fc5066..3e6bc4e4 100644 --- a/agent/protocol/agent_stream.py +++ b/agent/protocol/agent_stream.py @@ -241,6 +241,9 @@ class AgentStreamExecutor: if turn > 1: logger.info(f"[Agent] Requesting explicit response from LLM...") + # Remember position so we can remove the injected prompt later + prompt_insert_idx = len(self.messages) + # 添加一条消息,明确要求回复用户 self.messages.append({ "role": "user", @@ -254,6 +257,15 @@ class AgentStreamExecutor: assistant_msg, tool_calls = self._call_llm_stream(retry_on_empty=False) final_response = assistant_msg + # Remove the injected prompt from history so it doesn't + # appear as a user message in persisted conversations. + # _call_llm_stream may have appended an assistant message + # after the prompt, so we locate and remove only the prompt. + if (prompt_insert_idx < len(self.messages) + and self.messages[prompt_insert_idx].get("role") == "user"): + self.messages.pop(prompt_insert_idx) + logger.debug("[Agent] Removed injected explicit-response prompt from message history") + # If LLM responded with tool_calls instead of text, fall through # to the tool execution path below (don't break the loop). 
if tool_calls: diff --git a/channel/web/static/js/console.js b/channel/web/static/js/console.js index 19d2c940..099ef32a 100644 --- a/channel/web/static/js/console.js +++ b/channel/web/static/js/console.js @@ -339,6 +339,7 @@ function createMd() { const md = createMd(); const VIDEO_EXT_RE = /\.(?:mp4|webm|mov|avi|mkv)$/i; // tested against URL without query string +const IMAGE_EXT_RE = /\.(?:jpg|jpeg|png|gif|webp|bmp|svg)$/i; // tested against URL without query string function _buildVideoHtml(url) { const fileName = url.split('/').pop().split('?')[0]; @@ -351,6 +352,15 @@ function _buildVideoHtml(url) { ` ${escapeHtml(fileName)}`; } +function _buildImageHtml(url) { + const safeUrl = url.replace(/"/g, '"'); + return `
` + + `image` + + `
`; +} + function injectVideoPlayers(html) { // Step 1: replace markdown-it anchor tags whose href points to a video file. const step1 = html.replace( @@ -369,10 +379,32 @@ function injectVideoPlayers(html) { }).join(''); } +// Convert image URLs into inline previews. Mirrors injectVideoPlayers but for images. +// Handles three cases produced by markdown-it: +// 1. ... (bare URL or autolink that linkify turned into an anchor) +// 2. (markdown image syntax) — leave as-is, but normalize style +// 3. raw URL still present in a text node — only as a safety net +function injectImagePreviews(html) { + // Step 1: anchor whose href points to an image file -> replace with preview. + const step1 = html.replace( + /]*>[^<]*<\/a>/gi, + (match, url) => IMAGE_EXT_RE.test(url.split('?')[0]) ? _buildImageHtml(url) : match + ); + // Step 2: bare image URLs left in text nodes (rare — markdown-it's linkify usually catches them). + return step1.split(/(<[^>]+>)/).map((chunk, idx) => { + if (idx % 2 !== 0) return chunk; + return chunk.replace(/https?:\/\/\S+/gi, (url) => { + const bare = url.replace(/[),.\s]+$/, ''); + return IMAGE_EXT_RE.test(bare.split('?')[0]) ? _buildImageHtml(bare) : url; + }); + }).join(''); +} + function renderMarkdown(text) { try { const html = md.render(text); - return injectVideoPlayers(html); + // Order matters: video first (more specific), then image. + return injectImagePreviews(injectVideoPlayers(html)); } catch (e) { return text.replace(/\n/g, '
'); } } diff --git a/models/gemini/google_gemini_bot.py b/models/gemini/google_gemini_bot.py index e379d912..ab3eae4c 100644 --- a/models/gemini/google_gemini_bot.py +++ b/models/gemini/google_gemini_bot.py @@ -335,6 +335,18 @@ class GoogleGeminiBot(Bot): # Convert role gemini_role = "user" if role in ["user", "tool"] else "model" + # For model messages that carry original Gemini parts (with + # thoughtSignature etc.), use them directly instead of + # reconstructing from Claude-format tool_use blocks. + if gemini_role == "model" and "_gemini_raw_parts" in msg: + raw_parts = msg["_gemini_raw_parts"] + if raw_parts: + payload["contents"].append({ + "role": "model", + "parts": raw_parts + }) + continue + # Handle different content formats parts = [] @@ -398,6 +410,17 @@ class GoogleGeminiBot(Bot): else: logger.warning(f"[Gemini] Skip invalid image block: {str(block)[:200]}") + elif block_type == "tool_use": + # Convert Claude tool_use to Gemini functionCall + fc_name = block.get("name", "unknown") + fc_args = block.get("input") or {} + parts.append({ + "functionCall": { + "name": fc_name, + "args": fc_args + } + }) + elif block_type == "tool_result": # Convert Claude tool_result to Gemini functionResponse tool_use_id = block.get("tool_use_id") @@ -648,6 +671,7 @@ class GoogleGeminiBot(Bot): """Handle Gemini REST API stream response""" try: all_tool_calls = [] + all_raw_parts = [] # Preserve all Gemini parts (incl. thoughtSignature) for round-trip has_sent_tool_calls = False has_content = False # Track if any content was sent chunk_count = 0 @@ -733,6 +757,9 @@ class GoogleGeminiBot(Bot): "arguments": json.dumps(fc.get("args", {})) } }) + + # Preserve all raw parts for round-trip (thoughtSignature, etc.) 
+ all_raw_parts.extend(parts) except json.JSONDecodeError as je: logger.debug(f"[Gemini] JSON decode error: {je}, line={line[:500]}") @@ -740,6 +767,9 @@ class GoogleGeminiBot(Bot): # Send tool calls if any were collected if all_tool_calls and not has_sent_tool_calls: + delta = {"tool_calls": all_tool_calls} + if all_raw_parts: + delta["_gemini_raw_parts"] = all_raw_parts yield { "id": f"chatcmpl-{time.time()}", "object": "chat.completion.chunk", @@ -747,11 +777,25 @@ class GoogleGeminiBot(Bot): "model": model_name, "choices": [{ "index": 0, - "delta": {"tool_calls": all_tool_calls}, + "delta": delta, "finish_reason": None }] } has_sent_tool_calls = True + elif not has_sent_tool_calls and all_raw_parts: + # No tool calls but we have raw parts (e.g. text-only response with + # thoughtSignature) — pass them through for round-trip fidelity. + yield { + "id": f"chatcmpl-{time.time()}", + "object": "chat.completion.chunk", + "created": int(time.time()), + "model": model_name, + "choices": [{ + "index": 0, + "delta": {"_gemini_raw_parts": all_raw_parts}, + "finish_reason": None + }] + } # 如果返回空响应,dump 完整原始 chunks 以便诊断 if not has_content and not all_tool_calls: