mirror of
https://github.com/zhayujie/chatgpt-on-wechat.git
synced 2026-06-02 00:57:41 +08:00
fix(gemini): solve the problem of tool calls not returning
This commit is contained in:
@@ -241,6 +241,9 @@ class AgentStreamExecutor:
|
|||||||
if turn > 1:
|
if turn > 1:
|
||||||
logger.info(f"[Agent] Requesting explicit response from LLM...")
|
logger.info(f"[Agent] Requesting explicit response from LLM...")
|
||||||
|
|
||||||
|
# Remember position so we can remove the injected prompt later
|
||||||
|
prompt_insert_idx = len(self.messages)
|
||||||
|
|
||||||
# 添加一条消息,明确要求回复用户
|
# 添加一条消息,明确要求回复用户
|
||||||
self.messages.append({
|
self.messages.append({
|
||||||
"role": "user",
|
"role": "user",
|
||||||
@@ -254,6 +257,15 @@ class AgentStreamExecutor:
|
|||||||
assistant_msg, tool_calls = self._call_llm_stream(retry_on_empty=False)
|
assistant_msg, tool_calls = self._call_llm_stream(retry_on_empty=False)
|
||||||
final_response = assistant_msg
|
final_response = assistant_msg
|
||||||
|
|
||||||
|
# Remove the injected prompt from history so it doesn't
|
||||||
|
# appear as a user message in persisted conversations.
|
||||||
|
# _call_llm_stream may have appended an assistant message
|
||||||
|
# after the prompt, so we locate and remove only the prompt.
|
||||||
|
if (prompt_insert_idx < len(self.messages)
|
||||||
|
and self.messages[prompt_insert_idx].get("role") == "user"):
|
||||||
|
self.messages.pop(prompt_insert_idx)
|
||||||
|
logger.debug("[Agent] Removed injected explicit-response prompt from message history")
|
||||||
|
|
||||||
# If LLM responded with tool_calls instead of text, fall through
|
# If LLM responded with tool_calls instead of text, fall through
|
||||||
# to the tool execution path below (don't break the loop).
|
# to the tool execution path below (don't break the loop).
|
||||||
if tool_calls:
|
if tool_calls:
|
||||||
|
|||||||
@@ -339,6 +339,7 @@ function createMd() {
|
|||||||
const md = createMd();
|
const md = createMd();
|
||||||
|
|
||||||
const VIDEO_EXT_RE = /\.(?:mp4|webm|mov|avi|mkv)$/i; // tested against URL without query string
|
const VIDEO_EXT_RE = /\.(?:mp4|webm|mov|avi|mkv)$/i; // tested against URL without query string
|
||||||
|
const IMAGE_EXT_RE = /\.(?:jpg|jpeg|png|gif|webp|bmp|svg)$/i; // tested against URL without query string
|
||||||
|
|
||||||
function _buildVideoHtml(url) {
|
function _buildVideoHtml(url) {
|
||||||
const fileName = url.split('/').pop().split('?')[0];
|
const fileName = url.split('/').pop().split('?')[0];
|
||||||
@@ -351,6 +352,15 @@ function _buildVideoHtml(url) {
|
|||||||
`<i class="fas fa-download"></i> ${escapeHtml(fileName)}</a></div>`;
|
`<i class="fas fa-download"></i> ${escapeHtml(fileName)}</a></div>`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Build the inline preview markup for a single image URL.
 *
 * The URL is interpolated into a double-quoted `src` attribute, so characters
 * that could terminate the attribute or open a tag are HTML-escaped first.
 * `&` is deliberately left untouched: the URL is presumably taken from
 * already-rendered HTML (where `&` is already `&amp;`), and escaping it again
 * would corrupt query strings — TODO confirm against callers.
 *
 * The click handler reads the URL back from `this.src` instead of embedding it
 * in a JavaScript string literal, so no URL content ever reaches a script
 * context (a `'` in the URL could otherwise break out of the string).
 *
 * @param {string} url - Absolute image URL.
 * @returns {string} HTML for a block-level, click-to-open image preview.
 */
function _buildImageHtml(url) {
    const ATTR_ESCAPES = { '"': '&quot;', "'": '&#39;', '<': '&lt;', '>': '&gt;' };
    const safeUrl = String(url).replace(/["'<>]/g, (ch) => ATTR_ESCAPES[ch]);
    return `<div style="margin:10px 0;">` +
        `<img src="${safeUrl}" alt="image" loading="lazy" ` +
        `onclick="window.open(this.src,'_blank')" ` +
        `style="max-width:600px;width:100%;border-radius:10px;box-shadow:0 2px 8px rgba(0,0,0,0.15);display:block;cursor:pointer;">` +
        `</div>`;
}
|
||||||
|
|
||||||
function injectVideoPlayers(html) {
|
function injectVideoPlayers(html) {
|
||||||
// Step 1: replace markdown-it anchor tags whose href points to a video file.
|
// Step 1: replace markdown-it anchor tags whose href points to a video file.
|
||||||
const step1 = html.replace(
|
const step1 = html.replace(
|
||||||
@@ -369,10 +379,32 @@ function injectVideoPlayers(html) {
|
|||||||
}).join('');
|
}).join('');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Turn image URLs in rendered markdown HTML into inline <img> previews.
 *
 * Two passes are made over the HTML:
 *   1. An <a href="..."> whose href (query string ignored) ends in an image
 *      extension is replaced wholesale by a preview.
 *   2. Any http(s) URL still sitting in a text chunk (outside tags) is
 *      replaced the same way, after trailing `)`, `,`, `.` and whitespace
 *      are trimmed from it.
 * Existing tags — including <img> elements — are passed through unchanged.
 *
 * @param {string} html - HTML to post-process.
 * @returns {string} HTML with image links replaced by previews.
 */
function injectImagePreviews(html) {
    // Pass 1: anchors pointing at an image file become previews.
    const imageAnchorRe = /<a\s+href="(https?:\/\/[^"]+)"[^>]*>[^<]*<\/a>/gi;
    const afterAnchors = html.replace(imageAnchorRe, (anchor, href) => {
        const withoutQuery = href.split('?')[0];
        if (IMAGE_EXT_RE.test(withoutQuery)) {
            return _buildImageHtml(href);
        }
        return anchor;
    });

    // Pass 2: splitting on a capturing group puts tags at odd positions and
    // text at even positions, so only the even chunks are scanned for URLs.
    const pieces = afterAnchors.split(/(<[^>]+>)/);
    const rewritten = pieces.map((piece, pos) => {
        const isTag = pos % 2 === 1;
        if (isTag) {
            return piece;
        }
        return piece.replace(/https?:\/\/\S+/gi, (rawUrl) => {
            const trimmed = rawUrl.replace(/[),.\s]+$/, '');
            const looksLikeImage = IMAGE_EXT_RE.test(trimmed.split('?')[0]);
            return looksLikeImage ? _buildImageHtml(trimmed) : rawUrl;
        });
    });
    return rewritten.join('');
}
|
||||||
|
|
||||||
function renderMarkdown(text) {
|
function renderMarkdown(text) {
|
||||||
try {
|
try {
|
||||||
const html = md.render(text);
|
const html = md.render(text);
|
||||||
return injectVideoPlayers(html);
|
// Order matters: video first (more specific), then image.
|
||||||
|
return injectImagePreviews(injectVideoPlayers(html));
|
||||||
}
|
}
|
||||||
catch (e) { return text.replace(/\n/g, '<br>'); }
|
catch (e) { return text.replace(/\n/g, '<br>'); }
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -335,6 +335,18 @@ class GoogleGeminiBot(Bot):
|
|||||||
# Convert role
|
# Convert role
|
||||||
gemini_role = "user" if role in ["user", "tool"] else "model"
|
gemini_role = "user" if role in ["user", "tool"] else "model"
|
||||||
|
|
||||||
|
# For model messages that carry original Gemini parts (with
|
||||||
|
# thoughtSignature etc.), use them directly instead of
|
||||||
|
# reconstructing from Claude-format tool_use blocks.
|
||||||
|
if gemini_role == "model" and "_gemini_raw_parts" in msg:
|
||||||
|
raw_parts = msg["_gemini_raw_parts"]
|
||||||
|
if raw_parts:
|
||||||
|
payload["contents"].append({
|
||||||
|
"role": "model",
|
||||||
|
"parts": raw_parts
|
||||||
|
})
|
||||||
|
continue
|
||||||
|
|
||||||
# Handle different content formats
|
# Handle different content formats
|
||||||
parts = []
|
parts = []
|
||||||
|
|
||||||
@@ -398,6 +410,17 @@ class GoogleGeminiBot(Bot):
|
|||||||
else:
|
else:
|
||||||
logger.warning(f"[Gemini] Skip invalid image block: {str(block)[:200]}")
|
logger.warning(f"[Gemini] Skip invalid image block: {str(block)[:200]}")
|
||||||
|
|
||||||
|
elif block_type == "tool_use":
|
||||||
|
# Convert Claude tool_use to Gemini functionCall
|
||||||
|
fc_name = block.get("name", "unknown")
|
||||||
|
fc_args = block.get("input") or {}
|
||||||
|
parts.append({
|
||||||
|
"functionCall": {
|
||||||
|
"name": fc_name,
|
||||||
|
"args": fc_args
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
elif block_type == "tool_result":
|
elif block_type == "tool_result":
|
||||||
# Convert Claude tool_result to Gemini functionResponse
|
# Convert Claude tool_result to Gemini functionResponse
|
||||||
tool_use_id = block.get("tool_use_id")
|
tool_use_id = block.get("tool_use_id")
|
||||||
@@ -648,6 +671,7 @@ class GoogleGeminiBot(Bot):
|
|||||||
"""Handle Gemini REST API stream response"""
|
"""Handle Gemini REST API stream response"""
|
||||||
try:
|
try:
|
||||||
all_tool_calls = []
|
all_tool_calls = []
|
||||||
|
all_raw_parts = [] # Preserve all Gemini parts (incl. thoughtSignature) for round-trip
|
||||||
has_sent_tool_calls = False
|
has_sent_tool_calls = False
|
||||||
has_content = False # Track if any content was sent
|
has_content = False # Track if any content was sent
|
||||||
chunk_count = 0
|
chunk_count = 0
|
||||||
@@ -733,6 +757,9 @@ class GoogleGeminiBot(Bot):
|
|||||||
"arguments": json.dumps(fc.get("args", {}))
|
"arguments": json.dumps(fc.get("args", {}))
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# Preserve all raw parts for round-trip (thoughtSignature, etc.)
|
||||||
|
all_raw_parts.extend(parts)
|
||||||
|
|
||||||
except json.JSONDecodeError as je:
|
except json.JSONDecodeError as je:
|
||||||
logger.debug(f"[Gemini] JSON decode error: {je}, line={line[:500]}")
|
logger.debug(f"[Gemini] JSON decode error: {je}, line={line[:500]}")
|
||||||
@@ -740,6 +767,9 @@ class GoogleGeminiBot(Bot):
|
|||||||
|
|
||||||
# Send tool calls if any were collected
|
# Send tool calls if any were collected
|
||||||
if all_tool_calls and not has_sent_tool_calls:
|
if all_tool_calls and not has_sent_tool_calls:
|
||||||
|
delta = {"tool_calls": all_tool_calls}
|
||||||
|
if all_raw_parts:
|
||||||
|
delta["_gemini_raw_parts"] = all_raw_parts
|
||||||
yield {
|
yield {
|
||||||
"id": f"chatcmpl-{time.time()}",
|
"id": f"chatcmpl-{time.time()}",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@@ -747,11 +777,25 @@ class GoogleGeminiBot(Bot):
|
|||||||
"model": model_name,
|
"model": model_name,
|
||||||
"choices": [{
|
"choices": [{
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"delta": {"tool_calls": all_tool_calls},
|
"delta": delta,
|
||||||
"finish_reason": None
|
"finish_reason": None
|
||||||
}]
|
}]
|
||||||
}
|
}
|
||||||
has_sent_tool_calls = True
|
has_sent_tool_calls = True
|
||||||
|
elif not has_sent_tool_calls and all_raw_parts:
|
||||||
|
# No tool calls but we have raw parts (e.g. text-only response with
|
||||||
|
# thoughtSignature) — pass them through for round-trip fidelity.
|
||||||
|
yield {
|
||||||
|
"id": f"chatcmpl-{time.time()}",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"created": int(time.time()),
|
||||||
|
"model": model_name,
|
||||||
|
"choices": [{
|
||||||
|
"index": 0,
|
||||||
|
"delta": {"_gemini_raw_parts": all_raw_parts},
|
||||||
|
"finish_reason": None
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
|
||||||
# 如果返回空响应,dump 完整原始 chunks 以便诊断
|
# 如果返回空响应,dump 完整原始 chunks 以便诊断
|
||||||
if not has_content and not all_tool_calls:
|
if not has_content and not all_tool_calls:
|
||||||
|
|||||||
Reference in New Issue
Block a user