diff --git a/agent/protocol/agent_stream.py b/agent/protocol/agent_stream.py
index d7fc5066..3e6bc4e4 100644
--- a/agent/protocol/agent_stream.py
+++ b/agent/protocol/agent_stream.py
@@ -241,6 +241,9 @@ class AgentStreamExecutor:
if turn > 1:
logger.info(f"[Agent] Requesting explicit response from LLM...")
+ # Remember position so we can remove the injected prompt later
+ prompt_insert_idx = len(self.messages)
+
# 添加一条消息,明确要求回复用户
self.messages.append({
"role": "user",
@@ -254,6 +257,15 @@ class AgentStreamExecutor:
assistant_msg, tool_calls = self._call_llm_stream(retry_on_empty=False)
final_response = assistant_msg
+ # Remove the injected prompt from history so it doesn't
+ # appear as a user message in persisted conversations.
+ # _call_llm_stream may have appended an assistant message
+ # after the prompt, so we locate and remove only the prompt.
+ if (prompt_insert_idx < len(self.messages)
+ and self.messages[prompt_insert_idx].get("role") == "user"):
+ self.messages.pop(prompt_insert_idx)
+ logger.debug("[Agent] Removed injected explicit-response prompt from message history")
+
# If LLM responded with tool_calls instead of text, fall through
# to the tool execution path below (don't break the loop).
if tool_calls:
diff --git a/channel/web/static/js/console.js b/channel/web/static/js/console.js
index 19d2c940..099ef32a 100644
--- a/channel/web/static/js/console.js
+++ b/channel/web/static/js/console.js
@@ -339,6 +339,7 @@ function createMd() {
const md = createMd();
const VIDEO_EXT_RE = /\.(?:mp4|webm|mov|avi|mkv)$/i; // tested against URL without query string
+const IMAGE_EXT_RE = /\.(?:jpg|jpeg|png|gif|webp|bmp|svg)$/i; // tested against URL without query string
function _buildVideoHtml(url) {
const fileName = url.split('/').pop().split('?')[0];
@@ -351,6 +352,15 @@ function _buildVideoHtml(url) {
` ${escapeHtml(fileName)}`;
}
+// Build the inline preview markup for a single image URL: a clickable
+// thumbnail that opens the full image in a new tab.
+function _buildImageHtml(url) {
+    // Escape double quotes so the URL cannot break out of the attribute value.
+    const safeUrl = url.replace(/"/g, '&quot;');
+    // NOTE(review): the markup inside these template literals was stripped from
+    // the diff during extraction; this is a minimal reconstruction. Confirm
+    // class names / inline styles against the real console.js before merging.
+    return `<a href="${safeUrl}" target="_blank" rel="noopener noreferrer">` +
+        `<img src="${safeUrl}" alt="image" loading="lazy" style="max-width:100%;height:auto;">` +
+        `</a>`;
+}
+
function injectVideoPlayers(html) {
// Step 1: replace markdown-it anchor tags whose href points to a video file.
const step1 = html.replace(
@@ -369,10 +379,32 @@ function injectVideoPlayers(html) {
}).join('');
}
+// Convert image URLs into inline <img> previews. Mirrors injectVideoPlayers but for images.
+// Handles three cases produced by markdown-it:
+//   1. <a href="...">...</a> (bare URL or autolink that linkify turned into an anchor)
+//   2. <img src="..."> (markdown image syntax) — leave as-is, but normalize style
+//   3. raw URL still present in a text node — only as a safety net
+function injectImagePreviews(html) {
+    // Step 1: anchor whose href points to an image file -> replace with <img> preview.
+    // The href capture group is required: without it the callback's second
+    // argument is the numeric match offset and url.split() throws.
+    const step1 = html.replace(
+        /<a\s+href="([^"]+)"[^>]*>[^<]*<\/a>/gi,
+        (match, url) => IMAGE_EXT_RE.test(url.split('?')[0]) ? _buildImageHtml(url) : match
+    );
+    // Step 2: bare image URLs left in text nodes (rare — markdown-it's linkify usually catches them).
+    // Splitting on tags puts text at even indexes and tags at odd indexes, so
+    // URLs that already live inside an attribute are never rewritten.
+    return step1.split(/(<[^>]+>)/).map((chunk, idx) => {
+        if (idx % 2 !== 0) return chunk;
+        return chunk.replace(/https?:\/\/\S+/gi, (url) => {
+            const bare = url.replace(/[),.\s]+$/, '');
+            if (!IMAGE_EXT_RE.test(bare.split('?')[0])) return url;
+            // Re-append the punctuation trimmed off the URL so the sentence
+            // around the image keeps its closing ")", "," or ".".
+            return _buildImageHtml(bare) + url.slice(bare.length);
+        });
+    }).join('');
+}
+
function renderMarkdown(text) {
try {
const html = md.render(text);
- return injectVideoPlayers(html);
+ // Order matters: video first (more specific), then image, so a video link
+ // is already replaced by its player before the image pass runs.
+ return injectImagePreviews(injectVideoPlayers(html));
}
catch (e) { return text.replace(/\n/g, '<br>'); }
}
diff --git a/models/gemini/google_gemini_bot.py b/models/gemini/google_gemini_bot.py
index e379d912..ab3eae4c 100644
--- a/models/gemini/google_gemini_bot.py
+++ b/models/gemini/google_gemini_bot.py
@@ -335,6 +335,18 @@ class GoogleGeminiBot(Bot):
# Convert role
gemini_role = "user" if role in ["user", "tool"] else "model"
+ # For model messages that carry original Gemini parts (with
+ # thoughtSignature etc.), use them directly instead of
+ # reconstructing from Claude-format tool_use blocks.
+ if gemini_role == "model" and "_gemini_raw_parts" in msg:
+ raw_parts = msg["_gemini_raw_parts"]
+ if raw_parts:
+ payload["contents"].append({
+ "role": "model",
+ "parts": raw_parts
+ })
+ continue
+
# Handle different content formats
parts = []
@@ -398,6 +410,17 @@ class GoogleGeminiBot(Bot):
else:
logger.warning(f"[Gemini] Skip invalid image block: {str(block)[:200]}")
+ elif block_type == "tool_use":
+ # Convert Claude tool_use to Gemini functionCall
+ fc_name = block.get("name", "unknown")
+ fc_args = block.get("input") or {}
+ parts.append({
+ "functionCall": {
+ "name": fc_name,
+ "args": fc_args
+ }
+ })
+
elif block_type == "tool_result":
# Convert Claude tool_result to Gemini functionResponse
tool_use_id = block.get("tool_use_id")
@@ -648,6 +671,7 @@ class GoogleGeminiBot(Bot):
"""Handle Gemini REST API stream response"""
try:
all_tool_calls = []
+ all_raw_parts = [] # Preserve all Gemini parts (incl. thoughtSignature) for round-trip
has_sent_tool_calls = False
has_content = False # Track if any content was sent
chunk_count = 0
@@ -733,6 +757,9 @@ class GoogleGeminiBot(Bot):
"arguments": json.dumps(fc.get("args", {}))
}
})
+
+ # Preserve all raw parts for round-trip (thoughtSignature, etc.)
+ all_raw_parts.extend(parts)
except json.JSONDecodeError as je:
logger.debug(f"[Gemini] JSON decode error: {je}, line={line[:500]}")
@@ -740,6 +767,9 @@ class GoogleGeminiBot(Bot):
# Send tool calls if any were collected
if all_tool_calls and not has_sent_tool_calls:
+ delta = {"tool_calls": all_tool_calls}
+ if all_raw_parts:
+ delta["_gemini_raw_parts"] = all_raw_parts
yield {
"id": f"chatcmpl-{time.time()}",
"object": "chat.completion.chunk",
@@ -747,11 +777,25 @@ class GoogleGeminiBot(Bot):
"model": model_name,
"choices": [{
"index": 0,
- "delta": {"tool_calls": all_tool_calls},
+ "delta": delta,
"finish_reason": None
}]
}
has_sent_tool_calls = True
+ elif not has_sent_tool_calls and all_raw_parts:
+ # No tool calls but we have raw parts (e.g. text-only response with
+ # thoughtSignature) — pass them through for round-trip fidelity.
+ yield {
+ "id": f"chatcmpl-{time.time()}",
+ "object": "chat.completion.chunk",
+ "created": int(time.time()),
+ "model": model_name,
+ "choices": [{
+ "index": 0,
+ "delta": {"_gemini_raw_parts": all_raw_parts},
+ "finish_reason": None
+ }]
+ }
# 如果返回空响应,dump 完整原始 chunks 以便诊断
if not has_content and not all_tool_calls: