mirror of
https://github.com/zhayujie/chatgpt-on-wechat.git
synced 2026-06-02 00:57:41 +08:00
feat(vision): prioritize main model for image recognition with multi-provider fallback
- Add a `call_vision` method to all bot implementations (DashScope, Claude, Gemini, ZhipuAI, MiniMax, Doubao, Moonshot, OpenAICompatibleBot), each using its vendor's native multimodal API format.
- Remove `call_with_tools`/`call_vision` from the `Bot` base class to fix an MRO shadowing issue with the `OpenAICompatibleBot` mixin.
- Refactor vision tool provider resolution: MainModel → other configured models (auto-discovered) → OpenAI → LinkAI, with automatic fallback.
- Return the actual model name used in `call_vision` responses.
- Sync `config.json` API keys to `.env` bidirectionally on startup.
- Fix the bot instance cache to detect `bot_type`/`use_linkai` config changes.
- Add SSE reconnection support for the web console.
- Preserve image path hints in Gemini text for correct vision tool calls.
- Update `docs/tools/vision.mdx`.
This commit is contained in:
@@ -806,15 +806,17 @@ function sendMessage() {
|
||||
}
|
||||
|
||||
function startSSE(requestId, loadingEl, timestamp) {
|
||||
const es = new EventSource(`/stream?request_id=${encodeURIComponent(requestId)}`);
|
||||
activeStreams[requestId] = es;
|
||||
|
||||
let botEl = null;
|
||||
let stepsEl = null; // .agent-steps (thinking summaries + tool indicators)
|
||||
let contentEl = null; // .answer-content (final streaming answer)
|
||||
let mediaEl = null; // .media-content (images & file attachments)
|
||||
let accumulatedText = '';
|
||||
let currentToolEl = null;
|
||||
let done = false;
|
||||
|
||||
const MAX_RECONNECTS = 10;
|
||||
const RECONNECT_BASE_MS = 1000;
|
||||
let reconnectCount = 0;
|
||||
|
||||
function ensureBotEl() {
|
||||
if (botEl) return;
|
||||
@@ -839,180 +841,204 @@ function startSSE(requestId, loadingEl, timestamp) {
|
||||
mediaEl = botEl.querySelector('.media-content');
|
||||
}
|
||||
|
||||
es.onmessage = function(e) {
|
||||
let item;
|
||||
try { item = JSON.parse(e.data); } catch (_) { return; }
|
||||
function connect() {
|
||||
const es = new EventSource(`/stream?request_id=${encodeURIComponent(requestId)}`);
|
||||
activeStreams[requestId] = es;
|
||||
|
||||
if (item.type === 'delta') {
|
||||
ensureBotEl();
|
||||
accumulatedText += item.content;
|
||||
contentEl.innerHTML = renderMarkdown(accumulatedText);
|
||||
scrollChatToBottom();
|
||||
es.onmessage = function(e) {
|
||||
let item;
|
||||
try { item = JSON.parse(e.data); } catch (_) { return; }
|
||||
|
||||
} else if (item.type === 'tool_start') {
|
||||
ensureBotEl();
|
||||
// Successful data received, reset reconnect counter
|
||||
reconnectCount = 0;
|
||||
|
||||
// Save current thinking as a collapsible step
|
||||
if (accumulatedText.trim()) {
|
||||
const fullText = accumulatedText.trim();
|
||||
const oneLine = fullText.replace(/\n+/g, ' ');
|
||||
const needsTruncate = oneLine.length > 80;
|
||||
const stepEl = document.createElement('div');
|
||||
stepEl.className = 'agent-step agent-thinking-step' + (needsTruncate ? '' : ' no-expand');
|
||||
if (needsTruncate) {
|
||||
const truncated = oneLine.substring(0, 80) + '…';
|
||||
stepEl.innerHTML = `
|
||||
<div class="thinking-header" onclick="this.parentElement.classList.toggle('expanded')">
|
||||
<i class="fas fa-lightbulb text-amber-400 flex-shrink-0"></i>
|
||||
<span class="thinking-summary">${escapeHtml(truncated)}</span>
|
||||
<i class="fas fa-chevron-right thinking-chevron"></i>
|
||||
</div>
|
||||
<div class="thinking-full">${renderMarkdown(fullText)}</div>`;
|
||||
} else {
|
||||
stepEl.innerHTML = `
|
||||
<div class="thinking-header no-toggle">
|
||||
<i class="fas fa-lightbulb text-amber-400 flex-shrink-0"></i>
|
||||
<span>${escapeHtml(oneLine)}</span>
|
||||
</div>`;
|
||||
if (item.type === 'delta') {
|
||||
ensureBotEl();
|
||||
accumulatedText += item.content;
|
||||
contentEl.innerHTML = renderMarkdown(accumulatedText);
|
||||
scrollChatToBottom();
|
||||
|
||||
} else if (item.type === 'tool_start') {
|
||||
ensureBotEl();
|
||||
|
||||
// Save current thinking as a collapsible step
|
||||
if (accumulatedText.trim()) {
|
||||
const fullText = accumulatedText.trim();
|
||||
const oneLine = fullText.replace(/\n+/g, ' ');
|
||||
const needsTruncate = oneLine.length > 80;
|
||||
const stepEl = document.createElement('div');
|
||||
stepEl.className = 'agent-step agent-thinking-step' + (needsTruncate ? '' : ' no-expand');
|
||||
if (needsTruncate) {
|
||||
const truncated = oneLine.substring(0, 80) + '…';
|
||||
stepEl.innerHTML = `
|
||||
<div class="thinking-header" onclick="this.parentElement.classList.toggle('expanded')">
|
||||
<i class="fas fa-lightbulb text-amber-400 flex-shrink-0"></i>
|
||||
<span class="thinking-summary">${escapeHtml(truncated)}</span>
|
||||
<i class="fas fa-chevron-right thinking-chevron"></i>
|
||||
</div>
|
||||
<div class="thinking-full">${renderMarkdown(fullText)}</div>`;
|
||||
} else {
|
||||
stepEl.innerHTML = `
|
||||
<div class="thinking-header no-toggle">
|
||||
<i class="fas fa-lightbulb text-amber-400 flex-shrink-0"></i>
|
||||
<span>${escapeHtml(oneLine)}</span>
|
||||
</div>`;
|
||||
}
|
||||
stepsEl.appendChild(stepEl);
|
||||
}
|
||||
stepsEl.appendChild(stepEl);
|
||||
}
|
||||
accumulatedText = '';
|
||||
contentEl.innerHTML = '';
|
||||
accumulatedText = '';
|
||||
contentEl.innerHTML = '';
|
||||
|
||||
// Add tool execution indicator (collapsible)
|
||||
currentToolEl = document.createElement('div');
|
||||
currentToolEl.className = 'agent-step agent-tool-step';
|
||||
const argsStr = formatToolArgs(item.arguments || {});
|
||||
currentToolEl.innerHTML = `
|
||||
<div class="tool-header" onclick="this.parentElement.classList.toggle('expanded')">
|
||||
<i class="fas fa-cog fa-spin text-primary-400 flex-shrink-0 tool-icon"></i>
|
||||
<span class="tool-name">${item.tool}</span>
|
||||
<i class="fas fa-chevron-right tool-chevron"></i>
|
||||
</div>
|
||||
<div class="tool-detail">
|
||||
<div class="tool-detail-section">
|
||||
<div class="tool-detail-label">Input</div>
|
||||
<pre class="tool-detail-content">${argsStr}</pre>
|
||||
// Add tool execution indicator (collapsible)
|
||||
currentToolEl = document.createElement('div');
|
||||
currentToolEl.className = 'agent-step agent-tool-step';
|
||||
const argsStr = formatToolArgs(item.arguments || {});
|
||||
currentToolEl.innerHTML = `
|
||||
<div class="tool-header" onclick="this.parentElement.classList.toggle('expanded')">
|
||||
<i class="fas fa-cog fa-spin text-primary-400 flex-shrink-0 tool-icon"></i>
|
||||
<span class="tool-name">${item.tool}</span>
|
||||
<i class="fas fa-chevron-right tool-chevron"></i>
|
||||
</div>
|
||||
<div class="tool-detail-section tool-output-section"></div>
|
||||
</div>`;
|
||||
stepsEl.appendChild(currentToolEl);
|
||||
<div class="tool-detail">
|
||||
<div class="tool-detail-section">
|
||||
<div class="tool-detail-label">Input</div>
|
||||
<pre class="tool-detail-content">${argsStr}</pre>
|
||||
</div>
|
||||
<div class="tool-detail-section tool-output-section"></div>
|
||||
</div>`;
|
||||
stepsEl.appendChild(currentToolEl);
|
||||
|
||||
scrollChatToBottom();
|
||||
scrollChatToBottom();
|
||||
|
||||
} else if (item.type === 'tool_end') {
|
||||
if (currentToolEl) {
|
||||
const isError = item.status !== 'success';
|
||||
const icon = currentToolEl.querySelector('.tool-icon');
|
||||
icon.className = isError
|
||||
? 'fas fa-times text-red-400 flex-shrink-0 tool-icon'
|
||||
: 'fas fa-check text-primary-400 flex-shrink-0 tool-icon';
|
||||
} else if (item.type === 'tool_end') {
|
||||
if (currentToolEl) {
|
||||
const isError = item.status !== 'success';
|
||||
const icon = currentToolEl.querySelector('.tool-icon');
|
||||
icon.className = isError
|
||||
? 'fas fa-times text-red-400 flex-shrink-0 tool-icon'
|
||||
: 'fas fa-check text-primary-400 flex-shrink-0 tool-icon';
|
||||
|
||||
// Show execution time
|
||||
const nameEl = currentToolEl.querySelector('.tool-name');
|
||||
if (item.execution_time !== undefined) {
|
||||
nameEl.innerHTML += ` <span class="tool-time">${item.execution_time}s</span>`;
|
||||
// Show execution time
|
||||
const nameEl = currentToolEl.querySelector('.tool-name');
|
||||
if (item.execution_time !== undefined) {
|
||||
nameEl.innerHTML += ` <span class="tool-time">${item.execution_time}s</span>`;
|
||||
}
|
||||
|
||||
// Fill output section
|
||||
const outputSection = currentToolEl.querySelector('.tool-output-section');
|
||||
if (outputSection && item.result) {
|
||||
outputSection.innerHTML = `
|
||||
<div class="tool-detail-label">${isError ? 'Error' : 'Output'}</div>
|
||||
<pre class="tool-detail-content ${isError ? 'tool-error-text' : ''}">${escapeHtml(String(item.result))}</pre>`;
|
||||
}
|
||||
|
||||
if (isError) currentToolEl.classList.add('tool-failed');
|
||||
currentToolEl = null;
|
||||
}
|
||||
|
||||
// Fill output section
|
||||
const outputSection = currentToolEl.querySelector('.tool-output-section');
|
||||
if (outputSection && item.result) {
|
||||
outputSection.innerHTML = `
|
||||
<div class="tool-detail-label">${isError ? 'Error' : 'Output'}</div>
|
||||
<pre class="tool-detail-content ${isError ? 'tool-error-text' : ''}">${escapeHtml(String(item.result))}</pre>`;
|
||||
}
|
||||
} else if (item.type === 'image') {
|
||||
ensureBotEl();
|
||||
const imgEl = document.createElement('img');
|
||||
imgEl.src = item.content;
|
||||
imgEl.alt = 'screenshot';
|
||||
imgEl.style.cssText = 'max-width:600px;border-radius:8px;margin:8px 0;cursor:pointer;box-shadow:0 1px 4px rgba(0,0,0,0.1);';
|
||||
imgEl.onclick = () => window.open(item.content, '_blank');
|
||||
mediaEl.appendChild(imgEl);
|
||||
scrollChatToBottom();
|
||||
|
||||
if (isError) currentToolEl.classList.add('tool-failed');
|
||||
currentToolEl = null;
|
||||
} else if (item.type === 'text') {
|
||||
// Intermediate text sent before media items; display it but keep SSE open.
|
||||
ensureBotEl();
|
||||
contentEl.classList.remove('sse-streaming');
|
||||
const textContent = item.content || accumulatedText;
|
||||
if (textContent) contentEl.innerHTML = renderMarkdown(textContent);
|
||||
applyHighlighting(botEl);
|
||||
scrollChatToBottom();
|
||||
|
||||
} else if (item.type === 'video') {
|
||||
ensureBotEl();
|
||||
const wrapper = document.createElement('div');
|
||||
wrapper.innerHTML = _buildVideoHtml(item.content);
|
||||
mediaEl.appendChild(wrapper.firstElementChild || wrapper);
|
||||
scrollChatToBottom();
|
||||
|
||||
} else if (item.type === 'file') {
|
||||
ensureBotEl();
|
||||
const fileName = item.file_name || item.content.split('/').pop();
|
||||
const fileEl = document.createElement('a');
|
||||
fileEl.href = item.content;
|
||||
fileEl.download = fileName;
|
||||
fileEl.target = '_blank';
|
||||
fileEl.className = 'file-attachment';
|
||||
fileEl.style.cssText = 'display:inline-flex;align-items:center;gap:6px;padding:8px 14px;margin:8px 0;border-radius:8px;background:var(--bg-secondary,#f3f4f6);color:var(--text-primary,#374151);text-decoration:none;font-size:14px;border:1px solid var(--border-color,#e5e7eb);';
|
||||
fileEl.innerHTML = `<i class="fas fa-file-download" style="color:#6b7280;"></i> ${fileName}`;
|
||||
mediaEl.appendChild(fileEl);
|
||||
scrollChatToBottom();
|
||||
|
||||
} else if (item.type === 'phase') {
|
||||
// Coarse progress (e.g. cow install-browser); must not close SSE (unlike "done")
|
||||
ensureBotEl();
|
||||
const wrap = document.createElement('div');
|
||||
wrap.className = 'text-xs sm:text-sm text-slate-600 dark:text-slate-400 border-l-2 border-primary-400 pl-2 py-1 my-0.5';
|
||||
wrap.textContent = String(item.content || '');
|
||||
stepsEl.appendChild(wrap);
|
||||
scrollChatToBottom();
|
||||
|
||||
} else if (item.type === 'done') {
|
||||
done = true;
|
||||
es.close();
|
||||
delete activeStreams[requestId];
|
||||
|
||||
// item.content may be empty when "done" is only a stream-close signal after media.
|
||||
const finalText = item.content || accumulatedText;
|
||||
|
||||
if (!botEl && finalText) {
|
||||
if (loadingEl) { loadingEl.remove(); loadingEl = null; }
|
||||
addBotMessage(finalText, new Date((item.timestamp || Date.now() / 1000) * 1000), requestId);
|
||||
} else if (botEl) {
|
||||
contentEl.classList.remove('sse-streaming');
|
||||
// Only update text content when there is something new to show.
|
||||
if (finalText) contentEl.innerHTML = renderMarkdown(finalText);
|
||||
applyHighlighting(botEl);
|
||||
}
|
||||
scrollChatToBottom();
|
||||
|
||||
} else if (item.type === 'error') {
|
||||
done = true;
|
||||
es.close();
|
||||
delete activeStreams[requestId];
|
||||
if (loadingEl) { loadingEl.remove(); loadingEl = null; }
|
||||
addBotMessage(t('error_send'), new Date());
|
||||
}
|
||||
};
|
||||
|
||||
} else if (item.type === 'image') {
|
||||
ensureBotEl();
|
||||
const imgEl = document.createElement('img');
|
||||
imgEl.src = item.content;
|
||||
imgEl.alt = 'screenshot';
|
||||
imgEl.style.cssText = 'max-width:600px;border-radius:8px;margin:8px 0;cursor:pointer;box-shadow:0 1px 4px rgba(0,0,0,0.1);';
|
||||
imgEl.onclick = () => window.open(item.content, '_blank');
|
||||
mediaEl.appendChild(imgEl);
|
||||
scrollChatToBottom();
|
||||
|
||||
} else if (item.type === 'text') {
|
||||
// Intermediate text sent before media items; display it but keep SSE open.
|
||||
ensureBotEl();
|
||||
contentEl.classList.remove('sse-streaming');
|
||||
const textContent = item.content || accumulatedText;
|
||||
if (textContent) contentEl.innerHTML = renderMarkdown(textContent);
|
||||
applyHighlighting(botEl);
|
||||
scrollChatToBottom();
|
||||
|
||||
} else if (item.type === 'video') {
|
||||
ensureBotEl();
|
||||
const wrapper = document.createElement('div');
|
||||
wrapper.innerHTML = _buildVideoHtml(item.content);
|
||||
mediaEl.appendChild(wrapper.firstElementChild || wrapper);
|
||||
scrollChatToBottom();
|
||||
|
||||
} else if (item.type === 'file') {
|
||||
ensureBotEl();
|
||||
const fileName = item.file_name || item.content.split('/').pop();
|
||||
const fileEl = document.createElement('a');
|
||||
fileEl.href = item.content;
|
||||
fileEl.download = fileName;
|
||||
fileEl.target = '_blank';
|
||||
fileEl.className = 'file-attachment';
|
||||
fileEl.style.cssText = 'display:inline-flex;align-items:center;gap:6px;padding:8px 14px;margin:8px 0;border-radius:8px;background:var(--bg-secondary,#f3f4f6);color:var(--text-primary,#374151);text-decoration:none;font-size:14px;border:1px solid var(--border-color,#e5e7eb);';
|
||||
fileEl.innerHTML = `<i class="fas fa-file-download" style="color:#6b7280;"></i> ${fileName}`;
|
||||
mediaEl.appendChild(fileEl);
|
||||
scrollChatToBottom();
|
||||
|
||||
} else if (item.type === 'phase') {
|
||||
// Coarse progress (e.g. cow install-browser); must not close SSE (unlike "done")
|
||||
ensureBotEl();
|
||||
const wrap = document.createElement('div');
|
||||
wrap.className = 'text-xs sm:text-sm text-slate-600 dark:text-slate-400 border-l-2 border-primary-400 pl-2 py-1 my-0.5';
|
||||
wrap.textContent = String(item.content || '');
|
||||
stepsEl.appendChild(wrap);
|
||||
scrollChatToBottom();
|
||||
|
||||
} else if (item.type === 'done') {
|
||||
es.onerror = function() {
|
||||
es.close();
|
||||
delete activeStreams[requestId];
|
||||
|
||||
// item.content may be empty when "done" is only a stream-close signal after media.
|
||||
const finalText = item.content || accumulatedText;
|
||||
if (done) return;
|
||||
|
||||
if (!botEl && finalText) {
|
||||
if (loadingEl) { loadingEl.remove(); loadingEl = null; }
|
||||
addBotMessage(finalText, new Date((item.timestamp || Date.now() / 1000) * 1000), requestId);
|
||||
} else if (botEl) {
|
||||
if (reconnectCount < MAX_RECONNECTS) {
|
||||
reconnectCount++;
|
||||
const delay = Math.min(RECONNECT_BASE_MS * reconnectCount, 5000);
|
||||
console.warn(`[SSE] connection lost for ${requestId}, reconnecting in ${delay}ms (attempt ${reconnectCount}/${MAX_RECONNECTS})`);
|
||||
setTimeout(connect, delay);
|
||||
return;
|
||||
}
|
||||
|
||||
// Exhausted retries, show whatever we have
|
||||
if (loadingEl) { loadingEl.remove(); loadingEl = null; }
|
||||
if (!botEl) {
|
||||
addBotMessage(t('error_send'), new Date());
|
||||
} else if (accumulatedText) {
|
||||
contentEl.classList.remove('sse-streaming');
|
||||
// Only update text content when there is something new to show.
|
||||
if (finalText) contentEl.innerHTML = renderMarkdown(finalText);
|
||||
contentEl.innerHTML = renderMarkdown(accumulatedText);
|
||||
applyHighlighting(botEl);
|
||||
}
|
||||
scrollChatToBottom();
|
||||
};
|
||||
}
|
||||
|
||||
} else if (item.type === 'error') {
|
||||
es.close();
|
||||
delete activeStreams[requestId];
|
||||
if (loadingEl) { loadingEl.remove(); loadingEl = null; }
|
||||
addBotMessage(t('error_send'), new Date());
|
||||
}
|
||||
};
|
||||
|
||||
es.onerror = function() {
|
||||
es.close();
|
||||
delete activeStreams[requestId];
|
||||
if (loadingEl) { loadingEl.remove(); loadingEl = null; }
|
||||
if (!botEl) {
|
||||
addBotMessage(t('error_send'), new Date());
|
||||
} else if (accumulatedText) {
|
||||
contentEl.classList.remove('sse-streaming');
|
||||
contentEl.innerHTML = renderMarkdown(accumulatedText);
|
||||
applyHighlighting(botEl);
|
||||
}
|
||||
};
|
||||
connect();
|
||||
}
|
||||
|
||||
function startPolling() {
|
||||
|
||||
@@ -329,14 +329,18 @@ class WebChannel(ChatChannel):
|
||||
"""
|
||||
SSE generator for a given request_id.
|
||||
Yields UTF-8 encoded bytes to avoid WSGI Latin-1 mangling.
|
||||
Supports client reconnection: the queue is only removed after a
|
||||
"done" event is consumed, so a new GET /stream with the same
|
||||
request_id can resume reading remaining events.
|
||||
"""
|
||||
if request_id not in self.sse_queues:
|
||||
yield b"data: {\"type\": \"error\", \"message\": \"invalid request_id\"}\n\n"
|
||||
return
|
||||
|
||||
q = self.sse_queues[request_id]
|
||||
timeout = 300 # 5 minutes max
|
||||
deadline = time.time() + timeout
|
||||
idle_timeout = 600 # 10 minutes without any real event
|
||||
deadline = time.time() + idle_timeout
|
||||
done = False
|
||||
|
||||
try:
|
||||
while time.time() < deadline:
|
||||
@@ -346,13 +350,18 @@ class WebChannel(ChatChannel):
|
||||
yield b": keepalive\n\n"
|
||||
continue
|
||||
|
||||
# Real event received, reset idle deadline
|
||||
deadline = time.time() + idle_timeout
|
||||
|
||||
payload = json.dumps(item, ensure_ascii=False)
|
||||
yield f"data: {payload}\n\n".encode("utf-8")
|
||||
|
||||
if item.get("type") == "done":
|
||||
done = True
|
||||
break
|
||||
finally:
|
||||
self.sse_queues.pop(request_id, None)
|
||||
if done:
|
||||
self.sse_queues.pop(request_id, None)
|
||||
|
||||
def poll_response(self):
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user