feat: display thinking content in web console

2026-07-17 11:07:11 +08:00 · 2026-04-10 15:07:23 +08:00
parent 54e81aba11
commit 6a737fb734
10 changed files with 285 additions and 89 deletions
--- a/agent/chat/service.py
+++ b/agent/chat/service.py
@@ -57,7 +57,16 @@ class ChatService:
            event_type = event.get("type")
            data = event.get("data", {})

-            if event_type == "message_update":
+            if event_type == "reasoning_update":
+                delta = data.get("delta", "")
+                if delta:
+                    send_chunk_fn({
+                        "chunk_type": "reasoning",
+                        "delta": delta,
+                        "segment_id": state.segment_id,
+                    })
+
+            elif event_type == "message_update":
                # Incremental text delta
                delta = data.get("delta", "")
                if delta:
--- a/agent/memory/conversation_store.py
+++ b/agent/memory/conversation_store.py
@@ -188,8 +188,9 @@ def _group_into_display_turns(
            if text:
                turns.append({"role": "user", "content": text, "created_at": created_at})

-        # Collect all tool_calls and tool_results from the rest of the group
-        all_tool_calls: List[Dict[str, Any]] = []
+        # Build an ordered list of steps preserving the original sequence:
+        #   thinking → content → tool_call → content → ...
+        steps: List[Dict[str, Any]] = []
        tool_results: Dict[str, str] = {}
        final_text = ""
        final_ts: Optional[int] = None
@@ -198,24 +199,46 @@ def _group_into_display_turns(
            if role == "user":
                tool_results.update(_extract_tool_results(content))
            elif role == "assistant":
-                tcs = _extract_tool_calls(content)
-                all_tool_calls.extend(tcs)
-                t = _extract_display_text(content)
-                if t:
-                    final_text = t
+                # Walk content blocks in order to preserve interleaving
+                if isinstance(content, list):
+                    for block in content:
+                        if not isinstance(block, dict):
+                            continue
+                        btype = block.get("type")
+                        if btype == "thinking":
+                            txt = block.get("thinking", "").strip()
+                            if txt:
+                                steps.append({"type": "thinking", "content": txt})
+                        elif btype == "text":
+                            txt = block.get("text", "").strip()
+                            if txt:
+                                steps.append({"type": "content", "content": txt})
+                                final_text = txt
+                        elif btype == "tool_use":
+                            steps.append({
+                                "type": "tool",
+                                "id": block.get("id", ""),
+                                "name": block.get("name", ""),
+                                "arguments": block.get("input", {}),
+                            })
+                elif isinstance(content, str) and content.strip():
+                    steps.append({"type": "content", "content": content.strip()})
+                    final_text = content.strip()
                final_ts = created_at

-        # Attach tool results to their matching tool_call entries
-        for tc in all_tool_calls:
-            tc["result"] = tool_results.get(tc.get("id", ""), "")
+        # Attach tool results to tool steps
+        for step in steps:
+            if step["type"] == "tool":
+                step["result"] = tool_results.get(step.get("id", ""), "")

-        if final_text or all_tool_calls:
-            turns.append({
+        if steps or final_text:
+            turn = {
                "role": "assistant",
                "content": final_text,
-                "tool_calls": all_tool_calls,
+                "steps": steps,
                "created_at": final_ts or (user_row[1] if user_row else 0),
-            })
+            }
+            turns.append(turn)

    return turns

@@ -312,6 +335,9 @@ class ConversationStore:
                content = json.loads(raw_content)
            except Exception:
                content = raw_content
+            # Strip thinking blocks — they are stored for UI display only
+            if role == "assistant" and isinstance(content, list):
+                content = [b for b in content if b.get("type") != "thinking"]
            result.append({"role": role, "content": content})
        return result

--- a/agent/protocol/agent_stream.py
+++ b/agent/protocol/agent_stream.py
@@ -527,6 +527,7 @@ class AgentStreamExecutor:

        # Streaming response
        full_content = ""
+        full_reasoning = ""
        tool_calls_buffer = {}  # {index: {id, name, arguments}}
        gemini_raw_parts = None  # Preserve Gemini thoughtSignature for round-trip
        stop_reason = None  # Track why the stream stopped
@@ -584,10 +585,10 @@ class AgentStreamExecutor:
                    if finish_reason:
                        stop_reason = finish_reason

-                    # Skip reasoning_content (internal thinking from models like GLM-5)
                    reasoning_delta = delta.get("reasoning_content") or ""
-                    # if reasoning_delta:
-                    #     logger.debug(f"🧠 [thinking] {reasoning_delta[:100]}...")
+                    if reasoning_delta:
+                        full_reasoning += reasoning_delta
+                        self._emit_event("reasoning_update", {"delta": reasoning_delta})

                    # Handle text content
                    content_delta = delta.get("content") or ""
@@ -788,7 +789,12 @@ class AgentStreamExecutor:
        # Add assistant message to history (Claude format uses content blocks)
        assistant_msg = {"role": "assistant", "content": []}

-        # Add text content block if present
+        if full_reasoning:
+            assistant_msg["content"].append({
+                "type": "thinking",
+                "thinking": full_reasoning
+            })
+
        if full_content:
            assistant_msg["content"].append({
                "type": "text",
--- a/bridge/agent_event_handler.py
+++ b/bridge/agent_event_handler.py
@@ -26,8 +26,7 @@ class AgentEventHandler:
        if context:
            self.channel = context.kwargs.get("channel") if hasattr(context, "kwargs") else None
        
-        # Track current thinking for channel output
-        self.current_thinking = ""
+        self.current_content = ""
        self.turn_number = 0
    
    def handle_event(self, event):
@@ -47,6 +46,8 @@ class AgentEventHandler:
            self._handle_message_update(data)
        elif event_type == "message_end":
            self._handle_message_end(data)
+        elif event_type == "reasoning_update":
+            pass
        elif event_type == "tool_execution_start":
            self._handle_tool_execution_start(data)
        elif event_type == "tool_execution_end":
@@ -59,30 +60,26 @@ class AgentEventHandler:
    def _handle_turn_start(self, data):
        """Handle turn start event"""
        self.turn_number = data.get("turn", 0)
-        self.has_tool_calls_in_turn = False
-        self.current_thinking = ""
+        self.current_content = ""
    
    def _handle_message_update(self, data):
-        """Handle message update event (streaming text)"""
+        """Handle message update event (streaming content text)"""
        delta = data.get("delta", "")
-        self.current_thinking += delta
+        self.current_content += delta
    
    def _handle_message_end(self, data):
        """Handle message end event"""
        tool_calls = data.get("tool_calls", [])
        
-        # Only send thinking process if followed by tool calls
        if tool_calls:
-            if self.current_thinking.strip():
-                logger.info(f"💭 {self.current_thinking.strip()[:200]}{'...' if len(self.current_thinking) > 200 else ''}")
-                # Send thinking process to channel
-                self._send_to_channel(f"{self.current_thinking.strip()}")
+            if self.current_content.strip():
+                logger.info(f"💭 {self.current_content.strip()[:200]}{'...' if len(self.current_content) > 200 else ''}")
+                self._send_to_channel(self.current_content.strip())
        else:
-            # No tool calls = final response (logged at agent_stream level)
-            if self.current_thinking.strip():
-                logger.debug(f"💬 {self.current_thinking.strip()[:200]}{'...' if len(self.current_thinking) > 200 else ''}")
+            if self.current_content.strip():
+                logger.debug(f"💬 {self.current_content.strip()[:200]}{'...' if len(self.current_content) > 200 else ''}")
        
-        self.current_thinking = ""
+        self.current_content = ""
    
    def _handle_tool_execution_start(self, data):
        """Handle tool execution start event - logged by agent_stream.py"""
--- a/channel/web/static/css/console.css
+++ b/channel/web/static/css/console.css
@@ -146,7 +146,7 @@
    font-size: 0.75rem;
    line-height: 1.5;
    color: #94a3b8;
-    max-height: 200px;
+    max-height: 300px;
    overflow-y: auto;
 }
 .dark .agent-thinking-step .thinking-full {
@@ -158,6 +158,20 @@
 .agent-thinking-step .thinking-full p:first-child { margin-top: 0; }
 .agent-thinking-step .thinking-full p:last-child { margin-bottom: 0; }

+/* Content step - real text output frozen before tool calls */
+.agent-content-step {
+    font-size: 0.875rem;
+    line-height: 1.6;
+    color: inherit;
+    margin-bottom: 0.5rem;
+    padding-bottom: 0.5rem;
+    border-bottom: 1px dashed rgba(0, 0, 0, 0.06);
+}
+.dark .agent-content-step { border-bottom-color: rgba(255, 255, 255, 0.06); }
+.agent-content-step .agent-content-body p { margin: 0.25em 0; }
+.agent-content-step .agent-content-body p:first-child { margin-top: 0; }
+.agent-content-step .agent-content-body p:last-child { margin-bottom: 0; }
+
 /* Tool step - collapsible */
 .agent-tool-step .tool-header {
    display: flex;
--- a/channel/web/static/js/console.js
+++ b/channel/web/static/js/console.js
@@ -815,6 +815,8 @@ function startSSE(requestId, loadingEl, timestamp) {
    let mediaEl = null;    // .media-content (images & file attachments)
    let accumulatedText = '';
    let currentToolEl = null;
+    let currentReasoningEl = null;  // live reasoning bubble
+    let reasoningText = '';

    function ensureBotEl() {
        if (botEl) return;
@@ -843,39 +845,61 @@ function startSSE(requestId, loadingEl, timestamp) {
        let item;
        try { item = JSON.parse(e.data); } catch (_) { return; }

-        if (item.type === 'delta') {
+        if (item.type === 'reasoning') {
            ensureBotEl();
+            reasoningText += item.content;
+            if (!currentReasoningEl) {
+                currentReasoningEl = document.createElement('div');
+                currentReasoningEl.className = 'agent-step agent-thinking-step';
+                currentReasoningEl.innerHTML = `
+                    <div class="thinking-header" onclick="this.parentElement.classList.toggle('expanded')">
+                        <i class="fas fa-lightbulb text-amber-400 flex-shrink-0"></i>
+                        <span class="thinking-summary"></span>
+                        <i class="fas fa-chevron-right thinking-chevron"></i>
+                    </div>
+                    <div class="thinking-full"></div>`;
+                stepsEl.appendChild(currentReasoningEl);
+            }
+            // Stream reasoning as a single-line summary (collapsed); full text available on expand
+            const oneLine = reasoningText.trim().replace(/\n+/g, ' ');
+            currentReasoningEl.querySelector('.thinking-summary').textContent =
+                oneLine.length > 80 ? oneLine.substring(0, 80) + '…' : oneLine;
+            currentReasoningEl.querySelector('.thinking-full').innerHTML = renderMarkdown(reasoningText);
+            scrollChatToBottom();
+
+        } else if (item.type === 'delta') {
+            ensureBotEl();
+            if (currentReasoningEl) {
+                if (reasoningText.trim().replace(/\n+/g, ' ').length <= 80)
+                    currentReasoningEl.classList.add('no-expand');
+                currentReasoningEl = null;
+                reasoningText = '';
+            }
            accumulatedText += item.content;
            contentEl.innerHTML = renderMarkdown(accumulatedText);
            scrollChatToBottom();

+        } else if (item.type === 'message_end') {
+            // Backend already strips reasoning_content; all deltas are real content.
+            // Freeze accumulated text as visible content before tool execution begins.
+            if (item.has_tool_calls && accumulatedText.trim()) {
+                ensureBotEl();
+                const frozenEl = document.createElement('div');
+                frozenEl.className = 'agent-step agent-content-step';
+                frozenEl.innerHTML = `<div class="agent-content-body">${renderMarkdown(accumulatedText.trim())}</div>`;
+                stepsEl.appendChild(frozenEl);
+                accumulatedText = '';
+                contentEl.innerHTML = '';
+                scrollChatToBottom();
+            }
+
        } else if (item.type === 'tool_start') {
            ensureBotEl();
-
-            // Save current thinking as a collapsible step
-            if (accumulatedText.trim()) {
-                const fullText = accumulatedText.trim();
-                const oneLine = fullText.replace(/\n+/g, ' ');
-                const needsTruncate = oneLine.length > 80;
-                const stepEl = document.createElement('div');
-                stepEl.className = 'agent-step agent-thinking-step' + (needsTruncate ? '' : ' no-expand');
-                if (needsTruncate) {
-                    const truncated = oneLine.substring(0, 80) + '…';
-                    stepEl.innerHTML = `
-                        <div class="thinking-header" onclick="this.parentElement.classList.toggle('expanded')">
-                            <i class="fas fa-lightbulb text-amber-400 flex-shrink-0"></i>
-                            <span class="thinking-summary">${escapeHtml(truncated)}</span>
-                            <i class="fas fa-chevron-right thinking-chevron"></i>
-                        </div>
-                        <div class="thinking-full">${renderMarkdown(fullText)}</div>`;
-                } else {
-                    stepEl.innerHTML = `
-                        <div class="thinking-header no-toggle">
-                            <i class="fas fa-lightbulb text-amber-400 flex-shrink-0"></i>
-                            <span>${escapeHtml(oneLine)}</span>
-                        </div>`;
-                }
-                stepsEl.appendChild(stepEl);
+            if (currentReasoningEl) {
+                if (reasoningText.trim().replace(/\n+/g, ' ').length <= 80)
+                    currentReasoningEl.classList.add('no-expand');
+                currentReasoningEl = null;
+                reasoningText = '';
            }
            accumulatedText = '';
            contentEl.innerHTML = '';
@@ -979,6 +1003,13 @@ function startSSE(requestId, loadingEl, timestamp) {
            es.close();
            delete activeStreams[requestId];

+            if (currentReasoningEl) {
+                if (reasoningText.trim().replace(/\n+/g, ' ').length <= 80)
+                    currentReasoningEl.classList.add('no-expand');
+                currentReasoningEl = null;
+                reasoningText = '';
+            }
+
            // item.content may be empty when "done" is only a stream-close signal after media.
            const finalText = item.content || accumulatedText;

@@ -1102,17 +1133,106 @@ function renderToolCallsHtml(toolCalls) {
    }).join('');
 }

-function createBotMessageEl(content, timestamp, requestId, toolCalls) {
+function renderThinkingHtml(text) {
+    if (!text || !text.trim()) return '';
+    const full = text.trim();
+    const oneLine = full.replace(/\n+/g, ' ');
+    if (oneLine.length > 80) {
+        const truncated = oneLine.substring(0, 80) + '…';
+        return `
+<div class="agent-step agent-thinking-step">
+    <div class="thinking-header" onclick="this.parentElement.classList.toggle('expanded')">
+        <i class="fas fa-lightbulb text-amber-400 flex-shrink-0"></i>
+        <span class="thinking-summary">${escapeHtml(truncated)}</span>
+        <i class="fas fa-chevron-right thinking-chevron"></i>
+    </div>
+    <div class="thinking-full">${renderMarkdown(full)}</div>
+</div>`;
+    }
+    return `
+<div class="agent-step agent-thinking-step no-expand">
+    <div class="thinking-header no-toggle">
+        <i class="fas fa-lightbulb text-amber-400 flex-shrink-0"></i>
+        <span>${escapeHtml(oneLine)}</span>
+    </div>
+</div>`;
+}
+
+function renderStepsHtml(steps) {
+    if (!steps || steps.length === 0) return { stepsHtml: '', finalContent: '' };
+
+    // Find the index of the last content step — it becomes the main answer, not a step
+    let lastContentIdx = -1;
+    for (let i = steps.length - 1; i >= 0; i--) {
+        if (steps[i].type === 'content') { lastContentIdx = i; break; }
+    }
+
+    let html = '';
+    let lastContentText = '';
+    for (let i = 0; i < steps.length; i++) {
+        const step = steps[i];
+        if (step.type === 'thinking') {
+            html += renderThinkingHtml(step.content);
+        } else if (step.type === 'content') {
+            if (i === lastContentIdx) {
+                lastContentText = step.content;
+            } else {
+                html += `<div class="agent-step agent-content-step"><div class="agent-content-body">${renderMarkdown(step.content)}</div></div>`;
+            }
+        } else if (step.type === 'tool') {
+            const argsStr = formatToolArgs(step.arguments || {});
+            const resultStr = step.result ? escapeHtml(String(step.result)) : '';
+            html += `
+<div class="agent-step agent-tool-step">
+    <div class="tool-header" onclick="this.parentElement.classList.toggle('expanded')">
+        <i class="fas fa-check text-primary-400 flex-shrink-0 tool-icon"></i>
+        <span class="tool-name">${escapeHtml(step.name || '')}</span>
+        <i class="fas fa-chevron-right tool-chevron"></i>
+    </div>
+    <div class="tool-detail">
+        <div class="tool-detail-section">
+            <div class="tool-detail-label">Input</div>
+            <pre class="tool-detail-content">${argsStr}</pre>
+        </div>
+        ${resultStr ? `
+        <div class="tool-detail-section tool-output-section">
+            <div class="tool-detail-label">Output</div>
+            <pre class="tool-detail-content">${resultStr}</pre>
+        </div>` : ''}
+    </div>
+</div>`;
+        }
+    }
+    return { stepsHtml: html, lastContentText };
+}
+
+function createBotMessageEl(content, timestamp, requestId, msg) {
    const el = document.createElement('div');
    el.className = 'flex gap-3 px-4 sm:px-6 py-3';
    if (requestId) el.dataset.requestId = requestId;
-    const toolsHtml = renderToolCallsHtml(toolCalls);
+
+    let stepsHtml = '';
+    let displayContent = content;
+
+    if (msg && msg.steps && msg.steps.length > 0) {
+        // New format: ordered steps with interleaved content
+        const result = renderStepsHtml(msg.steps);
+        stepsHtml = result.stepsHtml;
+        // The final content (last text after all steps) is the main answer
+        displayContent = content || result.lastContentText;
+    } else {
+        // Legacy format: separate tool_calls + optional reasoning
+        const toolCalls = msg && msg.tool_calls;
+        const reasoning = msg && msg.reasoning;
+        stepsHtml = renderThinkingHtml(reasoning) + renderToolCallsHtml(toolCalls);
+    }
+
    el.innerHTML = `
        <img src="assets/logo.jpg" alt="CowAgent" class="w-8 h-8 rounded-lg flex-shrink-0">
        <div class="min-w-0 flex-1 max-w-[85%]">
            <div class="bg-white dark:bg-[#1A1A1A] border border-slate-200 dark:border-white/10 rounded-2xl px-4 py-3 text-sm leading-relaxed msg-content text-slate-700 dark:text-slate-200">
-                ${toolsHtml ? `<div class="agent-steps">${toolsHtml}</div>` : ''}
-                <div class="answer-content">${renderMarkdown(content)}</div>
+                ${stepsHtml ? `<div class="agent-steps">${stepsHtml}</div>` : ''}
+                <div class="answer-content">${renderMarkdown(displayContent)}</div>
            </div>
            <div class="text-xs text-slate-400 dark:text-slate-500 mt-1.5">${formatTime(timestamp)}</div>
        </div>
@@ -1167,7 +1287,7 @@ function loadHistory(page) {
                const ts = new Date(msg.created_at * 1000);
                const el = msg.role === 'user'
                    ? createUserMessageEl(msg.content, ts)
-                    : createBotMessageEl(msg.content || '', ts, null, msg.tool_calls);
+                    : createBotMessageEl(msg.content || '', ts, null, msg);
                fragment.appendChild(el);
            });

--- a/channel/web/web_channel.py
+++ b/channel/web/web_channel.py
@@ -168,7 +168,12 @@ class WebChannel(ChatChannel):
            event_type = event.get("type")
            data = event.get("data", {})

-            if event_type == "message_update":
+            if event_type == "reasoning_update":
+                delta = data.get("delta", "")
+                if delta:
+                    q.put({"type": "reasoning", "content": delta})
+
+            elif event_type == "message_update":
                delta = data.get("delta", "")
                if delta:
                    q.put({"type": "delta", "content": delta})
@@ -195,6 +200,11 @@ class WebChannel(ChatChannel):
                    "execution_time": round(exec_time, 2)
                })

+            elif event_type == "message_end":
+                tool_calls = data.get("tool_calls", [])
+                if tool_calls:
+                    q.put({"type": "message_end", "has_tool_calls": True})
+
            elif event_type == "file_to_send":
                file_path = data.get("path", "")
                file_name = data.get("file_name", os.path.basename(file_path))
--- a/models/claudeapi/claude_api_bot.py
+++ b/models/claudeapi/claude_api_bot.py
@@ -429,8 +429,21 @@ class ClaudeAPIBot(Bot, OpenAIImage):
                                delta = event.get("delta", {})
                                delta_type = delta.get("type")

-                                if delta_type == "text_delta":
-                                    # Text content
+                                if delta_type == "thinking_delta":
+                                    thinking_text = delta.get("thinking", "")
+                                    if thinking_text:
+                                        yield {
+                                            "choices": [{
+                                                "index": 0,
+                                                "delta": {
+                                                    "role": "assistant",
+                                                    "reasoning_content": thinking_text
+                                                },
+                                                "finish_reason": None
+                                            }]
+                                        }
+
+                                elif delta_type == "text_delta":
                                    content = delta.get("text", "")
                                    yield {
                                        "id": event.get("id", ""),
--- a/models/minimax/minimax_bot.py
+++ b/models/minimax/minimax_bot.py
@@ -233,11 +233,8 @@ class MinimaxBot(Bot):

            logger.debug(f"[MINIMAX] API call: model={model}, tools={len(converted_tools) if converted_tools else 0}, stream={stream}")

-            # Check if we should show thinking process
-            show_thinking = kwargs.pop("show_thinking", conf().get("minimax_show_thinking", False))
-            
            if stream:
-                return self._handle_stream_response(request_body, show_thinking=show_thinking)
+                return self._handle_stream_response(request_body)
            else:
                return self._handle_sync_response(request_body)

@@ -466,12 +463,11 @@ class MinimaxBot(Bot):
            logger.error(traceback.format_exc())
            yield {"error": True, "message": str(e), "status_code": 500}

-    def _handle_stream_response(self, request_body, show_thinking=False):
+    def _handle_stream_response(self, request_body):
        """Handle streaming API response
-        
+
        Args:
            request_body: API request parameters
-            show_thinking: Whether to show thinking/reasoning process to users
        """
        try:
            headers = {
@@ -550,19 +546,15 @@ class MinimaxBot(Bot):

                            current_reasoning[reasoning_index]["text"] += reasoning_text

-                            # Optionally yield thinking as visible content
-                            if show_thinking:
-                                # Yield thinking text as-is (without emoji decoration)
-                                # The reasoning text will be displayed to users
-                                yield {
-                                    "choices": [{
-                                        "index": 0,
-                                        "delta": {
-                                            "role": "assistant",
-                                            "content": reasoning_text
-                                        }
-                                    }]
-                                }
+                            yield {
+                                "choices": [{
+                                    "index": 0,
+                                    "delta": {
+                                        "role": "assistant",
+                                        "reasoning_content": reasoning_text
+                                    }
+                                }]
+                            }

                # Handle text content
                if "content" in delta and delta["content"]:
--- a/models/modelscope/modelscope_bot.py
+++ b/models/modelscope/modelscope_bot.py
@@ -576,6 +576,15 @@ class ModelScopeBot(Bot):
                        continue
                    
                    if delta.get("reasoning_content"):
+                        yield {
+                            "choices": [{
+                                "index": 0,
+                                "delta": {
+                                    "role": "assistant",
+                                    "reasoning_content": delta["reasoning_content"]
+                                }
+                            }]
+                        }
                        continue
                    
                    tool_call_chunks = delta.get("tool_calls")