diff --git a/agent/chat/service.py b/agent/chat/service.py index de7d345a..550063f1 100644 --- a/agent/chat/service.py +++ b/agent/chat/service.py @@ -57,7 +57,16 @@ class ChatService: event_type = event.get("type") data = event.get("data", {}) - if event_type == "message_update": + if event_type == "reasoning_update": + delta = data.get("delta", "") + if delta: + send_chunk_fn({ + "chunk_type": "reasoning", + "delta": delta, + "segment_id": state.segment_id, + }) + + elif event_type == "message_update": # Incremental text delta delta = data.get("delta", "") if delta: diff --git a/agent/memory/conversation_store.py b/agent/memory/conversation_store.py index a4f15aab..4ab0800b 100644 --- a/agent/memory/conversation_store.py +++ b/agent/memory/conversation_store.py @@ -188,8 +188,9 @@ def _group_into_display_turns( if text: turns.append({"role": "user", "content": text, "created_at": created_at}) - # Collect all tool_calls and tool_results from the rest of the group - all_tool_calls: List[Dict[str, Any]] = [] + # Build an ordered list of steps preserving the original sequence: + # thinking → content → tool_call → content → ... + steps: List[Dict[str, Any]] = [] tool_results: Dict[str, str] = {} final_text = "" final_ts: Optional[int] = None @@ -198,24 +199,46 @@ def _group_into_display_turns( if role == "user": tool_results.update(_extract_tool_results(content)) elif role == "assistant": - tcs = _extract_tool_calls(content) - all_tool_calls.extend(tcs) - t = _extract_display_text(content) - if t: - final_text = t + # Walk content blocks in order to preserve interleaving + if isinstance(content, list): + for block in content: + if not isinstance(block, dict): + continue + btype = block.get("type") + if btype == "thinking": + txt = block.get("thinking", "").strip() + if txt: + steps.append({"type": "thinking", "content": txt}) + elif btype == "text": + txt = block.get("text", "").strip() + if txt: + steps.append({"type": "content", "content": txt}) + final_text = txt + elif btype == "tool_use": + steps.append({ + "type": "tool", + "id": block.get("id", ""), + "name": block.get("name", ""), + "arguments": block.get("input", {}), + }) + elif isinstance(content, str) and content.strip(): + steps.append({"type": "content", "content": content.strip()}) + final_text = content.strip() final_ts = created_at - # Attach tool results to their matching tool_call entries - for tc in all_tool_calls: - tc["result"] = tool_results.get(tc.get("id", ""), "") + # Attach tool results to tool steps + for step in steps: + if step["type"] == "tool": + step["result"] = tool_results.get(step.get("id", ""), "") - if final_text or all_tool_calls: - turns.append({ + if steps or final_text: + turn = { "role": "assistant", "content": final_text, - "tool_calls": all_tool_calls, + "steps": steps, "created_at": final_ts or (user_row[1] if user_row else 0), - }) + } + turns.append(turn) return turns @@ -312,6 +335,9 @@ class ConversationStore: content = json.loads(raw_content) except Exception: content = raw_content + # Strip thinking blocks — they are stored for UI display only + if role == "assistant" and isinstance(content, list): + content = [b for b in content if b.get("type") != "thinking"] result.append({"role": role, "content": content}) return result diff --git a/agent/protocol/agent_stream.py b/agent/protocol/agent_stream.py index 1b250011..45f7d8a5 100644 --- a/agent/protocol/agent_stream.py +++ b/agent/protocol/agent_stream.py @@ -527,6 +527,7 @@ class AgentStreamExecutor: # Streaming response full_content = "" + full_reasoning = "" tool_calls_buffer = {} # {index: {id, name, arguments}} gemini_raw_parts = None # Preserve Gemini thoughtSignature for round-trip stop_reason = None # Track why the stream stopped @@ -584,10 +585,10 @@ class AgentStreamExecutor: if finish_reason: stop_reason = finish_reason - # Skip reasoning_content (internal thinking from models like GLM-5) reasoning_delta = delta.get("reasoning_content") or "" - # if reasoning_delta: - # logger.debug(f"🧠 [thinking] {reasoning_delta[:100]}...") + if reasoning_delta: + full_reasoning += reasoning_delta + self._emit_event("reasoning_update", {"delta": reasoning_delta}) # Handle text content content_delta = delta.get("content") or "" @@ -788,7 +789,12 @@ class AgentStreamExecutor: # Add assistant message to history (Claude format uses content blocks) assistant_msg = {"role": "assistant", "content": []} - # Add text content block if present + if full_reasoning: + assistant_msg["content"].append({ + "type": "thinking", + "thinking": full_reasoning + }) + if full_content: assistant_msg["content"].append({ "type": "text", diff --git a/bridge/agent_event_handler.py b/bridge/agent_event_handler.py index b04c77b8..50826235 100644 --- a/bridge/agent_event_handler.py +++ b/bridge/agent_event_handler.py @@ -26,8 +26,7 @@ class AgentEventHandler: if context: self.channel = context.kwargs.get("channel") if hasattr(context, "kwargs") else None - # Track current thinking for channel output - self.current_thinking = "" + self.current_content = "" self.turn_number = 0 def handle_event(self, event): @@ -47,6 +46,8 @@ class AgentEventHandler: self._handle_message_update(data) elif event_type == "message_end": self._handle_message_end(data) + elif event_type == "reasoning_update": + pass elif event_type == "tool_execution_start": self._handle_tool_execution_start(data) elif event_type == "tool_execution_end": @@ -59,30 +60,26 @@ class AgentEventHandler: def _handle_turn_start(self, data): """Handle turn start event""" self.turn_number = data.get("turn", 0) - self.has_tool_calls_in_turn = False - self.current_thinking = "" + self.current_content = "" def _handle_message_update(self, data): - """Handle message update event (streaming text)""" + """Handle message update event (streaming content text)""" delta = data.get("delta", "") - self.current_thinking += delta + self.current_content += delta def _handle_message_end(self, data): """Handle message end event""" tool_calls = data.get("tool_calls", []) - # Only send thinking process if followed by tool calls if tool_calls: - if self.current_thinking.strip(): - logger.info(f"💭 {self.current_thinking.strip()[:200]}{'...' if len(self.current_thinking) > 200 else ''}") - # Send thinking process to channel - self._send_to_channel(f"{self.current_thinking.strip()}") + if self.current_content.strip(): + logger.info(f"💭 {self.current_content.strip()[:200]}{'...' if len(self.current_content) > 200 else ''}") + self._send_to_channel(self.current_content.strip()) else: - # No tool calls = final response (logged at agent_stream level) - if self.current_thinking.strip(): - logger.debug(f"💬 {self.current_thinking.strip()[:200]}{'...' if len(self.current_thinking) > 200 else ''}") + if self.current_content.strip(): + logger.debug(f"💬 {self.current_content.strip()[:200]}{'...' if len(self.current_content) > 200 else ''}") - self.current_thinking = "" + self.current_content = "" def _handle_tool_execution_start(self, data): """Handle tool execution start event - logged by agent_stream.py""" diff --git a/channel/web/static/css/console.css b/channel/web/static/css/console.css index ea58f54e..96b0811b 100644 --- a/channel/web/static/css/console.css +++ b/channel/web/static/css/console.css @@ -146,7 +146,7 @@ font-size: 0.75rem; line-height: 1.5; color: #94a3b8; - max-height: 200px; + max-height: 300px; overflow-y: auto; } .dark .agent-thinking-step .thinking-full { @@ -158,6 +158,20 @@ .agent-thinking-step .thinking-full p:first-child { margin-top: 0; } .agent-thinking-step .thinking-full p:last-child { margin-bottom: 0; } +/* Content step - real text output frozen before tool calls */ +.agent-content-step { + font-size: 0.875rem; + line-height: 1.6; + color: inherit; + margin-bottom: 0.5rem; + padding-bottom: 0.5rem; + border-bottom: 1px dashed rgba(0, 0, 0, 0.06); +} +.dark .agent-content-step { border-bottom-color: rgba(255, 255, 255, 0.06); } +.agent-content-step .agent-content-body p { margin: 0.25em 0; } +.agent-content-step .agent-content-body p:first-child { margin-top: 0; } +.agent-content-step .agent-content-body p:last-child { margin-bottom: 0; } + /* Tool step - collapsible */ .agent-tool-step .tool-header { display: flex; diff --git a/channel/web/static/js/console.js b/channel/web/static/js/console.js index 24e120be..16c4c6c6 100644 --- a/channel/web/static/js/console.js +++ b/channel/web/static/js/console.js @@ -815,6 +815,8 @@ function startSSE(requestId, loadingEl, timestamp) { let mediaEl = null; // .media-content (images & file attachments) let accumulatedText = ''; let currentToolEl = null; + let currentReasoningEl = null; // live reasoning bubble + let reasoningText = ''; function ensureBotEl() { if (botEl) return; @@ -843,39 +845,61 @@ function startSSE(requestId, loadingEl, timestamp) { let item; try { item = JSON.parse(e.data); } catch (_) { return; } - if (item.type === 'delta') { + if (item.type === 'reasoning') { ensureBotEl(); + reasoningText += item.content; + if (!currentReasoningEl) { + currentReasoningEl = document.createElement('div'); + currentReasoningEl.className = 'agent-step agent-thinking-step'; + currentReasoningEl.innerHTML = ` +
+ + + +
+
`; + stepsEl.appendChild(currentReasoningEl); + } + // Stream reasoning as a single-line summary (collapsed); full text available on expand + const oneLine = reasoningText.trim().replace(/\n+/g, ' '); + currentReasoningEl.querySelector('.thinking-summary').textContent = + oneLine.length > 80 ? oneLine.substring(0, 80) + '…' : oneLine; + currentReasoningEl.querySelector('.thinking-full').innerHTML = renderMarkdown(reasoningText); + scrollChatToBottom(); + + } else if (item.type === 'delta') { + ensureBotEl(); + if (currentReasoningEl) { + if (reasoningText.trim().replace(/\n+/g, ' ').length <= 80) + currentReasoningEl.classList.add('no-expand'); + currentReasoningEl = null; + reasoningText = ''; + } accumulatedText += item.content; contentEl.innerHTML = renderMarkdown(accumulatedText); scrollChatToBottom(); + } else if (item.type === 'message_end') { + // Backend already strips reasoning_content; all deltas are real content. + // Freeze accumulated text as visible content before tool execution begins. + if (item.has_tool_calls && accumulatedText.trim()) { + ensureBotEl(); + const frozenEl = document.createElement('div'); + frozenEl.className = 'agent-step agent-content-step'; + frozenEl.innerHTML = `
${renderMarkdown(accumulatedText.trim())}
`; + stepsEl.appendChild(frozenEl); + accumulatedText = ''; + contentEl.innerHTML = ''; + scrollChatToBottom(); + } + } else if (item.type === 'tool_start') { ensureBotEl(); - - // Save current thinking as a collapsible step - if (accumulatedText.trim()) { - const fullText = accumulatedText.trim(); - const oneLine = fullText.replace(/\n+/g, ' '); - const needsTruncate = oneLine.length > 80; - const stepEl = document.createElement('div'); - stepEl.className = 'agent-step agent-thinking-step' + (needsTruncate ? '' : ' no-expand'); - if (needsTruncate) { - const truncated = oneLine.substring(0, 80) + '…'; - stepEl.innerHTML = ` -
- - ${escapeHtml(truncated)} - -
-
${renderMarkdown(fullText)}
`; - } else { - stepEl.innerHTML = ` -
- - ${escapeHtml(oneLine)} -
`; - } - stepsEl.appendChild(stepEl); + if (currentReasoningEl) { + if (reasoningText.trim().replace(/\n+/g, ' ').length <= 80) + currentReasoningEl.classList.add('no-expand'); + currentReasoningEl = null; + reasoningText = ''; } accumulatedText = ''; contentEl.innerHTML = ''; @@ -979,6 +1003,13 @@ function startSSE(requestId, loadingEl, timestamp) { es.close(); delete activeStreams[requestId]; + if (currentReasoningEl) { + if (reasoningText.trim().replace(/\n+/g, ' ').length <= 80) + currentReasoningEl.classList.add('no-expand'); + currentReasoningEl = null; + reasoningText = ''; + } + // item.content may be empty when "done" is only a stream-close signal after media. const finalText = item.content || accumulatedText; @@ -1102,17 +1133,106 @@ function renderToolCallsHtml(toolCalls) { }).join(''); } -function createBotMessageEl(content, timestamp, requestId, toolCalls) { +function renderThinkingHtml(text) { + if (!text || !text.trim()) return ''; + const full = text.trim(); + const oneLine = full.replace(/\n+/g, ' '); + if (oneLine.length > 80) { + const truncated = oneLine.substring(0, 80) + '…'; + return ` +
+
+ + ${escapeHtml(truncated)} + +
+
${renderMarkdown(full)}
+
`; + } + return ` +
+
+ + ${escapeHtml(oneLine)} +
+
`; +} + +function renderStepsHtml(steps) { + if (!steps || steps.length === 0) return { stepsHtml: '', finalContent: '' }; + + // Find the index of the last content step — it becomes the main answer, not a step + let lastContentIdx = -1; + for (let i = steps.length - 1; i >= 0; i--) { + if (steps[i].type === 'content') { lastContentIdx = i; break; } + } + + let html = ''; + let lastContentText = ''; + for (let i = 0; i < steps.length; i++) { + const step = steps[i]; + if (step.type === 'thinking') { + html += renderThinkingHtml(step.content); + } else if (step.type === 'content') { + if (i === lastContentIdx) { + lastContentText = step.content; + } else { + html += `
${renderMarkdown(step.content)}
`; + } + } else if (step.type === 'tool') { + const argsStr = formatToolArgs(step.arguments || {}); + const resultStr = step.result ? escapeHtml(String(step.result)) : ''; + html += ` +
+
+ + ${escapeHtml(step.name || '')} + +
+
+
+
Input
+
${argsStr}
+
+ ${resultStr ? ` +
+
Output
+
${resultStr}
+
` : ''} +
+
`; + } + } + return { stepsHtml: html, lastContentText }; +} + +function createBotMessageEl(content, timestamp, requestId, msg) { const el = document.createElement('div'); el.className = 'flex gap-3 px-4 sm:px-6 py-3'; if (requestId) el.dataset.requestId = requestId; - const toolsHtml = renderToolCallsHtml(toolCalls); + + let stepsHtml = ''; + let displayContent = content; + + if (msg && msg.steps && msg.steps.length > 0) { + // New format: ordered steps with interleaved content + const result = renderStepsHtml(msg.steps); + stepsHtml = result.stepsHtml; + // The final content (last text after all steps) is the main answer + displayContent = content || result.lastContentText; + } else { + // Legacy format: separate tool_calls + optional reasoning + const toolCalls = msg && msg.tool_calls; + const reasoning = msg && msg.reasoning; + stepsHtml = renderThinkingHtml(reasoning) + renderToolCallsHtml(toolCalls); + } + el.innerHTML = ` CowAgent
- ${toolsHtml ? `
${toolsHtml}
` : ''} -
${renderMarkdown(content)}
+ ${stepsHtml ? `
${stepsHtml}
` : ''} +
${renderMarkdown(displayContent)}
${formatTime(timestamp)}
@@ -1167,7 +1287,7 @@ function loadHistory(page) { const ts = new Date(msg.created_at * 1000); const el = msg.role === 'user' ? createUserMessageEl(msg.content, ts) - : createBotMessageEl(msg.content || '', ts, null, msg.tool_calls); + : createBotMessageEl(msg.content || '', ts, null, msg); fragment.appendChild(el); }); diff --git a/channel/web/web_channel.py b/channel/web/web_channel.py index 32b27062..f7c9614a 100644 --- a/channel/web/web_channel.py +++ b/channel/web/web_channel.py @@ -168,7 +168,12 @@ class WebChannel(ChatChannel): event_type = event.get("type") data = event.get("data", {}) - if event_type == "message_update": + if event_type == "reasoning_update": + delta = data.get("delta", "") + if delta: + q.put({"type": "reasoning", "content": delta}) + + elif event_type == "message_update": delta = data.get("delta", "") if delta: q.put({"type": "delta", "content": delta}) @@ -195,6 +200,11 @@ class WebChannel(ChatChannel): "execution_time": round(exec_time, 2) }) + elif event_type == "message_end": + tool_calls = data.get("tool_calls", []) + if tool_calls: + q.put({"type": "message_end", "has_tool_calls": True}) + elif event_type == "file_to_send": file_path = data.get("path", "") file_name = data.get("file_name", os.path.basename(file_path)) diff --git a/models/claudeapi/claude_api_bot.py b/models/claudeapi/claude_api_bot.py index 5dcf9173..e7fe8710 100644 --- a/models/claudeapi/claude_api_bot.py +++ b/models/claudeapi/claude_api_bot.py @@ -429,8 +429,21 @@ class ClaudeAPIBot(Bot, OpenAIImage): delta = event.get("delta", {}) delta_type = delta.get("type") - if delta_type == "text_delta": - # Text content + if delta_type == "thinking_delta": + thinking_text = delta.get("thinking", "") + if thinking_text: + yield { + "choices": [{ + "index": 0, + "delta": { + "role": "assistant", + "reasoning_content": thinking_text + }, + "finish_reason": None + }] + } + + elif delta_type == "text_delta": content = delta.get("text", "") yield { "id": event.get("id", ""), diff --git a/models/minimax/minimax_bot.py b/models/minimax/minimax_bot.py index af80e795..63ca789c 100644 --- a/models/minimax/minimax_bot.py +++ b/models/minimax/minimax_bot.py @@ -233,11 +233,8 @@ class MinimaxBot(Bot): logger.debug(f"[MINIMAX] API call: model={model}, tools={len(converted_tools) if converted_tools else 0}, stream={stream}") - # Check if we should show thinking process - show_thinking = kwargs.pop("show_thinking", conf().get("minimax_show_thinking", False)) - if stream: - return self._handle_stream_response(request_body, show_thinking=show_thinking) + return self._handle_stream_response(request_body) else: return self._handle_sync_response(request_body) @@ -466,12 +463,11 @@ class MinimaxBot(Bot): logger.error(traceback.format_exc()) yield {"error": True, "message": str(e), "status_code": 500} - def _handle_stream_response(self, request_body, show_thinking=False): + def _handle_stream_response(self, request_body): """Handle streaming API response - + Args: request_body: API request parameters - show_thinking: Whether to show thinking/reasoning process to users """ try: headers = { @@ -550,19 +546,15 @@ class MinimaxBot(Bot): current_reasoning[reasoning_index]["text"] += reasoning_text - # Optionally yield thinking as visible content - if show_thinking: - # Yield thinking text as-is (without emoji decoration) - # The reasoning text will be displayed to users - yield { - "choices": [{ - "index": 0, - "delta": { - "role": "assistant", - "content": reasoning_text - } - }] - } + yield { + "choices": [{ + "index": 0, + "delta": { + "role": "assistant", + "reasoning_content": reasoning_text + } + }] + } # Handle text content if "content" in delta and delta["content"]: diff --git a/models/modelscope/modelscope_bot.py b/models/modelscope/modelscope_bot.py index 6d55abce..6e2b767f 100644 --- a/models/modelscope/modelscope_bot.py +++ b/models/modelscope/modelscope_bot.py @@ -576,6 +576,15 @@ class ModelScopeBot(Bot): continue if delta.get("reasoning_content"): + yield { + "choices": [{ + "index": 0, + "delta": { + "role": "assistant", + "reasoning_content": delta["reasoning_content"] + } + }] + } continue tool_call_chunks = delta.get("tool_calls")