diff --git a/agent/chat/service.py b/agent/chat/service.py
index de7d345a..550063f1 100644
--- a/agent/chat/service.py
+++ b/agent/chat/service.py
@@ -57,7 +57,16 @@ class ChatService:
event_type = event.get("type")
data = event.get("data", {})
- if event_type == "message_update":
+ if event_type == "reasoning_update":
+ delta = data.get("delta", "")
+ if delta:
+ send_chunk_fn({
+ "chunk_type": "reasoning",
+ "delta": delta,
+ "segment_id": state.segment_id,
+ })
+
+ elif event_type == "message_update":
# Incremental text delta
delta = data.get("delta", "")
if delta:
diff --git a/agent/memory/conversation_store.py b/agent/memory/conversation_store.py
index a4f15aab..4ab0800b 100644
--- a/agent/memory/conversation_store.py
+++ b/agent/memory/conversation_store.py
@@ -188,8 +188,9 @@ def _group_into_display_turns(
if text:
turns.append({"role": "user", "content": text, "created_at": created_at})
- # Collect all tool_calls and tool_results from the rest of the group
- all_tool_calls: List[Dict[str, Any]] = []
+ # Build an ordered list of steps preserving the original sequence:
+ # thinking → content → tool_call → content → ...
+ steps: List[Dict[str, Any]] = []
tool_results: Dict[str, str] = {}
final_text = ""
final_ts: Optional[int] = None
@@ -198,24 +199,46 @@ def _group_into_display_turns(
if role == "user":
tool_results.update(_extract_tool_results(content))
elif role == "assistant":
- tcs = _extract_tool_calls(content)
- all_tool_calls.extend(tcs)
- t = _extract_display_text(content)
- if t:
- final_text = t
+ # Walk content blocks in order to preserve interleaving
+ if isinstance(content, list):
+ for block in content:
+ if not isinstance(block, dict):
+ continue
+ btype = block.get("type")
+ if btype == "thinking":
+ txt = block.get("thinking", "").strip()
+ if txt:
+ steps.append({"type": "thinking", "content": txt})
+ elif btype == "text":
+ txt = block.get("text", "").strip()
+ if txt:
+ steps.append({"type": "content", "content": txt})
+ final_text = txt
+ elif btype == "tool_use":
+ steps.append({
+ "type": "tool",
+ "id": block.get("id", ""),
+ "name": block.get("name", ""),
+ "arguments": block.get("input", {}),
+ })
+ elif isinstance(content, str) and content.strip():
+ steps.append({"type": "content", "content": content.strip()})
+ final_text = content.strip()
final_ts = created_at
- # Attach tool results to their matching tool_call entries
- for tc in all_tool_calls:
- tc["result"] = tool_results.get(tc.get("id", ""), "")
+ # Attach tool results to tool steps
+ for step in steps:
+ if step["type"] == "tool":
+ step["result"] = tool_results.get(step.get("id", ""), "")
- if final_text or all_tool_calls:
- turns.append({
+ if steps or final_text:
+ turn = {
"role": "assistant",
"content": final_text,
- "tool_calls": all_tool_calls,
+ "steps": steps,
"created_at": final_ts or (user_row[1] if user_row else 0),
- })
+ }
+ turns.append(turn)
return turns
@@ -312,6 +335,9 @@ class ConversationStore:
content = json.loads(raw_content)
except Exception:
content = raw_content
+ # Strip thinking blocks — they are stored for UI display only
+ if role == "assistant" and isinstance(content, list):
+ content = [b for b in content if b.get("type") != "thinking"]
result.append({"role": role, "content": content})
return result
diff --git a/agent/protocol/agent_stream.py b/agent/protocol/agent_stream.py
index 1b250011..45f7d8a5 100644
--- a/agent/protocol/agent_stream.py
+++ b/agent/protocol/agent_stream.py
@@ -527,6 +527,7 @@ class AgentStreamExecutor:
# Streaming response
full_content = ""
+ full_reasoning = ""
tool_calls_buffer = {} # {index: {id, name, arguments}}
gemini_raw_parts = None # Preserve Gemini thoughtSignature for round-trip
stop_reason = None # Track why the stream stopped
@@ -584,10 +585,10 @@ class AgentStreamExecutor:
if finish_reason:
stop_reason = finish_reason
- # Skip reasoning_content (internal thinking from models like GLM-5)
reasoning_delta = delta.get("reasoning_content") or ""
- # if reasoning_delta:
- # logger.debug(f"🧠 [thinking] {reasoning_delta[:100]}...")
+ if reasoning_delta:
+ full_reasoning += reasoning_delta
+ self._emit_event("reasoning_update", {"delta": reasoning_delta})
# Handle text content
content_delta = delta.get("content") or ""
@@ -788,7 +789,12 @@ class AgentStreamExecutor:
# Add assistant message to history (Claude format uses content blocks)
assistant_msg = {"role": "assistant", "content": []}
- # Add text content block if present
+ if full_reasoning:
+ assistant_msg["content"].append({
+ "type": "thinking",
+ "thinking": full_reasoning
+ })
+
if full_content:
assistant_msg["content"].append({
"type": "text",
diff --git a/bridge/agent_event_handler.py b/bridge/agent_event_handler.py
index b04c77b8..50826235 100644
--- a/bridge/agent_event_handler.py
+++ b/bridge/agent_event_handler.py
@@ -26,8 +26,7 @@ class AgentEventHandler:
if context:
self.channel = context.kwargs.get("channel") if hasattr(context, "kwargs") else None
- # Track current thinking for channel output
- self.current_thinking = ""
+ self.current_content = ""
self.turn_number = 0
def handle_event(self, event):
@@ -47,6 +46,8 @@ class AgentEventHandler:
self._handle_message_update(data)
elif event_type == "message_end":
self._handle_message_end(data)
+ elif event_type == "reasoning_update":
+ pass
elif event_type == "tool_execution_start":
self._handle_tool_execution_start(data)
elif event_type == "tool_execution_end":
@@ -59,30 +60,26 @@ class AgentEventHandler:
def _handle_turn_start(self, data):
"""Handle turn start event"""
self.turn_number = data.get("turn", 0)
- self.has_tool_calls_in_turn = False
- self.current_thinking = ""
+ self.current_content = ""
def _handle_message_update(self, data):
- """Handle message update event (streaming text)"""
+ """Handle message update event (streaming content text)"""
delta = data.get("delta", "")
- self.current_thinking += delta
+ self.current_content += delta
def _handle_message_end(self, data):
"""Handle message end event"""
tool_calls = data.get("tool_calls", [])
- # Only send thinking process if followed by tool calls
if tool_calls:
- if self.current_thinking.strip():
- logger.info(f"💭 {self.current_thinking.strip()[:200]}{'...' if len(self.current_thinking) > 200 else ''}")
- # Send thinking process to channel
- self._send_to_channel(f"{self.current_thinking.strip()}")
+ if self.current_content.strip():
+ logger.info(f"💭 {self.current_content.strip()[:200]}{'...' if len(self.current_content) > 200 else ''}")
+ self._send_to_channel(self.current_content.strip())
else:
- # No tool calls = final response (logged at agent_stream level)
- if self.current_thinking.strip():
- logger.debug(f"💬 {self.current_thinking.strip()[:200]}{'...' if len(self.current_thinking) > 200 else ''}")
+ if self.current_content.strip():
+ logger.debug(f"💬 {self.current_content.strip()[:200]}{'...' if len(self.current_content) > 200 else ''}")
- self.current_thinking = ""
+ self.current_content = ""
def _handle_tool_execution_start(self, data):
"""Handle tool execution start event - logged by agent_stream.py"""
diff --git a/channel/web/static/css/console.css b/channel/web/static/css/console.css
index ea58f54e..96b0811b 100644
--- a/channel/web/static/css/console.css
+++ b/channel/web/static/css/console.css
@@ -146,7 +146,7 @@
font-size: 0.75rem;
line-height: 1.5;
color: #94a3b8;
- max-height: 200px;
+ max-height: 300px;
overflow-y: auto;
}
.dark .agent-thinking-step .thinking-full {
@@ -158,6 +158,20 @@
.agent-thinking-step .thinking-full p:first-child { margin-top: 0; }
.agent-thinking-step .thinking-full p:last-child { margin-bottom: 0; }
+/* Content step - real text output frozen before tool calls */
+.agent-content-step {
+ font-size: 0.875rem;
+ line-height: 1.6;
+ color: inherit;
+ margin-bottom: 0.5rem;
+ padding-bottom: 0.5rem;
+ border-bottom: 1px dashed rgba(0, 0, 0, 0.06);
+}
+.dark .agent-content-step { border-bottom-color: rgba(255, 255, 255, 0.06); }
+.agent-content-step .agent-content-body p { margin: 0.25em 0; }
+.agent-content-step .agent-content-body p:first-child { margin-top: 0; }
+.agent-content-step .agent-content-body p:last-child { margin-bottom: 0; }
+
/* Tool step - collapsible */
.agent-tool-step .tool-header {
display: flex;
diff --git a/channel/web/static/js/console.js b/channel/web/static/js/console.js
index 24e120be..16c4c6c6 100644
--- a/channel/web/static/js/console.js
+++ b/channel/web/static/js/console.js
@@ -815,6 +815,8 @@ function startSSE(requestId, loadingEl, timestamp) {
let mediaEl = null; // .media-content (images & file attachments)
let accumulatedText = '';
let currentToolEl = null;
+ let currentReasoningEl = null; // live reasoning bubble
+ let reasoningText = '';
function ensureBotEl() {
if (botEl) return;
@@ -843,39 +845,61 @@ function startSSE(requestId, loadingEl, timestamp) {
let item;
try { item = JSON.parse(e.data); } catch (_) { return; }
- if (item.type === 'delta') {
+ if (item.type === 'reasoning') {
ensureBotEl();
+ reasoningText += item.content;
+ if (!currentReasoningEl) {
+ currentReasoningEl = document.createElement('div');
+ currentReasoningEl.className = 'agent-step agent-thinking-step';
+ currentReasoningEl.innerHTML = `
+
+ `;
+ stepsEl.appendChild(currentReasoningEl);
+ }
+ // Stream reasoning as a single-line summary (collapsed); full text available on expand
+ const oneLine = reasoningText.trim().replace(/\n+/g, ' ');
+ currentReasoningEl.querySelector('.thinking-summary').textContent =
+ oneLine.length > 80 ? oneLine.substring(0, 80) + '…' : oneLine;
+ currentReasoningEl.querySelector('.thinking-full').innerHTML = renderMarkdown(reasoningText);
+ scrollChatToBottom();
+
+ } else if (item.type === 'delta') {
+ ensureBotEl();
+ if (currentReasoningEl) {
+ if (reasoningText.trim().replace(/\n+/g, ' ').length <= 80)
+ currentReasoningEl.classList.add('no-expand');
+ currentReasoningEl = null;
+ reasoningText = '';
+ }
accumulatedText += item.content;
contentEl.innerHTML = renderMarkdown(accumulatedText);
scrollChatToBottom();
+ } else if (item.type === 'message_end') {
+ // Backend sends reasoning as separate 'reasoning' items, so every 'delta' is real content.
+ // Freeze accumulated text as visible content before tool execution begins.
+ if (item.has_tool_calls && accumulatedText.trim()) {
+ ensureBotEl();
+ const frozenEl = document.createElement('div');
+ frozenEl.className = 'agent-step agent-content-step';
+ frozenEl.innerHTML = `${renderMarkdown(accumulatedText.trim())}
`;
+ stepsEl.appendChild(frozenEl);
+ accumulatedText = '';
+ contentEl.innerHTML = '';
+ scrollChatToBottom();
+ }
+
} else if (item.type === 'tool_start') {
ensureBotEl();
-
- // Save current thinking as a collapsible step
- if (accumulatedText.trim()) {
- const fullText = accumulatedText.trim();
- const oneLine = fullText.replace(/\n+/g, ' ');
- const needsTruncate = oneLine.length > 80;
- const stepEl = document.createElement('div');
- stepEl.className = 'agent-step agent-thinking-step' + (needsTruncate ? '' : ' no-expand');
- if (needsTruncate) {
- const truncated = oneLine.substring(0, 80) + '…';
- stepEl.innerHTML = `
-
- ${renderMarkdown(fullText)}
`;
- } else {
- stepEl.innerHTML = `
- `;
- }
- stepsEl.appendChild(stepEl);
+ if (currentReasoningEl) {
+ if (reasoningText.trim().replace(/\n+/g, ' ').length <= 80)
+ currentReasoningEl.classList.add('no-expand');
+ currentReasoningEl = null;
+ reasoningText = '';
}
accumulatedText = '';
contentEl.innerHTML = '';
@@ -979,6 +1003,13 @@ function startSSE(requestId, loadingEl, timestamp) {
es.close();
delete activeStreams[requestId];
+ if (currentReasoningEl) {
+ if (reasoningText.trim().replace(/\n+/g, ' ').length <= 80)
+ currentReasoningEl.classList.add('no-expand');
+ currentReasoningEl = null;
+ reasoningText = '';
+ }
+
// item.content may be empty when "done" is only a stream-close signal after media.
const finalText = item.content || accumulatedText;
@@ -1102,17 +1133,106 @@ function renderToolCallsHtml(toolCalls) {
}).join('');
}
-function createBotMessageEl(content, timestamp, requestId, toolCalls) {
+function renderThinkingHtml(text) {
+ if (!text || !text.trim()) return '';
+ const full = text.trim();
+ const oneLine = full.replace(/\n+/g, ' ');
+ if (oneLine.length > 80) {
+ const truncated = oneLine.substring(0, 80) + '…';
+ return `
+
+
+
${renderMarkdown(full)}
+
`;
+ }
+ return `
+
+
+
`;
+}
+
+function renderStepsHtml(steps) {
+ if (!steps || steps.length === 0) return { stepsHtml: '', finalContent: '' };
+
+ // Find the index of the last content step — it becomes the main answer, not a step
+ let lastContentIdx = -1;
+ for (let i = steps.length - 1; i >= 0; i--) {
+ if (steps[i].type === 'content') { lastContentIdx = i; break; }
+ }
+
+ let html = '';
+ let lastContentText = '';
+ for (let i = 0; i < steps.length; i++) {
+ const step = steps[i];
+ if (step.type === 'thinking') {
+ html += renderThinkingHtml(step.content);
+ } else if (step.type === 'content') {
+ if (i === lastContentIdx) {
+ lastContentText = step.content;
+ } else {
+ html += `${renderMarkdown(step.content)}
`;
+ }
+ } else if (step.type === 'tool') {
+ const argsStr = formatToolArgs(step.arguments || {});
+ const resultStr = step.result ? escapeHtml(String(step.result)) : '';
+ html += `
+`;
+ }
+ }
+ return { stepsHtml: html, lastContentText };
+}
+
+function createBotMessageEl(content, timestamp, requestId, msg) {
const el = document.createElement('div');
el.className = 'flex gap-3 px-4 sm:px-6 py-3';
if (requestId) el.dataset.requestId = requestId;
- const toolsHtml = renderToolCallsHtml(toolCalls);
+
+ let stepsHtml = '';
+ let displayContent = content;
+
+ if (msg && msg.steps && msg.steps.length > 0) {
+ // New format: ordered steps with interleaved content
+ const result = renderStepsHtml(msg.steps);
+ stepsHtml = result.stepsHtml;
+ // Main answer: prefer the passed-in content; otherwise fall back to the last content-type step
+ displayContent = content || result.lastContentText;
+ } else {
+ // Legacy format: separate tool_calls + optional reasoning
+ const toolCalls = msg && msg.tool_calls;
+ const reasoning = msg && msg.reasoning;
+ stepsHtml = renderThinkingHtml(reasoning) + renderToolCallsHtml(toolCalls);
+ }
+
el.innerHTML = `
- ${toolsHtml ? `
${toolsHtml}
` : ''}
-
${renderMarkdown(content)}
+ ${stepsHtml ? `
${stepsHtml}
` : ''}
+
${renderMarkdown(displayContent)}
${formatTime(timestamp)}
@@ -1167,7 +1287,7 @@ function loadHistory(page) {
const ts = new Date(msg.created_at * 1000);
const el = msg.role === 'user'
? createUserMessageEl(msg.content, ts)
- : createBotMessageEl(msg.content || '', ts, null, msg.tool_calls);
+ : createBotMessageEl(msg.content || '', ts, null, msg);
fragment.appendChild(el);
});
diff --git a/channel/web/web_channel.py b/channel/web/web_channel.py
index 32b27062..f7c9614a 100644
--- a/channel/web/web_channel.py
+++ b/channel/web/web_channel.py
@@ -168,7 +168,12 @@ class WebChannel(ChatChannel):
event_type = event.get("type")
data = event.get("data", {})
- if event_type == "message_update":
+ if event_type == "reasoning_update":
+ delta = data.get("delta", "")
+ if delta:
+ q.put({"type": "reasoning", "content": delta})
+
+ elif event_type == "message_update":
delta = data.get("delta", "")
if delta:
q.put({"type": "delta", "content": delta})
@@ -195,6 +200,11 @@ class WebChannel(ChatChannel):
"execution_time": round(exec_time, 2)
})
+ elif event_type == "message_end":
+ tool_calls = data.get("tool_calls", [])
+ if tool_calls:
+ q.put({"type": "message_end", "has_tool_calls": True})
+
elif event_type == "file_to_send":
file_path = data.get("path", "")
file_name = data.get("file_name", os.path.basename(file_path))
diff --git a/models/claudeapi/claude_api_bot.py b/models/claudeapi/claude_api_bot.py
index 5dcf9173..e7fe8710 100644
--- a/models/claudeapi/claude_api_bot.py
+++ b/models/claudeapi/claude_api_bot.py
@@ -429,8 +429,21 @@ class ClaudeAPIBot(Bot, OpenAIImage):
delta = event.get("delta", {})
delta_type = delta.get("type")
- if delta_type == "text_delta":
- # Text content
+ if delta_type == "thinking_delta":
+ thinking_text = delta.get("thinking", "")
+ if thinking_text:
+ yield {
+ "choices": [{
+ "index": 0,
+ "delta": {
+ "role": "assistant",
+ "reasoning_content": thinking_text
+ },
+ "finish_reason": None
+ }]
+ }
+
+ elif delta_type == "text_delta":
content = delta.get("text", "")
yield {
"id": event.get("id", ""),
diff --git a/models/minimax/minimax_bot.py b/models/minimax/minimax_bot.py
index af80e795..63ca789c 100644
--- a/models/minimax/minimax_bot.py
+++ b/models/minimax/minimax_bot.py
@@ -233,11 +233,8 @@ class MinimaxBot(Bot):
logger.debug(f"[MINIMAX] API call: model={model}, tools={len(converted_tools) if converted_tools else 0}, stream={stream}")
- # Check if we should show thinking process
- show_thinking = kwargs.pop("show_thinking", conf().get("minimax_show_thinking", False))
-
if stream:
- return self._handle_stream_response(request_body, show_thinking=show_thinking)
+ return self._handle_stream_response(request_body)
else:
return self._handle_sync_response(request_body)
@@ -466,12 +463,11 @@ class MinimaxBot(Bot):
logger.error(traceback.format_exc())
yield {"error": True, "message": str(e), "status_code": 500}
- def _handle_stream_response(self, request_body, show_thinking=False):
+ def _handle_stream_response(self, request_body):
"""Handle streaming API response
-
+
Args:
request_body: API request parameters
- show_thinking: Whether to show thinking/reasoning process to users
"""
try:
headers = {
@@ -550,19 +546,15 @@ class MinimaxBot(Bot):
current_reasoning[reasoning_index]["text"] += reasoning_text
- # Optionally yield thinking as visible content
- if show_thinking:
- # Yield thinking text as-is (without emoji decoration)
- # The reasoning text will be displayed to users
- yield {
- "choices": [{
- "index": 0,
- "delta": {
- "role": "assistant",
- "content": reasoning_text
- }
- }]
- }
+ yield {
+ "choices": [{
+ "index": 0,
+ "delta": {
+ "role": "assistant",
+ "reasoning_content": reasoning_text
+ }
+ }]
+ }
# Handle text content
if "content" in delta and delta["content"]:
diff --git a/models/modelscope/modelscope_bot.py b/models/modelscope/modelscope_bot.py
index 6d55abce..6e2b767f 100644
--- a/models/modelscope/modelscope_bot.py
+++ b/models/modelscope/modelscope_bot.py
@@ -576,6 +576,15 @@ class ModelScopeBot(Bot):
continue
if delta.get("reasoning_content"):
+ yield {
+ "choices": [{
+ "index": 0,
+ "delta": {
+ "role": "assistant",
+ "reasoning_content": delta["reasoning_content"]
+ }
+ }]
+ }
continue
tool_call_chunks = delta.get("tool_calls")