feat: add enable_thinking config to control deep reasoning on web console

2026-07-20 13:47:15 +08:00 · 2026-04-13 16:06:28 +08:00
parent 3f3d0381e5
commit 89a07e8e74
15 changed files with 135 additions and 73 deletions
--- a/README.md
+++ b/README.md
@@ -203,7 +203,8 @@ cow install-browser
  "agent_workspace": "~/cow",                                 # Agent 的工作空间路径，用于存储 memory、skills、系统设定等
  "agent_max_context_tokens": 50000,                          # Agent 模式下最大上下文 tokens，超出将自动智能压缩处理
  "agent_max_context_turns": 20,                              # Agent 模式下最大上下文记忆轮次，一问一答为一轮，超出后智能压缩处理
-  "agent_max_steps": 20                                       # Agent 模式下单次任务的最大决策步数，超出后将停止继续调用工具
+  "agent_max_steps": 20,                                      # Agent 模式下单次任务的最大决策步数，超出后将停止继续调用工具
+  "enable_thinking": true                                     # 是否启用深度思考，开启后 Web 端展示模型推理过程，关闭后可加速响应
 }
 ```

--- a/agent/protocol/agent_stream.py
+++ b/agent/protocol/agent_stream.py
@@ -78,6 +78,11 @@ class AgentStreamExecutor:
            except Exception as e:
                logger.error(f"Event callback error: {e}")
    
+    def _is_thinking_enabled(self) -> bool:  # Truthy only when the "enable_thinking" config value is truthy AND the model's channel_type is 'web'.
+        from config import conf  # function-scope import of the project config accessor
+        channel_type = getattr(self.model, 'channel_type', '') or ''  # a missing or None channel_type is normalised to ''
+        return conf().get("enable_thinking", True) and channel_type == 'web'  # key defaults to True when absent; NOTE(review): `and` yields the raw config value when it is falsy (e.g. 0/None), not strictly a bool
+
    def _filter_think_tags(self, text: str) -> str:
        """
        Remove <think> and </think> tags but keep the content inside.
@@ -178,7 +183,10 @@ class AgentStreamExecutor:
            Final response text
        """
        # Log user message with model info
-        logger.info(f"🤖 {self.model.model} | 👤 {user_message}")
+        
+        thinking_enabled = self._is_thinking_enabled()
+        thinking_label = "💭 thinking" if thinking_enabled else "⚡ fast"
+        logger.info(f"🤖 {self.model.model} | {thinking_label} | 👤 {user_message}")        
        
        # Add user message (Claude format - use content blocks for consistency)
        self.messages.append({
@@ -588,6 +596,7 @@ class AgentStreamExecutor:
                    reasoning_delta = delta.get("reasoning_content") or ""
                    if reasoning_delta:
                        full_reasoning += reasoning_delta
+                        if self._is_thinking_enabled():
                            self._emit_event("reasoning_update", {"delta": reasoning_delta})

                    # Handle text content
--- a/bridge/agent_bridge.py
+++ b/bridge/agent_bridge.py
@@ -160,13 +160,21 @@ class AgentLLMModel(LLMModel):
                    kwargs['system'] = system_prompt

                # Pass context metadata to bot
-                channel_type = getattr(self, 'channel_type', None)
+                channel_type = getattr(self, 'channel_type', None) or ''
                if channel_type:
                    kwargs['channel_type'] = channel_type
                session_id = getattr(self, 'session_id', None)
                if session_id:
                    kwargs['session_id'] = session_id

+                # Determine thinking: respect global config, then channel_type
+                from config import conf
+                global_thinking = conf().get("enable_thinking", True)
+                if not global_thinking:
+                    kwargs['thinking'] = {"type": "disabled"}
+                else:
+                    kwargs['thinking'] = {"type": "enabled"} if channel_type == "web" else {"type": "disabled"}
+
                response = self.bot.call_with_tools(**kwargs)
                return self._format_response(response)
            else:
@@ -205,13 +213,21 @@ class AgentLLMModel(LLMModel):
                    kwargs['system'] = system_prompt

                # Pass context metadata to bot
-                channel_type = getattr(self, 'channel_type', None)
+                channel_type = getattr(self, 'channel_type', None) or ''
                if channel_type:
                    kwargs['channel_type'] = channel_type
                session_id = getattr(self, 'session_id', None)
                if session_id:
                    kwargs['session_id'] = session_id

+                # Determine thinking: respect global config, then channel_type
+                from config import conf
+                global_thinking = conf().get("enable_thinking", True)
+                if not global_thinking:
+                    kwargs['thinking'] = {"type": "disabled"}
+                else:
+                    kwargs['thinking'] = {"type": "enabled"} if channel_type == "web" else {"type": "disabled"}
+
                stream = self.bot.call_with_tools(**kwargs)
                
                # Convert stream format to our expected format
--- a/channel/web/chat.html
+++ b/channel/web/chat.html
@@ -509,6 +509,18 @@
                                                          bg-slate-50 dark:bg-white/5 text-sm text-slate-800 dark:text-slate-100
                                                          focus:outline-none focus:border-primary-500 font-mono transition-colors">
                                        </div>
+                                        <div class="flex items-center justify-between">
+                                            <label class="flex items-center gap-1.5 text-sm font-medium text-slate-600 dark:text-slate-400">
+                                                <span data-i18n="config_enable_thinking">Deep Thinking</span>
+                                                <span class="cfg-tip" data-tip-key="config_enable_thinking_hint"><i class="fas fa-circle-question"></i></span>
+                                            </label>
+                                            <label class="relative inline-flex items-center cursor-pointer">
+                                                <input id="cfg-enable-thinking" type="checkbox" class="sr-only peer" checked>
+                                                <div class="w-9 h-5 bg-slate-200 dark:bg-slate-700 peer-checked:bg-primary-400 rounded-full
+                                                            after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white
+                                                            after:rounded-full after:h-4 after:w-4 after:transition-all peer-checked:after:translate-x-full"></div>
+                                            </label>
+                                        </div>
                                        <div class="flex items-center justify-end gap-3 pt-1">
                                            <span id="cfg-agent-status" class="text-xs text-primary-500 opacity-0 transition-opacity duration-300"></span>
                                            <button id="cfg-agent-save"
--- a/channel/web/static/css/console.css
+++ b/channel/web/static/css/console.css
@@ -125,9 +125,8 @@
    cursor: pointer;
    user-select: none;
 }
-.agent-thinking-step .thinking-header.no-toggle { cursor: default; }
-.agent-thinking-step .thinking-header:not(.no-toggle):hover { color: #64748b; }
-.dark .agent-thinking-step .thinking-header:not(.no-toggle):hover { color: #cbd5e1; }
+.agent-thinking-step .thinking-header:hover { color: #64748b; }
+.dark .agent-thinking-step .thinking-header:hover { color: #cbd5e1; }
 .agent-thinking-step .thinking-header i:first-child { font-size: 0.625rem; margin-top: 1px; }
 .agent-thinking-step .thinking-chevron {
    font-size: 0.5rem;
@@ -158,6 +157,11 @@
 .agent-thinking-step .thinking-full p { margin: 0.25em 0; }
 .agent-thinking-step .thinking-full p:first-child { margin-top: 0; }
 .agent-thinking-step .thinking-full p:last-child { margin-bottom: 0; }
+.agent-thinking-step .thinking-duration {  /* elapsed-time line that finalizeThinking() in console.js prepends inside .thinking-full */
+    font-size: 0.625rem;
+    color: #b0b8c4;
+    margin-bottom: 0.375rem;  /* gap between the duration line and the content that follows it */
+}

 /* Content step - real text output frozen before tool calls */
 .agent-content-step {
--- a/channel/web/static/js/console.js
+++ b/channel/web/static/js/console.js
@@ -38,6 +38,7 @@ const I18N = {
        config_max_tokens: '最大上下文 Token', config_max_tokens_hint: '对话中 Agent 能输入的最大 Token 长度，超过后会智能压缩处理',
        config_max_turns: '最大记忆轮次', config_max_turns_hint: '一问一答为一轮，超过后会智能压缩处理',
        config_max_steps: '最大执行步数', config_max_steps_hint: '单次对话中 Agent 最多调用工具的次数',
+        config_enable_thinking: '深度思考', config_enable_thinking_hint: '启用后在 Web 端展示模型推理过程',
        config_channel_type: '通道类型',
        config_provider: '模型厂商', config_model_name: '模型',
        config_custom_model_hint: '输入自定义模型名称',
@@ -80,6 +81,7 @@ const I18N = {
        logs_title: '日志', logs_desc: '实时日志输出 (run.log)',
        logs_live: '实时', logs_coming_msg: '日志流即将在此提供。将连接 run.log 实现类似 tail -f 的实时输出。',
        error_send: '发送失败，请稍后再试。', error_timeout: '请求超时，请再试一次。',
+        thinking_in_progress: '思考中...', thinking_done: '已深度思考', thinking_duration: '耗时',
    },
    en: {
        console: 'Console',
@@ -108,6 +110,7 @@ const I18N = {
        config_max_tokens: 'Max Context Tokens', config_max_tokens_hint: 'Max tokens the Agent can input per conversation, auto-compressed when exceeded',
        config_max_turns: 'Max Memory Turns', config_max_turns_hint: 'One Q&A pair = one turn, auto-compressed when exceeded',
        config_max_steps: 'Max Steps', config_max_steps_hint: 'Max tool calls the Agent can make in a single conversation',
+        config_enable_thinking: 'Deep Thinking', config_enable_thinking_hint: 'Show model reasoning on web console',
        config_channel_type: 'Channel Type',
        config_provider: 'Provider', config_model_name: 'Model',
        config_custom_model_hint: 'Enter custom model name',
@@ -150,6 +153,7 @@ const I18N = {
        logs_title: 'Logs', logs_desc: 'Real-time log output (run.log)',
        logs_live: 'Live', logs_coming_msg: 'Log streaming will be available here. Connects to run.log for real-time output similar to tail -f.',
        error_send: 'Failed to send. Please try again.', error_timeout: 'Request timeout. Please try again.',
+        thinking_in_progress: 'Thinking...', thinking_done: 'Thought', thinking_duration: 'Duration',
    }
 };

@@ -863,6 +867,7 @@ function startSSE(requestId, loadingEl, timestamp) {
    let currentToolEl = null;
    let currentReasoningEl = null;  // live reasoning bubble
    let reasoningText = '';
+    let reasoningStartTime = 0;
    let done = false;

    const MAX_RECONNECTS = 10;
@@ -907,28 +912,25 @@ function startSSE(requestId, loadingEl, timestamp) {
                ensureBotEl();
                reasoningText += item.content;
                if (!currentReasoningEl) {
+                    reasoningStartTime = Date.now();
                    currentReasoningEl = document.createElement('div');
                    currentReasoningEl.className = 'agent-step agent-thinking-step';
                    currentReasoningEl.innerHTML = `
                        <div class="thinking-header" onclick="this.parentElement.classList.toggle('expanded')">
                            <i class="fas fa-lightbulb text-amber-400 flex-shrink-0"></i>
-                            <span class="thinking-summary"></span>
+                            <span class="thinking-summary">${t('thinking_in_progress')}</span>
                            <i class="fas fa-chevron-right thinking-chevron"></i>
                        </div>
                        <div class="thinking-full"></div>`;
                    stepsEl.appendChild(currentReasoningEl);
                }
-                const oneLine = reasoningText.trim().replace(/\n+/g, ' ');
-                currentReasoningEl.querySelector('.thinking-summary').textContent =
-                    oneLine.length > 80 ? oneLine.substring(0, 80) + '…' : oneLine;
                currentReasoningEl.querySelector('.thinking-full').innerHTML = renderMarkdown(reasoningText);
                scrollChatToBottom();

            } else if (item.type === 'delta') {
                ensureBotEl();
                if (currentReasoningEl) {
-                    if (reasoningText.trim().replace(/\n+/g, ' ').length <= 80)
-                        currentReasoningEl.classList.add('no-expand');
+                    finalizeThinking(currentReasoningEl, reasoningStartTime, reasoningText);
                    currentReasoningEl = null;
                    reasoningText = '';
                }
@@ -951,8 +953,7 @@ function startSSE(requestId, loadingEl, timestamp) {
            } else if (item.type === 'tool_start') {
                ensureBotEl();
                if (currentReasoningEl) {
-                    if (reasoningText.trim().replace(/\n+/g, ' ').length <= 80)
-                        currentReasoningEl.classList.add('no-expand');
+                    finalizeThinking(currentReasoningEl, reasoningStartTime, reasoningText);
                    currentReasoningEl = null;
                    reasoningText = '';
                }
@@ -1089,8 +1090,7 @@ function startSSE(requestId, loadingEl, timestamp) {
            if (done) return;

            if (currentReasoningEl) {
-                if (reasoningText.trim().replace(/\n+/g, ' ').length <= 80)
-                    currentReasoningEl.classList.add('no-expand');
+                finalizeThinking(currentReasoningEl, reasoningStartTime, reasoningText);
                currentReasoningEl = null;
                reasoningText = '';
            }
@@ -1214,30 +1214,26 @@ function renderToolCallsHtml(toolCalls) {
    }).join('');
 }

+function finalizeThinking(el, startTime, text) {  // Switch a live thinking bubble to its finished state: "done" label plus an elapsed-time line above the reasoning text.
+    const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);  // seconds since startTime (a Date.now() ms timestamp set when the bubble was created), one decimal place
+    el.querySelector('.thinking-summary').textContent = t('thinking_done');  // replace the in-progress header label with the t('thinking_done') string
+    const fullDiv = el.querySelector('.thinking-full');  // container holding the full rendered reasoning
+    fullDiv.innerHTML = `<div class="thinking-duration">${t('thinking_duration')} ${elapsed}s</div>` + renderMarkdown(text);  // NOTE(review): assigned via innerHTML — presumably renderMarkdown returns sanitized HTML; confirm
+}
+
 function renderThinkingHtml(text) {
    if (!text || !text.trim()) return '';
    const full = text.trim();
-    const oneLine = full.replace(/\n+/g, ' ');
-    if (oneLine.length > 80) {
-        const truncated = oneLine.substring(0, 80) + '…';
    return `
 <div class="agent-step agent-thinking-step">
    <div class="thinking-header" onclick="this.parentElement.classList.toggle('expanded')">
        <i class="fas fa-lightbulb text-amber-400 flex-shrink-0"></i>
-        <span class="thinking-summary">${escapeHtml(truncated)}</span>
+        <span class="thinking-summary">${t('thinking_done')}</span>
        <i class="fas fa-chevron-right thinking-chevron"></i>
    </div>
    <div class="thinking-full">${renderMarkdown(full)}</div>
 </div>`;
 }
-    return `
-<div class="agent-step agent-thinking-step no-expand">
-    <div class="thinking-header no-toggle">
-        <i class="fas fa-lightbulb text-amber-400 flex-shrink-0"></i>
-        <span>${escapeHtml(oneLine)}</span>
-    </div>
-</div>`;
-}

 function renderStepsHtml(steps) {
    if (!steps || steps.length === 0) return { stepsHtml: '', finalContent: '' };
@@ -1649,6 +1645,7 @@ function initConfigView(data) {
    document.getElementById('cfg-max-tokens').value = data.agent_max_context_tokens || 50000;
    document.getElementById('cfg-max-turns').value = data.agent_max_context_turns || 20;
    document.getElementById('cfg-max-steps').value = data.agent_max_steps || 20;
+    document.getElementById('cfg-enable-thinking').checked = data.enable_thinking !== false;

    const pwdInput = document.getElementById('cfg-password');
    const maskedPwd = data.web_password_masked || '';
@@ -1883,6 +1880,7 @@ function saveAgentConfig() {
        agent_max_context_tokens: parseInt(document.getElementById('cfg-max-tokens').value) || 50000,
        agent_max_context_turns: parseInt(document.getElementById('cfg-max-turns').value) || 20,
        agent_max_steps: parseInt(document.getElementById('cfg-max-steps').value) || 20,
+        enable_thinking: document.getElementById('cfg-enable-thinking').checked,
    };

    const btn = document.getElementById('cfg-agent-save');
--- a/channel/web/web_channel.py
+++ b/channel/web/web_channel.py
@@ -714,7 +714,7 @@ class ConfigHandler:
            "api_key_field": "minimax_api_key",
            "api_base_key": None,
            "api_base_default": None,
-            "models": [const.MINIMAX_M2_7, const.MINIMAX_M2_5, const.MINIMAX_M2_1, const.MINIMAX_M2_1_LIGHTNING],
+            "models": [const.MINIMAX_M2_7, const.MINIMAX_M2_7_HIGHSPEED, const.MINIMAX_M2_5, const.MINIMAX_M2_1, const.MINIMAX_M2_1_LIGHTNING],
        }),
        ("zhipu", {
            "label": "智谱AI",
@@ -796,7 +796,7 @@ class ConfigHandler:
        "zhipu_ai_api_key", "dashscope_api_key", "moonshot_api_key",
        "ark_api_key", "minimax_api_key", "linkai_api_key",
        "agent_max_context_tokens", "agent_max_context_turns", "agent_max_steps",
-        "web_password",
+        "enable_thinking", "web_password",
    }

    @staticmethod
@@ -849,6 +849,7 @@ class ConfigHandler:
                "agent_max_context_tokens": local_config.get("agent_max_context_tokens", 50000),
                "agent_max_context_turns": local_config.get("agent_max_context_turns", 20),
                "agent_max_steps": local_config.get("agent_max_steps", 20),
+                "enable_thinking": bool(local_config.get("enable_thinking", True)),
                "api_bases": api_bases,
                "api_keys": api_keys_masked,
                "providers": providers,
@@ -874,7 +875,7 @@ class ConfigHandler:
                    continue
                if key in ("agent_max_context_tokens", "agent_max_context_turns", "agent_max_steps"):
                    value = int(value)
-                if key == "use_linkai":
+                if key in ("use_linkai", "enable_thinking"):
                    value = bool(value)
                local_config[key] = value
                applied[key] = value
--- a/config.py
+++ b/config.py
@@ -202,6 +202,7 @@ available_setting = {
    "agent_max_context_tokens": 50000,  # Agent模式下最大上下文tokens
    "agent_max_context_turns": 20,  # Agent模式下最大上下文记忆轮次
    "agent_max_steps": 20,  # Agent模式下单次运行最大决策步数
+    "enable_thinking": True,  # Whether to enable deep thinking for web channel
    "knowledge": True,  # 是否开启知识库功能
 }

--- a/docs/channels/web.mdx
+++ b/docs/channels/web.mdx
@@ -10,7 +10,9 @@ Web 控制台是 CowAgent 的默认通道，启动后会自动运行，通过浏
 ```json
 {
  "channel_type": "web",
-  "web_port": 9899
+  "web_port": 9899,
+  "web_password": "",
+  "enable_thinking": true
 }
 ```

@@ -18,6 +20,11 @@ Web 控制台是 CowAgent 的默认通道，启动后会自动运行，通过浏
 | --- | --- | --- |
 | `channel_type` | 设为 `web` | `web` |
 | `web_port` | Web 服务监听端口 | `9899` |
+| `web_password` | 访问密码，留空表示不启用密码保护 | `""` |
+| `web_session_expire_days` | 登录会话有效天数 | `30` |
+| `enable_thinking` | 是否启用深度思考，开启后 Web 端展示推理过程，关闭可加速响应 | `true` |
+
+配置密码后，访问控制台时需先输入密码完成登录。登录状态默认保持 30 天，期间重启服务也无需重新登录。密码也支持在控制台的「配置」页面中在线修改。

 ## 访问地址

@@ -30,30 +37,11 @@ Web 控制台是 CowAgent 的默认通道，启动后会自动运行，通过浏
  请确保服务器防火墙和安全组已放行对应端口。
 </Note>

-## 密码保护
-
-Web 控制台默认无需密码即可访问。如果部署在公网环境，建议配置访问密码：
-
-```json
-{
-  "web_password": "your_password"
-}
-```
-
-| 参数 | 说明 | 默认值 |
-| --- | --- | --- |
-| `web_password` | 访问密码，留空表示不启用密码保护 | `""` |
-| `web_session_expire_days` | 登录会话有效天数 | `30` |
-
-配置密码后，访问控制台时需先输入密码完成登录。登录状态默认保持 30 天，期间重启服务也无需重新登录。修改密码后，所有已登录的会话将自动失效。
-
-密码也支持在控制台的「配置」页面中在线修改。
-
 ## 功能介绍

 ### 对话界面

-支持流式输出，可实时展示 Agent 的思考过程（Reasoning）和工具调用过程（Tool Calls），更直观地观察 Agent 的决策过程：
+支持流式输出，可实时展示 Agent 的思考过程（Reasoning）和工具调用过程（Tool Calls），更直观地观察 Agent 的决策过程。深度思考功能可通过配置或控制台的「Agent 配置」开关控制。

 <img width="850" src="https://cdn.link-ai.tech/doc/20260227180120.png" />

--- a/docs/intro/architecture.mdx
+++ b/docs/intro/architecture.mdx
@@ -69,7 +69,8 @@ Agent 的工作空间默认位于 `~/cow` 目录，用于存储系统提示词
  "agent_workspace": "~/cow",
  "agent_max_context_tokens": 40000,
  "agent_max_context_turns": 30,
-  "agent_max_steps": 15
+  "agent_max_steps": 15,
+  "enable_thinking": true
 }
 ```

@@ -80,4 +81,5 @@ Agent 的工作空间默认位于 `~/cow` 目录，用于存储系统提示词
 | `agent_max_context_tokens` | 最大上下文 token 数 | `50000` |
 | `agent_max_context_turns` | 最大上下文记忆轮次 | `20` |
 | `agent_max_steps` | 单次任务最大决策步数 | `20` |
+| `enable_thinking` | 是否启用深度思考，开启后 Web 端展示推理过程，关闭可加速响应 | `true` |
 | `knowledge` | 是否启用个人知识库 | `true` |
--- a/models/dashscope/dashscope_bot.py
+++ b/models/dashscope/dashscope_bot.py
@@ -262,20 +262,17 @@ class DashscopeBot(Bot):
                if kwargs.get("tool_choice"):
                    parameters["tool_choice"] = kwargs["tool_choice"]
            
-            # Add thinking parameters for Qwen3 models (disabled by default for stability)
+            # Add thinking parameters for Qwen3/QwQ models
            if "qwen3" in model_name.lower() or "qwq" in model_name.lower():
-                # Only enable thinking mode if explicitly requested
-                enable_thinking = kwargs.get("enable_thinking", False)
-                if enable_thinking:
+                thinking = kwargs.get("thinking", {"type": "enabled"})
+                if thinking.get("type") == "enabled":
                    parameters["enable_thinking"] = True
-                    
-                    # Set thinking budget if specified
                    if kwargs.get("thinking_budget"):
                        parameters["thinking_budget"] = kwargs["thinking_budget"]
-                    
-                    # Qwen3 requires incremental_output=true in thinking mode
                    if stream:
                        parameters["incremental_output"] = True
+                else:
+                    parameters["enable_thinking"] = False
            
            # Always use incremental_output for streaming (for better token-by-token streaming)
            # This is especially important for tool calling to avoid incomplete responses
--- a/models/doubao/doubao_bot.py
+++ b/models/doubao/doubao_bot.py
@@ -249,9 +249,7 @@ class DoubaoBot(Bot):
                request_body["tools"] = converted_tools
                request_body["tool_choice"] = "auto"

-            # Explicitly disable thinking to avoid reasoning_content issues
-            # in multi-turn tool calls
-            request_body["thinking"] = {"type": "disabled"}
+            request_body["thinking"] = kwargs.get("thinking", {"type": "enabled"})

            logger.debug(f"[DOUBAO] API call: model={model}, "
                         f"tools={len(converted_tools) if converted_tools else 0}, stream={stream}")
@@ -324,8 +322,17 @@ class DoubaoBot(Bot):
                choice = chunk["choices"][0]
                delta = choice.get("delta", {})

-                # Skip reasoning_content (thinking) - don't log or forward
                if delta.get("reasoning_content"):
+                    yield {
+                        "choices": [{
+                            "index": 0,
+                            "delta": {
+                                "role": "assistant",
+                                "reasoning_content": delta["reasoning_content"]
+                            },
+                            "finish_reason": None
+                        }]
+                    }
                    continue

                # Handle text content
--- a/models/linkai/link_ai_bot.py
+++ b/models/linkai/link_ai_bot.py
@@ -560,6 +560,10 @@ def _linkai_call_with_tools(self, messages, tools=None, stream=False, **kwargs):
            body["tools"] = tools
            body["tool_choice"] = kwargs.get("tool_choice", "auto")

+        thinking = kwargs.get("thinking")
+        if thinking:
+            body["thinking"] = thinking
+
        # Prepare headers
        headers = {"Authorization": "Bearer " + conf().get("linkai_api_key")}
        base_url = conf().get("linkai_api_base", "https://api.link-ai.tech")
--- a/models/moonshot/moonshot_bot.py
+++ b/models/moonshot/moonshot_bot.py
@@ -249,10 +249,7 @@ class MoonshotBot(Bot):
                request_body["tools"] = converted_tools
                request_body["tool_choice"] = "auto"

-            # Explicitly disable thinking to avoid reasoning_content issues in multi-turn tool calls.
-            # kimi-k2.5 may enable thinking by default; without preserving reasoning_content
-            # in conversation history the API will reject subsequent requests.
-            request_body["thinking"] = {"type": "disabled"}
+            request_body["thinking"] = kwargs.get("thinking", {"type": "enabled"})

            logger.debug(f"[MOONSHOT] API call: model={model}, "
                         f"tools={len(converted_tools) if converted_tools else 0}, stream={stream}")
@@ -325,8 +322,17 @@ class MoonshotBot(Bot):
                choice = chunk["choices"][0]
                delta = choice.get("delta", {})

-                # Skip reasoning_content (thinking) – don't log or forward
                if delta.get("reasoning_content"):
+                    yield {
+                        "choices": [{
+                            "index": 0,
+                            "delta": {
+                                "role": "assistant",
+                                "reasoning_content": delta["reasoning_content"]
+                            },
+                            "finish_reason": None
+                        }]
+                    }
                    continue

                # Handle text content
--- a/run.sh
+++ b/run.sh
@@ -193,6 +193,16 @@ clone_project() {
        rm CowAgent.zip
    else
        local clone_ok=false
+        # Detect and temporarily disable invalid git proxy settings
+        local _git_proxy_unset=false
+        local _http_proxy=$(git config --global http.proxy 2>/dev/null)
+        local _https_proxy=$(git config --global https.proxy 2>/dev/null)
+        if [ -n "$_http_proxy" ] && ! curl -s --connect-timeout 3 --max-time 5 --proxy "$_http_proxy" https://github.com > /dev/null 2>&1; then
+            echo -e "${YELLOW}⚠️  Invalid git proxy detected: $_http_proxy, temporarily disabling...${NC}"
+            git config --global --unset http.proxy
+            [ -n "$_https_proxy" ] && git config --global --unset https.proxy
+            _git_proxy_unset=true
+        fi
        # Test GitHub connectivity before attempting clone
        if curl -sI --connect-timeout 5 --max-time 10 https://github.com > /dev/null 2>&1; then
            echo -e "${YELLOW}🌐 GitHub is reachable, cloning from GitHub...${NC}"
@@ -204,6 +214,12 @@ clone_project() {
        fi
        if [ "$clone_ok" = false ]; then
            echo -e "${RED}❌ Project clone failed. Please check network connection.${NC}"
+            if git config --global http.proxy &> /dev/null || git config --global https.proxy &> /dev/null || [ -n "$http_proxy" ] || [ -n "$https_proxy" ] || [ -n "$HTTP_PROXY" ] || [ -n "$HTTPS_PROXY" ]; then
+                echo -e "${YELLOW}💡 Detected proxy settings. If proxy is misconfigured, try removing it with:${NC}"
+                echo -e "${YELLOW}   git config --global --unset http.proxy${NC}"
+                echo -e "${YELLOW}   git config --global --unset https.proxy${NC}"
+                echo -e "${YELLOW}   unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY${NC}"
+            fi
            exit 1
        fi
    fi