fix(deepseek): back-fill reasoning_content for all assistant turns

2026-07-17 11:07:11 +08:00 · 2026-04-24 16:39:48 +08:00
parent fd88828abd
commit 31820f56e7
14 changed files with 188 additions and 42 deletions
--- a/README.md
+++ b/README.md
@@ -208,7 +208,7 @@ cow install-browser
  "agent_max_context_tokens": 50000,                          # Agent 模式下最大上下文 tokens，超出将自动智能压缩处理
  "agent_max_context_turns": 20,                              # Agent 模式下最大上下文记忆轮次，一问一答为一轮，超出后智能压缩处理
  "agent_max_steps": 20,                                      # Agent 模式下单次任务的最大决策步数，超出后将停止继续调用工具
-  "enable_thinking": false                                    # 是否启用深度思考模式（适用于 deepseek-v4-pro/flash、deepseek-reasoner、kimi-k2-thinking 等思考型模型）。开启后模型在出最终回答前先进行推理，回答质量更高但首字延迟增加；Web 端会展示思考过程，IM 渠道（微信/企微/钉钉/飞书）虽不展示但同样获得更好答案
+  "enable_thinking": false                                    # 是否启用深度思考模式
 }
 ```

--- a/channel/web/static/js/console.js
+++ b/channel/web/static/js/console.js
@@ -38,7 +38,7 @@ const I18N = {
        config_max_tokens: '最大上下文 Token', config_max_tokens_hint: '对话中 Agent 能输入的最大 Token 长度，超过后会智能压缩处理',
        config_max_turns: '最大记忆轮次', config_max_turns_hint: '一问一答为一轮，超过后会智能压缩处理',
        config_max_steps: '最大执行步数', config_max_steps_hint: '单次对话中 Agent 最多调用工具的次数',
-        config_enable_thinking: '深度思考', config_enable_thinking_hint: '开启后模型启用思考模式，回答质量更高但首字延迟增加，Web 端可展示思考过程',
+        config_enable_thinking: '深度思考', config_enable_thinking_hint: '是否启用深度思考模式',
        config_channel_type: '通道类型',
        config_provider: '模型厂商', config_model_name: '模型',
        config_custom_model_hint: '输入自定义模型名称',
@@ -124,7 +124,7 @@ const I18N = {
        config_max_tokens: 'Max Context Tokens', config_max_tokens_hint: 'Max tokens the Agent can input per conversation, auto-compressed when exceeded',
        config_max_turns: 'Max Memory Turns', config_max_turns_hint: 'One Q&A pair = one turn, auto-compressed when exceeded',
        config_max_steps: 'Max Steps', config_max_steps_hint: 'Max tool calls the Agent can make in a single conversation',
-        config_enable_thinking: 'Deep Thinking', config_enable_thinking_hint: 'Model reasons before answering for higher quality at the cost of first-token latency. Web console shows the reasoning trace.',
+        config_enable_thinking: 'Deep Thinking', config_enable_thinking_hint: 'Enable deep thinking mode',
        config_channel_type: 'Channel Type',
        config_provider: 'Provider', config_model_name: 'Model',
        config_custom_model_hint: 'Enter custom model name',
--- a/config.py
+++ b/config.py
@@ -204,7 +204,7 @@ available_setting = {
    "agent_max_context_tokens": 50000,  # Agent模式下最大上下文tokens
    "agent_max_context_turns": 20,  # Agent模式下最大上下文记忆轮次
    "agent_max_steps": 20,  # Agent模式下单次运行最大决策步数
-    "enable_thinking": False,  # Toggle deep-thinking mode for thinking-capable models (e.g. deepseek-v4-pro/flash, deepseek-reasoner, kimi-k2-thinking). When enabled, the model produces a reasoning trace before the final answer; the Web console renders it in a collapsible panel, while IM channels (WeChat/WeCom/DingTalk/Feishu) still benefit from the improved answer quality but do not display the trace. Note: enabling thinking increases first-token latency.
+    "enable_thinking": False,  # Enable deep-thinking mode for thinking-capable models
    "knowledge": True,  # 是否开启知识库功能
    # Per-skill runtime config. Nested keys are flattened to env vars at startup
    # using the rule: skill[<name>][<key>] -> SKILL_<NAME>_<KEY>
--- a/docs/channels/web.mdx
+++ b/docs/channels/web.mdx
@@ -22,7 +22,7 @@ Web 控制台是 CowAgent 的默认通道，启动后会自动运行，通过浏
 | `web_port` | Web 服务监听端口 | `9899` |
 | `web_password` | 访问密码，留空表示不启用密码保护 | `""` |
 | `web_session_expire_days` | 登录会话有效天数 | `30` |
-| `enable_thinking` | 全局深度思考开关（影响所有渠道）。开启后思考型模型（deepseek-v4-pro/flash、deepseek-reasoner、kimi-k2-thinking 等）会先推理再作答；Web 端会展示思考过程，回答质量更高但首字延迟增加 | `false` |
+| `enable_thinking` | 是否启用深度思考模式 | `false` |

 配置密码后，访问控制台时需先输入密码完成登录。登录状态默认保持 30 天，期间重启服务也无需重新登录。密码也支持在控制台的「配置」页面中在线修改。

--- a/docs/cli/general.mdx
+++ b/docs/cli/general.mdx
@@ -69,7 +69,7 @@ Session: 12 messages | 8 skills loaded
 | `agent_max_context_tokens` | 最大上下文 tokens | `40000` |
 | `agent_max_context_turns` | 最大上下文记忆轮次 | `30` |
 | `agent_max_steps` | 单次任务最大决策步数 | `15` |
-| `enable_thinking` | 是否启用深度思考（全局开关，对所有渠道生效；Web 端会展示思考过程，IM 渠道不展示但同样受益于更高的回答质量） | `true` / `false` |
+| `enable_thinking` | 是否启用深度思考模式 | `true` / `false` |

 <Note>
  修改 `model` 时，系统会自动匹配对应的模型调用方式。配置会写入 `config.json` 并持久保存。
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -73,14 +73,14 @@
                "pages": [
                  "models/index",
                  "models/minimax",
-                  "models/glm",
-                  "models/qwen",
-                  "models/kimi",
-                  "models/doubao",
+                  "models/deepseek",
                  "models/claude",
                  "models/gemini",
                  "models/openai",
-                  "models/deepseek",
+                  "models/glm",
+                  "models/qwen",
+                  "models/doubao",
+                  "models/kimi",
                  "models/linkai",
                  "models/coding-plan",
                  "models/custom"
@@ -258,14 +258,14 @@
                "pages": [
                  "en/models/index",
                  "en/models/minimax",
-                  "en/models/glm",
-                  "en/models/qwen",
-                  "en/models/kimi",
-                  "en/models/doubao",
+                  "en/models/deepseek",
                  "en/models/claude",
                  "en/models/gemini",
                  "en/models/openai",
-                  "en/models/deepseek",
+                  "en/models/glm",
+                  "en/models/qwen",
+                  "en/models/doubao",
+                  "en/models/kimi",
                  "en/models/linkai",
                  "en/models/coding-plan",
                  "en/models/custom"
@@ -442,14 +442,14 @@
                "pages": [
                  "ja/models/index",
                  "ja/models/minimax",
-                  "ja/models/glm",
-                  "ja/models/qwen",
-                  "ja/models/kimi",
-                  "ja/models/doubao",
+                  "ja/models/deepseek",
                  "ja/models/claude",
                  "ja/models/gemini",
                  "ja/models/openai",
-                  "ja/models/deepseek",
+                  "ja/models/glm",
+                  "ja/models/qwen",
+                  "ja/models/doubao",
+                  "ja/models/kimi",
                  "ja/models/linkai",
                  "ja/models/coding-plan",
                  "ja/models/custom"
--- a/docs/en/cli/general.mdx
+++ b/docs/en/cli/general.mdx
@@ -55,7 +55,7 @@ View or modify runtime configuration. Changes take effect immediately without re
 | `agent_max_context_tokens` | Max context tokens | `40000` |
 | `agent_max_context_turns` | Max context memory turns | `30` |
 | `agent_max_steps` | Max decision steps per task | `15` |
-| `enable_thinking` | Enable deep thinking (global toggle, applies to all channels; Web console renders the reasoning trace, IM channels don't display it but still benefit from improved answer quality) | `true` / `false` |
+| `enable_thinking` | Enable deep thinking mode | `true` / `false` |

 <Note>
  When changing `model`, the system automatically matches the corresponding model API. Configuration is persisted to `config.json`.
--- a/docs/en/models/deepseek.mdx
+++ b/docs/en/models/deepseek.mdx
@@ -14,12 +14,41 @@ Option 1: Native integration (recommended):

 | Parameter | Description |
 | --- | --- |
-| `model` | `deepseek-v4-pro` (V4 Pro, thinking mode + tool calls, Agent recommended), `deepseek-v4-flash` (V4 Flash, thinking mode + tool calls), `deepseek-chat` (DeepSeek-V3.2, non-thinking), `deepseek-reasoner` (DeepSeek-R1, thinking mode) |
+| `model` | Supports `deepseek-v4-pro` and `deepseek-v4-flash` |
 | `deepseek_api_key` | Create at [DeepSeek Platform](https://platform.deepseek.com/api_keys) |
 | `deepseek_api_base` | Optional, defaults to `https://api.deepseek.com/v1`. Can be changed to a third-party proxy |

+## Model Selection
+
+| Model | Use Case |
+| --- | --- |
+| `deepseek-v4-pro` | Best on complex tasks |
+| `deepseek-v4-flash` | Faster and cheaper |
+
+## Thinking Mode
+
+The V4 series (`deepseek-v4-pro` / `deepseek-v4-flash`) supports an explicit "thinking mode": the model emits a chain-of-thought (`reasoning_content`) before the final answer to improve answer quality.
+
+### Toggle
+
+Controlled by the global `enable_thinking` setting:
+
+```json
+{
+  "enable_thinking": true
+}
+```
+
+- `true`: thinking is on across all channels. The Web console renders the reasoning trace; IM channels (WeChat / WeCom / DingTalk / Feishu) don't render it but still benefit from higher answer quality.
+- `false`: thinking off, faster responses with lower first-token latency.
+
+### Notes
+
+- **Sampling parameters**: in thinking mode, the server silently ignores `temperature`, `top_p`, `presence_penalty`, and `frequency_penalty` (no error is raised), so CowAgent automatically omits them from the request.
+- **Multi-turn tool calls**: once the history contains any tool-call turn, DeepSeek requires `reasoning_content` on every assistant message. CowAgent handles this round-trip automatically, even when the thinking switch is toggled mid-session.
+
 <Tip>
-  The V4 series (`deepseek-v4-pro`, `deepseek-v4-flash`) supports thinking mode together with tool calls, enabling multi-round reasoning + tool orchestration in Agent mode. The thinking switch is controlled by the global `enable_thinking` setting, and `reasoning_content` is automatically round-tripped on tool-call turns.
+  Use `deepseek-v4-pro` for complex tasks, pick `deepseek-v4-flash` when low latency and low cost matter, and set `enable_thinking` to `true` when you want deeper reasoning.
 </Tip>

 Option 2: OpenAI-compatible configuration:
--- a/docs/intro/architecture.mdx
+++ b/docs/intro/architecture.mdx
@@ -81,5 +81,5 @@ Agent 的工作空间默认位于 `~/cow` 目录，用于存储系统提示词
 | `agent_max_context_tokens` | 最大上下文 token 数 | `50000` |
 | `agent_max_context_turns` | 最大上下文记忆轮次 | `20` |
 | `agent_max_steps` | 单次任务最大决策步数 | `20` |
-| `enable_thinking` | 是否启用深度思考模式（适用于 deepseek-v4-pro/flash、deepseek-reasoner、kimi-k2-thinking 等思考型模型）。开启后所有渠道下模型都会先思考再回答，回答质量更高但首字延迟增加；Web 端会展示思考过程，IM 渠道（微信/企微/钉钉/飞书）虽不展示但同样获得更好答案 | `false` |
+| `enable_thinking` | 是否启用深度思考模式 | `false` |
 | `knowledge` | 是否启用个人知识库 | `true` |
--- a/docs/ja/cli/general.mdx
+++ b/docs/ja/cli/general.mdx
@@ -55,7 +55,7 @@ description: ステータスの確認、設定管理、コンテキスト制御
 | `agent_max_context_tokens` | 最大コンテキストトークン数 | `40000` |
 | `agent_max_context_turns` | 最大コンテキスト記憶ターン数 | `30` |
 | `agent_max_steps` | タスクごとの最大判断ステップ数 | `15` |
-| `enable_thinking` | ディープシンキングの有効化（全チャネル共通のグローバルトグル。Web コンソールでは思考過程を折りたたみ表示、IM チャネルでは表示されないものの回答品質の向上は享受可能） | `true` / `false` |
+| `enable_thinking` | ディープシンキングモードの有効化 | `true` / `false` |

 <Note>
  `model` を変更すると、システムが対応するモデル API を自動的にマッチングします。設定は `config.json` に永続的に保存されます。
--- a/docs/ja/models/deepseek.mdx
+++ b/docs/ja/models/deepseek.mdx
@@ -14,12 +14,41 @@ description: DeepSeekモデルの設定

 | パラメータ | 説明 |
 | --- | --- |
-| `model` | `deepseek-v4-pro`（V4 Pro、思考モード + ツール呼び出し、Agent推奨）、`deepseek-v4-flash`（V4 Flash、思考モード + ツール呼び出し）、`deepseek-chat`（DeepSeek-V3.2、非思考モード）、`deepseek-reasoner`（DeepSeek-R1、思考モード） |
-| `deepseek_api_key` | [DeepSeek Platform](https://platform.deepseek.com/api_keys)で作成 |
+| `model` | `deepseek-v4-pro`、`deepseek-v4-flash` をサポート |
+| `deepseek_api_key` | [DeepSeek Platform](https://platform.deepseek.com/api_keys) で作成 |
 | `deepseek_api_base` | オプション、デフォルトは `https://api.deepseek.com/v1`。サードパーティプロキシに変更可能 |

+## モデルの選び方
+
+| モデル | 適用シーン |
+| --- | --- |
+| `deepseek-v4-pro` | 複雑なタスクに最適 |
+| `deepseek-v4-flash` | 高速・低コスト |
+
+## 思考モード
+
+V4シリーズ（`deepseek-v4-pro` / `deepseek-v4-flash`）は明示的な「思考モード」をサポートします。最終回答の前に思考内容（`reasoning_content`）を出力することで、回答品質を高めます。
+
+### スイッチ
+
+グローバル設定 `enable_thinking` で制御します：
+
+```json
+{
+  "enable_thinking": true
+}
+```
+
+- `true`：すべてのチャネルで思考モードがオン。Webコンソールでは思考過程を表示し、IMチャネル（WeChat / WeCom / DingTalk / Feishu）では表示されないものの、回答品質の向上というメリットを得られます。
+- `false`：思考オフ、応答が速く、初回トークンの遅延も低くなります。
+
+### 注意事項
+
+- **サンプリングパラメータ**：思考モード時は `temperature`、`top_p`、`presence_penalty`、`frequency_penalty` がサーバ側で無視されます（エラーにはなりません）。CowAgentは自動的に送信をスキップします。
+- **マルチターンのツール呼び出し**：履歴にツール呼び出しが含まれる場合、DeepSeekはすべてのassistantメッセージに `reasoning_content` を返送するよう要求します。CowAgentが自動でラウンドトリップ処理を行うため、セッション途中で思考スイッチを切り替えてもエラーになりません。
+
 <Tip>
-  V4シリーズ（`deepseek-v4-pro`、`deepseek-v4-flash`）は思考モードとツール呼び出しに対応しており、Agentモードでの多段思考とツール連携が可能です。思考のオン/オフはグローバル設定 `enable_thinking` で制御され、ツール呼び出しのターンでは `reasoning_content` が自動的にAPIへ往復されます。
+  複雑なタスクには `deepseek-v4-pro` を、低レイテンシ・低コストを重視する場合は `deepseek-v4-flash` を選び、深い思考が必要な時は `enable_thinking` を有効にしてください。
 </Tip>

 方法2：OpenAI互換方式：
--- a/docs/models/deepseek.mdx
+++ b/docs/models/deepseek.mdx
@@ -14,12 +14,41 @@ description: DeepSeek 模型配置

 | 参数 | 说明 |
 | --- | --- |
-| `model` | `deepseek-v4-pro`（V4 Pro，思考模式 + 工具调用，Agent 推荐）、`deepseek-v4-flash`（V4 Flash，思考模式 + 工具调用）、`deepseek-chat`（DeepSeek-V3.2，非思考模式）、`deepseek-reasoner`（DeepSeek-R1，思考模式） |
+| `model` | 支持 `deepseek-v4-pro`、`deepseek-v4-flash` |
 | `deepseek_api_key` | 在 [DeepSeek 平台](https://platform.deepseek.com/api_keys) 创建 |
 | `deepseek_api_base` | 可选，默认为 `https://api.deepseek.com/v1`，可修改为第三方代理地址 |

+## 模型选择
+
+| 模型 | 适用场景 |
+| --- | --- |
+| `deepseek-v4-pro` | 复杂任务效果最佳 |
+| `deepseek-v4-flash` | 速度更快、成本更低 |
+
+## 思考模式
+
+V4 系列（`deepseek-v4-pro` / `deepseek-v4-flash`）支持显式的"思考模式"：模型在输出最终回答前，先输出一段思维链（`reasoning_content`），从而提升答案质量。
+
+### 开关
+
+通过全局配置 `enable_thinking` 控制：
+
+```json
+{
+  "enable_thinking": true
+}
+```
+
+- `true`：所有渠道下模型都会先思考再作答。Web 控制台会展示思考过程，IM 渠道（微信 / 企微 / 钉钉 / 飞书）虽不展示但同样获得更好答案。
+- `false`：关闭思考，响应更快，首字延迟更低。
+
+### 行为说明
+
+- **采样参数**：思考模式下 `temperature`、`top_p`、`presence_penalty`、`frequency_penalty` 会被服务端忽略（不会报错），CowAgent 会自动跳过传入。
+- **多轮工具调用**：当历史中包含工具调用时，DeepSeek 要求所有 assistant 消息必须回传 `reasoning_content`。CowAgent 会自动处理回传逻辑，跨轮次切换思考开关也不会出错。
+
 <Tip>
-  V4 系列模型（`deepseek-v4-pro`、`deepseek-v4-flash`）支持思考模式与工具调用，可在 Agent 模式下进行多轮思考与工具协同。思考开关由全局 `enable_thinking` 配置控制，工具调用轮次的 `reasoning_content` 会自动回传给 API。
+  复杂任务推荐使用 `deepseek-v4-pro`，低延迟和低成本需求可选 `deepseek-v4-flash`，需要深度思考可开启 `enable_thinking`。
 </Tip>

 方式二：OpenAI 兼容方式接入：
--- a/models/deepseek/deepseek_bot.py
+++ b/models/deepseek/deepseek_bot.py
@@ -477,25 +477,55 @@ class DeepSeekBot(Bot, OpenAICompatibleBot):
        """
        Convert Claude-format messages (content blocks) to OpenAI format.

-        Crucially, for any assistant turn with tool_use, the accompanying `thinking`
-        block must be re-emitted as `reasoning_content` — DeepSeek returns 400 if
-        omitted on tool-call rounds.
+        Crucially, once any assistant turn in the history has triggered a tool
+        call, DeepSeek requires `reasoning_content` on **every subsequent
+        assistant message** (not just the tool-call one) until the next user
+        turn. In practice the API enforces this across the whole history when
+        thinking mode is enabled: a request in which any assistant message
+        lacks `reasoning_content` fails with a 400 error. We back-fill an empty
+        string when the trace was not captured (e.g. history recorded while
+        thinking was disabled, or an upstream proxy stripped the field).
        """
        if not messages:
            return []

+        # Determine whether the history contains any tool-call assistant turn.
+        # If so, every assistant message must carry `reasoning_content`.
+        has_tool_call_history = False
+        for msg in messages:
+            if msg.get("role") != "assistant":
+                continue
+            if msg.get("tool_calls"):
+                has_tool_call_history = True
+                break
+            content = msg.get("content")
+            if isinstance(content, list) and any(
+                isinstance(b, dict) and b.get("type") == "tool_use" for b in content
+            ):
+                has_tool_call_history = True
+                break
+
        converted = []

        for msg in messages:
            role = msg.get("role")
            content = msg.get("content")

-            if isinstance(content, str):
-                converted.append(msg)
-                continue
-
+            # Pass-through path for non-list content (e.g. plain string).
+            # Back-fill `reasoning_content` on assistant messages whenever the
+            # history contains any tool-call turn.
            if not isinstance(content, list):
-                converted.append(msg)
+                if (
+                    role == "assistant"
+                    and isinstance(msg, dict)
+                    and has_tool_call_history
+                    and "reasoning_content" not in msg
+                ):
+                    patched = dict(msg)
+                    patched["reasoning_content"] = ""
+                    converted.append(patched)
+                else:
+                    converted.append(msg)
                continue

            if role == "user":
@@ -563,10 +593,15 @@ class DeepSeekBot(Bot, OpenAICompatibleBot):
                    if not text_parts:
                        openai_msg["content"] = None

-                # Round-trip reasoning_content: required for tool-call turns,
-                # harmless (server-ignored) for plain text turns.
+                # Round-trip reasoning_content: required for every assistant
+                # message once the history contains any tool-call turn (see
+                # outer comment). Use empty string as fallback when the trace
+                # was not captured — DeepSeek validates field presence, not
+                # value; non-thinking backends silently ignore it.
                if reasoning_parts:
                    openai_msg["reasoning_content"] = "\n".join(reasoning_parts)
+                elif has_tool_call_history:
+                    openai_msg["reasoning_content"] = ""

                converted.append(openai_msg)
            else:
--- a/models/linkai/link_ai_bot.py
+++ b/models/linkai/link_ai_bot.py
@@ -704,6 +704,23 @@ def _linkai_convert_messages_to_openai_format(self, messages):
    if not messages:
        return openai_messages

+    # DeepSeek (proxied via LinkAI) requires `reasoning_content` on EVERY
+    # assistant message once the history contains any tool-call turn — not
+    # just the tool-call turn itself. Detect that condition first.
+    has_tool_call_history = False
+    for src in messages:
+        if src.get("role") != "assistant":
+            continue
+        if src.get("tool_calls"):
+            has_tool_call_history = True
+            break
+        content = src.get("content")
+        if isinstance(content, list) and any(
+            isinstance(b, dict) and b.get("type") == "tool_use" for b in content
+        ):
+            has_tool_call_history = True
+            break
+
    # Walk the original Claude messages to collect each assistant turn's
    # reasoning text, then attach it to the matching converted entry.
    dst_idx = 0
@@ -722,8 +739,15 @@ def _linkai_convert_messages_to_openai_format(self, messages):
            dst_idx += 1
        if dst_idx >= len(openai_messages):
            break
+        dst_msg = openai_messages[dst_idx]
        if reasoning_parts:
-            openai_messages[dst_idx]["reasoning_content"] = "\n".join(reasoning_parts)
+            dst_msg["reasoning_content"] = "\n".join(reasoning_parts)
+        elif has_tool_call_history:
+            # Fallback when the trace was lost (proxy stripped it, model
+            # switched mid-session, thinking toggled on after tool calls).
+            # DeepSeek-style backends validate field presence, not value;
+            # non-thinking backends silently ignore the empty string.
+            dst_msg["reasoning_content"] = ""
        dst_idx += 1

    return openai_messages