From 31820f56e7c9dd76b31a61551ec89dd1bda91834 Mon Sep 17 00:00:00 2001 From: zhayujie Date: Fri, 24 Apr 2026 16:39:48 +0800 Subject: [PATCH] fix(deepseek): back-fill reasoning_content for all assistant turns --- README.md | 2 +- channel/web/static/js/console.js | 4 +-- config.py | 2 +- docs/channels/web.mdx | 2 +- docs/cli/general.mdx | 2 +- docs/docs.json | 30 ++++++++--------- docs/en/cli/general.mdx | 2 +- docs/en/models/deepseek.mdx | 33 +++++++++++++++++-- docs/intro/architecture.mdx | 2 +- docs/ja/cli/general.mdx | 2 +- docs/ja/models/deepseek.mdx | 35 ++++++++++++++++++-- docs/models/deepseek.mdx | 33 +++++++++++++++++-- models/deepseek/deepseek_bot.py | 55 ++++++++++++++++++++++++++------ models/linkai/link_ai_bot.py | 26 ++++++++++++++- 14 files changed, 188 insertions(+), 42 deletions(-) diff --git a/README.md b/README.md index fc03de3f..704d6693 100644 --- a/README.md +++ b/README.md @@ -208,7 +208,7 @@ cow install-browser "agent_max_context_tokens": 50000, # Agent 模式下最大上下文 tokens,超出将自动智能压缩处理 "agent_max_context_turns": 20, # Agent 模式下最大上下文记忆轮次,一问一答为一轮,超出后智能压缩处理 "agent_max_steps": 20, # Agent 模式下单次任务的最大决策步数,超出后将停止继续调用工具 - "enable_thinking": false # 是否启用深度思考模式(适用于 deepseek-v4-pro/flash、deepseek-reasoner、kimi-k2-thinking 等思考型模型)。开启后模型在出最终回答前先进行推理,回答质量更高但首字延迟增加;Web 端会展示思考过程,IM 渠道(微信/企微/钉钉/飞书)虽不展示但同样获得更好答案 + "enable_thinking": false # 是否启用深度思考模式 } ``` diff --git a/channel/web/static/js/console.js b/channel/web/static/js/console.js index 7bbc55ed..28bb5029 100644 --- a/channel/web/static/js/console.js +++ b/channel/web/static/js/console.js @@ -38,7 +38,7 @@ const I18N = { config_max_tokens: '最大上下文 Token', config_max_tokens_hint: '对话中 Agent 能输入的最大 Token 长度,超过后会智能压缩处理', config_max_turns: '最大记忆轮次', config_max_turns_hint: '一问一答为一轮,超过后会智能压缩处理', config_max_steps: '最大执行步数', config_max_steps_hint: '单次对话中 Agent 最多调用工具的次数', - config_enable_thinking: '深度思考', config_enable_thinking_hint: '开启后模型启用思考模式,回答质量更高但首字延迟增加,Web 端可展示思考过程', + config_enable_thinking: '深度思考', config_enable_thinking_hint: '是否启用深度思考模式', config_channel_type: '通道类型', config_provider: '模型厂商', config_model_name: '模型', config_custom_model_hint: '输入自定义模型名称', @@ -124,7 +124,7 @@ const I18N = { config_max_tokens: 'Max Context Tokens', config_max_tokens_hint: 'Max tokens the Agent can input per conversation, auto-compressed when exceeded', config_max_turns: 'Max Memory Turns', config_max_turns_hint: 'One Q&A pair = one turn, auto-compressed when exceeded', config_max_steps: 'Max Steps', config_max_steps_hint: 'Max tool calls the Agent can make in a single conversation', - config_enable_thinking: 'Deep Thinking', config_enable_thinking_hint: 'Model reasons before answering for higher quality at the cost of first-token latency. Web console shows the reasoning trace.', + config_enable_thinking: 'Deep Thinking', config_enable_thinking_hint: 'Enable deep thinking mode', config_channel_type: 'Channel Type', config_provider: 'Provider', config_model_name: 'Model', config_custom_model_hint: 'Enter custom model name', diff --git a/config.py b/config.py index d175c06f..aeca2562 100644 --- a/config.py +++ b/config.py @@ -204,7 +204,7 @@ available_setting = { "agent_max_context_tokens": 50000, # Agent模式下最大上下文tokens "agent_max_context_turns": 20, # Agent模式下最大上下文记忆轮次 "agent_max_steps": 20, # Agent模式下单次运行最大决策步数 - "enable_thinking": False, # Toggle deep-thinking mode for thinking-capable models (e.g. deepseek-v4-pro/flash, deepseek-reasoner, kimi-k2-thinking). When enabled, the model produces a reasoning trace before the final answer; the Web console renders it in a collapsible panel, while IM channels (WeChat/WeCom/DingTalk/Feishu) still benefit from the improved answer quality but do not display the trace. Note: enabling thinking increases first-token latency. + "enable_thinking": False, # Enable deep-thinking mode for thinking-capable models "knowledge": True, # 是否开启知识库功能 # Per-skill runtime config. Nested keys are flattened to env vars at startup # using the rule: skill[][] -> SKILL__ diff --git a/docs/channels/web.mdx b/docs/channels/web.mdx index f1440325..a936a179 100644 --- a/docs/channels/web.mdx +++ b/docs/channels/web.mdx @@ -22,7 +22,7 @@ Web 控制台是 CowAgent 的默认通道,启动后会自动运行,通过浏 | `web_port` | Web 服务监听端口 | `9899` | | `web_password` | 访问密码,留空表示不启用密码保护 | `""` | | `web_session_expire_days` | 登录会话有效天数 | `30` | -| `enable_thinking` | 全局深度思考开关(影响所有渠道)。开启后思考型模型(deepseek-v4-pro/flash、deepseek-reasoner、kimi-k2-thinking 等)会先推理再作答;Web 端会展示思考过程,回答质量更高但首字延迟增加 | `false` | +| `enable_thinking` | 是否启用深度思考模式 | `false` | 配置密码后,访问控制台时需先输入密码完成登录。登录状态默认保持 30 天,期间重启服务也无需重新登录。密码也支持在控制台的「配置」页面中在线修改。 diff --git a/docs/cli/general.mdx b/docs/cli/general.mdx index 31d383ea..bdb4bb8c 100644 --- a/docs/cli/general.mdx +++ b/docs/cli/general.mdx @@ -69,7 +69,7 @@ Session: 12 messages | 8 skills loaded | `agent_max_context_tokens` | 最大上下文 tokens | `40000` | | `agent_max_context_turns` | 最大上下文记忆轮次 | `30` | | `agent_max_steps` | 单次任务最大决策步数 | `15` | -| `enable_thinking` | 是否启用深度思考(全局开关,对所有渠道生效;Web 端会展示思考过程,IM 渠道不展示但同样受益于更高的回答质量) | `true` / `false` | +| `enable_thinking` | 是否启用深度思考模式 | `true` / `false` | 修改 `model` 时,系统会自动匹配对应的模型调用方式。配置会写入 `config.json` 并持久保存。 diff --git a/docs/docs.json b/docs/docs.json index d979ecc3..2aeee5ef 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -73,14 +73,14 @@ "pages": [ "models/index", "models/minimax", - "models/glm", - "models/qwen", - "models/kimi", - "models/doubao", + "models/deepseek", "models/claude", "models/gemini", "models/openai", - "models/deepseek", + "models/glm", + "models/qwen", + "models/doubao", + "models/kimi", "models/linkai", "models/coding-plan", "models/custom" @@ -258,14 +258,14 @@ "pages": [ "en/models/index", "en/models/minimax", - "en/models/glm", - "en/models/qwen", - "en/models/kimi", - "en/models/doubao", + "en/models/deepseek", "en/models/claude", "en/models/gemini", "en/models/openai", - "en/models/deepseek", + "en/models/glm", + "en/models/qwen", + "en/models/doubao", + "en/models/kimi", "en/models/linkai", "en/models/coding-plan", "en/models/custom" @@ -442,14 +442,14 @@ "pages": [ "ja/models/index", "ja/models/minimax", - "ja/models/glm", - "ja/models/qwen", - "ja/models/kimi", - "ja/models/doubao", + "ja/models/deepseek", "ja/models/claude", "ja/models/gemini", "ja/models/openai", - "ja/models/deepseek", + "ja/models/glm", + "ja/models/qwen", + "ja/models/doubao", + "ja/models/kimi", "ja/models/linkai", "ja/models/coding-plan", "ja/models/custom" diff --git a/docs/en/cli/general.mdx b/docs/en/cli/general.mdx index 59cd11de..d1fc332d 100644 --- a/docs/en/cli/general.mdx +++ b/docs/en/cli/general.mdx @@ -55,7 +55,7 @@ View or modify runtime configuration. Changes take effect immediately without re | `agent_max_context_tokens` | Max context tokens | `40000` | | `agent_max_context_turns` | Max context memory turns | `30` | | `agent_max_steps` | Max decision steps per task | `15` | -| `enable_thinking` | Enable deep thinking (global toggle, applies to all channels; Web console renders the reasoning trace, IM channels don't display it but still benefit from improved answer quality) | `true` / `false` | +| `enable_thinking` | Enable deep thinking mode | `true` / `false` | When changing `model`, the system automatically matches the corresponding model API. Configuration is persisted to `config.json`. diff --git a/docs/en/models/deepseek.mdx b/docs/en/models/deepseek.mdx index e39b4db3..49728f95 100644 --- a/docs/en/models/deepseek.mdx +++ b/docs/en/models/deepseek.mdx @@ -14,12 +14,41 @@ Option 1: Native integration (recommended): | Parameter | Description | | --- | --- | -| `model` | `deepseek-v4-pro` (V4 Pro, thinking mode + tool calls, Agent recommended), `deepseek-v4-flash` (V4 Flash, thinking mode + tool calls), `deepseek-chat` (DeepSeek-V3.2, non-thinking), `deepseek-reasoner` (DeepSeek-R1, thinking mode) | +| `model` | Supports `deepseek-v4-pro` and `deepseek-v4-flash` | | `deepseek_api_key` | Create at [DeepSeek Platform](https://platform.deepseek.com/api_keys) | | `deepseek_api_base` | Optional, defaults to `https://api.deepseek.com/v1`. Can be changed to a third-party proxy | +## Model Selection + +| Model | Use Case | +| --- | --- | +| `deepseek-v4-pro` | Best on complex tasks | +| `deepseek-v4-flash` | Faster and cheaper | + +## Thinking Mode + +The V4 series (`deepseek-v4-pro` / `deepseek-v4-flash`) supports an explicit "thinking mode": the model emits a chain-of-thought (`reasoning_content`) before the final answer to improve answer quality. + +### Toggle + +Controlled by the global `enable_thinking` setting: + +```json +{ + "enable_thinking": true +} +``` + +- `true`: thinking is on across all channels. The Web console renders the reasoning trace; IM channels (WeChat / WeCom / DingTalk / Feishu) don't render it but still benefit from higher answer quality. +- `false`: thinking off, faster responses with lower first-token latency. + +### Notes + +- **Sampling parameters**: under thinking mode, `temperature`, `top_p`, `presence_penalty`, and `frequency_penalty` are silently ignored by the server (no error). CowAgent skips sending them automatically. +- **Multi-turn tool calls**: once the history contains any tool-call turn, DeepSeek requires `reasoning_content` on every assistant message. CowAgent handles the round-trip automatically, including across mid-session toggles of the thinking switch. + - The V4 series (`deepseek-v4-pro`, `deepseek-v4-flash`) supports thinking mode together with tool calls, enabling multi-round reasoning + tool orchestration in Agent mode. The thinking switch is controlled by the global `enable_thinking` setting, and `reasoning_content` is automatically round-tripped on tool-call turns. + Use `deepseek-v4-pro` for complex tasks; pick `deepseek-v4-flash` when low latency and low cost matter; enable `enable_thinking` when you want deeper reasoning. Option 2: OpenAI-compatible configuration: diff --git a/docs/intro/architecture.mdx b/docs/intro/architecture.mdx index fdacc84a..ffea7a6b 100644 --- a/docs/intro/architecture.mdx +++ b/docs/intro/architecture.mdx @@ -81,5 +81,5 @@ Agent 的工作空间默认位于 `~/cow` 目录,用于存储系统提示词 | `agent_max_context_tokens` | 最大上下文 token 数 | `50000` | | `agent_max_context_turns` | 最大上下文记忆轮次 | `20` | | `agent_max_steps` | 单次任务最大决策步数 | `20` | -| `enable_thinking` | 是否启用深度思考模式(适用于 deepseek-v4-pro/flash、deepseek-reasoner、kimi-k2-thinking 等思考型模型)。开启后所有渠道下模型都会先思考再回答,回答质量更高但首字延迟增加;Web 端会展示思考过程,IM 渠道(微信/企微/钉钉/飞书)虽不展示但同样获得更好答案 | `false` | +| `enable_thinking` | 是否启用深度思考模式 | `false` | | `knowledge` | 是否启用个人知识库 | `true` | diff --git a/docs/ja/cli/general.mdx b/docs/ja/cli/general.mdx index af4a83f7..2da93ca9 100644 --- a/docs/ja/cli/general.mdx +++ b/docs/ja/cli/general.mdx @@ -55,7 +55,7 @@ description: ステータスの確認、設定管理、コンテキスト制御 | `agent_max_context_tokens` | 最大コンテキストトークン数 | `40000` | | `agent_max_context_turns` | 最大コンテキスト記憶ターン数 | `30` | | `agent_max_steps` | タスクごとの最大判断ステップ数 | `15` | -| `enable_thinking` | ディープシンキングの有効化(全チャネル共通のグローバルトグル。Web コンソールでは思考過程を折りたたみ表示、IM チャネルでは表示されないものの回答品質の向上は享受可能) | `true` / `false` | +| `enable_thinking` | ディープシンキングモードの有効化 | `true` / `false` | `model` を変更すると、システムが対応するモデル API を自動的にマッチングします。設定は `config.json` に永続的に保存されます。 diff --git a/docs/ja/models/deepseek.mdx b/docs/ja/models/deepseek.mdx index 9d9242c6..c7408132 100644 --- a/docs/ja/models/deepseek.mdx +++ b/docs/ja/models/deepseek.mdx @@ -14,12 +14,41 @@ description: DeepSeekモデルの設定 | パラメータ | 説明 | | --- | --- | -| `model` | `deepseek-v4-pro`(V4 Pro、思考モード + ツール呼び出し、Agent推奨)、`deepseek-v4-flash`(V4 Flash、思考モード + ツール呼び出し)、`deepseek-chat`(DeepSeek-V3.2、非思考モード)、`deepseek-reasoner`(DeepSeek-R1、思考モード) | -| `deepseek_api_key` | [DeepSeek Platform](https://platform.deepseek.com/api_keys)で作成 | +| `model` | `deepseek-v4-pro`、`deepseek-v4-flash` をサポート | +| `deepseek_api_key` | [DeepSeek Platform](https://platform.deepseek.com/api_keys) で作成 | | `deepseek_api_base` | オプション、デフォルトは `https://api.deepseek.com/v1`。サードパーティプロキシに変更可能 | +## モデルの選び方 + +| モデル | 適用シーン | +| --- | --- | +| `deepseek-v4-pro` | 複雑なタスクに最適 | +| `deepseek-v4-flash` | 高速・低コスト | + +## 思考モード + +V4シリーズ(`deepseek-v4-pro` / `deepseek-v4-flash`)は明示的な「思考モード」をサポートします。最終回答の前に思考内容(`reasoning_content`)を出力することで、回答品質を高めます。 + +### スイッチ + +グローバル設定 `enable_thinking` で制御します: + +```json +{ + "enable_thinking": true +} +``` + +- `true`:すべてのチャネルで思考モードがオン。Webコンソールでは思考過程を表示し、IMチャネル(WeChat / WeCom / DingTalk / Feishu)では表示されないものの、回答品質の向上というメリットを得られます。 +- `false`:思考オフ、応答が速く、初回トークンの遅延も低くなります。 + +### 注意事項 + +- **サンプリングパラメータ**:思考モード時は `temperature`、`top_p`、`presence_penalty`、`frequency_penalty` がサーバ側で無視されます(エラーにはなりません)。CowAgentは自動的に送信をスキップします。 +- **マルチターンのツール呼び出し**:履歴にツール呼び出しが含まれる場合、DeepSeekはすべてのassistantメッセージに `reasoning_content` を返送するよう要求します。CowAgentが自動でラウンドトリップ処理を行うため、セッション途中で思考スイッチを切り替えてもエラーになりません。 + - V4シリーズ(`deepseek-v4-pro`、`deepseek-v4-flash`)は思考モードとツール呼び出しに対応しており、Agentモードでの多段思考とツール連携が可能です。思考のオン/オフはグローバル設定 `enable_thinking` で制御され、ツール呼び出しのターンでは `reasoning_content` が自動的にAPIへ往復されます。 + 複雑なタスクには `deepseek-v4-pro` を、低レイテンシ・低コストを重視する場合は `deepseek-v4-flash` を選び、深い思考が必要な時は `enable_thinking` を有効にしてください。 方法2:OpenAI互換方式: diff --git a/docs/models/deepseek.mdx b/docs/models/deepseek.mdx index e87e55f4..8ad6861c 100644 --- a/docs/models/deepseek.mdx +++ b/docs/models/deepseek.mdx @@ -14,12 +14,41 @@ description: DeepSeek 模型配置 | 参数 | 说明 | | --- | --- | -| `model` | `deepseek-v4-pro`(V4 Pro,思考模式 + 工具调用,Agent 推荐)、`deepseek-v4-flash`(V4 Flash,思考模式 + 工具调用)、`deepseek-chat`(DeepSeek-V3.2,非思考模式)、`deepseek-reasoner`(DeepSeek-R1,思考模式) | +| `model` | 支持 `deepseek-v4-pro`、`deepseek-v4-flash` | | `deepseek_api_key` | 在 [DeepSeek 平台](https://platform.deepseek.com/api_keys) 创建 | | `deepseek_api_base` | 可选,默认为 `https://api.deepseek.com/v1`,可修改为第三方代理地址 | +## 模型选择 + +| 模型 | 适用场景 | +| --- | --- | +| `deepseek-v4-pro` | 复杂任务效果最佳 | +| `deepseek-v4-flash` | 速度更快、成本更低 | + +## 思考模式 + +V4 系列(`deepseek-v4-pro` / `deepseek-v4-flash`)支持显式的"思考模式":模型在输出最终回答前,先输出一段思维链(`reasoning_content`),从而提升答案质量。 + +### 开关 + +通过全局配置 `enable_thinking` 控制: + +```json +{ + "enable_thinking": true +} +``` + +- `true`:所有渠道下模型都会先思考再作答。Web 控制台会展示思考过程,IM 渠道(微信 / 企微 / 钉钉 / 飞书)虽不展示但同样获得更好答案。 +- `false`:关闭思考,响应更快,首字延迟更低。 + +### 行为说明 + +- **采样参数**:思考模式下 `temperature`、`top_p`、`presence_penalty`、`frequency_penalty` 会被服务端忽略(不会报错),CowAgent 会自动跳过传入。 +- **多轮工具调用**:当历史中包含工具调用时,DeepSeek 要求所有 assistant 消息必须回传 `reasoning_content`。CowAgent 会自动处理回传逻辑,跨轮次切换思考开关也不会出错。 + - V4 系列模型(`deepseek-v4-pro`、`deepseek-v4-flash`)支持思考模式与工具调用,可在 Agent 模式下进行多轮思考与工具协同。思考开关由全局 `enable_thinking` 配置控制,工具调用轮次的 `reasoning_content` 会自动回传给 API。 + 复杂任务推荐使用 `deepseek-v4-pro`,低延迟和低成本需求可选 `deepseek-v4-flash`,需要深度思考可开启 `enable_thinking`。 方式二:OpenAI 兼容方式接入: diff --git a/models/deepseek/deepseek_bot.py b/models/deepseek/deepseek_bot.py index 9f03771b..8e1fea29 100644 --- a/models/deepseek/deepseek_bot.py +++ b/models/deepseek/deepseek_bot.py @@ -477,25 +477,55 @@ class DeepSeekBot(Bot, OpenAICompatibleBot): """ Convert Claude-format messages (content blocks) to OpenAI format. - Crucially, for any assistant turn with tool_use, the accompanying `thinking` - block must be re-emitted as `reasoning_content` — DeepSeek returns 400 if - omitted on tool-call rounds. + Crucially, once any assistant turn in the history triggered a tool + call, DeepSeek requires `reasoning_content` on **every subsequent + assistant message** (not just the tool-call one) until the next user + turn — and in fact the API enforces this for the whole history when + thinking mode is enabled. Missing `reasoning_content` on any + assistant message returns 400. We back-fill an empty string when the + trace was not captured (e.g. history recorded while thinking was + disabled, or upstream proxy stripped the field). """ if not messages: return [] + # Determine whether the history contains any tool-call assistant turn. + # If so, every assistant message must carry `reasoning_content`. + has_tool_call_history = False + for msg in messages: + if msg.get("role") != "assistant": + continue + if msg.get("tool_calls"): + has_tool_call_history = True + break + content = msg.get("content") + if isinstance(content, list) and any( + isinstance(b, dict) and b.get("type") == "tool_use" for b in content + ): + has_tool_call_history = True + break + converted = [] for msg in messages: role = msg.get("role") content = msg.get("content") - if isinstance(content, str): - converted.append(msg) - continue - + # Pass-through path for non-list content (e.g. plain string). + # Back-fill `reasoning_content` on assistant messages whenever the + # history contains any tool-call turn. if not isinstance(content, list): - converted.append(msg) + if ( + role == "assistant" + and isinstance(msg, dict) + and has_tool_call_history + and "reasoning_content" not in msg + ): + patched = dict(msg) + patched["reasoning_content"] = "" + converted.append(patched) + else: + converted.append(msg) continue if role == "user": @@ -563,10 +593,15 @@ class DeepSeekBot(Bot, OpenAICompatibleBot): if not text_parts: openai_msg["content"] = None - # Round-trip reasoning_content: required for tool-call turns, - # harmless (server-ignored) for plain text turns. + # Round-trip reasoning_content: required for every assistant + # message once the history contains any tool-call turn (see + # outer comment). Use empty string as fallback when the trace + # was not captured — DeepSeek validates field presence, not + # value; non-thinking backends silently ignore it. if reasoning_parts: openai_msg["reasoning_content"] = "\n".join(reasoning_parts) + elif has_tool_call_history: + openai_msg["reasoning_content"] = "" converted.append(openai_msg) else: diff --git a/models/linkai/link_ai_bot.py b/models/linkai/link_ai_bot.py index 191b2133..801adce6 100644 --- a/models/linkai/link_ai_bot.py +++ b/models/linkai/link_ai_bot.py @@ -704,6 +704,23 @@ def _linkai_convert_messages_to_openai_format(self, messages): if not messages: return openai_messages + # DeepSeek (proxied via LinkAI) requires `reasoning_content` on EVERY + # assistant message once the history contains any tool-call turn — not + # just the tool-call turn itself. Detect that condition first. + has_tool_call_history = False + for src in messages: + if src.get("role") != "assistant": + continue + if src.get("tool_calls"): + has_tool_call_history = True + break + content = src.get("content") + if isinstance(content, list) and any( + isinstance(b, dict) and b.get("type") == "tool_use" for b in content + ): + has_tool_call_history = True + break + # Walk the original Claude messages to collect each assistant turn's # reasoning text, then attach it to the matching converted entry. dst_idx = 0 @@ -722,8 +739,15 @@ def _linkai_convert_messages_to_openai_format(self, messages): dst_idx += 1 if dst_idx >= len(openai_messages): break + dst_msg = openai_messages[dst_idx] if reasoning_parts: - openai_messages[dst_idx]["reasoning_content"] = "\n".join(reasoning_parts) + dst_msg["reasoning_content"] = "\n".join(reasoning_parts) + elif has_tool_call_history: + # Fallback when the trace was lost (proxy stripped it, model + # switched mid-session, thinking toggled on after tool calls). + # DeepSeek-style backends validate field presence, not value; + # non-thinking backends silently ignore the empty string. + dst_msg["reasoning_content"] = "" dst_idx += 1 return openai_messages