From f5479c56afd1a40e8b09c0759798e5d9a38c9790 Mon Sep 17 00:00:00 2001
From: zhayujie <yjzha1996@163.com>
Date: Fri, 15 May 2026 18:17:35 +0800
Subject: [PATCH] feat(models): support reasoning_effort config for DeepSeek V4

---
 bridge/agent_bridge.py      | 18 ++++++++++++++++--
 config.py                   | 20 +++-----------------
 docs/en/models/deepseek.mdx | 18 ++++++++++++++++++
 docs/ja/models/deepseek.mdx | 18 ++++++++++++++++++
 docs/models/deepseek.mdx    | 18 ++++++++++++++++++
 5 files changed, 73 insertions(+), 19 deletions(-)
diff --git a/bridge/agent_bridge.py b/bridge/agent_bridge.py
index 8f238d62..e60ffd9d 100644
--- a/bridge/agent_bridge.py
+++ b/bridge/agent_bridge.py
@@ -172,10 +172,17 @@ class AgentLLMModel(LLMModel):
                 # reasoning trace, but still benefit from the higher answer
                 # quality the thinking pass produces.
                 from config import conf
+                thinking_enabled = bool(conf().get("enable_thinking", False))
                 kwargs['thinking'] = (
-                    {"type": "enabled"} if conf().get("enable_thinking", False)
+                    {"type": "enabled"} if thinking_enabled
                     else {"type": "disabled"}
                 )
+                # Reasoning effort is only meaningful when thinking is on.
+                # Bots that don't understand the kwarg drop it silently.
+                if thinking_enabled:
+                    effort = conf().get("reasoning_effort", "high")
+                    if effort in ("high", "max"):
+                        kwargs['reasoning_effort'] = effort
 
                 response = self.bot.call_with_tools(**kwargs)
                 return self._format_response(response)
@@ -227,10 +234,17 @@ class AgentLLMModel(LLMModel):
                 # reasoning trace, but still benefit from the higher answer
                 # quality the thinking pass produces.
                 from config import conf
+                thinking_enabled = bool(conf().get("enable_thinking", False))
                 kwargs['thinking'] = (
-                    {"type": "enabled"} if conf().get("enable_thinking", False)
+                    {"type": "enabled"} if thinking_enabled
                     else {"type": "disabled"}
                 )
+                # Reasoning effort is only meaningful when thinking is on.
+                # Bots that don't understand the kwarg drop it silently.
+                if thinking_enabled:
+                    effort = conf().get("reasoning_effort", "high")
+                    if effort in ("high", "max"):
+                        kwargs['reasoning_effort'] = effort
 
                 stream = self.bot.call_with_tools(**kwargs)
                 
diff --git a/config.py b/config.py
index bea33752..95e851c3 100644
--- a/config.py
+++ b/config.py
@@ -214,24 +214,10 @@ available_setting = {
     "agent_max_context_turns": 20,  # Agent模式下最大上下文记忆轮次
     "agent_max_steps": 20,  # Agent模式下单次运行最大决策步数
     "enable_thinking": False,  # Enable deep-thinking mode for thinking-capable models
+    "reasoning_effort": "high",  # Reasoning depth under thinking mode: "high" or "max"
     "knowledge": True,  # 是否开启知识库功能
-    # Per-skill runtime config. Nested keys are flattened to env vars at startup
-    # using the rule: skill[<name>][<key>] -> SKILL_<NAME>_<KEY>
-    # (e.g. skill["image-generation"].model -> SKILL_IMAGE_GENERATION_MODEL).
-    "skill": {},
-    # MCP (Model Context Protocol) server list.
-    # Each entry describes one MCP server to connect at startup.
-    # Supported types:
-    #   stdio — launch a local process and communicate over stdin/stdout
-    #   sse   — connect to a remote server via HTTP + Server-Sent Events
-    #
-    # Example:
-    #   "mcp_servers": [
-    #     {"name": "filesystem", "type": "stdio", "command": "npx",
-    #      "args": ["-y", "@modelcontextprotocol/server-filesystem", "/tmp"]},
-    #     {"name": "my-api", "type": "sse", "url": "http://localhost:8000/sse"}
-    #   ]
-    "mcp_servers": [],
+    "skill": {},  # Per-skill runtime config; nested keys flatten to SKILL_<NAME>_<KEY> env vars at startup
+    "mcp_servers": [],  # MCP server list; each entry supports type "stdio" (local process) or "sse" (remote URL)
 }
 
 
diff --git a/docs/en/models/deepseek.mdx b/docs/en/models/deepseek.mdx
index 38d211df..1bafb076 100644
--- a/docs/en/models/deepseek.mdx
+++ b/docs/en/models/deepseek.mdx
@@ -42,6 +42,24 @@ Controlled by the global `enable_thinking` setting:
 - `true`: thinking is on across all channels. The Web console renders the reasoning trace; IM channels (WeChat / WeCom / DingTalk / Feishu) don't render it but still benefit from higher answer quality.
 - `false`: thinking off, faster responses with lower first-token latency.
 
+### Reasoning Effort
+
+Under thinking mode, `reasoning_effort` controls how hard the model thinks:
+
+```json
+{
+  "enable_thinking": true,
+  "reasoning_effort": "high"
+}
+```
+
+| Value | Use Case |
+| --- | --- |
+| `high` (default) | Day-to-day agent tasks; balances thinking depth against latency |
+| `max` | Complex coding, long-horizon planning, strict-constraint tasks. Deeper reasoning at the cost of more output tokens and higher latency |
+
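+`reasoning_effort` only takes effect when `enable_thinking` is `true`. Only `high` and `max` are recognized; any other value is not sent to the model (no error is raised). The setting is also silently ignored on models that do not support thinking mode.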
+
 ### Notes
 
 - **Sampling parameters**: under thinking mode, `temperature`, `top_p`, `presence_penalty`, and `frequency_penalty` are silently ignored by the server (no error). CowAgent skips sending them automatically.
diff --git a/docs/ja/models/deepseek.mdx b/docs/ja/models/deepseek.mdx
index 1f9327dd..018931f0 100644
--- a/docs/ja/models/deepseek.mdx
+++ b/docs/ja/models/deepseek.mdx
@@ -42,6 +42,24 @@ V4シリーズ（`deepseek-v4-flash` / `deepseek-v4-pro`）は明示的な「思
 - `true`：すべてのチャネルで思考モードがオン。Webコンソールでは思考過程を表示し、IMチャネル（WeChat / WeCom / DingTalk / Feishu）では表示されないものの、回答品質の向上というメリットを得られます。
 - `false`：思考オフ、応答が速く、初回トークンの遅延も低くなります。
 
+### 推論強度
+
+思考モード下では `reasoning_effort` で推論の深さを制御できます：
+
+```json
+{
+  "enable_thinking": true,
+  "reasoning_effort": "high"
+}
+```
+
+| 値 | 適用シーン |
+| --- | --- |
+| `high`（デフォルト） | 通常の Agent タスク、思考の深さとレスポンス速度のバランス |
+| `max` | 複雑なコーディング、長いプランニング、厳密な制約のあるタスク。より深い推論と引き換えに出力トークンとレイテンシが増加 |
+
+`reasoning_effort` は `enable_thinking` が `true` の場合のみ有効になります。指定できる値は `high` と `max` のみで、それ以外の値が設定されている場合、このフィールドはモデルに送信されません（エラーにはなりません）。思考モードをサポートしないモデルでは自動的に無視されます。
+
 ### 注意事項
 
 - **サンプリングパラメータ**：思考モード時は `temperature`、`top_p`、`presence_penalty`、`frequency_penalty` がサーバ側で無視されます（エラーにはなりません）。CowAgentは自動的に送信をスキップします。
diff --git a/docs/models/deepseek.mdx b/docs/models/deepseek.mdx
index 76fde82f..a522ce98 100644
--- a/docs/models/deepseek.mdx
+++ b/docs/models/deepseek.mdx
@@ -42,6 +42,24 @@ V4 系列（`deepseek-v4-flash` / `deepseek-v4-pro`）支持显式的"思考模
 - `true`：所有渠道下模型都会先思考再作答。Web 控制台会展示思考过程，IM 渠道（微信 / 企微 / 钉钉 / 飞书）虽不展示但同样获得更好答案。
 - `false`：关闭思考，响应更快，首字延迟更低。
 
+### 推理强度
+
+思考模式下可通过 `reasoning_effort` 控制推理强度：
+
+```json
+{
+  "enable_thinking": true,
+  "reasoning_effort": "high"
+}
+```
+
+| 取值 | 适用场景 |
+| --- | --- |
+| `high`（默认） | 日常 Agent 任务，思考与速度的平衡 |
+| `max` | 复杂编码、长链路规划、严格约束的任务，推理更深但耗时与输出 token 更多 |
+
+`reasoning_effort` 仅在 `enable_thinking` 为 `true` 时生效；仅支持 `high` 和 `max` 两个取值，其他取值不会传给模型（不会报错）；模型不支持思考模式时该字段自动忽略。
+
 ### 行为说明
 
 - **采样参数**：思考模式下 `temperature`、`top_p`、`presence_penalty`、`frequency_penalty` 会被服务端忽略（不会报错），CowAgent 会自动跳过传入。