From f5479c56afd1a40e8b09c0759798e5d9a38c9790 Mon Sep 17 00:00:00 2001 From: zhayujie Date: Fri, 15 May 2026 18:17:35 +0800 Subject: [PATCH] feat(models): support reasoning_effort config for DeepSeek V4 --- bridge/agent_bridge.py | 18 ++++++++++++++++-- config.py | 20 +++----------------- docs/en/models/deepseek.mdx | 18 ++++++++++++++++++ docs/ja/models/deepseek.mdx | 18 ++++++++++++++++++ docs/models/deepseek.mdx | 18 ++++++++++++++++++ 5 files changed, 73 insertions(+), 19 deletions(-) diff --git a/bridge/agent_bridge.py b/bridge/agent_bridge.py index 8f238d62..e60ffd9d 100644 --- a/bridge/agent_bridge.py +++ b/bridge/agent_bridge.py @@ -172,10 +172,17 @@ class AgentLLMModel(LLMModel): # reasoning trace, but still benefit from the higher answer # quality the thinking pass produces. from config import conf + thinking_enabled = bool(conf().get("enable_thinking", False)) kwargs['thinking'] = ( - {"type": "enabled"} if conf().get("enable_thinking", False) + {"type": "enabled"} if thinking_enabled else {"type": "disabled"} ) + # Reasoning effort is only meaningful when thinking is on. + # Bots that don't understand the kwarg drop it silently. + if thinking_enabled: + effort = conf().get("reasoning_effort", "high") + if effort in ("high", "max"): + kwargs['reasoning_effort'] = effort response = self.bot.call_with_tools(**kwargs) return self._format_response(response) @@ -227,10 +234,17 @@ class AgentLLMModel(LLMModel): # reasoning trace, but still benefit from the higher answer # quality the thinking pass produces. from config import conf + thinking_enabled = bool(conf().get("enable_thinking", False)) kwargs['thinking'] = ( - {"type": "enabled"} if conf().get("enable_thinking", False) + {"type": "enabled"} if thinking_enabled else {"type": "disabled"} ) + # Reasoning effort is only meaningful when thinking is on. + # Bots that don't understand the kwarg drop it silently. 
+ if thinking_enabled: + effort = conf().get("reasoning_effort", "high") + if effort in ("high", "max"): + kwargs['reasoning_effort'] = effort stream = self.bot.call_with_tools(**kwargs) diff --git a/config.py b/config.py index bea33752..95e851c3 100644 --- a/config.py +++ b/config.py @@ -214,24 +214,10 @@ available_setting = { "agent_max_context_turns": 20, # Agent模式下最大上下文记忆轮次 "agent_max_steps": 20, # Agent模式下单次运行最大决策步数 "enable_thinking": False, # Enable deep-thinking mode for thinking-capable models + "reasoning_effort": "high", # Reasoning depth under thinking mode: "high" or "max" "knowledge": True, # 是否开启知识库功能 - # Per-skill runtime config. Nested keys are flattened to env vars at startup - # using the rule: skill[<name>][<key>] -> SKILL_<NAME>_<KEY> - # (e.g. skill["image-generation"].model -> SKILL_IMAGE_GENERATION_MODEL). - "skill": {}, - # MCP (Model Context Protocol) server list. - # Each entry describes one MCP server to connect at startup. - # Supported types: - # stdio — launch a local process and communicate over stdin/stdout - # sse — connect to a remote server via HTTP + Server-Sent Events - # - # Example: - # "mcp_servers": [ - # {"name": "filesystem", "type": "stdio", "command": "npx", - # "args": ["-y", "@modelcontextprotocol/server-filesystem", "/tmp"]}, - # {"name": "my-api", "type": "sse", "url": "http://localhost:8000/sse"} - # ] - "mcp_servers": [], + "skill": {}, # Per-skill runtime config; nested keys flatten to SKILL_<NAME>_<KEY> env vars at startup + "mcp_servers": [], # MCP server list; each entry supports type "stdio" (local process) or "sse" (remote URL) } diff --git a/docs/en/models/deepseek.mdx b/docs/en/models/deepseek.mdx index 38d211df..1bafb076 100644 --- a/docs/en/models/deepseek.mdx +++ b/docs/en/models/deepseek.mdx @@ -42,6 +42,24 @@ Controlled by the global `enable_thinking` setting: - `true`: thinking is on across all channels. 
The Web console renders the reasoning trace; IM channels (WeChat / WeCom / DingTalk / Feishu) don't render it but still benefit from higher answer quality. - `false`: thinking off, faster responses with lower first-token latency. +### Reasoning Effort + +Under thinking mode, `reasoning_effort` controls how hard the model thinks: + +```json +{ + "enable_thinking": true, + "reasoning_effort": "high" +} +``` + +| Value | Use Case | +| --- | --- | +| `high` (default) | Day-to-day agent tasks; balanced thinking depth and latency | +| `max` | Complex coding, long-horizon planning, strict-constraint tasks. Deeper reasoning at the cost of more output tokens and higher latency | + +`reasoning_effort` only takes effect when `enable_thinking` is `true`. It is silently ignored on models that do not support thinking mode. + ### Notes - **Sampling parameters**: under thinking mode, `temperature`, `top_p`, `presence_penalty`, and `frequency_penalty` are silently ignored by the server (no error). CowAgent skips sending them automatically. 
diff --git a/docs/ja/models/deepseek.mdx b/docs/ja/models/deepseek.mdx index 1f9327dd..018931f0 100644 --- a/docs/ja/models/deepseek.mdx +++ b/docs/ja/models/deepseek.mdx @@ -42,6 +42,24 @@ V4シリーズ(`deepseek-v4-flash` / `deepseek-v4-pro`)は明示的な「思 - `true`:すべてのチャネルで思考モードがオン。Webコンソールでは思考過程を表示し、IMチャネル(WeChat / WeCom / DingTalk / Feishu)では表示されないものの、回答品質の向上というメリットを得られます。 - `false`:思考オフ、応答が速く、初回トークンの遅延も低くなります。 +### 推論強度 + +思考モード下では `reasoning_effort` で推論の深さを制御できます: + +```json +{ + "enable_thinking": true, + "reasoning_effort": "high" +} +``` + +| 値 | 適用シーン | +| --- | --- | +| `high`(デフォルト) | 通常の Agent タスク、思考の深さとレスポンス速度のバランス | +| `max` | 複雑なコーディング、長いプランニング、厳密な制約のあるタスク。より深い推論と引き換えに出力トークンとレイテンシが増加 | + +`reasoning_effort` は `enable_thinking` が `true` の場合のみ有効になります。思考モードをサポートしないモデルでは自動的に無視されます。 + ### 注意事項 - **サンプリングパラメータ**:思考モード時は `temperature`、`top_p`、`presence_penalty`、`frequency_penalty` がサーバ側で無視されます(エラーにはなりません)。CowAgentは自動的に送信をスキップします。 diff --git a/docs/models/deepseek.mdx b/docs/models/deepseek.mdx index 76fde82f..a522ce98 100644 --- a/docs/models/deepseek.mdx +++ b/docs/models/deepseek.mdx @@ -42,6 +42,24 @@ V4 系列(`deepseek-v4-flash` / `deepseek-v4-pro`)支持显式的"思考模 - `true`:所有渠道下模型都会先思考再作答。Web 控制台会展示思考过程,IM 渠道(微信 / 企微 / 钉钉 / 飞书)虽不展示但同样获得更好答案。 - `false`:关闭思考,响应更快,首字延迟更低。 +### 推理强度 + +思考模式下可通过 `reasoning_effort` 控制推理强度: + +```json +{ + "enable_thinking": true, + "reasoning_effort": "high" +} +``` + +| 取值 | 适用场景 | +| --- | --- | +| `high`(默认) | 日常 Agent 任务,思考与速度的平衡 | +| `max` | 复杂编码、长链路规划、严格约束的任务,推理更深但耗时与输出 token 更多 | + +`reasoning_effort` 仅在 `enable_thinking` 为 `true` 时生效;模型不支持思考模式时该字段自动忽略。 + ### 行为说明 - **采样参数**:思考模式下 `temperature`、`top_p`、`presence_penalty`、`frequency_penalty` 会被服务端忽略(不会报错),CowAgent 会自动跳过传入。