feat: add enable_thinking config to control deep reasoning on web console

2026-07-17 11:07:11 +08:00 · 2026-04-13 16:06:28 +08:00
parent 3f3d0381e5
commit 89a07e8e74
15 changed files with 135 additions and 73 deletions
--- a/models/dashscope/dashscope_bot.py
+++ b/models/dashscope/dashscope_bot.py
@@ -262,20 +262,17 @@ class DashscopeBot(Bot):
                if kwargs.get("tool_choice"):
                    parameters["tool_choice"] = kwargs["tool_choice"]
            
-            # Add thinking parameters for Qwen3 models (disabled by default for stability)
+            # Thinking mode for Qwen3/QwQ models: enabled by default, turned off when kwargs["thinking"]["type"] is not "enabled"
            if "qwen3" in model_name.lower() or "qwq" in model_name.lower():
-                # Only enable thinking mode if explicitly requested
-                enable_thinking = kwargs.get("enable_thinking", False)
-                if enable_thinking:
+                thinking = kwargs.get("thinking", {"type": "enabled"})
+                if thinking.get("type") == "enabled":
                    parameters["enable_thinking"] = True
-                    
-                    # Set thinking budget if specified
                    if kwargs.get("thinking_budget"):
                        parameters["thinking_budget"] = kwargs["thinking_budget"]
-                    
-                    # Qwen3 requires incremental_output=true in thinking mode
                    if stream:
                        parameters["incremental_output"] = True
+                else:
+                    parameters["enable_thinking"] = False
            
            # Always use incremental_output for streaming (for better token-by-token streaming)
            # This is especially important for tool calling to avoid incomplete responses
--- a/models/doubao/doubao_bot.py
+++ b/models/doubao/doubao_bot.py
@@ -249,9 +249,7 @@ class DoubaoBot(Bot):
                request_body["tools"] = converted_tools
                request_body["tool_choice"] = "auto"

-            # Explicitly disable thinking to avoid reasoning_content issues
-            # in multi-turn tool calls
-            request_body["thinking"] = {"type": "disabled"}
+            request_body["thinking"] = kwargs.get("thinking", {"type": "enabled"})

            logger.debug(f"[DOUBAO] API call: model={model}, "
                         f"tools={len(converted_tools) if converted_tools else 0}, stream={stream}")
@@ -324,8 +322,17 @@ class DoubaoBot(Bot):
                choice = chunk["choices"][0]
                delta = choice.get("delta", {})

-                # Skip reasoning_content (thinking) - don't log or forward
                if delta.get("reasoning_content"):
+                    yield {
+                        "choices": [{
+                            "index": 0,
+                            "delta": {
+                                "role": "assistant",
+                                "reasoning_content": delta["reasoning_content"]
+                            },
+                            "finish_reason": None
+                        }]
+                    }
                    continue

                # Handle text content
--- a/models/linkai/link_ai_bot.py
+++ b/models/linkai/link_ai_bot.py
@@ -560,6 +560,10 @@ def _linkai_call_with_tools(self, messages, tools=None, stream=False, **kwargs):
            body["tools"] = tools
            body["tool_choice"] = kwargs.get("tool_choice", "auto")

+        thinking = kwargs.get("thinking")
+        if thinking:
+            body["thinking"] = thinking
+
        # Prepare headers
        headers = {"Authorization": "Bearer " + conf().get("linkai_api_key")}
        base_url = conf().get("linkai_api_base", "https://api.link-ai.tech")
--- a/models/moonshot/moonshot_bot.py
+++ b/models/moonshot/moonshot_bot.py
@@ -249,10 +249,7 @@ class MoonshotBot(Bot):
                request_body["tools"] = converted_tools
                request_body["tool_choice"] = "auto"

-            # Explicitly disable thinking to avoid reasoning_content issues in multi-turn tool calls.
-            # kimi-k2.5 may enable thinking by default; without preserving reasoning_content
-            # in conversation history the API will reject subsequent requests.
-            request_body["thinking"] = {"type": "disabled"}
+            request_body["thinking"] = kwargs.get("thinking", {"type": "enabled"})

            logger.debug(f"[MOONSHOT] API call: model={model}, "
                         f"tools={len(converted_tools) if converted_tools else 0}, stream={stream}")
@@ -325,8 +322,17 @@ class MoonshotBot(Bot):
                choice = chunk["choices"][0]
                delta = choice.get("delta", {})

-                # Skip reasoning_content (thinking) – don't log or forward
                if delta.get("reasoning_content"):
+                    yield {
+                        "choices": [{
+                            "index": 0,
+                            "delta": {
+                                "role": "assistant",
+                                "reasoning_content": delta["reasoning_content"]
+                            },
+                            "finish_reason": None
+                        }]
+                    }
                    continue

                # Handle text content