feat: support kimi coding plan by temporary solution

2026-07-17 11:07:11 +08:00 · 2026-04-23 16:24:37 +08:00
parent 68ce2e5232
commit fc465b463d
4 changed files with 91 additions and 61 deletions
--- a/models/moonshot/moonshot_bot.py
+++ b/models/moonshot/moonshot_bot.py
@@ -39,6 +39,29 @@ class MoonshotBot(Bot):
            url = url.rsplit("/chat/completions", 1)[0]
        return url.rstrip("/")

+    @property
+    def _is_kimi_coding_plan(self) -> bool:
+        """Detect Kimi Coding Plan by model name or API base URL."""
+        model = str(conf().get("model", ""))
+        base = str(conf().get("moonshot_base_url", ""))
+        return model == "kimi-for-coding" or "api.kimi.com/coding" in base
+
+    @staticmethod
+    def _model_supports_thinking(model_name: str) -> bool:
+        """Return True if the model supports the ``thinking`` request parameter."""
+        m = model_name.lower()
+        return m.startswith("kimi-k2") or m.startswith("kimi-k1.5")
+
+    def _build_headers(self) -> dict:
+        """Build HTTP headers, adding Coding-Agent User-Agent for Kimi Coding Plan."""
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {self.api_key}",
+        }
+        if self._is_kimi_coding_plan:
+            headers["User-Agent"] = "claude-cli/2.1.39"
+        return headers
+
    def reply(self, query, context=None):
        # acquire reply content
        if context.type == ContextType.TEXT:
@@ -97,19 +120,15 @@ class MoonshotBot(Bot):
        :return: {}
        """
        try:
-            headers = {
-                "Content-Type": "application/json",
-                "Authorization": "Bearer " + self.api_key
-            }
+            headers = self._build_headers()
            # Fallback to default args (e.g. when called by session title
            # generation which passes only the session). Always copy to avoid
            # mutating the shared self.args across calls.
            body = dict(args) if args else dict(self.args)
            body["messages"] = session.messages
-            # K2.x series enforces fixed temperature/top_p; passing custom values
-            # makes the API reject the request. Strip them for kimi-k2 family.
            model_name = str(body.get("model", ""))
-            if model_name.startswith("kimi-k2"):
+            # K2.x / Coding Plan enforce fixed temperature/top_p; strip them.
+            if model_name.startswith("kimi-k2") or model_name == "kimi-for-coding":
                body.pop("temperature", None)
                body.pop("top_p", None)
            res = requests.post(
@@ -174,10 +193,7 @@ class MoonshotBot(Bot):
                    ],
                }],
            }
-            headers = {
-                "Authorization": f"Bearer {self.api_key}",
-                "Content-Type": "application/json",
-            }
+            headers = self._build_headers()
            resp = requests.post(f"{self.base_url}/chat/completions",
                                 headers=headers, json=payload, timeout=60)
            if resp.status_code != 200:
@@ -258,7 +274,12 @@ class MoonshotBot(Bot):
                request_body["tools"] = converted_tools
                request_body["tool_choice"] = "auto"

-            request_body["thinking"] = kwargs.get("thinking", {"type": "enabled"})
+            # Kimi Coding Plan has built-in reasoning and ignores the thinking param.
+            # For regular Kimi models, only K2/K1.5 series support the thinking param.
+            # Respect the enable_thinking config passed from agent_bridge.
+            if not self._is_kimi_coding_plan and self._model_supports_thinking(model):
+                thinking = kwargs.get("thinking", {"type": "enabled"})
+                request_body["thinking"] = thinking

            logger.debug(f"[MOONSHOT] API call: model={model}, "
                         f"tools={len(converted_tools) if converted_tools else 0}, stream={stream}")
@@ -282,10 +303,7 @@ class MoonshotBot(Bot):
    def _handle_stream_response(self, request_body: dict):
        """Handle streaming SSE response from Moonshot API and yield OpenAI-format chunks."""
        try:
-            headers = {
-                "Content-Type": "application/json",
-                "Authorization": f"Bearer {self.api_key}"
-            }
+            headers = self._build_headers()

            url = f"{self.base_url}/chat/completions"
            response = requests.post(url, headers=headers, json=request_body, stream=True, timeout=120)
@@ -304,10 +322,13 @@ class MoonshotBot(Bot):
                    continue

                line = line.decode("utf-8")
-                if not line.startswith("data: "):
+                # Handle both "data: {...}" and "data:{...}" (Kimi Coding Plan omits the space)
+                if line.startswith("data: "):
+                    data_str = line[6:]
+                elif line.startswith("data:"):
+                    data_str = line[5:]
+                else:
                    continue
-
-                data_str = line[6:]  # Remove "data: " prefix
                if data_str.strip() == "[DONE]":
                    break

@@ -328,9 +349,16 @@ class MoonshotBot(Bot):
                if not chunk.get("choices"):
                    continue

-                choice = chunk["choices"][0]
+                choices = chunk["choices"]
+                if not choices:
+                    continue
+                choice = choices[0]
                delta = choice.get("delta", {})

+                # Capture finish_reason early (it may arrive on any chunk type)
+                if choice.get("finish_reason"):
+                    finish_reason = choice["finish_reason"]
+
                if delta.get("reasoning_content"):
                    yield {
                        "choices": [{
@@ -382,10 +410,6 @@ class MoonshotBot(Bot):
                            }]
                        }

-                # Capture finish_reason
-                if choice.get("finish_reason"):
-                    finish_reason = choice["finish_reason"]
-
            # Final chunk with finish_reason
            yield {
                "choices": [{
@@ -409,10 +433,7 @@ class MoonshotBot(Bot):
    def _handle_sync_response(self, request_body: dict):
        """Handle synchronous API response and yield a single result dict."""
        try:
-            headers = {
-                "Content-Type": "application/json",
-                "Authorization": f"Bearer {self.api_key}"
-            }
+            headers = self._build_headers()

            request_body.pop("stream", None)
            url = f"{self.base_url}/chat/completions"
@@ -530,6 +551,7 @@ class MoonshotBot(Bot):
                openai_msg = {"role": "assistant"}
                text_parts = []
                tool_calls = []
+                reasoning_parts = []

                for block in content:
                    if not isinstance(block, dict):
@@ -545,6 +567,8 @@ class MoonshotBot(Bot):
                                "arguments": json.dumps(block.get("input", {}))
                            }
                        })
+                    elif block.get("type") == "thinking":
+                        reasoning_parts.append(block.get("thinking", ""))

                if text_parts:
                    openai_msg["content"] = "\n".join(text_parts)
@@ -556,6 +580,12 @@ class MoonshotBot(Bot):
                    if not text_parts:
                        openai_msg["content"] = None

+                # Kimi API requires reasoning_content in assistant messages
+                # when thinking was active for that turn. The presence of
+                # reasoning_parts means thinking was on, so always round-trip it.
+                if reasoning_parts:
+                    openai_msg["reasoning_content"] = "\n".join(reasoning_parts)
+
                converted.append(openai_msg)
            else:
                converted.append(msg)