From 116fb27257cdc8162e204fd6d566e75f33ece2c5 Mon Sep 17 00:00:00 2001
From: zhayujie <yjzha1996@163.com>
Date: Wed, 27 May 2026 18:37:54 +0800
Subject: [PATCH] fix: robust tool args JSON parsing for non-strict providers
 #2823

---
 agent/protocol/agent_stream.py | 61 ++++++++++++++++++++++------------
 requirements-optional.txt      |  4 +--
 requirements.txt               |  1 +
 voice/edge/edge_voice.py       |  1 +
 voice/elevent/elevent_voice.py |  1 +
 5 files changed, 45 insertions(+), 23 deletions(-)

diff --git a/agent/protocol/agent_stream.py b/agent/protocol/agent_stream.py
index 701b00de..e3be20b8 100644
--- a/agent/protocol/agent_stream.py
+++ b/agent/protocol/agent_stream.py
@@ -13,6 +13,13 @@ from agent.protocol.message_utils import sanitize_claude_messages, compress_turn
 from agent.tools.base_tool import BaseTool, ToolResult
 from common.log import logger
 
+# Optional: json-repair repairs malformed JSON args from non-strict providers (e.g. unescaped quotes in long content); skipped if not installed.
+try:
+    from json_repair import repair_json as _repair_json
+    _HAS_JSON_REPAIR = True
+except ImportError:
+    _HAS_JSON_REPAIR = False
+
 
 # Maximum number of characters of model "reasoning / thinking" content to persist
 # in conversation history. The full reasoning is still streamed to the UI in real
@@ -45,6 +52,30 @@ def _truncate_reasoning_for_storage(text: str) -> str:
     return head + _REASONING_TRUNCATE_MARKER.format(omitted=omitted) + tail
 
 
+def _parse_tool_args(args_str: str, finish_reason: Optional[str]) -> Tuple[dict, Optional[str]]:
+    """Parse tool args JSON. Returns (args, error_msg); error_msg is None on success.
+
+    On JSONDecodeError: when finish_reason is "length"/"max_tokens" or the text does not end with "}", treat it as
+    truncated (skip repair, surface max_tokens hint); otherwise try json-repair, then fall back to the decoder error.
+    """
+    if not args_str:
+        return {}, None
+    try:
+        return json.loads(args_str), None
+    except json.JSONDecodeError as e:
+        if finish_reason in ("length", "max_tokens") or not args_str.rstrip().endswith("}"):
+            return {}, "Output truncated (max_tokens reached). Split content into smaller chunks across multiple tool calls."
+        if _HAS_JSON_REPAIR:
+            try:
+                repaired = _repair_json(args_str, return_objects=True)
+                if isinstance(repaired, dict):
+                    logger.warning(f"Tool args JSON repaired ({len(args_str)} chars)")
+                    return repaired, None
+            except Exception:
+                pass
+        return {}, f"Invalid JSON in tool arguments: {e.msg}"
+
+
 class AgentStreamExecutor:
     """
     Agent Stream Executor
@@ -973,26 +1004,17 @@ class AgentStreamExecutor:
                 import uuid
                 tool_id = f"call_{uuid.uuid4().hex[:24]}"
 
-            try:
-                # Safely get arguments, handle None case
-                args_str = tc.get("arguments") or ""
-                arguments = json.loads(args_str) if args_str else {}
-            except json.JSONDecodeError as e:
-                # Handle None or invalid arguments safely
-                args_str = tc.get('arguments') or ""
-                args_preview = args_str[:200] if len(args_str) > 200 else args_str
-                logger.error(f"Failed to parse tool arguments for {tc['name']}")
-                logger.error(f"Arguments length: {len(args_str)} chars")
-                logger.error(f"Arguments preview: {args_preview}...")
-                logger.error(f"JSON decode error: {e}")
-
-                # Return a clear error message to the LLM instead of empty dict
-                # This helps the LLM understand what went wrong
+            args_str = tc.get("arguments") or ""
+            arguments, parse_err = _parse_tool_args(args_str, stop_reason)
+            if parse_err:
+                logger.error(
+                    f"Tool args parse failed for {tc['name']} ({len(args_str)} chars): {parse_err}"
+                )
                 tool_calls.append({
                     "id": tool_id,
                     "name": tc["name"],
                     "arguments": {},
-                    "_parse_error": f"Invalid JSON in tool arguments: {args_preview}... Error: {str(e)}. Tip: For large content, consider splitting into smaller chunks or using a different approach."
+                    "_parse_error": parse_err,
                 })
                 continue
 
@@ -1080,14 +1102,11 @@ class AgentStreamExecutor:
         tool_id = tool_call["id"]
         arguments = tool_call["arguments"]
 
-        # Check if there was a JSON parse error
         if "_parse_error" in tool_call:
-            parse_error = tool_call["_parse_error"]
-            logger.error(f"Skipping tool execution due to parse error: {parse_error}")
             result = {
                 "status": "error",
-                "result": f"Failed to parse tool arguments. {parse_error}. Please ensure your tool call uses valid JSON format with all required parameters.",
-                "execution_time": 0
+                "result": tool_call["_parse_error"],
+                "execution_time": 0,
             }
             self._record_tool_result(tool_name, arguments, False)
             return result
diff --git a/requirements-optional.txt b/requirements-optional.txt
index c8cd9a63..7abdc8e5 100644
--- a/requirements-optional.txt
+++ b/requirements-optional.txt
@@ -3,8 +3,8 @@ tiktoken>=0.3.2 # openai calculate token
 #voice
 pydub>=0.25.1 # need ffmpeg
 gTTS>=2.3.1 # google text to speech
-edge-tts # edge-tts
-elevenlabs==1.0.3 # elevenlabs TTS
+# edge-tts: install on demand, see voice/edge/edge_voice.py
+# elevenlabs: install on demand, see voice/elevent/elevent_voice.py
 
 #install plugin
 dulwich
diff --git a/requirements.txt b/requirements.txt
index 77a66551..706d9894 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,6 +10,7 @@ PyYAML>=6.0
 croniter>=2.0.0
 click>=8.0
 qrcode
+json-repair
 
 # wechatcom & wechatmp
 wechatpy
diff --git a/voice/edge/edge_voice.py b/voice/edge/edge_voice.py
index 7bb8b2e6..1a25a2b4 100644
--- a/voice/edge/edge_voice.py
+++ b/voice/edge/edge_voice.py
@@ -1,3 +1,4 @@
+# Requires: edge-tts  (pip install edge-tts)
 import time
 
 import edge_tts
diff --git a/voice/elevent/elevent_voice.py b/voice/elevent/elevent_voice.py
index 2cfa5a3f..5e274638 100644
--- a/voice/elevent/elevent_voice.py
+++ b/voice/elevent/elevent_voice.py
@@ -1,3 +1,4 @@
+# Requires: elevenlabs==1.0.3  (pip install elevenlabs==1.0.3)
 import time
 
 from elevenlabs.client import ElevenLabs