fix: robust tool args JSON parsing for non-strict providers #2823

2026-07-17 11:07:11 +08:00 · 2026-05-27 18:37:54 +08:00
parent 8d67177a1b
commit 116fb27257
5 changed files with 45 additions and 23 deletions
--- a/agent/protocol/agent_stream.py
+++ b/agent/protocol/agent_stream.py
@@ -13,6 +13,13 @@ from agent.protocol.message_utils import sanitize_claude_messages, compress_turn
 from agent.tools.base_tool import BaseTool, ToolResult
 from common.log import logger
 # Optional: repair malformed JSON args from non-strict providers (e.g. unescaped quotes in long content).
 try:
    from json_repair import repair_json as _repair_json
    _HAS_JSON_REPAIR = True
 except ImportError:
    _HAS_JSON_REPAIR = False
 # Maximum number of characters of model "reasoning / thinking" content to persist
 # in conversation history. The full reasoning is still streamed to the UI in real
@@ -45,6 +52,30 @@ def _truncate_reasoning_for_storage(text: str) -> str:
    return head + _REASONING_TRUNCATE_MARKER.format(omitted=omitted) + tail
 def _parse_tool_args(args_str: str, finish_reason: Optional[str]) -> Tuple[dict, Optional[str]]:
    """Parse tool args JSON. Returns (args, error_msg); error_msg is None on success.
    On JSONDecodeError: detect truncation first (skip repair, surface max_tokens hint);
    otherwise try json-repair for escape issues; finally fall back to the raw decoder error.
    """
    if not args_str:
        return {}, None
    try:
        return json.loads(args_str), None
    except json.JSONDecodeError as e:
        if finish_reason in ("length", "max_tokens") or not args_str.rstrip().endswith("}"):
            return {}, "Output truncated (max_tokens reached). Split content into smaller chunks across multiple tool calls."
        if _HAS_JSON_REPAIR:
            try:
                repaired = _repair_json(args_str, return_objects=True)
                if isinstance(repaired, dict):
                    logger.warning(f"Tool args JSON repaired ({len(args_str)} chars)")
                    return repaired, None
            except Exception:
                pass
        return {}, f"Invalid JSON in tool arguments: {e.msg}"
 class AgentStreamExecutor:
    """
    Agent Stream Executor
@@ -973,26 +1004,17 @@ class AgentStreamExecutor:
                import uuid
                tool_id = f"call_{uuid.uuid4().hex[:24]}"
-            try:
+            args_str = tc.get("arguments") or ""
-                # Safely get arguments, handle None case
+            arguments, parse_err = _parse_tool_args(args_str, stop_reason)
-                args_str = tc.get("arguments") or ""
+            if parse_err:
-                arguments = json.loads(args_str) if args_str else {}
+                logger.error(
-            except json.JSONDecodeError as e:
+                    f"Tool args parse failed for {tc['name']} ({len(args_str)} chars): {parse_err}"
-                # Handle None or invalid arguments safely
+                )
                args_str = tc.get('arguments') or ""
                args_preview = args_str[:200] if len(args_str) > 200 else args_str
                logger.error(f"Failed to parse tool arguments for {tc['name']}")
                logger.error(f"Arguments length: {len(args_str)} chars")
                logger.error(f"Arguments preview: {args_preview}...")
                logger.error(f"JSON decode error: {e}")
                # Return a clear error message to the LLM instead of empty dict
                # This helps the LLM understand what went wrong
                tool_calls.append({
                    "id": tool_id,
                    "name": tc["name"],
                    "arguments": {},
-                    "_parse_error": f"Invalid JSON in tool arguments: {args_preview}... Error: {str(e)}. Tip: For large content, consider splitting into smaller chunks or using a different approach."
+                    "_parse_error": parse_err,
                })
                continue
@@ -1080,14 +1102,11 @@ class AgentStreamExecutor:
        tool_id = tool_call["id"]
        arguments = tool_call["arguments"]
        # Check if there was a JSON parse error
        if "_parse_error" in tool_call:
            parse_error = tool_call["_parse_error"]
            logger.error(f"Skipping tool execution due to parse error: {parse_error}")
            result = {
                "status": "error",
-                "result": f"Failed to parse tool arguments. {parse_error}. Please ensure your tool call uses valid JSON format with all required parameters.",
+                "result": tool_call["_parse_error"],
-                "execution_time": 0
+                "execution_time": 0,
            }
            self._record_tool_result(tool_name, arguments, False)
            return result
--- a/requirements-optional.txt
+++ b/requirements-optional.txt
@@ -3,8 +3,8 @@ tiktoken>=0.3.2 # openai calculate token
 #voice
 pydub>=0.25.1 # need ffmpeg
 gTTS>=2.3.1 # google text to speech
-edge-tts # edge-tts
+# edge-tts: install on demand, see voice/edge/edge_voice.py
-elevenlabs==1.0.3 # elevenlabs TTS
+# elevenlabs: install on demand, see voice/elevent/elevent_voice.py
 #install plugin
 dulwich
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,6 +10,7 @@ PyYAML>=6.0
 croniter>=2.0.0
 click>=8.0
 qrcode
 json-repair
 # wechatcom & wechatmp
 wechatpy
--- a/voice/edge/edge_voice.py
+++ b/voice/edge/edge_voice.py
@@ -1,3 +1,4 @@
 # Requires: edge-tts  (pip install edge-tts)
 import time
 import edge_tts
--- a/voice/elevent/elevent_voice.py
+++ b/voice/elevent/elevent_voice.py
@@ -1,3 +1,4 @@
 # Requires: elevenlabs==1.0.3  (pip install elevenlabs==1.0.3)
 import time
 from elevenlabs.client import ElevenLabs