fix: robust tool args JSON parsing for non-strict providers #2823

This commit is contained in:
zhayujie
2026-05-27 18:37:54 +08:00
parent 8d67177a1b
commit 116fb27257
5 changed files with 45 additions and 23 deletions

View File

@@ -13,6 +13,13 @@ from agent.protocol.message_utils import sanitize_claude_messages, compress_turn
from agent.tools.base_tool import BaseTool, ToolResult from agent.tools.base_tool import BaseTool, ToolResult
from common.log import logger from common.log import logger
# Optional: repair malformed JSON args from non-strict providers (e.g. unescaped quotes in long content).
try:
from json_repair import repair_json as _repair_json
_HAS_JSON_REPAIR = True
except ImportError:
_HAS_JSON_REPAIR = False
# Maximum number of characters of model "reasoning / thinking" content to persist # Maximum number of characters of model "reasoning / thinking" content to persist
# in conversation history. The full reasoning is still streamed to the UI in real # in conversation history. The full reasoning is still streamed to the UI in real
@@ -45,6 +52,30 @@ def _truncate_reasoning_for_storage(text: str) -> str:
return head + _REASONING_TRUNCATE_MARKER.format(omitted=omitted) + tail return head + _REASONING_TRUNCATE_MARKER.format(omitted=omitted) + tail
def _parse_tool_args(args_str: str, finish_reason: Optional[str]) -> Tuple[dict, Optional[str]]:
"""Parse tool args JSON. Returns (args, error_msg); error_msg is None on success.
On JSONDecodeError: detect truncation first (skip repair, surface max_tokens hint);
otherwise try json-repair for escape issues; finally fall back to the raw decoder error.
"""
if not args_str:
return {}, None
try:
return json.loads(args_str), None
except json.JSONDecodeError as e:
if finish_reason in ("length", "max_tokens") or not args_str.rstrip().endswith("}"):
return {}, "Output truncated (max_tokens reached). Split content into smaller chunks across multiple tool calls."
if _HAS_JSON_REPAIR:
try:
repaired = _repair_json(args_str, return_objects=True)
if isinstance(repaired, dict):
logger.warning(f"Tool args JSON repaired ({len(args_str)} chars)")
return repaired, None
except Exception:
pass
return {}, f"Invalid JSON in tool arguments: {e.msg}"
class AgentStreamExecutor: class AgentStreamExecutor:
""" """
Agent Stream Executor Agent Stream Executor
@@ -973,26 +1004,17 @@ class AgentStreamExecutor:
import uuid import uuid
tool_id = f"call_{uuid.uuid4().hex[:24]}" tool_id = f"call_{uuid.uuid4().hex[:24]}"
try: args_str = tc.get("arguments") or ""
# Safely get arguments, handle None case arguments, parse_err = _parse_tool_args(args_str, stop_reason)
args_str = tc.get("arguments") or "" if parse_err:
arguments = json.loads(args_str) if args_str else {} logger.error(
except json.JSONDecodeError as e: f"Tool args parse failed for {tc['name']} ({len(args_str)} chars): {parse_err}"
# Handle None or invalid arguments safely )
args_str = tc.get('arguments') or ""
args_preview = args_str[:200] if len(args_str) > 200 else args_str
logger.error(f"Failed to parse tool arguments for {tc['name']}")
logger.error(f"Arguments length: {len(args_str)} chars")
logger.error(f"Arguments preview: {args_preview}...")
logger.error(f"JSON decode error: {e}")
# Return a clear error message to the LLM instead of empty dict
# This helps the LLM understand what went wrong
tool_calls.append({ tool_calls.append({
"id": tool_id, "id": tool_id,
"name": tc["name"], "name": tc["name"],
"arguments": {}, "arguments": {},
"_parse_error": f"Invalid JSON in tool arguments: {args_preview}... Error: {str(e)}. Tip: For large content, consider splitting into smaller chunks or using a different approach." "_parse_error": parse_err,
}) })
continue continue
@@ -1080,14 +1102,11 @@ class AgentStreamExecutor:
tool_id = tool_call["id"] tool_id = tool_call["id"]
arguments = tool_call["arguments"] arguments = tool_call["arguments"]
# Check if there was a JSON parse error
if "_parse_error" in tool_call: if "_parse_error" in tool_call:
parse_error = tool_call["_parse_error"]
logger.error(f"Skipping tool execution due to parse error: {parse_error}")
result = { result = {
"status": "error", "status": "error",
"result": f"Failed to parse tool arguments. {parse_error}. Please ensure your tool call uses valid JSON format with all required parameters.", "result": tool_call["_parse_error"],
"execution_time": 0 "execution_time": 0,
} }
self._record_tool_result(tool_name, arguments, False) self._record_tool_result(tool_name, arguments, False)
return result return result

View File

@@ -3,8 +3,8 @@ tiktoken>=0.3.2 # openai calculate token
#voice #voice
pydub>=0.25.1 # need ffmpeg pydub>=0.25.1 # need ffmpeg
gTTS>=2.3.1 # google text to speech gTTS>=2.3.1 # google text to speech
edge-tts # edge-tts # edge-tts: install on demand, see voice/edge/edge_voice.py
elevenlabs==1.0.3 # elevenlabs TTS # elevenlabs: install on demand, see voice/elevent/elevent_voice.py
#install plugin #install plugin
dulwich dulwich

View File

@@ -10,6 +10,7 @@ PyYAML>=6.0
croniter>=2.0.0 croniter>=2.0.0
click>=8.0 click>=8.0
qrcode qrcode
json-repair
# wechatcom & wechatmp # wechatcom & wechatmp
wechatpy wechatpy

View File

@@ -1,3 +1,4 @@
# Requires: edge-tts (pip install edge-tts)
import time import time
import edge_tts import edge_tts

View File

@@ -1,3 +1,4 @@
# Requires: elevenlabs==1.0.3 (pip install elevenlabs==1.0.3)
import time import time
from elevenlabs.client import ElevenLabs from elevenlabs.client import ElevenLabs