feat: optimize agent configuration and memory

2026-07-19 12:47:25 +08:00 · 2026-02-02 11:48:53 +08:00
parent a8d5309c90
commit 46fa07e4a9
36 changed files with 1245 additions and 355 deletions
--- a/agent/prompt/builder.py
+++ b/agent/prompt/builder.py
@@ -308,21 +308,20 @@ def _build_memory_section(memory_manager: Any, tools: Optional[List[Any]], langu
        "在回答关于以前的工作、决定、日期、人物、偏好或待办事项的任何问题之前：",
        "",
        "1. 不确定记忆文件位置 → 先用 `memory_search` 通过关键词和语义检索相关内容",
-        "2. 已知文件位置 → 直接用 `memory_get` 读取相应的行",
-        "3. search 无结果 → 尝试用 `memory_get` 读取最近两天的记忆文件",
+        "2. 已知文件位置 → 直接用 `memory_get` 读取相应的行 (例如：MEMORY.md, memory/YYYY-MM-DD.md)",
+        "3. search 无结果 → 尝试用 `memory_get` 读取MEMORY.md及最近两天记忆文件",
        "",
        "**记忆文件结构**:",
-        "- `MEMORY.md`: 长期记忆（已自动加载，无需主动读取）",
+        "- `MEMORY.md`: 长期记忆（核心信息、偏好、决策等）",
        "- `memory/YYYY-MM-DD.md`: 每日记忆，记录当天的事件和对话信息",
        "",
-        "**使用原则**:",
-        "- 自然使用记忆，就像你本来就知道; 不用刻意提起或列举记忆，除非用户提起相关内容",
+        "**写入记忆**:",
+        "- 追加内容 → `edit` 工具，oldText 留空",
+        "- 修改内容 → `edit` 工具，oldText 填写要替换的文本",
+        "- 新建文件 → `write` 工具",
+        "- **禁止写入敏感信息**：API密钥、令牌等敏感信息严禁写入记忆文件",
        "",
-        "**写入记忆的正确方式**:",
-        "- 追加到现有文件末尾 → 用 `edit` 工具，oldText 留空",
-        "  例: edit(path=memory/2026-02-01.md, oldText=\"\", newText=\"\\n## 新内容\\n...\")",
-        "- 修改文件中的某段文字 → 用 `edit` 工具，oldText 填写要替换的文本",
-        "- 创建新文件 → 用 `write`",
+        "**使用原则**: 自然使用记忆，就像你本来就知道；不用刻意提起，除非用户问起。",
        "",
    ]
    
@@ -392,8 +391,8 @@ def _build_workspace_section(workspace_dir: str, language: str, is_first_convers
        "",
        "**交流规范**:",
        "",
-        "- 在所有对话中，无需提及技术细节（如 SOUL.md、USER.md 等文件名，工具名称，配置等），除非用户明确询问",
-        "- 用自然表达如「我已记住」而非「已更新 SOUL.md」",
+        "- 在对话中，非必要不输出工作空间技术细节（如 SOUL.md、USER.md等文件名称，工具名称，配置等），除非用户明确询问",
+        "- 例如用自然表达如「我已记住」而非「已更新 MEMORY.md」",
        "",
    ]
    
--- a/agent/prompt/workspace.py
+++ b/agent/prompt/workspace.py
@@ -64,7 +64,7 @@ def ensure_workspace(workspace_dir: str, create_templates: bool = True) -> Works
        _create_template_if_missing(agents_path, _get_agents_template())
        _create_template_if_missing(memory_path, _get_memory_template())
        
-        logger.info(f"[Workspace] Initialized workspace at: {workspace_dir}")
+        logger.debug(f"[Workspace] Initialized workspace at: {workspace_dir}")
    
    return WorkspaceFiles(
        soul_path=soul_path,
@@ -270,14 +270,9 @@ def _get_agents_template() -> str:
 2. **动态记忆 → MEMORY.md**（爱好、偏好、决策、目标、项目、教训、待办事项）
 3. **当天对话 → memory/YYYY-MM-DD.md**（今天聊的内容）

-**重要**: 
- 爱好（唱歌、篮球等）→ MEMORY.md，不是 USER.md
- 近期计划（下周要做什么）→ MEMORY.md，不是 USER.md
- USER.md 只存放不会变的基本信息
-
 ## 安全

- 永远不要泄露私人数据
+- 永远不要泄露秘钥等私人数据
 - 不要在未经询问的情况下运行破坏性命令
 - 当有疑问时，先问

--- a/agent/protocol/agent.py
+++ b/agent/protocol/agent.py
@@ -1,5 +1,6 @@
 import json
 import time
+import threading

 from common.log import logger
 from agent.protocol.models import LLMRequest, LLMModel
@@ -43,6 +44,7 @@ class Agent:
        self.output_mode = output_mode
        self.last_usage = None  # Store last API response usage info
        self.messages = []  # Unified message history for stream mode
+        self.messages_lock = threading.Lock()  # Lock for thread-safe message operations
        self.memory_manager = memory_manager  # Memory manager for auto memory flush
        self.workspace_dir = workspace_dir  # Workspace directory
        self.enable_skills = enable_skills  # Skills enabled flag
@@ -57,7 +59,7 @@ class Agent:
                try:
                    from agent.skills import SkillManager
                    self.skill_manager = SkillManager(workspace_dir=workspace_dir)
-                    logger.info(f"Initialized SkillManager with {len(self.skill_manager.skills)} skills")
+                    logger.debug(f"Initialized SkillManager with {len(self.skill_manager.skills)} skills")
                except Exception as e:
                    logger.warning(f"Failed to initialize SkillManager: {e}")
        
@@ -335,7 +337,8 @@ class Agent:
        """
        # Clear history if requested
        if clear_history:
-            self.messages = []
+            with self.messages_lock:
+                self.messages = []

        # Get model to use
        if not self.model:
@@ -344,7 +347,17 @@ class Agent:
        # Get full system prompt with skills
        full_system_prompt = self.get_full_system_prompt(skill_filter=skill_filter)

-        # Create stream executor with agent's message history
+        # Create a copy of messages for this execution to avoid concurrent modification
+        # Record the original length to track which messages are new
+        with self.messages_lock:
+            messages_copy = self.messages.copy()
+            original_length = len(self.messages)
+
+        # Get max_context_turns from config
+        from config import conf
+        max_context_turns = conf().get("agent_max_context_turns", 30)
+        
+        # Create stream executor with copied message history
        executor = AgentStreamExecutor(
            agent=self,
            model=self.model,
@@ -352,14 +365,18 @@ class Agent:
            tools=self.tools,
            max_turns=self.max_steps,
            on_event=on_event,
-            messages=self.messages  # Pass agent's message history
+            messages=messages_copy,  # Pass copied message history
+            max_context_turns=max_context_turns
        )

        # Execute
        response = executor.run_stream(user_message)

-        # Update agent's message history from executor
-        self.messages = executor.messages
+        # Append only the NEW messages from this execution (thread-safe)
+        # This allows concurrent requests to both contribute to history
+        with self.messages_lock:
+            new_messages = executor.messages[original_length:]
+            self.messages.extend(new_messages)
        
        # Store executor reference for agent_bridge to access files_to_send
        self.stream_executor = executor
--- a/agent/protocol/agent_stream.py
+++ b/agent/protocol/agent_stream.py
@@ -31,7 +31,8 @@ class AgentStreamExecutor:
            tools: List[BaseTool],
            max_turns: int = 50,
            on_event: Optional[Callable] = None,
-            messages: Optional[List[Dict]] = None
+            messages: Optional[List[Dict]] = None,
+            max_context_turns: int = 30
    ):
        """
        Initialize stream executor
@@ -44,6 +45,7 @@ class AgentStreamExecutor:
            max_turns: Maximum number of turns
            on_event: Event callback function
            messages: Optional existing message history (for persistent conversations)
+            max_context_turns: Maximum number of conversation turns to keep in context
        """
        self.agent = agent
        self.model = model
@@ -52,6 +54,7 @@ class AgentStreamExecutor:
        self.tools = {tool.name: tool for tool in tools} if isinstance(tools, list) else tools
        self.max_turns = max_turns
        self.on_event = on_event
+        self.max_context_turns = max_context_turns

        # Message history - use provided messages or create new list
        self.messages = messages if messages is not None else []
@@ -147,10 +150,7 @@ class AgentStreamExecutor:
            Final response text
        """
        # Log user message with model info
-        logger.info(f"{'='*50}")
-        logger.info(f"🤖 Model: {self.model.model}")
-        logger.info(f"👤 用户: {user_message}")
-        logger.info(f"{'='*50}")
+        logger.info(f"🤖 {self.model.model} | 👤 {user_message}")
        
        # Add user message (Claude format - use content blocks for consistency)
        self.messages.append({
@@ -171,7 +171,7 @@ class AgentStreamExecutor:
        try:
            while turn < self.max_turns:
                turn += 1
-                logger.info(f"第 {turn} 轮")
+                logger.debug(f"第 {turn} 轮")
                self._emit_event("turn_start", {"turn": turn})

                # Check if memory flush is needed (before calling LLM)
@@ -238,7 +238,7 @@ class AgentStreamExecutor:
                    else:
                        logger.info(f"💭 {assistant_msg[:150]}{'...' if len(assistant_msg) > 150 else ''}")
                    
-                    logger.info(f"✅ 完成 (无工具调用)")
+                    logger.debug(f"✅ 完成 (无工具调用)")
                    self._emit_event("turn_end", {
                        "turn": turn,
                        "has_tool_calls": False
@@ -350,11 +350,37 @@ class AgentStreamExecutor:
                })

            if turn >= self.max_turns:
-                logger.warning(f"⚠️  已达到最大轮数限制: {self.max_turns}")
-                if not final_response:
+                logger.warning(f"⚠️  已达到最大决策步数限制: {self.max_turns}")
+                
+                # Force model to summarize without tool calls
+                logger.info(f"[Agent] Requesting summary from LLM after reaching max steps...")
+                
+                # Add a system message to force summary
+                self.messages.append({
+                    "role": "user",
+                    "content": [{
+                        "type": "text",
+                        "text": f"你已经执行了{turn}个决策步骤，达到了单次运行的最大步数限制。请总结一下你目前的执行过程和结果，告诉用户当前的进展情况。不要再调用工具，直接用文字回复。"
+                    }]
+                })
+                
+                # Call LLM one more time to get summary (without retry to avoid loops)
+                try:
+                    summary_response, summary_tools = self._call_llm_stream(retry_on_empty=False)
+                    if summary_response:
+                        final_response = summary_response
+                        logger.info(f"💭 Summary: {summary_response[:150]}{'...' if len(summary_response) > 150 else ''}")
+                    else:
+                        # Fallback if model still doesn't respond
+                        final_response = (
+                            f"我已经执行了{turn}个决策步骤，达到了单次运行的步数上限。"
+                            "任务可能还未完全完成，建议你将任务拆分成更小的步骤，或者换一种方式描述需求。"
+                        )
+                except Exception as e:
+                    logger.warning(f"Failed to get summary from LLM: {e}")
                    final_response = (
-                        "抱歉，我在处理你的请求时遇到了一些困难，尝试了多次仍未能完成。"
-                        "请尝试简化你的问题，或换一种方式描述。"
+                        f"我已经执行了{turn}个决策步骤，达到了单次运行的步数上限。"
+                        "任务可能还未完全完成，建议你将任务拆分成更小的步骤，或者换一种方式描述需求。"
                    )

        except Exception as e:
@@ -363,7 +389,7 @@ class AgentStreamExecutor:
            raise

        finally:
-            logger.info(f"🏁 完成({turn}轮)")
+            logger.debug(f"🏁 完成({turn}轮)")
            self._emit_event("agent_end", {"final_response": final_response})

            # 每轮对话结束后增加计数（用户消息+AI回复=1轮）
@@ -783,54 +809,174 @@ class AgentStreamExecutor:
                        logger.warning(f"⚠️ Removing incomplete tool_use message from history")
                        self.messages.pop()

+    def _identify_complete_turns(self) -> List[Dict]:
+        """
+        Group self.messages into complete conversation turns.
+        
+        A complete turn consists of:
+        1. A user message (text)
+        2. The AI reply (may contain tool_use)
+        3. Tool results (tool_result), if any
+        4. Follow-up AI replies, if any
+        
+        Returns:
+            List of turns, each turn is a dict with 'messages' list
+        """
+        turns = []
+        current_turn = {'messages': []}
+        
+        for msg in self.messages:
+            role = msg.get('role')
+            content = msg.get('content', [])
+            
+            if role == 'user':
+                # A user message is a query when its content is a str or a list holding any 'text' block; otherwise it is handled as a tool result
+                is_user_query = False
+                if isinstance(content, list):
+                    is_user_query = any(
+                        block.get('type') == 'text' 
+                        for block in content 
+                        if isinstance(block, dict)
+                    )
+                elif isinstance(content, str):
+                    is_user_query = True
+                
+                if is_user_query:
+                    # Start a new turn, closing the current one first if it holds any messages
+                    if current_turn['messages']:
+                        turns.append(current_turn)
+                    current_turn = {'messages': [msg]}
+                else:
+                    # Tool result: belongs to the current turn
+                    current_turn['messages'].append(msg)
+            else:
+                # Non-user message (e.g. an AI reply): belongs to the current turn
+                current_turn['messages'].append(msg)
+        
+        # Append the final turn
+        if current_turn['messages']:
+            turns.append(current_turn)
+        
+        return turns
+    
+    def _estimate_turn_tokens(self, turn: Dict) -> int:
+        """Estimate the tokens of one turn by summing the agent's per-message estimates."""
+        return sum(
+            self.agent._estimate_message_tokens(msg) 
+            for msg in turn['messages']
+        )
+
    def _trim_messages(self):
        """
-        Trim message history to stay within context limits.
-        Uses agent's context management configuration.
+        Trim message history while keeping conversations intact.
+        
+        Complete turns are the unit of trimming, so that:
+        1. History is not cut in the middle of a turn
+        2. Tool call chains (tool_use + tool_result) stay together
+        3. Each kept turn is whole (user message + AI replies + tool calls)
        """
        if not self.messages or not self.agent:
            return

+        # Step 1: identify complete turns
+        turns = self._identify_complete_turns()
+        
+        if not turns:
+            return
+        
+        # Step 2: turn limit - keep the most recent max_context_turns turns
+        if len(turns) > self.max_context_turns:
+            removed_turns = len(turns) - self.max_context_turns
+            turns = turns[-self.max_context_turns:]  # keep the newest turns (NOTE(review): [-0:] keeps everything if max_context_turns is 0)
+            
+            logger.info(
+                f"💾 上下文轮次超限: {len(turns) + removed_turns} > {self.max_context_turns}，"
+                f"移除最早的 {removed_turns} 轮完整对话"
+            )
+
+        # Step 3: token limit - keep complete turns
        # Get context window from agent (based on model)
        context_window = self.agent._get_model_context_window()

-        # Reserve 10% for response generation
-        reserve_tokens = int(context_window * 0.1)
-        max_tokens = context_window - reserve_tokens
+        # Use configured max_context_tokens if available
+        if hasattr(self.agent, 'max_context_tokens') and self.agent.max_context_tokens:
+            max_tokens = self.agent.max_context_tokens
+        else:
+            # Reserve 10% for response generation
+            reserve_tokens = int(context_window * 0.1)
+            max_tokens = context_window - reserve_tokens

-        # Estimate current tokens
-        current_tokens = sum(self.agent._estimate_message_tokens(msg) for msg in self.messages)
-
-        # Add system prompt tokens
+        # Estimate system prompt tokens
        system_tokens = self.agent._estimate_message_tokens({"role": "system", "content": self.system_prompt})
-        current_tokens += system_tokens
+        available_tokens = max_tokens - system_tokens

-        # If under limit, no need to trim
-        if current_tokens <= max_tokens:
+        # Calculate current tokens
+        current_tokens = sum(self._estimate_turn_tokens(turn) for turn in turns)
+        
+        # If under limit, reconstruct messages and return
+        if current_tokens + system_tokens <= max_tokens:
+            # Reconstruct message list from turns
+            new_messages = []
+            for turn in turns:
+                new_messages.extend(turn['messages'])
+            
+            old_count = len(self.messages)
+            self.messages = new_messages
+            
+            # Log if we removed messages due to turn limit
+            if old_count > len(self.messages):
+                logger.info(f"   重建消息列表: {old_count} -> {len(self.messages)} 条消息")
            return

-        # Keep messages from newest, accumulating tokens
-        available_tokens = max_tokens - system_tokens
-        kept_messages = []
+        # Token limit exceeded - keep complete turns from newest
+        logger.info(
+            f"🔄 上下文tokens超限: ~{current_tokens + system_tokens} > {max_tokens}，"
+            f"将按完整轮次移除最早的对话"
+        )
+
+        # Accumulate whole turns walking backwards from the newest
+        kept_turns = []
        accumulated_tokens = 0
-
-        for msg in reversed(self.messages):
-            msg_tokens = self.agent._estimate_message_tokens(msg)
-            if accumulated_tokens + msg_tokens <= available_tokens:
-                kept_messages.insert(0, msg)
-                accumulated_tokens += msg_tokens
+        min_turns = 3  # try to keep at least 3 turns, but not strictly (to avoid exceeding the token limit)
+        
+        for i, turn in enumerate(reversed(turns)):
+            turn_tokens = self._estimate_turn_tokens(turn)
+            turns_from_end = i + 1
+            
+            # Keep the turn if it still fits in the budget
+            if accumulated_tokens + turn_tokens <= available_tokens:
+                kept_turns.insert(0, turn)
+                accumulated_tokens += turn_tokens
            else:
+                # Over the budget
+                # If fewer than min_turns are kept and this turn is among the newest min_turns, try to keep it
+                if len(kept_turns) < min_turns and turns_from_end <= min_turns:
+                    # Give up when the overflow is 20% or more. NOTE(review): available_tokens is max_tokens - system_tokens; confirm it is always > 0 before this division
+                    overflow_ratio = (accumulated_tokens + turn_tokens - available_tokens) / available_tokens
+                    if overflow_ratio < 0.2:  # allow up to 20% overflow
+                        kept_turns.insert(0, turn)
+                        accumulated_tokens += turn_tokens
+                        logger.debug(f"   为保留最少轮次，允许超出 {overflow_ratio*100:.1f}%")
+                        continue
+                # Stop; earlier turns are dropped
                break
-
+        
+        # Rebuild the message list (NOTE(review): this is empty if even the newest turn overflows the budget by 20% or more)
+        new_messages = []
+        for turn in kept_turns:
+            new_messages.extend(turn['messages'])
+        
        old_count = len(self.messages)
-        self.messages = kept_messages
+        old_turn_count = len(turns)
+        self.messages = new_messages
        new_count = len(self.messages)
-
+        new_turn_count = len(kept_turns)
+        
        if old_count > new_count:
            logger.info(
-                f"Context trimmed: {old_count} -> {new_count} messages "
-                f"(~{current_tokens} -> ~{system_tokens + accumulated_tokens} tokens, "
-                f"limit: {max_tokens})"
+                f"   移除了 {old_turn_count - new_turn_count} 轮对话 "
+                f"({old_count} -> {new_count} 条消息，"
+                f"~{current_tokens + system_tokens} -> ~{accumulated_tokens + system_tokens} tokens)"
            )

    def _prepare_messages(self) -> List[Dict[str, Any]]:
--- a/agent/skills/loader.py
+++ b/agent/skills/loader.py
@@ -259,7 +259,7 @@ class SkillLoader:
            for diag in all_diagnostics[:5]:  # Log first 5
                logger.debug(f"  - {diag}")
        
-        logger.info(f"Loaded {len(skill_map)} skills from all sources")
+        logger.debug(f"Loaded {len(skill_map)} skills from all sources")
        
        return skill_map
    
--- a/agent/skills/manager.py
+++ b/agent/skills/manager.py
@@ -59,7 +59,7 @@ class SkillManager:
            extra_dirs=self.extra_dirs,
        )
        
-        logger.info(f"SkillManager: Loaded {len(self.skills)} skills")
+        logger.debug(f"SkillManager: Loaded {len(self.skills)} skills")
    
    def get_skill(self, name: str) -> Optional[SkillEntry]:
        """
--- a/agent/tools/init.py
+++ b/agent/tools/init.py
@@ -46,32 +46,6 @@ def _import_optional_tools():
    except Exception as e:
        logger.error(f"[Tools] Scheduler tool failed to load: {e}")
    
-    # Google Search (requires requests)
-    try:
-        from agent.tools.google_search.google_search import GoogleSearch
-        tools['GoogleSearch'] = GoogleSearch
-    except ImportError as e:
-        logger.warning(f"[Tools] GoogleSearch tool not loaded - missing dependency: {e}")
-    except Exception as e:
-        logger.error(f"[Tools] GoogleSearch tool failed to load: {e}")
-    
-    # File Save (may have dependencies)
-    try:
-        from agent.tools.file_save.file_save import FileSave
-        tools['FileSave'] = FileSave
-    except ImportError as e:
-        logger.warning(f"[Tools] FileSave tool not loaded - missing dependency: {e}")
-    except Exception as e:
-        logger.error(f"[Tools] FileSave tool failed to load: {e}")
-    
-    # Terminal (basic, should work)
-    try:
-        from agent.tools.terminal.terminal import Terminal
-        tools['Terminal'] = Terminal
-    except ImportError as e:
-        logger.warning(f"[Tools] Terminal tool not loaded - missing dependency: {e}")
-    except Exception as e:
-        logger.error(f"[Tools] Terminal tool failed to load: {e}")
    
    return tools

@@ -102,7 +76,7 @@ def _import_browser_tool():


 # Dynamically set BrowserTool
-BrowserTool = _import_browser_tool()
+# BrowserTool = _import_browser_tool()

 # Export all tools (including optional ones that might be None)
 __all__ = [
@@ -119,10 +93,7 @@ __all__ = [
    'EnvConfig',
    'SchedulerTool',
    # Optional tools (may be None if dependencies not available)
-    'GoogleSearch',
-    'FileSave',
-    'Terminal',
-    'BrowserTool'
+    # 'BrowserTool'
 ]

 """
--- a/agent/tools/memory/memory_get.py
+++ b/agent/tools/memory/memory_get.py
@@ -20,7 +20,7 @@ class MemoryGetTool(BaseTool):
        "properties": {
            "path": {
                "type": "string",
-                "description": "Relative path to the memory file (e.g. 'memory/2026-01-01.md')"
+                "description": "Relative path to the memory file (e.g. 'MEMORY.md', 'memory/2026-01-01.md')"
            },
            "start_line": {
                "type": "integer",
--- a/agent/tools/scheduler/integration.py
+++ b/agent/tools/scheduler/integration.py
@@ -36,7 +36,7 @@ def init_scheduler(agent_bridge) -> bool:
        
        # Create task store
        _task_store = TaskStore(store_path)
-        logger.info(f"[Scheduler] Task store initialized: {store_path}")
+        logger.debug(f"[Scheduler] Task store initialized: {store_path}")
        
        # Create execute callback
        def execute_task_callback(task: dict):
@@ -65,7 +65,7 @@ def init_scheduler(agent_bridge) -> bool:
        _scheduler_service = SchedulerService(_task_store, execute_task_callback)
        _scheduler_service.start()
        
-        logger.info("[Scheduler] Scheduler service initialized and started")
+        logger.debug("[Scheduler] Scheduler service initialized and started")
        return True
        
    except Exception as e:
--- a/agent/tools/scheduler/scheduler_service.py
+++ b/agent/tools/scheduler/scheduler_service.py
@@ -39,7 +39,7 @@ class SchedulerService:
            self.running = True
            self.thread = threading.Thread(target=self._run_loop, daemon=True)
            self.thread.start()
-            logger.info("[Scheduler] Service started")
+            logger.debug("[Scheduler] Service started")
    
    def stop(self):
        """Stop the scheduler service"""
@@ -54,7 +54,7 @@ class SchedulerService:
    
    def _run_loop(self):
        """Main scheduler loop"""
-        logger.info("[Scheduler] Scheduler loop started")
+        logger.debug("[Scheduler] Scheduler loop started")
        
        while self.running:
            try: