feat: personal ai agent framework

2026-07-18 12:07:15 +08:00 · 2026-01-30 09:53:46 +08:00
parent 25cf6823d0
commit bb850bb6c5
62 changed files with 7675 additions and 275 deletions
--- a/agent/protocol/init.py
+++ b/agent/protocol/init.py
@@ -0,0 +1,20 @@
+from .agent import Agent
+from .agent_stream import AgentStreamExecutor
+from .task import Task, TaskType, TaskStatus
+from .result import AgentResult, AgentAction, AgentActionType, ToolResult
+from .models import LLMModel, LLMRequest, ModelFactory
+
+__all__ = [
+    'Agent', 
+    'AgentStreamExecutor',
+    'Task', 
+    'TaskType', 
+    'TaskStatus',
+    'AgentResult',
+    'AgentAction',
+    'AgentActionType', 
+    'ToolResult',
+    'LLMModel',
+    'LLMRequest', 
+    'ModelFactory'
+]
--- a/agent/protocol/agent.py
+++ b/agent/protocol/agent.py
@@ -0,0 +1,292 @@
+import json
+import time
+
+from common.log import logger
+from agent.protocol.models import LLMRequest, LLMModel
+from agent.protocol.agent_stream import AgentStreamExecutor
+from agent.protocol.result import AgentAction, AgentActionType, ToolResult, AgentResult
+from agent.tools.base_tool import BaseTool, ToolStage
+
+
+class Agent:
+    """
+    A single LLM-driven agent holding a system prompt, a model, tools and message history.
+
+    The tool-call reasoning loop itself is delegated to AgentStreamExecutor (see run_stream);
+    this class stores configuration, conversation state and captured tool actions, and
+    offers helpers for context-window sizing and rough token estimation.
+    """
+
+    def __init__(self, system_prompt: str, description: str = "AI Agent", model: LLMModel = None,
+                 tools=None, output_mode="print", max_steps=100, max_context_tokens=None,
+                 context_reserve_tokens=None, memory_manager=None, name: str = None):
+        """
+        Initialize the Agent with system prompt, model, description.
+
+        :param system_prompt: The system prompt for the agent.
+        :param description: A description of the agent.
+        :param model: An instance of LLMModel to be used by the agent.
+        :param tools: Optional list of tools for the agent to use.
+        :param output_mode: Control how execution progress is displayed:
+                           "print" for console output; any other value routes non-empty
+                           messages to the logger
+        :param max_steps: Maximum number of steps the agent can take (default: 100)
+        :param max_context_tokens: Maximum tokens to keep in context (stored as given; default: None)
+        :param context_reserve_tokens: Reserve tokens for new requests (default: None, auto-calculated
+                                       by _get_context_reserve_tokens)
+        :param memory_manager: Optional MemoryManager instance for memory operations
+        :param name: [Deprecated] The name of the agent (defaults to "Agent")
+        """
+        self.name = name or "Agent"
+        self.system_prompt = system_prompt
+        self.model: LLMModel = model  # Instance of LLMModel
+        self.description = description
+        self.tools: list = []
+        self.max_steps = max_steps  # max tool-call steps, default 100
+        self.max_context_tokens = max_context_tokens  # max tokens in context
+        self.context_reserve_tokens = context_reserve_tokens  # reserve tokens for new requests
+        self.captured_actions = []  # Initialize captured actions list
+        self.output_mode = output_mode
+        self.last_usage = None  # Store last API response usage info
+        self.messages = []  # Unified message history for stream mode
+        self.memory_manager = memory_manager  # Memory manager for auto memory flush
+        if tools:
+            for tool in tools:
+                self.add_tool(tool)
+
+    def add_tool(self, tool: BaseTool):
+        """
+        Add a tool instance to the agent.
+
+        The agent's model is assigned to ``tool.model`` before the tool is registered.
+
+        :param tool: The tool instance to add
+        """
+        tool.model = self.model
+        self.tools.append(tool)
+
+    def _get_model_context_window(self) -> int:
+        """
+        Get the model's context window size in tokens, guessed from the model name.
+
+        Values returned per substring match on the lower-cased model name:
+        - 'claude-3' / 'claude-sonnet': 200000
+        - 'gpt-4' with 'turbo' or '128k': 128000; with '32k': 32000; otherwise 8000
+        - 'gpt-3.5' with '16k': 16000; otherwise 4000
+        - 'deepseek': 64000
+        - anything else, or no usable model name: 10000
+
+        :return: Context window size in tokens
+        """
+        model_name = getattr(self.model, 'model', None) if self.model else None
+
+        # LLMModel allows model=None, so only inspect genuine strings
+        if isinstance(model_name, str):
+            model_name = model_name.lower()
+
+            # Claude models - 200K context
+            if 'claude-3' in model_name or 'claude-sonnet' in model_name:
+                return 200000
+
+            # GPT-4 models
+            if 'gpt-4' in model_name:
+                if 'turbo' in model_name or '128k' in model_name:
+                    return 128000
+                if '32k' in model_name:
+                    return 32000
+                return 8000
+
+            # GPT-3.5
+            if 'gpt-3.5' in model_name:
+                return 16000 if '16k' in model_name else 4000
+
+            # DeepSeek
+            if 'deepseek' in model_name:
+                return 64000
+
+        # Default conservative value
+        return 10000
+
+    def _get_context_reserve_tokens(self) -> int:
+        """
+        Get the number of tokens to reserve for new requests.
+
+        An explicitly configured ``context_reserve_tokens`` wins; otherwise 20% of the
+        model context window is reserved, with a floor of 4000 tokens.
+
+        :return: Number of tokens to reserve
+        """
+        if self.context_reserve_tokens is not None:
+            return self.context_reserve_tokens
+
+        # Reserve ~20% of context window for new requests
+        context_window = self._get_model_context_window()
+        return max(4000, int(context_window * 0.2))
+
+    def _estimate_message_tokens(self, message: dict) -> int:
+        """
+        Estimate token count for a message using a chars/4 heuristic.
+
+        This is only a rough approximation; it is not a tokenizer.
+        String content is measured directly. List content is measured block by block,
+        including tool_use and tool_result blocks so that tool-heavy history is not
+        counted as (almost) free.
+
+        :param message: Message dict with 'role' and 'content'
+        :return: Estimated token count (always >= 1)
+        """
+        content = message.get('content', '')
+        if isinstance(content, str):
+            return max(1, len(content) // 4)
+        if isinstance(content, list):
+            total_chars = sum(self._estimate_part_chars(part) for part in content)
+            return max(1, total_chars // 4)
+        return 1
+
+    def _estimate_part_chars(self, part) -> int:
+        """Return the approximate character count of one content block (0 if unknown)."""
+        if not isinstance(part, dict):
+            return 0
+
+        part_type = part.get('type')
+        if part_type == 'text':
+            return len(part.get('text') or '')
+        if part_type == 'image':
+            # Estimate images as ~1200 tokens
+            return 4800
+        if part_type == 'tool_use':
+            # The tool name and its JSON-encoded input are both sent to the model
+            encoded_input = json.dumps(part.get('input') or {}, ensure_ascii=False, default=str)
+            return len(part.get('name') or '') + len(encoded_input)
+        if part_type == 'tool_result':
+            payload = part.get('content', '')
+            if isinstance(payload, str):
+                return len(payload)
+            if isinstance(payload, list):
+                # Nested content blocks (e.g. text/image) inside a tool result
+                return sum(self._estimate_part_chars(inner) for inner in payload)
+            return len(str(payload))
+        return 0
+
+    def _find_tool(self, tool_name: str):
+        """
+        Find and return a tool with the specified name.
+
+        Only the first tool whose name matches is considered. It is returned (with
+        ``model`` and ``context`` set) when its stage is PRE_PROCESS; otherwise a
+        warning is logged and None is returned.
+
+        :param tool_name: Name of the tool to look up
+        :return: The tool instance, or None if missing or not directly callable
+        """
+        for tool in self.tools:
+            if tool.name == tool_name:
+                # Only pre-process stage tools can be actively called
+                if tool.stage == ToolStage.PRE_PROCESS:
+                    tool.model = self.model
+                    tool.context = self  # Set tool context
+                    return tool
+                else:
+                    # If it's a post-process tool, return None to prevent direct calling
+                    logger.warning(f"Tool {tool_name} is a post-process tool and cannot be called directly.")
+                    return None
+        return None
+
+    # output function based on mode
+    def output(self, message="", end="\n"):
+        """
+        Emit a progress message.
+
+        In "print" mode the message is printed (even when empty); in any other mode
+        only non-empty messages are sent to ``logger.info``.
+
+        :param message: Text to emit
+        :param end: Line terminator passed to print (ignored by the logger path)
+        """
+        if self.output_mode == "print":
+            print(message, end=end)
+        elif message:
+            logger.info(message)
+
+    def _execute_post_process_tools(self):
+        """
+        Execute all post-process stage tools, in registration order.
+
+        Each tool receives this agent as ``context`` and is executed with empty
+        parameters; the outcome is recorded via capture_tool_use and echoed via output.
+        """
+        # Get all post-process stage tools
+        post_process_tools = [tool for tool in self.tools if tool.stage == ToolStage.POST_PROCESS]
+
+        # Execute each tool
+        for tool in post_process_tools:
+            # Set tool context
+            tool.context = self
+
+            # Record start time for execution timing
+            start_time = time.time()
+
+            # Execute tool (with empty parameters, tool will extract needed info from context)
+            # NOTE(review): AgentStreamExecutor calls tool.execute_tool(...) instead - confirm
+            # against BaseTool which entry point is intended here.
+            result = tool.execute({})
+
+            # Calculate execution time
+            execution_time = time.time() - start_time
+
+            # Capture tool use for tracking
+            self.capture_tool_use(
+                tool_name=tool.name,
+                input_params={},  # Post-process tools typically don't take parameters
+                output=result.result,
+                status=result.status,
+                error_message=str(result.result) if result.status == "error" else None,
+                execution_time=execution_time
+            )
+
+            # Log result
+            if result.status == "success":
+                # default=str keeps a non-JSON-serializable result from crashing the run
+                # after the response has already been produced
+                self.output(f"\n🛠️ {tool.name}: {json.dumps(result.result, default=str)}")
+            else:
+                # Print failure in print mode
+                self.output(f"\n🛠️ {tool.name}: {json.dumps({'status': 'error', 'message': str(result.result)})}")
+
+    def capture_tool_use(self, tool_name, input_params, output, status, thought=None, error_message=None,
+                         execution_time=0.0):
+        """
+        Capture a tool use action and append it to ``captured_actions``.
+
+        :param tool_name: Name of the tool used
+        :param input_params: Parameters passed to the tool
+        :param output: Output from the tool
+        :param status: Status of the tool execution
+        :param thought: thought content
+        :param error_message: Error message if the tool execution failed
+        :param execution_time: Time taken to execute the tool
+        :return: The AgentAction that was recorded
+        """
+        tool_result = ToolResult(
+            tool_name=tool_name,
+            input_params=input_params,
+            output=output,
+            status=status,
+            error_message=error_message,
+            execution_time=execution_time
+        )
+
+        action = AgentAction(
+            # Fall back to the object identity when no explicit id attribute was set
+            agent_id=self.id if hasattr(self, 'id') else str(id(self)),
+            agent_name=self.name,
+            action_type=AgentActionType.TOOL_USE,
+            tool_result=tool_result,
+            thought=thought
+        )
+
+        self.captured_actions.append(action)
+
+        return action
+
+    def run_stream(self, user_message: str, on_event=None, clear_history: bool = False) -> str:
+        """
+        Execute single agent task with streaming (based on tool-call)
+
+        This method supports:
+        - Streaming output
+        - Multi-turn reasoning based on tool-call
+        - Event callbacks
+        - Persistent conversation history across calls
+
+        Args:
+            user_message: User message
+            on_event: Event callback function callback(event: dict)
+                     event = {"type": str, "timestamp": float, "data": dict}
+            clear_history: If True, clear conversation history before this call (default: False)
+
+        Returns:
+            Final response text
+
+        Raises:
+            ValueError: If the agent has no model
+
+        Example:
+            # Multi-turn conversation with memory
+            response1 = agent.run_stream("My name is Alice")
+            response2 = agent.run_stream("What's my name?")  # Will remember Alice
+
+            # Single-turn without memory
+            response = agent.run_stream("Hello", clear_history=True)
+        """
+        # Clear history if requested
+        if clear_history:
+            self.messages = []
+
+        # Get model to use
+        if not self.model:
+            raise ValueError("No model available for agent")
+
+        # Create stream executor with agent's message history
+        executor = AgentStreamExecutor(
+            agent=self,
+            model=self.model,
+            system_prompt=self.system_prompt,
+            tools=self.tools,
+            max_turns=self.max_steps,
+            on_event=on_event,
+            messages=self.messages  # Pass agent's message history
+        )
+
+        # Execute
+        response = executor.run_stream(user_message)
+
+        # Update agent's message history from executor (it may have been replaced by trimming)
+        self.messages = executor.messages
+
+        # Execute all post-process tools
+        self._execute_post_process_tools()
+
+        return response
+
+    def clear_history(self):
+        """Clear conversation history and captured actions"""
+        self.messages = []
+        self.captured_actions = []
--- a/agent/protocol/agent_stream.py
+++ b/agent/protocol/agent_stream.py
@@ -0,0 +1,461 @@
+"""
+Agent Stream Execution Module - Multi-turn reasoning based on tool-call
+
+Provides streaming output, event system, and complete tool-call loop
+"""
+import json
+import time
+from typing import List, Dict, Any, Optional, Callable
+
+from common.log import logger
+from agent.protocol.models import LLMRequest, LLMModel
+from agent.tools.base_tool import BaseTool, ToolResult
+
+
+class AgentStreamExecutor:
+    """
+    Agent Stream Executor
+
+    Handles multi-turn reasoning loop based on tool-call:
+    1. LLM generates response (may include tool calls)
+    2. Execute tools
+    3. Return results to LLM
+    4. Repeat until no more tool calls (or max_turns is reached)
+
+    Message history is kept in Claude-style content blocks (text / tool_use / tool_result).
+    """
+
+    def __init__(
+            self,
+            agent,  # Agent instance
+            model: LLMModel,
+            system_prompt: str,
+            tools: List[BaseTool],
+            max_turns: int = 50,
+            on_event: Optional[Callable] = None,
+            messages: Optional[List[Dict]] = None
+    ):
+        """
+        Initialize stream executor
+
+        Args:
+            agent: Agent instance (for accessing context)
+            model: LLM model
+            system_prompt: System prompt
+            tools: List of available tools (a ready-made name -> tool mapping is used as-is)
+            max_turns: Maximum number of turns
+            on_event: Event callback function
+            messages: Optional existing message history (for persistent conversations);
+                      the list is appended to in place
+        """
+        self.agent = agent
+        self.model = model
+        self.system_prompt = system_prompt
+        # Convert tools list to dict
+        self.tools = {tool.name: tool for tool in tools} if isinstance(tools, list) else tools
+        self.max_turns = max_turns
+        self.on_event = on_event
+
+        # Message history - use provided messages or create new list
+        self.messages = messages if messages is not None else []
+
+    def _emit_event(self, event_type: str, data: dict = None):
+        """
+        Emit an event to the on_event callback, if one is configured.
+
+        Exceptions raised by the callback are logged and swallowed so that a faulty
+        listener cannot break the reasoning loop.
+
+        Args:
+            event_type: Event name, stored under "type"
+            data: Event payload, stored under "data" (defaults to an empty dict)
+        """
+        if self.on_event:
+            try:
+                self.on_event({
+                    "type": event_type,
+                    "timestamp": time.time(),
+                    "data": data or {}
+                })
+            except Exception as e:
+                logger.error(f"Event callback error: {e}")
+
+    def run_stream(self, user_message: str) -> str:
+        """
+        Execute streaming reasoning loop
+
+        Args:
+            user_message: User message
+
+        Returns:
+            Final response text (text of the last assistant turn)
+
+        Raises:
+            Exception: Any error from the LLM call is logged, reported through an
+                "error" event and re-raised; "agent_end" is emitted in every case.
+        """
+        # Log user message
+        logger.info(f"\n{'='*50}")
+        logger.info(f"👤 用户: {user_message}")
+        logger.info(f"{'='*50}")
+
+        # Add user message (Claude format - use content blocks for consistency)
+        self.messages.append({
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": user_message
+                }
+            ]
+        })
+
+        self._emit_event("agent_start")
+
+        final_response = ""
+        turn = 0
+        finished = False  # True once the model answers without requesting tools
+
+        try:
+            while turn < self.max_turns:
+                turn += 1
+                logger.info(f"\n{'='*50} 第 {turn} 轮 {'='*50}")
+                self._emit_event("turn_start", {"turn": turn})
+
+                # Check if memory flush is needed (before calling LLM)
+                self._check_memory_flush()
+
+                # Call LLM
+                assistant_msg, tool_calls = self._call_llm_stream()
+                final_response = assistant_msg
+
+                # No tool calls, end loop
+                if not tool_calls:
+                    if assistant_msg:
+                        logger.info(f"💭 {assistant_msg[:150]}{'...' if len(assistant_msg) > 150 else ''}")
+                    logger.info(f"✅ 完成 (无工具调用)")
+                    self._emit_event("turn_end", {
+                        "turn": turn,
+                        "has_tool_calls": False
+                    })
+                    finished = True
+                    break
+
+                # Log tool calls in compact format
+                tool_names = [tc['name'] for tc in tool_calls]
+                logger.info(f"🔧 调用工具: {', '.join(tool_names)}")
+
+                # Execute tools
+                tool_result_blocks = []
+
+                for tool_call in tool_calls:
+                    result = self._execute_tool(tool_call)
+
+                    # Log tool result in compact format
+                    status_emoji = "✅" if result.get("status") == "success" else "❌"
+                    result_str = str(result.get('result', ''))
+                    logger.info(f"  {status_emoji} {tool_call['name']} ({result.get('execution_time', 0):.2f}s): {result_str[:200]}{'...' if len(result_str) > 200 else ''}")
+
+                    # Build tool result block (Claude format)
+                    # Content should be a string representation of the result.
+                    # default=str: a non-JSON-serializable tool result must not abort the run
+                    # and leave a tool_use block without its tool_result in the history.
+                    # ensure_ascii=False: keep non-ASCII text readable instead of \uXXXX escapes.
+                    if isinstance(result, str):
+                        result_content = result
+                    else:
+                        result_content = json.dumps(result, ensure_ascii=False, default=str)
+                    tool_result_blocks.append({
+                        "type": "tool_result",
+                        "tool_use_id": tool_call["id"],
+                        "content": result_content
+                    })
+
+                # Add tool results to message history as user message (Claude format)
+                self.messages.append({
+                    "role": "user",
+                    "content": tool_result_blocks
+                })
+
+                self._emit_event("turn_end", {
+                    "turn": turn,
+                    "has_tool_calls": True,
+                    "tool_count": len(tool_calls)
+                })
+
+            # Warn only when the loop ran out of turns, not when the model finished
+            # normally on the last allowed turn
+            if not finished:
+                logger.warning(f"⚠️  已达到最大轮数限制: {self.max_turns}")
+
+        except Exception as e:
+            logger.error(f"❌ Agent执行错误: {e}")
+            self._emit_event("error", {"error": str(e)})
+            raise
+
+        finally:
+            logger.info(f"{'='*50} 完成({turn}轮) {'='*50}\n")
+            self._emit_event("agent_end", {"final_response": final_response})
+
+        return final_response
+
+    def _check_memory_flush(self):
+        """
+        Emit "memory_flush_start" when the agent's memory manager recommends a flush.
+
+        Uses the input token count of the agent's last recorded usage. No flush is
+        actually performed here; the recommendation is only logged and reported.
+        """
+        if not (self.agent.memory_manager and hasattr(self.agent, 'last_usage')):
+            return
+
+        usage = self.agent.last_usage
+        if not usage or 'input_tokens' not in usage:
+            return
+
+        current_tokens = usage.get('input_tokens', 0)
+        context_window = self.agent._get_model_context_window()
+        reserve_tokens = self.agent.context_reserve_tokens or 20000
+
+        if self.agent.memory_manager.should_flush_memory(
+                current_tokens=current_tokens,
+                context_window=context_window,
+                reserve_tokens=reserve_tokens
+        ):
+            self._emit_event("memory_flush_start", {
+                "current_tokens": current_tokens,
+                "threshold": context_window - reserve_tokens - 4000
+            })
+
+            # TODO: Execute memory flush in background
+            # This would require async support
+            logger.info(f"Memory flush recommended at {current_tokens} tokens")
+
+    def _call_llm_stream(self) -> tuple[str, List[Dict]]:
+        """
+        Call LLM with streaming
+
+        Consumes OpenAI-style stream chunks ({"choices": [{"delta": ...}]}), accumulates
+        text and tool-call fragments, then appends the assistant turn to the history as
+        Claude-style content blocks.
+
+        Returns:
+            (response_text, tool_calls) where each tool call is
+            {"id": str, "name": str, "arguments": dict}
+
+        Raises:
+            Exception: If the stream yields an error chunk or the model call fails
+        """
+        # Trim messages if needed (using agent's context management)
+        self._trim_messages()
+
+        # Prepare messages
+        messages = self._prepare_messages()
+
+        # Debug: log message structure
+        logger.debug(f"Sending {len(messages)} messages to LLM")
+        for i, msg in enumerate(messages):
+            role = msg.get("role", "unknown")
+            content = msg.get("content", "")
+            if isinstance(content, list):
+                content_types = [c.get("type") for c in content if isinstance(c, dict)]
+                logger.debug(f"  Message {i}: role={role}, content_blocks={content_types}")
+            else:
+                logger.debug(f"  Message {i}: role={role}, content_length={len(str(content))}")
+
+        # Prepare tool definitions (Claude format)
+        tools_schema = None
+        if self.tools:
+            tools_schema = [
+                {
+                    "name": tool.name,
+                    "description": tool.description,
+                    "input_schema": tool.params  # Claude uses input_schema
+                }
+                for tool in self.tools.values()
+            ]
+
+        # Create request
+        request = LLMRequest(
+            messages=messages,
+            temperature=0,
+            stream=True,
+            tools=tools_schema,
+            system=self.system_prompt  # Pass system prompt separately for Claude API
+        )
+
+        self._emit_event("message_start", {"role": "assistant"})
+
+        # Streaming response
+        full_content = ""
+        tool_calls_buffer = {}  # {index: {id, name, arguments}}
+
+        try:
+            stream = self.model.call_stream(request)
+
+            for chunk in stream:
+                if not isinstance(chunk, dict):
+                    continue
+
+                # Check for errors
+                if chunk.get("error"):
+                    error_msg = chunk.get("message", "Unknown error")
+                    status_code = chunk.get("status_code", "N/A")
+                    logger.error(f"API Error: {error_msg} (Status: {status_code})")
+                    logger.error(f"Full error chunk: {chunk}")
+                    raise Exception(f"{error_msg} (Status: {status_code})")
+
+                # Parse chunk; chunks with a missing or empty "choices" list carry no delta
+                choices = chunk.get("choices")
+                if not choices:
+                    continue
+                delta = choices[0].get("delta") or {}
+
+                # Handle text content
+                content_delta = delta.get("content")
+                if content_delta:
+                    full_content += content_delta
+                    self._emit_event("message_update", {"delta": content_delta})
+
+                # Handle tool calls
+                for tc_delta in delta.get("tool_calls") or []:
+                    self._merge_tool_call_delta(tool_calls_buffer, tc_delta)
+
+        except Exception as e:
+            logger.error(f"LLM call error: {e}")
+            raise
+
+        # Parse tool calls
+        tool_calls = []
+        for idx in sorted(tool_calls_buffer.keys()):
+            tc = tool_calls_buffer[idx]
+            try:
+                arguments = json.loads(tc["arguments"]) if tc["arguments"] else {}
+            except json.JSONDecodeError:
+                logger.error(f"Failed to parse tool arguments: {tc['arguments']}")
+                arguments = {}
+
+            # Tools receive a parameter dict; valid JSON that is not an object is unusable
+            if not isinstance(arguments, dict):
+                logger.error(f"Failed to parse tool arguments: {tc['arguments']}")
+                arguments = {}
+
+            tool_calls.append({
+                "id": tc["id"],
+                "name": tc["name"],
+                "arguments": arguments
+            })
+
+        # Add assistant message to history (Claude format uses content blocks)
+        assistant_msg = {"role": "assistant", "content": []}
+
+        # Add text content block if present
+        if full_content:
+            assistant_msg["content"].append({
+                "type": "text",
+                "text": full_content
+            })
+
+        # Add tool_use blocks if present
+        for tc in tool_calls:
+            assistant_msg["content"].append({
+                "type": "tool_use",
+                "id": tc["id"],
+                "name": tc["name"],
+                "input": tc["arguments"]
+            })
+
+        # Only append if content is not empty
+        if assistant_msg["content"]:
+            self.messages.append(assistant_msg)
+
+        self._emit_event("message_end", {
+            "content": full_content,
+            "tool_calls": tool_calls
+        })
+
+        return full_content, tool_calls
+
+    @staticmethod
+    def _merge_tool_call_delta(buffer: Dict, tc_delta: Dict) -> None:
+        """
+        Fold one streamed tool-call fragment into ``buffer`` (keyed by call index).
+
+        Empty or null id/name/arguments fields are ignored so that a later fragment
+        cannot wipe a value received earlier; argument fragments are concatenated.
+        """
+        index = tc_delta.get("index") or 0
+        entry = buffer.setdefault(index, {"id": "", "name": "", "arguments": ""})
+
+        if tc_delta.get("id"):
+            entry["id"] = tc_delta["id"]
+
+        func = tc_delta.get("function") or {}
+        if func.get("name"):
+            entry["name"] = func["name"]
+        if func.get("arguments"):
+            entry["arguments"] += func["arguments"]
+
+    def _execute_tool(self, tool_call: Dict) -> Dict[str, Any]:
+        """
+        Execute tool
+
+        Never raises: lookup or execution failures are converted into an error result.
+        "tool_execution_start" / "tool_execution_end" events are emitted around the call.
+
+        Args:
+            tool_call: {"id": str, "name": str, "arguments": dict}
+
+        Returns:
+            {"status": ..., "result": ..., "execution_time": float}; status is "error"
+            and result is the exception text when the tool is unknown or raised
+        """
+        tool_name = tool_call["name"]
+        tool_id = tool_call["id"]
+        arguments = tool_call["arguments"]
+
+        self._emit_event("tool_execution_start", {
+            "tool_call_id": tool_id,
+            "tool_name": tool_name,
+            "arguments": arguments
+        })
+
+        start_time = time.time()
+        try:
+            tool = self.tools.get(tool_name)
+            if not tool:
+                raise ValueError(f"Tool '{tool_name}' not found")
+
+            # Set tool context
+            tool.model = self.model
+            tool.context = self.agent
+
+            # Execute tool
+            start_time = time.time()
+            result: ToolResult = tool.execute_tool(arguments)
+            execution_time = time.time() - start_time
+
+            result_dict = {
+                "status": result.status,
+                "result": result.result,
+                "execution_time": execution_time
+            }
+
+            self._emit_event("tool_execution_end", {
+                "tool_call_id": tool_id,
+                "tool_name": tool_name,
+                **result_dict
+            })
+
+            return result_dict
+
+        except Exception as e:
+            logger.error(f"Tool execution error: {e}")
+            error_result = {
+                "status": "error",
+                "result": str(e),
+                # Report how long the failing call actually took
+                "execution_time": time.time() - start_time
+            }
+            self._emit_event("tool_execution_end", {
+                "tool_call_id": tool_id,
+                "tool_name": tool_name,
+                **error_result
+            })
+            return error_result
+
+    def _trim_messages(self):
+        """
+        Trim message history to stay within context limits.
+        Uses agent's context management configuration.
+
+        The newest messages that fit into (context window - reserve - system prompt)
+        are kept. Two guarantees are added on top of that:
+        - the newest message is always kept, so the request is never sent empty;
+        - where possible the kept history starts at a plain user message, never at an
+          assistant turn or at tool_result blocks whose tool_use was cut off.
+        """
+        if not self.messages or not self.agent:
+            return
+
+        # Get context window and reserve tokens from agent
+        context_window = self.agent._get_model_context_window()
+        reserve_tokens = self.agent._get_context_reserve_tokens()
+        max_tokens = context_window - reserve_tokens
+
+        # Estimate current tokens (per message, reused below)
+        estimate = self.agent._estimate_message_tokens
+        message_tokens = [estimate(msg) for msg in self.messages]
+
+        # Add system prompt tokens
+        system_tokens = estimate({"role": "system", "content": self.system_prompt})
+        current_tokens = sum(message_tokens) + system_tokens
+
+        # If under limit, no need to trim
+        if current_tokens <= max_tokens:
+            return
+
+        # Keep messages from newest, accumulating tokens
+        available_tokens = max_tokens - system_tokens
+        old_count = len(self.messages)
+        start = old_count
+        accumulated_tokens = 0
+
+        while start > 0 and accumulated_tokens + message_tokens[start - 1] <= available_tokens:
+            start -= 1
+            accumulated_tokens += message_tokens[start]
+
+        # Even an oversized newest message must be sent; an empty history is never valid
+        if start == old_count:
+            start = old_count - 1
+            accumulated_tokens = message_tokens[start]
+
+        # Move the cut forward to the next message that can legally open a conversation.
+        # If none exists in the kept suffix, keep the suffix unchanged.
+        clean_start = start
+        while clean_start < old_count and not self._can_start_history(self.messages[clean_start]):
+            clean_start += 1
+        if clean_start < old_count:
+            accumulated_tokens -= sum(message_tokens[start:clean_start])
+            start = clean_start
+
+        self.messages = self.messages[start:]
+        new_count = len(self.messages)
+
+        if old_count > new_count:
+            logger.info(
+                f"Context trimmed: {old_count} -> {new_count} messages "
+                f"(~{current_tokens} -> ~{system_tokens + accumulated_tokens} tokens, "
+                f"limit: {max_tokens})"
+            )
+
+    @staticmethod
+    def _can_start_history(message: Dict) -> bool:
+        """Return True for a user message that carries no tool_result blocks."""
+        if message.get("role") != "user":
+            return False
+        content = message.get("content")
+        if isinstance(content, list):
+            return not any(
+                isinstance(block, dict) and block.get("type") == "tool_result"
+                for block in content
+            )
+        return True
+
+    def _prepare_messages(self) -> List[Dict[str, Any]]:
+        """
+        Prepare messages to send to LLM
+
+        Note: For Claude API, system prompt should be passed separately via system parameter,
+        not as a message; _call_llm_stream passes it as ``system`` on the LLMRequest.
+
+        Returns:
+            The executor's message history itself (not a copy)
+        """
+        # Don't add system message here - it is passed separately on the request
+        return self.messages
--- a/agent/protocol/context.py
+++ b/agent/protocol/context.py
@@ -0,0 +1,27 @@
+class TeamContext:
+    """Shared state for a group of agents working under common rules."""
+
+    def __init__(self, name: str, description: str, rule: str, agents: list, max_steps: int = 100):
+        """
+        Build a team context from its static configuration.
+
+        :param name: The name of the group context.
+        :param description: A description of the group context.
+        :param rule: The rules governing the group context.
+        :param agents: A list of agents in the context.
+        :param max_steps: Upper bound stored next to the step counter (default: 100).
+        """
+        # Static configuration supplied by the caller
+        self.name, self.description, self.rule = name, description, rule
+        self.agents = agents
+
+        # Task-related state, filled in later
+        self.user_task = ""  # For backward compatibility
+        self.task = None  # Will be a Task instance
+        self.model = None  # Will be an instance of LLMModel
+        self.task_short_name = None  # Store the task directory name
+
+        # List of agents that have been executed
+        self.agent_outputs: list = []
+
+        # Step accounting
+        self.current_steps = 0
+        self.max_steps = max_steps
+
+
+class AgentOutput:
+    """Pairs an agent's name with the output it produced."""
+
+    def __init__(self, agent_name: str, output: str):
+        """
+        :param agent_name: Name of the agent the output belongs to.
+        :param output: The output text.
+        """
+        self.agent_name, self.output = agent_name, output
--- a/agent/protocol/models.py
+++ b/agent/protocol/models.py
@@ -0,0 +1,57 @@
+"""
+Models module for agent system.
+Provides basic model classes needed by tools and bridge integration.
+"""
+
+from typing import Any, Dict, List, Optional
+
+
+class LLMRequest:
+    """
+    Request model for LLM operations.
+
+    Attributes:
+        messages: Conversation messages; a fresh empty list when none are given
+        model: Model identifier, or None
+        temperature: Sampling temperature (default: 0.7)
+        max_tokens: Optional cap on generated tokens
+        stream: Whether a streaming response is requested
+        tools: Optional list of tool definitions
+
+    Any additional keyword argument is stored as an attribute of the same name.
+    """
+
+    def __init__(self, messages: Optional[List[Dict[str, Any]]] = None, model: Optional[str] = None,
+                 temperature: float = 0.7, max_tokens: Optional[int] = None,
+                 stream: bool = False, tools: Optional[List] = None, **kwargs):
+        # A falsy ``messages`` (None or an empty list) is replaced by a new list,
+        # so two requests never share a default list.
+        self.messages = messages or []
+        self.model = model
+        self.temperature = temperature
+        self.max_tokens = max_tokens
+        self.stream = stream
+        self.tools = tools
+        # Allow extra attributes
+        for key, value in kwargs.items():
+            setattr(self, key, value)
+
+
+class LLMModel:
+    """
+    Base class for LLM models.
+
+    Stores the model identifier and any extra configuration. ``call`` and
+    ``call_stream`` are placeholders here and always raise ``NotImplementedError``.
+    """
+
+    def __init__(self, model: Optional[str] = None, **kwargs):
+        """
+        :param model: Model identifier, or None when not specified.
+        :param kwargs: Extra settings, kept as a dict in ``self.config``.
+        """
+        self.model = model
+        self.config = kwargs
+
+    def call(self, request: LLMRequest):
+        """
+        Call the model with a request.
+        This is a placeholder implementation.
+
+        :param request: The request to send.
+        :raises NotImplementedError: Always.
+        """
+        raise NotImplementedError("LLMModel.call not implemented in this context")
+
+    def call_stream(self, request: LLMRequest):
+        """
+        Call the model with streaming.
+        This is a placeholder implementation.
+
+        :param request: The request to send.
+        :raises NotImplementedError: Always.
+        """
+        raise NotImplementedError("LLMModel.call_stream not implemented in this context")
+
+
+class ModelFactory:
+    """Factory for creating model instances"""
+
+    @staticmethod
+    def create_model(model_type: str, **kwargs):
+        """
+        Placeholder for building a model instance of the requested type.
+
+        :param model_type: Type of model to create (not used by this placeholder).
+        :param kwargs: Extra options (not used by this placeholder).
+        :raises NotImplementedError: Always.
+        """
+        reason = "ModelFactory.create_model not implemented in this context"
+        raise NotImplementedError(reason)
--- a/agent/protocol/result.py
+++ b/agent/protocol/result.py
@@ -0,0 +1,96 @@
+import time
+import uuid
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import List, Dict, Any, Optional
+
+from agent.protocol.task import Task, TaskStatus
+
+
+class AgentActionType(Enum):
+    """
+    Enum representing different types of agent actions.
+
+    Used as the ``action_type`` of an ``AgentAction``.
+    """
+    # A tool was used; AgentAction.tool_result carries the details
+    TOOL_USE = "tool_use"
+    # A thinking step
+    THINKING = "thinking"
+    # The agent's final answer
+    FINAL_ANSWER = "final_answer"
+
+
+@dataclass
+class ToolResult:
+    """
+    Represents the result of a tool use.
+
+    The first four fields are required, in this order; the last two have defaults.
+
+    Attributes:
+        tool_name: Name of the tool used
+        input_params: Parameters passed to the tool
+        output: Output from the tool (any type)
+        status: Status of the tool execution (success/error)
+        error_message: Error message if the tool execution failed (default: None)
+        execution_time: Time taken to execute the tool (default: 0.0; unit not
+            stated here, presumably seconds - confirm against the producer)
+    """
+    tool_name: str
+    input_params: Dict[str, Any]
+    output: Any
+    status: str
+    error_message: Optional[str] = None
+    execution_time: float = 0.0
+
+
+@dataclass
+class AgentAction:
+    """
+    Represents an action taken by an agent.
+
+    ``agent_id``, ``agent_name`` and ``action_type`` are required; every other
+    field has a default.
+
+    Attributes:
+        agent_id: ID of the agent that performed the action
+        agent_name: Name of the agent that performed the action
+        action_type: Type of action (tool use, thinking, final answer)
+        id: Unique identifier for the action (a new UUID4 string by default)
+        content: Content of the action (thought content, final answer content)
+        tool_result: Tool use details if action_type is TOOL_USE
+        thought: Optional thought text attached to the action (default: None)
+        timestamp: When the action was performed (``time.time()`` at creation by default)
+    """
+    agent_id: str
+    agent_name: str
+    action_type: AgentActionType
+    id: str = field(default_factory=lambda: str(uuid.uuid4()))
+    content: str = ""
+    tool_result: Optional[ToolResult] = None
+    thought: Optional[str] = None
+    timestamp: float = field(default_factory=time.time)
+
+
+@dataclass
+class AgentResult:
+    """
+    Outcome of an agent's execution.
+
+    Attributes:
+        final_answer: The final answer provided by the agent
+        step_count: Number of steps taken by the agent
+        status: "success" (the default) or "error"
+        error_message: Error message if execution failed, otherwise None
+    """
+    final_answer: str
+    step_count: int
+    status: str = "success"
+    error_message: Optional[str] = None
+
+    @classmethod
+    def success(cls, final_answer: str, step_count: int) -> "AgentResult":
+        """Build a result that keeps the default "success" status and no error message."""
+        return cls(final_answer, step_count)
+
+    @classmethod
+    def error(cls, error_message: str, step_count: int = 0) -> "AgentResult":
+        """
+        Build a result with status "error".
+
+        The final answer is the error message prefixed with "Error: ".
+        """
+        fields = {
+            "final_answer": f"Error: {error_message}",
+            "step_count": step_count,
+            "status": "error",
+            "error_message": error_message,
+        }
+        return cls(**fields)
+
+    @property
+    def is_error(self) -> bool:
+        """True when ``status`` equals "error"."""
+        failed = self.status == "error"
+        return failed
--- a/agent/protocol/task.py
+++ b/agent/protocol/task.py
@@ -0,0 +1,95 @@
+import time
+import uuid
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Dict, Any, List
+
+
+class TaskType(Enum):
+    """
+    Enum representing different types of tasks.
+
+    ``Task`` uses ``TEXT`` as its default type.
+    """
+    TEXT = "text"
+    IMAGE = "image"
+    VIDEO = "video"
+    AUDIO = "audio"
+    FILE = "file"
+    # Presumably a task combining several of the kinds above - confirm with callers
+    MIXED = "mixed"
+
+
+class TaskStatus(Enum):
+    """
+    Enum representing the status of a task.
+
+    A new ``Task`` starts at ``INIT``; ``Task.update_status`` stores a new value.
+    """
+    INIT = "init"  # Initial state (the default for a new Task)
+    PROCESSING = "processing"  # In progress
+    COMPLETED = "completed"  # Completed
+    FAILED = "failed"  # Failed
+
+
+@dataclass
+class Task:
+    """
+    Represents a task to be processed by an agent.
+
+    ``__init__`` is written by hand, so ``@dataclass`` does not generate one;
+    the field declarations below still feed the generated ``__repr__`` and
+    ``__eq__``.
+
+    Attributes:
+        id: Unique identifier for the task
+        content: The primary text content of the task
+        type: Type of the task
+        status: Current status of the task
+        created_at: Timestamp when the task was created
+        updated_at: Timestamp when the task was last updated
+        metadata: Additional metadata for the task
+        images: List of image URLs or base64 encoded images
+        videos: List of video URLs
+        audios: List of audio URLs or base64 encoded audios
+        files: List of file URLs or paths
+    """
+    id: str = field(default_factory=lambda: str(uuid.uuid4()))
+    content: str = ""
+    type: TaskType = TaskType.TEXT
+    status: TaskStatus = TaskStatus.INIT
+    created_at: float = field(default_factory=time.time)
+    updated_at: float = field(default_factory=time.time)
+    metadata: Dict[str, Any] = field(default_factory=dict)
+
+    # Media content
+    images: List[str] = field(default_factory=list)
+    videos: List[str] = field(default_factory=list)
+    audios: List[str] = field(default_factory=list)
+    files: List[str] = field(default_factory=list)
+
+    def __init__(self, content: str = "", **kwargs):
+        """
+        Initialize a Task with content and optional keyword arguments.
+
+        Args:
+            content: The text content of the task
+            **kwargs: Values for any of the other attributes listed on the
+                class (id, type, status, created_at, updated_at, metadata,
+                images, videos, audios, files). Keys that match none of
+                them are ignored.
+        """
+        # Read the clock once so a new task starts with created_at == updated_at
+        # instead of two timestamps a few microseconds apart.
+        now = time.time()
+        # Only generate a UUID when the caller did not supply an id
+        self.id = kwargs['id'] if 'id' in kwargs else str(uuid.uuid4())
+        self.content = content
+        self.type = kwargs.get('type', TaskType.TEXT)
+        self.status = kwargs.get('status', TaskStatus.INIT)
+        self.created_at = kwargs.get('created_at', now)
+        self.updated_at = kwargs.get('updated_at', now)
+        # Each default container is created per call, so tasks never share one
+        self.metadata = kwargs.get('metadata', {})
+        self.images = kwargs.get('images', [])
+        self.videos = kwargs.get('videos', [])
+        self.audios = kwargs.get('audios', [])
+        self.files = kwargs.get('files', [])
+
+    def get_text(self) -> str:
+        """
+        Get the text content of the task.
+
+        Returns:
+            The text content
+        """
+        return self.content
+
+    def update_status(self, status: TaskStatus) -> None:
+        """
+        Update the status of the task and refresh ``updated_at``.
+
+        Args:
+            status: The new status
+        """
+        self.status = status
+        self.updated_at = time.time()