feat(vision): prioritize main model for image recognition with multi-provider fallback

- Add call_vision method to all bot implementations (DashScope, Claude, Gemini, ZhipuAI, MiniMax, Doubao, Moonshot, OpenAICompatibleBot) using each vendor's native multimodal API format - Remove call_with_tools/call_vision from Bot base class to fix MRO shadowing issue with OpenAICompatibleBot mixin - Refactor vision tool provider resolution: MainModel → other configured models (auto-discovered) → OpenAI → LinkAI, with automatic fallback - Return actual model name used in call_vision responses - Sync config.json API keys to .env bidirectionally on startup - Fix bot instance cache to detect bot_type/use_linkai config changes - Add SSE reconnection support for web console - Preserve image path hints in Gemini text for correct vision tool calls - Update docs/tools/vision.mdx
2026-07-17 11:07:11 +08:00 · 2026-04-11 19:46:11 +08:00
parent 3cd92ccda3
commit 26693acc3f
17 changed files with 1173 additions and 359 deletions
--- a/bridge/agent_bridge.py
+++ b/bridge/agent_bridge.py
@@ -124,14 +124,15 @@ class AgentLLMModel(LLMModel):

    @property
    def bot(self):
-        """Lazy load the bot, re-create when model changes"""
+        """Lazy load the bot, re-create when model or bot_type changes"""
        from models.bot_factory import create_bot
        cur_model = self.model
-        if self._bot is None or self._bot_model != cur_model:
-            bot_type = self._resolve_bot_type(cur_model)
-            self._bot = create_bot(bot_type)
+        cur_bot_type = self._resolve_bot_type(cur_model)
+        if self._bot is None or self._bot_model != cur_model or getattr(self, '_bot_type', None) != cur_bot_type:
+            self._bot = create_bot(cur_bot_type)
            self._bot = add_openai_compatible_support(self._bot)
            self._bot_model = cur_model
+            self._bot_type = cur_bot_type
        return self._bot

    def call(self, request: LLMRequest):
@@ -505,15 +506,15 @@ class AgentBridge:
    
    def _migrate_config_to_env(self, workspace_root: str):
        """
-        Migrate API keys from config.json to .env file if not already set
-        
+        Sync API keys from config.json to .env file.
+        Adds new keys and updates changed values on each startup.
+
        Args:
            workspace_root: Workspace directory path (not used, kept for compatibility)
        """
        from config import conf
        import os
        
-        # Mapping from config.json keys to environment variable names
        key_mapping = {
            "open_ai_api_key": "OPENAI_API_KEY",
            "open_ai_api_base": "OPENAI_API_BASE",
@@ -522,10 +523,9 @@ class AgentBridge:
            "linkai_api_key": "LINKAI_API_KEY",
        }
        
-        # Use fixed secure location for .env file
        env_file = expand_path("~/.cow/.env")
        
-        # Read existing env vars from .env file
+        # Read existing env vars (key -> value)
        existing_env_vars = {}
        if os.path.exists(env_file):
            try:
@@ -533,48 +533,46 @@ class AgentBridge:
                    for line in f:
                        line = line.strip()
                        if line and not line.startswith('#') and '=' in line:
-                            key, _ = line.split('=', 1)
-                            existing_env_vars[key.strip()] = True
+                            key, val = line.split('=', 1)
+                            existing_env_vars[key.strip()] = val.strip()
            except Exception as e:
                logger.warning(f"[AgentBridge] Failed to read .env file: {e}")
        
-        # Check which keys need to be migrated
-        keys_to_migrate = {}
+        # Sync config.json values into .env (add/update/remove)
+        updated = False
        for config_key, env_key in key_mapping.items():
-            # Skip if already in .env file
-            if env_key in existing_env_vars:
-                continue
-            
-            # Get value from config.json
-            value = conf().get(config_key, "")
-            if value and value.strip():  # Only migrate non-empty values
-                keys_to_migrate[env_key] = value.strip()
-        
-        # Log summary if there are keys to skip
-        if existing_env_vars:
-            logger.debug(f"[AgentBridge] {len(existing_env_vars)} env vars already in .env")
-        
-        # Write new keys to .env file
-        if keys_to_migrate:
+            raw = conf().get(config_key, "")
+            value = raw.strip() if raw else ""
+            old_value = existing_env_vars.get(env_key)
+
+            if value:
+                if old_value == value:
+                    continue
+                existing_env_vars[env_key] = value
+                os.environ[env_key] = value
+                updated = True
+            else:
+                if old_value is None:
+                    continue
+                existing_env_vars.pop(env_key, None)
+                os.environ.pop(env_key, None)
+                updated = True
+            updated = True
+
+        if updated:
            try:
-                # Ensure ~/.cow directory and .env file exist
                env_dir = os.path.dirname(env_file)
-                if not os.path.exists(env_dir):
-                    os.makedirs(env_dir, exist_ok=True)
-                if not os.path.exists(env_file):
-                    open(env_file, 'a').close()
-                
-                # Append new keys
-                with open(env_file, 'a', encoding='utf-8') as f:
-                    f.write('\n# Auto-migrated from config.json\n')
-                    for key, value in keys_to_migrate.items():
+                os.makedirs(env_dir, exist_ok=True)
+
+                with open(env_file, 'w', encoding='utf-8') as f:
+                    f.write('# Environment variables for agent\n')
+                    f.write('# Auto-managed - synced from config.json on startup\n\n')
+                    for key, value in sorted(existing_env_vars.items()):
                        f.write(f'{key}={value}\n')
-                        # Also set in current process
-                        os.environ[key] = value
-                
-                logger.info(f"[AgentBridge] Migrated {len(keys_to_migrate)} API keys from config.json to .env: {list(keys_to_migrate.keys())}")
+
+                logger.info(f"[AgentBridge] Synced API keys from config.json to .env")
            except Exception as e:
-                logger.warning(f"[AgentBridge] Failed to migrate API keys: {e}")
+                logger.warning(f"[AgentBridge] Failed to sync API keys: {e}")
    
    def _persist_messages(
        self, session_id: str, new_messages: list, channel_type: str = ""
--- a/bridge/agent_initializer.py
+++ b/bridge/agent_initializer.py
@@ -490,7 +490,7 @@ class AgentInitializer:
        
        env_file = expand_path("~/.cow/.env")
        
-        # Read existing env vars
+        # Read existing env vars (key -> value)
        existing_env_vars = {}
        if os.path.exists(env_file):
            try:
@@ -498,38 +498,46 @@ class AgentInitializer:
                    for line in f:
                        line = line.strip()
                        if line and not line.startswith('#') and '=' in line:
-                            key, _ = line.split('=', 1)
-                            existing_env_vars[key.strip()] = True
+                            key, val = line.split('=', 1)
+                            existing_env_vars[key.strip()] = val.strip()
            except Exception as e:
                logger.warning(f"[AgentInitializer] Failed to read .env file: {e}")
        
-        # Check which keys need migration
-        keys_to_migrate = {}
+        # Sync config.json values into .env (add/update/remove)
+        updated = False
        for config_key, env_key in key_mapping.items():
-            if env_key in existing_env_vars:
-                continue
-            value = conf().get(config_key, "")
-            if value and value.strip():
-                keys_to_migrate[env_key] = value.strip()
-        
-        # Write new keys
-        if keys_to_migrate:
+            raw = conf().get(config_key, "")
+            value = raw.strip() if raw else ""
+            old_value = existing_env_vars.get(env_key)
+
+            if value:
+                if old_value == value:
+                    continue
+                existing_env_vars[env_key] = value
+                os.environ[env_key] = value
+                updated = True
+            else:
+                if old_value is None:
+                    continue
+                existing_env_vars.pop(env_key, None)
+                os.environ.pop(env_key, None)
+                updated = True
+
+        if updated:
            try:
                env_dir = os.path.dirname(env_file)
-                if not os.path.exists(env_dir):
-                    os.makedirs(env_dir, exist_ok=True)
-                if not os.path.exists(env_file):
-                    open(env_file, 'a').close()
-                
-                with open(env_file, 'a', encoding='utf-8') as f:
-                    f.write('\n# Auto-migrated from config.json\n')
-                    for key, value in keys_to_migrate.items():
+                os.makedirs(env_dir, exist_ok=True)
+
+                # Rewrite the entire .env file to ensure consistency
+                with open(env_file, 'w', encoding='utf-8') as f:
+                    f.write('# Environment variables for agent\n')
+                    f.write('# Auto-managed - synced from config.json on startup\n\n')
+                    for key, value in sorted(existing_env_vars.items()):
                        f.write(f'{key}={value}\n')
-                        os.environ[key] = value
-                
-                logger.info(f"[AgentInitializer] Migrated {len(keys_to_migrate)} API keys to .env: {list(keys_to_migrate.keys())}")
+
+                logger.info(f"[AgentInitializer] Synced API keys from config.json to .env")
            except Exception as e:
-                logger.warning(f"[AgentInitializer] Failed to migrate API keys: {e}")
+                logger.warning(f"[AgentInitializer] Failed to sync API keys: {e}")

    def _start_daily_flush_timer(self):
        """Start a background thread that flushes all agents' memory daily at 23:55."""