feat(memory): support multi-vendor embedding fallback

Add embedding_provider config knob with native support for
openai / dashscope / doubao / zhipu / linkai, plus an in-chat
/memory status and /memory rebuild-index workflow for switching
vendors safely.
This commit is contained in:
zhayujie
2026-05-20 11:00:53 +08:00
parent a0dfdb79df
commit 3ffb563a44
12 changed files with 1572 additions and 449 deletions

View File

@@ -17,6 +17,10 @@ from common.utils import expand_path
# Module-level lock to serialize scheduler init across concurrent sessions
_scheduler_init_lock = threading.Lock()
# Track whether the embedding model log has been printed in this process,
# so we avoid spamming it once per session.
_embedding_logged: bool = False
class AgentInitializer:
"""
@@ -272,52 +276,19 @@ class AgentInitializer:
memory_tools = []
try:
from agent.memory import MemoryManager, MemoryConfig, create_embedding_provider
from agent.memory import MemoryManager, MemoryConfig
from agent.tools import MemorySearchTool, MemoryGetTool
from config import conf
# Initialize embedding provider (prefer OpenAI, fallback to LinkAI)
embedding_provider = None
openai_api_key = conf().get("open_ai_api_key", "")
openai_api_base = conf().get("open_ai_api_base", "")
if openai_api_key and openai_api_key not in ["", "YOUR API KEY", "YOUR_API_KEY"]:
try:
embedding_provider = create_embedding_provider(
provider="openai",
model="text-embedding-3-small",
api_key=openai_api_key,
api_base=openai_api_base or "https://api.openai.com/v1"
)
if session_id is None:
logger.info("[AgentInitializer] OpenAI embedding initialized")
except Exception as e:
logger.warning(f"[AgentInitializer] OpenAI embedding failed: {e}")
if embedding_provider is None:
linkai_api_key = conf().get("linkai_api_key", "") or os.environ.get("LINKAI_API_KEY", "")
linkai_api_base = conf().get("linkai_api_base", "https://api.link-ai.tech")
if linkai_api_key and linkai_api_key not in ["", "YOUR API KEY", "YOUR_API_KEY"]:
try:
embedding_provider = create_embedding_provider(
provider="linkai",
model="text-embedding-3-small",
api_key=linkai_api_key,
api_base=f"{linkai_api_base}/v1"
)
if session_id is None:
logger.info("[AgentInitializer] LinkAI embedding initialized (fallback)")
except Exception as e:
logger.warning(f"[AgentInitializer] LinkAI embedding failed: {e}")
# Create memory manager
memory_config = MemoryConfig(workspace_root=workspace_root)
embedding_provider = self._init_embedding_provider(
memory_config, session_id=session_id
)
memory_manager = MemoryManager(memory_config, embedding_provider=embedding_provider)
# Sync memory
self._sync_memory(memory_manager, session_id)
# Create memory tools
memory_tools = [
MemorySearchTool(memory_manager),
MemoryGetTool(memory_manager)
@@ -330,6 +301,190 @@ class AgentInitializer:
logger.warning(f"[AgentInitializer] Memory system not available: {e}")
return memory_manager, memory_tools
def _init_embedding_provider(self, memory_config, session_id: Optional[str] = None):
"""
Initialize the embedding provider for memory.
Two paths:
A. Default (no `embedding_provider` in config.json):
Auto-init OpenAI -> LinkAI fallback. Existing 1536-dim indices
keep working.
B. Explicit (`embedding_provider` is set):
Initialize the requested vendor with unified dim (default 1024).
If the index was built with a different dim, vector search will
quietly return no results (cosine returns 0) and keyword search
takes over until the user runs /memory rebuild-index.
"""
from agent.memory import create_embedding_provider
from config import conf
explicit_provider = (conf().get("embedding_provider") or "").strip().lower()
if not explicit_provider:
return self._init_embedding_provider_legacy(session_id=session_id)
return self._init_embedding_provider_explicit(
memory_config, explicit_provider, session_id=session_id,
)
def _init_embedding_provider_legacy(self, session_id: Optional[str] = None):
"""Legacy auto-init path: OpenAI -> LinkAI. Preserved verbatim for compat."""
from agent.memory import create_embedding_provider
from config import conf
embedding_provider = None
embedding_model = None
openai_api_key = conf().get("open_ai_api_key", "")
openai_api_base = conf().get("open_ai_api_base", "")
if openai_api_key and openai_api_key not in ["", "YOUR API KEY", "YOUR_API_KEY"]:
try:
model = "text-embedding-3-small"
embedding_provider = create_embedding_provider(
provider="openai",
model=model,
api_key=openai_api_key,
api_base=openai_api_base or "https://api.openai.com/v1"
)
embedding_model = f"openai/{model}"
except Exception as e:
logger.warning(f"[AgentInitializer] OpenAI embedding failed: {e}")
if embedding_provider is None:
linkai_api_key = conf().get("linkai_api_key", "") or os.environ.get("LINKAI_API_KEY", "")
linkai_api_base = conf().get("linkai_api_base", "https://api.link-ai.tech")
if linkai_api_key and linkai_api_key not in ["", "YOUR API KEY", "YOUR_API_KEY"]:
try:
model = "text-embedding-3-small"
embedding_provider = create_embedding_provider(
provider="linkai",
model=model,
api_key=linkai_api_key,
api_base=f"{linkai_api_base}/v1"
)
embedding_model = f"linkai/{model}"
except Exception as e:
logger.warning(f"[AgentInitializer] LinkAI embedding failed: {e}")
if embedding_provider is not None and embedding_model:
global _embedding_logged
if not _embedding_logged:
logger.info(
f"[AgentInitializer] Embedding model in use: {embedding_model} "
f"(dim={embedding_provider.dimensions})"
)
_embedding_logged = True
return embedding_provider
def _init_embedding_provider_explicit(
self,
memory_config,
provider_key: str,
session_id: Optional[str] = None,
):
"""Explicit-provider path: build the configured vendor.
If the index was built with a different dim, vector search will
silently return no results (cosine returns 0 for mismatched dims)
and keyword search takes over. Users switch vendors by running
/memory rebuild-index — see docs.
"""
from agent.memory import create_embedding_provider
from agent.memory.embedding import EMBEDDING_VENDORS
from config import conf
meta = EMBEDDING_VENDORS.get(provider_key)
if meta is None:
logger.error(
f"[AgentInitializer] Unknown embedding_provider '{provider_key}'. "
f"Supported: {sorted(EMBEDDING_VENDORS.keys())}. "
f"Memory will run in keyword-only mode."
)
return None
api_key = self._resolve_embedding_api_key(provider_key)
api_base = self._resolve_embedding_api_base(provider_key, meta["default_base_url"])
if not api_key:
logger.error(
f"[AgentInitializer] embedding_provider='{provider_key}' is set but its "
f"API key is missing. Memory will run in keyword-only mode."
)
return None
model = (conf().get("embedding_model") or "").strip() or meta["default_model"]
try:
cfg_dim = int(conf().get("embedding_dimensions") or 0)
except (TypeError, ValueError):
cfg_dim = 0
dim = cfg_dim if cfg_dim > 0 else meta["default_dimensions"]
try:
provider = create_embedding_provider(
provider=provider_key,
model=model,
api_key=api_key,
api_base=api_base,
dimensions=dim,
)
except Exception as e:
logger.error(
f"[AgentInitializer] Failed to init embedding provider "
f"'{provider_key}/{model}': {e}"
)
return None
global _embedding_logged
if not _embedding_logged:
logger.info(
f"[AgentInitializer] Embedding model in use: "
f"{provider_key}/{model} (dim={provider.dimensions})"
)
_embedding_logged = True
return provider
@staticmethod
def _resolve_embedding_api_key(provider_key: str) -> str:
"""Pick the API key for an explicit embedding provider from config."""
from config import conf
key_map = {
"openai": "open_ai_api_key",
"linkai": "linkai_api_key",
"dashscope": "dashscope_api_key",
"doubao": "ark_api_key",
"zhipu": "zhipu_ai_api_key",
}
field = key_map.get(provider_key)
if not field:
return ""
value = conf().get(field, "") or ""
if value in ["", "YOUR API KEY", "YOUR_API_KEY"]:
return ""
return value
@staticmethod
def _resolve_embedding_api_base(provider_key: str, default_base: str) -> str:
"""Pick the API base for an explicit embedding provider from config."""
from config import conf
base_map = {
"openai": "open_ai_api_base",
"linkai": "linkai_api_base",
"doubao": "ark_base_url",
"zhipu": "zhipu_ai_api_base",
}
field = base_map.get(provider_key)
if not field:
return default_base
value = (conf().get(field) or "").strip()
if not value:
return default_base
if provider_key == "linkai" and not value.rstrip("/").endswith("/v1"):
return f"{value.rstrip('/')}/v1"
return value
def _sync_memory(self, memory_manager, session_id: Optional[str] = None):
"""Sync memory database"""