mirror of
https://github.com/zhayujie/chatgpt-on-wechat.git
synced 2026-06-02 00:57:41 +08:00
feat(memory): support multi-vendor embedding fallback
Add embedding_provider config knob with native support for openai / dashscope / doubao / zhipu / linkai, plus an in-chat /memory status and /memory rebuild-index workflow for switching vendors safely.
This commit is contained in:
@@ -17,6 +17,10 @@ from common.utils import expand_path
|
||||
# Module-level lock to serialize scheduler init across concurrent sessions
|
||||
_scheduler_init_lock = threading.Lock()
|
||||
|
||||
# Track whether the embedding model log has been printed in this process,
|
||||
# so we avoid spamming it once per session.
|
||||
_embedding_logged: bool = False
|
||||
|
||||
|
||||
class AgentInitializer:
|
||||
"""
|
||||
@@ -272,52 +276,19 @@ class AgentInitializer:
|
||||
memory_tools = []
|
||||
|
||||
try:
|
||||
from agent.memory import MemoryManager, MemoryConfig, create_embedding_provider
|
||||
from agent.memory import MemoryManager, MemoryConfig
|
||||
from agent.tools import MemorySearchTool, MemoryGetTool
|
||||
from config import conf
|
||||
|
||||
# Initialize embedding provider (prefer OpenAI, fallback to LinkAI)
|
||||
embedding_provider = None
|
||||
|
||||
openai_api_key = conf().get("open_ai_api_key", "")
|
||||
openai_api_base = conf().get("open_ai_api_base", "")
|
||||
if openai_api_key and openai_api_key not in ["", "YOUR API KEY", "YOUR_API_KEY"]:
|
||||
try:
|
||||
embedding_provider = create_embedding_provider(
|
||||
provider="openai",
|
||||
model="text-embedding-3-small",
|
||||
api_key=openai_api_key,
|
||||
api_base=openai_api_base or "https://api.openai.com/v1"
|
||||
)
|
||||
if session_id is None:
|
||||
logger.info("[AgentInitializer] OpenAI embedding initialized")
|
||||
except Exception as e:
|
||||
logger.warning(f"[AgentInitializer] OpenAI embedding failed: {e}")
|
||||
|
||||
if embedding_provider is None:
|
||||
linkai_api_key = conf().get("linkai_api_key", "") or os.environ.get("LINKAI_API_KEY", "")
|
||||
linkai_api_base = conf().get("linkai_api_base", "https://api.link-ai.tech")
|
||||
if linkai_api_key and linkai_api_key not in ["", "YOUR API KEY", "YOUR_API_KEY"]:
|
||||
try:
|
||||
embedding_provider = create_embedding_provider(
|
||||
provider="linkai",
|
||||
model="text-embedding-3-small",
|
||||
api_key=linkai_api_key,
|
||||
api_base=f"{linkai_api_base}/v1"
|
||||
)
|
||||
if session_id is None:
|
||||
logger.info("[AgentInitializer] LinkAI embedding initialized (fallback)")
|
||||
except Exception as e:
|
||||
logger.warning(f"[AgentInitializer] LinkAI embedding failed: {e}")
|
||||
|
||||
# Create memory manager
|
||||
memory_config = MemoryConfig(workspace_root=workspace_root)
|
||||
|
||||
embedding_provider = self._init_embedding_provider(
|
||||
memory_config, session_id=session_id
|
||||
)
|
||||
|
||||
memory_manager = MemoryManager(memory_config, embedding_provider=embedding_provider)
|
||||
|
||||
# Sync memory
|
||||
self._sync_memory(memory_manager, session_id)
|
||||
|
||||
# Create memory tools
|
||||
|
||||
memory_tools = [
|
||||
MemorySearchTool(memory_manager),
|
||||
MemoryGetTool(memory_manager)
|
||||
@@ -330,6 +301,190 @@ class AgentInitializer:
|
||||
logger.warning(f"[AgentInitializer] Memory system not available: {e}")
|
||||
|
||||
return memory_manager, memory_tools
|
||||
|
||||
def _init_embedding_provider(self, memory_config, session_id: Optional[str] = None):
|
||||
"""
|
||||
Initialize the embedding provider for memory.
|
||||
|
||||
Two paths:
|
||||
A. Default (no `embedding_provider` in config.json):
|
||||
Auto-init OpenAI -> LinkAI fallback. Existing 1536-dim indices
|
||||
keep working.
|
||||
B. Explicit (`embedding_provider` is set):
|
||||
Initialize the requested vendor with unified dim (default 1024).
|
||||
If the index was built with a different dim, vector search will
|
||||
quietly return no results (cosine returns 0) and keyword search
|
||||
takes over until the user runs /memory rebuild-index.
|
||||
"""
|
||||
from agent.memory import create_embedding_provider
|
||||
from config import conf
|
||||
|
||||
explicit_provider = (conf().get("embedding_provider") or "").strip().lower()
|
||||
|
||||
if not explicit_provider:
|
||||
return self._init_embedding_provider_legacy(session_id=session_id)
|
||||
|
||||
return self._init_embedding_provider_explicit(
|
||||
memory_config, explicit_provider, session_id=session_id,
|
||||
)
|
||||
|
||||
def _init_embedding_provider_legacy(self, session_id: Optional[str] = None):
|
||||
"""Legacy auto-init path: OpenAI -> LinkAI. Preserved verbatim for compat."""
|
||||
from agent.memory import create_embedding_provider
|
||||
from config import conf
|
||||
|
||||
embedding_provider = None
|
||||
embedding_model = None
|
||||
|
||||
openai_api_key = conf().get("open_ai_api_key", "")
|
||||
openai_api_base = conf().get("open_ai_api_base", "")
|
||||
if openai_api_key and openai_api_key not in ["", "YOUR API KEY", "YOUR_API_KEY"]:
|
||||
try:
|
||||
model = "text-embedding-3-small"
|
||||
embedding_provider = create_embedding_provider(
|
||||
provider="openai",
|
||||
model=model,
|
||||
api_key=openai_api_key,
|
||||
api_base=openai_api_base or "https://api.openai.com/v1"
|
||||
)
|
||||
embedding_model = f"openai/{model}"
|
||||
except Exception as e:
|
||||
logger.warning(f"[AgentInitializer] OpenAI embedding failed: {e}")
|
||||
|
||||
if embedding_provider is None:
|
||||
linkai_api_key = conf().get("linkai_api_key", "") or os.environ.get("LINKAI_API_KEY", "")
|
||||
linkai_api_base = conf().get("linkai_api_base", "https://api.link-ai.tech")
|
||||
if linkai_api_key and linkai_api_key not in ["", "YOUR API KEY", "YOUR_API_KEY"]:
|
||||
try:
|
||||
model = "text-embedding-3-small"
|
||||
embedding_provider = create_embedding_provider(
|
||||
provider="linkai",
|
||||
model=model,
|
||||
api_key=linkai_api_key,
|
||||
api_base=f"{linkai_api_base}/v1"
|
||||
)
|
||||
embedding_model = f"linkai/{model}"
|
||||
except Exception as e:
|
||||
logger.warning(f"[AgentInitializer] LinkAI embedding failed: {e}")
|
||||
|
||||
if embedding_provider is not None and embedding_model:
|
||||
global _embedding_logged
|
||||
if not _embedding_logged:
|
||||
logger.info(
|
||||
f"[AgentInitializer] Embedding model in use: {embedding_model} "
|
||||
f"(dim={embedding_provider.dimensions})"
|
||||
)
|
||||
_embedding_logged = True
|
||||
|
||||
return embedding_provider
|
||||
|
||||
def _init_embedding_provider_explicit(
|
||||
self,
|
||||
memory_config,
|
||||
provider_key: str,
|
||||
session_id: Optional[str] = None,
|
||||
):
|
||||
"""Explicit-provider path: build the configured vendor.
|
||||
|
||||
If the index was built with a different dim, vector search will
|
||||
silently return no results (cosine returns 0 for mismatched dims)
|
||||
and keyword search takes over. Users switch vendors by running
|
||||
/memory rebuild-index — see docs.
|
||||
"""
|
||||
from agent.memory import create_embedding_provider
|
||||
from agent.memory.embedding import EMBEDDING_VENDORS
|
||||
from config import conf
|
||||
|
||||
meta = EMBEDDING_VENDORS.get(provider_key)
|
||||
if meta is None:
|
||||
logger.error(
|
||||
f"[AgentInitializer] Unknown embedding_provider '{provider_key}'. "
|
||||
f"Supported: {sorted(EMBEDDING_VENDORS.keys())}. "
|
||||
f"Memory will run in keyword-only mode."
|
||||
)
|
||||
return None
|
||||
|
||||
api_key = self._resolve_embedding_api_key(provider_key)
|
||||
api_base = self._resolve_embedding_api_base(provider_key, meta["default_base_url"])
|
||||
|
||||
if not api_key:
|
||||
logger.error(
|
||||
f"[AgentInitializer] embedding_provider='{provider_key}' is set but its "
|
||||
f"API key is missing. Memory will run in keyword-only mode."
|
||||
)
|
||||
return None
|
||||
|
||||
model = (conf().get("embedding_model") or "").strip() or meta["default_model"]
|
||||
try:
|
||||
cfg_dim = int(conf().get("embedding_dimensions") or 0)
|
||||
except (TypeError, ValueError):
|
||||
cfg_dim = 0
|
||||
dim = cfg_dim if cfg_dim > 0 else meta["default_dimensions"]
|
||||
|
||||
try:
|
||||
provider = create_embedding_provider(
|
||||
provider=provider_key,
|
||||
model=model,
|
||||
api_key=api_key,
|
||||
api_base=api_base,
|
||||
dimensions=dim,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"[AgentInitializer] Failed to init embedding provider "
|
||||
f"'{provider_key}/{model}': {e}"
|
||||
)
|
||||
return None
|
||||
|
||||
global _embedding_logged
|
||||
if not _embedding_logged:
|
||||
logger.info(
|
||||
f"[AgentInitializer] Embedding model in use: "
|
||||
f"{provider_key}/{model} (dim={provider.dimensions})"
|
||||
)
|
||||
_embedding_logged = True
|
||||
return provider
|
||||
|
||||
@staticmethod
|
||||
def _resolve_embedding_api_key(provider_key: str) -> str:
|
||||
"""Pick the API key for an explicit embedding provider from config."""
|
||||
from config import conf
|
||||
|
||||
key_map = {
|
||||
"openai": "open_ai_api_key",
|
||||
"linkai": "linkai_api_key",
|
||||
"dashscope": "dashscope_api_key",
|
||||
"doubao": "ark_api_key",
|
||||
"zhipu": "zhipu_ai_api_key",
|
||||
}
|
||||
field = key_map.get(provider_key)
|
||||
if not field:
|
||||
return ""
|
||||
value = conf().get(field, "") or ""
|
||||
if value in ["", "YOUR API KEY", "YOUR_API_KEY"]:
|
||||
return ""
|
||||
return value
|
||||
|
||||
@staticmethod
|
||||
def _resolve_embedding_api_base(provider_key: str, default_base: str) -> str:
|
||||
"""Pick the API base for an explicit embedding provider from config."""
|
||||
from config import conf
|
||||
|
||||
base_map = {
|
||||
"openai": "open_ai_api_base",
|
||||
"linkai": "linkai_api_base",
|
||||
"doubao": "ark_base_url",
|
||||
"zhipu": "zhipu_ai_api_base",
|
||||
}
|
||||
field = base_map.get(provider_key)
|
||||
if not field:
|
||||
return default_base
|
||||
value = (conf().get(field) or "").strip()
|
||||
if not value:
|
||||
return default_base
|
||||
if provider_key == "linkai" and not value.rstrip("/").endswith("/v1"):
|
||||
return f"{value.rstrip('/')}/v1"
|
||||
return value
|
||||
|
||||
def _sync_memory(self, memory_manager, session_id: Optional[str] = None):
|
||||
"""Sync memory database"""
|
||||
|
||||
Reference in New Issue
Block a user