Merge pull request #2826 from zhayujie/feat-multi-model

feat: multi-provider model console
This commit is contained in:
zhayujie
2026-05-22 11:08:13 +08:00
committed by GitHub
78 changed files with 6067 additions and 673 deletions

View File

@@ -44,6 +44,7 @@ CREATE TABLE IF NOT EXISTS messages (
role TEXT NOT NULL,
content TEXT NOT NULL,
created_at INTEGER NOT NULL,
extras TEXT NOT NULL DEFAULT '',
UNIQUE (session_id, seq)
);
@@ -67,6 +68,12 @@ _MIGRATION_ADD_CONTEXT_START_SEQ = """
ALTER TABLE sessions ADD COLUMN context_start_seq INTEGER NOT NULL DEFAULT 0;
"""
# Generic JSON sidecar for per-message attachments (TTS audio URL, future use).
# Always optional — readers must tolerate missing column / empty / invalid JSON.
_MIGRATION_ADD_MSG_EXTRAS = """
ALTER TABLE messages ADD COLUMN extras TEXT NOT NULL DEFAULT '';
"""
DEFAULT_MAX_AGE_DAYS: int = 30
@@ -169,20 +176,26 @@ def _group_into_display_turns(
cur_rest: List[tuple] = []
started = False
for role, raw_content, created_at in rows:
for role, raw_content, created_at, raw_extras in rows:
try:
content = json.loads(raw_content)
except Exception:
content = raw_content
try:
extras = json.loads(raw_extras) if raw_extras else {}
if not isinstance(extras, dict):
extras = {}
except Exception:
extras = {}
if role == "user" and _is_visible_user_message(content):
if started:
groups.append((cur_user, cur_rest))
cur_user = (content, created_at)
cur_user = (content, created_at, extras)
cur_rest = []
started = True
else:
cur_rest.append((role, content, created_at))
cur_rest.append((role, content, created_at, extras))
if started:
groups.append((cur_user, cur_rest))
@@ -195,7 +208,7 @@ def _group_into_display_turns(
for user_row, rest in groups:
# User turn
if user_row:
content, created_at = user_row
content, created_at, _u_extras = user_row
text = _extract_display_text(content)
if text:
turns.append({"role": "user", "content": text, "created_at": created_at})
@@ -206,8 +219,11 @@ def _group_into_display_turns(
tool_results: Dict[str, str] = {}
final_text = ""
final_ts: Optional[int] = None
merged_extras: Dict[str, Any] = {}
for role, content, created_at in rest:
for role, content, created_at, extras in rest:
if role == "assistant" and isinstance(extras, dict):
merged_extras.update(extras)
if role == "user":
tool_results.update(_extract_tool_results(content))
elif role == "assistant":
@@ -256,6 +272,8 @@ def _group_into_display_turns(
"steps": steps,
"created_at": final_ts or (user_row[1] if user_row else 0),
}
if merged_extras:
turn["extras"] = merged_extras
turns.append(turn)
return turns
@@ -411,13 +429,15 @@ class ConversationStore:
content = json.dumps(
msg.get("content", ""), ensure_ascii=False
)
extras_obj = msg.get("extras") or {}
extras = json.dumps(extras_obj, ensure_ascii=False) if extras_obj else ""
conn.execute(
"""
INSERT OR IGNORE INTO messages
(session_id, seq, role, content, created_at)
VALUES (?, ?, ?, ?, ?)
(session_id, seq, role, content, created_at, extras)
VALUES (?, ?, ?, ?, ?, ?)
""",
(session_id, next_seq, role, content, now),
(session_id, next_seq, role, content, now, extras),
)
next_seq += 1
@@ -651,6 +671,55 @@ class ConversationStore:
logger.info(f"[ConversationStore] Pruned {deleted} expired sessions")
return deleted
def attach_extras_to_last_assistant(
    self,
    session_id: str,
    extras: Dict[str, Any],
) -> Optional[int]:
    """
    Fold ``extras`` into the newest assistant message of ``session_id``.

    Intended for post-processing steps (e.g. TTS) that annotate an already
    persisted bot reply with attachments such as audio URLs. Keys in
    ``extras`` overwrite same-named keys already stored on the message;
    stored values that are empty, invalid JSON, or not a JSON object are
    treated as an empty mapping.

    Returns the ``seq`` of the updated message, or ``None`` when ``extras``
    is empty, the session has no assistant message, or the update raised.
    """
    if not extras:
        return None

    with self._lock:
        conn = self._connect()
        try:
            latest = conn.execute(
                """
                SELECT seq, extras FROM messages
                WHERE session_id = ? AND role = 'assistant'
                ORDER BY seq DESC LIMIT 1
                """,
                (session_id,),
            ).fetchone()
            if not latest:
                return None

            target_seq, stored = latest

            # Start from whatever is already stored, but only if it decodes
            # to a JSON object; anything else is discarded.
            merged: Dict[str, Any] = {}
            if stored:
                try:
                    decoded = json.loads(stored)
                except Exception:
                    decoded = None
                if isinstance(decoded, dict):
                    merged = decoded

            merged.update(extras)
            serialized = json.dumps(merged, ensure_ascii=False)
            conn.execute(
                "UPDATE messages SET extras = ? WHERE session_id = ? AND seq = ?",
                (serialized, session_id, target_seq),
            )
            conn.commit()
            return target_seq
        except Exception as e:
            # Best-effort annotation: never let a failure here break the caller.
            logger.warning(f"[ConversationStore] attach_extras failed: {e}")
            return None
        finally:
            conn.close()
def load_history_page(
self,
session_id: str,
@@ -698,7 +767,22 @@ class ConversationStore:
).fetchone()
ctx_start = ctx_row[0] if ctx_row else 0
# extras column is added by migration; tolerate older DBs that
# might miss it by falling back to an empty-string placeholder.
try:
rows = conn.execute(
"""
SELECT seq, role, content, created_at, extras
FROM messages
WHERE session_id = ?
ORDER BY seq ASC
""",
(session_id,),
).fetchall()
except sqlite3.OperationalError:
rows = [
(seq, role, content, created_at, "")
for (seq, role, content, created_at) in conn.execute(
"""
SELECT seq, role, content, created_at
FROM messages
@@ -707,6 +791,7 @@ class ConversationStore:
""",
(session_id,),
).fetchall()
]
finally:
conn.close()
@@ -719,13 +804,16 @@ class ConversationStore:
include_thinking = False
# Strip seq for display grouping, but record max seq per visible user group
plain_rows = [(role, content, created_at) for _seq, role, content, created_at in rows]
plain_rows = [
(role, content, created_at, extras_raw)
for _seq, role, content, created_at, extras_raw in rows
]
visible = _group_into_display_turns(plain_rows, include_thinking=include_thinking)
# Build a mapping: find the seq of each visible user message to annotate context boundary.
# Walk through rows to find visible user message seqs in order.
visible_user_seqs: List[int] = []
for seq, role, raw_content, _ts in rows:
for seq, role, raw_content, _ts, _extras in rows:
if role != "user":
continue
try:
@@ -911,6 +999,18 @@ class ConversationStore:
except Exception as e:
logger.warning(f"[ConversationStore] Migration (context_start_seq) failed: {e}")
msg_cols = {
row[1]
for row in conn.execute("PRAGMA table_info(messages)").fetchall()
}
if "extras" not in msg_cols:
try:
conn.execute(_MIGRATION_ADD_MSG_EXTRAS)
conn.commit()
logger.info("[ConversationStore] Migrated: added messages.extras column")
except Exception as e:
logger.warning(f"[ConversationStore] Migration (extras) failed: {e}")
def _connect(self) -> sqlite3.Connection:
conn = sqlite3.connect(str(self._db_path), timeout=10)
conn.execute("PRAGMA journal_mode=WAL")

View File

@@ -603,15 +603,24 @@ class AgentStreamExecutor:
except Exception as e:
logger.debug(f"[Agent] MCP sync skipped: {e}")
# Prepare tool definitions (OpenAI/Claude format)
# Prepare tool definitions. Prefer get_json_schema() when it yields
# real properties (lets tools augment schema at runtime), otherwise
# fall back to the static `tool.params` (MCP tools rely on this).
tools_schema = None
if self.tools:
tools_schema = []
for tool in self.tools.values():
input_schema = tool.params
try:
dynamic = (tool.get_json_schema() or {}).get("parameters") or {}
if dynamic.get("properties"):
input_schema = dynamic
except Exception:
pass
tools_schema.append({
"name": tool.name,
"description": tool.description,
"input_schema": tool.params # Claude uses input_schema
"input_schema": input_schema,
})
# Create request

View File

@@ -3,7 +3,7 @@ Vision tool - Analyze images using Vision API.
Supports local files (auto base64-encoded) and HTTP URLs.
Provider resolution:
- tool.vision.model (if set) means "prefer this model first; fall back to
- tools.vision.model (if set) means "prefer this model first; fall back to
other configured providers if it fails". The model name is mapped to its
native provider (e.g. doubao-* → Doubao, kimi-* → Moonshot, gpt-* →
OpenAI/LinkAI). That provider is tried first, then the standard auto
@@ -30,7 +30,7 @@ from common import const
from common.log import logger
from config import conf
DEFAULT_MODEL = const.GPT_41_MINI
DEFAULT_MODEL = const.GPT_55
DEFAULT_TIMEOUT = 60
MAX_TOKENS = 1000
COMPRESS_THRESHOLD = 1_048_576 # 1 MB
@@ -53,14 +53,14 @@ _DISCOVERABLE_MODELS = [
("ark_api_key", const.DOUBAO, const.DOUBAO_SEED_2_PRO, "Doubao"),
("dashscope_api_key", const.QWEN_DASHSCOPE, const.QWEN36_PLUS, "DashScope"),
("claude_api_key", const.CLAUDEAPI, const.CLAUDE_4_6_SONNET, "Claude"),
("gemini_api_key", const.GEMINI, const.GEMINI_31_FLASH_LITE_PRE, "Gemini"),
("gemini_api_key", const.GEMINI, const.GEMINI_35_FLASH, "Gemini"),
("qianfan_api_key", const.QIANFAN, const.ERNIE_45_TURBO_VL, "Qianfan"),
("zhipu_ai_api_key", const.ZHIPU_AI, const.GLM_4_7, "ZhipuAI"),
("minimax_api_key", const.MiniMax, const.MINIMAX_M2_7, "MiniMax"),
]
# Model name prefix → discoverable provider display_name.
# Used to auto-route tool.vision.model to its native provider.
# Used to auto-route tools.vision.model to its native provider.
# Matched case-insensitively; longest prefix wins.
_MODEL_PREFIX_TO_PROVIDER = [
("doubao-", "Doubao"),
@@ -154,7 +154,7 @@ class Vision(BaseTool):
# Default model is only used as a last-resort placeholder for providers
# whose VisionProvider.model_override is None (e.g. raw OpenAI provider
# when the user did not configure tool.vision.model).
# when the user did not configure tools.vision.model).
return self._call_with_fallback(providers, DEFAULT_MODEL, question, image_content)
def _call_with_fallback(self, providers: List[VisionProvider], model: str,
@@ -193,12 +193,12 @@ class Vision(BaseTool):
"""
Build an ordered list of providers to try.
Semantics of `tool.vision.model`:
Semantics of `tools.vision.model`:
"Prefer this model first; fall back to other configured providers
if it fails."
Order:
1. The provider that natively serves `tool.vision.model` (if any
1. The provider that natively serves `tools.vision.model` (if any
and its API key is configured) — using the user-specified model
name verbatim.
2. Auto-discovery chain as fallback:
@@ -213,7 +213,7 @@ class Vision(BaseTool):
user_model = self._resolve_user_vision_model()
providers: List[VisionProvider] = []
# Step 1: preferred provider derived from tool.vision.model
# Step 1: preferred provider derived from tools.vision.model
if user_model:
preferred = self._route_by_model_name(user_model)
if preferred:
@@ -251,11 +251,11 @@ class Vision(BaseTool):
@staticmethod
def _resolve_user_vision_model() -> Optional[str]:
"""Read tool.vision.model from config; return None if unset/blank."""
tool_conf = conf().get("tool", {})
if not isinstance(tool_conf, dict):
"""Read tools.vision.model (singular ``tool`` kept as runtime fallback)."""
tools_conf = conf().get("tools") or conf().get("tool") or {}
if not isinstance(tools_conf, dict):
return None
vision_conf = tool_conf.get("vision", {})
vision_conf = tools_conf.get("vision", {})
if not isinstance(vision_conf, dict):
return None
m = vision_conf.get("model")
@@ -303,7 +303,7 @@ class Vision(BaseTool):
self._append_provider(providers, lambda: self._build_linkai_provider(user_model))
if providers:
return providers
logger.warning(f"[Vision] tool.vision.model='{user_model}' looks like an OpenAI "
logger.warning(f"[Vision] tools.vision.model='{user_model}' looks like an OpenAI "
f"model but neither OPENAI_API_KEY nor LINKAI_API_KEY is configured.")
return None # fall through to auto
@@ -317,7 +317,7 @@ class Vision(BaseTool):
continue
api_key = conf().get(config_key, "")
if not api_key or not api_key.strip():
logger.warning(f"[Vision] tool.vision.model='{user_model}' routes to "
logger.warning(f"[Vision] tools.vision.model='{user_model}' routes to "
f"'{display_name}' but '{config_key}' is not configured. "
f"Falling back to auto-discovery.")
return None # fall through to auto
@@ -452,8 +452,8 @@ class Vision(BaseTool):
if not self._main_bot_supports_vision(bot):
return None
# Use the configured main model name; do NOT inject tool.vision.model
# here, because by the time we reach this branch the tool.vision.model
# Use the configured main model name; do NOT inject tools.vision.model
# here, because by the time we reach this branch the tools.vision.model
# routing has already been attempted (and either matched the main bot
# or failed to find a provider).
main_model_name = conf().get("model") or None

View File

@@ -1,13 +1,27 @@
"""
Web Search tool - Search the web using Bocha or LinkAI search API.
Supports two backends with unified response format:
1. Bocha Search (primary, requires BOCHA_API_KEY)
2. LinkAI Search (fallback, requires LINKAI_API_KEY)
"""Web Search tool. Supports four backends with a unified response format:
- bocha (https://open.bochaai.com)
- zhipu (https://docs.bigmodel.cn/cn/guide/tools/web-search)
- qianfan (https://cloud.baidu.com/doc/qianfan/s/2mh4su4uy)
- linkai (https://link-ai.tech, fallback)
Provider selection
- strategy 'auto' (default): pick the first configured provider in the
canonical order [bocha, qianfan, zhipu, linkai]. When the caller passes
an explicit `provider` it overrides the pick; an invalid/unconfigured
one silently falls back to the auto order.
- strategy 'fixed': use the configured provider; if its credential is
missing at call time, silently fall back to auto order (no card hint).
Credentials
- bocha : tools.web_search.bocha_api_key -> env BOCHA_API_KEY
- zhipu : conf.zhipu_ai_api_key -> env ZHIPUAI_API_KEY
- qianfan : conf.qianfan_api_key -> env QIANFAN_API_KEY
- linkai : conf.linkai_api_key -> env LINKAI_API_KEY
"""
import os
import json
from typing import Dict, Any, Optional
import os
from typing import Any, Dict, List, Optional
import requests
@@ -16,12 +30,63 @@ from common.log import logger
from config import conf
# Default timeout for API requests (seconds)
DEFAULT_TIMEOUT = 30
# Canonical fallback order. Empirically ordered by Chinese real-time
# quality + relevance: bocha (best overall), qianfan (best for hot news),
# zhipu (strong on long-form articles), linkai (cloud aggregator, last
# resort).
PROVIDER_ORDER = ("bocha", "qianfan", "zhipu", "linkai")
PROVIDER_LABELS = {
"bocha": "Bocha",
"zhipu": "Zhipu",
"qianfan": "Baidu Qianfan",
"linkai": "LinkAI",
}
def _tools_web_search_conf() -> dict:
    """Fetch the ``tools.web_search`` config section.

    Returns an empty dict when either ``tools`` or ``tools.web_search`` is
    missing, falsy, or not a dict.
    """
    tools_section = conf().get("tools") or {}
    if isinstance(tools_section, dict):
        web_search_section = tools_section.get("web_search") or {}
        if isinstance(web_search_section, dict):
            return web_search_section
    return {}
def _get_api_key(provider: str) -> str:
"""Resolve API key for a provider, with conf -> env fallback."""
if provider == "bocha":
key = (_tools_web_search_conf().get("bocha_api_key") or "").strip()
return key or os.environ.get("BOCHA_API_KEY", "").strip()
if provider == "zhipu":
key = (conf().get("zhipu_ai_api_key") or "").strip()
return key or os.environ.get("ZHIPUAI_API_KEY", "").strip()
if provider == "qianfan":
key = (conf().get("qianfan_api_key") or "").strip()
return key or os.environ.get("QIANFAN_API_KEY", "").strip()
if provider == "linkai":
key = (conf().get("linkai_api_key") or "").strip()
return key or os.environ.get("LINKAI_API_KEY", "").strip()
return ""
def configured_providers() -> List[str]:
    """List the providers that currently have an API key, in ``PROVIDER_ORDER`` order."""
    # filter() keeps exactly the providers whose resolved key is non-empty.
    return list(filter(_get_api_key, PROVIDER_ORDER))
def _configured_strategy() -> str:
    """Return ``tools.web_search.strategy`` trimmed and lower-cased (``"auto"`` when unset)."""
    raw_strategy = _tools_web_search_conf().get("strategy") or "auto"
    return raw_strategy.strip().lower()
def _configured_provider() -> str:
    """Return ``tools.web_search.provider`` trimmed and lower-cased (``""`` when unset)."""
    raw_provider = _tools_web_search_conf().get("provider") or ""
    return raw_provider.strip().lower()
class WebSearch(BaseTool):
"""Tool for searching the web using Bocha or LinkAI search API"""
"""Tool for searching the web across multiple providers."""
name: str = "web_search"
description: str = "Search the web for real-time information. Returns titles, URLs, and snippets."
@@ -55,264 +120,368 @@ class WebSearch(BaseTool):
def __init__(self, config: dict = None):
self.config = config or {}
self._backend = None # Will be resolved on first execute
@staticmethod
def is_available() -> bool:
"""Check if web search is available (at least one API key is configured)"""
return bool(os.environ.get("BOCHA_API_KEY") or os.environ.get("LINKAI_API_KEY"))
"""Tool is offered to the agent when at least one provider has a key."""
return bool(configured_providers())
def _resolve_backend(self) -> Optional[str]:
"""
Determine which search backend to use.
Priority: Bocha > LinkAI
@classmethod
def get_json_schema(cls) -> dict:
"""Augment the static schema with a `provider` field — only when the
user has ≥2 providers configured AND strategy is 'auto'. Otherwise
the backend picks silently and exposing the field would only waste
the agent's tokens."""
schema = {
"name": cls.name,
"description": cls.description,
"parameters": json.loads(json.dumps(cls.params)), # deep copy
}
if _configured_strategy() != "auto":
return schema
available = configured_providers()
if len(available) < 2:
return schema
:return: 'bocha', 'linkai', or None
schema["parameters"]["properties"]["provider"] = {
"type": "string",
"enum": available,
"description": "Optional. Specifies the search backend. You may switch between providers when the user wants results from a particular source or from multiple sources.",
}
return schema
# ------------------------------------------------------------------
# Provider resolution
# ------------------------------------------------------------------
def _resolve_provider(self, requested: Optional[str]) -> Optional[str]:
"""Pick a provider for this call.
Priority: caller-supplied (if configured) > fixed strategy (if
configured) > first configured in PROVIDER_ORDER. Silent fallback
when the desired one has no key.
"""
if os.environ.get("BOCHA_API_KEY"):
return "bocha"
if os.environ.get("LINKAI_API_KEY"):
return "linkai"
available = configured_providers()
if not available:
return None
def execute(self, args: Dict[str, Any]) -> ToolResult:
"""
Execute web search
if requested:
req = requested.strip().lower()
if req in available:
return req
logger.warning(f"[WebSearch] requested provider '{requested}' unavailable, falling back")
:param args: Search parameters (query, count, freshness, summary)
:return: Search results
"""
query = args.get("query", "").strip()
if _configured_strategy() == "fixed":
pinned = _configured_provider()
if pinned in available:
return pinned
if pinned:
logger.warning(f"[WebSearch] pinned provider '{pinned}' unavailable, falling back to auto")
return available[0]
@staticmethod
def _resolution_reason(requested: Optional[str], chosen: str) -> str:
"""Human-readable explanation for why `chosen` won the resolver."""
if requested and requested.strip().lower() == chosen:
return "caller-requested"
strategy = _configured_strategy()
if strategy == "fixed" and _configured_provider() == chosen:
return "fixed-strategy"
return "auto-fallback"
# ------------------------------------------------------------------
# Entry point
# ------------------------------------------------------------------
def execute(self, args: Dict[str, Any]) -> ToolResult:
query = (args.get("query") or "").strip()
if not query:
return ToolResult.fail("Error: 'query' parameter is required")
count = args.get("count", 10)
freshness = args.get("freshness", "noLimit")
summary = args.get("summary", False)
# Validate count
if not isinstance(count, int) or count < 1 or count > 50:
count = 10
# Resolve backend
backend = self._resolve_backend()
if not backend:
requested = args.get("provider")
provider = self._resolve_provider(requested)
if not provider:
return ToolResult.fail(
"Error: No search API key configured. "
"Please set BOCHA_API_KEY or LINKAI_API_KEY using env_config tool.\n"
" - Bocha Search: https://open.bocha.cn\n"
" - LinkAI Search: https://link-ai.tech"
"Error: No search provider configured. "
"Configure one of BOCHA_API_KEY / zhipu_ai_api_key / qianfan_api_key / linkai_api_key."
)
# Always log the routing decision so multi-provider deployments can
# tell at a glance which backend served any given query.
available = configured_providers()
reason = self._resolution_reason(requested, provider)
q_preview = query if len(query) <= 60 else (query[:57] + "...")
logger.info(
f"[WebSearch] provider={provider} reason={reason} "
f"available={list(available)} query={q_preview!r} count={count} freshness={freshness}"
)
try:
if backend == "bocha":
if provider == "bocha":
return self._search_bocha(query, count, freshness, summary)
else:
if provider == "zhipu":
return self._search_zhipu(query, count, freshness)
if provider == "qianfan":
return self._search_qianfan(query, count, freshness)
if provider == "linkai":
return self._search_linkai(query, count, freshness)
return ToolResult.fail(f"Error: Unknown provider '{provider}'")
except requests.Timeout:
return ToolResult.fail(f"Error: Search request timed out after {DEFAULT_TIMEOUT}s")
except requests.ConnectionError:
return ToolResult.fail("Error: Failed to connect to search API")
except Exception as e:
logger.error(f"[WebSearch] Unexpected error: {e}", exc_info=True)
logger.error(f"[WebSearch] Unexpected error ({provider}): {e}", exc_info=True)
return ToolResult.fail(f"Error: Search failed - {str(e)}")
# ------------------------------------------------------------------
# Bocha
# ------------------------------------------------------------------
def _search_bocha(self, query: str, count: int, freshness: str, summary: bool) -> ToolResult:
"""
Search using Bocha API
:param query: Search query
:param count: Number of results
:param freshness: Time range filter
:param summary: Whether to include summary
:return: Formatted search results
"""
api_key = os.environ.get("BOCHA_API_KEY", "")
url = "https://api.bocha.cn/v1/web-search"
api_key = _get_api_key("bocha")
url = "https://api.bochaai.com/v1/web-search"
headers = {
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json",
"Accept": "application/json"
"Accept": "application/json",
}
payload = {"query": query, "count": count, "freshness": freshness, "summary": summary}
payload = {
"query": query,
"count": count,
"freshness": freshness,
"summary": summary
}
logger.debug(f"[WebSearch] bocha: query='{query}', count={count}")
resp = requests.post(url, headers=headers, json=payload, timeout=DEFAULT_TIMEOUT)
logger.debug(f"[WebSearch] Bocha search: query='{query}', count={count}")
if resp.status_code == 401:
return ToolResult.fail("Error: Invalid bocha API key.")
if resp.status_code == 403:
return ToolResult.fail("Error: bocha API — insufficient balance. Top up at https://open.bochaai.com")
if resp.status_code == 429:
return ToolResult.fail("Error: bocha API rate limit reached.")
if resp.status_code != 200:
return ToolResult.fail(f"Error: bocha API returned HTTP {resp.status_code}")
response = requests.post(url, headers=headers, json=payload, timeout=DEFAULT_TIMEOUT)
if response.status_code == 401:
return ToolResult.fail("Error: Invalid BOCHA_API_KEY. Please check your API key.")
if response.status_code == 403:
return ToolResult.fail("Error: Bocha API - insufficient balance. Please top up at https://open.bocha.cn")
if response.status_code == 429:
return ToolResult.fail("Error: Bocha API rate limit reached. Please try again later.")
if response.status_code != 200:
return ToolResult.fail(f"Error: Bocha API returned HTTP {response.status_code}")
data = response.json()
# Check API-level error code
data = resp.json()
api_code = data.get("code")
if api_code is not None and api_code != 200:
msg = data.get("msg") or "Unknown error"
return ToolResult.fail(f"Error: Bocha API error (code={api_code}): {msg}")
# Extract and format results
return self._format_bocha_results(data, query)
def _format_bocha_results(self, data: dict, query: str) -> ToolResult:
"""
Format Bocha API response into unified result structure
:param data: Raw API response
:param query: Original query
:return: Formatted ToolResult
"""
search_data = data.get("data", {})
web_pages = search_data.get("webPages", {})
pages = web_pages.get("value", [])
if not pages:
return ToolResult.success({
"query": query,
"backend": "bocha",
"total": 0,
"results": [],
"message": "No results found"
})
return ToolResult.fail(f"Error: bocha API error (code={api_code}): {msg}")
pages = (data.get("data") or {}).get("webPages", {}).get("value", []) or []
results = []
for page in pages:
result = {
"title": page.get("name", ""),
"url": page.get("url", ""),
"snippet": page.get("snippet", ""),
"siteName": page.get("siteName", ""),
"datePublished": page.get("datePublished") or page.get("dateLastCrawled", ""),
for p in pages:
item = {
"title": p.get("name", ""),
"url": p.get("url", ""),
"snippet": p.get("snippet", ""),
"siteName": p.get("siteName", ""),
"datePublished": p.get("datePublished") or p.get("dateLastCrawled", ""),
}
# Include summary only if present
if page.get("summary"):
result["summary"] = page["summary"]
results.append(result)
total = web_pages.get("totalEstimatedMatches", len(results))
if p.get("summary"):
item["summary"] = p["summary"]
results.append(item)
total = (data.get("data") or {}).get("webPages", {}).get("totalEstimatedMatches", len(results))
return ToolResult.success({
"query": query,
"backend": "bocha",
"total": total,
"count": len(results),
"results": results
"query": query, "backend": "bocha",
"total": total, "count": len(results), "results": results,
})
# ------------------------------------------------------------------
# Zhipu
# ------------------------------------------------------------------
def _search_zhipu(self, query: str, count: int, freshness: str) -> ToolResult:
    """Run a search through the Zhipu ``web_search`` endpoint.

    POSTs to ``{zhipu_ai_api_base}/web_search`` with a bearer token and maps
    the response items onto the shared result shape
    (title / url / snippet / siteName / datePublished).

    :param query: Search query; only its first 70 characters are sent.
    :param count: Requested number of results, clamped to 1..50.
    :param freshness: Recency token; values outside the known set are sent
        as ``"noLimit"``.
    :return: ``ToolResult.success`` with the unified payload, or
        ``ToolResult.fail`` on HTTP 401, any other non-200 status, or an
        ``error`` object in the response body.
    """
    api_key = _get_api_key("zhipu")
    api_base = (conf().get("zhipu_ai_api_base") or "https://open.bigmodel.cn/api/paas/v4").rstrip("/")
    url = f"{api_base}/web_search"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    # Zhipu Web Search expects `search_query` <= 70 chars; truncate
    # gracefully so a long agent-supplied query doesn't get rejected.
    trimmed_query = (query or "")[:70]
    # Engine is configurable via tools.web_search.zhipu_search_engine;
    # unrecognized values fall back to "search_pro".
    engine = (_tools_web_search_conf().get("zhipu_search_engine") or "search_pro").strip().lower()
    if engine not in ("search_std", "search_pro", "search_pro_sogou", "search_pro_quark"):
        engine = "search_pro"
    payload: Dict[str, Any] = {
        "search_engine": engine,
        "search_query": trimmed_query,
        "search_intent": False,
        "count": max(1, min(int(count or 10), 50)),
        "search_recency_filter": freshness if freshness in (
            "oneDay", "oneWeek", "oneMonth", "oneYear", "noLimit"
        ) else "noLimit",
    }
    # content_size is only sent when explicitly configured as medium/high.
    content_size = (_tools_web_search_conf().get("zhipu_content_size") or "").strip().lower()
    if content_size in ("medium", "high"):
        payload["content_size"] = content_size
    logger.debug(f"[WebSearch] zhipu: query='{trimmed_query}', count={payload['count']}, engine={engine}")
    resp = requests.post(url, headers=headers, json=payload, timeout=DEFAULT_TIMEOUT)
    if resp.status_code == 401:
        return ToolResult.fail("Error: Invalid Zhipu API key.")
    if resp.status_code != 200:
        return ToolResult.fail(f"Error: Zhipu API returned HTTP {resp.status_code}: {resp.text[:200]}")
    data = resp.json()
    # Business-level errors (1701/1702/1703 etc.) come back as
    # {"error": {"code","message"}} even on HTTP 200.
    if isinstance(data, dict) and data.get("error"):
        err = data["error"] or {}
        return ToolResult.fail(f"Error: Zhipu returned {err.get('code')}: {err.get('message','')}")
    # Results are read from the top level first, then from a nested "data" object.
    items = data.get("search_result") or (data.get("data") or {}).get("search_result") or []
    results = []
    for it in items:
        # Each field tries one key first and falls back to an alternate key.
        results.append({
            "title": it.get("title", ""),
            "url": it.get("link") or it.get("url", ""),
            "snippet": it.get("content") or it.get("snippet", ""),
            "siteName": it.get("media") or it.get("siteName", ""),
            "datePublished": it.get("publish_date") or it.get("datePublished", ""),
        })
    # "query" echoes the caller's original (untrimmed) query; "total" is the
    # number of parsed results.
    return ToolResult.success({
        "query": query, "backend": "zhipu",
        "total": len(results), "count": len(results), "results": results,
    })
# ------------------------------------------------------------------
# Qianfan (Baidu)
# ------------------------------------------------------------------
def _search_qianfan(self, query: str, count: int, freshness: str) -> ToolResult:
    """Run a search through the Baidu Qianfan AI Search endpoint.

    POSTs to ``{qianfan_api_base}/ai_search/web_search`` with a bearer token
    and maps the returned ``references`` onto the shared result shape
    (title / url / snippet / siteName / datePublished).

    :param query: Search query, sent as a single user message.
    :param count: Requested number of results, clamped to 1..50.
    :param freshness: Recency token translated into a ``page_time`` date
        range; no filter is sent when the translation yields nothing.
    :return: ``ToolResult.success`` with the unified payload, or
        ``ToolResult.fail`` on HTTP 401, any other non-200 status, or a
        truthy ``code`` in the response body.
    """
    api_key = _get_api_key("qianfan")
    api_base = (conf().get("qianfan_api_base") or "https://qianfan.baidubce.com/v2").rstrip("/")
    url = f"{api_base}/ai_search/web_search"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
        "X-Appbuilder-From": "cow",
    }
    count = max(1, min(int(count or 10), 50))
    payload: Dict[str, Any] = {
        "messages": [{"role": "user", "content": query}],
        "search_source": "baidu_search_v2",
        "resource_type_filter": [{"type": "web", "top_k": count}],
    }
    # Baidu AI Search expects freshness as a date-range filter, not a
    # named recency token. Translate our shared vocabulary into the
    # underlying page_time range expected by the API.
    search_filter = self._qianfan_build_freshness_filter(freshness)
    if search_filter:
        payload["search_filter"] = search_filter
    logger.debug(f"[WebSearch] qianfan: query='{query}', count={count}, freshness={freshness!r}")
    resp = requests.post(url, headers=headers, json=payload, timeout=DEFAULT_TIMEOUT)
    if resp.status_code == 401:
        return ToolResult.fail("Error: Invalid Qianfan API key.")
    if resp.status_code != 200:
        return ToolResult.fail(f"Error: Qianfan API returned HTTP {resp.status_code}: {resp.text[:200]}")
    data = resp.json()
    # Even on HTTP 200 Baidu surfaces business errors as {"code","message"}.
    if isinstance(data, dict) and data.get("code"):
        return ToolResult.fail(f"Error: Qianfan returned {data.get('code')}: {data.get('message','')}")
    refs = data.get("references") or []
    results = []
    for d in refs:
        results.append({
            "title": d.get("title", ""),
            "url": d.get("url", ""),
            # Snippet is capped at the first 200 characters of the content.
            "snippet": (d.get("content") or "")[:200],
            "siteName": d.get("web_anchor") or d.get("website") or "",
            "datePublished": d.get("date", ""),
        })
    # "total" is the number of parsed references.
    return ToolResult.success({
        "query": query, "backend": "qianfan",
        "total": len(results), "count": len(results), "results": results,
    })
@staticmethod
def _qianfan_build_freshness_filter(freshness: str) -> Optional[Dict[str, Any]]:
if not freshness or freshness == "noLimit":
return None
delta_days = {"oneDay": 1, "oneWeek": 7, "oneMonth": 30, "oneYear": 365}.get(freshness)
if not delta_days:
return None
from datetime import datetime, timedelta
now = datetime.now()
end_date = (now + timedelta(days=1)).strftime("%Y-%m-%d")
start_date = (now - timedelta(days=delta_days)).strftime("%Y-%m-%d")
return {"range": {"page_time": {"gte": start_date, "lt": end_date}}}
# ------------------------------------------------------------------
# LinkAI (plugin)
# ------------------------------------------------------------------
def _search_linkai(self, query: str, count: int, freshness: str) -> ToolResult:
"""
Search using LinkAI plugin API
:param query: Search query
:param count: Number of results
:param freshness: Time range filter
:return: Formatted search results
"""
api_key = os.environ.get("LINKAI_API_KEY", "")
api_base = conf().get("linkai_api_base", "https://api.link-ai.tech")
url = f"{api_base.rstrip('/')}/v1/plugin/execute"
api_key = _get_api_key("linkai")
api_base = (conf().get("linkai_api_base") or "https://api.link-ai.tech").rstrip("/")
url = f"{api_base}/v1/plugin/execute"
from common.utils import get_cloud_headers
headers = get_cloud_headers(api_key)
payload = {
"code": "web-search",
"args": {
"query": query,
"count": count,
"freshness": freshness
}
}
payload = {"code": "web-search", "args": {"query": query, "count": count, "freshness": freshness}}
logger.debug(f"[WebSearch] linkai: query='{query}', count={count}")
resp = requests.post(url, headers=headers, json=payload, timeout=DEFAULT_TIMEOUT)
logger.debug(f"[WebSearch] LinkAI search: query='{query}', count={count}")
response = requests.post(url, headers=headers, json=payload, timeout=DEFAULT_TIMEOUT)
if response.status_code == 401:
return ToolResult.fail("Error: Invalid LINKAI_API_KEY. Please check your API key.")
if response.status_code != 200:
return ToolResult.fail(f"Error: LinkAI API returned HTTP {response.status_code}")
data = response.json()
if resp.status_code == 401:
return ToolResult.fail("Error: Invalid LinkAI API key.")
if resp.status_code != 200:
return ToolResult.fail(f"Error: LinkAI API returned HTTP {resp.status_code}")
data = resp.json()
if not data.get("success"):
msg = data.get("message") or "Unknown error"
return ToolResult.fail(f"Error: LinkAI search failed: {msg}")
return self._format_linkai_results(data, query)
def _format_linkai_results(self, data: dict, query: str) -> ToolResult:
    """
    Format a LinkAI API response into the unified result structure.

    The search payload is read from ``data["data"]``. It may be a JSON
    string (decoded here), plain text (returned as a single content
    result), or a dict exposing ``webPages.value`` entries.

    :param data: Raw API response
    :param query: Original query
    :return: Formatted ToolResult
    """
    raw = data.get("data", "")

    # LinkAI may return data as a JSON string.
    if isinstance(raw, str):
        try:
            raw = json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            # Plain text: return it unchanged as a single result.
            return ToolResult.success({
                "query": query, "backend": "linkai",
                "total": 1, "count": 1, "results": [{"content": raw}],
            })

    if isinstance(raw, dict):
        # `or {}` / `or []` tolerate explicit nulls in the payload.
        web_pages = raw.get("webPages") or {}
        pages = web_pages.get("value", []) or []
        if pages:
            results = []
            for p in pages:
                item = {
                    "title": p.get("name", ""),
                    "url": p.get("url", ""),
                    "snippet": p.get("snippet", ""),
                    "siteName": p.get("siteName", ""),
                    "datePublished": p.get("datePublished") or p.get("dateLastCrawled", ""),
                }
                # "summary" is only included when the page carries one.
                if p.get("summary"):
                    item["summary"] = p["summary"]
                results.append(item)
            total = web_pages.get("totalEstimatedMatches", len(results))
            return ToolResult.success({
                "query": query, "backend": "linkai",
                "total": total, "count": len(results), "results": results,
            })

    # Fallback: unknown shape, return the stringified payload.
    return ToolResult.success({
        "query": query, "backend": "linkai",
        "total": 1, "count": 1, "results": [{"content": str(raw)}],
    })

View File

@@ -521,7 +521,7 @@ class AgentInitializer:
if tool_name == "web_search":
from agent.tools.web_search.web_search import WebSearch
if not WebSearch.is_available():
logger.debug("[AgentInitializer] WebSearch skipped - no BOCHA_API_KEY or LINKAI_API_KEY")
logger.debug("[AgentInitializer] WebSearch skipped - no search provider configured")
continue
# Special handling for EnvConfig tool

View File

@@ -14,7 +14,9 @@ class Bridge(object):
def __init__(self):
self.btype = {
"chat": const.OPENAI,
"voice_to_text": conf().get("voice_to_text", "openai"),
# Empty `voice_to_text` (the default in new configs) triggers
# the auto-pick below — see _auto_pick_voice_to_text for order.
"voice_to_text": conf().get("voice_to_text") or self._auto_pick_voice_to_text(),
"text_to_voice": conf().get("text_to_voice", "google"),
"translate": conf().get("translate", "baidu"),
}
@@ -84,6 +86,46 @@ class Bridge(object):
self.chat_bots = {}
self._agent_bridge = None
def refresh_voice(self):
    """Reload the voice provider choice from config and invalidate voice bots.

    Recomputes ``btype["voice_to_text"]`` / ``btype["text_to_voice"]`` from
    the current config and removes both entries from ``self.bots``. Only
    those two keys of ``btype`` and ``bots`` are modified here. The original
    note says this is used by the web console after voice settings are
    edited.
    """
    configured_v2t = conf().get("voice_to_text")
    # An unset/empty voice_to_text falls back to the key-based auto pick.
    v2t = configured_v2t or self._auto_pick_voice_to_text()
    t2v = conf().get("text_to_voice", "google")

    if conf().get("use_linkai") and conf().get("linkai_api_key"):
        # With LinkAI enabled, unset or OpenAI-flavoured choices are routed
        # to LinkAI instead.
        if not configured_v2t or configured_v2t in ("openai",):
            v2t = const.LINKAI
        configured_t2v = conf().get("text_to_voice")
        if not configured_t2v or configured_t2v in ("openai", const.TTS_1, const.TTS_1_HD):
            t2v = const.LINKAI

    self.btype["voice_to_text"] = v2t
    self.btype["text_to_voice"] = t2v
    # Drop the cached instances for both voice kinds.
    for kind in ("voice_to_text", "text_to_voice"):
        self.bots.pop(kind, None)
    logger.info(f"[Bridge] voice refreshed: voice_to_text={v2t}, text_to_voice={t2v}")
@staticmethod
def _auto_pick_voice_to_text() -> str:
    """Choose an ASR provider from whichever API key is configured.

    Keys are probed in the order openai, dashscope, zhipu, linkai; the
    first one holding a non-blank value that is not a template placeholder
    wins. Returns 'openai' when none qualifies.
    """
    placeholders = ("YOUR API KEY", "YOUR_API_KEY")
    candidates = (
        ("open_ai_api_key", "openai"),
        ("dashscope_api_key", "dashscope"),
        ("zhipu_ai_api_key", "zhipu"),
        ("linkai_api_key", "linkai"),
    )
    for conf_key, provider in candidates:
        value = (conf().get(conf_key) or "").strip()
        # Blank values and untouched template placeholders count as unset.
        if value and value not in placeholders:
            return provider
    return "openai"
# 模型对应的接口
def get_bot(self, typename):
if self.bots.get(typename) is None:

View File

@@ -171,7 +171,13 @@ class ChatChannel(Channel):
if "desire_rtype" not in context and conf().get("always_reply_voice") and ReplyType.VOICE not in self.NOT_SUPPORT_REPLYTYPE:
context["desire_rtype"] = ReplyType.VOICE
elif context.type == ContextType.VOICE:
if "desire_rtype" not in context and conf().get("voice_reply_voice") and ReplyType.VOICE not in self.NOT_SUPPORT_REPLYTYPE:
# Voice input replies with voice when either voice_reply_voice
# (mirror voice) or the global always_reply_voice toggle is on.
if (
"desire_rtype" not in context
and (conf().get("voice_reply_voice") or conf().get("always_reply_voice"))
and ReplyType.VOICE not in self.NOT_SUPPORT_REPLYTYPE
):
context["desire_rtype"] = ReplyType.VOICE
return context
@@ -264,6 +270,8 @@ class ChatChannel(Channel):
if reply.type == ReplyType.TEXT:
reply_text = reply.content
if desire_rtype == ReplyType.VOICE and ReplyType.VOICE not in self.NOT_SUPPORT_REPLYTYPE:
# Preserve original text for the "text-then-voice" pattern in _send_reply.
context["voice_reply_text"] = reply.content
reply = super().build_text_to_voice(reply.content)
return self._decorate_reply(context, reply)
if context.get("isgroup", False):
@@ -311,6 +319,15 @@ class ChatChannel(Channel):
# 短暂延迟后发送图片
time.sleep(0.3)
self._send(reply, context)
# Send text bubble before voice, unless channel already streamed
# the text (feishu) or natively renders STT under the voice (wechatcom).
elif reply.type == ReplyType.VOICE and context.get("voice_reply_text") \
and not context.get("feishu_streamed") \
and context.get("channel_type") not in ("wechatcom_app",):
text_reply = Reply(ReplyType.TEXT, context.get("voice_reply_text"))
self._send(text_reply, context)
time.sleep(0.3)
self._send(reply, context)
else:
self._send(reply, context)

View File

@@ -86,6 +86,8 @@ def _check(func):
@singleton
class DingTalkChanel(ChatChannel, dingtalk_stream.ChatbotHandler):
NOT_SUPPORT_REPLYTYPE = []
dingtalk_client_id = conf().get('dingtalk_client_id')
dingtalk_client_secret = conf().get('dingtalk_client_secret')
@@ -870,6 +872,48 @@ class DingTalkChanel(ChatChannel, dingtalk_stream.ChatbotHandler):
self.reply_text("抱歉,文件上传失败", incoming_message)
return
# Native sampleAudio. Upload only accepts ogg/amr, so convert TTS mp3/wav to amr.
elif reply.type == ReplyType.VOICE:
logger.info(f"[DingTalk] Sending voice: {reply.content}")
access_token = self.get_access_token()
if not access_token:
logger.error("[DingTalk] Cannot get access token for voice")
self.reply_text("抱歉语音发送失败无法获取token", incoming_message)
return
voice_path = reply.content
if voice_path.startswith("file://"):
voice_path = voice_path[7:]
amr_path = voice_path
duration_ms = 0
if not voice_path.lower().endswith((".amr", ".ogg")):
try:
from voice.audio_convert import any_to_amr
amr_path = os.path.splitext(voice_path)[0] + ".amr"
duration_ms = int(any_to_amr(voice_path, amr_path) or 0)
except Exception as e:
logger.error(f"[DingTalk] Failed to convert voice to amr: {e}")
self.reply_text("抱歉,语音转码失败", incoming_message)
return
media_id = self.upload_media(amr_path, media_type="voice")
if not media_id:
logger.error("[DingTalk] Failed to upload voice media")
self.reply_text("抱歉,语音上传失败", incoming_message)
return
msg_param = {
"mediaId": media_id,
"duration": str(duration_ms or 1000),
}
success = self._send_file_message(
access_token, incoming_message, "sampleAudio", msg_param, isgroup
)
if not success:
self.reply_text("抱歉,语音发送失败", incoming_message)
return
# 处理文本消息
elif reply.type == ReplyType.TEXT:
logger.info(f"[DingTalk] Sending text message, length={len(reply.content)}")

View File

@@ -1515,10 +1515,16 @@ class FeiShuChanel(ChatChannel):
else:
context.type = ContextType.TEXT
context.content = content.strip()
# Text input opts into voice replies only when the always-on toggle is set.
if "desire_rtype" not in context and conf().get("always_reply_voice"):
context["desire_rtype"] = ReplyType.VOICE
elif context.type == ContextType.VOICE:
# 2.语音请求
if "desire_rtype" not in context and conf().get("voice_reply_voice"):
# 2.语音请求: voice input replies with voice if either
# voice_reply_voice (mirror reply) or always_reply_voice is on.
if "desire_rtype" not in context and (
conf().get("voice_reply_voice") or conf().get("always_reply_voice")
):
context["desire_rtype"] = ReplyType.VOICE
return context

View File

@@ -137,6 +137,11 @@
<i class="fas fa-sliders item-icon text-xs w-5 text-center"></i>
<span data-i18n="menu_config">配置</span>
</a>
<a class="sidebar-item flex items-center gap-3 px-3 py-2 rounded-lg cursor-pointer transition-all duration-150 hover:bg-white/5 hover:text-neutral-200 text-[14px]"
data-view="models">
<i class="fas fa-microchip item-icon text-xs w-5 text-center"></i>
<span data-i18n="menu_models">模型</span>
</a>
<a class="sidebar-item flex items-center gap-3 px-3 py-2 rounded-lg cursor-pointer transition-all duration-150 hover:bg-white/5 hover:text-neutral-200 text-[14px]"
data-view="skills">
<i class="fas fa-bolt item-icon text-xs w-5 text-center"></i>
@@ -417,8 +422,9 @@
</button>
</div>
<div id="slash-menu" class="slash-menu hidden"></div>
<div class="flex-1 min-w-0 relative flex items-center">
<textarea id="chat-input"
class="flex-1 min-w-0 px-4 py-[10px] rounded-xl border border-slate-200 dark:border-slate-600
class="w-full pl-4 pr-11 py-[10px] rounded-xl border border-slate-200 dark:border-slate-600
bg-slate-50 dark:bg-white/5 text-slate-800 dark:text-slate-100
placeholder:text-slate-400 dark:placeholder:text-slate-500
focus:outline-none focus:ring-0 focus:border-primary-600
@@ -426,6 +432,14 @@
rows="1"
data-i18n-placeholder="input_placeholder"
placeholder="输入消息,或输入 / 使用指令"></textarea>
<button id="mic-btn" type="button"
class="absolute right-2 top-1/2 -translate-y-1/2 w-8 h-8 flex items-center justify-center rounded-lg
text-slate-400 hover:text-primary-500 hover:bg-primary-50 dark:hover:bg-primary-900/20
cursor-pointer transition-colors duration-150"
data-i18n-title="mic_idle_title" title="点击录音 / 再按一次结束">
<i class="fas fa-microphone text-sm"></i>
</button>
</div>
<button id="send-btn"
class="flex-shrink-0 w-10 h-10 flex items-center justify-center rounded-lg
bg-primary-400 text-white hover:bg-primary-500
@@ -460,6 +474,11 @@
<i class="fas fa-microchip text-primary-500 text-sm"></i>
</div>
<h3 class="font-semibold text-slate-800 dark:text-slate-100" data-i18n="config_model">模型配置</h3>
<a class="ml-auto text-xs text-slate-500 dark:text-slate-400 hover:text-primary-500 dark:hover:text-primary-400 cursor-pointer transition-colors flex items-center gap-1"
onclick="navigateTo('models')">
<span data-i18n="config_model_advanced">高级配置</span>
<i class="fas fa-arrow-right text-[10px]"></i>
</a>
</div>
<div class="space-y-5">
<!-- Provider -->
@@ -850,6 +869,41 @@
</div>
</div>
<!-- ====================================================== -->
<!-- VIEW: Models -->
<!-- ====================================================== -->
<div id="view-models" class="view">
<!-- Tailwind JIT safelist: capability-card icon colors are
emitted from JS template strings. Listing them here
(display:none) guarantees the CDN-side compiler picks
them up regardless of render timing. -->
<div class="hidden bg-blue-50 dark:bg-blue-900/30 text-blue-500
bg-orange-50 dark:bg-orange-900/30 text-orange-500
bg-purple-50 dark:bg-purple-900/30 text-purple-500
bg-amber-50 dark:bg-amber-900/30 text-amber-500
bg-primary-50 dark:bg-primary-900/30 text-primary-500"></div>
<div class="flex-1 overflow-y-auto p-6">
<div class="max-w-4xl mx-auto">
<div class="flex items-center justify-between mb-6">
<div>
<h2 class="text-xl font-bold text-slate-800 dark:text-slate-100" data-i18n="models_title">模型管理</h2>
<p class="text-sm text-slate-500 dark:text-slate-400 mt-1" data-i18n="models_desc">统一管理对话、视觉、语音、向量、图像、搜索能力</p>
</div>
<button id="models-add-vendor-btn" onclick="openVendorModal('')"
class="flex items-center gap-2 px-4 py-2 rounded-lg bg-primary-500 hover:bg-primary-600
text-white text-sm font-medium cursor-pointer transition-colors duration-150">
<i class="fas fa-plus text-xs"></i>
<span data-i18n="models_add_vendor">添加厂商</span>
</button>
</div>
<div id="models-loading" class="flex items-center gap-2 py-12 justify-center text-slate-400 dark:text-slate-500 text-sm">
<i class="fas fa-spinner fa-spin text-xs"></i><span>Loading...</span>
</div>
<div id="models-content" class="grid gap-6 hidden"></div>
</div>
</div>
</div>
<!-- ====================================================== -->
<!-- VIEW: Channels -->
<!-- ====================================================== -->
@@ -959,7 +1013,7 @@
</div><!-- /app -->
<!-- Confirm Dialog -->
<div id="confirm-dialog-overlay" class="fixed inset-0 bg-black/50 z-[100] hidden flex items-center justify-center">
<div id="confirm-dialog-overlay" class="fixed inset-0 bg-black/50 z-[200] hidden flex items-center justify-center">
<div class="bg-white dark:bg-[#1A1A1A] rounded-2xl border border-slate-200 dark:border-white/10 shadow-xl
w-full max-w-sm mx-4 overflow-hidden">
<div class="p-6">
@@ -984,6 +1038,77 @@
</div>
</div>
<!-- Vendor Credentials Modal -->
<div id="vendor-modal-overlay" class="fixed inset-0 bg-black/50 z-[100] hidden flex items-center justify-center">
<div class="bg-white dark:bg-[#1A1A1A] rounded-2xl border border-slate-200 dark:border-white/10 shadow-xl
w-full max-w-md mx-4">
<div class="p-6">
<div class="flex items-center gap-3 mb-5">
<div class="w-10 h-10 rounded-xl bg-primary-50 dark:bg-primary-900/20 flex items-center justify-center flex-shrink-0">
<i class="fas fa-key text-primary-500"></i>
</div>
<div class="min-w-0 flex-1">
<h3 id="vendor-modal-title" class="font-semibold text-slate-800 dark:text-slate-100 text-base"></h3>
<p id="vendor-modal-subtitle" class="text-xs text-slate-500 dark:text-slate-400 mt-0.5 font-mono"></p>
</div>
</div>
<!-- Provider selector (only visible when adding via top button) -->
<div id="vendor-modal-picker-wrap" class="mb-4 hidden">
<label class="block text-sm font-medium text-slate-600 dark:text-slate-400 mb-1.5" data-i18n="models_provider">厂商</label>
<div id="vendor-modal-picker" class="cfg-dropdown" tabindex="0">
<div class="cfg-dropdown-selected">
<span class="cfg-dropdown-text">--</span>
<i class="fas fa-chevron-down cfg-dropdown-arrow"></i>
</div>
<div class="cfg-dropdown-menu"></div>
</div>
</div>
<div class="space-y-4">
<div>
<label class="block text-sm font-medium text-slate-600 dark:text-slate-400 mb-1.5">API Key</label>
<input id="vendor-modal-key" type="text" autocomplete="off" data-1p-ignore data-lpignore="true"
class="w-full px-3 py-2 rounded-lg border border-slate-200 dark:border-slate-600
bg-slate-50 dark:bg-white/5 text-sm text-slate-800 dark:text-slate-100
focus:outline-none focus:border-primary-500 font-mono transition-colors"
placeholder="sk-...">
</div>
<div id="vendor-modal-base-wrap">
<label class="block text-sm font-medium text-slate-600 dark:text-slate-400 mb-1.5">API Base</label>
<input id="vendor-modal-base" type="text"
class="w-full px-3 py-2 rounded-lg border border-slate-200 dark:border-slate-600
bg-slate-50 dark:bg-white/5 text-sm text-slate-800 dark:text-slate-100
focus:outline-none focus:border-primary-500 font-mono transition-colors"
placeholder="https://...../v1">
<p id="vendor-modal-base-hint" class="mt-1.5 text-xs text-slate-400 dark:text-slate-500 hidden">
<i class="fas fa-info-circle mr-1"></i><span data-i18n="models_base_default_hint">留空将使用官方默认地址</span>
</p>
</div>
</div>
</div>
<div class="flex items-center justify-between gap-3 px-6 py-4 border-t border-slate-100 dark:border-white/5 rounded-b-2xl">
<button id="vendor-modal-clear"
class="px-3 py-2 rounded-lg text-xs
text-red-500 dark:text-red-400 hover:bg-red-50 dark:hover:bg-red-900/20
cursor-pointer transition-colors duration-150 hidden"
data-i18n="models_clear_credential">清除凭据</button>
<span id="vendor-modal-status"
class="flex-1 text-xs text-primary-500 opacity-0 transition-opacity duration-300 text-center"></span>
<button id="vendor-modal-cancel"
class="px-4 py-2 rounded-lg border border-slate-200 dark:border-white/10
text-slate-600 dark:text-slate-300 text-sm font-medium
hover:bg-slate-50 dark:hover:bg-white/5
cursor-pointer transition-colors duration-150"
data-i18n="cancel">取消</button>
<button id="vendor-modal-save"
class="px-4 py-2 rounded-lg bg-primary-500 hover:bg-primary-600 text-white text-sm font-medium
cursor-pointer transition-colors duration-150 disabled:opacity-50 disabled:cursor-not-allowed"
data-i18n="save">保存</button>
</div>
</div>
</div>
<script defer src="assets/js/console.js"></script>
</body>
</html>

View File

@@ -725,6 +725,58 @@
background: rgba(74, 190, 110, 0.15);
color: #74E9A4;
}
/* When an item carries a hint (e.g. brand alias next to a technical model
id), label/hint are split into two spans so the hint sits on the right in
a dim, smaller weight. Without a hint the row stays a plain text node and
uses the default ellipsis behaviour, so no layout regressions for old call
sites. */
.cfg-dropdown-label {
flex: 1 1 auto;
min-width: 0;
overflow: hidden;
text-overflow: ellipsis;
}
.cfg-dropdown-hint {
flex-shrink: 0;
margin-left: auto;
padding-left: 12px;
color: #94a3b8;
font-size: 12px;
font-weight: 400;
}
.dark .cfg-dropdown-hint {
color: #64748b;
}
.cfg-dropdown-item.active .cfg-dropdown-hint {
/* Tint the hint toward the brand colour on the active row so it doesn't
fight with the highlighted label tone. */
color: rgba(34, 133, 71, 0.65);
}
.dark .cfg-dropdown-item.active .cfg-dropdown-hint {
color: rgba(116, 233, 164, 0.6);
}
/* The active row gets a trailing brand-green checkmark via a Font Awesome
pseudo-element so every dropdown (chat / vision / image / asr / tts / etc.)
surfaces "this is what's currently selected" without per-call JS plumbing.
When a hint is present, the ✓ sits to its right with a small gap; without
a hint, margin-left:auto pushes the ✓ flush against the right edge. */
.cfg-dropdown-item.active::after {
content: '\f00c'; /* FontAwesome check glyph */
font-family: 'Font Awesome 6 Free', 'Font Awesome 5 Free', 'FontAwesome';
font-weight: 900;
margin-left: auto;
padding-left: 12px;
color: #4abe6e;
font-size: 11px;
flex-shrink: 0;
}
.cfg-dropdown-item.active:has(.cfg-dropdown-hint)::after {
/* When hint occupies the auto-margin slot, the ✓ no longer benefits
from `margin-left: auto`; replace it with a small fixed gap so the
✓ trails the hint cleanly. */
margin-left: 0;
padding-left: 10px;
}
/* API Key masking via CSS (avoids browser password prompts) */
.cfg-key-masked {
@@ -732,6 +784,77 @@
text-security: disc;
}
/* Provider logo image — vendors flagged as `provider-logo-invert-dark`
ship a black wordmark that disappears on the dark canvas; we invert their
luminance only in dark mode so the brand stays recognizable without
touching multi-color marks like Google/MiniMax. */
.provider-logo-img {
object-fit: contain;
object-position: center;
}
.dark .provider-logo-invert-dark {
filter: invert(1) brightness(1.15);
}
/* Models page — provider dropdown rows.
Configured rows look like ordinary picker entries; the .active row's
trailing brand-green ✓ already announces "this is what's selected"
(handled globally by .cfg-dropdown-item.active::after above).
Unconfigured rows are visually subdued and carry a trailing gear icon
as a "click to set up" affordance. */
.cap-provider-label {
flex: 1 1 auto;
overflow: hidden;
text-overflow: ellipsis;
}
.cap-provider-gear {
margin-left: auto;
padding-left: 12px;
color: #94a3b8;
font-size: 11px;
flex-shrink: 0;
}
.cap-provider-item.cap-provider-unconfigured {
color: #94a3b8;
}
.dark .cap-provider-item.cap-provider-unconfigured {
color: #64748b;
}
.cap-provider-item.cap-provider-unconfigured:hover {
color: #475569;
}
.dark .cap-provider-item.cap-provider-unconfigured:hover {
color: #cbd5e1;
}
.cap-provider-item.cap-provider-unconfigured:hover .cap-provider-gear {
color: #475569;
}
.dark .cap-provider-item.cap-provider-unconfigured:hover .cap-provider-gear {
color: #cbd5e1;
}
/* If the active row ever lands on an unconfigured vendor (defensive — the
click handler normally diverts to the modal), suppress the global ✓ so
the gear remains the sole trailing icon and the row keeps reading as
"needs setup" rather than "already selected". */
.cap-provider-item.cap-provider-unconfigured.active::after {
content: none;
}
/* "Add vendor" modal picker — each configured row carries a static
brand-green ✓ via decorateVendorModalPicker so users can see what's set
up at a glance. The active row's global ✓ is suppressed here to avoid
showing two checks side by side on configured + selected rows. */
.vendor-picker-item.active::after {
content: none;
}
.vendor-picker-configured-mark {
margin-left: auto;
padding-left: 12px;
color: #4abe6e;
font-size: 11px;
flex-shrink: 0;
}
/* Chat Input */
#chat-input {
resize: none; height: 42px; max-height: 180px;
@@ -1171,3 +1294,76 @@
overflow: hidden;
min-height: 2.5em; /* ~2 lines at text-sm leading-relaxed */
}
/* --------------------------------------------------------------------
* Voice pill — compact custom audio player used by mic uploads and TTS
* replies. Replaces the bulky native <audio controls> with a play/pause
* icon + thin progress bar + duration counter so it blends into chat
* bubbles without the chrome-grey browser default look.
* ------------------------------------------------------------------ */
.voice-pill {
display: inline-flex;
align-items: center;
gap: 8px;
padding: 6px 10px;
border-radius: 999px;
background: rgba(15, 23, 42, 0.05);
color: rgb(71, 85, 105);
font-size: 12px;
line-height: 1;
max-width: 240px;
user-select: none;
cursor: default;
}
.dark .voice-pill {
background: rgba(255, 255, 255, 0.08);
color: rgb(203, 213, 225);
}
.voice-pill[data-loading="1"] {
opacity: 0.65;
}
.voice-pill-btn {
width: 22px;
height: 22px;
border-radius: 999px;
display: inline-flex;
align-items: center;
justify-content: center;
background: var(--color-primary-500, #2563eb);
color: #fff;
flex-shrink: 0;
cursor: pointer;
transition: transform 0.1s ease;
}
.voice-pill-btn:hover { transform: scale(1.05); }
.voice-pill-btn i { font-size: 9px; margin-left: 1px; }
.voice-pill-btn[data-state="play"] i { margin-left: 2px; }
.voice-pill-btn[data-state="pause"] i { margin-left: 0; }
.voice-pill-track {
flex: 1;
height: 3px;
border-radius: 999px;
background: rgba(100, 116, 139, 0.25);
overflow: hidden;
min-width: 70px;
}
.dark .voice-pill-track {
background: rgba(148, 163, 184, 0.25);
}
.voice-pill-fill {
height: 100%;
width: 0%;
background: var(--color-primary-500, #2563eb);
border-radius: inherit;
transition: width 0.1s linear;
}
.voice-pill-time {
font-variant-numeric: tabular-nums;
font-size: 11px;
color: inherit;
opacity: 0.75;
flex-shrink: 0;
min-width: 28px;
text-align: right;
}
.voice-pill audio { display: none; }

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1 @@
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251656961" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="18432" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M252.8 652.8l167.893333-94.293333 2.773334-8.106667-2.773334-4.48h-8.106666l-28.16-1.706667-96-2.56-83.2-3.413333-80.64-4.266667-20.266667-4.266666L85.333333 504.746667l1.92-12.586667 17.066667-11.52 24.32 2.133333 53.973333 3.626667 81.066667 5.546667 58.666667 3.413333 87.04 9.173333h13.866666l1.92-5.546666-4.693333-3.413334-3.626667-3.413333-83.84-56.746667-90.666666-60.16-47.573334-34.56-25.813333-17.493333-13.013333-16.426667-5.546667-35.84 23.253333-25.813333 31.36 2.133333 7.893334 2.133334 31.786666 24.32 67.84 52.48L401.066667 391.466667l13.013333 10.88 5.12-3.626667 0.64-2.56-5.76-9.813333-48.213333-87.04L314.453333 210.773333l-22.826666-36.693333-5.973334-21.973333a107.861333 107.861333 0 0 1-3.626666-26.026667l26.666666-36.053333L323.413333 85.333333l35.413334 4.693334 14.933333 13.013333 21.973333 50.346667 35.626667 79.36 55.253333 107.733333 16.213334 32 8.746666 29.653333 3.2 9.173334h5.546667v-5.12l4.48-60.8 8.32-74.453334 8.106667-96 2.773333-27.093333 13.44-32.426667 26.666667-17.493333 20.693333 10.026667 17.066667 24.32-2.346667 15.786666-10.24 65.92-19.84 103.253334-13.013333 69.12h7.466666l8.746667-8.746667 34.986667-46.506667 58.666666-73.386666 26.026667-29.226667 30.293333-32.213333 19.413334-15.36h36.693333l27.093333 40.106666-12.16 41.386667-37.76 48-31.36 40.533333-45.013333 60.586667-28.16 48.426667 2.56 3.84 6.613333-0.64 101.546667-21.546667 54.826667-10.026667 65.493333-11.306666 29.653333 13.866666 3.2 14.08-11.733333 28.8-69.973333 17.28-82.133334 16.426667-122.24 29.013333-1.493333 1.066667 1.706667 2.133333 55.04 5.12 23.466666 1.28h57.6l107.306667 7.893334 28.16 18.56 16.853333 22.613333-2.773333 
17.28-43.306667 21.973333-58.24-13.866666-136.106666-32.426667-46.72-11.733333h-6.4v3.84l38.826666 37.973333 71.253334 64.426667 89.173333 82.986666 4.48 20.48-11.52 16.213334-12.16-1.706667-78.506667-58.88-30.293333-26.666667-68.48-57.6h-4.48v5.973334l15.786667 23.04 83.413333 125.226666 4.266667 38.4-5.973334 12.586667-21.546666 7.466667-23.68-4.266667-48.853334-68.48-50.346666-77.226667-40.533334-69.12-4.906666 2.773334-23.893334 258.133333-11.306666 13.226667-26.026667 10.026666-21.546667-16.426666-11.52-26.666667 11.52-52.48 13.866667-68.48 11.306667-54.4 10.24-67.626667 5.973333-22.4-0.426667-1.493333-4.906666 0.64-50.986667 69.973333-77.653333 104.746667-61.44 65.706667-14.72 5.76-25.386667-13.226667 2.346667-23.466667 14.293333-20.906666 84.906667-107.946667 51.2-66.986667 33.066666-38.613333v-5.546667h-2.133333l-225.493333 146.56-40.106667 5.12-17.28-16.213333 2.133333-26.666667 8.106667-8.746666 67.84-46.72h-0.213333l0.853333 0.853333z" fill="#D97757" p-id="18433"></path></svg>

After

Width:  |  Height:  |  Size: 2.9 KiB

View File

@@ -0,0 +1,10 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="200" height="200" fill="none" stroke="#475569" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round">
<!-- Horizontal slider tracks -->
<line x1="4" y1="7" x2="20" y2="7"/>
<line x1="4" y1="12" x2="20" y2="12"/>
<line x1="4" y1="17" x2="20" y2="17"/>
<!-- Knobs (filled circles) -->
<circle cx="9" cy="7" r="2.2" fill="#475569" stroke="none"/>
<circle cx="15" cy="12" r="2.2" fill="#475569" stroke="none"/>
<circle cx="7" cy="17" r="2.2" fill="#475569" stroke="none"/>
</svg>

After

Width:  |  Height:  |  Size: 573 B

View File

@@ -0,0 +1 @@
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251621200" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="17444" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M1019.364785 620.816931L891.797142 397.807295 946.450846 293.15069a29.097778 29.097778 0 0 0 6.399732-36.393472l-70.184053-126.586684a30.078737 30.078737 0 0 0-24.574968-13.652427H597.4945L539.171949 14.549389a27.348852 27.348852 0 0 0-20.906122-14.549389H380.628607a29.139776 29.139776 0 0 0-24.616967 14.549389v5.545767L225.797108 243.062793H100.919352a29.182775 29.182775 0 0 0-25.513928 13.653427L3.428446 384.11187a32.766624 32.766624 0 0 0 0 29.182775L132.831012 638.096205 74.508461 740.064923a32.766624 32.766624 0 0 0 0 29.05478l66.514207 116.561105a29.905744 29.905744 0 0 0 25.513929 14.505391H427.132654l62.845361 109.222414A30.078737 30.078737 0 0 0 512.762058 1024H660.382859a29.139776 29.139776 0 0 0 24.574968-14.549389l128.463606-224.843558h114.76818a31.91366 31.91366 0 0 0 24.660965-15.444352l66.471208-117.414069a28.158818 28.158818 0 0 0 0-30.9747l0.042999 0.042999z m-161.273228 14.591387L791.57735 512.490479 518.265827 993.964261l-74.748861-122.87484h-273.268525l65.618244-119.205994h139.386147L101.856313 272.244568h143.055993L380.671605 30.121735l68.34913 119.247993-70.184053 122.87484H925.501726l-69.202094 121.936879 137.594222 241.183873H858.134555z" fill="#605BEC" p-id="17445"></path><path d="M499.962596 699.320634l174.371677-274.719464H324.694955z" fill="#605BEC" p-id="17446"></path></svg>

After

Width:  |  Height:  |  Size: 1.6 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 5.1 KiB

View File

@@ -0,0 +1 @@
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779261485522" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="5381" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M958.976 439.808C804.864 336.896 642.56 321.536 642.56 321.536s8.192 235.008-10.752 306.176c-0.512 9.728-11.776 75.264-43.008 157.696-10.752 28.16-24.064 55.296-39.424 81.408-40.96 74.24-89.6 127.488-89.6 127.488 119.808-48.64 205.312-92.672 309.76-175.616 122.88-96.768 229.376-254.464 189.44-378.88z" fill="#37E1BE" p-id="5382"></path><path d="M329.728 395.776c158.208-100.864 308.736-78.848 312.32-74.752 0.512 0.512 1.024 0.512 1.024 0.512 0-14.336-6.656-60.928-13.312-106.496-11.776-60.928-22.528-124.928-23.04-133.632-170.496-139.264-356.864-78.336-448 25.6-61.44 70.144-103.424 169.984-102.4 224.256V762.88c0.512-12.8 1.536-20.48 2.048-20.48 17.92-197.12 271.36-346.624 271.36-346.624z" fill="#A569FF" p-id="5383"></path><path d="M792.064 272.384c-41.984-43.52-87.552-88.576-122.368-125.44-33.28-34.816-59.392-60.928-62.976-65.536 0.512 8.704 11.264 72.704 23.04 133.632 6.656 45.568 12.8 92.672 13.312 106.496 0 0 162.304 15.36 316.416 118.272-0.512 0-83.456-80.384-167.424-167.424zM549.888 866.816c-2.56 1.024-198.656 107.008-292.352-30.72-20.992-30.72-31.744-68.096-33.28-106.496-3.072-74.752 5.12-227.84 105.472-333.824 0 0-253.44 149.504-270.848 346.624-0.512 0.512-2.048 8.192-2.048 20.48-1.024 32.768 4.608 98.304 43.008 155.136 52.224 78.336 193.024 138.752 328.192 85.504l33.28-9.728c-1.024 0.512 47.616-52.224 88.576-126.976z" fill="#1E37FC" p-id="5384"></path></svg>

After

Width:  |  Height:  |  Size: 1.7 KiB

View File

@@ -0,0 +1 @@
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251750646" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="29551" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M214.101333 512c0-32.512 5.546667-63.701333 15.36-92.928L57.173333 290.218667A491.861333 491.861333 0 0 0 4.693333 512c0 79.701333 18.858667 154.88 52.394667 221.610667l172.202667-129.066667A290.56 290.56 0 0 1 214.101333 512" fill="#FBBC05" p-id="29552"></path><path d="M516.693333 216.192c72.106667 0 137.258667 25.002667 188.458667 65.962667L854.101333 136.533333C763.349333 59.178667 646.997333 11.392 516.693333 11.392c-202.325333 0-376.234667 113.28-459.52 278.826667l172.373334 128.853333c39.68-118.016 152.832-202.88 287.146666-202.88" fill="#EA4335" p-id="29553"></path><path d="M516.693333 807.808c-134.357333 0-247.509333-84.864-287.232-202.88l-172.288 128.853333c83.242667 165.546667 257.152 278.826667 459.52 278.826667 124.842667 0 244.053333-43.392 333.568-124.757333l-163.584-123.818667c-46.122667 28.458667-104.234667 43.776-170.026666 43.776" fill="#34A853" p-id="29554"></path><path d="M1005.397333 512c0-29.568-4.693333-61.44-11.648-91.008H516.650667V614.4h274.602666c-13.696 65.962667-51.072 116.650667-104.533333 149.632l163.541333 123.818667c93.994667-85.418667 155.136-212.650667 155.136-375.850667" fill="#4285F4" p-id="29555"></path></svg>

After

Width:  |  Height:  |  Size: 1.5 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 11 KiB

View File

@@ -0,0 +1 @@
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251514432" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="11888" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M415.392 475.808v329.984c-22.304 111.744-170.56 82.944-171.2 1.92-0.672-101.824 0-202.976 0-304.064v-117.184c0-14.656-3.2-26.24-16-35.392-24.96-18.72-54.944 3.264-55.584 30.208-1.408 36.16-0.704 71.616-1.408 107.264 0 28.16 0 55.52 0.64 83.648-18.368 123.776-168.32 103.232-171.808 0.704V487.04c0-28.032 54.944-34.624 52.256 7.36-1.792 20.8-0.64 42.272-1.344 62.912-0.64 36.8 55.648 61.6 68.896 1.408 0.64-49.632 0.64-99.264 0.64-149.344 0-62.752 17.824-113.856 84.352-118.624 28.8-2.56 47.968 9.504 66.336 30.304 7.04 7.36 23.68 30.72 24.32 56.16 0 23.456 0.64 46.752 0.64 70.464 0 46.72-0.64 93.76-0.64 140.48 0 30.304 0.64 60.256 0.64 89.856 0 37.536 0 75.552-0.64 113.152-0.64 48.864 58.816 48.16 68.352-0.768 0-57.632 0.64-114.56 0.64-172.192 0-141.984-0.64-283.968-0.64-425.856 0-14.72-2.048-55.584 5.76-70.464 41.504-101.12 167.392-56.96 168.544 26.72 2.432 171.52 0 344.896 0.64 516.8 0 59.616-48.416 46.816-51.104 23.488 0-178.88 0-358.4 0.64-537.024-2.368-44.832-68.832-38.72-72.672-6.592-1.28 36.864-0.64 74.4-1.28 111.232v219.008h0.64l0.448 0.256h-0.064z" fill="#D4367A" p-id="11889"></path><path d="M610.016 473.184v242.336V143.648c21.632-112.512 169.824-83.264 170.464-2.176 0.704 101.12 0 202.912 0.704 304 0 38.784 0 77.728-0.64 116.544 0 15.36 3.776 26.176 16.64 36.032 24.32 18.24 54.24-3.2 55.584-30.592 1.344-35.488 0.64-70.976 0.64-107.328V376.96c18.56-123.776 168.128-103.232 171.264-0.704v310.592c0 28.16-54.304 34.848-51.872-7.296 1.472-21.44 0-267.104 0.768-288.64 1.28-36.16-55.712-61.664-68.928-0.768v148.576c0 63.68-17.856 113.92-84.96 119.36-63.264 1.504-88.704-42.24-90.752-86.432V271.328c0-38.24 0-75.552 0.64-113.088 
0.64-48.864-58.784-48.864-68.896 0.704V831.36c0 14.592 2.048 55.52-5.184 70.432-41.44 101.056-168 56.864-169.152-26.752v-79.616c3.136-53.6 48.416-40.864 50.464-18.176v94.464c2.432 44.928 68.928 39.488 72.064 6.656 1.344-36.896 1.344-73.728 1.344-111.296v-293.824h-0.192v-0.064z" fill="#ED6D48" p-id="11890"></path></svg>

After

Width:  |  Height:  |  Size: 2.2 KiB

View File

@@ -0,0 +1 @@
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251592968" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="16416" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M117.9648 684.6464l342.30272 93.57312v75.34592l209.7152 58.5728A428.99456 428.99456 0 0 1 512 942.08c-176.128 0-327.53664-105.8816-394.0352-257.4336zM83.29216 477.42976l407.30624 112.64-9.6256 37.00736-6.0416 35.0208 383.3856 104.96a432.5376 432.5376 0 0 1-65.10592 70.32832l-688.18944-185.9584A429.4656 429.4656 0 0 1 81.92 512c0-11.63264 0.47104-23.1424 1.37216-34.54976z m57.344-182.4768l429.07648 114.21696a279.94112 279.94112 0 0 0-23.06048 35.55328 201.17504 201.17504 0 0 0-14.70464 34.93888l403.08736 110.26432a426.8032 426.8032 0 0 1-23.552 81.7152L86.54848 448.7168a427.25376 427.25376 0 0 1 54.0672-153.76384z m158.47424-156.75392l404.23424 108.31872a190.2592 190.2592 0 0 0-32.80896 24.90368c-9.13408 8.8064-19.8656 21.4016-32.1536 37.74464l285.24544 77.78304c9.216 30.45376 15.03232 61.8496 17.32608 93.5936L156.61056 269.68064a432.27136 432.27136 0 0 1 142.49984-131.4816zM512 81.92c142.90944 0 269.55776 69.71392 347.7504 176.98816L337.26464 118.90688A428.50304 428.50304 0 0 1 512 81.92z" fill="#000000" p-id="16417"></path></svg>

After

Width:  |  Height:  |  Size: 1.3 KiB

View File

@@ -0,0 +1 @@
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251225589" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="9015" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M881.664 431.488a218.88 218.88 0 0 0-18.176-177.088A218.624 218.624 0 0 0 628.992 149.76c-40.576-45.824-100.288-71.424-162.176-71.424a219.136 219.136 0 0 0-208 150.4 215.68 215.68 0 0 0-144 104.512 218.944 218.944 0 0 0 26.688 254.912 218.752 218.752 0 0 0 19.2 177.152 217.088 217.088 0 0 0 234.624 104.512 219.136 219.136 0 0 0 162.112 72.512 219.136 219.136 0 0 0 208-150.4 215.68 215.68 0 0 0 144-104.512 219.008 219.008 0 0 0-27.712-256z m-324.288 454.4a158.08 158.08 0 0 1-103.424-37.376c1.088-1.088 4.288-2.176 5.376-3.2l171.712-99.2a28.16 28.16 0 0 0 13.824-24.512V479.488l72.576 41.6c1.024 0 1.024 1.024 1.024 2.112v200.512a160.512 160.512 0 0 1-161.088 162.112z m-347.712-148.288c-19.2-33.088-25.6-71.488-19.2-108.8 1.088 1.024 3.2 2.176 5.376 3.2l171.712 99.2a25.984 25.984 0 0 0 27.712 0l210.112-121.6v84.224c0 1.152 0 2.176-1.024 2.176L430.464 796.16c-76.8 44.8-176 18.176-220.8-58.624z m-44.736-375.424c19.2-32.64 48.896-57.856 84.224-71.488v204.8c0 9.6 5.376 19.2 13.888 24.512l210.176 121.6-72.576 41.6c-1.024 0-2.112 1.088-2.112 0L224.64 582.912a160.448 160.448 0 0 1-59.776-220.8h0.064z m597.312 138.688l-210.112-121.6 72.512-41.6c1.088 0 2.176-1.088 2.176 0l173.824 100.224a161.088 161.088 0 0 1-25.6 291.2V525.44a26.304 26.304 0 0 0-12.8-24.512z m71.488-108.8a23.232 23.232 0 0 0-5.312-3.2L656.64 289.536a26.048 26.048 0 0 0-27.712 0l-210.176 121.6V326.912c0-1.088 0-2.176 1.088-2.176l173.824-100.224a161.152 161.152 0 0 1 220.8 59.712c19.2 32 25.6 70.4 19.2 107.776z m-454.4 149.248l-72.64-41.6c-1.024 0-1.024-1.088-1.024-2.176V297.088A162.048 162.048 0 0 1 467.84 135.04a158.08 158.08 0 0 1 103.424 37.312 22.848 22.848 0 0 1-5.312 3.2L394.24 
274.688a28.16 28.16 0 0 0-13.888 24.512v242.112h-1.088z m39.424-85.312l93.824-54.4 93.888 54.4v107.712l-93.888 54.4-93.824-54.4V456z" fill="#000000" p-id="9016"></path></svg>

After

Width:  |  Height:  |  Size: 2.1 KiB

View File

@@ -0,0 +1 @@
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251568791" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="14450" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M96.20121136 636.3124965c-0.1472897-113.41305959-0.29457937-226.8261192-0.29457937-340.23917879 0-14.87625845 7.65906378-26.51214381 20.4732666-34.02391789 45.51251353-26.65943349 91.02502705-53.31886698 136.83211997-79.53643141 71.1409192-40.94653321 142.42912809-81.59848704 213.71733698-122.39773055 7.36448439-4.12411126 14.58167909-8.3955122 21.50429441-13.2560719 19.44223878-13.40336159 39.03176725-16.05457598 60.09419263-3.53495252 27.39588193 16.34915535 54.93905355 32.25644163 82.48222516 48.16372793 88.0792333 50.96223197 176.30575629 101.77717426 264.38498958 152.59211653 9.86840908 5.74429781 19.88410785 11.19401627 29.60522725 17.0856038 14.13981003 8.54280189 21.50429441 21.06242535 21.50429443 37.70616007 0 147.73155685 0.29457937 295.46311371-0.1472897 443.19467057 0 15.46541722-7.2171947 28.57419943-21.7988738 36.96971163-34.7603663 20.17868721-70.55176044 38.88447758-104.57567833 59.94690293-48.90017634 30.19438599-100.00969801 56.11737105-148.76258466 86.60633642-29.01606849 18.11663161-59.50503387 34.02391789-89.11026112 50.96223197-13.10878221 7.51177407-26.07027474 15.17083783-39.03176726 22.9771913-13.84523065 8.3955122-27.83775099 8.83738127-41.97756102 0.73644843-56.41195043-32.55102101-112.82390085-65.10204201-169.38314098-97.653063-61.86166887-35.64410444-123.72333775-71.1409192-185.4377169-106.78502365-11.19401627-6.48074626-22.24074286-12.81420285-32.99289009-19.88410785-11.48859565-7.65906378-17.08560379-19.14765941-17.08560378-32.69831069-0.1472897-34.7603663 0.1472897-69.52073264 0.29457938-104.28109895 1.62018657-0.58915875 1.62018657-1.62018657-0.29457938-2.65121438z 
m356.58833414-225.500512c2.20934532-1.76747625 4.41869063-3.68224221 6.77532565-5.15513907 68.93157389-39.62092601 137.86314777-79.24185204 206.94201135-118.86277807 2.79850407-1.62018657 6.48074626-1.62018657 6.62803594-6.18616688 0.1472897-4.8605597-4.12411126-4.71327001-6.77532564-6.18616688-40.65195383-23.56635005-81.59848704-46.83812071-122.10315117-70.84633984-16.79102442-10.01569877-32.84560039-8.54280189-48.45830728 0.58915876-45.9543826 26.51214381-91.46689612 53.61344636-137.27398903 80.42016953-31.96186226 18.70579035-64.21830387 37.11700133-96.32745581 55.67550198-18.41121097 10.60485751-27.54317163 25.33382629-27.24859225 47.72185885 0.88373813 89.55213018 0.58915875 179.10426036 0.14728969 268.65639053-0.1472897 20.17868721 9.27925033 33.58204881 25.33382629 43.15587853 31.3727035 18.70579035 63.18727606 37.11700133 95.14913832 54.93905355 10.89943689 6.03887719 21.06242535 13.99252034 35.79139414 18.41121096V505.51925374c6.48074626 19.58952848 18.55850066 34.02391789 36.67513226 44.6287754 27.83775099 16.20186565 63.18727606 12.51962347 86.31175705-10.45756784 26.95401286-26.65943349 28.72148912-62.89269668 12.81420282-90.14128893-16.34915535-28.42690974-43.59774757-37.55887038-74.38129233-38.73718787z m82.48222517 429.64401928c14.28709972-3.82953187 25.92298506-13.99252034 38.88447758-21.35700473 40.94653321-23.27177067 81.30390766-47.72185885 122.54502023-70.55176046 26.95401286-15.02354815 52.87699792-31.66728287 80.71474891-45.21793415 16.79102442-8.10093283 29.60522723-22.53532223 29.60522726-43.4504579 0.1472897-92.939793 0.29457937-185.73229631 0.14728969-278.6720893 0-11.19401627-5.15513907-13.99252034-13.84523067-7.06990501-26.51214381 20.76784598-57.29568854 34.46578693-86.16446735 51.25681135-54.49718448 31.81457257-109.14165865 63.33456576-163.78613282 95.00184862-8.54280189 4.8605597-11.78317502 10.45756784-11.63588535 20.47326662 0.29457937 96.18016613 0.1472897 192.50762194 0.1472897 288.68778806-0.29457937 3.5349525-1.47289687 
7.65906378 3.38766282 10.8994369z" fill="#066AF3" p-id="14451"></path><path d="M96.20121136 636.3124965c1.91476594 1.03102783 1.91476594 2.06205563 0 3.09308345v-3.09308345z" fill="#4372E0" p-id="14452"></path><path d="M391.3697457 505.37196405c-5.44971845-44.33419602 13.84523065-74.08671296 61.4197998-94.55997955 30.93083443 1.17831749 58.03213699 10.31027814 74.38129233 38.5898982 15.75999659 27.39588193 14.13981003 63.48185543-12.81420282 90.14128893-23.27177067 22.97719129-58.47400606 26.65943349-86.31175705 10.45756783-18.11663161-10.60485751-30.34167568-25.03924691-36.67513226-44.62877541z" fill="#002A9A" p-id="14453"></path></svg>

After

Width:  |  Height:  |  Size: 4.5 KiB

View File

@@ -0,0 +1 @@
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251419020" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="10062" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M520.063496 0v77.563152c0 269.231173-144.758953 414.054122-434.212862 434.340854L86.106618 511.968002H76.827198V255.984001l443.236298-255.984001z" fill="#5B55F6" p-id="10063"></path><path d="M520.063496 1023.936004v-77.563152c0-269.231173-144.758953-414.054122-434.212862-434.340854L86.042622 511.968002H76.827198v255.984001l443.236298 255.984001z" fill="#376AF3" p-id="10064"></path><path d="M520.063496 0v77.563152c0 269.231173 144.758953 414.054122 434.276858 434.340854L954.08437 511.968002h9.215424V255.984001L520.063496 0z" fill="#5B55F6" p-id="10065"></path><path d="M520.063496 1023.936004v-77.563152c0-269.231173 144.758953-414.054122 434.276858-434.340854L954.08437 511.968002h9.27942v255.984001l-443.236298 255.984001z" fill="#376AF3" p-id="10066"></path></svg>

After

Width:  |  Height:  |  Size: 1.1 KiB

File diff suppressed because it is too large Load Diff

View File

@@ -81,6 +81,8 @@ def _loads_wecom_ws_json(raw):
@singleton
class WecomBotChannel(ChatChannel):
NOT_SUPPORT_REPLYTYPE = []
def __init__(self):
super().__init__()
self.bot_id = ""
@@ -472,6 +474,8 @@ class WecomBotChannel(ChatChannel):
else:
context.type = ContextType.TEXT
context.content = content.strip()
if "desire_rtype" not in context and conf().get("always_reply_voice"):
context["desire_rtype"] = ReplyType.VOICE
return context
@@ -498,6 +502,8 @@ class WecomBotChannel(ChatChannel):
self._send_file(reply.content, receiver, is_group, req_id)
elif reply.type == ReplyType.VIDEO or reply.type == ReplyType.VIDEO_URL:
self._send_file(reply.content, receiver, is_group, req_id, media_type="video")
elif reply.type == ReplyType.VOICE:
self._send_voice(reply.content, receiver, is_group, req_id)
else:
logger.warning(f"[WecomBot] Unsupported reply type: {reply.type}, falling back to text")
self._send_text(str(reply.content), receiver, is_group, req_id)
@@ -730,6 +736,65 @@ class WecomBotChannel(ChatChannel):
},
})
def _send_voice(self, voice_path: str, receiver: str, is_group: bool, req_id: str = None):
    """Send a native voice reply. WeCom voice media must be amr.

    Args:
        voice_path: Local file path, ``file://`` URI, or http(s) URL of the
            audio to send. Remote audio is downloaded to a temp file first.
        receiver: Chat id addressed when sending proactively (no ``req_id``).
        is_group: Proactive sends use ``chat_type`` 2 when true, else 1.
        req_id: When given, the voice is sent as a response to that request;
            otherwise a fresh request id is generated.

    Any failure (download, missing file, amr conversion, upload) is logged
    and the method returns without sending anything.
    """
    import tempfile
    from urllib.parse import urlparse

    local_path = voice_path
    if local_path.startswith("file://"):
        local_path = local_path[7:]

    # Files this call creates for a remote download (the download itself and
    # its amr conversion). They are removed once sending is finished; files
    # derived from a caller-supplied local path are left in place as before.
    scratch_files = []
    try:
        if local_path.startswith(("http://", "https://")):
            try:
                resp = requests.get(local_path, timeout=60)
                resp.raise_for_status()
                # Take the extension from the URL *path* only, so a query
                # string or the host name never ends up in the file name.
                ext = os.path.splitext(urlparse(local_path).path)[1] or ".mp3"
                tmp_path = os.path.join(
                    tempfile.gettempdir(),
                    f"wecom_voice_{uuid.uuid4().hex[:8]}{ext}",
                )
                scratch_files.append(tmp_path)
                with open(tmp_path, "wb") as f:
                    f.write(resp.content)
                local_path = tmp_path
            except Exception as e:
                logger.error(f"[WecomBot] Failed to download voice for sending: {e}")
                return

        if not os.path.exists(local_path):
            logger.error(f"[WecomBot] Voice file not found: {local_path}")
            return

        amr_path = local_path
        if not local_path.lower().endswith(".amr"):
            try:
                from voice.audio_convert import any_to_amr
                amr_path = os.path.splitext(local_path)[0] + ".amr"
                if scratch_files:
                    # Conversion of a downloaded temp file is temporary too.
                    scratch_files.append(amr_path)
                any_to_amr(local_path, amr_path)
            except Exception as e:
                logger.error(f"[WecomBot] Failed to convert voice to amr: {e}")
                return

        media_id = self._upload_media(amr_path, "voice")
        if not media_id:
            logger.error("[WecomBot] Failed to upload voice media")
            return

        if req_id:
            # Reply within the context of the incoming request.
            self._ws_send({
                "cmd": "aibot_respond_msg",
                "headers": {"req_id": req_id},
                "body": {
                    "msgtype": "voice",
                    "voice": {"media_id": media_id},
                },
            })
        else:
            # No request to answer: send proactively to the receiver.
            self._ws_send({
                "cmd": "aibot_send_msg",
                "headers": {"req_id": self._gen_req_id()},
                "body": {
                    "chatid": receiver,
                    "chat_type": 2 if is_group else 1,
                    "msgtype": "voice",
                    "voice": {"media_id": media_id},
                },
            })
    finally:
        # Best-effort cleanup; a file may not exist if an earlier step failed.
        for path in scratch_files:
            try:
                os.remove(path)
            except OSError:
                pass
def _active_send_markdown(self, content: str, receiver: str, is_group: bool):
"""Proactively send markdown message (for scheduled tasks, no req_id)."""
self._ws_send({

View File

@@ -60,6 +60,9 @@ def _save_credentials(cred_path: str, data: dict):
@singleton
class WeixinChannel(ChatChannel):
# ilink bot protocol has no outbound voice item; deliver TTS as a file.
NOT_SUPPORT_REPLYTYPE = []
LOGIN_STATUS_IDLE = "idle"
LOGIN_STATUS_WAITING = "waiting_scan"
LOGIN_STATUS_SCANNED = "scanned"
@@ -464,6 +467,14 @@ class WeixinChannel(ChatChannel):
else:
context.type = ContextType.TEXT
context.content = content.strip()
if "desire_rtype" not in context and conf().get("always_reply_voice"):
context["desire_rtype"] = ReplyType.VOICE
elif ctype == ContextType.VOICE:
if "desire_rtype" not in context and (
conf().get("voice_reply_voice") or conf().get("always_reply_voice")
):
context["desire_rtype"] = ReplyType.VOICE
return context
@@ -486,6 +497,9 @@ class WeixinChannel(ChatChannel):
self._send_file(reply.content, receiver, context_token)
elif reply.type in (ReplyType.VIDEO, ReplyType.VIDEO_URL):
self._send_video(reply.content, receiver, context_token)
elif reply.type == ReplyType.VOICE:
# ilink has no outbound voice item; deliver TTS as a file attachment.
self._send_file(reply.content, receiver, context_token)
else:
logger.warning(f"[Weixin] Unsupported reply type: {reply.type}, fallback to text")
self._send_text(str(reply.content), receiver, context_token)

View File

@@ -1 +1 @@
2.0.8
2.0.9

View File

@@ -47,6 +47,7 @@ GEMINI_3_FLASH_PRE = "gemini-3-flash-preview" # Gemini 3 Flash Preview - Agent
GEMINI_3_PRO_PRE = "gemini-3-pro-preview" # Gemini 3 Pro Preview
GEMINI_31_PRO_PRE = "gemini-3.1-pro-preview" # Gemini 3.1 Pro Preview - Agent推荐模型
GEMINI_31_FLASH_LITE_PRE = "gemini-3.1-flash-lite-preview" # Gemini 3.1 Flash Lite Preview - Agent推荐模型
GEMINI_35_FLASH = "gemini-3.5-flash" # Gemini 3.5 Flash - Agent推荐模型
# OpenAI
GPT35 = "gpt-3.5-turbo"
@@ -74,6 +75,7 @@ GPT_5_NANO = "gpt-5-nano"
GPT_54 = "gpt-5.4" # GPT-5.4 - Agent recommended model
GPT_54_MINI = "gpt-5.4-mini"
GPT_54_NANO = "gpt-5.4-nano"
GPT_55 = "gpt-5.5" # GPT-5.5 - top-tier (expensive), not default
O1 = "o1-preview"
O1_MINI = "o1-mini"
WHISPER_1 = "whisper-1"
@@ -104,10 +106,12 @@ QWEN_LONG = "qwen-long"
QWEN3_MAX = "qwen3-max" # Qwen3 Max - Agent推荐模型
QWEN35_PLUS = "qwen3.5-plus" # Qwen3.5 Plus - Omni model (MultiModalConversation)
QWEN36_PLUS = "qwen3.6-plus" # Qwen3.6 Plus - Omni model (MultiModalConversation)
QWEN37_MAX = "qwen3.7-max" # Qwen3.7 Max - Agent推荐模型
QWQ_PLUS = "qwq-plus"
# MiniMax
MINIMAX_M2_7 = "MiniMax-M2.7" # MiniMax M2.7 - Latest
MINIMAX_TEXT_01 = "MiniMax-Text-01" # MiniMax 多模态 (vision)
MINIMAX_M2_7_HIGHSPEED = "MiniMax-M2.7-highspeed" # MiniMax M2.7 highspeed
MINIMAX_M2_5 = "MiniMax-M2.5" # MiniMax M2.5
MINIMAX_M2_1 = "MiniMax-M2.1" # MiniMax M2.1
@@ -119,6 +123,7 @@ MINIMAX_ABAB6_5 = "abab6.5-chat" # MiniMax abab6.5
GLM_5_1 = "glm-5.1" # 智谱 GLM-5.1 - Agent recommended model (default)
GLM_5_TURBO = "glm-5-turbo" # 智谱 GLM-5-Turbo
GLM_5 = "glm-5" # 智谱 GLM-5
GLM_5V_TURBO = "glm-5v-turbo" # 智谱多模态 (vision)
GLM_4 = "glm-4"
GLM_4_PLUS = "glm-4-plus"
GLM_4_flash = "glm-4-flash"
@@ -183,7 +188,7 @@ MODEL_LIST = [
"claude", "claude-3-haiku", "claude-3-sonnet", "claude-3-opus", "claude-3.5-sonnet",
# Gemini
GEMINI_31_FLASH_LITE_PRE, GEMINI_31_PRO_PRE, GEMINI_3_PRO_PRE, GEMINI_3_FLASH_PRE, GEMINI_25_PRO_PRE, GEMINI_25_FLASH_PRE,
GEMINI_35_FLASH, GEMINI_31_FLASH_LITE_PRE, GEMINI_31_PRO_PRE, GEMINI_3_PRO_PRE, GEMINI_3_FLASH_PRE, GEMINI_25_PRO_PRE, GEMINI_25_FLASH_PRE,
GEMINI_20_FLASH, GEMINI_20_flash_exp, GEMINI_15_PRO, GEMINI_15_flash, GEMINI_PRO, GEMINI,
# OpenAI
@@ -193,7 +198,7 @@ MODEL_LIST = [
GPT_4o, GPT_4O_0806, GPT_4o_MINI,
GPT_41, GPT_41_MINI, GPT_41_NANO,
GPT_5, GPT_5_MINI, GPT_5_NANO,
GPT_54, GPT_54_MINI, GPT_54_NANO,
GPT_54, GPT_55, GPT_54_MINI, GPT_54_NANO,
O1, O1_MINI,
# GLM (智谱AI)
@@ -201,7 +206,7 @@ MODEL_LIST = [
GLM_4_0520, GLM_4_AIR, GLM_4_AIRX, GLM_4_7,
# Qwen (通义千问)
QWEN36_PLUS, QWEN35_PLUS, QWEN3_MAX, QWEN_MAX, QWEN_PLUS, QWEN_TURBO, QWEN_LONG,
QWEN37_MAX, QWEN36_PLUS, QWEN35_PLUS, QWEN3_MAX, QWEN_MAX, QWEN_PLUS, QWEN_TURBO, QWEN_LONG,
# Doubao (豆包)
DOUBAO, DOUBAO_SEED_2_CODE, DOUBAO_SEED_2_PRO, DOUBAO_SEED_2_LITE, DOUBAO_SEED_2_MINI,

View File

@@ -16,8 +16,8 @@
"open_ai_api_base": "https://api.openai.com/v1",
"gemini_api_key": "",
"gemini_api_base": "https://generativelanguage.googleapis.com",
"voice_to_text": "openai",
"text_to_voice": "openai",
"voice_to_text": "",
"text_to_voice": "",
"voice_reply_voice": false,
"speech_recognition": true,
"group_speech_recognition": false,

View File

@@ -330,8 +330,18 @@ def load_config():
config_str = read_file(config_path)
logger.debug("[INIT] config str: {}".format(drag_sensitive(config_str)))
# 将json字符串反序列化为dict类型
config = Config(json.loads(config_str))
# 将json字符串反序列化为dict类型
# `object_pairs_hook` lets us catch users who accidentally typed the
# same key twice (e.g. two `"tools"` blocks) — json.loads would
# otherwise silently drop all but the last occurrence.
config = Config(json.loads(config_str, object_pairs_hook=_merge_duplicate_keys))
# Migrate legacy singular keys (`tool`, `skill`) into the canonical
# plural buckets so the rest of the codebase only reads one schema.
# Deep-merge so existing `tools`/`skills` entries are preserved and
# only missing namespaces are filled in from the legacy section.
_merge_legacy_namespace(config, legacy="tool", canonical="tools")
_merge_legacy_namespace(config, legacy="skill", canonical="skills")
# override config with environment variables.
# Some online deployment platforms (e.g. Railway) deploy project from github directly. So you shouldn't put your secrets like api key in a config file, instead use environment variables to override the default config.
@@ -422,7 +432,7 @@ def load_config():
os.environ[env_key] = str(val)
injected += 1
injected += _sync_skill_config_to_env(config.get("skill", {}))
injected += _sync_skill_config_to_env(config.get("skills", {}))
if injected:
logger.info("[INIT] Synced {} config values to environment variables".format(injected))
@@ -430,11 +440,90 @@ def load_config():
config.load_user_datas()
def _deep_merge_dicts(base: dict, incoming: dict) -> dict:
"""Recursively merge ``incoming`` into ``base`` (incoming wins on leaves)."""
for key, val in incoming.items():
if (
key in base
and isinstance(base[key], dict)
and isinstance(val, dict)
):
_deep_merge_dicts(base[key], val)
else:
base[key] = val
return base
def _merge_duplicate_keys(pairs):
"""object_pairs_hook for json.loads: deep-merge duplicate top-level keys
(lists concat, dicts merge, scalars take the latter) instead of dropping."""
out = {}
duplicates = []
for key, val in pairs:
if key not in out:
out[key] = val
continue
duplicates.append(key)
prev = out[key]
if isinstance(prev, dict) and isinstance(val, dict):
_deep_merge_dicts(prev, val)
elif isinstance(prev, list) and isinstance(val, list):
prev.extend(val)
else:
out[key] = val
if duplicates:
# logger may not be wired yet — fall back to print so we never lose the warning.
unique = sorted(set(duplicates))
try:
logger.warning("[INIT] config.json has duplicate keys (merged): %s", unique)
except Exception:
print("[INIT] config.json has duplicate keys (merged):", unique)
return out
def _merge_legacy_namespace(cfg, legacy: str, canonical: str) -> None:
"""Fold deprecated singular keys (``tool`` / ``skill``) into their plural
canonical counterparts at load time. Canonical entries always win."""
legacy_section = cfg.get(legacy)
if not isinstance(legacy_section, dict) or not legacy_section:
cfg.pop(legacy, None)
return
canonical_section = cfg.get(canonical)
if not isinstance(canonical_section, dict):
canonical_section = {}
merged_keys = []
for name, val in legacy_section.items():
if name in canonical_section:
if isinstance(canonical_section[name], dict) and isinstance(val, dict):
for sub_key, sub_val in val.items():
if (
sub_key in canonical_section[name]
and isinstance(canonical_section[name][sub_key], dict)
and isinstance(sub_val, dict)
):
_deep_merge_dicts(sub_val, canonical_section[name][sub_key])
canonical_section[name][sub_key] = sub_val
else:
canonical_section[name].setdefault(sub_key, sub_val)
continue
canonical_section[name] = val
merged_keys.append(name)
cfg[canonical] = canonical_section
cfg.pop(legacy, None)
if merged_keys:
logger.warning(
"[INIT] Legacy config key '{}' is deprecated; merged into '{}': {}. "
"Please rename '{}' to '{}' in your config.json.".format(
legacy, canonical, merged_keys, legacy, canonical,
)
)
def _sync_skill_config_to_env(skill_section) -> int:
"""Flatten skill-namespaced config into environment variables.
Mapping rule: ``config["skill"][<name>][<key>]`` -> ``SKILL_<NAME>_<KEY>``
(e.g. ``skill["image-generation"].model`` -> ``SKILL_IMAGE_GENERATION_MODEL``).
Mapping rule: ``config["skills"][<name>][<key>]`` -> ``SKILL_<NAME>_<KEY>``
(e.g. ``skills["image-generation"].model`` -> ``SKILL_IMAGE_GENERATION_MODEL``).
This lets subprocess-based skill scripts read their own settings without
importing project code. Existing env vars are NOT overwritten so the

39
docs/channels/index.mdx Normal file
View File

@@ -0,0 +1,39 @@
---
title: 通道概览
description: CowAgent 支持的通道及能力矩阵
---
CowAgent 支持接入多种聊天通道,启动时通过 `channel_type` 切换。Web 控制台默认开启,可与其他接入通道并行运行。
## 能力矩阵
下表汇总各通道支持的入站消息类型、机器人回复类型与群聊能力,方便按场景选择。
| 通道 | 文本 | 图片 | 文件 | 语音 | 群聊 |
| --- | :-: | :-: | :-: | :-: | :-: |
| [微信](/channels/weixin) | ✅ | ✅ | ✅ | ✅ | |
| [Web 控制台](/channels/web) | ✅ | ✅ | ✅ | ✅ | |
| [飞书](/channels/feishu) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [钉钉](/channels/dingtalk) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [企微智能机器人](/channels/wecom-bot) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [QQ](/channels/qq) | ✅ | ✅ | ✅ | | ✅ |
| [企业微信应用](/channels/wecom) | ✅ | ✅ | ✅ | ✅ | |
| [公众号](/channels/wechatmp) | ✅ | ✅ | | ✅ | |
- **图片 / 文件 / 语音**列表示通道支持收发对应消息类型,具体细节详见各通道文档
- **群聊**列指可识别并响应群消息
<Tip>
每个通道的语音 / 图像能力依赖对应模型厂商的配置,详见 [模型概览](/models)。
</Tip>
## 通道一览
- [Web 控制台](/channels/web) — 内置浏览器对话和管理面板,默认开启
- [微信](/channels/weixin) — 通过个人微信扫码登录
- [飞书](/channels/feishu) — 飞书自建机器人
- [钉钉](/channels/dingtalk) — 钉钉自建机器人
- [企微智能机器人](/channels/wecom-bot) — 企业微信智能机器人
- [QQ](/channels/qq) — QQ 官方机器人开放平台
- [企业微信应用](/channels/wecom) — 企业微信自建应用接入
- [公众号](/channels/wechatmp) — 微信公众号(订阅号 / 服务号)

View File

@@ -59,9 +59,9 @@ Web 控制台是 CowAgent 的默认通道,启动后会自动运行,通过浏
### 模型管理
支持在线管理模型配置,无需手动编辑配置文件:
支持在线管理不同模型厂商的文本、图像、语音、向量模型配置,无需手动编辑配置文件:
<img width="850" src="https://cdn.link-ai.tech/doc/20260227173811.png" />
<img width="850" src="https://cdn.link-ai.tech/doc/20260521212949.png" />
### 技能管理

View File

@@ -181,6 +181,7 @@
{
"group": "接入渠道",
"pages": [
"channels/index",
"channels/weixin",
"channels/web",
"channels/feishu",

View File

@@ -40,7 +40,7 @@ To force a specific Vision model, set it explicitly in `config.json`:
```json
{
"tool": {
"tools": {
"vision": {
"model": "ernie-4.5-turbo-vl"
}

View File

@@ -11,7 +11,7 @@ New built-in `image-generation` skill supporting text-to-image, image-to-image,
- **Zero model selection**: Just configure an API key and it works — no need to manually specify a model. You can also name a specific model in conversation (e.g. "draw a cat with seedream")
- **Flexible control**: Supports `quality`, `size` (512/1K–4K), and `aspect_ratio` parameters, with each provider automatically mapping to its supported values
- **Image editing**: Pass existing images for editing, style transfer, or multi-image fusion (Seedream supports up to 14 reference images)
- **Skill-level config**: Pin a default model via `skill.image-generation.model` in `config.json`
- **Skill-level config**: Pin a default model via `skills.image-generation.model` in `config.json`
- **Image lightbox**: All images in the Web console now support click-to-enlarge preview
Docs: [Image Generation Skill](https://docs.cowagent.ai/en/skills/image-generation)

View File

@@ -51,7 +51,7 @@ The voice and streaming building blocks come from a community contribution #2791
## 🔧 Tools and Safety
- **Vision model selection**: `tool.vision.model` config now actually takes effect, with automatic fallback when unconfigured #2792
- **Vision model selection**: `tools.vision.model` config now actually takes effect, with automatic fallback when unconfigured #2792
- **Bash safety prompt**: The destructive-deletion confirm prompt is now scoped to paths outside the workspace — routine in-workspace operations are no longer interrupted
## 🐛 Other Fixes

View File

@@ -87,7 +87,7 @@ Configure ARK_API_KEY as xxx
To force all image generation through a specific provider's model, add this to `config.json`:
```json
"skill": {
"skills": {
"image-generation": {
"model": "seedream-5.0-lite"
}

View File

@@ -51,7 +51,7 @@ To specify a particular model for the vision tool, add to `config.json`:
```json
{
"tool": {
"tools": {
"vision": {
"model": "ernie-4.5-turbo-vl"
}

View File

@@ -40,7 +40,7 @@ description: Baidu Qianfan ERNIE モデル設定
```json
{
"tool": {
"tools": {
"vision": {
"model": "ernie-4.5-turbo-vl"
}

View File

@@ -11,7 +11,7 @@ description: CowAgent 2.0.7 - 画像生成スキル6プロバイダー自動
- **モデル選択不要**：API Key を設定するだけで使用可能、モデルを手動で指定する必要なし。会話で特定モデルを指名することも可能（「seedream で猫を描いて」）
- **柔軟な制御**：`quality`（画質）、`size`（解像度、512/1K〜4K）、`aspect_ratio`（アスペクト比）パラメータ対応、各プロバイダーが自動的に有効な値にマッピング
- **画像編集**：既存の画像を渡して編集・スタイル変換・複数画像融合が可能（Seedream は最大 14 枚の参照画像をサポート）
- **スキルレベル設定**：`config.json` の `skill.image-generation.model` でデフォルトモデルを固定可能
- **スキルレベル設定**：`config.json` の `skills.image-generation.model` でデフォルトモデルを固定可能
- **画像ライトボックス**：Web コンソールのすべての画像がクリックで拡大プレビュー対応
ドキュメント:[画像生成スキル](https://docs.cowagent.ai/ja/skills/image-generation)

View File

@@ -51,7 +51,7 @@ description: CowAgent 2.0.8 - 飛書チャネル全面アップグレード(
## 🔧 ツールと安全性
- **Vision モデル選択**：`tool.vision.model` 設定が実際に反映されるようになり、未設定時は自動フォールバック #2792
- **Vision モデル選択**：`tools.vision.model` 設定が実際に反映されるようになり、未設定時は自動フォールバック #2792
- **Bash セーフティ確認**：破壊的削除の確認プロンプトをワークスペース外のパスに限定。ワークスペース内の通常操作は中断されません
## 🐛 その他の修正

View File

@@ -87,7 +87,7 @@ ARK_API_KEY を xxx に設定して
すべての画像生成を特定のプロバイダーのモデルで固定したい場合、`config.json` に以下を追加:
```json
"skill": {
"skills": {
"image-generation": {
"model": "seedream-5.0-lite"
}

View File

@@ -51,7 +51,7 @@ Vision ツールで使用するモデルを指定するには、`config.json`
```json
{
"tool": {
"tools": {
"vision": {
"model": "ernie-4.5-turbo-vl"
}

View File

@@ -1,8 +1,16 @@
---
title: Claude
description: Claude 模型配置
description: Anthropic Claude 模型配置(文本对话 + 图像理解)
---
Claude 由 Anthropic 提供,支持文本对话与图像理解,主流 Sonnet / Opus 模型均原生支持视觉,无需额外指定 Vision 模型。
<Tip>
通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。
</Tip>
## 文本对话
```json
{
"model": "claude-sonnet-4-6",
@@ -14,4 +22,28 @@ description: Claude 模型配置
| --- | --- |
| `model` | 支持 `claude-sonnet-4-6`、`claude-opus-4-7`、`claude-opus-4-6`、`claude-sonnet-4-5`、`claude-sonnet-4-0`、`claude-3-5-sonnet-latest` 等,参考 [官方模型](https://docs.anthropic.com/en/docs/about-claude/models/overview) |
| `claude_api_key` | 在 [Claude 控制台](https://console.anthropic.com/settings/keys) 创建 |
| `claude_api_base` | 可选,默认为 `https://api.anthropic.com/v1`修改可接入第三方代理 |
| `claude_api_base` | 可选，默认为 `https://api.anthropic.com/v1`，可改为第三方代理 |
### 模型选择
| 模型 | 适用场景 |
| --- | --- |
| `claude-sonnet-4-6` | 默认推荐,性价比与速度平衡 |
| `claude-opus-4-7` | 复杂推理与长链路任务,效果最佳但成本更高 |
| `claude-sonnet-4-5` / `claude-sonnet-4-0` | 上一代旗舰,价格更低 |
## 图像理解
配置 `claude_api_key` 后 Agent 的 Vision 工具会自动使用 Claude 主模型识别图像,无需额外配置。
如需手动指定 Vision 模型,可在配置文件中显式配置:
```json
{
"tools": {
"vision": {
"model": "claude-sonnet-4-6"
}
}
}
```

View File

@@ -13,7 +13,7 @@ description: 自定义厂商配置,适用于第三方 API 代理和本地模
与 `openai` 厂商的区别:选择自定义厂商后,通过 `/config model` 切换模型时,不会自动切换厂商类型,始终使用自定义的 API 地址。
</Note>
## 配置方式
## 文本对话
### 第三方 API 代理
@@ -35,7 +35,7 @@ description: 自定义厂商配置,适用于第三方 API 代理和本地模
### 本地模型
本地模型通常不需要 API Key只需填写 API Base 即可
本地模型通常不需要 API Key，只需填写 API Base
```json
{
@@ -53,7 +53,7 @@ description: 自定义厂商配置,适用于第三方 API 代理和本地模
| [vLLM](https://docs.vllm.ai) | `http://localhost:8000/v1` |
| [LocalAI](https://localai.io) | `http://localhost:8080/v1` |
## 切换模型
### 切换模型
自定义厂商下切换模型时,只会修改 `model`,不会改变 `bot_type` 和 API 地址:

View File

@@ -1,9 +1,11 @@
---
title: DeepSeek
description: DeepSeek 模型配置
description: DeepSeek 模型配置(文本对话 + 思考模式)
---
方式一:官方接入(推荐):
DeepSeek 是当前 Agent 模式默认推荐的厂商之一,主打高性价比的文本对话和任务规划能力。
## 文本对话
```json
{
@@ -18,20 +20,20 @@ description: DeepSeek 模型配置
| `deepseek_api_key` | 在 [DeepSeek 平台](https://platform.deepseek.com/api_keys) 创建 |
| `deepseek_api_base` | 可选,默认为 `https://api.deepseek.com/v1`,可修改为第三方代理地址 |
## 模型选择
### 模型选择
| 模型 | 适用场景 |
| --- | --- |
| `deepseek-v4-flash` | 默认推荐,速度快、成本低 |
| `deepseek-v4-pro` | 更智能复杂任务效果更强 |
| `deepseek-v4-pro` | 更智能,复杂任务效果更强 |
## 思考模式
V4 系列(`deepseek-v4-flash` / `deepseek-v4-pro`)支持显式的"思考模式":模型在输出最终回答前,先输出一段思维链(`reasoning_content`),从而提升答案质量。
V4 系列(`deepseek-v4-flash` / `deepseek-v4-pro`)支持显式的思考模式:模型在输出最终回答前,先输出一段思维链(`reasoning_content`),从而提升答案质量。
### 开关
通过全局配置 `enable_thinking` 控制:
通过全局配置 `enable_thinking` 控制,也可在 Web 控制台的配置页面中进行切换:
```json
{
@@ -66,16 +68,5 @@ V4 系列(`deepseek-v4-flash` / `deepseek-v4-pro`)支持显式的"思考模
- **多轮工具调用**:当历史中包含工具调用时,DeepSeek 要求所有 assistant 消息必须回传 `reasoning_content`。CowAgent 会自动处理回传逻辑,跨轮次切换思考开关也不会出错。
<Tip>
默认使用 `deepseek-v4-flash`;复杂任务可使用 `deepseek-v4-pro`;需要深度思考可开启 `enable_thinking`。
默认使用 `deepseek-v4-flash`;复杂任务可使用 `deepseek-v4-pro`;需要深度推理可开启 `enable_thinking`。
</Tip>
方式二OpenAI 兼容方式接入:
```json
{
"model": "deepseek-v4-flash",
"bot_type": "openai",
"open_ai_api_key": "YOUR_API_KEY",
"open_ai_api_base": "https://api.deepseek.com/v1"
}
```

View File

@@ -1,17 +1,66 @@
---
title: 豆包 Doubao
description: 豆包 (火山方舟) 模型配置
description: 豆包(火山方舟)模型配置(文本 / 图像理解 / 图像生成 / 向量)
---
豆包(火山方舟)支持文本对话、图像理解、图像生成(Seedream)和向量能力,一份 `ark_api_key` 即可启用全部能力。
<Tip>
通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。
</Tip>
## 文本对话
```json
{
"model": "doubao-seed-2-0-code-preview-260215",
"model": "doubao-seed-2-0-pro-260215",
"ark_api_key": "YOUR_API_KEY"
}
```
| 参数 | 说明 |
| --- | --- |
| `model` | 可填 `doubao-seed-2-0-code-preview-260215`、`doubao-seed-2-0-pro-260215`、`doubao-seed-2-0-lite-260215` 等 |
| `model` | 可填 `doubao-seed-2-0-pro-260215`、`doubao-seed-2-0-code-preview-260215`、`doubao-seed-2-0-lite-260215` 等 |
| `ark_api_key` | 在 [火山方舟控制台](https://console.volcengine.com/ark/region:ark+cn-beijing/apikey) 创建 |
| `ark_base_url` | 可选,默认为 `https://ark.cn-beijing.volces.com/api/v3` |
## 图像理解
配置 `ark_api_key` 后 Agent 的 Vision 工具会自动使用 `doubao-seed-2-0-pro-260215` 识别图像,无需额外配置。
如需手动指定 Vision 模型:
```json
{
"tools": {
"vision": {
"model": "doubao-seed-2-0-pro-260215"
}
}
}
```
## 图像生成
```json
{
"skills": {
"image-generation": {
"model": "seedream-5.0-lite"
}
}
}
```
可选模型:`seedream-5.0-lite`、`seedream-4.5`。
## 向量
```json
{
"embedding_provider": "doubao",
"embedding_model": "doubao-embedding-vision-251215"
}
```
默认模型 `doubao-embedding-vision-251215`(多模态 embedding),可在配置文件中通过 `embedding_dimensions` 指定 1024 或 2048 维。修改 embedding 后需执行 `/memory rebuild-index` 命令重建索引。

View File

@@ -1,16 +1,59 @@
---
title: Gemini
description: Google Gemini 模型配置
description: Google Gemini 模型配置(文本对话 + 图像理解 + 图像生成)
---
Google Gemini 支持文本对话、图像理解和图像生成(Nano Banana 系列),一个 `gemini_api_key` 即可启用全部能力。
<Tip>
通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。
</Tip>
## 文本对话
```json
{
"model": "gemini-3.1-pro-preview",
"model": "gemini-3.5-flash",
"gemini_api_key": "YOUR_API_KEY"
}
```
| 参数 | 说明 |
| --- | --- |
| `model` | 支持 `gemini-3.1-flash-lite-preview`、`gemini-3.1-pro-preview`、`gemini-3-flash-preview`、`gemini-3-pro-preview` 等,参考 [官方文档](https://ai.google.dev/gemini-api/docs/models) |
| `model` | 推荐 `gemini-3.5-flash`,亦支持 `gemini-3.1-pro-preview`、`gemini-3.1-flash-lite-preview`、`gemini-3-flash-preview`、`gemini-3-pro-preview` 等,参考 [官方文档](https://ai.google.dev/gemini-api/docs/models) |
| `gemini_api_key` | 在 [Google AI Studio](https://aistudio.google.com/app/apikey) 创建 |
| `gemini_api_base` | 可选,默认为 `https://generativelanguage.googleapis.com`,可改为第三方代理 |
## 图像理解
Gemini 全系列模型均原生支持视觉,配置 `gemini_api_key` 后 Agent 的 Vision 工具会自动使用主模型识别图像,无需额外配置。
如需手动指定 Vision 模型:
```json
{
"tools": {
"vision": {
"model": "gemini-3.1-flash-lite-preview"
}
}
}
```
## 图像生成
```json
{
"skills": {
"image-generation": {
"model": "gemini-3.1-flash-image-preview"
}
}
}
```
| 模型 ID | 别名 |
| --- | --- |
| `gemini-3.1-flash-image-preview` | Nano Banana 2 |
| `gemini-3-pro-image-preview` | Nano Banana Pro |
| `gemini-2.5-flash-image` | Nano Banana |

View File

@@ -1,8 +1,16 @@
---
title: 智谱 GLM
description: 智谱AI GLM 模型配置
description: 智谱 AI GLM 模型配置(文本 / 图像理解 / 语音识别 / 向量)
---
智谱 AI 支持文本对话、图像理解、语音识别(ASR)和向量(Embedding),一份 `zhipu_ai_api_key` 即可启用全部能力。
<Tip>
通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。
</Tip>
## 文本对话
```json
{
"model": "glm-5.1",
@@ -14,14 +22,35 @@ description: 智谱AI GLM 模型配置
| --- | --- |
| `model` | 可填 `glm-5.1`、`glm-5-turbo`、`glm-5`、`glm-4.7`、`glm-4-plus`、`glm-4-flash`、`glm-4-air` 等,参考 [模型编码](https://bigmodel.cn/dev/api/normal-model/glm-4) |
| `zhipu_ai_api_key` | 在 [智谱 AI 控制台](https://www.bigmodel.cn/usercenter/proj-mgmt/apikeys) 创建 |
| `zhipu_ai_api_base` | 可选,默认为 `https://open.bigmodel.cn/api/paas/v4` |
也支持 OpenAI 兼容方式接入:
## 图像理解
智谱 chat 系列模型(`glm-5.1`、`glm-5-turbo` 等)不支持视觉,视觉调用统一路由到 `glm-5v-turbo`。配置 `zhipu_ai_api_key` 后 Agent 的 Vision 工具会自动使用该模型,无需在配置文件中显式指定。
## 语音识别
```json
{
"bot_type": "openai",
"model": "glm-5.1",
"open_ai_api_base": "https://open.bigmodel.cn/api/paas/v4",
"open_ai_api_key": "YOUR_API_KEY"
"voice_to_text": "zhipu",
"voice_to_text_model": "glm-asr-2512"
}
```
| 参数 | 说明 |
| --- | --- |
| `voice_to_text` | 设为 `zhipu` 启用智谱 ASR |
| `voice_to_text_model` | 可选,默认 `glm-asr-2512` |
凭证自动复用 `zhipu_ai_api_key`。语音文件建议小于 25MB,超大文件可能被服务端拒绝。
## 向量
```json
{
"embedding_provider": "zhipu",
"embedding_model": "embedding-3"
}
```
可选模型:`embedding-3`、`embedding-2`。修改 embedding 后需执行 `/memory rebuild-index` 命令重建索引。

View File

@@ -1,67 +1,45 @@
---
title: 模型概览
description: CowAgent 支持的模型及推荐选择
description: CowAgent 支持的模型厂商及能力矩阵
---
CowAgent 支持国内外主流厂商的大语言模型,模型接口实现在项目的 `models/` 目录下。
CowAgent 支持国内外主流厂商的大语言模型,模型接口实现在项目的 `models/` 目录下。除文本对话外,部分厂商还提供视觉理解、图像生成、语音识别、语音合成、向量等能力,可在 Agent 流程中按需调用。
<Note>
Agent 模式下推荐使用以下模型可根据效果及成本综合选择deepseek-v4-flash、MiniMax-M2.7、claude-sonnet-4-6、gemini-3.1-pro-preview、glm-5.1、qwen3.6-plus、kimi-k2.6、ernie-5.1
Agent 模式下推荐使用以下模型,可根据效果及成本综合选择:deepseek-v4-flash、MiniMax-M2.7、claude-sonnet-4-6、gemini-3.5-flash、glm-5.1、qwen3.6-plus、kimi-k2.6、ernie-5.1
同时支持使用 [LinkAI](https://link-ai.tech) 平台接口,可灵活切换多种模型,并支持知识库、工作流、插件等 Agent 能力。
同时支持使用 [LinkAI](https://link-ai.tech) 平台接口,一个 Key 即可灵活切换多家厂商,并附带知识库、工作流、插件等能力。
</Note>
## 模型能力总览
各厂商提供的能力一览。「文本」指主对话模型,其余列表示该厂商可承担对应 Agent 能力。
| 厂商 | 代表模型 | 文本 | 图像理解 | 图像生成 | 语音识别 | 语音合成 | 向量 |
| --- | --- | :-: | :-: | :-: | :-: | :-: | :-: |
| [DeepSeek](/models/deepseek) | deepseek-v4-flash / pro | ✅ | | | | | |
| [MiniMax](/models/minimax) | MiniMax-M2.7 | ✅ | ✅ | ✅ | | ✅ | |
| [Claude](/models/claude) | claude-opus-4-7 | ✅ | ✅ | | | | |
| [Gemini](/models/gemini) | gemini-3.5-flash | ✅ | ✅ | ✅ | | | |
| [OpenAI](/models/openai) | gpt-5.5、o 系列 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [智谱 GLM](/models/glm) | glm-5.1、glm-5v-turbo | ✅ | ✅ | | ✅ | | ✅ |
| [通义千问](/models/qwen) | qwen3.7-max | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [豆包 Doubao](/models/doubao) | doubao-seed-2.0 系列 | ✅ | ✅ | ✅ | | | ✅ |
| [Kimi](/models/kimi) | kimi-k2.6 | ✅ | ✅ | | | | |
| [百度千帆](/models/qianfan) | ernie-5.1 | ✅ | ✅ | | | | |
| [LinkAI](/models/linkai) | 多厂商 100+ 模型统一接入 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [自定义](/models/custom) | 本地模型 / 第三方代理 | ✅ | | | | | |
<Tip>
Web 控制台中各项能力(视觉 / 图像 / 语音识别 / 语音合成 / 向量 / 网络搜索)均可独立配置厂商与模型,互相之间不强制绑定。
</Tip>
## 配置方式
**方式一(推荐):** 通过 [Web 控制台](/channels/web) 在线管理模型配置,无需手动编辑配置文件:
**方式一(推荐):** 通过 [Web 控制台](/channels/web) 在线管理模型与各项能力,无需手动编辑配置文件:
<img width="850" src="https://cdn.link-ai.tech/doc/20260227173811.png" />
<img width="900" src="https://cdn.link-ai.tech/doc/20260521212527.png" />
**方式二:** 手动编辑 `config.json`,根据所选模型填写对应的模型名称和 API Key。每个模型也支持 OpenAI 兼容方式接入,将 `bot_type` 设为 `openai`,配置 `open_ai_api_base` 和 `open_ai_api_key` 即可。
## 支持的模型
<CardGroup cols={2}>
<Card title="DeepSeek" href="/models/deepseek">
deepseek-v4-flash、deepseek-v4-pro 等
</Card>
<Card title="百度千帆 / ERNIE" href="/models/qianfan">
ernie-5.1、ernie-5.0、ernie-4.5-turbo-128k 等
</Card>
<Card title="MiniMax" href="/models/minimax">
MiniMax-M2.7 等系列模型
</Card>
<Card title="Claude" href="/models/claude">
claude-sonnet-4-6 等
</Card>
<Card title="Gemini" href="/models/gemini">
gemini-3.1-pro-preview 等
</Card>
<Card title="OpenAI" href="/models/openai">
gpt-5.4、gpt-4.1、o 系列等
</Card>
<Card title="智谱 GLM" href="/models/glm">
glm-5.1、glm-5-turbo、glm-5 等系列模型
</Card>
<Card title="通义千问 Qwen" href="/models/qwen">
qwen3.6-plus、qwen3-max 等
</Card>
<Card title="豆包 Doubao" href="/models/doubao">
doubao-seed 系列模型
</Card>
<Card title="Kimi" href="/models/kimi">
kimi-k2.6、kimi-k2.5、kimi-k2 等
</Card>
<Card title="LinkAI" href="/models/linkai">
多模型统一接口 + 知识库
</Card>
<Card title="自定义" href="/models/custom">
第三方代理、本地模型等
</Card>
</CardGroup>
<Tip>
全部模型名称可参考项目 [`common/const.py`](https://github.com/zhayujie/CowAgent/blob/master/common/const.py) 文件。
</Tip>

View File

@@ -1,8 +1,16 @@
---
title: Kimi
description: Kimi (Moonshot) 模型配置
description: Kimi(Moonshot)模型配置(文本对话 + 图像理解)
---
Kimi 由 Moonshot 提供,支持文本对话与图像理解,`kimi-k2.x` 系列原生支持视觉。
<Tip>
通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。
</Tip>
## 文本对话
```json
{
"model": "kimi-k2.6",
@@ -14,14 +22,20 @@ description: Kimi (Moonshot) 模型配置
| --- | --- |
| `model` | 可填 `kimi-k2.6`、`kimi-k2.5`、`kimi-k2`、`moonshot-v1-8k`、`moonshot-v1-32k`、`moonshot-v1-128k` |
| `moonshot_api_key` | 在 [Moonshot 控制台](https://platform.moonshot.cn/console/api-keys) 创建 |
| `moonshot_base_url` | 可选,默认为 `https://api.moonshot.cn/v1` |
也支持 OpenAI 兼容方式接入:
## 图像理解
配置 `moonshot_api_key` 后 Agent 的 Vision 工具会自动使用 `kimi-k2.6` 识别图像,无需额外配置。
如需手动指定 Vision 模型:
```json
{
"bot_type": "openai",
"model": "kimi-k2.6",
"open_ai_api_base": "https://api.moonshot.cn/v1",
"open_ai_api_key": "YOUR_API_KEY"
"tools": {
"vision": {
"model": "kimi-k2.6"
}
}
}
```

View File

@@ -1,9 +1,15 @@
---
title: LinkAI
description: 通过 LinkAI 平台统一接入多种模型
description: 通过 LinkAI 平台统一接入文本、视觉、图像、语音与向量能力
---
通过 [LinkAI](https://link-ai.tech) 平台可灵活切换 OpenAI、Claude、Gemini、DeepSeek、MiniMax、Qwen、Kimi 等多种模型,并支持知识库、工作流、插件等 Agent 能力。
通过一份 `linkai_api_key` 即可访问 OpenAI、Claude、Gemini、DeepSeek、MiniMax、Qwen、Kimi、豆包 等主流厂商的全部能力。
<Tip>
通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。
</Tip>
## 文本对话
```json
{
@@ -14,8 +20,84 @@ description: 通过 LinkAI 平台统一接入多种模型
| 参数 | 说明 |
| --- | --- |
| `use_linkai` | 设为 `true` 启用 LinkAI 接口 |
| `use_linkai` | 设为 `true` 启用 |
| `linkai_api_key` | 在 [控制台](https://link-ai.tech/console/interface) 创建 |
| `model` | 留空则使用智能体默认模型,可在平台中灵活切换,[模型列表](https://link-ai.tech/console/models) 中的全部模型均可使用 |
| `model` | 可填写 [模型列表](https://link-ai.tech/console/models) 中任意编码 |
参考 [接口文档](https://docs.link-ai.tech/platform/api) 了解更多。
前往 [模型服务](https://link-ai.tech/console/models) 了解更多。
## 图像理解
配置完成后 Agent 的 Vision 工具会自动调用网关上的多模态模型,无需额外配置。如需手动指定 Vision 模型:
```json
{
"tools": {
"vision": {
"model": "gpt-5.4-mini"
}
}
}
```
可选模型:`gpt-4.1-mini`、`gpt-5.4-mini`、`qwen3.6-plus`、`doubao-seed-2-0-pro-260215`、`kimi-k2.6`、`claude-sonnet-4-6`、`gemini-3.1-flash-lite-preview` 等。
## 图像生成
```json
{
"skills": {
"image-generation": {
"model": "gpt-image-2"
}
}
}
```
| 模型 ID | 别名 |
| --- | --- |
| `gpt-image-2` | OpenAI |
| `gemini-3.1-flash-image-preview` | Nano Banana 2 |
| `gemini-3-pro-image-preview` | Nano Banana Pro |
| `seedream-5.0-lite` | 字节豆包 Seedream |
## 语音识别
```json
{
"voice_to_text": "linkai"
}
```
ASR 固定使用 Whisper,凭证自动复用 `linkai_api_key`。
## 语音合成
语音合成网关下支持多个底层 TTS 引擎,按 `text_to_voice_model` 选择引擎,音色随引擎切换。
```json
{
"text_to_voice": "linkai",
"text_to_voice_model": "doubao",
"tts_voice_id": "BV001_streaming"
}
```
| `text_to_voice_model` | 引擎说明 |
| --- | --- |
| `tts-1` | OpenAI · 多语种通用(音色 `alloy` / `nova` / `echo` 等) |
| `doubao` | 字节豆包 · 中文音色丰富 |
| `baidu` | 百度 · 中文主播音色 |
不同引擎对应的音色不同,建议在 Web 控制台「模型管理 → 语音合成」中可视化选择。
## 向量
```json
{
"embedding_provider": "linkai",
"embedding_model": "text-embedding-3-small"
}
```
默认模型 `text-embedding-3-small`(OpenAI 兼容)。修改 embedding 后需执行 `/memory rebuild-index` 命令重建索引。

View File

@@ -1,8 +1,16 @@
---
title: MiniMax
description: MiniMax 模型配置
description: MiniMax 模型配置(文本 / 图像理解 / 图像生成 / 语音合成)
---
MiniMax 支持文本对话、图像理解、图像生成与语音合成,一份 `minimax_api_key` 即可启用全部能力。
<Tip>
通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。
</Tip>
## 文本对话
```json
{
"model": "MiniMax-M2.7",
@@ -12,16 +20,52 @@ description: MiniMax 模型配置
| 参数 | 说明 |
| --- | --- |
| `model` | 可填 `MiniMax-M2.7`、`MiniMax-M2.5`、`MiniMax-M2.1`、`MiniMax-M2.1-lightning`、`MiniMax-M2` 等 |
| `model` | 可填 `MiniMax-M2.7`、`MiniMax-M2.7-highspeed`、`MiniMax-M2.5`、`MiniMax-M2.1`、`MiniMax-M2.1-lightning`、`MiniMax-M2` 等 |
| `minimax_api_key` | 在 [MiniMax 控制台](https://platform.minimaxi.com/user-center/basic-information/interface-key) 创建 |
也支持 OpenAI 兼容方式接入:
## 图像理解
MiniMax 的 M2.x 系列 chat 模型本身不支持视觉,视觉调用统一路由到 `MiniMax-Text-01`。配置 `minimax_api_key` 后 Agent 的 Vision 工具会自动使用该模型,无需在配置文件中显式指定。
## 图像生成
```json
{
"bot_type": "openai",
"model": "MiniMax-M2.7",
"open_ai_api_base": "https://api.minimaxi.com/v1",
"open_ai_api_key": "YOUR_API_KEY"
"skills": {
"image-generation": {
"model": "image-01"
}
}
}
```
可选模型:`image-01`。
## 语音合成
```json
{
"text_to_voice": "minimax",
"text_to_voice_model": "speech-2.8-hd",
"tts_voice_id": "female-shaonv"
}
```
| 参数 | 说明 |
| --- | --- |
| `text_to_voice_model` | `speech-2.8-hd`(情绪渲染、自然听感)、`speech-2.8-turbo`(极速)、`speech-2.6-hd`、`speech-2.6-turbo` |
| `tts_voice_id` | 音色 ID,支持中文 / 粤语 / 英 / 日 / 韩,共 70+ 种 |
常用音色示例:
| 音色 ID | 说明 |
| --- | --- |
| `female-shaonv` | 中文 · 少女(女) |
| `female-yujie` | 中文 · 御姐(女) |
| `female-tianmei` | 中文 · 甜美女性(女) |
| `male-qn-jingying` | 中文 · 精英青年(男) |
| `male-qn-badao` | 中文 · 霸道青年(男) |
| `Cantonese_GentleLady` | 粤语 · 温柔女声 |
| `English_Graceful_Lady` | 英文 · Graceful Lady |
完整音色(中文 / 粤语 / 英 / 日 / 韩共 70+ 种)可参考 [系统音色列表](https://platform.minimaxi.com/docs/faq/system-voice-id),也可在 Web 控制台的「模型管理 → 语音合成」下拉框中可视化选择。

View File

@@ -1,11 +1,20 @@
---
title: OpenAI
description: OpenAI 模型配置
description: OpenAI 模型配置(文本 / 视觉 / 图像 / 语音 / 向量)
---
OpenAI 是覆盖最完整的厂商,可同时承担文本对话、视觉理解、图像生成、语音识别(ASR)、语音合成(TTS)和向量(Embedding)能力。一份 `open_ai_api_key` 即可让 Agent 用到全部能力。
<Tip>
通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。
</Tip>
## 文本对话
```json
{
"model": "gpt-5.4",
"model": "gpt-5.5",
"open_ai_api_key": "YOUR_API_KEY",
"open_ai_api_base": "https://api.openai.com/v1"
}
@@ -13,7 +22,82 @@ description: OpenAI 模型配置
| 参数 | 说明 |
| --- | --- |
| `model` | 与 OpenAI 接口的 [model 参数](https://platform.openai.com/docs/models) 一致,支持 o 系列、gpt-5.4gpt-5.4-minigpt-5.4-nanogpt-5 系列、gpt-4.1Agent 模式推荐使用 `gpt-5.4` |
| `model` | 与 OpenAI 接口的 [model 参数](https://platform.openai.com/docs/models) 一致,支持 `gpt-5.5`、`gpt-5.4`、`gpt-5.4-mini`、`gpt-5.4-nano`、`gpt-5` 系列、`gpt-4.1`、o 系列等;Agent 模式默认 `gpt-5.5`,追求性价比可改为 `gpt-5.4` |
| `open_ai_api_key` | 在 [OpenAI 平台](https://platform.openai.com/api-keys) 创建 |
| `open_ai_api_base` | 可选,修改可接入第三方代理接口 |
| `bot_type` | 使用 OpenAI 官方模型时无需填写。当通过代理接口使用 Claude 等非 OpenAI 模型时设为 `openai` |
| `open_ai_api_base` | 可选,修改可接入第三方代理 |
| `bot_type` | 使用 OpenAI 官方模型时无需填写;通过兼容协议接入厂商模型时设为 `openai` |
## 图像理解
`gpt-5.5`、`gpt-5.4`、`gpt-4o`、`gpt-4.1` 等 OpenAI 模型均原生支持视觉,配置 `open_ai_api_key` 后 Agent 的 Vision 工具会自动使用主模型识别图像。若主模型不支持视觉或希望显式指定,可在配置文件中配置:
```json
{
"tools": {
"vision": {
"model": "gpt-5.4-mini"
}
}
}
```
支持的 Vision 模型:`gpt-5.5`、`gpt-5.4`、`gpt-5.4-mini`、`gpt-5.4-nano`、`gpt-5`、`gpt-4.1`、`gpt-4.1-mini`、`gpt-4o`。
## 图像生成
在配置文件中指定图像生成模型,Agent 调用图像生成技能时会自动路由到 OpenAI:
```json
{
"skills": {
"image-generation": {
"model": "gpt-image-2"
}
}
}
```
支持的图像生成模型:`gpt-image-2`、`gpt-image-1`。
## 语音识别
```json
{
"voice_to_text": "openai",
"voice_to_text_model": "gpt-4o-mini-transcribe"
}
```
| 参数 | 说明 |
| --- | --- |
| `voice_to_text` | 设为 `openai` 启用 OpenAI 语音识别 |
| `voice_to_text_model` | 可选,默认 `gpt-4o-mini-transcribe`;也可填 `gpt-4o-transcribe`、`whisper-1` |
凭证自动复用 `open_ai_api_key`。
## 语音合成
```json
{
"text_to_voice": "openai",
"text_to_voice_model": "tts-1",
"tts_voice_id": "alloy"
}
```
| 参数 | 说明 |
| --- | --- |
| `text_to_voice_model` | `tts-1`、`tts-1-hd`、`gpt-4o-mini-tts` |
| `tts_voice_id` | 音色:`alloy`、`echo`、`fable`、`onyx`、`nova`、`shimmer`、`ash`、`ballad`、`coral`、`sage`、`verse` |
## 向量
```json
{
"embedding_provider": "openai",
"embedding_model": "text-embedding-3-small"
}
```
可选模型:`text-embedding-3-small`、`text-embedding-3-large`、`text-embedding-ada-002`。修改 embedding 后需执行 `/memory rebuild-index` 命令重建索引。

View File

@@ -1,14 +1,20 @@
---
title: 百度千帆
description: 百度千帆 ERNIE 模型配置
description: 百度千帆 ERNIE 模型配置(文本对话 + 图像理解)
---
方式一:官方接入(推荐):
百度千帆提供 ERNIE 系列模型,支持文本对话与图像理解。
<Tip>
通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。
</Tip>
## 文本对话
```json
{
"model": "ernie-5.1",
"qianfan_api_key": "",
"qianfan_api_key": "YOUR_API_KEY",
"qianfan_api_base": "https://qianfan.baidubce.com/v2"
}
```
@@ -19,7 +25,7 @@ description: 百度千帆 ERNIE 模型配置
| `qianfan_api_key` | 千帆 API Key,格式通常以 `bce-v3/` 开头 |
| `qianfan_api_base` | 可选,默认为 `https://qianfan.baidubce.com/v2` |
## 模型选择
### 模型选择
| 模型 | 适用场景 |
| --- | --- |
@@ -29,18 +35,18 @@ description: 百度千帆 ERNIE 模型配置
| `ernie-4.5-turbo-128k` | 长上下文和通用对话 |
| `ernie-4.5-turbo-32k` | 通用对话,成本和上下文更均衡 |
## Vision 工具
## 图像理解
配置 `qianfan_api_key` 后,Agent 的 Vision 工具可以自动使用千帆视觉模型:
- 当主模型本身是多模态时(如 `ernie-5.1`、`ernie-5.0`、`ernie-x1.1`、`ernie-4.5-turbo-vl`),直接由主模型识别图像,无需额外配置
- 当主模型是纯文本时(如 `ernie-4.5-turbo-128k`),Vision 工具会自动 fallback 到 `ernie-4.5-turbo-vl`
如需手动指定 Vision 模型,可在 `config.json` 中显式配置:
如需手动指定 Vision 模型,可在配置文件中显式配置:
```json
{
"tool": {
"tools": {
"vision": {
"model": "ernie-4.5-turbo-vl"
}
@@ -48,17 +54,6 @@ description: 百度千帆 ERNIE 模型配置
}
```
方式二OpenAI 兼容方式接入:
```json
{
"model": "ernie-5.1",
"bot_type": "openai",
"open_ai_api_key": "",
"open_ai_api_base": "https://qianfan.baidubce.com/v2"
}
```
<Tip>
新配置推荐使用 `qianfan_api_key`。旧的 `wenxin`、`wenxin-4`、`baidu_wenxin_api_key`、`baidu_wenxin_secret_key` 配置仍保持兼容。
</Tip>

View File

@@ -1,8 +1,16 @@
---
title: 通义千问 Qwen
description: 通义千问模型配置
description: 通义千问模型配置(文本 / 图像理解 / 图像生成 / 语音识别 / 语音合成 / 向量)
---
通义千问(DashScope / 百炼)是国内覆盖最完整的厂商之一,文本、图像理解、图像生成、语音识别、语音合成与向量能力均可用一份 `dashscope_api_key` 启用。
<Tip>
通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。
</Tip>
## 文本对话
```json
{
"model": "qwen3.6-plus",
@@ -12,16 +20,93 @@ description: 通义千问模型配置
| 参数 | 说明 |
| --- | --- |
| `model` | 可填 `qwen3.6-plus`、`qwen3.5-plus`、`qwen3-max`、`qwen-max`、`qwen-plus`、`qwen-turbo`、`qwq-plus` 等 |
| `model` | 可填 `qwen3.6-plus`、`qwen3.7-max`、`qwen3.5-plus`、`qwen3-max`、`qwen-max`、`qwen-plus`、`qwen-turbo`、`qwq-plus` 等 |
| `dashscope_api_key` | 在 [百炼控制台](https://bailian.console.aliyun.com/?tab=model#/api-key) 创建,参考 [官方文档](https://bailian.console.aliyun.com/?tab=api#/api) |
也支持 OpenAI 兼容方式接入:
## 图像理解
配置 `dashscope_api_key` 后 Agent 的 Vision 工具会自动调用千问的视觉模型识别图像。`qwen3-max` / `qwen3.5-plus` / `qwen3.6-plus` 等模型本身就是多模态;若主模型是纯文本(如 `qwen-turbo`),会自动回落到 `qwen-vl-max`。
如需手动指定 Vision 模型:
```json
{
"bot_type": "openai",
"model": "qwen3.6-plus",
"open_ai_api_base": "https://dashscope.aliyuncs.com/compatible-mode/v1",
"open_ai_api_key": "YOUR_API_KEY"
"tools": {
"vision": {
"model": "qwen3.6-plus"
}
}
}
```
支持模型:`qwen3.6-plus`、`qwen3.5-plus`、`qwen3-max`。
## 图像生成
```json
{
"skills": {
"image-generation": {
"model": "qwen-image-2.0"
}
}
}
```
可选模型:`qwen-image-2.0`、`qwen-image-2.0-pro`。
## 语音识别
```json
{
"voice_to_text": "dashscope",
"voice_to_text_model": "qwen3-asr-flash"
}
```
| 参数 | 说明 |
| --- | --- |
| `voice_to_text` | 设为 `dashscope` 启用通义千问 ASR |
| `voice_to_text_model` | 可选,默认 `qwen3-asr-flash` |
凭证自动复用 `dashscope_api_key`。单段音频建议小于 10MB、时长不超过 300 秒。
## 语音合成
```json
{
"text_to_voice": "dashscope",
"text_to_voice_model": "qwen3-tts-flash",
"tts_voice_id": "Cherry"
}
```
| 参数 | 说明 |
| --- | --- |
| `text_to_voice_model` | 可选,默认 `qwen3-tts-flash`,覆盖普通话、方言与主流外语 |
| `tts_voice_id` | 音色 ID,详见下方常用列表 |
常用音色示例:
| 音色 ID | 说明 |
| --- | --- |
| `Cherry` | 芊悦 · 阳光女声 |
| `Serena` | 苏瑶 · 温柔女声 |
| `Ethan` | 晨煦 · 阳光男声 |
| `Chelsie` | 千雪 · 二次元少女 |
| `Dylan` | 北京话 · 晓东 |
| `Rocky` | 粤语 · 阿强 |
| `Sunny` | 四川话 · 晴儿 |
完整音色(普通话 / 各地方言 / 双语等)可在 Web 控制台的「模型管理 → 语音合成」下拉框中可视化选择。
## 向量
```json
{
"embedding_provider": "dashscope",
"embedding_model": "text-embedding-v4"
}
```
默认模型 `text-embedding-v4`。修改 embedding 后需执行 `/memory rebuild-index` 命令重建索引。

View File

@@ -5,6 +5,7 @@ description: CowAgent 版本更新历史
| 版本 | 日期 | 说明 |
| --- | --- | --- |
| [2.0.9](/releases/v2.0.9) | 2026.05.21 | MCP 工具生态接入、模型管理页重构(厂商凭据共享 + 多能力统一调度)、语音系统升级、浏览器持久登录 |
| [2.0.8](/releases/v2.0.8) | 2026.05.06 | 飞书渠道全面升级(语音、流式输出和Markdown)、扫码一键接入、DeepSeek V4和百度模型新增、定时任务工具增强 |
| [2.0.7](/releases/v2.0.7) | 2026.04.22 | 图像生成技能(六厂商自动路由)、新模型支持(Kimi K2.6、Claude Opus 4.7、GLM 5.1)、知识库增强、Web 控制台优化 |
| [2.0.6](/releases/v2.0.6) | 2026.04.14 | 项目更名、知识库系统、梦境记忆蒸馏、上下文智能压缩、Web 控制台多会话及多项优化 |

View File

@@ -11,7 +11,7 @@ description: CowAgent 2.0.7 - 图像生成技能(六厂商自动路由)、
- **开箱即用**:配置 API Key 即可使用,无需手动指定模型。也支持在对话中指定特定模型
- **灵活控制**:支持 `quality`(画质)、`size`(分辨率,512/1K~4K)、`aspect_ratio`(宽高比)等参数,各厂商自动适配有效值
- **图片编辑**:传入已有图片即可进行编辑、风格迁移、多图融合
- **Skill 级配置**:支持通过 `config.json` 中的 `skill.image-generation.model` 固定默认模型
- **Skill 级配置**:支持通过 `config.json` 中的 `skills.image-generation.model` 固定默认模型
相关文档:[图像生成技能](https://docs.cowagent.ai/skills/image-generation)

View File

@@ -46,7 +46,7 @@ description: CowAgent 2.0.8 - 飞书渠道全面升级(语音、流式打字
## 🔧 工具与安全
- **图像识别模型**:让 `tool.vision.model` 配置真正生效,未配置时自动 fallback #2792 Thanks CNXudiandian
- **图像识别模型**:让 `tools.vision.model` 配置真正生效,未配置时自动 fallback #2792 Thanks CNXudiandian
- **Bash 安全确认**:仅对工作区外的破坏性删除做二次确认,工作区内常规操作不再打扰
## 🐛 其他修复

92
docs/releases/v2.0.9.mdx Normal file
View File

@@ -0,0 +1,92 @@
---
title: v2.0.9
description: CowAgent 2.0.9 - MCP 工具生态接入、模型管理页重构、语音系统升级、浏览器持久登录
---
## 🧩 MCP 工具生态接入
新增 **MCP(Model Context Protocol)** 工具集成,CowAgent 从固定工具集扩展为开放可插拔的工具生态。任何兼容 MCP 协议的服务(高德地图、Chrome DevTools、Filesystem、Playwright 等)都可作为工具直接接入 Agent。
- **零额外依赖**:原生 JSON-RPC 实现,同时支持 `stdio`(本地进程)和 `sse`(远程 URL)两种传输
- **兼容主流配置**:兼容 Claude Desktop / Cursor 风格的 `mcpServers` 配置,优先读取 `~/cow/mcp.json`,未配置则回退 `config.json`
- **异步启动**:MCP 服务在后台线程启动,不阻塞 Agent 初始化;单个服务失败不影响整体
相关文档:[MCP 工具](https://docs.cowagent.ai/tools/mcp) · 社区贡献 #2801 Thanks @yangluxin613
## 🖥️ 模型管理页面重构
「模型」页面整体重新设计,从原来按 LLM 厂商堆叠的列表,重构为 **厂商凭据 + 能力调度** 两层结构:一处配置厂商凭据,对话、图像、语音、向量、搜索等多个能力共享。
- **厂商凭据集中管理**:所有支持厂商(OpenAI / Claude / Gemini / DeepSeek / Qwen / 豆包 / Kimi / 智谱 / MiniMax / 千帆 / LinkAI / Custom 等)的 API Key / API Base 在顶部统一维护,编辑后下方所有能力立即生效
- **能力卡片**:按主模型、图像理解、图像生成、语音识别、语音合成、向量、联网搜索分卡,每个能力可独立选择厂商和模型,未配置时自动跟随主模型或按默认顺序回退
### 多厂商联网搜索
联网搜索升级为多厂商架构,**输出格式统一**:
- 四家可选:博查(bocha)、百度千帆(qianfan)、智谱(zhipu)、LinkAI
- 两种调度策略:`auto`(按 bocha > qianfan > zhipu > linkai 顺序自动选第一个已配置的厂商)/ `fixed`(固定指定厂商)
- 配置 ≥2 家且为 `auto` 时,Agent 可在单次调用中临时指定 `provider` 切换搜索源
### 向量厂商热切换
向量(Embedding)支持多厂商,告别对 OpenAI 的单一依赖:
- 原生支持 `openai` / `dashscope` / `doubao` / `zhipu` / `linkai`
- **在线重建索引**:切换厂商后执行 `/memory rebuild-index`,无需重启、不会中断当前对话
- 梦境日记默认排除在向量索引之外,避免反复出现在检索结果中干扰对话
## 🎙️ 语音系统升级
- **TTS 适配更多通道**:个人微信(ilink)、钉钉、企微智能机器人现已原生支持语音回复,开关沿用 `always_reply_voice` / `voice_reply_voice`;触发 TTS 时先发文本气泡再发语音消息,方便对照阅读
- **新增 ASR 厂商**:百炼(DashScope)、智谱
- **TTS 多厂商重构**:MiniMax / LinkAI / DashScope / 智谱 TTS 在流式合成、长文本切分、错误回退上更稳
- **网页麦克风输入**:Web 控制台聊天框新增麦克风按钮,可直接录音发送,自动走 ASR 转文本
## 🌐 浏览器工具
浏览器工具支持三种启动模式,告别"每次开会话都得重新登录":
- **持久化用户配置(默认)**:复用 `~/.cow/browser_profile`,登录一次后下次自动复用登录态
- **CDP 模式**:通过 `cdp_endpoint` 附加到手动启动的真实 Chrome,享有完整指纹,适合反爬严格的站点
- **Fresh 模式**:每次清空环境,适合做隔离任务
此外,浏览器被用户中途关闭后下次调用会自动重新拉起;CDP 模式下不会误杀用户的 Chrome 进程。相关文档:[浏览器工具](https://docs.cowagent.ai/tools/browser) #2809
## 🤖 新模型与模型增强
- **百度 ERNIE 5.1**:新增 `ernie-5.1` 模型
- **DeepSeek V4 reasoning_effort**:DeepSeek V4 系列支持 `reasoning_effort` 配置思考深度
- **OpenRouter / Vercel AI Gateway 归因**:调用这两个平台时自动注入归因 Header,平台可正确识别 CowAgent 用量
- 修复 MiMo 等思考模型在多轮对话中 `reasoning_content` 丢失的问题
## 🚀 启动与运行体验
来自社区的多项体验改进 Thanks @yangluxin613
- **自动选端口 + 自动开浏览器**:默认端口被占用时自动切换,启动成功后默认打开控制台
- **Ctrl+C 干净退出**:不再打印一长串堆栈
- **日志面板**:差异化级别配色、多行日志继承级别、新增级别筛选 Checkbox
## 🔒 部署与安全
- **默认仅本机访问**:Web 控制台 `web_host` 默认 `127.0.0.1`,避免无密码情况下被外网直接访问;显式 `0.0.0.0` 且未设密码时给出提示
- **前端资源完全本地化**:第三方 CSS / JS 全部本地分发,离线 / 内网环境也能正常加载控制台 #2816 Thanks @TryToMakeUsBetter
- **支持文件夹上传**:上传区支持整目录一次性上传,路径校验适配 Windows #2815 Thanks @TryToMakeUsBetter
## 🛠 其他改进与修复
- **定时任务防重复执行**:调度器初始化做幂等处理
- **工具失败状态持久化**:刷新页面或重载历史时失败的工具调用正确显示失败状态 #2822 Thanks @a1094174619
- **企微机器人非法字符**:修复消息中包含非法控制字符导致投递失败的问题 #2810 Thanks @Jacques-Zhao
- **飞书文件消息**:飞书通道支持文件消息接收
- **工具配置合并**:修复用户自定义工具配置(如 `tools.browser`)被工作区默认值整体覆盖的问题,现按字段合并
- 修复单文件上传偶发 TypeError、切换语言后 JS 动态视图未重渲染等问题
## 📦 升级方式
源码部署可执行 `cow update` 或 `./run.sh update` 一键升级,或手动拉取代码后重启。详见 [更新升级文档](https://docs.cowagent.ai/guide/upgrade)。
> ⚠️ 切换向量厂商后,建议执行一次 `/memory rebuild-index`,让历史记忆按新的向量维度重新入库。
**发布日期**:2026.05.21 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.8...2.0.9)

View File

@@ -3,149 +3,87 @@ title: image-generation - 图像生成
description: 文生图 / 图生图 / 多图融合,支持多家厂商自动路由与回退
---
通用的图像生成与编辑技能,支持 OpenAI、Gemini、Seedream火山方舟、Qwen百炼、MiniMax、LinkAI 共六家厂商。不需要手动选模型,脚本会按固定优先级自动挑选已配置的厂商来出图
通用的图像生成与编辑技能,支持 OpenAI、Gemini、Seedream(火山方舟)、Qwen(百炼)、MiniMax、LinkAI 共六家厂商。配好任意一家的 Key 即可使用,配多家可享受自动回退。
## 模型选择
`image-generation` 采用「固定优先级 + 自动回退」的策略,配好 Key 就能用:
1. **优先级顺序**`OpenAI → Gemini → Seedream → Qwen → MiniMax → LinkAI`
2. **没配 Key 的跳过**:只有设了 API Key 的厂商才会参与
3. **失败自动切下一家**:遇到 401、模型未开通、网络异常等错误时会自动试下一个
4. **指定模型时前置**:如果明确传了某个模型名,对应厂商会被提到最前面先试
### 支持的模型
## 支持的模型
| 厂商 | 模型 / 别名 | 特点 |
| --- | --- | --- |
| OpenAI | `gpt-image-2`、`gpt-image-1` | 通用文生图,高质量、高智能,支持 `quality` 参数控制画质 |
| OpenAI | `gpt-image-2`、`gpt-image-1` | 通用文生图,高质量,支持 `quality` 控制画质 |
| Gemini Nano Banana | `nano-banana-2`、`nano-banana-pro`、`nano-banana` | 对应 `gemini-3.1-flash`、`gemini-3-pro`、`gemini-2.5-flash` 的图像版本 |
| Seedream(火山方舟) | `seedream-5.0-lite`、`seedream-4.5` | 原生 2K~4K,最多 14 张图融合 |
| Qwen(百炼) | `qwen-image-2.0`、`qwen-image-2.0-pro` | 擅长中文排版和图文融合 |
| MiniMax | `image-01` | 简单快速的图片生成 |
| LinkAI | 任意模型 | 通用代理,兜底 |
| MiniMax | `image-01` | 简单快速 |
| LinkAI | 任意模型 | 统一网关,作为兜底 |
<Note>
默认情况下 Agent 不会主动选模型,而是走自动路由。如果你想用某个特定模型,直接在对话里说就行,比如「用 seedream 画一只猫」或「用 gpt-image-2 生成海报」。也可以通过下面的「自定义配置」固定默认模型。
</Note>
## 模型选择
## 自定义配置
默认走「自动路由 + 失败回退」:
### API Key 配置
1. 按 `OpenAI → Gemini → Seedream → Qwen → MiniMax → LinkAI` 顺序选第一个已配置的厂商
2. 遇到 401、模型未开通、网络异常等错误时自动切到下一家
3. 用户在对话里指定模型时(如「用 seedream 画一只猫」),对应厂商会被提到最前优先尝试
至少需要配**一个**厂商的 Key配多个就能享受自动回退能力。有三种配置方式
#### 方式一:已有模型 Key 自动复用
如果你在 web控制台 或 `config.json` 中配置了对话模型的 Key比如 `openai_api_key`、`gemini_api_key` 等),启动时这些 Key 会被**自动同步**到对应的环境变量。也就是说,只要你的对话模型能用,图像生成就能直接用同一个 Key不需要额外配置。
#### 方式二:在 config.json 中配置
在 `config.json` 中直接写对应的 Key 字段即可,支持的字段如下:
如需固定使用某个模型:
```json
{
"openai_api_key": "sk-xxx",
"openai_api_base": "https://api.openai.com/v1",
"gemini_api_key": "AIza-xxx",
"ark_api_key": "xxx",
"dashscope_api_key": "sk-xxx",
"minimax_api_key": "xxx",
"linkai_api_key": "xxx"
}
```
修改后需要重启生效。每个 Key 还有对应的 `*_api_base` 字段可以自定义接口地址。
#### 方式三:对话中直接配置
在对话里发送 API KeyAgent 会通过 `env_config` 工具自动保存到 `~/cow/.env`**不需要重启**就能生效。例如:
```
帮我配置 OPENAI_API_KEY 为 sk-xxx
```
或者:
```
设置 ARK_API_KEY 为 xxx
```
### API Key 一览
| 环境变量 | config.json 字段 | 对应厂商 | 默认 Base URL |
| --- | --- | --- | --- |
| `OPENAI_API_KEY` | `openai_api_key` | OpenAI | `https://api.openai.com/v1` |
| `GEMINI_API_KEY` | `gemini_api_key` | Gemini | `https://generativelanguage.googleapis.com` |
| `ARK_API_KEY` | `ark_api_key` | 火山方舟Seedream | `https://ark.cn-beijing.volces.com/api/v3` |
| `DASHSCOPE_API_KEY` | `dashscope_api_key` | 阿里百炼Qwen | `https://dashscope.aliyuncs.com` |
| `MINIMAX_API_KEY` | `minimax_api_key` | MiniMax | `https://api.minimaxi.com` |
| `LINKAI_API_KEY` | `linkai_api_key` | LinkAI | `https://api.link-ai.tech` |
### 指定默认模型
如果想让所有图像生成固定走某个厂商的模型,可以在 `config.json` 里加:
```json
"skill": {
"skills": {
"image-generation": {
"model": "seedream-5.0-lite"
}
}
}
```
启动时这段配置会被自动转成环境变量 `SKILL_IMAGE_GENERATION_MODEL`,脚本读到后会固定使用这个模型所在的厂商进行生成。
## 配置 API Key
<Tip>
推荐通过 [Web 控制台](/channels/web) 的「模型管理」页面配置,配好的对话模型 Key 会被图像生成技能自动复用,无需重复配置。也可手动编辑配置文件或在对话中通过 `env_config` 工具临时设置。
</Tip>
凭证统一复用主模型厂商的 Key:
| 字段 | 对应厂商 |
| --- | --- |
| `openai_api_key` | OpenAI |
| `gemini_api_key` | Gemini |
| `ark_api_key` | 火山方舟(Seedream) |
| `dashscope_api_key` | 阿里百炼(Qwen) |
| `minimax_api_key` | MiniMax |
| `linkai_api_key` | LinkAI |
## 开启和关闭
`image-generation` 是内置技能,**会根据 API Key 自动调整状态**
技能会根据 API Key 自动调整状态:
- **Key 已配置**:技能正常可用,Agent 收到画图请求时直接调用
- **Key 未配置**:技能仍会出现在上下文中标记为「需要配置」Agent 会引导用户去配 Key,而不是直接调用失败
- **已配置 Key**:Agent 收到画图请求时直接调用
- **未配置 Key**:技能仍会出现在上下文中,标记为「需要配置」,Agent 会引导用户去配 Key
果想手动控制,也可以用命令
手动控制:
```text
/skill disable image-generation # 手动关闭(即使有 Key 也不会被调用)
/skill disable image-generation # 关闭
/skill enable image-generation # 重新开启
```
终端里对应的命令是 `cow skill disable image-generation` / `cow skill enable image-generation`。
终端等价命令:`cow skill disable image-generation` / `cow skill enable image-generation`。
## 参数
| 参数 | 类型 | 必填 | 默认 | 说明 |
| --- | --- | --- | --- | --- |
| `prompt` | string | 是 | — | 图像描述 |
| `image_url` | string / list | 否 | null | 编辑用的输入图,支持本地路径或 URL。传多个就是多图融合 |
| `quality` | string | 否 | auto | `low` / `medium` / `high`只有部分厂商支持 |
| `size` | string | 否 | auto | `512` / `1K` / `2K` / `3K` / `4K`也可以写像素值如 `1024x1024` |
| `image_url` | string / list | 否 | null | 编辑用的输入图,本地路径或 URL;传列表为多图融合 |
| `quality` | string | 否 | auto | `low` / `medium` / `high`,仅部分厂商支持 |
| `size` | string | 否 | auto | `512` / `1K` / `2K` / `3K` / `4K`,或像素值如 `1024x1024` |
| `aspect_ratio` | string | 否 | null | `1:1` / `3:2` / `2:3` / `16:9` / `9:16` / `21:9`;Gemini 还支持 `1:4` / `4:1` / `1:8` / `8:1` |
<Warning>
**质量越高、分辨率越大,花的钱越多、等的时间越长。**
- 日常对话和快速预览直接用默认(`auto`),或者 `quality=low` + `size=1K`,大概 20 秒出图
- 做海报、用户明确要高清的时候再上 `quality=high` + `size=2K/4K`,可能要等 15 分钟,取决于不同模型的速度
**质量越高、分辨率越大,耗时和成本越高。** 日常对话用默认(`auto`)或 `quality=low` + `size=1K` 即可,约 20 秒出图;做海报或明确要高清时再上 `high` + `2K/4K`,可能需要 1~5 分钟。
</Warning>
## 输出
成功时返回:
```json
{
"model": "doubao-seedream-5-0-260128",
"images": [
{"url": "/path/to/output.png"}
]
}
```
失败时返回 `{ "error": "..." }`。出错后**不要直接重试**——大概率是配置问题Key 填错、API 地址不对、模型没开通),让用户修好配置再试。
## 常见用法
- **文生图**:根据描述生成插画、海报、图标、头像、分镜图等
@@ -153,8 +91,8 @@ description: 文生图 / 图生图 / 多图融合,支持多家厂商自动路
- **多图融合**:把多张参考图合成一张(换装、角色合影等)
<Note>
- bash 超时建议设 600 秒。单个厂商 HTTP 超时 300 秒,脚本可能依次尝试多个厂商
- 输入图片自动压缩到 4MB 以内、最长边不超过 4096px
- Gemini / Seedream / Qwen / MiniMax 不支持 `quality` 参数,传了也没用
- Seedream 默认出 2K 图`seedream-5.0-lite` 支持到 3K`seedream-4.5` 支持到 4K
- bash 超时建议设 600 秒:单厂商 HTTP 超时 300 秒,脚本可能依次尝试多家厂商
- 输入图片自动压缩到 4MB 以内、最长边不超过 4096px
- Gemini / Seedream / Qwen / MiniMax 不支持 `quality` 参数
- Seedream 默认出 2K 图,`seedream-5.0-lite` 支持到 3K,`seedream-4.5` 支持到 4K
</Note>

View File

@@ -40,7 +40,7 @@ Vision 工具采用多级自动选择 + 自动兜底策略,无需手动配置
```json
{
"tool": {
"tools": {
"vision": {
"model": "gpt-4.1"
}

View File

@@ -1,32 +1,51 @@
---
title: web_search - 联网搜索
description: 搜索互联网获取实时信息
description: 搜索互联网获取实时信息,支持多个搜索厂商
---
搜索互联网获取实时信息、新闻、研究等内容。支持两个搜索后端,自动选择可用的后端
搜索互联网获取实时信息、新闻、研究等内容。支持博查、百度千帆、智谱、LinkAI 四个后端,配置任意一家即可使用
## 依赖
<Tip>
推荐通过 [Web 控制台](/channels/web) 的「模型管理 → 搜索」面板可视化配置厂商与策略,无需手动编辑配置文件。
</Tip>
需要配置至少一个搜索 API Key通过 `env_config` 工具或工作空间 `.env` 文件配置):
## 厂商
| 后端 | 环境变量 | 优先级 | 获取方式 |
| --- | --- | --- | --- |
| 博查搜索 | `BOCHA_API_KEY` | 优先使用 | [博查开放平台](https://open.bochaai.com/) |
| LinkAI 搜索 | `LINKAI_API_KEY` | 可选 | [LinkAI 控制台](https://link-ai.tech/console/interface) |
| 厂商 | 凭证 | 申请入口 |
| --- | --- | --- |
| 博查 Bocha | `tools.web_search.bocha_api_key` | [博查开放平台](https://open.bochaai.com/) |
| 百度千帆 | 复用 `qianfan_api_key` | [千帆控制台](https://cloud.baidu.com/doc/qianfan/s/2mh4su4uy) |
| 智谱 Zhipu | 复用 `zhipu_ai_api_key` | [智谱开放平台](https://docs.bigmodel.cn/cn/guide/tools/web-search) |
| LinkAI | 复用 `linkai_api_key` | [LinkAI 控制台](https://link-ai.tech/console/interface) |
## 参数
除博查需要单独的 `bocha_api_key` 外,其他三家直接复用对应模型的 API Key,配好模型即同时获得搜索能力。
## 路由策略
```json
{
"tools": {
"web_search": {
"strategy": "auto",
"provider": ""
}
}
}
```
- `auto`(默认):由 Agent 在已配置的厂商中智能选择,并可在一次任务中多次调用、切换不同厂商以获取更全面的结果;未指定时按 `bocha → qianfan → zhipu → linkai` 顺序兜底。
- `fixed`:固定使用 `provider` 指定的厂商;该厂商凭证缺失时自动回落到 auto 顺序。
## 工具参数
| 参数 | 类型 | 必填 | 说明 |
| --- | --- | --- | --- |
| `query` | string | 是 | 搜索关键词 |
| `count` | integer | 否 | 返回结果数量1-50默认 10 |
| `freshness` | string | 否 | 时间范围:`noLimit`、`oneDay`、`oneWeek`、`oneMonth`、`oneYear`,或日期范围如 `2025-01-01..2025-02-01` |
| `count` | integer | 否 | 返回结果数量(1-50,默认 10) |
| `freshness` | string | 否 | 时间范围:`noLimit`(默认)、`oneDay`、`oneWeek`、`oneMonth`、`oneYear`,或日期范围如 `2025-01-01..2025-02-01` |
| `summary` | boolean | 否 | 是否返回页面摘要(默认 false |
## 使用场景
当用户询问最新信息、需要事实核查或获取实时数据时Agent 会自动调用此工具。
| `provider` | string | 否 | `auto` 策略下配置了多个厂商时可见,用于单次切换厂商 |
<Note>
如果未配置任何搜索 API Key该工具不会被加载
四家凭证均未配置时,该工具不会注册到 Agent
</Note>

View File

@@ -60,7 +60,7 @@ class ChatGPTBot(Bot, OpenAIImage, OpenAICompatibleBot):
"timeout": conf().get("request_timeout", None), # 重试超时时间,在这个时间内,将会自动重试
}
# 部分模型暂不支持一些参数,特殊处理
if conf_model in [const.O1, const.O1_MINI, const.GPT_5, const.GPT_5_MINI, const.GPT_5_NANO]:
if conf_model in [const.O1, const.O1_MINI, const.GPT_5, const.GPT_5_MINI, const.GPT_5_NANO, const.GPT_55]:
remove_keys = ["temperature", "top_p", "frequency_penalty", "presence_penalty"]
for key in remove_keys:
self.args.pop(key, None) # 如果键不存在,使用 None 来避免抛出错、

View File

@@ -38,9 +38,9 @@ class GoogleGeminiBot(Bot):
@property
def model(self):
model_name = conf().get("model") or "gemini-3.1-pro-preview"
model_name = conf().get("model") or "gemini-3.5-flash"
if model_name == "gemini":
model_name = "gemini-3.1-pro-preview"
model_name = "gemini-3.5-flash"
return model_name
@property

View File

@@ -89,8 +89,9 @@ class OpenAICompatibleBot:
messages[0] = {"role": "system", "content": system_prompt}
# Build request parameters
model_name = kwargs.get("model", api_config.get('model', 'gpt-5.4'))
request_params = {
"model": kwargs.get("model", api_config.get('model', 'gpt-3.5-turbo')),
"model": model_name,
"messages": messages,
"temperature": kwargs.get("temperature", api_config.get('default_temperature', 0.9)),
"top_p": kwargs.get("top_p", api_config.get('default_top_p', 1.0)),
@@ -98,6 +99,10 @@ class OpenAICompatibleBot:
"presence_penalty": kwargs.get("presence_penalty", api_config.get('default_presence_penalty', 0.0)),
"stream": stream
}
# GPT-5 / GPT-5.5 / o1 series only accept default temperature/top_p and reject penalty params
if model_name in ("gpt-5", "gpt-5-mini", "gpt-5-nano", "gpt-5.5", "o1", "o1-mini"):
for key in ("temperature", "top_p", "frequency_penalty", "presence_penalty"):
request_params.pop(key, None)
# Add max_tokens if specified
if kwargs.get("max_tokens"):

View File

@@ -1056,6 +1056,38 @@ class CowCliPlugin(Plugin):
logger.warning(f"[CowCli] /memory dream sync failed: {e}")
return f"❌ 记忆蒸馏失败: {e}"
@staticmethod
def _resolve_active_embedding():
    """
    Work out which embedding setup the current configuration describes.

    Values are read from ``conf()`` at call time rather than from any
    provider object already attached to a running agent, so the result
    reflects the configuration as it is now.

    Returns:
        A ``(label, model, dim)`` tuple. ``model`` and ``dim`` may be
        ``None`` when they cannot be determined.
    """
    from agent.memory.embedding import EMBEDDING_VENDORS
    from config import conf

    def _text(key):
        # Missing / falsy values normalise to an empty string.
        return (conf().get(key) or "").strip()

    provider_key = _text("embedding_provider").lower()
    cfg_model = _text("embedding_model")
    try:
        cfg_dim = int(conf().get("embedding_dimensions") or 0)
    except (TypeError, ValueError):
        # Unparseable dimension is treated the same as "not set".
        cfg_dim = 0

    if not provider_key:
        # Legacy auto path: openai is checked before linkai; both default
        # to text-embedding-3-small (1536).
        legacy_candidates = (
            ("open_ai_api_key", "openai (legacy)"),
            ("linkai_api_key", "linkai (legacy)"),
        )
        for key_name, label in legacy_candidates:
            if _text(key_name):
                return label, "text-embedding-3-small", 1536
        return "(legacy)", None, None

    vendor_meta = EMBEDDING_VENDORS.get(provider_key) or {}
    model = cfg_model or vendor_meta.get("default_model")
    if cfg_dim > 0:
        dim = cfg_dim
    else:
        dim = vendor_meta.get("default_dimensions")
    return provider_key, model, dim
def _memory_status(self) -> str:
"""Show current memory index status."""
from agent.memory.embedding import detect_index_dim
@@ -1078,15 +1110,14 @@ class CowCliPlugin(Plugin):
lines.append(f" Chunks : {chunks} (embedded: {embedded})")
lines.append("")
# Active provider (from running config + provider instance).
# Resolve from the latest config so users see what /memory rebuild-index
# will actually run as — not what the cached agent was initialized with.
cfg_provider, cfg_model, cfg_dim = self._resolve_active_embedding()
provider_obj = memory_manager.embedding_provider
cfg_provider = (conf().get("embedding_provider") or "").strip().lower() or "(legacy)"
if provider_obj is not None:
cfg_model = getattr(provider_obj, "model", "?")
cfg_dim = getattr(provider_obj, "_dimensions", None) or "?"
if cfg_model:
lines.append(f" Provider : {cfg_provider}")
lines.append(f" Model : {cfg_model}")
lines.append(f" Dim : {cfg_dim}")
lines.append(f" Dim : {cfg_dim if cfg_dim else '?'}")
else:
lines.append(" Provider : (未初始化, keyword-only)")
@@ -1105,7 +1136,6 @@ class CowCliPlugin(Plugin):
)
index_dim = detect_index_dim(memory_manager.storage)
cfg_dim = getattr(provider_obj, "_dimensions", None)
if index_dim is not None and cfg_dim and index_dim != cfg_dim:
warnings.append(
f" ⚠️ 索引中存量向量为 {index_dim} 维,与当前配置 {cfg_dim} 维不一致;"
@@ -1129,15 +1159,27 @@ class CowCliPlugin(Plugin):
)
memory_manager = agent.memory_manager
if memory_manager.embedding_provider is None:
# Rebuild against the LATEST config: build a fresh provider from
# config.json and swap it onto memory_manager. The agent's
# conversation_history and other state are untouched.
try:
from bridge.agent_initializer import AgentInitializer
fresh_provider = AgentInitializer(bridge=None, agent_bridge=None) \
._init_embedding_provider(memory_manager.config, session_id=session_id)
except Exception as e:
logger.exception("[CowCli] /memory rebuild-index: build provider failed")
return f"⚠️ 无法根据当前配置构造 embedding provider: {e}"
if fresh_provider is None:
return (
"⚠️ 当前没有可用的 embedding provider。\n"
"请检查 config.json 中的 embedding 相关配置 (provider / api key)。"
)
memory_manager.embedding_provider = fresh_provider
provider_obj = memory_manager.embedding_provider
model_label = getattr(provider_obj, "model", "?")
dim_label = getattr(provider_obj, "dimensions", "?")
model_label = getattr(fresh_provider, "model", "?")
dim_label = getattr(fresh_provider, "dimensions", "?")
# SaaS (e_context is None): run synchronously, return final result
if e_context is None:
@@ -1168,7 +1210,7 @@ class CowCliPlugin(Plugin):
threading.Thread(target=_run, daemon=True).start()
return (
f"🔧 索引重建已启动 (model={model_label}, dim={dim_label})\n\n"
f"清空现有 chunks 并重新 embed 所有记忆文件,完成后会通知你。"
f"重新向量化所有记忆和知识文件,完成后会通知你。"
)
@staticmethod

View File

@@ -1110,7 +1110,7 @@ def main():
# Model resolution priority:
# 1. Explicit `model` in the call args (agent / user override)
# 2. SKILL_IMAGE_GENERATION_MODEL env var (synced from
# config["skill"]["image-generation"]["model"] at startup)
# config["skills"]["image-generation"]["model"] at startup)
# 3. None → fall back to automatic provider routing (try every
# provider with a configured API key in global priority order)
model = args.get("model") or os.environ.get("SKILL_IMAGE_GENERATION_MODEL") or ""

View File

@@ -394,7 +394,7 @@ class TestQianfanVisionTool(unittest.TestCase):
"open_ai_api_key": "",
"linkai_api_key": "",
"use_linkai": False,
"tool": {},
"tools": {},
}
if values:
data.update(values)
@@ -424,7 +424,7 @@ class TestQianfanVisionTool(unittest.TestCase):
def test_vision_routes_ernie_model_override_to_qianfan(self):
fake_conf = self._fake_conf({
"qianfan_api_key": "test-qianfan-key",
"tool": {"vision": {"model": "ernie-4.5-turbo-vl-32k"}},
"tools": {"vision": {"model": "ernie-4.5-turbo-vl-32k"}},
})
fake_bot = MagicMock()
fake_bot.call_vision = MagicMock()

View File

View File

@@ -0,0 +1,175 @@
# encoding:utf-8
"""DashScope voice: qwen3-asr-flash (ASR) + qwen3-tts-flash (TTS)
via dashscope.MultiModalConversation."""
import datetime
import os
import random
from typing import Optional
import dashscope
import requests
from dashscope import MultiModalConversation
from bridge.reply import Reply, ReplyType
from common.log import logger
from config import conf
from voice import audio_convert
from voice.voice import Voice
DEFAULT_ASR_MODEL = "qwen3-asr-flash"
DEFAULT_TTS_MODEL = "qwen3-tts-flash"
DEFAULT_TTS_VOICE = "Cherry"
MAX_DURATION_SECONDS = 300
MAX_FILE_BYTES = 10 * 1024 * 1024
class DashScopeVoice(Voice):
    """Speech backend that calls DashScope's ``MultiModalConversation`` API.

    ``voiceToText`` performs recognition and ``textToVoice`` performs
    synthesis. Both return a ``Reply``; failures are logged and surfaced as
    ``ReplyType.ERROR`` replies rather than raised.
    """

    def __init__(self):
        # Nothing to set up: settings are read from conf() on each call.
        pass

    def voiceToText(self, voice_file: str):
        """Transcribe ``voice_file`` and return the text wrapped in a ``Reply``.

        Args:
            voice_file: Path of the audio file to transcribe.

        Returns:
            ``Reply(ReplyType.TEXT, text)`` on success, otherwise a
            ``Reply(ReplyType.ERROR, ...)``.
        """
        try:
            voice_file = self._ensure_compatible_format(voice_file)

            # Advisory size check only: an oversized file is logged, not rejected.
            try:
                size = os.path.getsize(voice_file)
                if size > MAX_FILE_BYTES:
                    logger.warning(
                        f"[DashScopeVoice] audio file {size}B exceeds {MAX_FILE_BYTES}B; "
                        f"qwen3-asr-flash may reject it"
                    )
            except OSError:
                pass

            api_key = conf().get("dashscope_api_key", "")
            if not api_key:
                logger.error("[DashScopeVoice] dashscope_api_key is not configured")
                return Reply(ReplyType.ERROR, "未配置 DashScope API key")
            dashscope.api_key = api_key

            model = conf().get("voice_to_text_model") or DEFAULT_ASR_MODEL
            abs_path = os.path.abspath(voice_file)
            # The local file is handed to the SDK as a file:// URI.
            audio_part = {"audio": f"file://{abs_path}"}
            response = MultiModalConversation.call(
                model=model,
                messages=[{"role": "user", "content": [audio_part]}],
                result_format="message",
                asr_options={"enable_itn": False, "enable_lid": True},
            )

            transcript = self._extract_text(response)
            if transcript is not None:
                logger.info(f"[DashScopeVoice] voiceToText model={model} text={transcript}")
                return Reply(ReplyType.TEXT, transcript)

            logger.error(f"[DashScopeVoice] voiceToText failed: {response}")
            return Reply(ReplyType.ERROR, "我暂时还无法听清您的语音,请稍后再试吧~")
        except Exception as e:
            logger.exception(f"[DashScopeVoice] voiceToText exception: {e}")
            return Reply(ReplyType.ERROR, "我暂时还无法听清您的语音,请稍后再试吧~")

    def textToVoice(self, text: str):
        """Synthesize ``text`` to an audio file and return its path in a ``Reply``.

        Args:
            text: Text to speak.

        Returns:
            ``Reply(ReplyType.VOICE, local_path)`` on success, otherwise a
            ``Reply(ReplyType.ERROR, ...)``.
        """
        try:
            api_key = conf().get("dashscope_api_key", "")
            if not api_key:
                logger.error("[DashScopeVoice] dashscope_api_key is not configured")
                return Reply(ReplyType.ERROR, "未配置 DashScope API key")
            dashscope.api_key = api_key

            model = conf().get("text_to_voice_model") or DEFAULT_TTS_MODEL
            voice = conf().get("tts_voice_id") or DEFAULT_TTS_VOICE
            response = MultiModalConversation.call(
                model=model,
                api_key=api_key,
                text=text,
                voice=voice,
                stream=False,
            )

            audio_url = self._extract_audio_url(response)
            if not audio_url:
                logger.error(f"[DashScopeVoice] textToVoice failed: {response}")
                return Reply(ReplyType.ERROR, "语音合成失败")

            # The response carries a URL; fetch the audio into a local file.
            local_path = self._download_audio(audio_url)
            if not local_path:
                return Reply(ReplyType.ERROR, "语音合成失败")

            logger.info(f"[DashScopeVoice] textToVoice model={model} voice={voice} file={local_path}")
            return Reply(ReplyType.VOICE, local_path)
        except Exception as e:
            logger.exception(f"[DashScopeVoice] textToVoice exception: {e}")
            return Reply(ReplyType.ERROR, "语音合成失败")

    @staticmethod
    def _extract_audio_url(response) -> Optional[str]:
        """Return the audio URL found in ``response.output``, or ``None``.

        ``None`` is also returned when ``status_code`` is present and not 200,
        or when reading the response raises.
        """
        try:
            if getattr(response, "status_code", 200) != 200:
                return None
            audio = None
            if response.output:
                audio = response.output.get("audio")
            # "audio" may be a plain dict or an object exposing ``.url``.
            if isinstance(audio, dict):
                return audio.get("url") or None
            return getattr(audio, "url", None)
        except Exception:
            return None

    @staticmethod
    def _download_audio(url: str) -> Optional[str]:
        """Download ``url`` into ``<cwd>/tmp`` and return the saved path.

        Returns ``None`` (after logging) if anything fails.
        """
        try:
            tmp_dir = os.path.join(os.getcwd(), "tmp")
            os.makedirs(tmp_dir, exist_ok=True)
            ts = datetime.datetime.now().strftime("%Y%m%d%H%M%S")

            # Take the extension from the URL path (query string removed);
            # anything missing or unrecognised falls back to .wav.
            url_path = url.partition("?")[0]
            ext = os.path.splitext(url_path)[1].lower()
            if ext not in (".mp3", ".wav", ".m4a", ".aac", ".opus"):
                ext = ".wav"

            file_name = f"dashscope_tts_{ts}_{random.randint(0, 9999)}{ext}"
            dst = os.path.join(tmp_dir, file_name)

            resp = requests.get(url, timeout=60)
            resp.raise_for_status()
            with open(dst, "wb") as out:
                out.write(resp.content)
            return dst
        except Exception as e:
            logger.error(f"[DashScopeVoice] download audio failed: {e}")
            return None

    @staticmethod
    def _ensure_compatible_format(voice_file: str) -> str:
        """Convert AMR/SILK input to MP3; return other paths unchanged.

        If the conversion fails the original path is returned.
        """
        # qwen3-asr-flash doesn't accept AMR/SILK; mp3/wav/m4a/aac/opus pass through.
        if not voice_file.lower().endswith((".amr", ".silk", ".slk")):
            return voice_file
        try:
            mp3_file = os.path.splitext(voice_file)[0] + ".mp3"
            audio_convert.any_to_mp3(voice_file, mp3_file)
            return mp3_file
        except Exception as e:
            logger.warning(f"[DashScopeVoice] mp3 convert failed: {e}")
            return voice_file

    @staticmethod
    def _extract_text(response) -> Optional[str]:
        """Return the stripped text of the first choice, or ``None``.

        ``None`` covers: non-200 ``status_code``, no choices, content that is
        neither a string nor a list, empty text, and any exception raised
        while reading the response.
        """
        try:
            if getattr(response, "status_code", 200) != 200:
                return None
            choices = response.output.get("choices") or []
            if not choices:
                return None

            content = choices[0].get("message", {}).get("content")
            if isinstance(content, str):
                return content.strip() or None
            if not isinstance(content, list):
                return None

            # List content: collect dict items carrying "text" and bare strings.
            pieces = []
            for item in content:
                if isinstance(item, dict):
                    if "text" in item:
                        pieces.append(item["text"])
                elif isinstance(item, str):
                    pieces.append(item)
            joined = "".join(pieces).strip()
            return joined or None
        except Exception:
            return None

View File

@@ -58,4 +58,12 @@ def create_voice(voice_type):
from voice.minimax.minimax_voice import MinimaxVoice
return MinimaxVoice()
elif voice_type == "dashscope":
from voice.dashscope.dashscope_voice import DashScopeVoice
return DashScopeVoice()
elif voice_type == "zhipu" or voice_type == "zhipuai":
from voice.zhipuai.zhipuai_voice import ZhipuAIVoice
return ZhipuAIVoice()
raise RuntimeError

View File

@@ -1,16 +1,18 @@
"""
google voice service
"""
"""LinkAI voice: Whisper ASR + multi-vendor TTS (OpenAI / Doubao / Baidu)
proxied via https://docs.link-ai.tech/platform/api/voice-speech."""
import datetime
import os
import random
import requests
from voice import audio_convert
from bridge.reply import Reply, ReplyType
from common import const
from common.log import logger
from config import conf
from voice import audio_convert
from voice.voice import Voice
from common import const
import os
import datetime
class LinkAIVoice(Voice):
def __init__(self):
@@ -21,8 +23,7 @@ class LinkAIVoice(Voice):
try:
url = conf().get("linkai_api_base", "https://api.link-ai.tech") + "/v1/audio/transcriptions"
headers = {"Authorization": "Bearer " + conf().get("linkai_api_key")}
model = None
if not conf().get("text_to_voice") or conf().get("voice_to_text") == "openai":
# Pin whisper-1: gateway ignores any other ASR model id.
model = const.WHISPER_1
if voice_file.endswith(".amr"):
try:
@@ -30,54 +31,59 @@ class LinkAIVoice(Voice):
audio_convert.any_to_mp3(voice_file, mp3_file)
voice_file = mp3_file
except Exception as e:
logger.warn(f"[LinkVoice] amr file transfer failed, directly send amr voice file: {format(e)}")
file = open(voice_file, "rb")
file_body = {
"file": file
}
data = {
"model": model
}
res = requests.post(url, files=file_body, headers=headers, data=data, timeout=(5, 60))
if res.status_code == 200:
text = res.json().get("text")
else:
res_json = res.json()
logger.error(f"[LinkVoice] voiceToText error, status_code={res.status_code}, msg={res_json.get('message')}")
logger.warning(f"[LinkVoice] amr file transfer failed, directly send amr voice file: {e}")
with open(voice_file, "rb") as file:
res = requests.post(
url,
files={"file": file},
headers=headers,
data={"model": model},
timeout=(5, 60),
)
if res.status_code != 200:
msg = ""
try:
msg = res.json().get("message", "")
except Exception:
pass
logger.error(f"[LinkVoice] voiceToText error, status_code={res.status_code}, msg={msg}")
return None
reply = Reply(ReplyType.TEXT, text)
text = res.json().get("text")
logger.info(f"[LinkVoice] voiceToText success, text={text}, file name={voice_file}")
return Reply(ReplyType.TEXT, text)
except Exception as e:
logger.error(e)
return None
return reply
def textToVoice(self, text):
try:
url = conf().get("linkai_api_base", "https://api.link-ai.tech") + "/v1/audio/speech"
headers = {"Authorization": "Bearer " + conf().get("linkai_api_key")}
model = const.TTS_1
if not conf().get("text_to_voice") or conf().get("text_to_voice") in ["openai", const.TTS_1, const.TTS_1_HD]:
model = conf().get("text_to_voice_model") or const.TTS_1
# Gateway routes by `model` (tts-1 / doubao / baidu) + `voice` from
# that engine's catalog. `app_code` is optional workspace override.
data = {
"model": model,
"input": text,
"voice": conf().get("tts_voice_id"),
"app_code": conf().get("linkai_app_code")
"app_code": conf().get("linkai_app_code"),
}
model = conf().get("text_to_voice_model")
if model:
data["model"] = model
res = requests.post(url, headers=headers, json=data, timeout=(5, 120))
if res.status_code == 200:
if res.status_code != 200:
msg = ""
try:
msg = res.json().get("message", "")
except Exception:
pass
logger.error(f"[LinkVoice] textToVoice error, status_code={res.status_code}, msg={msg}")
return None
tmp_file_name = "tmp/" + datetime.datetime.now().strftime('%Y%m%d%H%M%S') + str(random.randint(0, 1000)) + ".mp3"
os.makedirs(os.path.dirname(tmp_file_name), exist_ok=True)
with open(tmp_file_name, 'wb') as f:
f.write(res.content)
reply = Reply(ReplyType.VOICE, tmp_file_name)
logger.info(f"[LinkVoice] textToVoice success, input={text}, model={model}, voice_id={data.get('voice')}")
return reply
else:
res_json = res.json()
logger.error(f"[LinkVoice] textToVoice error, status_code={res.status_code}, msg={res_json.get('message')}")
return None
logger.info(f"[LinkVoice] textToVoice success, input={text}, voice_id={data.get('voice')}")
return Reply(ReplyType.VOICE, tmp_file_name)
except Exception as e:
logger.error(e)
# reply = Reply(ReplyType.ERROR, "遇到了一点小问题,请稍后再问我吧")
return None

View File

@@ -1,8 +1,7 @@
# encoding:utf-8
"""
MiniMax TTS voice service
"""
"""MiniMax TTS via /v1/t2a_v2 (SSE stream, hex-encoded mp3 chunks)."""
import datetime
import json
import random
import requests
@@ -12,24 +11,12 @@ from config import conf
from voice.voice import Voice
MINIMAX_TTS_VOICES = [
"English_Graceful_Lady",
"English_Insightful_Speaker",
"English_radiant_girl",
"English_Persuasive_Man",
"English_Lucky_Robot",
"English_expressive_narrator",
"Chinese_Warm_Woman",
"Chinese_Gentle_Man",
]
class MinimaxVoice(Voice):
def __init__(self):
self.api_key = conf().get("minimax_api_key")
self.api_base = conf().get("minimax_api_base") or "https://api.minimax.io"
# Strip trailing /v1 if present so we can always append /v1/t2a_v2
self.api_base = self.api_base.rstrip("/")
# Mainland endpoint matches `sk-api-0-...` keys; override via
# `minimax_api_base` for international (api.minimax.io) workspaces.
self.api_base = (conf().get("minimax_api_base") or "https://api.minimaxi.com").rstrip("/")
if self.api_base.endswith("/v1"):
self.api_base = self.api_base[:-3]
@@ -68,12 +55,14 @@ class MinimaxVoice(Voice):
response = requests.post(url, headers=headers, json=payload, stream=True, timeout=60)
response.raise_for_status()
# Parse SSE stream and collect hex-encoded audio chunks
# MiniMax returns HTTP 200 even on errors; capture base_resp for diagnostics.
audio_chunks = []
buffer = ""
last_base_resp = None
event_count = 0
for raw in response.iter_lines():
if not raw:
continue
event_count += 1
line = raw.decode("utf-8") if isinstance(raw, bytes) else raw
if not line.startswith("data:"):
continue
@@ -81,16 +70,31 @@ class MinimaxVoice(Voice):
if not json_str or json_str == "[DONE]":
continue
try:
import json
event_data = json.loads(json_str)
audio_hex = event_data.get("data", {}).get("audio")
if audio_hex:
audio_chunks.append(bytes.fromhex(audio_hex))
except Exception:
continue
base_resp = event_data.get("base_resp") or {}
if base_resp:
last_base_resp = base_resp
audio_hex = (event_data.get("data") or {}).get("audio")
if audio_hex:
try:
audio_chunks.append(bytes.fromhex(audio_hex))
except Exception as e:
logger.warning(f"[MINIMAX] skip bad audio hex chunk: {e}")
if not audio_chunks:
logger.error("[MINIMAX] TTS returned no audio data")
ct = response.headers.get("Content-Type", "")
if last_base_resp and last_base_resp.get("status_code") not in (None, 0):
logger.error(
f"[MINIMAX] TTS failed: status_code={last_base_resp.get('status_code')}, "
f"status_msg={last_base_resp.get('status_msg')}, model={model}, voice_id={voice_id}"
)
else:
logger.error(
f"[MINIMAX] TTS returned no audio data, model={model}, voice_id={voice_id}, "
f"url={url}, http={response.status_code}, content_type={ct!r}, events={event_count}"
)
return Reply(ReplyType.ERROR, "语音合成失败,未获取到音频数据")
audio_data = b"".join(audio_chunks)

View File

@@ -31,7 +31,8 @@ class OpenaiVoice(Voice):
"file": file,
}
data = {
"model": "whisper-1",
# Override via `voice_to_text_model` (e.g. fall back to whisper-1).
"model": conf().get("voice_to_text_model") or "gpt-4o-mini-transcribe",
}
response = requests.post(url, headers=headers, files=files, data=data)
response_data = response.json()

View File

View File

@@ -0,0 +1,173 @@
# encoding:utf-8
"""ZhipuAI voice: glm-asr-2512 (ASR) + glm-tts (TTS) via BigModel REST API."""
import datetime
import os
import random
import requests
from bridge.reply import Reply, ReplyType
from common.log import logger
from config import conf
from voice import audio_convert
from voice.voice import Voice
DEFAULT_ASR_MODEL = "glm-asr-2512"
DEFAULT_TTS_MODEL = "glm-tts"
DEFAULT_TTS_VOICE = "tongtong"
DEFAULT_API_BASE = "https://open.bigmodel.cn/api/paas/v4"
MAX_FILE_BYTES = 25 * 1024 * 1024
REQUEST_TIMEOUT = (5, 60)
class ZhipuAIVoice(Voice):
    """Speech backend that talks to the ZhipuAI (BigModel) audio REST endpoints.

    ``voiceToText`` posts to ``/audio/transcriptions`` and ``textToVoice``
    posts to ``/audio/speech``. Both return a ``Reply``; failures are logged
    and surfaced as ``ReplyType.ERROR`` replies rather than raised.
    """

    def __init__(self):
        # Nothing to set up: settings are read from conf() on each call.
        pass

    def voiceToText(self, voice_file: str):
        """Transcribe ``voice_file`` and return the text wrapped in a ``Reply``.

        Args:
            voice_file: Path of the audio file to transcribe.

        Returns:
            ``Reply(ReplyType.TEXT, text)`` on success, otherwise a
            ``Reply(ReplyType.ERROR, ...)``.
        """
        try:
            voice_file = self._ensure_compatible_format(voice_file)

            # Advisory size check only: an oversized file is logged, not rejected.
            try:
                size = os.path.getsize(voice_file)
                if size > MAX_FILE_BYTES:
                    logger.warning(
                        f"[ZhipuAIVoice] audio file {size}B exceeds {MAX_FILE_BYTES}B; "
                        f"glm-asr-2512 may reject it"
                    )
            except OSError:
                pass

            api_key = conf().get("zhipu_ai_api_key", "")
            if not api_key:
                logger.error("[ZhipuAIVoice] zhipu_ai_api_key is not configured")
                return Reply(ReplyType.ERROR, "未配置 ZhipuAI API key")

            api_base = (conf().get("zhipu_ai_api_base") or DEFAULT_API_BASE).rstrip("/")
            url = f"{api_base}/audio/transcriptions"
            model = conf().get("voice_to_text_model") or DEFAULT_ASR_MODEL

            form_fields = {"model": model, "stream": "false"}
            auth_headers = {"Authorization": f"Bearer {api_key}"}
            with open(voice_file, "rb") as audio_fp:
                upload = {"file": (os.path.basename(voice_file), audio_fp)}
                response = requests.post(
                    url,
                    headers=auth_headers,
                    files=upload,
                    data=form_fields,
                    timeout=REQUEST_TIMEOUT,
                )

            if response.status_code != 200:
                logger.error(
                    f"[ZhipuAIVoice] voiceToText failed: status={response.status_code} "
                    f"body={response.text[:500]}"
                )
                return Reply(ReplyType.ERROR, "我暂时还无法听清您的语音,请稍后再试吧~")

            payload = response.json()
            text = (payload.get("text") or "").strip()
            if text:
                logger.info(f"[ZhipuAIVoice] voiceToText model={model} text={text}")
                return Reply(ReplyType.TEXT, text)

            logger.error(f"[ZhipuAIVoice] voiceToText empty text: {payload}")
            return Reply(ReplyType.ERROR, "我暂时还无法听清您的语音,请稍后再试吧~")
        except Exception as e:
            logger.exception(f"[ZhipuAIVoice] voiceToText exception: {e}")
            return Reply(ReplyType.ERROR, "我暂时还无法听清您的语音,请稍后再试吧~")

    def textToVoice(self, text: str):
        """Synthesize ``text`` to an audio file under ``tmp/``.

        Args:
            text: Text to speak.

        Returns:
            ``Reply(ReplyType.VOICE, file_name)`` on success, otherwise a
            ``Reply(ReplyType.ERROR, ...)``.
        """
        try:
            api_key = conf().get("zhipu_ai_api_key", "")
            if not api_key:
                logger.error("[ZhipuAIVoice] zhipu_ai_api_key is not configured")
                return Reply(ReplyType.ERROR, "未配置 ZhipuAI API key")

            api_base = (conf().get("zhipu_ai_api_base") or DEFAULT_API_BASE).rstrip("/")
            url = f"{api_base}/audio/speech"
            model = conf().get("text_to_voice_model") or DEFAULT_TTS_MODEL
            voice_id = conf().get("tts_voice_id") or DEFAULT_TTS_VOICE

            response = requests.post(
                url,
                headers={
                    "Authorization": f"Bearer {api_key}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": model,
                    "input": text,
                    "voice": voice_id,
                    "response_format": "wav",
                    "speed": 1.0,
                    "volume": 1.0,
                },
                timeout=REQUEST_TIMEOUT,
            )

            if response.status_code != 200:
                logger.error(
                    f"[ZhipuAIVoice] textToVoice failed: status={response.status_code} "
                    f"body={response.text[:500]} model={model} voice={voice_id}"
                )
                return Reply(ReplyType.ERROR, "语音合成失败,请稍后再试")

            # Some errors come back as JSON / SSE with HTTP 200.
            ct = response.headers.get("Content-Type", "")
            if any(kind in ct for kind in ("application/json", "text/event-stream")):
                try:
                    err = response.json()
                except Exception:
                    err = {"raw": response.text[:500]}
                logger.error(
                    f"[ZhipuAIVoice] textToVoice unexpected text response "
                    f"(content_type={ct}): {err}"
                )
                return Reply(ReplyType.ERROR, "语音合成失败,请稍后再试")

            audio_bytes = response.content
            # Name the file after the detected container, defaulting to wav.
            ext = self._sniff_audio_ext(audio_bytes) or "wav"
            stamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
            file_name = f"tmp/{stamp}{random.randint(0, 1000)}.{ext}"
            os.makedirs(os.path.dirname(file_name), exist_ok=True)
            with open(file_name, "wb") as out:
                out.write(audio_bytes)

            logger.info(
                f"[ZhipuAIVoice] textToVoice model={model} voice={voice_id} "
                f"file={file_name} bytes={len(audio_bytes)} ext={ext}"
            )
            return Reply(ReplyType.VOICE, file_name)
        except Exception as e:
            logger.exception(f"[ZhipuAIVoice] textToVoice exception: {e}")
            return Reply(ReplyType.ERROR, "语音合成失败,请稍后再试")

    @staticmethod
    def _sniff_audio_ext(data: bytes) -> str:
        """Detect audio container by magic bytes; returns '' on unknown.

        Inputs shorter than 12 bytes are always reported as unknown.
        """
        if len(data) < 12:
            return ""
        head = data[:12]
        magic4 = head[:4]
        if magic4 == b"RIFF" and head[8:12] == b"WAVE":
            return "wav"
        # MP3: an ID3 tag, or one of the frame-sync byte pairs checked here.
        if head[:3] == b"ID3" or head[:2] in (b"\xff\xfb", b"\xff\xf3", b"\xff\xf2"):
            return "mp3"
        if magic4 == b"OggS":
            return "ogg"
        if magic4 == b"fLaC":
            return "flac"
        return ""

    @staticmethod
    def _ensure_compatible_format(voice_file: str) -> str:
        """Return a .mp3/.wav path for ``voice_file``, converting when needed.

        If the conversion fails the original path is returned.
        """
        # glm-asr-2512 only accepts .wav / .mp3
        if voice_file.lower().endswith((".mp3", ".wav")):
            return voice_file
        try:
            mp3_file = os.path.splitext(voice_file)[0] + ".mp3"
            audio_convert.any_to_mp3(voice_file, mp3_file)
            return mp3_file
        except Exception as e:
            logger.warning(f"[ZhipuAIVoice] mp3 convert failed: {e}")
            return voice_file