Merge pull request #2826 from zhayujie/feat-multi-model
feat: multi-provider model console
@@ -44,6 +44,7 @@ CREATE TABLE IF NOT EXISTS messages (
|
||||
role TEXT NOT NULL,
|
||||
content TEXT NOT NULL,
|
||||
created_at INTEGER NOT NULL,
|
||||
extras TEXT NOT NULL DEFAULT '',
|
||||
UNIQUE (session_id, seq)
|
||||
);
|
||||
|
||||
@@ -67,6 +68,12 @@ _MIGRATION_ADD_CONTEXT_START_SEQ = """
|
||||
ALTER TABLE sessions ADD COLUMN context_start_seq INTEGER NOT NULL DEFAULT 0;
|
||||
"""
|
||||
|
||||
# Generic JSON sidecar for per-message attachments (TTS audio URL, future use).
|
||||
# Always optional — readers must tolerate missing column / empty / invalid JSON.
|
||||
_MIGRATION_ADD_MSG_EXTRAS = """
|
||||
ALTER TABLE messages ADD COLUMN extras TEXT NOT NULL DEFAULT '';
|
||||
"""
|
||||
|
||||
DEFAULT_MAX_AGE_DAYS: int = 30
|
||||
|
||||
|
||||
@@ -169,20 +176,26 @@ def _group_into_display_turns(
|
||||
cur_rest: List[tuple] = []
|
||||
started = False
|
||||
|
||||
for role, raw_content, created_at in rows:
|
||||
for role, raw_content, created_at, raw_extras in rows:
|
||||
try:
|
||||
content = json.loads(raw_content)
|
||||
except Exception:
|
||||
content = raw_content
|
||||
try:
|
||||
extras = json.loads(raw_extras) if raw_extras else {}
|
||||
if not isinstance(extras, dict):
|
||||
extras = {}
|
||||
except Exception:
|
||||
extras = {}
|
||||
|
||||
if role == "user" and _is_visible_user_message(content):
|
||||
if started:
|
||||
groups.append((cur_user, cur_rest))
|
||||
cur_user = (content, created_at)
|
||||
cur_user = (content, created_at, extras)
|
||||
cur_rest = []
|
||||
started = True
|
||||
else:
|
||||
cur_rest.append((role, content, created_at))
|
||||
cur_rest.append((role, content, created_at, extras))
|
||||
|
||||
if started:
|
||||
groups.append((cur_user, cur_rest))
|
||||
@@ -195,7 +208,7 @@ def _group_into_display_turns(
|
||||
for user_row, rest in groups:
|
||||
# User turn
|
||||
if user_row:
|
||||
content, created_at = user_row
|
||||
content, created_at, _u_extras = user_row
|
||||
text = _extract_display_text(content)
|
||||
if text:
|
||||
turns.append({"role": "user", "content": text, "created_at": created_at})
|
||||
@@ -206,8 +219,11 @@ def _group_into_display_turns(
|
||||
tool_results: Dict[str, str] = {}
|
||||
final_text = ""
|
||||
final_ts: Optional[int] = None
|
||||
merged_extras: Dict[str, Any] = {}
|
||||
|
||||
for role, content, created_at in rest:
|
||||
for role, content, created_at, extras in rest:
|
||||
if role == "assistant" and isinstance(extras, dict):
|
||||
merged_extras.update(extras)
|
||||
if role == "user":
|
||||
tool_results.update(_extract_tool_results(content))
|
||||
elif role == "assistant":
|
||||
@@ -256,6 +272,8 @@ def _group_into_display_turns(
|
||||
"steps": steps,
|
||||
"created_at": final_ts or (user_row[1] if user_row else 0),
|
||||
}
|
||||
if merged_extras:
|
||||
turn["extras"] = merged_extras
|
||||
turns.append(turn)
|
||||
|
||||
return turns
|
||||
@@ -411,13 +429,15 @@ class ConversationStore:
|
||||
content = json.dumps(
|
||||
msg.get("content", ""), ensure_ascii=False
|
||||
)
|
||||
extras_obj = msg.get("extras") or {}
|
||||
extras = json.dumps(extras_obj, ensure_ascii=False) if extras_obj else ""
|
||||
conn.execute(
|
||||
"""
|
||||
INSERT OR IGNORE INTO messages
|
||||
(session_id, seq, role, content, created_at)
|
||||
VALUES (?, ?, ?, ?, ?)
|
||||
(session_id, seq, role, content, created_at, extras)
|
||||
VALUES (?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
(session_id, next_seq, role, content, now),
|
||||
(session_id, next_seq, role, content, now, extras),
|
||||
)
|
||||
next_seq += 1
|
||||
|
||||
@@ -651,6 +671,55 @@ class ConversationStore:
|
||||
logger.info(f"[ConversationStore] Pruned {deleted} expired sessions")
|
||||
return deleted
|
||||
|
||||
def attach_extras_to_last_assistant(
    self,
    session_id: str,
    extras: Dict[str, Any],
) -> Optional[int]:
    """
    Merge ``extras`` into the latest assistant message of a session.

    Used by post-processing (e.g. TTS) that needs to annotate an already
    persisted bot reply with attachments such as audio URLs.

    The stored ``extras`` JSON of the assistant row with the highest ``seq``
    is decoded, updated with the given keys (new values win on conflict)
    and written back. Stored content that is empty, invalid JSON or not a
    JSON object is treated as an empty dict.

    :param session_id: session whose latest assistant message is annotated
    :param extras: keys to merge; an empty/falsy value is a no-op
    :return: the message seq that was updated, or ``None`` if ``extras`` is
        empty, no assistant message exists, or the update could not be
        applied (including a failure to open the database)
    """
    if not extras:
        return None
    with self._lock:
        # Opening the connection is part of "the update could not be
        # applied": report it the same way as any other failure instead of
        # letting it escape to the caller.
        try:
            conn = self._connect()
        except Exception as e:
            logger.warning(f"[ConversationStore] attach_extras failed: {e}")
            return None
        try:
            row = conn.execute(
                """
                SELECT seq, extras FROM messages
                WHERE session_id = ? AND role = 'assistant'
                ORDER BY seq DESC LIMIT 1
                """,
                (session_id,),
            ).fetchone()
            if not row:
                return None
            seq, raw = row
            try:
                cur = json.loads(raw) if raw else {}
            except Exception:
                cur = {}
            if not isinstance(cur, dict):
                cur = {}
            cur.update(extras)
            conn.execute(
                "UPDATE messages SET extras = ? WHERE session_id = ? AND seq = ?",
                (json.dumps(cur, ensure_ascii=False), session_id, seq),
            )
            conn.commit()
            return seq
        except Exception as e:
            logger.warning(f"[ConversationStore] attach_extras failed: {e}")
            return None
        finally:
            conn.close()
|
||||
|
||||
def load_history_page(
|
||||
self,
|
||||
session_id: str,
|
||||
@@ -698,7 +767,22 @@ class ConversationStore:
|
||||
).fetchone()
|
||||
ctx_start = ctx_row[0] if ctx_row else 0
|
||||
|
||||
# extras column is added by migration; tolerate older DBs that
|
||||
# might miss it by re-running the query without it and padding each row with ''.
|
||||
try:
|
||||
rows = conn.execute(
|
||||
"""
|
||||
SELECT seq, role, content, created_at, extras
|
||||
FROM messages
|
||||
WHERE session_id = ?
|
||||
ORDER BY seq ASC
|
||||
""",
|
||||
(session_id,),
|
||||
).fetchall()
|
||||
except sqlite3.OperationalError:
|
||||
rows = [
|
||||
(seq, role, content, created_at, "")
|
||||
for (seq, role, content, created_at) in conn.execute(
|
||||
"""
|
||||
SELECT seq, role, content, created_at
|
||||
FROM messages
|
||||
@@ -707,6 +791,7 @@ class ConversationStore:
|
||||
""",
|
||||
(session_id,),
|
||||
).fetchall()
|
||||
]
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
@@ -719,13 +804,16 @@ class ConversationStore:
|
||||
include_thinking = False
|
||||
|
||||
# Strip seq for display grouping, but record max seq per visible user group
|
||||
plain_rows = [(role, content, created_at) for _seq, role, content, created_at in rows]
|
||||
plain_rows = [
|
||||
(role, content, created_at, extras_raw)
|
||||
for _seq, role, content, created_at, extras_raw in rows
|
||||
]
|
||||
visible = _group_into_display_turns(plain_rows, include_thinking=include_thinking)
|
||||
|
||||
# Build a mapping: find the seq of each visible user message to annotate context boundary.
|
||||
# Walk through rows to find visible user message seqs in order.
|
||||
visible_user_seqs: List[int] = []
|
||||
for seq, role, raw_content, _ts in rows:
|
||||
for seq, role, raw_content, _ts, _extras in rows:
|
||||
if role != "user":
|
||||
continue
|
||||
try:
|
||||
@@ -911,6 +999,18 @@ class ConversationStore:
|
||||
except Exception as e:
|
||||
logger.warning(f"[ConversationStore] Migration (context_start_seq) failed: {e}")
|
||||
|
||||
msg_cols = {
|
||||
row[1]
|
||||
for row in conn.execute("PRAGMA table_info(messages)").fetchall()
|
||||
}
|
||||
if "extras" not in msg_cols:
|
||||
try:
|
||||
conn.execute(_MIGRATION_ADD_MSG_EXTRAS)
|
||||
conn.commit()
|
||||
logger.info("[ConversationStore] Migrated: added messages.extras column")
|
||||
except Exception as e:
|
||||
logger.warning(f"[ConversationStore] Migration (extras) failed: {e}")
|
||||
|
||||
def _connect(self) -> sqlite3.Connection:
|
||||
conn = sqlite3.connect(str(self._db_path), timeout=10)
|
||||
conn.execute("PRAGMA journal_mode=WAL")
|
||||
|
||||
@@ -603,15 +603,24 @@ class AgentStreamExecutor:
|
||||
except Exception as e:
|
||||
logger.debug(f"[Agent] MCP sync skipped: {e}")
|
||||
|
||||
# Prepare tool definitions (OpenAI/Claude format)
|
||||
# Prepare tool definitions. Prefer get_json_schema() when it yields
|
||||
# real properties (lets tools augment schema at runtime), otherwise
|
||||
# fall back to the static `tool.params` (MCP tools rely on this).
|
||||
tools_schema = None
|
||||
if self.tools:
|
||||
tools_schema = []
|
||||
for tool in self.tools.values():
|
||||
input_schema = tool.params
|
||||
try:
|
||||
dynamic = (tool.get_json_schema() or {}).get("parameters") or {}
|
||||
if dynamic.get("properties"):
|
||||
input_schema = dynamic
|
||||
except Exception:
|
||||
pass
|
||||
tools_schema.append({
|
||||
"name": tool.name,
|
||||
"description": tool.description,
|
||||
"input_schema": tool.params # Claude uses input_schema
|
||||
"input_schema": input_schema,
|
||||
})
|
||||
|
||||
# Create request
|
||||
|
||||
@@ -3,7 +3,7 @@ Vision tool - Analyze images using Vision API.
|
||||
Supports local files (auto base64-encoded) and HTTP URLs.
|
||||
|
||||
Provider resolution:
|
||||
- tool.vision.model (if set) means "prefer this model first; fall back to
|
||||
- tools.vision.model (if set) means "prefer this model first; fall back to
|
||||
other configured providers if it fails". The model name is mapped to its
|
||||
native provider (e.g. doubao-* → Doubao, kimi-* → Moonshot, gpt-* →
|
||||
OpenAI/LinkAI). That provider is tried first, then the standard auto
|
||||
@@ -30,7 +30,7 @@ from common import const
|
||||
from common.log import logger
|
||||
from config import conf
|
||||
|
||||
DEFAULT_MODEL = const.GPT_41_MINI
|
||||
DEFAULT_MODEL = const.GPT_55
|
||||
DEFAULT_TIMEOUT = 60
|
||||
MAX_TOKENS = 1000
|
||||
COMPRESS_THRESHOLD = 1_048_576 # 1 MB
|
||||
@@ -53,14 +53,14 @@ _DISCOVERABLE_MODELS = [
|
||||
("ark_api_key", const.DOUBAO, const.DOUBAO_SEED_2_PRO, "Doubao"),
|
||||
("dashscope_api_key", const.QWEN_DASHSCOPE, const.QWEN36_PLUS, "DashScope"),
|
||||
("claude_api_key", const.CLAUDEAPI, const.CLAUDE_4_6_SONNET, "Claude"),
|
||||
("gemini_api_key", const.GEMINI, const.GEMINI_31_FLASH_LITE_PRE, "Gemini"),
|
||||
("gemini_api_key", const.GEMINI, const.GEMINI_35_FLASH, "Gemini"),
|
||||
("qianfan_api_key", const.QIANFAN, const.ERNIE_45_TURBO_VL, "Qianfan"),
|
||||
("zhipu_ai_api_key", const.ZHIPU_AI, const.GLM_4_7, "ZhipuAI"),
|
||||
("minimax_api_key", const.MiniMax, const.MINIMAX_M2_7, "MiniMax"),
|
||||
]
|
||||
|
||||
# Model name prefix → discoverable provider display_name.
|
||||
# Used to auto-route tool.vision.model to its native provider.
|
||||
# Used to auto-route tools.vision.model to its native provider.
|
||||
# Matched case-insensitively; longest prefix wins.
|
||||
_MODEL_PREFIX_TO_PROVIDER = [
|
||||
("doubao-", "Doubao"),
|
||||
@@ -154,7 +154,7 @@ class Vision(BaseTool):
|
||||
|
||||
# Default model is only used as a last-resort placeholder for providers
|
||||
# whose VisionProvider.model_override is None (e.g. raw OpenAI provider
|
||||
# when the user did not configure tool.vision.model).
|
||||
# when the user did not configure tools.vision.model).
|
||||
return self._call_with_fallback(providers, DEFAULT_MODEL, question, image_content)
|
||||
|
||||
def _call_with_fallback(self, providers: List[VisionProvider], model: str,
|
||||
@@ -193,12 +193,12 @@ class Vision(BaseTool):
|
||||
"""
|
||||
Build an ordered list of providers to try.
|
||||
|
||||
Semantics of `tool.vision.model`:
|
||||
Semantics of `tools.vision.model`:
|
||||
"Prefer this model first; fall back to other configured providers
|
||||
if it fails."
|
||||
|
||||
Order:
|
||||
1. The provider that natively serves `tool.vision.model` (if any
|
||||
1. The provider that natively serves `tools.vision.model` (if any
|
||||
and its API key is configured) — using the user-specified model
|
||||
name verbatim.
|
||||
2. Auto-discovery chain as fallback:
|
||||
@@ -213,7 +213,7 @@ class Vision(BaseTool):
|
||||
user_model = self._resolve_user_vision_model()
|
||||
providers: List[VisionProvider] = []
|
||||
|
||||
# Step 1: preferred provider derived from tool.vision.model
|
||||
# Step 1: preferred provider derived from tools.vision.model
|
||||
if user_model:
|
||||
preferred = self._route_by_model_name(user_model)
|
||||
if preferred:
|
||||
@@ -251,11 +251,11 @@ class Vision(BaseTool):
|
||||
|
||||
@staticmethod
|
||||
def _resolve_user_vision_model() -> Optional[str]:
|
||||
"""Read tool.vision.model from config; return None if unset/blank."""
|
||||
tool_conf = conf().get("tool", {})
|
||||
if not isinstance(tool_conf, dict):
|
||||
"""Read tools.vision.model (singular ``tool`` kept as runtime fallback)."""
|
||||
tools_conf = conf().get("tools") or conf().get("tool") or {}
|
||||
if not isinstance(tools_conf, dict):
|
||||
return None
|
||||
vision_conf = tool_conf.get("vision", {})
|
||||
vision_conf = tools_conf.get("vision", {})
|
||||
if not isinstance(vision_conf, dict):
|
||||
return None
|
||||
m = vision_conf.get("model")
|
||||
@@ -303,7 +303,7 @@ class Vision(BaseTool):
|
||||
self._append_provider(providers, lambda: self._build_linkai_provider(user_model))
|
||||
if providers:
|
||||
return providers
|
||||
logger.warning(f"[Vision] tool.vision.model='{user_model}' looks like an OpenAI "
|
||||
logger.warning(f"[Vision] tools.vision.model='{user_model}' looks like an OpenAI "
|
||||
f"model but neither OPENAI_API_KEY nor LINKAI_API_KEY is configured.")
|
||||
return None # fall through to auto
|
||||
|
||||
@@ -317,7 +317,7 @@ class Vision(BaseTool):
|
||||
continue
|
||||
api_key = conf().get(config_key, "")
|
||||
if not api_key or not api_key.strip():
|
||||
logger.warning(f"[Vision] tool.vision.model='{user_model}' routes to "
|
||||
logger.warning(f"[Vision] tools.vision.model='{user_model}' routes to "
|
||||
f"'{display_name}' but '{config_key}' is not configured. "
|
||||
f"Falling back to auto-discovery.")
|
||||
return None # fall through to auto
|
||||
@@ -452,8 +452,8 @@ class Vision(BaseTool):
|
||||
if not self._main_bot_supports_vision(bot):
|
||||
return None
|
||||
|
||||
# Use the configured main model name; do NOT inject tool.vision.model
|
||||
# here, because by the time we reach this branch the tool.vision.model
|
||||
# Use the configured main model name; do NOT inject tools.vision.model
|
||||
# here, because by the time we reach this branch the tools.vision.model
|
||||
# routing has already been attempted (and either matched the main bot
|
||||
# or failed to find a provider).
|
||||
main_model_name = conf().get("model") or None
|
||||
|
||||
@@ -1,13 +1,27 @@
|
||||
"""
|
||||
Web Search tool - Search the web using Bocha or LinkAI search API.
|
||||
Supports two backends with unified response format:
|
||||
1. Bocha Search (primary, requires BOCHA_API_KEY)
|
||||
2. LinkAI Search (fallback, requires LINKAI_API_KEY)
|
||||
"""Web Search tool. Supports four backends with a unified response format:
|
||||
- bocha (https://open.bochaai.com)
|
||||
- zhipu (https://docs.bigmodel.cn/cn/guide/tools/web-search)
|
||||
- qianfan (https://cloud.baidu.com/doc/qianfan/s/2mh4su4uy)
|
||||
- linkai (https://link-ai.tech, fallback)
|
||||
|
||||
Provider selection
|
||||
- strategy 'auto' (default): pick the first configured provider in the
|
||||
canonical order [bocha, qianfan, zhipu, linkai]. When the caller passes
|
||||
an explicit `provider` it overrides the pick; an invalid/unconfigured
|
||||
one silently falls back to the auto order.
|
||||
- strategy 'fixed': use the configured provider; if its credential is
|
||||
missing at call time, silently fall back to auto order (no card hint).
|
||||
|
||||
Credentials
|
||||
- bocha : tools.web_search.bocha_api_key -> env BOCHA_API_KEY
|
||||
- zhipu : conf.zhipu_ai_api_key -> env ZHIPUAI_API_KEY
|
||||
- qianfan : conf.qianfan_api_key -> env QIANFAN_API_KEY
|
||||
- linkai : conf.linkai_api_key -> env LINKAI_API_KEY
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
from typing import Dict, Any, Optional
|
||||
import os
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import requests
|
||||
|
||||
@@ -16,12 +30,63 @@ from common.log import logger
|
||||
from config import conf
|
||||
|
||||
|
||||
# Default timeout for API requests (seconds)
|
||||
DEFAULT_TIMEOUT = 30
|
||||
|
||||
# Canonical fallback order. Empirically ordered by Chinese real-time
|
||||
# quality + relevance: bocha (best overall), qianfan (best for hot news),
|
||||
# zhipu (strong on long-form articles), linkai (cloud aggregator, last
|
||||
# resort).
|
||||
PROVIDER_ORDER = ("bocha", "qianfan", "zhipu", "linkai")
|
||||
|
||||
PROVIDER_LABELS = {
|
||||
"bocha": "Bocha",
|
||||
"zhipu": "Zhipu",
|
||||
"qianfan": "Baidu Qianfan",
|
||||
"linkai": "LinkAI",
|
||||
}
|
||||
|
||||
|
||||
def _tools_web_search_conf() -> dict:
    """Fetch the ``tools.web_search`` config section as a dict.

    A missing, empty or non-dict value at either level (``tools`` or
    ``tools.web_search``) collapses to an empty dict, so callers can always
    use ``.get`` on the result.
    """
    tools_section = conf().get("tools")
    if not tools_section or not isinstance(tools_section, dict):
        return {}
    web_search = tools_section.get("web_search")
    if web_search and isinstance(web_search, dict):
        return web_search
    return {}
|
||||
|
||||
|
||||
def _get_api_key(provider: str) -> str:
    """Look up the API key for ``provider``: config value first, then env.

    ``bocha`` reads ``bocha_api_key`` from the ``tools.web_search`` block;
    the other providers read their key from the top-level config. When the
    configured value is empty or blank, the matching environment variable is
    used. Unknown provider names resolve to an empty string.
    """
    # (provider, key lives in tools.web_search?, config key, env variable)
    lookup = (
        ("bocha", True, "bocha_api_key", "BOCHA_API_KEY"),
        ("zhipu", False, "zhipu_ai_api_key", "ZHIPUAI_API_KEY"),
        ("qianfan", False, "qianfan_api_key", "QIANFAN_API_KEY"),
        ("linkai", False, "linkai_api_key", "LINKAI_API_KEY"),
    )
    for name, in_tool_block, conf_key, env_name in lookup:
        if provider != name:
            continue
        section = _tools_web_search_conf() if in_tool_block else conf()
        configured = (section.get(conf_key) or "").strip()
        if configured:
            return configured
        return os.environ.get(env_name, "").strip()
    return ""
|
||||
|
||||
|
||||
def configured_providers() -> List[str]:
    """List the providers that currently have an API key, in ``PROVIDER_ORDER``."""
    found: List[str] = []
    for name in PROVIDER_ORDER:
        if _get_api_key(name):
            found.append(name)
    return found
|
||||
|
||||
|
||||
def _configured_strategy() -> str:
    """Return the normalized ``tools.web_search.strategy`` value.

    The value is stripped and lower-cased. A missing, non-string or
    blank (whitespace-only) setting resolves to the default ``"auto"``;
    previously a blank string normalized to ``""``, which callers comparing
    against ``"auto"`` treated as a non-default strategy.
    """
    raw = _tools_web_search_conf().get("strategy")
    if not isinstance(raw, str):
        return "auto"
    return raw.strip().lower() or "auto"
|
||||
|
||||
|
||||
def _configured_provider() -> str:
    """Return ``tools.web_search.provider`` stripped and lower-cased ('' if unset)."""
    raw = _tools_web_search_conf().get("provider")
    if not raw:
        return ""
    return raw.strip().lower()
|
||||
|
||||
|
||||
class WebSearch(BaseTool):
|
||||
"""Tool for searching the web using Bocha or LinkAI search API"""
|
||||
"""Tool for searching the web across multiple providers."""
|
||||
|
||||
name: str = "web_search"
|
||||
description: str = "Search the web for real-time information. Returns titles, URLs, and snippets."
|
||||
@@ -55,264 +120,368 @@ class WebSearch(BaseTool):
|
||||
|
||||
def __init__(self, config: dict = None):
|
||||
self.config = config or {}
|
||||
self._backend = None # Will be resolved on first execute
|
||||
|
||||
@staticmethod
def is_available() -> bool:
    """Tool is offered to the agent when at least one provider has a key.

    Delegates to ``configured_providers()`` so that keys coming from the
    config as well as from the environment are taken into account. The
    earlier env-only check (BOCHA_API_KEY / LINKAI_API_KEY) must not precede
    this return, otherwise the multi-provider check is never reached.
    """
    return bool(configured_providers())
|
||||
|
||||
def _resolve_backend(self) -> Optional[str]:
|
||||
"""
|
||||
Determine which search backend to use.
|
||||
Priority: Bocha > LinkAI
|
||||
@classmethod
def get_json_schema(cls) -> dict:
    """Augment the static schema with a `provider` field — only when the
    user has ≥2 providers configured AND strategy is 'auto'. Otherwise
    the backend picks silently and exposing the field would only waste
    the agent's tokens.

    :return: dict with ``name``, ``description`` and ``parameters``; the
        parameters are a copy of ``cls.params``, so the class-level schema
        is never mutated
    """
    schema = {
        "name": cls.name,
        "description": cls.description,
        "parameters": json.loads(json.dumps(cls.params)),  # deep copy
    }
    if _configured_strategy() != "auto":
        return schema
    available = configured_providers()
    if len(available) < 2:
        return schema

    # Only the providers that actually have a key are offered as choices.
    schema["parameters"]["properties"]["provider"] = {
        "type": "string",
        "enum": available,
        "description": "Optional. Specifies the search backend. You may switch between providers when the user wants results from a particular source or from multiple sources.",
    }
    return schema
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Provider resolution
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _resolve_provider(self, requested: Optional[str]) -> Optional[str]:
    """Pick a provider for this call.

    Priority: caller-supplied (if configured) > fixed strategy (if
    configured) > first configured in PROVIDER_ORDER. Silent fallback
    when the desired one has no key.

    :param requested: provider name supplied by the caller, if any; it is
        compared after stripping and lower-casing. A non-string value is
        treated as an unavailable request rather than raising.
    :return: the chosen provider name, or ``None`` when no provider has a
        key configured
    """
    available = configured_providers()
    if not available:
        return None

    if requested:
        req = requested.strip().lower() if isinstance(requested, str) else ""
        if req in available:
            return req
        logger.warning(f"[WebSearch] requested provider '{requested}' unavailable, falling back")

    if _configured_strategy() == "fixed":
        pinned = _configured_provider()
        if pinned in available:
            return pinned
        if pinned:
            logger.warning(f"[WebSearch] pinned provider '{pinned}' unavailable, falling back to auto")

    return available[0]
|
||||
|
||||
@staticmethod
def _resolution_reason(requested: Optional[str], chosen: str) -> str:
    """Human-readable explanation for why `chosen` won the resolver.

    :param requested: provider name the caller asked for, if any; values
        that are not strings never count as a match
    :param chosen: provider name that was actually selected
    :return: ``"caller-requested"`` when the normalized request equals
        ``chosen``; ``"fixed-strategy"`` when the configured strategy is
        ``fixed`` and the configured provider equals ``chosen``; otherwise
        ``"auto-fallback"``
    """
    if isinstance(requested, str) and requested.strip().lower() == chosen:
        return "caller-requested"
    if _configured_strategy() == "fixed" and _configured_provider() == chosen:
        return "fixed-strategy"
    return "auto-fallback"
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Entry point
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def execute(self, args: Dict[str, Any]) -> ToolResult:
    """Run a web search with the provider resolved for this call.

    :param args: tool arguments. Keys read here: ``query`` (required),
        ``count`` (defaults to 10; anything that is not an int in 1..50 is
        reset to 10), ``freshness`` (defaults to ``"noLimit"``),
        ``summary`` (defaults to ``False``; only forwarded to bocha) and
        ``provider`` (optional backend override).
    :return: the provider's ``ToolResult``; a failed ``ToolResult`` when
        the query is empty, no provider is configured, or the request
        times out / cannot connect / raises unexpectedly
    """
    query = (args.get("query") or "").strip()
    if not query:
        return ToolResult.fail("Error: 'query' parameter is required")

    count = args.get("count", 10)
    freshness = args.get("freshness", "noLimit")
    summary = args.get("summary", False)

    # Validate count. bool is a subclass of int, so reject it explicitly
    # instead of forwarding True/False as a result count.
    if isinstance(count, bool) or not isinstance(count, int) or count < 1 or count > 50:
        count = 10

    # The provider hint comes from the model; anything that is not a string
    # is ignored so resolution (which runs outside the try below) cannot
    # crash on `.strip()`.
    requested = args.get("provider")
    if not isinstance(requested, str):
        requested = None
    provider = self._resolve_provider(requested)
    if not provider:
        return ToolResult.fail(
            "Error: No search provider configured. "
            "Configure one of BOCHA_API_KEY / zhipu_ai_api_key / qianfan_api_key / linkai_api_key."
        )

    # Always log the routing decision so multi-provider deployments can
    # tell at a glance which backend served any given query.
    available = configured_providers()
    reason = self._resolution_reason(requested, provider)
    q_preview = query if len(query) <= 60 else (query[:57] + "...")
    logger.info(
        f"[WebSearch] provider={provider} reason={reason} "
        f"available={list(available)} query={q_preview!r} count={count} freshness={freshness}"
    )

    try:
        if provider == "bocha":
            return self._search_bocha(query, count, freshness, summary)
        if provider == "zhipu":
            return self._search_zhipu(query, count, freshness)
        if provider == "qianfan":
            return self._search_qianfan(query, count, freshness)
        if provider == "linkai":
            return self._search_linkai(query, count, freshness)
        return ToolResult.fail(f"Error: Unknown provider '{provider}'")
    except requests.Timeout:
        return ToolResult.fail(f"Error: Search request timed out after {DEFAULT_TIMEOUT}s")
    except requests.ConnectionError:
        return ToolResult.fail("Error: Failed to connect to search API")
    except Exception as e:
        logger.error(f"[WebSearch] Unexpected error ({provider}): {e}", exc_info=True)
        return ToolResult.fail(f"Error: Search failed - {str(e)}")
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Bocha
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _search_bocha(self, query: str, count: int, freshness: str, summary: bool) -> ToolResult:
    """Search with the Bocha web-search API and return the unified result.

    :param query: search query
    :param count: number of results to request
    :param freshness: time-range filter, forwarded as-is
    :param summary: whether to ask the API for per-page summaries
    :return: success result with ``query``, ``backend``, ``total``,
        ``count`` and ``results`` (title, url, snippet, siteName,
        datePublished, plus ``summary`` when the page carries one), or a
        failed result for HTTP 401/403/429, any other non-200 status, or an
        API-level ``code`` other than 200
    """
    api_key = _get_api_key("bocha")
    url = "https://api.bochaai.com/v1/web-search"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
        "Accept": "application/json",
    }
    payload = {"query": query, "count": count, "freshness": freshness, "summary": summary}

    logger.debug(f"[WebSearch] bocha: query='{query}', count={count}")
    resp = requests.post(url, headers=headers, json=payload, timeout=DEFAULT_TIMEOUT)

    if resp.status_code == 401:
        return ToolResult.fail("Error: Invalid bocha API key.")
    if resp.status_code == 403:
        return ToolResult.fail("Error: bocha API — insufficient balance. Top up at https://open.bochaai.com")
    if resp.status_code == 429:
        return ToolResult.fail("Error: bocha API rate limit reached.")
    if resp.status_code != 200:
        return ToolResult.fail(f"Error: bocha API returned HTTP {resp.status_code}")

    data = resp.json()
    api_code = data.get("code")
    if api_code is not None and api_code != 200:
        msg = data.get("msg") or "Unknown error"
        return ToolResult.fail(f"Error: bocha API error (code={api_code}): {msg}")

    # Each level may be missing or JSON null; `or {}` covers both, whereas
    # `.get("webPages", {})` would hand back None for an explicit null.
    web_pages = (data.get("data") or {}).get("webPages") or {}
    pages = web_pages.get("value") or []
    results = []
    for p in pages:
        item = {
            "title": p.get("name", ""),
            "url": p.get("url", ""),
            "snippet": p.get("snippet", ""),
            "siteName": p.get("siteName", ""),
            "datePublished": p.get("datePublished") or p.get("dateLastCrawled", ""),
        }
        # Include summary only if present
        if p.get("summary"):
            item["summary"] = p["summary"]
        results.append(item)
    total = web_pages.get("totalEstimatedMatches", len(results))
    return ToolResult.success({
        "query": query, "backend": "bocha",
        "total": total, "count": len(results), "results": results,
    })
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Zhipu
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _search_zhipu(self, query: str, count: int, freshness: str) -> ToolResult:
    """Search with the Zhipu web-search endpoint and return the unified result.

    The endpoint is ``<zhipu_ai_api_base>/web_search`` (base defaults to
    ``https://open.bigmodel.cn/api/paas/v4``). The engine and optional
    content size come from the ``tools.web_search`` config block.

    :param query: search query; only the first 70 characters are sent
    :param count: number of results, clamped to 1..50
    :param freshness: one of oneDay/oneWeek/oneMonth/oneYear/noLimit; any
        other value is sent as ``noLimit``
    :return: success result with ``query`` (untrimmed), ``backend``,
        ``total``, ``count`` and ``results``, or a failed result for HTTP
        401, any other non-200 status, or an ``error`` entry in the body
    """
    api_key = _get_api_key("zhipu")
    api_base = (conf().get("zhipu_ai_api_base") or "https://open.bigmodel.cn/api/paas/v4").rstrip("/")
    url = f"{api_base}/web_search"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    # Zhipu Web Search expects `search_query` <= 70 chars; truncate
    # gracefully so a long agent-supplied query doesn't get rejected.
    trimmed_query = (query or "")[:70]
    ws_conf = _tools_web_search_conf()
    engine = (ws_conf.get("zhipu_search_engine") or "search_pro").strip().lower()
    if engine not in ("search_std", "search_pro", "search_pro_sogou", "search_pro_quark"):
        engine = "search_pro"

    payload: Dict[str, Any] = {
        "search_engine": engine,
        "search_query": trimmed_query,
        "search_intent": False,
        "count": max(1, min(int(count or 10), 50)),
        "search_recency_filter": freshness if freshness in (
            "oneDay", "oneWeek", "oneMonth", "oneYear", "noLimit"
        ) else "noLimit",
    }
    content_size = (ws_conf.get("zhipu_content_size") or "").strip().lower()
    if content_size in ("medium", "high"):
        payload["content_size"] = content_size

    logger.debug(f"[WebSearch] zhipu: query='{trimmed_query}', count={payload['count']}, engine={engine}")
    resp = requests.post(url, headers=headers, json=payload, timeout=DEFAULT_TIMEOUT)

    if resp.status_code == 401:
        return ToolResult.fail("Error: Invalid Zhipu API key.")
    if resp.status_code != 200:
        return ToolResult.fail(f"Error: Zhipu API returned HTTP {resp.status_code}: {resp.text[:200]}")

    data = resp.json()
    # Business-level errors (1701/1702/1703 etc.) come back as
    # {"error": {"code","message"}} even on HTTP 200.
    if isinstance(data, dict) and data.get("error"):
        err = data["error"]
        # Tolerate an error that is not the documented object (e.g. a bare
        # string) so the provider's message is still reported.
        if isinstance(err, dict):
            err_code, err_message = err.get('code'), err.get('message', '')
        else:
            err_code, err_message = None, str(err)
        return ToolResult.fail(f"Error: Zhipu returned {err_code}: {err_message}")

    items = data.get("search_result") or (data.get("data") or {}).get("search_result") or []
    results = []
    for it in items:
        if not isinstance(it, dict):
            # Skip malformed entries instead of failing the whole search.
            continue
        results.append({
            "title": it.get("title", ""),
            "url": it.get("link") or it.get("url", ""),
            "snippet": it.get("content") or it.get("snippet", ""),
            "siteName": it.get("media") or it.get("siteName", ""),
            "datePublished": it.get("publish_date") or it.get("datePublished", ""),
        })
    return ToolResult.success({
        "query": query, "backend": "zhipu",
        "total": len(results), "count": len(results), "results": results,
    })
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Qianfan (Baidu)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _search_qianfan(self, query: str, count: int, freshness: str) -> ToolResult:
    """
    Search using the Baidu Qianfan AI Search endpoint.

    :param query: Search query, sent as a single user message
    :param count: Requested number of results (clamped to 1..50)
    :param freshness: Shared recency token; translated to a page_time range
    :return: ToolResult carrying the unified result structure, or a failure
    """
    token = _get_api_key("qianfan")
    base_url = (conf().get("qianfan_api_base") or "https://qianfan.baidubce.com/v2").rstrip("/")
    endpoint = f"{base_url}/ai_search/web_search"
    request_headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
        "X-Appbuilder-From": "cow",
    }

    count = max(1, min(int(count or 10), 50))
    body: Dict[str, Any] = {
        "messages": [{"role": "user", "content": query}],
        "search_source": "baidu_search_v2",
        "resource_type_filter": [{"type": "web", "top_k": count}],
    }

    # The API takes freshness as a date-range filter rather than a named
    # recency token, so the shared vocabulary is converted by the helper
    # and only attached when it yields a filter.
    freshness_filter = self._qianfan_build_freshness_filter(freshness)
    if freshness_filter:
        body["search_filter"] = freshness_filter

    logger.debug(f"[WebSearch] qianfan: query='{query}', count={count}, freshness={freshness!r}")
    resp = requests.post(endpoint, headers=request_headers, json=body, timeout=DEFAULT_TIMEOUT)

    status = resp.status_code
    if status == 401:
        return ToolResult.fail("Error: Invalid Qianfan API key.")
    if status != 200:
        return ToolResult.fail(f"Error: Qianfan API returned HTTP {resp.status_code}: {resp.text[:200]}")

    data = resp.json()
    # A truthy "code" in a 200 response body is treated as a business error.
    if isinstance(data, dict) and data.get("code"):
        return ToolResult.fail(f"Error: Qianfan returned {data.get('code')}: {data.get('message','')}")

    # Map each reference onto the unified result fields; snippets are
    # truncated to the first 200 characters of the reference content.
    results = [
        {
            "title": ref.get("title", ""),
            "url": ref.get("url", ""),
            "snippet": (ref.get("content") or "")[:200],
            "siteName": ref.get("web_anchor") or ref.get("website") or "",
            "datePublished": ref.get("date", ""),
        }
        for ref in (data.get("references") or [])
    ]
    found = len(results)
    return ToolResult.success({
        "query": query, "backend": "qianfan",
        "total": found, "count": found, "results": results,
    })
|
||||
|
||||
@staticmethod
|
||||
def _qianfan_build_freshness_filter(freshness: str) -> Optional[Dict[str, Any]]:
|
||||
if not freshness or freshness == "noLimit":
|
||||
return None
|
||||
delta_days = {"oneDay": 1, "oneWeek": 7, "oneMonth": 30, "oneYear": 365}.get(freshness)
|
||||
if not delta_days:
|
||||
return None
|
||||
from datetime import datetime, timedelta
|
||||
now = datetime.now()
|
||||
end_date = (now + timedelta(days=1)).strftime("%Y-%m-%d")
|
||||
start_date = (now - timedelta(days=delta_days)).strftime("%Y-%m-%d")
|
||||
return {"range": {"page_time": {"gte": start_date, "lt": end_date}}}
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# LinkAI (plugin)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _search_linkai(self, query: str, count: int, freshness: str) -> ToolResult:
    """
    Search using LinkAI plugin API

    :param query: Search query
    :param count: Number of results
    :param freshness: Time range filter
    :return: Formatted search results
    """
    # Resolve credentials and endpoint once. The block previously carried
    # two overlapping copies of this setup (and of the request itself), which
    # resulted in the same search being POSTed twice.
    api_key = _get_api_key("linkai")
    api_base = (conf().get("linkai_api_base") or "https://api.link-ai.tech").rstrip("/")
    url = f"{api_base}/v1/plugin/execute"

    from common.utils import get_cloud_headers
    headers = get_cloud_headers(api_key)

    payload = {"code": "web-search", "args": {"query": query, "count": count, "freshness": freshness}}
    logger.debug(f"[WebSearch] linkai: query='{query}', count={count}")
    resp = requests.post(url, headers=headers, json=payload, timeout=DEFAULT_TIMEOUT)

    if resp.status_code == 401:
        return ToolResult.fail("Error: Invalid LinkAI API key.")
    if resp.status_code != 200:
        return ToolResult.fail(f"Error: LinkAI API returned HTTP {resp.status_code}")

    data = resp.json()
    # A 200 response without a truthy "success" flag is reported as a
    # failure, using the response's "message" when one is present.
    if not data.get("success"):
        msg = data.get("message") or "Unknown error"
        return ToolResult.fail(f"Error: LinkAI search failed: {msg}")

    return self._format_linkai_results(data, query)
|
||||
|
||||
def _format_linkai_results(self, data: dict, query: str) -> ToolResult:
    """
    Format LinkAI API response into unified result structure.
    LinkAI returns the search data in data.data field, which follows
    the same Bing-compatible format as Bocha.

    :param data: Raw API response
    :param query: Original query
    :return: Formatted ToolResult
    """
    raw = data.get("data", "")

    # LinkAI may return data as a JSON string
    if isinstance(raw, str):
        try:
            raw = json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            # If data is plain text, return it as a single result
            return ToolResult.success({
                "query": query, "backend": "linkai",
                "total": 1, "count": 1, "results": [{"content": raw}],
            })

    # If the response follows Bing-compatible structure
    if isinstance(raw, dict):
        # `or {}` / `or []` tolerate explicit nulls for webPages / value.
        pages = (raw.get("webPages") or {}).get("value", []) or []
        if pages:
            results = []
            for p in pages:
                item = {
                    "title": p.get("name", ""),
                    "url": p.get("url", ""),
                    "snippet": p.get("snippet", ""),
                    "siteName": p.get("siteName", ""),
                    "datePublished": p.get("datePublished") or p.get("dateLastCrawled", ""),
                }
                # "summary" is optional and only copied when non-empty.
                if p.get("summary"):
                    item["summary"] = p["summary"]
                results.append(item)
            total = (raw.get("webPages") or {}).get("totalEstimatedMatches", len(results))
            return ToolResult.success({
                "query": query, "backend": "linkai",
                "total": total, "count": len(results), "results": results,
            })

    # Fallback: return raw data
    return ToolResult.success({
        "query": query, "backend": "linkai",
        "total": 1, "count": 1, "results": [{"content": str(raw)}],
    })
|
||||
|
||||
@@ -521,7 +521,7 @@ class AgentInitializer:
|
||||
if tool_name == "web_search":
|
||||
from agent.tools.web_search.web_search import WebSearch
|
||||
if not WebSearch.is_available():
|
||||
logger.debug("[AgentInitializer] WebSearch skipped - no BOCHA_API_KEY or LINKAI_API_KEY")
|
||||
logger.debug("[AgentInitializer] WebSearch skipped - no search provider configured")
|
||||
continue
|
||||
|
||||
# Special handling for EnvConfig tool
|
||||
|
||||
@@ -14,7 +14,9 @@ class Bridge(object):
|
||||
def __init__(self):
|
||||
self.btype = {
|
||||
"chat": const.OPENAI,
|
||||
"voice_to_text": conf().get("voice_to_text", "openai"),
|
||||
# Empty `voice_to_text` (the default in new configs) triggers
|
||||
# the auto-pick below — see _auto_pick_voice_to_text for order.
|
||||
"voice_to_text": conf().get("voice_to_text") or self._auto_pick_voice_to_text(),
|
||||
"text_to_voice": conf().get("text_to_voice", "google"),
|
||||
"translate": conf().get("translate", "baidu"),
|
||||
}
|
||||
@@ -84,6 +86,46 @@ class Bridge(object):
|
||||
self.chat_bots = {}
|
||||
self._agent_bridge = None
|
||||
|
||||
def refresh_voice(self):
    """Reload the voice provider selection from config.

    Recomputes the ``voice_to_text`` / ``text_to_voice`` entries of
    ``self.btype`` and evicts the matching entries from ``self.bots`` so the
    next lookup rebuilds them. Only those two keys of ``btype`` and ``bots``
    are modified here.
    """
    stt_provider = conf().get("voice_to_text") or self._auto_pick_voice_to_text()
    tts_provider = conf().get("text_to_voice", "google")

    # With LinkAI enabled and keyed, an unset or OpenAI-flavoured provider
    # setting is redirected to LinkAI.
    linkai_enabled = conf().get("use_linkai") and conf().get("linkai_api_key")
    if linkai_enabled:
        stt_unset = not conf().get("voice_to_text")
        if stt_unset or conf().get("voice_to_text") in ("openai",):
            stt_provider = const.LINKAI
        tts_unset = not conf().get("text_to_voice")
        if tts_unset or conf().get("text_to_voice") in ("openai", const.TTS_1, const.TTS_1_HD):
            tts_provider = const.LINKAI

    for slot, provider in (("voice_to_text", stt_provider), ("text_to_voice", tts_provider)):
        self.btype[slot] = provider
    for slot in ("voice_to_text", "text_to_voice"):
        # Drop the cached bot, if any, so it is recreated for the new provider.
        self.bots.pop(slot, None)
    logger.info(f"[Bridge] voice refreshed: voice_to_text={stt_provider}, text_to_voice={tts_provider}")
|
||||
|
||||
@staticmethod
def _auto_pick_voice_to_text() -> str:
    """Choose an ASR provider from whichever API key is configured.

    Keys are probed in the order openai, dashscope, zhipu, linkai and the
    first one holding a real (non-blank, non-placeholder) value wins.
    Returns 'openai' when none is configured.
    """
    placeholders = ("YOUR API KEY", "YOUR_API_KEY")
    probe_order = (
        ("open_ai_api_key", "openai"),
        ("dashscope_api_key", "dashscope"),
        ("zhipu_ai_api_key", "zhipu"),
        ("linkai_api_key", "linkai"),
    )
    for conf_key, provider in probe_order:
        value = (conf().get(conf_key) or "").strip()
        # Skip blanks and the template placeholder strings.
        if value == "" or value in placeholders:
            continue
        return provider
    return "openai"
|
||||
|
||||
# 模型对应的接口
|
||||
def get_bot(self, typename):
|
||||
if self.bots.get(typename) is None:
|
||||
|
||||
@@ -171,7 +171,13 @@ class ChatChannel(Channel):
|
||||
if "desire_rtype" not in context and conf().get("always_reply_voice") and ReplyType.VOICE not in self.NOT_SUPPORT_REPLYTYPE:
|
||||
context["desire_rtype"] = ReplyType.VOICE
|
||||
elif context.type == ContextType.VOICE:
|
||||
if "desire_rtype" not in context and conf().get("voice_reply_voice") and ReplyType.VOICE not in self.NOT_SUPPORT_REPLYTYPE:
|
||||
# Voice input replies with voice when either voice_reply_voice
|
||||
# (mirror voice) or the global always_reply_voice toggle is on.
|
||||
if (
|
||||
"desire_rtype" not in context
|
||||
and (conf().get("voice_reply_voice") or conf().get("always_reply_voice"))
|
||||
and ReplyType.VOICE not in self.NOT_SUPPORT_REPLYTYPE
|
||||
):
|
||||
context["desire_rtype"] = ReplyType.VOICE
|
||||
return context
|
||||
|
||||
@@ -264,6 +270,8 @@ class ChatChannel(Channel):
|
||||
if reply.type == ReplyType.TEXT:
|
||||
reply_text = reply.content
|
||||
if desire_rtype == ReplyType.VOICE and ReplyType.VOICE not in self.NOT_SUPPORT_REPLYTYPE:
|
||||
# Preserve original text for the "text-then-voice" pattern in _send_reply.
|
||||
context["voice_reply_text"] = reply.content
|
||||
reply = super().build_text_to_voice(reply.content)
|
||||
return self._decorate_reply(context, reply)
|
||||
if context.get("isgroup", False):
|
||||
@@ -311,6 +319,15 @@ class ChatChannel(Channel):
|
||||
# 短暂延迟后发送图片
|
||||
time.sleep(0.3)
|
||||
self._send(reply, context)
|
||||
# Send text bubble before voice, unless channel already streamed
|
||||
# the text (feishu) or natively renders STT under the voice (wechatcom).
|
||||
elif reply.type == ReplyType.VOICE and context.get("voice_reply_text") \
|
||||
and not context.get("feishu_streamed") \
|
||||
and context.get("channel_type") not in ("wechatcom_app",):
|
||||
text_reply = Reply(ReplyType.TEXT, context.get("voice_reply_text"))
|
||||
self._send(text_reply, context)
|
||||
time.sleep(0.3)
|
||||
self._send(reply, context)
|
||||
else:
|
||||
self._send(reply, context)
|
||||
|
||||
|
||||
@@ -86,6 +86,8 @@ def _check(func):
|
||||
|
||||
@singleton
|
||||
class DingTalkChanel(ChatChannel, dingtalk_stream.ChatbotHandler):
|
||||
NOT_SUPPORT_REPLYTYPE = []
|
||||
|
||||
dingtalk_client_id = conf().get('dingtalk_client_id')
|
||||
dingtalk_client_secret = conf().get('dingtalk_client_secret')
|
||||
|
||||
@@ -870,6 +872,48 @@ class DingTalkChanel(ChatChannel, dingtalk_stream.ChatbotHandler):
|
||||
self.reply_text("抱歉,文件上传失败", incoming_message)
|
||||
return
|
||||
|
||||
# Native sampleAudio. Upload only accepts ogg/amr, so convert TTS mp3/wav to amr.
|
||||
elif reply.type == ReplyType.VOICE:
|
||||
logger.info(f"[DingTalk] Sending voice: {reply.content}")
|
||||
access_token = self.get_access_token()
|
||||
if not access_token:
|
||||
logger.error("[DingTalk] Cannot get access token for voice")
|
||||
self.reply_text("抱歉,语音发送失败(无法获取token)", incoming_message)
|
||||
return
|
||||
|
||||
voice_path = reply.content
|
||||
if voice_path.startswith("file://"):
|
||||
voice_path = voice_path[7:]
|
||||
|
||||
amr_path = voice_path
|
||||
duration_ms = 0
|
||||
if not voice_path.lower().endswith((".amr", ".ogg")):
|
||||
try:
|
||||
from voice.audio_convert import any_to_amr
|
||||
amr_path = os.path.splitext(voice_path)[0] + ".amr"
|
||||
duration_ms = int(any_to_amr(voice_path, amr_path) or 0)
|
||||
except Exception as e:
|
||||
logger.error(f"[DingTalk] Failed to convert voice to amr: {e}")
|
||||
self.reply_text("抱歉,语音转码失败", incoming_message)
|
||||
return
|
||||
|
||||
media_id = self.upload_media(amr_path, media_type="voice")
|
||||
if not media_id:
|
||||
logger.error("[DingTalk] Failed to upload voice media")
|
||||
self.reply_text("抱歉,语音上传失败", incoming_message)
|
||||
return
|
||||
|
||||
msg_param = {
|
||||
"mediaId": media_id,
|
||||
"duration": str(duration_ms or 1000),
|
||||
}
|
||||
success = self._send_file_message(
|
||||
access_token, incoming_message, "sampleAudio", msg_param, isgroup
|
||||
)
|
||||
if not success:
|
||||
self.reply_text("抱歉,语音发送失败", incoming_message)
|
||||
return
|
||||
|
||||
# 处理文本消息
|
||||
elif reply.type == ReplyType.TEXT:
|
||||
logger.info(f"[DingTalk] Sending text message, length={len(reply.content)}")
|
||||
|
||||
@@ -1515,10 +1515,16 @@ class FeiShuChanel(ChatChannel):
|
||||
else:
|
||||
context.type = ContextType.TEXT
|
||||
context.content = content.strip()
|
||||
# Text input opts into voice replies only when the always-on toggle is set.
|
||||
if "desire_rtype" not in context and conf().get("always_reply_voice"):
|
||||
context["desire_rtype"] = ReplyType.VOICE
|
||||
|
||||
elif context.type == ContextType.VOICE:
|
||||
# 2.语音请求
|
||||
if "desire_rtype" not in context and conf().get("voice_reply_voice"):
|
||||
# 2.语音请求: voice input replies with voice if either
|
||||
# voice_reply_voice (mirror reply) or always_reply_voice is on.
|
||||
if "desire_rtype" not in context and (
|
||||
conf().get("voice_reply_voice") or conf().get("always_reply_voice")
|
||||
):
|
||||
context["desire_rtype"] = ReplyType.VOICE
|
||||
|
||||
return context
|
||||
|
||||
@@ -137,6 +137,11 @@
|
||||
<i class="fas fa-sliders item-icon text-xs w-5 text-center"></i>
|
||||
<span data-i18n="menu_config">配置</span>
|
||||
</a>
|
||||
<a class="sidebar-item flex items-center gap-3 px-3 py-2 rounded-lg cursor-pointer transition-all duration-150 hover:bg-white/5 hover:text-neutral-200 text-[14px]"
|
||||
data-view="models">
|
||||
<i class="fas fa-microchip item-icon text-xs w-5 text-center"></i>
|
||||
<span data-i18n="menu_models">模型</span>
|
||||
</a>
|
||||
<a class="sidebar-item flex items-center gap-3 px-3 py-2 rounded-lg cursor-pointer transition-all duration-150 hover:bg-white/5 hover:text-neutral-200 text-[14px]"
|
||||
data-view="skills">
|
||||
<i class="fas fa-bolt item-icon text-xs w-5 text-center"></i>
|
||||
@@ -417,8 +422,9 @@
|
||||
</button>
|
||||
</div>
|
||||
<div id="slash-menu" class="slash-menu hidden"></div>
|
||||
<div class="flex-1 min-w-0 relative flex items-center">
|
||||
<textarea id="chat-input"
|
||||
class="flex-1 min-w-0 px-4 py-[10px] rounded-xl border border-slate-200 dark:border-slate-600
|
||||
class="w-full pl-4 pr-11 py-[10px] rounded-xl border border-slate-200 dark:border-slate-600
|
||||
bg-slate-50 dark:bg-white/5 text-slate-800 dark:text-slate-100
|
||||
placeholder:text-slate-400 dark:placeholder:text-slate-500
|
||||
focus:outline-none focus:ring-0 focus:border-primary-600
|
||||
@@ -426,6 +432,14 @@
|
||||
rows="1"
|
||||
data-i18n-placeholder="input_placeholder"
|
||||
placeholder="输入消息,或输入 / 使用指令"></textarea>
|
||||
<button id="mic-btn" type="button"
|
||||
class="absolute right-2 top-1/2 -translate-y-1/2 w-8 h-8 flex items-center justify-center rounded-lg
|
||||
text-slate-400 hover:text-primary-500 hover:bg-primary-50 dark:hover:bg-primary-900/20
|
||||
cursor-pointer transition-colors duration-150"
|
||||
data-i18n-title="mic_idle_title" title="点击录音 / 再按一次结束">
|
||||
<i class="fas fa-microphone text-sm"></i>
|
||||
</button>
|
||||
</div>
|
||||
<button id="send-btn"
|
||||
class="flex-shrink-0 w-10 h-10 flex items-center justify-center rounded-lg
|
||||
bg-primary-400 text-white hover:bg-primary-500
|
||||
@@ -460,6 +474,11 @@
|
||||
<i class="fas fa-microchip text-primary-500 text-sm"></i>
|
||||
</div>
|
||||
<h3 class="font-semibold text-slate-800 dark:text-slate-100" data-i18n="config_model">模型配置</h3>
|
||||
<a class="ml-auto text-xs text-slate-500 dark:text-slate-400 hover:text-primary-500 dark:hover:text-primary-400 cursor-pointer transition-colors flex items-center gap-1"
|
||||
onclick="navigateTo('models')">
|
||||
<span data-i18n="config_model_advanced">高级配置</span>
|
||||
<i class="fas fa-arrow-right text-[10px]"></i>
|
||||
</a>
|
||||
</div>
|
||||
<div class="space-y-5">
|
||||
<!-- Provider -->
|
||||
@@ -850,6 +869,41 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- ====================================================== -->
|
||||
<!-- VIEW: Models -->
|
||||
<!-- ====================================================== -->
|
||||
<div id="view-models" class="view">
|
||||
<!-- Tailwind JIT safelist: capability-card icon colors are
|
||||
emitted from JS template strings. Listing them here
|
||||
(display:none) guarantees the CDN-side compiler picks
|
||||
them up regardless of render timing. -->
|
||||
<div class="hidden bg-blue-50 dark:bg-blue-900/30 text-blue-500
|
||||
bg-orange-50 dark:bg-orange-900/30 text-orange-500
|
||||
bg-purple-50 dark:bg-purple-900/30 text-purple-500
|
||||
bg-amber-50 dark:bg-amber-900/30 text-amber-500
|
||||
bg-primary-50 dark:bg-primary-900/30 text-primary-500"></div>
|
||||
<div class="flex-1 overflow-y-auto p-6">
|
||||
<div class="max-w-4xl mx-auto">
|
||||
<div class="flex items-center justify-between mb-6">
|
||||
<div>
|
||||
<h2 class="text-xl font-bold text-slate-800 dark:text-slate-100" data-i18n="models_title">模型管理</h2>
|
||||
<p class="text-sm text-slate-500 dark:text-slate-400 mt-1" data-i18n="models_desc">统一管理对话、视觉、语音、向量、图像、搜索能力</p>
|
||||
</div>
|
||||
<button id="models-add-vendor-btn" onclick="openVendorModal('')"
|
||||
class="flex items-center gap-2 px-4 py-2 rounded-lg bg-primary-500 hover:bg-primary-600
|
||||
text-white text-sm font-medium cursor-pointer transition-colors duration-150">
|
||||
<i class="fas fa-plus text-xs"></i>
|
||||
<span data-i18n="models_add_vendor">添加厂商</span>
|
||||
</button>
|
||||
</div>
|
||||
<div id="models-loading" class="flex items-center gap-2 py-12 justify-center text-slate-400 dark:text-slate-500 text-sm">
|
||||
<i class="fas fa-spinner fa-spin text-xs"></i><span>Loading...</span>
|
||||
</div>
|
||||
<div id="models-content" class="grid gap-6 hidden"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- ====================================================== -->
|
||||
<!-- VIEW: Channels -->
|
||||
<!-- ====================================================== -->
|
||||
@@ -959,7 +1013,7 @@
|
||||
</div><!-- /app -->
|
||||
|
||||
<!-- Confirm Dialog -->
|
||||
<div id="confirm-dialog-overlay" class="fixed inset-0 bg-black/50 z-[100] hidden flex items-center justify-center">
|
||||
<div id="confirm-dialog-overlay" class="fixed inset-0 bg-black/50 z-[200] hidden flex items-center justify-center">
|
||||
<div class="bg-white dark:bg-[#1A1A1A] rounded-2xl border border-slate-200 dark:border-white/10 shadow-xl
|
||||
w-full max-w-sm mx-4 overflow-hidden">
|
||||
<div class="p-6">
|
||||
@@ -984,6 +1038,77 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Vendor Credentials Modal -->
|
||||
<div id="vendor-modal-overlay" class="fixed inset-0 bg-black/50 z-[100] hidden flex items-center justify-center">
|
||||
<div class="bg-white dark:bg-[#1A1A1A] rounded-2xl border border-slate-200 dark:border-white/10 shadow-xl
|
||||
w-full max-w-md mx-4">
|
||||
<div class="p-6">
|
||||
<div class="flex items-center gap-3 mb-5">
|
||||
<div class="w-10 h-10 rounded-xl bg-primary-50 dark:bg-primary-900/20 flex items-center justify-center flex-shrink-0">
|
||||
<i class="fas fa-key text-primary-500"></i>
|
||||
</div>
|
||||
<div class="min-w-0 flex-1">
|
||||
<h3 id="vendor-modal-title" class="font-semibold text-slate-800 dark:text-slate-100 text-base"></h3>
|
||||
<p id="vendor-modal-subtitle" class="text-xs text-slate-500 dark:text-slate-400 mt-0.5 font-mono"></p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Provider selector (only visible when adding via top button) -->
|
||||
<div id="vendor-modal-picker-wrap" class="mb-4 hidden">
|
||||
<label class="block text-sm font-medium text-slate-600 dark:text-slate-400 mb-1.5" data-i18n="models_provider">厂商</label>
|
||||
<div id="vendor-modal-picker" class="cfg-dropdown" tabindex="0">
|
||||
<div class="cfg-dropdown-selected">
|
||||
<span class="cfg-dropdown-text">--</span>
|
||||
<i class="fas fa-chevron-down cfg-dropdown-arrow"></i>
|
||||
</div>
|
||||
<div class="cfg-dropdown-menu"></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="space-y-4">
|
||||
<div>
|
||||
<label class="block text-sm font-medium text-slate-600 dark:text-slate-400 mb-1.5">API Key</label>
|
||||
<input id="vendor-modal-key" type="text" autocomplete="off" data-1p-ignore data-lpignore="true"
|
||||
class="w-full px-3 py-2 rounded-lg border border-slate-200 dark:border-slate-600
|
||||
bg-slate-50 dark:bg-white/5 text-sm text-slate-800 dark:text-slate-100
|
||||
focus:outline-none focus:border-primary-500 font-mono transition-colors"
|
||||
placeholder="sk-...">
|
||||
</div>
|
||||
<div id="vendor-modal-base-wrap">
|
||||
<label class="block text-sm font-medium text-slate-600 dark:text-slate-400 mb-1.5">API Base</label>
|
||||
<input id="vendor-modal-base" type="text"
|
||||
class="w-full px-3 py-2 rounded-lg border border-slate-200 dark:border-slate-600
|
||||
bg-slate-50 dark:bg-white/5 text-sm text-slate-800 dark:text-slate-100
|
||||
focus:outline-none focus:border-primary-500 font-mono transition-colors"
|
||||
placeholder="https://...../v1">
|
||||
<p id="vendor-modal-base-hint" class="mt-1.5 text-xs text-slate-400 dark:text-slate-500 hidden">
|
||||
<i class="fas fa-info-circle mr-1"></i><span data-i18n="models_base_default_hint">留空将使用官方默认地址</span>
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="flex items-center justify-between gap-3 px-6 py-4 border-t border-slate-100 dark:border-white/5 rounded-b-2xl">
|
||||
<button id="vendor-modal-clear"
|
||||
class="px-3 py-2 rounded-lg text-xs
|
||||
text-red-500 dark:text-red-400 hover:bg-red-50 dark:hover:bg-red-900/20
|
||||
cursor-pointer transition-colors duration-150 hidden"
|
||||
data-i18n="models_clear_credential">清除凭据</button>
|
||||
<span id="vendor-modal-status"
|
||||
class="flex-1 text-xs text-primary-500 opacity-0 transition-opacity duration-300 text-center"></span>
|
||||
<button id="vendor-modal-cancel"
|
||||
class="px-4 py-2 rounded-lg border border-slate-200 dark:border-white/10
|
||||
text-slate-600 dark:text-slate-300 text-sm font-medium
|
||||
hover:bg-slate-50 dark:hover:bg-white/5
|
||||
cursor-pointer transition-colors duration-150"
|
||||
data-i18n="cancel">取消</button>
|
||||
<button id="vendor-modal-save"
|
||||
class="px-4 py-2 rounded-lg bg-primary-500 hover:bg-primary-600 text-white text-sm font-medium
|
||||
cursor-pointer transition-colors duration-150 disabled:opacity-50 disabled:cursor-not-allowed"
|
||||
data-i18n="save">保存</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script defer src="assets/js/console.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
@@ -725,6 +725,58 @@
|
||||
background: rgba(74, 190, 110, 0.15);
|
||||
color: #74E9A4;
|
||||
}
|
||||
/* When an item carries a hint (e.g. brand alias next to a technical model
|
||||
id), label/hint are split into two spans so the hint sits on the right in
|
||||
a dim, smaller weight. Without a hint the row stays a plain text node and
|
||||
uses the default ellipsis behaviour, so no layout regressions for old call
|
||||
sites. */
|
||||
.cfg-dropdown-label {
|
||||
flex: 1 1 auto;
|
||||
min-width: 0;
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
}
|
||||
.cfg-dropdown-hint {
|
||||
flex-shrink: 0;
|
||||
margin-left: auto;
|
||||
padding-left: 12px;
|
||||
color: #94a3b8;
|
||||
font-size: 12px;
|
||||
font-weight: 400;
|
||||
}
|
||||
.dark .cfg-dropdown-hint {
|
||||
color: #64748b;
|
||||
}
|
||||
.cfg-dropdown-item.active .cfg-dropdown-hint {
|
||||
/* Tint the hint toward the brand colour on the active row so it doesn't
|
||||
fight with the highlighted label tone. */
|
||||
color: rgba(34, 133, 71, 0.65);
|
||||
}
|
||||
.dark .cfg-dropdown-item.active .cfg-dropdown-hint {
|
||||
color: rgba(116, 233, 164, 0.6);
|
||||
}
|
||||
/* The active row gets a trailing brand-green checkmark via a Font Awesome
|
||||
pseudo-element so every dropdown (chat / vision / image / asr / tts / etc.)
|
||||
surfaces "this is what's currently selected" without per-call JS plumbing.
|
||||
When a hint is present, the ✓ sits to its right with a small gap; without
|
||||
a hint, margin-left:auto pushes the ✓ flush against the right edge. */
|
||||
.cfg-dropdown-item.active::after {
|
||||
content: '\f00c'; /* FontAwesome check glyph */
|
||||
font-family: 'Font Awesome 6 Free', 'Font Awesome 5 Free', 'FontAwesome';
|
||||
font-weight: 900;
|
||||
margin-left: auto;
|
||||
padding-left: 12px;
|
||||
color: #4abe6e;
|
||||
font-size: 11px;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
.cfg-dropdown-item.active:has(.cfg-dropdown-hint)::after {
|
||||
/* When hint occupies the auto-margin slot, the ✓ no longer benefits
|
||||
from `margin-left: auto`; replace it with a small fixed gap so the
|
||||
✓ trails the hint cleanly. */
|
||||
margin-left: 0;
|
||||
padding-left: 10px;
|
||||
}
|
||||
|
||||
/* API Key masking via CSS (avoids browser password prompts) */
|
||||
.cfg-key-masked {
|
||||
@@ -732,6 +784,77 @@
|
||||
text-security: disc;
|
||||
}
|
||||
|
||||
/* Provider logo image — vendors flagged as `provider-logo-invert-dark`
|
||||
ship a black wordmark that disappears on the dark canvas; we invert their
|
||||
luminance only in dark mode so the brand stays recognizable without
|
||||
touching multi-color marks like Google/MiniMax. */
|
||||
.provider-logo-img {
|
||||
object-fit: contain;
|
||||
object-position: center;
|
||||
}
|
||||
.dark .provider-logo-invert-dark {
|
||||
filter: invert(1) brightness(1.15);
|
||||
}
|
||||
|
||||
/* Models page — provider dropdown rows.
|
||||
Configured rows look like ordinary picker entries; the .active row's
|
||||
trailing brand-green ✓ already announces "this is what's selected"
|
||||
(handled globally by .cfg-dropdown-item.active::after above).
|
||||
Unconfigured rows are visually subdued and carry a trailing gear icon
|
||||
as a "click to set up" affordance. */
|
||||
.cap-provider-label {
|
||||
flex: 1 1 auto;
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
}
|
||||
.cap-provider-gear {
|
||||
margin-left: auto;
|
||||
padding-left: 12px;
|
||||
color: #94a3b8;
|
||||
font-size: 11px;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
.cap-provider-item.cap-provider-unconfigured {
|
||||
color: #94a3b8;
|
||||
}
|
||||
.dark .cap-provider-item.cap-provider-unconfigured {
|
||||
color: #64748b;
|
||||
}
|
||||
.cap-provider-item.cap-provider-unconfigured:hover {
|
||||
color: #475569;
|
||||
}
|
||||
.dark .cap-provider-item.cap-provider-unconfigured:hover {
|
||||
color: #cbd5e1;
|
||||
}
|
||||
.cap-provider-item.cap-provider-unconfigured:hover .cap-provider-gear {
|
||||
color: #475569;
|
||||
}
|
||||
.dark .cap-provider-item.cap-provider-unconfigured:hover .cap-provider-gear {
|
||||
color: #cbd5e1;
|
||||
}
|
||||
/* If the active row ever lands on an unconfigured vendor (defensive — the
|
||||
click handler normally diverts to the modal), suppress the global ✓ so
|
||||
the gear remains the sole trailing icon and the row keeps reading as
|
||||
"needs setup" rather than "already selected". */
|
||||
.cap-provider-item.cap-provider-unconfigured.active::after {
|
||||
content: none;
|
||||
}
|
||||
|
||||
/* "Add vendor" modal picker — each configured row carries a static
|
||||
brand-green ✓ via decorateVendorModalPicker so users can see what's set
|
||||
up at a glance. The active row's global ✓ is suppressed here to avoid
|
||||
showing two checks side by side on configured + selected rows. */
|
||||
.vendor-picker-item.active::after {
|
||||
content: none;
|
||||
}
|
||||
.vendor-picker-configured-mark {
|
||||
margin-left: auto;
|
||||
padding-left: 12px;
|
||||
color: #4abe6e;
|
||||
font-size: 11px;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
/* Chat Input */
|
||||
#chat-input {
|
||||
resize: none; height: 42px; max-height: 180px;
|
||||
@@ -1171,3 +1294,76 @@
|
||||
overflow: hidden;
|
||||
min-height: 2.5em; /* ~2 lines at text-sm leading-relaxed */
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------
 * Voice pill — compact custom audio player used by mic uploads and TTS
 * replies. Replaces the bulky native <audio controls> with a play/pause
 * icon + thin progress bar + duration counter so it blends into chat
 * bubbles without the chrome-grey browser default look.
 * ------------------------------------------------------------------ */
.voice-pill {
    display: inline-flex;
    align-items: center;
    gap: 8px;
    padding: 6px 10px;
    border-radius: 999px;
    background: rgba(15, 23, 42, 0.05);
    color: rgb(71, 85, 105);
    font-size: 12px;
    line-height: 1;
    max-width: 240px;
    /* Safari/WebKit only honours the prefixed property, so declare both;
       otherwise the time label stays selectable there. */
    -webkit-user-select: none;
    user-select: none;
    cursor: default;
}
.dark .voice-pill {
    background: rgba(255, 255, 255, 0.08);
    color: rgb(203, 213, 225);
}
/* Dimmed while the pill is flagged as loading via data-loading="1". */
.voice-pill[data-loading="1"] {
    opacity: 0.65;
}

/* Round play/pause button. */
.voice-pill-btn {
    width: 22px;
    height: 22px;
    border-radius: 999px;
    display: inline-flex;
    align-items: center;
    justify-content: center;
    background: var(--color-primary-500, #2563eb);
    color: #fff;
    flex-shrink: 0;
    cursor: pointer;
    transition: transform 0.1s ease;
}
.voice-pill-btn:hover { transform: scale(1.05); }
/* Per-state horizontal nudge of the icon inside the button. */
.voice-pill-btn i { font-size: 9px; margin-left: 1px; }
.voice-pill-btn[data-state="play"] i { margin-left: 2px; }
.voice-pill-btn[data-state="pause"] i { margin-left: 0; }

/* Progress bar: the track takes the remaining width, the fill's width is
   animated over 0.1s whenever it changes. */
.voice-pill-track {
    flex: 1;
    height: 3px;
    border-radius: 999px;
    background: rgba(100, 116, 139, 0.25);
    overflow: hidden;
    min-width: 70px;
}
.dark .voice-pill-track {
    background: rgba(148, 163, 184, 0.25);
}
.voice-pill-fill {
    height: 100%;
    width: 0%;
    background: var(--color-primary-500, #2563eb);
    border-radius: inherit;
    transition: width 0.1s linear;
}

/* Time counter: tabular digits and a minimum width keep it from jittering. */
.voice-pill-time {
    font-variant-numeric: tabular-nums;
    font-size: 11px;
    color: inherit;
    opacity: 0.75;
    flex-shrink: 0;
    min-width: 28px;
    text-align: right;
}

/* The native <audio> element inside the pill is never shown. */
.voice-pill audio { display: none; }
|
||||
|
||||
1
channel/web/static/logos/claudeAPI.svg
Normal file
@@ -0,0 +1 @@
|
||||
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251656961" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="18432" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M252.8 652.8l167.893333-94.293333 2.773334-8.106667-2.773334-4.48h-8.106666l-28.16-1.706667-96-2.56-83.2-3.413333-80.64-4.266667-20.266667-4.266666L85.333333 504.746667l1.92-12.586667 17.066667-11.52 24.32 2.133333 53.973333 3.626667 81.066667 5.546667 58.666667 3.413333 87.04 9.173333h13.866666l1.92-5.546666-4.693333-3.413334-3.626667-3.413333-83.84-56.746667-90.666666-60.16-47.573334-34.56-25.813333-17.493333-13.013333-16.426667-5.546667-35.84 23.253333-25.813333 31.36 2.133333 7.893334 2.133334 31.786666 24.32 67.84 52.48L401.066667 391.466667l13.013333 10.88 5.12-3.626667 0.64-2.56-5.76-9.813333-48.213333-87.04L314.453333 210.773333l-22.826666-36.693333-5.973334-21.973333a107.861333 107.861333 0 0 1-3.626666-26.026667l26.666666-36.053333L323.413333 85.333333l35.413334 4.693334 14.933333 13.013333 21.973333 50.346667 35.626667 79.36 55.253333 107.733333 16.213334 32 8.746666 29.653333 3.2 9.173334h5.546667v-5.12l4.48-60.8 8.32-74.453334 8.106667-96 2.773333-27.093333 13.44-32.426667 26.666667-17.493333 20.693333 10.026667 17.066667 24.32-2.346667 15.786666-10.24 65.92-19.84 103.253334-13.013333 69.12h7.466666l8.746667-8.746667 34.986667-46.506667 58.666666-73.386666 26.026667-29.226667 30.293333-32.213333 19.413334-15.36h36.693333l27.093333 40.106666-12.16 41.386667-37.76 48-31.36 40.533333-45.013333 60.586667-28.16 48.426667 2.56 3.84 6.613333-0.64 101.546667-21.546667 54.826667-10.026667 65.493333-11.306666 29.653333 13.866666 3.2 14.08-11.733333 28.8-69.973333 17.28-82.133334 16.426667-122.24 29.013333-1.493333 1.066667 1.706667 2.133333 55.04 5.12 23.466666 1.28h57.6l107.306667 7.893334 28.16 18.56 16.853333 22.613333-2.773333 
17.28-43.306667 21.973333-58.24-13.866666-136.106666-32.426667-46.72-11.733333h-6.4v3.84l38.826666 37.973333 71.253334 64.426667 89.173333 82.986666 4.48 20.48-11.52 16.213334-12.16-1.706667-78.506667-58.88-30.293333-26.666667-68.48-57.6h-4.48v5.973334l15.786667 23.04 83.413333 125.226666 4.266667 38.4-5.973334 12.586667-21.546666 7.466667-23.68-4.266667-48.853334-68.48-50.346666-77.226667-40.533334-69.12-4.906666 2.773334-23.893334 258.133333-11.306666 13.226667-26.026667 10.026666-21.546667-16.426666-11.52-26.666667 11.52-52.48 13.866667-68.48 11.306667-54.4 10.24-67.626667 5.973333-22.4-0.426667-1.493333-4.906666 0.64-50.986667 69.973333-77.653333 104.746667-61.44 65.706667-14.72 5.76-25.386667-13.226667 2.346667-23.466667 14.293333-20.906666 84.906667-107.946667 51.2-66.986667 33.066666-38.613333v-5.546667h-2.133333l-225.493333 146.56-40.106667 5.12-17.28-16.213333 2.133333-26.666667 8.106667-8.746666 67.84-46.72h-0.213333l0.853333 0.853333z" fill="#D97757" p-id="18433"></path></svg>
|
||||
|
After Width: | Height: | Size: 2.9 KiB |
10
channel/web/static/logos/custom.svg
Normal file
@@ -0,0 +1,10 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="200" height="200" fill="none" stroke="#475569" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round">
|
||||
<!-- Horizontal slider tracks -->
|
||||
<line x1="4" y1="7" x2="20" y2="7"/>
|
||||
<line x1="4" y1="12" x2="20" y2="12"/>
|
||||
<line x1="4" y1="17" x2="20" y2="17"/>
|
||||
<!-- Knobs (filled circles) -->
|
||||
<circle cx="9" cy="7" r="2.2" fill="#475569" stroke="none"/>
|
||||
<circle cx="15" cy="12" r="2.2" fill="#475569" stroke="none"/>
|
||||
<circle cx="7" cy="17" r="2.2" fill="#475569" stroke="none"/>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 573 B |
1
channel/web/static/logos/dashscope.svg
Normal file
@@ -0,0 +1 @@
|
||||
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251621200" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="17444" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M1019.364785 620.816931L891.797142 397.807295 946.450846 293.15069a29.097778 29.097778 0 0 0 6.399732-36.393472l-70.184053-126.586684a30.078737 30.078737 0 0 0-24.574968-13.652427H597.4945L539.171949 14.549389a27.348852 27.348852 0 0 0-20.906122-14.549389H380.628607a29.139776 29.139776 0 0 0-24.616967 14.549389v5.545767L225.797108 243.062793H100.919352a29.182775 29.182775 0 0 0-25.513928 13.653427L3.428446 384.11187a32.766624 32.766624 0 0 0 0 29.182775L132.831012 638.096205 74.508461 740.064923a32.766624 32.766624 0 0 0 0 29.05478l66.514207 116.561105a29.905744 29.905744 0 0 0 25.513929 14.505391H427.132654l62.845361 109.222414A30.078737 30.078737 0 0 0 512.762058 1024H660.382859a29.139776 29.139776 0 0 0 24.574968-14.549389l128.463606-224.843558h114.76818a31.91366 31.91366 0 0 0 24.660965-15.444352l66.471208-117.414069a28.158818 28.158818 0 0 0 0-30.9747l0.042999 0.042999z m-161.273228 14.591387L791.57735 512.490479 518.265827 993.964261l-74.748861-122.87484h-273.268525l65.618244-119.205994h139.386147L101.856313 272.244568h143.055993L380.671605 30.121735l68.34913 119.247993-70.184053 122.87484H925.501726l-69.202094 121.936879 137.594222 241.183873H858.134555z" fill="#605BEC" p-id="17445"></path><path d="M499.962596 699.320634l174.371677-274.719464H324.694955z" fill="#605BEC" p-id="17446"></path></svg>
|
||||
|
After Width: | Height: | Size: 1.6 KiB |
1
channel/web/static/logos/deepseek.svg
Normal file
|
After Width: | Height: | Size: 5.1 KiB |
1
channel/web/static/logos/doubao.svg
Normal file
@@ -0,0 +1 @@
|
||||
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779261485522" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="5381" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M958.976 439.808C804.864 336.896 642.56 321.536 642.56 321.536s8.192 235.008-10.752 306.176c-0.512 9.728-11.776 75.264-43.008 157.696-10.752 28.16-24.064 55.296-39.424 81.408-40.96 74.24-89.6 127.488-89.6 127.488 119.808-48.64 205.312-92.672 309.76-175.616 122.88-96.768 229.376-254.464 189.44-378.88z" fill="#37E1BE" p-id="5382"></path><path d="M329.728 395.776c158.208-100.864 308.736-78.848 312.32-74.752 0.512 0.512 1.024 0.512 1.024 0.512 0-14.336-6.656-60.928-13.312-106.496-11.776-60.928-22.528-124.928-23.04-133.632-170.496-139.264-356.864-78.336-448 25.6-61.44 70.144-103.424 169.984-102.4 224.256V762.88c0.512-12.8 1.536-20.48 2.048-20.48 17.92-197.12 271.36-346.624 271.36-346.624z" fill="#A569FF" p-id="5383"></path><path d="M792.064 272.384c-41.984-43.52-87.552-88.576-122.368-125.44-33.28-34.816-59.392-60.928-62.976-65.536 0.512 8.704 11.264 72.704 23.04 133.632 6.656 45.568 12.8 92.672 13.312 106.496 0 0 162.304 15.36 316.416 118.272-0.512 0-83.456-80.384-167.424-167.424zM549.888 866.816c-2.56 1.024-198.656 107.008-292.352-30.72-20.992-30.72-31.744-68.096-33.28-106.496-3.072-74.752 5.12-227.84 105.472-333.824 0 0-253.44 149.504-270.848 346.624-0.512 0.512-2.048 8.192-2.048 20.48-1.024 32.768 4.608 98.304 43.008 155.136 52.224 78.336 193.024 138.752 328.192 85.504l33.28-9.728c-1.024 0.512 47.616-52.224 88.576-126.976z" fill="#1E37FC" p-id="5384"></path></svg>
|
||||
|
After Width: | Height: | Size: 1.7 KiB |
1
channel/web/static/logos/gemini.svg
Normal file
@@ -0,0 +1 @@
|
||||
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251750646" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="29551" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M214.101333 512c0-32.512 5.546667-63.701333 15.36-92.928L57.173333 290.218667A491.861333 491.861333 0 0 0 4.693333 512c0 79.701333 18.858667 154.88 52.394667 221.610667l172.202667-129.066667A290.56 290.56 0 0 1 214.101333 512" fill="#FBBC05" p-id="29552"></path><path d="M516.693333 216.192c72.106667 0 137.258667 25.002667 188.458667 65.962667L854.101333 136.533333C763.349333 59.178667 646.997333 11.392 516.693333 11.392c-202.325333 0-376.234667 113.28-459.52 278.826667l172.373334 128.853333c39.68-118.016 152.832-202.88 287.146666-202.88" fill="#EA4335" p-id="29553"></path><path d="M516.693333 807.808c-134.357333 0-247.509333-84.864-287.232-202.88l-172.288 128.853333c83.242667 165.546667 257.152 278.826667 459.52 278.826667 124.842667 0 244.053333-43.392 333.568-124.757333l-163.584-123.818667c-46.122667 28.458667-104.234667 43.776-170.026666 43.776" fill="#34A853" p-id="29554"></path><path d="M1005.397333 512c0-29.568-4.693333-61.44-11.648-91.008H516.650667V614.4h274.602666c-13.696 65.962667-51.072 116.650667-104.533333 149.632l163.541333 123.818667c93.994667-85.418667 155.136-212.650667 155.136-375.850667" fill="#4285F4" p-id="29555"></path></svg>
|
||||
|
After Width: | Height: | Size: 1.5 KiB |
1
channel/web/static/logos/linkai.svg
Normal file
|
After Width: | Height: | Size: 11 KiB |
1
channel/web/static/logos/minimax.svg
Normal file
@@ -0,0 +1 @@
|
||||
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251514432" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="11888" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M415.392 475.808v329.984c-22.304 111.744-170.56 82.944-171.2 1.92-0.672-101.824 0-202.976 0-304.064v-117.184c0-14.656-3.2-26.24-16-35.392-24.96-18.72-54.944 3.264-55.584 30.208-1.408 36.16-0.704 71.616-1.408 107.264 0 28.16 0 55.52 0.64 83.648-18.368 123.776-168.32 103.232-171.808 0.704V487.04c0-28.032 54.944-34.624 52.256 7.36-1.792 20.8-0.64 42.272-1.344 62.912-0.64 36.8 55.648 61.6 68.896 1.408 0.64-49.632 0.64-99.264 0.64-149.344 0-62.752 17.824-113.856 84.352-118.624 28.8-2.56 47.968 9.504 66.336 30.304 7.04 7.36 23.68 30.72 24.32 56.16 0 23.456 0.64 46.752 0.64 70.464 0 46.72-0.64 93.76-0.64 140.48 0 30.304 0.64 60.256 0.64 89.856 0 37.536 0 75.552-0.64 113.152-0.64 48.864 58.816 48.16 68.352-0.768 0-57.632 0.64-114.56 0.64-172.192 0-141.984-0.64-283.968-0.64-425.856 0-14.72-2.048-55.584 5.76-70.464 41.504-101.12 167.392-56.96 168.544 26.72 2.432 171.52 0 344.896 0.64 516.8 0 59.616-48.416 46.816-51.104 23.488 0-178.88 0-358.4 0.64-537.024-2.368-44.832-68.832-38.72-72.672-6.592-1.28 36.864-0.64 74.4-1.28 111.232v219.008h0.64l0.448 0.256h-0.064z" fill="#D4367A" p-id="11889"></path><path d="M610.016 473.184v242.336V143.648c21.632-112.512 169.824-83.264 170.464-2.176 0.704 101.12 0 202.912 0.704 304 0 38.784 0 77.728-0.64 116.544 0 15.36 3.776 26.176 16.64 36.032 24.32 18.24 54.24-3.2 55.584-30.592 1.344-35.488 0.64-70.976 0.64-107.328V376.96c18.56-123.776 168.128-103.232 171.264-0.704v310.592c0 28.16-54.304 34.848-51.872-7.296 1.472-21.44 0-267.104 0.768-288.64 1.28-36.16-55.712-61.664-68.928-0.768v148.576c0 63.68-17.856 113.92-84.96 119.36-63.264 1.504-88.704-42.24-90.752-86.432V271.328c0-38.24 0-75.552 0.64-113.088 
0.64-48.864-58.784-48.864-68.896 0.704V831.36c0 14.592 2.048 55.52-5.184 70.432-41.44 101.056-168 56.864-169.152-26.752v-79.616c3.136-53.6 48.416-40.864 50.464-18.176v94.464c2.432 44.928 68.928 39.488 72.064 6.656 1.344-36.896 1.344-73.728 1.344-111.296v-293.824h-0.192v-0.064z" fill="#ED6D48" p-id="11890"></path></svg>
|
||||
|
After Width: | Height: | Size: 2.2 KiB |
1
channel/web/static/logos/moonshot.svg
Normal file
@@ -0,0 +1 @@
|
||||
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251592968" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="16416" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M117.9648 684.6464l342.30272 93.57312v75.34592l209.7152 58.5728A428.99456 428.99456 0 0 1 512 942.08c-176.128 0-327.53664-105.8816-394.0352-257.4336zM83.29216 477.42976l407.30624 112.64-9.6256 37.00736-6.0416 35.0208 383.3856 104.96a432.5376 432.5376 0 0 1-65.10592 70.32832l-688.18944-185.9584A429.4656 429.4656 0 0 1 81.92 512c0-11.63264 0.47104-23.1424 1.37216-34.54976z m57.344-182.4768l429.07648 114.21696a279.94112 279.94112 0 0 0-23.06048 35.55328 201.17504 201.17504 0 0 0-14.70464 34.93888l403.08736 110.26432a426.8032 426.8032 0 0 1-23.552 81.7152L86.54848 448.7168a427.25376 427.25376 0 0 1 54.0672-153.76384z m158.47424-156.75392l404.23424 108.31872a190.2592 190.2592 0 0 0-32.80896 24.90368c-9.13408 8.8064-19.8656 21.4016-32.1536 37.74464l285.24544 77.78304c9.216 30.45376 15.03232 61.8496 17.32608 93.5936L156.61056 269.68064a432.27136 432.27136 0 0 1 142.49984-131.4816zM512 81.92c142.90944 0 269.55776 69.71392 347.7504 176.98816L337.26464 118.90688A428.50304 428.50304 0 0 1 512 81.92z" fill="#000000" p-id="16417"></path></svg>
|
||||
|
After Width: | Height: | Size: 1.3 KiB |
1
channel/web/static/logos/openai.svg
Normal file
@@ -0,0 +1 @@
|
||||
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251225589" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="9015" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M881.664 431.488a218.88 218.88 0 0 0-18.176-177.088A218.624 218.624 0 0 0 628.992 149.76c-40.576-45.824-100.288-71.424-162.176-71.424a219.136 219.136 0 0 0-208 150.4 215.68 215.68 0 0 0-144 104.512 218.944 218.944 0 0 0 26.688 254.912 218.752 218.752 0 0 0 19.2 177.152 217.088 217.088 0 0 0 234.624 104.512 219.136 219.136 0 0 0 162.112 72.512 219.136 219.136 0 0 0 208-150.4 215.68 215.68 0 0 0 144-104.512 219.008 219.008 0 0 0-27.712-256z m-324.288 454.4a158.08 158.08 0 0 1-103.424-37.376c1.088-1.088 4.288-2.176 5.376-3.2l171.712-99.2a28.16 28.16 0 0 0 13.824-24.512V479.488l72.576 41.6c1.024 0 1.024 1.024 1.024 2.112v200.512a160.512 160.512 0 0 1-161.088 162.112z m-347.712-148.288c-19.2-33.088-25.6-71.488-19.2-108.8 1.088 1.024 3.2 2.176 5.376 3.2l171.712 99.2a25.984 25.984 0 0 0 27.712 0l210.112-121.6v84.224c0 1.152 0 2.176-1.024 2.176L430.464 796.16c-76.8 44.8-176 18.176-220.8-58.624z m-44.736-375.424c19.2-32.64 48.896-57.856 84.224-71.488v204.8c0 9.6 5.376 19.2 13.888 24.512l210.176 121.6-72.576 41.6c-1.024 0-2.112 1.088-2.112 0L224.64 582.912a160.448 160.448 0 0 1-59.776-220.8h0.064z m597.312 138.688l-210.112-121.6 72.512-41.6c1.088 0 2.176-1.088 2.176 0l173.824 100.224a161.088 161.088 0 0 1-25.6 291.2V525.44a26.304 26.304 0 0 0-12.8-24.512z m71.488-108.8a23.232 23.232 0 0 0-5.312-3.2L656.64 289.536a26.048 26.048 0 0 0-27.712 0l-210.176 121.6V326.912c0-1.088 0-2.176 1.088-2.176l173.824-100.224a161.152 161.152 0 0 1 220.8 59.712c19.2 32 25.6 70.4 19.2 107.776z m-454.4 149.248l-72.64-41.6c-1.024 0-1.024-1.088-1.024-2.176V297.088A162.048 162.048 0 0 1 467.84 135.04a158.08 158.08 0 0 1 103.424 37.312 22.848 22.848 0 0 1-5.312 3.2L394.24 
274.688a28.16 28.16 0 0 0-13.888 24.512v242.112h-1.088z m39.424-85.312l93.824-54.4 93.888 54.4v107.712l-93.888 54.4-93.824-54.4V456z" fill="#000000" p-id="9016"></path></svg>
|
||||
|
After Width: | Height: | Size: 2.1 KiB |
1
channel/web/static/logos/qianfan.svg
Normal file
@@ -0,0 +1 @@
|
||||
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251568791" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="14450" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M96.20121136 636.3124965c-0.1472897-113.41305959-0.29457937-226.8261192-0.29457937-340.23917879 0-14.87625845 7.65906378-26.51214381 20.4732666-34.02391789 45.51251353-26.65943349 91.02502705-53.31886698 136.83211997-79.53643141 71.1409192-40.94653321 142.42912809-81.59848704 213.71733698-122.39773055 7.36448439-4.12411126 14.58167909-8.3955122 21.50429441-13.2560719 19.44223878-13.40336159 39.03176725-16.05457598 60.09419263-3.53495252 27.39588193 16.34915535 54.93905355 32.25644163 82.48222516 48.16372793 88.0792333 50.96223197 176.30575629 101.77717426 264.38498958 152.59211653 9.86840908 5.74429781 19.88410785 11.19401627 29.60522725 17.0856038 14.13981003 8.54280189 21.50429441 21.06242535 21.50429443 37.70616007 0 147.73155685 0.29457937 295.46311371-0.1472897 443.19467057 0 15.46541722-7.2171947 28.57419943-21.7988738 36.96971163-34.7603663 20.17868721-70.55176044 38.88447758-104.57567833 59.94690293-48.90017634 30.19438599-100.00969801 56.11737105-148.76258466 86.60633642-29.01606849 18.11663161-59.50503387 34.02391789-89.11026112 50.96223197-13.10878221 7.51177407-26.07027474 15.17083783-39.03176726 22.9771913-13.84523065 8.3955122-27.83775099 8.83738127-41.97756102 0.73644843-56.41195043-32.55102101-112.82390085-65.10204201-169.38314098-97.653063-61.86166887-35.64410444-123.72333775-71.1409192-185.4377169-106.78502365-11.19401627-6.48074626-22.24074286-12.81420285-32.99289009-19.88410785-11.48859565-7.65906378-17.08560379-19.14765941-17.08560378-32.69831069-0.1472897-34.7603663 0.1472897-69.52073264 0.29457938-104.28109895 1.62018657-0.58915875 1.62018657-1.62018657-0.29457938-2.65121438z 
m356.58833414-225.500512c2.20934532-1.76747625 4.41869063-3.68224221 6.77532565-5.15513907 68.93157389-39.62092601 137.86314777-79.24185204 206.94201135-118.86277807 2.79850407-1.62018657 6.48074626-1.62018657 6.62803594-6.18616688 0.1472897-4.8605597-4.12411126-4.71327001-6.77532564-6.18616688-40.65195383-23.56635005-81.59848704-46.83812071-122.10315117-70.84633984-16.79102442-10.01569877-32.84560039-8.54280189-48.45830728 0.58915876-45.9543826 26.51214381-91.46689612 53.61344636-137.27398903 80.42016953-31.96186226 18.70579035-64.21830387 37.11700133-96.32745581 55.67550198-18.41121097 10.60485751-27.54317163 25.33382629-27.24859225 47.72185885 0.88373813 89.55213018 0.58915875 179.10426036 0.14728969 268.65639053-0.1472897 20.17868721 9.27925033 33.58204881 25.33382629 43.15587853 31.3727035 18.70579035 63.18727606 37.11700133 95.14913832 54.93905355 10.89943689 6.03887719 21.06242535 13.99252034 35.79139414 18.41121096V505.51925374c6.48074626 19.58952848 18.55850066 34.02391789 36.67513226 44.6287754 27.83775099 16.20186565 63.18727606 12.51962347 86.31175705-10.45756784 26.95401286-26.65943349 28.72148912-62.89269668 12.81420282-90.14128893-16.34915535-28.42690974-43.59774757-37.55887038-74.38129233-38.73718787z m82.48222517 429.64401928c14.28709972-3.82953187 25.92298506-13.99252034 38.88447758-21.35700473 40.94653321-23.27177067 81.30390766-47.72185885 122.54502023-70.55176046 26.95401286-15.02354815 52.87699792-31.66728287 80.71474891-45.21793415 16.79102442-8.10093283 29.60522723-22.53532223 29.60522726-43.4504579 0.1472897-92.939793 0.29457937-185.73229631 0.14728969-278.6720893 0-11.19401627-5.15513907-13.99252034-13.84523067-7.06990501-26.51214381 20.76784598-57.29568854 34.46578693-86.16446735 51.25681135-54.49718448 31.81457257-109.14165865 63.33456576-163.78613282 95.00184862-8.54280189 4.8605597-11.78317502 10.45756784-11.63588535 20.47326662 0.29457937 96.18016613 0.1472897 192.50762194 0.1472897 288.68778806-0.29457937 3.5349525-1.47289687 
7.65906378 3.38766282 10.8994369z" fill="#066AF3" p-id="14451"></path><path d="M96.20121136 636.3124965c1.91476594 1.03102783 1.91476594 2.06205563 0 3.09308345v-3.09308345z" fill="#4372E0" p-id="14452"></path><path d="M391.3697457 505.37196405c-5.44971845-44.33419602 13.84523065-74.08671296 61.4197998-94.55997955 30.93083443 1.17831749 58.03213699 10.31027814 74.38129233 38.5898982 15.75999659 27.39588193 14.13981003 63.48185543-12.81420282 90.14128893-23.27177067 22.97719129-58.47400606 26.65943349-86.31175705 10.45756783-18.11663161-10.60485751-30.34167568-25.03924691-36.67513226-44.62877541z" fill="#002A9A" p-id="14453"></path></svg>
|
||||
|
After Width: | Height: | Size: 4.5 KiB |
1
channel/web/static/logos/zhipu.svg
Normal file
@@ -0,0 +1 @@
|
||||
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1779251419020" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="10062" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M520.063496 0v77.563152c0 269.231173-144.758953 414.054122-434.212862 434.340854L86.106618 511.968002H76.827198V255.984001l443.236298-255.984001z" fill="#5B55F6" p-id="10063"></path><path d="M520.063496 1023.936004v-77.563152c0-269.231173-144.758953-414.054122-434.212862-434.340854L86.042622 511.968002H76.827198v255.984001l443.236298 255.984001z" fill="#376AF3" p-id="10064"></path><path d="M520.063496 0v77.563152c0 269.231173 144.758953 414.054122 434.276858 434.340854L954.08437 511.968002h9.215424V255.984001L520.063496 0z" fill="#5B55F6" p-id="10065"></path><path d="M520.063496 1023.936004v-77.563152c0-269.231173 144.758953-414.054122 434.276858-434.340854L954.08437 511.968002h9.27942v255.984001l-443.236298 255.984001z" fill="#376AF3" p-id="10066"></path></svg>
|
||||
|
After Width: | Height: | Size: 1.1 KiB |
@@ -81,6 +81,8 @@ def _loads_wecom_ws_json(raw):
|
||||
@singleton
|
||||
class WecomBotChannel(ChatChannel):
|
||||
|
||||
NOT_SUPPORT_REPLYTYPE = []
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.bot_id = ""
|
||||
@@ -472,6 +474,8 @@ class WecomBotChannel(ChatChannel):
|
||||
else:
|
||||
context.type = ContextType.TEXT
|
||||
context.content = content.strip()
|
||||
if "desire_rtype" not in context and conf().get("always_reply_voice"):
|
||||
context["desire_rtype"] = ReplyType.VOICE
|
||||
|
||||
return context
|
||||
|
||||
@@ -498,6 +502,8 @@ class WecomBotChannel(ChatChannel):
|
||||
self._send_file(reply.content, receiver, is_group, req_id)
|
||||
elif reply.type == ReplyType.VIDEO or reply.type == ReplyType.VIDEO_URL:
|
||||
self._send_file(reply.content, receiver, is_group, req_id, media_type="video")
|
||||
elif reply.type == ReplyType.VOICE:
|
||||
self._send_voice(reply.content, receiver, is_group, req_id)
|
||||
else:
|
||||
logger.warning(f"[WecomBot] Unsupported reply type: {reply.type}, falling back to text")
|
||||
self._send_text(str(reply.content), receiver, is_group, req_id)
|
||||
@@ -730,6 +736,65 @@ class WecomBotChannel(ChatChannel):
|
||||
},
|
||||
})
|
||||
|
||||
def _send_voice(self, voice_path: str, receiver: str, is_group: bool, req_id: str = None):
    """Send a native voice reply. WeCom voice media must be amr.

    Args:
        voice_path: Local path, ``file://`` URI or http(s) URL of the audio.
            Remote audio is downloaded first; anything whose name does not
            end in ``.amr`` is converted with ``any_to_amr``.
        receiver: Chat id used when sending proactively (no ``req_id``).
        is_group: Selects ``chat_type`` 2 (group) or 1 for proactive sends.
        req_id: When set, the voice is sent as a response to that request;
            otherwise a new request id is generated.

    Returns:
        None. Every failure (download, missing file, conversion, upload) is
        logged and the method returns without sending.
    """
    import tempfile
    from urllib.parse import urlparse

    local_path = voice_path
    if local_path.startswith("file://"):
        local_path = local_path[7:]

    # Files created by this call (download / conversion output). They are
    # removed in the ``finally`` below so repeated voice replies do not pile
    # up in the temp directory. The caller's own file is never deleted.
    created_paths = []
    try:
        if local_path.startswith(("http://", "https://")):
            try:
                resp = requests.get(local_path, timeout=60)
                resp.raise_for_status()
                # Take the extension from the URL *path* only, so a query
                # string ("a.mp3?token=...") cannot leak into the file name.
                ext = os.path.splitext(urlparse(local_path).path)[1] or ".mp3"
                tmp_path = os.path.join(
                    tempfile.gettempdir(),
                    f"wecom_voice_{uuid.uuid4().hex[:8]}{ext}",
                )
                created_paths.append(tmp_path)
                with open(tmp_path, "wb") as f:
                    f.write(resp.content)
                local_path = tmp_path
            except Exception as e:
                logger.error(f"[WecomBot] Failed to download voice for sending: {e}")
                return

        if not os.path.exists(local_path):
            logger.error(f"[WecomBot] Voice file not found: {local_path}")
            return

        amr_path = local_path
        if not local_path.lower().endswith(".amr"):
            try:
                from voice.audio_convert import any_to_amr
                amr_path = os.path.splitext(local_path)[0] + ".amr"
                # Only clean up the converted file if this call creates it;
                # a pre-existing .amr next to the source is left in place.
                if not os.path.exists(amr_path):
                    created_paths.append(amr_path)
                any_to_amr(local_path, amr_path)
            except Exception as e:
                logger.error(f"[WecomBot] Failed to convert voice to amr: {e}")
                return

        # NOTE(review): cleanup assumes _upload_media has finished reading the
        # file by the time it returns the media id — confirm.
        media_id = self._upload_media(amr_path, "voice")
        if not media_id:
            logger.error("[WecomBot] Failed to upload voice media")
            return

        voice_body = {
            "msgtype": "voice",
            "voice": {"media_id": media_id},
        }
        if req_id:
            # Respond within the context of the incoming request.
            self._ws_send({
                "cmd": "aibot_respond_msg",
                "headers": {"req_id": req_id},
                "body": voice_body,
            })
        else:
            # Proactive send: address the chat explicitly.
            self._ws_send({
                "cmd": "aibot_send_msg",
                "headers": {"req_id": self._gen_req_id()},
                "body": {
                    "chatid": receiver,
                    "chat_type": 2 if is_group else 1,
                    **voice_body,
                },
            })
    finally:
        for path in created_paths:
            try:
                os.remove(path)
            except FileNotFoundError:
                # Never written (e.g. the download or conversion failed early).
                pass
            except OSError as e:
                logger.warning(f"[WecomBot] Failed to remove temp voice file {path}: {e}")
|
||||
|
||||
def _active_send_markdown(self, content: str, receiver: str, is_group: bool):
|
||||
"""Proactively send markdown message (for scheduled tasks, no req_id)."""
|
||||
self._ws_send({
|
||||
|
||||
@@ -60,6 +60,9 @@ def _save_credentials(cred_path: str, data: dict):
|
||||
@singleton
|
||||
class WeixinChannel(ChatChannel):
|
||||
|
||||
# ilink bot protocol has no outbound voice item; deliver TTS as a file.
|
||||
NOT_SUPPORT_REPLYTYPE = []
|
||||
|
||||
LOGIN_STATUS_IDLE = "idle"
|
||||
LOGIN_STATUS_WAITING = "waiting_scan"
|
||||
LOGIN_STATUS_SCANNED = "scanned"
|
||||
@@ -464,6 +467,14 @@ class WeixinChannel(ChatChannel):
|
||||
else:
|
||||
context.type = ContextType.TEXT
|
||||
context.content = content.strip()
|
||||
if "desire_rtype" not in context and conf().get("always_reply_voice"):
|
||||
context["desire_rtype"] = ReplyType.VOICE
|
||||
|
||||
elif ctype == ContextType.VOICE:
|
||||
if "desire_rtype" not in context and (
|
||||
conf().get("voice_reply_voice") or conf().get("always_reply_voice")
|
||||
):
|
||||
context["desire_rtype"] = ReplyType.VOICE
|
||||
|
||||
return context
|
||||
|
||||
@@ -486,6 +497,9 @@ class WeixinChannel(ChatChannel):
|
||||
self._send_file(reply.content, receiver, context_token)
|
||||
elif reply.type in (ReplyType.VIDEO, ReplyType.VIDEO_URL):
|
||||
self._send_video(reply.content, receiver, context_token)
|
||||
elif reply.type == ReplyType.VOICE:
|
||||
# ilink has no outbound voice item; deliver TTS as a file attachment.
|
||||
self._send_file(reply.content, receiver, context_token)
|
||||
else:
|
||||
logger.warning(f"[Weixin] Unsupported reply type: {reply.type}, fallback to text")
|
||||
self._send_text(str(reply.content), receiver, context_token)
|
||||
|
||||
@@ -1 +1 @@
|
||||
2.0.8
|
||||
2.0.9
|
||||
|
||||
@@ -47,6 +47,7 @@ GEMINI_3_FLASH_PRE = "gemini-3-flash-preview" # Gemini 3 Flash Preview - Agent
|
||||
GEMINI_3_PRO_PRE = "gemini-3-pro-preview" # Gemini 3 Pro Preview
|
||||
GEMINI_31_PRO_PRE = "gemini-3.1-pro-preview" # Gemini 3.1 Pro Preview - Agent推荐模型
|
||||
GEMINI_31_FLASH_LITE_PRE = "gemini-3.1-flash-lite-preview" # Gemini 3.1 Flash Lite Preview - Agent推荐模型
|
||||
GEMINI_35_FLASH = "gemini-3.5-flash" # Gemini 3.5 Flash - Agent推荐模型
|
||||
|
||||
# OpenAI
|
||||
GPT35 = "gpt-3.5-turbo"
|
||||
@@ -74,6 +75,7 @@ GPT_5_NANO = "gpt-5-nano"
|
||||
GPT_54 = "gpt-5.4" # GPT-5.4 - Agent recommended model
|
||||
GPT_54_MINI = "gpt-5.4-mini"
|
||||
GPT_54_NANO = "gpt-5.4-nano"
|
||||
GPT_55 = "gpt-5.5" # GPT-5.5 - top-tier (expensive), not default
|
||||
O1 = "o1-preview"
|
||||
O1_MINI = "o1-mini"
|
||||
WHISPER_1 = "whisper-1"
|
||||
@@ -104,10 +106,12 @@ QWEN_LONG = "qwen-long"
|
||||
QWEN3_MAX = "qwen3-max" # Qwen3 Max - Agent推荐模型
|
||||
QWEN35_PLUS = "qwen3.5-plus" # Qwen3.5 Plus - Omni model (MultiModalConversation)
|
||||
QWEN36_PLUS = "qwen3.6-plus" # Qwen3.6 Plus - Omni model (MultiModalConversation)
|
||||
QWEN37_MAX = "qwen3.7-max" # Qwen3.7 Max - Agent推荐模型
|
||||
QWQ_PLUS = "qwq-plus"
|
||||
|
||||
# MiniMax
|
||||
MINIMAX_M2_7 = "MiniMax-M2.7" # MiniMax M2.7 - Latest
|
||||
MINIMAX_TEXT_01 = "MiniMax-Text-01" # MiniMax 多模态 (vision)
|
||||
MINIMAX_M2_7_HIGHSPEED = "MiniMax-M2.7-highspeed" # MiniMax M2.7 highspeed
|
||||
MINIMAX_M2_5 = "MiniMax-M2.5" # MiniMax M2.5
|
||||
MINIMAX_M2_1 = "MiniMax-M2.1" # MiniMax M2.1
|
||||
@@ -119,6 +123,7 @@ MINIMAX_ABAB6_5 = "abab6.5-chat" # MiniMax abab6.5
|
||||
GLM_5_1 = "glm-5.1" # 智谱 GLM-5.1 - Agent recommended model (default)
|
||||
GLM_5_TURBO = "glm-5-turbo" # 智谱 GLM-5-Turbo
|
||||
GLM_5 = "glm-5" # 智谱 GLM-5
|
||||
GLM_5V_TURBO = "glm-5v-turbo" # 智谱多模态 (vision)
|
||||
GLM_4 = "glm-4"
|
||||
GLM_4_PLUS = "glm-4-plus"
|
||||
GLM_4_flash = "glm-4-flash"
|
||||
@@ -183,7 +188,7 @@ MODEL_LIST = [
|
||||
"claude", "claude-3-haiku", "claude-3-sonnet", "claude-3-opus", "claude-3.5-sonnet",
|
||||
|
||||
# Gemini
|
||||
GEMINI_31_FLASH_LITE_PRE, GEMINI_31_PRO_PRE, GEMINI_3_PRO_PRE, GEMINI_3_FLASH_PRE, GEMINI_25_PRO_PRE, GEMINI_25_FLASH_PRE,
|
||||
GEMINI_35_FLASH, GEMINI_31_FLASH_LITE_PRE, GEMINI_31_PRO_PRE, GEMINI_3_PRO_PRE, GEMINI_3_FLASH_PRE, GEMINI_25_PRO_PRE, GEMINI_25_FLASH_PRE,
|
||||
GEMINI_20_FLASH, GEMINI_20_flash_exp, GEMINI_15_PRO, GEMINI_15_flash, GEMINI_PRO, GEMINI,
|
||||
|
||||
# OpenAI
|
||||
@@ -193,7 +198,7 @@ MODEL_LIST = [
|
||||
GPT_4o, GPT_4O_0806, GPT_4o_MINI,
|
||||
GPT_41, GPT_41_MINI, GPT_41_NANO,
|
||||
GPT_5, GPT_5_MINI, GPT_5_NANO,
|
||||
GPT_54, GPT_54_MINI, GPT_54_NANO,
|
||||
GPT_54, GPT_55, GPT_54_MINI, GPT_54_NANO,
|
||||
O1, O1_MINI,
|
||||
|
||||
# GLM (智谱AI)
|
||||
@@ -201,7 +206,7 @@ MODEL_LIST = [
|
||||
GLM_4_0520, GLM_4_AIR, GLM_4_AIRX, GLM_4_7,
|
||||
|
||||
# Qwen (通义千问)
|
||||
QWEN36_PLUS, QWEN35_PLUS, QWEN3_MAX, QWEN_MAX, QWEN_PLUS, QWEN_TURBO, QWEN_LONG,
|
||||
QWEN37_MAX, QWEN36_PLUS, QWEN35_PLUS, QWEN3_MAX, QWEN_MAX, QWEN_PLUS, QWEN_TURBO, QWEN_LONG,
|
||||
|
||||
# Doubao (豆包)
|
||||
DOUBAO, DOUBAO_SEED_2_CODE, DOUBAO_SEED_2_PRO, DOUBAO_SEED_2_LITE, DOUBAO_SEED_2_MINI,
|
||||
|
||||
@@ -16,8 +16,8 @@
|
||||
"open_ai_api_base": "https://api.openai.com/v1",
|
||||
"gemini_api_key": "",
|
||||
"gemini_api_base": "https://generativelanguage.googleapis.com",
|
||||
"voice_to_text": "openai",
|
||||
"text_to_voice": "openai",
|
||||
"voice_to_text": "",
|
||||
"text_to_voice": "",
|
||||
"voice_reply_voice": false,
|
||||
"speech_recognition": true,
|
||||
"group_speech_recognition": false,
|
||||
|
||||
99
config.py
@@ -330,8 +330,18 @@ def load_config():
|
||||
config_str = read_file(config_path)
|
||||
logger.debug("[INIT] config str: {}".format(drag_sensitive(config_str)))
|
||||
|
||||
# 将json字符串反序列化为dict类型
|
||||
config = Config(json.loads(config_str))
|
||||
# 将json字符串反序列化为dict类型。
|
||||
# `object_pairs_hook` lets us catch users who accidentally typed the
|
||||
# same key twice (e.g. two `"tools"` blocks) — json.loads would
|
||||
# otherwise silently drop all but the last occurrence.
|
||||
config = Config(json.loads(config_str, object_pairs_hook=_merge_duplicate_keys))
|
||||
|
||||
# Migrate legacy singular keys (`tool`, `skill`) into the canonical
|
||||
# plural buckets so the rest of the codebase only reads one schema.
|
||||
# Deep-merge so existing `tools`/`skills` entries are preserved and
|
||||
# only missing namespaces are filled in from the legacy section.
|
||||
_merge_legacy_namespace(config, legacy="tool", canonical="tools")
|
||||
_merge_legacy_namespace(config, legacy="skill", canonical="skills")
|
||||
|
||||
# override config with environment variables.
|
||||
# Some online deployment platforms (e.g. Railway) deploy project from github directly. So you shouldn't put your secrets like api key in a config file, instead use environment variables to override the default config.
|
||||
@@ -422,7 +432,7 @@ def load_config():
|
||||
os.environ[env_key] = str(val)
|
||||
injected += 1
|
||||
|
||||
injected += _sync_skill_config_to_env(config.get("skill", {}))
|
||||
injected += _sync_skill_config_to_env(config.get("skills", {}))
|
||||
|
||||
if injected:
|
||||
logger.info("[INIT] Synced {} config values to environment variables".format(injected))
|
||||
@@ -430,11 +440,90 @@ def load_config():
|
||||
config.load_user_datas()
|
||||
|
||||
|
||||
def _deep_merge_dicts(base: dict, incoming: dict) -> dict:
|
||||
"""Recursively merge ``incoming`` into ``base`` (incoming wins on leaves)."""
|
||||
for key, val in incoming.items():
|
||||
if (
|
||||
key in base
|
||||
and isinstance(base[key], dict)
|
||||
and isinstance(val, dict)
|
||||
):
|
||||
_deep_merge_dicts(base[key], val)
|
||||
else:
|
||||
base[key] = val
|
||||
return base
|
||||
|
||||
|
||||
def _merge_duplicate_keys(pairs):
|
||||
"""object_pairs_hook for json.loads: deep-merge duplicate top-level keys
|
||||
(lists concat, dicts merge, scalars take the latter) instead of dropping."""
|
||||
out = {}
|
||||
duplicates = []
|
||||
for key, val in pairs:
|
||||
if key not in out:
|
||||
out[key] = val
|
||||
continue
|
||||
duplicates.append(key)
|
||||
prev = out[key]
|
||||
if isinstance(prev, dict) and isinstance(val, dict):
|
||||
_deep_merge_dicts(prev, val)
|
||||
elif isinstance(prev, list) and isinstance(val, list):
|
||||
prev.extend(val)
|
||||
else:
|
||||
out[key] = val
|
||||
if duplicates:
|
||||
# logger may not be wired yet — fall back to print so we never lose the warning.
|
||||
unique = sorted(set(duplicates))
|
||||
try:
|
||||
logger.warning("[INIT] config.json has duplicate keys (merged): %s", unique)
|
||||
except Exception:
|
||||
print("[INIT] config.json has duplicate keys (merged):", unique)
|
||||
return out
|
||||
|
||||
|
||||
def _merge_legacy_namespace(cfg, legacy: str, canonical: str) -> None:
|
||||
"""Fold deprecated singular keys (``tool`` / ``skill``) into their plural
|
||||
canonical counterparts at load time. Canonical entries always win."""
|
||||
legacy_section = cfg.get(legacy)
|
||||
if not isinstance(legacy_section, dict) or not legacy_section:
|
||||
cfg.pop(legacy, None)
|
||||
return
|
||||
canonical_section = cfg.get(canonical)
|
||||
if not isinstance(canonical_section, dict):
|
||||
canonical_section = {}
|
||||
merged_keys = []
|
||||
for name, val in legacy_section.items():
|
||||
if name in canonical_section:
|
||||
if isinstance(canonical_section[name], dict) and isinstance(val, dict):
|
||||
for sub_key, sub_val in val.items():
|
||||
if (
|
||||
sub_key in canonical_section[name]
|
||||
and isinstance(canonical_section[name][sub_key], dict)
|
||||
and isinstance(sub_val, dict)
|
||||
):
|
||||
_deep_merge_dicts(sub_val, canonical_section[name][sub_key])
|
||||
canonical_section[name][sub_key] = sub_val
|
||||
else:
|
||||
canonical_section[name].setdefault(sub_key, sub_val)
|
||||
continue
|
||||
canonical_section[name] = val
|
||||
merged_keys.append(name)
|
||||
cfg[canonical] = canonical_section
|
||||
cfg.pop(legacy, None)
|
||||
if merged_keys:
|
||||
logger.warning(
|
||||
"[INIT] Legacy config key '{}' is deprecated; merged into '{}': {}. "
|
||||
"Please rename '{}' to '{}' in your config.json.".format(
|
||||
legacy, canonical, merged_keys, legacy, canonical,
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def _sync_skill_config_to_env(skill_section) -> int:
|
||||
"""Flatten skill-namespaced config into environment variables.
|
||||
|
||||
Mapping rule: ``config["skill"][<name>][<key>]`` -> ``SKILL_<NAME>_<KEY>``
|
||||
(e.g. ``skill["image-generation"].model`` -> ``SKILL_IMAGE_GENERATION_MODEL``).
|
||||
Mapping rule: ``config["skills"][<name>][<key>]`` -> ``SKILL_<NAME>_<KEY>``
|
||||
(e.g. ``skills["image-generation"].model`` -> ``SKILL_IMAGE_GENERATION_MODEL``).
|
||||
|
||||
This lets subprocess-based skill scripts read their own settings without
|
||||
importing project code. Existing env vars are NOT overwritten so the
|
||||
|
||||
39
docs/channels/index.mdx
Normal file
@@ -0,0 +1,39 @@
|
||||
---
|
||||
title: 通道概览
|
||||
description: CowAgent 支持的通道及能力矩阵
|
||||
---
|
||||
|
||||
CowAgent 支持接入多种聊天通道,启动时通过 `channel_type` 切换。Web 控制台默认开启,可与其他接入通道并行运行。
|
||||
|
||||
## 能力矩阵
|
||||
|
||||
下表汇总各通道支持的入站消息类型、机器人回复类型与群聊能力,方便按场景选择。
|
||||
|
||||
| 通道 | 文本 | 图片 | 文件 | 语音 | 群聊 |
|
||||
| --- | :-: | :-: | :-: | :-: | :-: |
|
||||
| [微信](/channels/weixin) | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [Web 控制台](/channels/web) | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [飞书](/channels/feishu) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [钉钉](/channels/dingtalk) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [企微智能机器人](/channels/wecom-bot) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [QQ](/channels/qq) | ✅ | ✅ | ✅ | | ✅ |
|
||||
| [企业微信应用](/channels/wecom) | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [公众号](/channels/wechatmp) | ✅ | ✅ | | ✅ | |
|
||||
|
||||
- **图片 / 文件 / 语音**三列:表示该通道支持收发对应类型的消息,具体细节详见各通道文档
|
||||
- **群聊**列指可识别并响应群消息
|
||||
|
||||
<Tip>
|
||||
每个通道的语音 / 图像能力依赖对应模型厂商的配置,详见 [模型概览](/models)。
|
||||
</Tip>
|
||||
|
||||
## 通道一览
|
||||
|
||||
- [Web 控制台](/channels/web) — 内置浏览器对话和管理面板,默认开启
|
||||
- [微信](/channels/weixin) — 通过个人微信扫码登录
|
||||
- [飞书](/channels/feishu) — 飞书自建机器人
|
||||
- [钉钉](/channels/dingtalk) — 钉钉自建机器人
|
||||
- [企微智能机器人](/channels/wecom-bot) — 企业微信智能机器人
|
||||
- [QQ](/channels/qq) — QQ 官方机器人开放平台
|
||||
- [企业微信应用](/channels/wecom) — 企业微信自建应用接入
|
||||
- [公众号](/channels/wechatmp) — 微信公众号(订阅号 / 服务号)
|
||||
@@ -59,9 +59,9 @@ Web 控制台是 CowAgent 的默认通道,启动后会自动运行,通过浏
|
||||
|
||||
### 模型管理
|
||||
|
||||
支持在线管理模型配置,无需手动编辑配置文件:
|
||||
支持在线管理不同模型厂商的文本、图像、语音、向量模型配置,无需手动编辑配置文件:
|
||||
|
||||
<img width="850" src="https://cdn.link-ai.tech/doc/20260227173811.png" />
|
||||
<img width="850" src="https://cdn.link-ai.tech/doc/20260521212949.png" />
|
||||
|
||||
### 技能管理
|
||||
|
||||
|
||||
@@ -181,6 +181,7 @@
|
||||
{
|
||||
"group": "接入渠道",
|
||||
"pages": [
|
||||
"channels/index",
|
||||
"channels/weixin",
|
||||
"channels/web",
|
||||
"channels/feishu",
|
||||
|
||||
@@ -40,7 +40,7 @@ To force a specific Vision model, set it explicitly in `config.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"tool": {
|
||||
"tools": {
|
||||
"vision": {
|
||||
"model": "ernie-4.5-turbo-vl"
|
||||
}
|
||||
|
||||
@@ -11,7 +11,7 @@ New built-in `image-generation` skill supporting text-to-image, image-to-image,
|
||||
- **Zero model selection**: Just configure an API key and it works — no need to manually specify a model. You can also name a specific model in conversation (e.g. "draw a cat with seedream")
|
||||
- **Flexible control**: Supports `quality`, `size` (512/1K–4K), and `aspect_ratio` parameters, with each provider automatically mapping to its supported values
|
||||
- **Image editing**: Pass existing images for editing, style transfer, or multi-image fusion (Seedream supports up to 14 reference images)
|
||||
- **Skill-level config**: Pin a default model via `skill.image-generation.model` in `config.json`
|
||||
- **Skill-level config**: Pin a default model via `skills.image-generation.model` in `config.json`
|
||||
- **Image lightbox**: All images in the Web console now support click-to-enlarge preview
|
||||
|
||||
Docs: [Image Generation Skill](https://docs.cowagent.ai/en/skills/image-generation)
|
||||
|
||||
@@ -51,7 +51,7 @@ The voice and streaming building blocks come from a community contribution #2791
|
||||
|
||||
## 🔧 Tools and Safety
|
||||
|
||||
- **Vision model selection**: `tool.vision.model` config now actually takes effect, with automatic fallback when unconfigured #2792
|
||||
- **Vision model selection**: `tools.vision.model` config now actually takes effect, with automatic fallback when unconfigured #2792
|
||||
- **Bash safety prompt**: The destructive-deletion confirm prompt is now scoped to paths outside the workspace — routine in-workspace operations are no longer interrupted
|
||||
|
||||
## 🐛 Other Fixes
|
||||
|
||||
@@ -87,7 +87,7 @@ Configure ARK_API_KEY as xxx
|
||||
To force all image generation through a specific provider's model, add this to `config.json`:
|
||||
|
||||
```json
|
||||
"skill": {
|
||||
"skills": {
|
||||
"image-generation": {
|
||||
"model": "seedream-5.0-lite"
|
||||
}
|
||||
|
||||
@@ -51,7 +51,7 @@ To specify a particular model for the vision tool, add to `config.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"tool": {
|
||||
"tools": {
|
||||
"vision": {
|
||||
"model": "ernie-4.5-turbo-vl"
|
||||
}
|
||||
|
||||
@@ -40,7 +40,7 @@ description: Baidu Qianfan ERNIE モデル設定
|
||||
|
||||
```json
|
||||
{
|
||||
"tool": {
|
||||
"tools": {
|
||||
"vision": {
|
||||
"model": "ernie-4.5-turbo-vl"
|
||||
}
|
||||
|
||||
@@ -11,7 +11,7 @@ description: CowAgent 2.0.7 - 画像生成スキル(6プロバイダー自動
|
||||
- **モデル選択不要**:API Key を設定するだけで使用可能、モデルを手動で指定する必要なし。会話で特定モデルを指名することも可能(例:「seedream で猫を描いて」)
|
||||
- **柔軟な制御**:`quality`(画質)、`size`(解像度、512/1K〜4K)、`aspect_ratio`(アスペクト比)パラメータ対応、各プロバイダーが自動的に有効な値にマッピング
|
||||
- **画像編集**:既存の画像を渡して編集・スタイル変換・複数画像融合が可能(Seedream は最大 14 枚の参照画像をサポート)
|
||||
- **スキルレベル設定**:`config.json` の `skill.image-generation.model` でデフォルトモデルを固定可能
|
||||
- **スキルレベル設定**:`config.json` の `skills.image-generation.model` でデフォルトモデルを固定可能
|
||||
- **画像ライトボックス**:Web コンソールのすべての画像がクリックで拡大プレビュー対応
|
||||
|
||||
ドキュメント:[画像生成スキル](https://docs.cowagent.ai/ja/skills/image-generation)
|
||||
|
||||
@@ -51,7 +51,7 @@ description: CowAgent 2.0.8 - 飛書チャネル全面アップグレード(
|
||||
|
||||
## 🔧 ツールと安全性
|
||||
|
||||
- **Vision モデル選択**:`tool.vision.model` 設定が実際に反映されるようになり、未設定時は自動フォールバック #2792
|
||||
- **Vision モデル選択**:`tools.vision.model` 設定が実際に反映されるようになり、未設定時は自動フォールバック #2792
|
||||
- **Bash セーフティ確認**:破壊的削除の確認プロンプトをワークスペース外のパスに限定。ワークスペース内の通常操作は中断されません
|
||||
|
||||
## 🐛 その他の修正
|
||||
|
||||
@@ -87,7 +87,7 @@ ARK_API_KEY を xxx に設定して
|
||||
すべての画像生成を特定のプロバイダーのモデルで固定したい場合、`config.json` に以下を追加:
|
||||
|
||||
```json
|
||||
"skill": {
|
||||
"skills": {
|
||||
"image-generation": {
|
||||
"model": "seedream-5.0-lite"
|
||||
}
|
||||
|
||||
@@ -51,7 +51,7 @@ Vision ツールで使用するモデルを指定するには、`config.json`
|
||||
|
||||
```json
|
||||
{
|
||||
"tool": {
|
||||
"tools": {
|
||||
"vision": {
|
||||
"model": "ernie-4.5-turbo-vl"
|
||||
}
|
||||
|
||||
@@ -1,8 +1,16 @@
|
||||
---
|
||||
title: Claude
|
||||
description: Claude 模型配置
|
||||
description: Anthropic Claude 模型配置(文本对话 + 图像理解)
|
||||
---
|
||||
|
||||
Claude 由 Anthropic 提供,支持文本对话与图像理解,主流 Sonnet / Opus 模型均原生支持视觉,无需额外指定 Vision 模型。
|
||||
|
||||
<Tip>
|
||||
通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。
|
||||
</Tip>
|
||||
|
||||
## 文本对话
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "claude-sonnet-4-6",
|
||||
@@ -14,4 +22,28 @@ description: Claude 模型配置
|
||||
| --- | --- |
|
||||
| `model` | 支持 `claude-sonnet-4-6`、`claude-opus-4-7`、`claude-opus-4-6`、`claude-sonnet-4-5`、`claude-sonnet-4-0`、`claude-3-5-sonnet-latest` 等,参考 [官方模型](https://docs.anthropic.com/en/docs/about-claude/models/overview) |
|
||||
| `claude_api_key` | 在 [Claude 控制台](https://console.anthropic.com/settings/keys) 创建 |
|
||||
| `claude_api_base` | 可选,默认为 `https://api.anthropic.com/v1`,修改可接入第三方代理 |
|
||||
| `claude_api_base` | 可选,默认为 `https://api.anthropic.com/v1`,可改为第三方代理 |
|
||||
|
||||
### 模型选择
|
||||
|
||||
| 模型 | 适用场景 |
|
||||
| --- | --- |
|
||||
| `claude-sonnet-4-6` | 默认推荐,性价比与速度平衡 |
|
||||
| `claude-opus-4-7` | 复杂推理与长链路任务,效果最佳但成本更高 |
|
||||
| `claude-sonnet-4-5` / `claude-sonnet-4-0` | 上一代旗舰,价格更低 |
|
||||
|
||||
## 图像理解
|
||||
|
||||
配置 `claude_api_key` 后 Agent 的 Vision 工具会自动使用 Claude 主模型识别图像,无需额外配置。
|
||||
|
||||
如需手动指定 Vision 模型,可在配置文件中显式配置:
|
||||
|
||||
```json
|
||||
{
|
||||
"tools": {
|
||||
"vision": {
|
||||
"model": "claude-sonnet-4-6"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
@@ -13,7 +13,7 @@ description: 自定义厂商配置,适用于第三方 API 代理和本地模
|
||||
与 `openai` 厂商的区别:选择自定义厂商后,通过 `/config model` 切换模型时,不会自动切换厂商类型,始终使用自定义的 API 地址。
|
||||
</Note>
|
||||
|
||||
## 配置方式
|
||||
## 文本对话
|
||||
|
||||
### 第三方 API 代理
|
||||
|
||||
@@ -35,7 +35,7 @@ description: 自定义厂商配置,适用于第三方 API 代理和本地模
|
||||
|
||||
### 本地模型
|
||||
|
||||
本地模型通常不需要 API Key,只需填写 API Base 即可:
|
||||
本地模型通常不需要 API Key,只需填写 API Base:
|
||||
|
||||
```json
|
||||
{
|
||||
@@ -53,7 +53,7 @@ description: 自定义厂商配置,适用于第三方 API 代理和本地模
|
||||
| [vLLM](https://docs.vllm.ai) | `http://localhost:8000/v1` |
|
||||
| [LocalAI](https://localai.io) | `http://localhost:8080/v1` |
|
||||
|
||||
## 切换模型
|
||||
### 切换模型
|
||||
|
||||
自定义厂商下切换模型时,只会修改 `model`,不会改变 `bot_type` 和 API 地址:
|
||||
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
---
|
||||
title: DeepSeek
|
||||
description: DeepSeek 模型配置
|
||||
description: DeepSeek 模型配置(文本对话 + 思考模式)
|
||||
---
|
||||
|
||||
方式一:官方接入(推荐):
|
||||
DeepSeek 是当前 Agent 模式默认推荐的厂商之一,主打高性价比的文本对话和任务规划能力。
|
||||
|
||||
## 文本对话
|
||||
|
||||
```json
|
||||
{
|
||||
@@ -18,20 +20,20 @@ description: DeepSeek 模型配置
|
||||
| `deepseek_api_key` | 在 [DeepSeek 平台](https://platform.deepseek.com/api_keys) 创建 |
|
||||
| `deepseek_api_base` | 可选,默认为 `https://api.deepseek.com/v1`,可修改为第三方代理地址 |
|
||||
|
||||
## 模型选择
|
||||
### 模型选择
|
||||
|
||||
| 模型 | 适用场景 |
|
||||
| --- | --- |
|
||||
| `deepseek-v4-flash` | 默认推荐,速度快、成本低 |
|
||||
| `deepseek-v4-pro` | 更智能、复杂任务效果更强 |
|
||||
| `deepseek-v4-pro` | 更智能,复杂任务效果更强 |
|
||||
|
||||
## 思考模式
|
||||
|
||||
V4 系列(`deepseek-v4-flash` / `deepseek-v4-pro`)支持显式的"思考模式":模型在输出最终回答前,先输出一段思维链(`reasoning_content`),从而提升答案质量。
|
||||
V4 系列(`deepseek-v4-flash` / `deepseek-v4-pro`)支持显式的「思考模式」:模型在输出最终回答前,先输出一段思维链(`reasoning_content`),从而提升答案质量。
|
||||
|
||||
### 开关
|
||||
|
||||
通过全局配置 `enable_thinking` 控制:
|
||||
通过全局配置 `enable_thinking` 控制,也可在 Web 控制台的配置页面中切换:
|
||||
|
||||
```json
|
||||
{
|
||||
@@ -66,16 +68,5 @@ V4 系列(`deepseek-v4-flash` / `deepseek-v4-pro`)支持显式的"思考模
|
||||
- **多轮工具调用**:当历史中包含工具调用时,DeepSeek 要求所有 assistant 消息必须回传 `reasoning_content`。CowAgent 会自动处理回传逻辑,跨轮次切换思考开关也不会出错。
|
||||
|
||||
<Tip>
|
||||
默认使用 `deepseek-v4-flash`;复杂任务可使用 `deepseek-v4-pro`;需要深度思考可开启 `enable_thinking`。
|
||||
默认使用 `deepseek-v4-flash`;复杂任务可使用 `deepseek-v4-pro`;需要深度推理可开启 `enable_thinking`。
|
||||
</Tip>
|
||||
|
||||
方式二:OpenAI 兼容方式接入:
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "deepseek-v4-flash",
|
||||
"bot_type": "openai",
|
||||
"open_ai_api_key": "YOUR_API_KEY",
|
||||
"open_ai_api_base": "https://api.deepseek.com/v1"
|
||||
}
|
||||
```
|
||||
|
||||
@@ -1,17 +1,66 @@
|
||||
---
|
||||
title: 豆包 Doubao
|
||||
description: 豆包 (火山方舟) 模型配置
|
||||
description: 豆包(火山方舟)模型配置(文本 / 图像理解 / 图像生成 / 向量)
|
||||
---
|
||||
|
||||
豆包(火山方舟)支持文本对话、图像理解、图像生成(Seedream)和向量能力,一份 `ark_api_key` 即可启用全部能力。
|
||||
|
||||
<Tip>
|
||||
通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。
|
||||
</Tip>
|
||||
|
||||
## 文本对话
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "doubao-seed-2-0-code-preview-260215",
|
||||
"model": "doubao-seed-2-0-pro-260215",
|
||||
"ark_api_key": "YOUR_API_KEY"
|
||||
}
|
||||
```
|
||||
|
||||
| 参数 | 说明 |
|
||||
| --- | --- |
|
||||
| `model` | 可填 `doubao-seed-2-0-code-preview-260215`、`doubao-seed-2-0-pro-260215`、`doubao-seed-2-0-lite-260215` 等 |
|
||||
| `model` | 可填 `doubao-seed-2-0-pro-260215`、`doubao-seed-2-0-code-preview-260215`、`doubao-seed-2-0-lite-260215` 等 |
|
||||
| `ark_api_key` | 在 [火山方舟控制台](https://console.volcengine.com/ark/region:ark+cn-beijing/apikey) 创建 |
|
||||
| `ark_base_url` | 可选,默认为 `https://ark.cn-beijing.volces.com/api/v3` |
|
||||
|
||||
## 图像理解
|
||||
|
||||
配置 `ark_api_key` 后 Agent 的 Vision 工具会自动使用 `doubao-seed-2-0-pro-260215` 识别图像,无需额外配置。
|
||||
|
||||
如需手动指定 Vision 模型:
|
||||
|
||||
```json
|
||||
{
|
||||
"tools": {
|
||||
"vision": {
|
||||
"model": "doubao-seed-2-0-pro-260215"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 图像生成
|
||||
|
||||
```json
|
||||
{
|
||||
"skills": {
|
||||
"image-generation": {
|
||||
"model": "seedream-5.0-lite"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
可选模型:`seedream-5.0-lite`、`seedream-4.5`。
|
||||
|
||||
## 向量
|
||||
|
||||
```json
|
||||
{
|
||||
"embedding_provider": "doubao",
|
||||
"embedding_model": "doubao-embedding-vision-251215"
|
||||
}
|
||||
```
|
||||
|
||||
默认模型 `doubao-embedding-vision-251215`(多模态 embedding),可在配置文件中通过 `embedding_dimensions` 指定 1024 或 2048 维。修改 embedding 后需执行 `/memory rebuild-index` 命令重建索引。
|
||||
|
||||
@@ -1,16 +1,59 @@
|
||||
---
|
||||
title: Gemini
|
||||
description: Google Gemini 模型配置
|
||||
description: Google Gemini 模型配置(文本对话 + 图像理解 + 图像生成)
|
||||
---
|
||||
|
||||
Google Gemini 支持文本对话、图像理解和图像生成(Nano Banana 系列),一个 `gemini_api_key` 即可启用全部能力。
|
||||
|
||||
<Tip>
|
||||
通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。
|
||||
</Tip>
|
||||
|
||||
## 文本对话
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "gemini-3.1-pro-preview",
|
||||
"model": "gemini-3.5-flash",
|
||||
"gemini_api_key": "YOUR_API_KEY"
|
||||
}
|
||||
```
|
||||
|
||||
| 参数 | 说明 |
|
||||
| --- | --- |
|
||||
| `model` | 支持 `gemini-3.1-flash-lite-preview`、`gemini-3.1-pro-preview`、`gemini-3-flash-preview`、`gemini-3-pro-preview` 等,参考 [官方文档](https://ai.google.dev/gemini-api/docs/models) |
|
||||
| `model` | 推荐 `gemini-3.5-flash`,亦支持 `gemini-3.1-pro-preview`、`gemini-3.1-flash-lite-preview`、`gemini-3-flash-preview`、`gemini-3-pro-preview` 等,参考 [官方文档](https://ai.google.dev/gemini-api/docs/models) |
|
||||
| `gemini_api_key` | 在 [Google AI Studio](https://aistudio.google.com/app/apikey) 创建 |
|
||||
| `gemini_api_base` | 可选,默认为 `https://generativelanguage.googleapis.com`,可改为第三方代理 |
|
||||
|
||||
## 图像理解
|
||||
|
||||
Gemini 全系列模型均原生支持视觉,配置 `gemini_api_key` 后 Agent 的 Vision 工具会自动使用主模型识别图像,无需额外配置。
|
||||
|
||||
如需手动指定 Vision 模型:
|
||||
|
||||
```json
|
||||
{
|
||||
"tools": {
|
||||
"vision": {
|
||||
"model": "gemini-3.1-flash-lite-preview"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 图像生成
|
||||
|
||||
```json
|
||||
{
|
||||
"skills": {
|
||||
"image-generation": {
|
||||
"model": "gemini-3.1-flash-image-preview"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| 模型 ID | 别名 |
|
||||
| --- | --- |
|
||||
| `gemini-3.1-flash-image-preview` | Nano Banana 2 |
|
||||
| `gemini-3-pro-image-preview` | Nano Banana Pro |
|
||||
| `gemini-2.5-flash-image` | Nano Banana |
|
||||
|
||||
@@ -1,8 +1,16 @@
|
||||
---
|
||||
title: 智谱 GLM
|
||||
description: 智谱AI GLM 模型配置
|
||||
description: 智谱 AI GLM 模型配置(文本 / 图像理解 / 语音识别 / 向量)
|
||||
---
|
||||
|
||||
智谱 AI 支持文本对话、图像理解、语音识别(ASR)和向量(Embedding),一份 `zhipu_ai_api_key` 即可启用全部能力。
|
||||
|
||||
<Tip>
|
||||
通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。
|
||||
</Tip>
|
||||
|
||||
## 文本对话
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "glm-5.1",
|
||||
@@ -13,15 +21,36 @@ description: 智谱AI GLM 模型配置
|
||||
| 参数 | 说明 |
|
||||
| --- | --- |
|
||||
| `model` | 可填 `glm-5.1`、`glm-5-turbo`、`glm-5`、`glm-4.7`、`glm-4-plus`、`glm-4-flash`、`glm-4-air` 等,参考 [模型编码](https://bigmodel.cn/dev/api/normal-model/glm-4) |
|
||||
| `zhipu_ai_api_key` | 在 [智谱AI 控制台](https://www.bigmodel.cn/usercenter/proj-mgmt/apikeys) 创建 |
|
||||
| `zhipu_ai_api_key` | 在 [智谱 AI 控制台](https://www.bigmodel.cn/usercenter/proj-mgmt/apikeys) 创建 |
|
||||
| `zhipu_ai_api_base` | 可选,默认为 `https://open.bigmodel.cn/api/paas/v4` |
|
||||
|
||||
也支持 OpenAI 兼容方式接入:
|
||||
## 图像理解
|
||||
|
||||
智谱 chat 系列模型(`glm-5.1`、`glm-5-turbo` 等)不支持视觉,视觉调用统一路由到 `glm-5v-turbo`。配置 `zhipu_ai_api_key` 后 Agent 的 Vision 工具会自动使用该模型,无需在配置文件中显式指定。
|
||||
|
||||
## 语音识别
|
||||
|
||||
```json
|
||||
{
|
||||
"bot_type": "openai",
|
||||
"model": "glm-5.1",
|
||||
"open_ai_api_base": "https://open.bigmodel.cn/api/paas/v4",
|
||||
"open_ai_api_key": "YOUR_API_KEY"
|
||||
"voice_to_text": "zhipu",
|
||||
"voice_to_text_model": "glm-asr-2512"
|
||||
}
|
||||
```
|
||||
|
||||
| 参数 | 说明 |
|
||||
| --- | --- |
|
||||
| `voice_to_text` | 设为 `zhipu` 启用智谱 ASR |
|
||||
| `voice_to_text_model` | 可选,默认 `glm-asr-2512` |
|
||||
|
||||
凭证自动复用 `zhipu_ai_api_key`。语音文件建议小于 25MB,超大文件可能被服务端拒绝。
|
||||
|
||||
## 向量
|
||||
|
||||
```json
|
||||
{
|
||||
"embedding_provider": "zhipu",
|
||||
"embedding_model": "embedding-3"
|
||||
}
|
||||
```
|
||||
|
||||
可选模型:`embedding-3`、`embedding-2`。修改 embedding 后需执行 `/memory rebuild-index` 命令重建索引。
|
||||
|
||||
@@ -1,67 +1,45 @@
|
||||
---
|
||||
title: 模型概览
|
||||
description: CowAgent 支持的模型及推荐选择
|
||||
description: CowAgent 支持的模型厂商及能力矩阵
|
||||
---
|
||||
|
||||
CowAgent 支持国内外主流厂商的大语言模型,模型接口实现在项目的 `models/` 目录下。
|
||||
CowAgent 支持国内外主流厂商的大语言模型,模型接口实现在项目的 `models/` 目录下。除文本对话外,部分厂商还提供视觉理解、图像生成、语音识别、语音合成、向量等能力,可在 Agent 流程中按需调用。
|
||||
|
||||
<Note>
|
||||
Agent 模式下推荐使用以下模型,可根据效果及成本综合选择:deepseek-v4-flash、MiniMax-M2.7、claude-sonnet-4-6、gemini-3.1-pro-preview、glm-5.1、qwen3.6-plus、kimi-k2.6、ernie-5.1
|
||||
Agent 模式下推荐使用以下模型,可根据效果及成本综合选择:deepseek-v4-flash、MiniMax-M2.7、claude-sonnet-4-6、gemini-3.5-flash、glm-5.1、qwen3.6-plus、kimi-k2.6、ernie-5.1。
|
||||
|
||||
同时支持使用 [LinkAI](https://link-ai.tech) 平台接口,可灵活切换多种模型,并支持知识库、工作流、插件等 Agent 能力。
|
||||
同时支持使用 [LinkAI](https://link-ai.tech) 平台接口,一个 Key 即可灵活切换多家厂商,并附带知识库、工作流、插件等能力。
|
||||
</Note>
|
||||
|
||||
|
||||
## 模型能力总览
|
||||
|
||||
各厂商提供的能力一览。「文本」指主对话模型,其余列表示该厂商可承担对应 Agent 能力。
|
||||
|
||||
| 厂商 | 代表模型 | 文本 | 图像理解 | 图像生成 | 语音识别 | 语音合成 | 向量 |
|
||||
| --- | --- | :-: | :-: | :-: | :-: | :-: | :-: |
|
||||
| [DeepSeek](/models/deepseek) | deepseek-v4-flash / pro | ✅ | | | | | |
|
||||
| [MiniMax](/models/minimax) | MiniMax-M2.7 | ✅ | ✅ | ✅ | | ✅ | |
|
||||
| [Claude](/models/claude) | claude-opus-4-7 | ✅ | ✅ | | | | |
|
||||
| [Gemini](/models/gemini) | gemini-3.5-flash | ✅ | ✅ | ✅ | | | |
|
||||
| [OpenAI](/models/openai) | gpt-5.5、o 系列 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [智谱 GLM](/models/glm) | glm-5.1、glm-5v-turbo | ✅ | ✅ | | ✅ | | ✅ |
|
||||
| [通义千问](/models/qwen) | qwen3.7-max | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [豆包 Doubao](/models/doubao) | doubao-seed-2.0 系列 | ✅ | ✅ | ✅ | | | ✅ |
|
||||
| [Kimi](/models/kimi) | kimi-k2.6 | ✅ | ✅ | | | | |
|
||||
| [百度千帆](/models/qianfan) | ernie-5.1 | ✅ | ✅ | | | | |
|
||||
| [LinkAI](/models/linkai) | 多厂商 100+ 模型统一接入 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [自定义](/models/custom) | 本地模型 / 第三方代理 | ✅ | | | | | |
|
||||
|
||||
<Tip>
|
||||
Web 控制台中各项能力(视觉 / 图像 / 语音识别 / 语音合成 / 向量 / 网络搜索)均可独立配置厂商与模型,互相之间不强制绑定。
|
||||
</Tip>
|
||||
|
||||
|
||||
## 配置方式
|
||||
|
||||
**方式一(推荐):** 通过 [Web 控制台](/channels/web) 在线管理模型配置,无需手动编辑配置文件:
|
||||
**方式一(推荐):** 通过 [Web 控制台](/channels/web) 在线管理模型与各项能力,无需手动编辑配置文件:
|
||||
|
||||
<img width="850" src="https://cdn.link-ai.tech/doc/20260227173811.png" />
|
||||
<img width="900" src="https://cdn.link-ai.tech/doc/20260521212527.png" />
|
||||
|
||||
**方式二:** 手动编辑 `config.json`,根据所选模型填写对应的模型名称和 API Key。每个模型也支持 OpenAI 兼容方式接入,将 `bot_type` 设为 `openai`,配置 `open_ai_api_base` 和 `open_ai_api_key` 即可。
|
||||
|
||||
|
||||
## 支持的模型
|
||||
|
||||
<CardGroup cols={2}>
|
||||
<Card title="DeepSeek" href="/models/deepseek">
|
||||
deepseek-v4-flash、deepseek-v4-pro 等
|
||||
</Card>
|
||||
<Card title="百度千帆 / ERNIE" href="/models/qianfan">
|
||||
ernie-5.1、ernie-5.0、ernie-4.5-turbo-128k 等
|
||||
</Card>
|
||||
<Card title="MiniMax" href="/models/minimax">
|
||||
MiniMax-M2.7 等系列模型
|
||||
</Card>
|
||||
<Card title="Claude" href="/models/claude">
|
||||
claude-sonnet-4-6 等
|
||||
</Card>
|
||||
<Card title="Gemini" href="/models/gemini">
|
||||
gemini-3.1-pro-preview 等
|
||||
</Card>
|
||||
<Card title="OpenAI" href="/models/openai">
|
||||
gpt-5.4、gpt-4.1、o 系列等
|
||||
</Card>
|
||||
<Card title="智谱 GLM" href="/models/glm">
|
||||
glm-5.1、glm-5-turbo、glm-5 等系列模型
|
||||
</Card>
|
||||
<Card title="通义千问 Qwen" href="/models/qwen">
|
||||
qwen3.6-plus、qwen3-max 等
|
||||
</Card>
|
||||
<Card title="豆包 Doubao" href="/models/doubao">
|
||||
doubao-seed 系列模型
|
||||
</Card>
|
||||
<Card title="Kimi" href="/models/kimi">
|
||||
kimi-k2.6、kimi-k2.5、kimi-k2 等
|
||||
</Card>
|
||||
<Card title="LinkAI" href="/models/linkai">
|
||||
多模型统一接口 + 知识库
|
||||
</Card>
|
||||
<Card title="自定义" href="/models/custom">
|
||||
第三方代理、本地模型等
|
||||
</Card>
|
||||
</CardGroup>
|
||||
|
||||
|
||||
<Tip>
|
||||
全部模型名称可参考项目 [`common/const.py`](https://github.com/zhayujie/CowAgent/blob/master/common/const.py) 文件。
|
||||
</Tip>
|
||||
|
||||
@@ -1,8 +1,16 @@
|
||||
---
|
||||
title: Kimi
|
||||
description: Kimi (Moonshot) 模型配置
|
||||
description: Kimi(Moonshot)模型配置(文本对话 + 图像理解)
|
||||
---
|
||||
|
||||
Kimi 由 Moonshot 提供,支持文本对话与图像理解,`kimi-k2.x` 系列原生支持视觉。
|
||||
|
||||
<Tip>
|
||||
通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。
|
||||
</Tip>
|
||||
|
||||
## 文本对话
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "kimi-k2.6",
|
||||
@@ -14,14 +22,20 @@ description: Kimi (Moonshot) 模型配置
|
||||
| --- | --- |
|
||||
| `model` | 可填 `kimi-k2.6`、`kimi-k2.5`、`kimi-k2`、`moonshot-v1-8k`、`moonshot-v1-32k`、`moonshot-v1-128k` |
|
||||
| `moonshot_api_key` | 在 [Moonshot 控制台](https://platform.moonshot.cn/console/api-keys) 创建 |
|
||||
| `moonshot_base_url` | 可选,默认为 `https://api.moonshot.cn/v1` |
|
||||
|
||||
也支持 OpenAI 兼容方式接入:
|
||||
## 图像理解
|
||||
|
||||
配置 `moonshot_api_key` 后 Agent 的 Vision 工具会自动使用 `kimi-k2.6` 识别图像,无需额外配置。
|
||||
|
||||
如需手动指定 Vision 模型:
|
||||
|
||||
```json
|
||||
{
|
||||
"bot_type": "openai",
|
||||
"model": "kimi-k2.6",
|
||||
"open_ai_api_base": "https://api.moonshot.cn/v1",
|
||||
"open_ai_api_key": "YOUR_API_KEY"
|
||||
"tools": {
|
||||
"vision": {
|
||||
"model": "kimi-k2.6"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
@@ -1,9 +1,15 @@
|
||||
---
|
||||
title: LinkAI
|
||||
description: 通过 LinkAI 平台统一接入多种模型
|
||||
description: 通过 LinkAI 平台统一接入文本、视觉、图像、语音与向量能力
|
||||
---
|
||||
|
||||
通过 [LinkAI](https://link-ai.tech) 平台可灵活切换 OpenAI、Claude、Gemini、DeepSeek、MiniMax、Qwen、Kimi 等多种模型,并支持知识库、工作流、插件等 Agent 能力。
|
||||
通过一份 `linkai_api_key` 即可访问 OpenAI、Claude、Gemini、DeepSeek、MiniMax、Qwen、Kimi、豆包等主流厂商的全部能力。
|
||||
|
||||
<Tip>
|
||||
通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。
|
||||
</Tip>
|
||||
|
||||
## 文本对话
|
||||
|
||||
```json
|
||||
{
|
||||
@@ -14,8 +20,84 @@ description: 通过 LinkAI 平台统一接入多种模型
|
||||
|
||||
| 参数 | 说明 |
|
||||
| --- | --- |
|
||||
| `use_linkai` | 设为 `true` 启用 LinkAI 接口 |
|
||||
| `use_linkai` | 设为 `true` 启用 |
|
||||
| `linkai_api_key` | 在 [控制台](https://link-ai.tech/console/interface) 创建 |
|
||||
| `model` | 留空则使用智能体默认模型,可在平台中灵活切换,[模型列表](https://link-ai.tech/console/models) 中的全部模型均可使用 |
|
||||
| `model` | 可填写 [模型列表](https://link-ai.tech/console/models) 中任意编码 |
|
||||
|
||||
参考 [接口文档](https://docs.link-ai.tech/platform/api) 了解更多。
|
||||
前往 [模型服务](https://link-ai.tech/console/models) 了解更多。
|
||||
|
||||
## 图像理解
|
||||
|
||||
配置完成后 Agent 的 Vision 工具会自动调用网关上的多模态模型,无需额外配置。如需手动指定 Vision 模型:
|
||||
|
||||
```json
|
||||
{
|
||||
"tools": {
|
||||
"vision": {
|
||||
"model": "gpt-5.4-mini"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
可选模型:`gpt-4.1-mini`、`gpt-5.4-mini`、`qwen3.6-plus`、`doubao-seed-2-0-pro-260215`、`kimi-k2.6`、`claude-sonnet-4-6`、`gemini-3.1-flash-lite-preview` 等。
|
||||
|
||||
## 图像生成
|
||||
|
||||
```json
|
||||
{
|
||||
"skills": {
|
||||
"image-generation": {
|
||||
"model": "gpt-image-2"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| 模型 ID | 别名 |
|
||||
| --- | --- |
|
||||
| `gpt-image-2` | OpenAI |
|
||||
| `gemini-3.1-flash-image-preview` | Nano Banana 2 |
|
||||
| `gemini-3-pro-image-preview` | Nano Banana Pro |
|
||||
| `seedream-5.0-lite` | 字节豆包 Seedream |
|
||||
|
||||
## 语音识别
|
||||
|
||||
```json
|
||||
{
|
||||
"voice_to_text": "linkai"
|
||||
}
|
||||
```
|
||||
|
||||
ASR 固定使用 Whisper,凭证自动复用 `linkai_api_key`。
|
||||
|
||||
## 语音合成
|
||||
|
||||
语音合成网关下支持多个底层 TTS 引擎,按 `text_to_voice_model` 选择引擎,音色随引擎切换。
|
||||
|
||||
```json
|
||||
{
|
||||
"text_to_voice": "linkai",
|
||||
"text_to_voice_model": "doubao",
|
||||
"tts_voice_id": "BV001_streaming"
|
||||
}
|
||||
```
|
||||
|
||||
| `text_to_voice_model` | 引擎说明 |
|
||||
| --- | --- |
|
||||
| `tts-1` | OpenAI · 多语种通用(音色 `alloy` / `nova` / `echo` 等) |
|
||||
| `doubao` | 字节豆包 · 中文音色丰富 |
|
||||
| `baidu` | 百度 · 中文主播音色 |
|
||||
|
||||
不同引擎对应的音色不同,建议在 Web 控制台「模型管理 → 语音合成」中可视化选择。
|
||||
|
||||
## 向量
|
||||
|
||||
```json
|
||||
{
|
||||
"embedding_provider": "linkai",
|
||||
"embedding_model": "text-embedding-3-small"
|
||||
}
|
||||
```
|
||||
|
||||
默认模型 `text-embedding-3-small`(OpenAI 兼容)。修改 embedding 后需执行 `/memory rebuild-index` 命令重建索引。
|
||||
|
||||
@@ -1,8 +1,16 @@
|
||||
---
|
||||
title: MiniMax
|
||||
description: MiniMax 模型配置
|
||||
description: MiniMax 模型配置(文本 / 图像理解 / 图像生成 / 语音合成)
|
||||
---
|
||||
|
||||
MiniMax 支持文本对话、图像理解、图像生成与语音合成,一份 `minimax_api_key` 即可启用全部能力。
|
||||
|
||||
<Tip>
|
||||
通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。
|
||||
</Tip>
|
||||
|
||||
## 文本对话
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "MiniMax-M2.7",
|
||||
@@ -12,16 +20,52 @@ description: MiniMax 模型配置
|
||||
|
||||
| 参数 | 说明 |
|
||||
| --- | --- |
|
||||
| `model` | 可填 `MiniMax-M2.7`、`MiniMax-M2.5`、`MiniMax-M2.1`、`MiniMax-M2.1-lightning`、`MiniMax-M2` 等 |
|
||||
| `model` | 可填 `MiniMax-M2.7`、`MiniMax-M2.7-highspeed`、`MiniMax-M2.5`、`MiniMax-M2.1`、`MiniMax-M2.1-lightning`、`MiniMax-M2` 等 |
|
||||
| `minimax_api_key` | 在 [MiniMax 控制台](https://platform.minimaxi.com/user-center/basic-information/interface-key) 创建 |
|
||||
|
||||
也支持 OpenAI 兼容方式接入:
|
||||
## 图像理解
|
||||
|
||||
MiniMax 的 M2.x 系列 chat 模型本身不支持视觉,视觉调用统一路由到 `MiniMax-Text-01`。配置 `minimax_api_key` 后 Agent 的 Vision 工具会自动使用该模型,无需在配置文件中显式指定。
|
||||
|
||||
## 图像生成
|
||||
|
||||
```json
|
||||
{
|
||||
"bot_type": "openai",
|
||||
"model": "MiniMax-M2.7",
|
||||
"open_ai_api_base": "https://api.minimaxi.com/v1",
|
||||
"open_ai_api_key": "YOUR_API_KEY"
|
||||
"skills": {
|
||||
"image-generation": {
|
||||
"model": "image-01"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
可选模型:`image-01`。
|
||||
|
||||
## 语音合成
|
||||
|
||||
```json
|
||||
{
|
||||
"text_to_voice": "minimax",
|
||||
"text_to_voice_model": "speech-2.8-hd",
|
||||
"tts_voice_id": "female-shaonv"
|
||||
}
|
||||
```
|
||||
|
||||
| 参数 | 说明 |
|
||||
| --- | --- |
|
||||
| `text_to_voice_model` | `speech-2.8-hd`(情绪渲染、自然听感)、`speech-2.8-turbo`(极速)、`speech-2.6-hd`、`speech-2.6-turbo` |
|
||||
| `tts_voice_id` | 音色 ID,支持中文 / 粤语 / 英 / 日 / 韩,共 70+ 种 |
|
||||
|
||||
常用音色示例:
|
||||
|
||||
| 音色 ID | 说明 |
|
||||
| --- | --- |
|
||||
| `female-shaonv` | 中文 · 少女(女) |
|
||||
| `female-yujie` | 中文 · 御姐(女) |
|
||||
| `female-tianmei` | 中文 · 甜美女性(女) |
|
||||
| `male-qn-jingying` | 中文 · 精英青年(男) |
|
||||
| `male-qn-badao` | 中文 · 霸道青年(男) |
|
||||
| `Cantonese_GentleLady` | 粤语 · 温柔女声 |
|
||||
| `English_Graceful_Lady` | 英文 · Graceful Lady |
|
||||
|
||||
完整音色(中文 / 粤语 / 英 / 日 / 韩共 70+ 种)可参考 [系统音色列表](https://platform.minimaxi.com/docs/faq/system-voice-id),也可在 Web 控制台的「模型管理 → 语音合成」下拉框中可视化选择。
|
||||
|
||||
@@ -1,11 +1,20 @@
|
||||
---
|
||||
title: OpenAI
|
||||
description: OpenAI 模型配置
|
||||
description: OpenAI 模型配置(文本 / 视觉 / 图像 / 语音 / 向量)
|
||||
---
|
||||
|
||||
OpenAI 是覆盖最完整的厂商,可同时承担文本对话、视觉理解、图像生成、语音识别(ASR)、语音合成(TTS)和向量(Embedding)能力。一份 `open_ai_api_key` 即可让 Agent 用到全部能力。
|
||||
|
||||
<Tip>
|
||||
通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。
|
||||
</Tip>
|
||||
|
||||
|
||||
## 文本对话
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "gpt-5.4",
|
||||
"model": "gpt-5.5",
|
||||
"open_ai_api_key": "YOUR_API_KEY",
|
||||
"open_ai_api_base": "https://api.openai.com/v1"
|
||||
}
|
||||
@@ -13,7 +22,82 @@ description: OpenAI 模型配置
|
||||
|
||||
| 参数 | 说明 |
|
||||
| --- | --- |
|
||||
| `model` | 与 OpenAI 接口的 [model 参数](https://platform.openai.com/docs/models) 一致,支持 o 系列、gpt-5.4、gpt-5.4-mini、gpt-5.4-nano、gpt-5 系列、gpt-4.1 等,Agent 模式推荐使用 `gpt-5.4` |
|
||||
| `model` | 与 OpenAI 接口的 [model 参数](https://platform.openai.com/docs/models) 一致,支持 `gpt-5.5`、`gpt-5.4`、`gpt-5.4-mini`、`gpt-5.4-nano`、`gpt-5` 系列、`gpt-4.1`、o 系列等;Agent 模式默认 `gpt-5.5`,追求性价比可改为 `gpt-5.4` |
|
||||
| `open_ai_api_key` | 在 [OpenAI 平台](https://platform.openai.com/api-keys) 创建 |
|
||||
| `open_ai_api_base` | 可选,修改可接入第三方代理接口 |
|
||||
| `bot_type` | 使用 OpenAI 官方模型时无需填写。当通过代理接口使用 Claude 等非 OpenAI 模型时,设为 `openai` |
|
||||
| `open_ai_api_base` | 可选,修改可接入第三方代理 |
|
||||
| `bot_type` | 使用 OpenAI 官方模型时无需填写;通过兼容协议接入厂商模型时需设为 `openai` |
|
||||
|
||||
## 图像理解
|
||||
|
||||
`gpt-5.5`、`gpt-5.4`、`gpt-4o`、`gpt-4.1` 等 OpenAI 模型均原生支持视觉,配置 `open_ai_api_key` 后 Agent 的 Vision 工具会自动使用主模型识别图像。若主模型不支持视觉或希望显式指定,可在配置文件中配置:
|
||||
|
||||
```json
|
||||
{
|
||||
"tools": {
|
||||
"vision": {
|
||||
"model": "gpt-5.4-mini"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
支持的 Vision 模型:`gpt-5.5`、`gpt-5.4`、`gpt-5.4-mini`、`gpt-5.4-nano`、`gpt-5`、`gpt-4.1`、`gpt-4.1-mini`、`gpt-4o`。
|
||||
|
||||
## 图像生成
|
||||
|
||||
在配置文件中指定图像生成模型,Agent 调用图像生成技能时会自动路由到 OpenAI:
|
||||
|
||||
```json
|
||||
{
|
||||
"skills": {
|
||||
"image-generation": {
|
||||
"model": "gpt-image-2"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
支持的图像生成模型:`gpt-image-2`、`gpt-image-1`。
|
||||
|
||||
## 语音识别
|
||||
|
||||
```json
|
||||
{
|
||||
"voice_to_text": "openai",
|
||||
"voice_to_text_model": "gpt-4o-mini-transcribe"
|
||||
}
|
||||
```
|
||||
|
||||
| 参数 | 说明 |
|
||||
| --- | --- |
|
||||
| `voice_to_text` | 设为 `openai` 启用 OpenAI 语音识别 |
|
||||
| `voice_to_text_model` | 可选,默认 `gpt-4o-mini-transcribe`;也可填 `gpt-4o-transcribe`、`whisper-1` |
|
||||
|
||||
凭证自动复用 `open_ai_api_key`。
|
||||
|
||||
## 语音合成
|
||||
|
||||
```json
|
||||
{
|
||||
"text_to_voice": "openai",
|
||||
"text_to_voice_model": "tts-1",
|
||||
"tts_voice_id": "alloy"
|
||||
}
|
||||
```
|
||||
|
||||
| 参数 | 说明 |
|
||||
| --- | --- |
|
||||
| `text_to_voice_model` | `tts-1`、`tts-1-hd`、`gpt-4o-mini-tts` |
|
||||
| `tts_voice_id` | 音色:`alloy`、`echo`、`fable`、`onyx`、`nova`、`shimmer`、`ash`、`ballad`、`coral`、`sage`、`verse` |
|
||||
|
||||
## 向量
|
||||
|
||||
```json
|
||||
{
|
||||
"embedding_provider": "openai",
|
||||
"embedding_model": "text-embedding-3-small"
|
||||
}
|
||||
```
|
||||
|
||||
可选模型:`text-embedding-3-small`、`text-embedding-3-large`、`text-embedding-ada-002`。修改 embedding 后需执行 `/memory rebuild-index` 命令重建索引。
|
||||
|
||||
|
||||
@@ -1,14 +1,20 @@
|
||||
---
|
||||
title: 百度千帆
|
||||
description: 百度千帆 ERNIE 模型配置
|
||||
description: 百度千帆 ERNIE 模型配置(文本对话 + 图像理解)
|
||||
---
|
||||
|
||||
方式一:官方接入(推荐):
|
||||
百度千帆提供 ERNIE 系列模型,支持文本对话与图像理解。
|
||||
|
||||
<Tip>
|
||||
通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。
|
||||
</Tip>
|
||||
|
||||
## 文本对话
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "ernie-5.1",
|
||||
"qianfan_api_key": "",
|
||||
"qianfan_api_key": "YOUR_API_KEY",
|
||||
"qianfan_api_base": "https://qianfan.baidubce.com/v2"
|
||||
}
|
||||
```
|
||||
@@ -19,7 +25,7 @@ description: 百度千帆 ERNIE 模型配置
|
||||
| `qianfan_api_key` | 千帆 API Key,格式通常以 `bce-v3/` 开头 |
|
||||
| `qianfan_api_base` | 可选,默认为 `https://qianfan.baidubce.com/v2` |
|
||||
|
||||
## 模型选择
|
||||
### 模型选择
|
||||
|
||||
| 模型 | 适用场景 |
|
||||
| --- | --- |
|
||||
@@ -29,18 +35,18 @@ description: 百度千帆 ERNIE 模型配置
|
||||
| `ernie-4.5-turbo-128k` | 长上下文和通用对话 |
|
||||
| `ernie-4.5-turbo-32k` | 通用对话,成本和上下文更均衡 |
|
||||
|
||||
## Vision 工具
|
||||
## 图像理解
|
||||
|
||||
配置 `qianfan_api_key` 后,Agent 的 Vision 工具可以自动使用千帆视觉模型:
|
||||
|
||||
- 当主模型本身是多模态时(如 `ernie-5.1`、`ernie-5.0`、`ernie-x1.1`、`ernie-4.5-turbo-vl`),直接由主模型识别图像,无需额外配置
|
||||
- 当主模型是纯文本时(如 `ernie-4.5-turbo-128k`),Vision 工具会自动 fallback 到 `ernie-4.5-turbo-vl`
|
||||
|
||||
如需手动指定 Vision 模型,可在 `config.json` 中显式配置:
|
||||
如需手动指定 Vision 模型,可在配置文件中显式配置:
|
||||
|
||||
```json
|
||||
{
|
||||
"tool": {
|
||||
"tools": {
|
||||
"vision": {
|
||||
"model": "ernie-4.5-turbo-vl"
|
||||
}
|
||||
@@ -48,17 +54,6 @@ description: 百度千帆 ERNIE 模型配置
|
||||
}
|
||||
```
|
||||
|
||||
方式二:OpenAI 兼容方式接入:
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "ernie-5.1",
|
||||
"bot_type": "openai",
|
||||
"open_ai_api_key": "",
|
||||
"open_ai_api_base": "https://qianfan.baidubce.com/v2"
|
||||
}
|
||||
```
|
||||
|
||||
<Tip>
|
||||
新配置推荐使用 `qianfan_api_key`。旧的 `wenxin`、`wenxin-4`、`baidu_wenxin_api_key`、`baidu_wenxin_secret_key` 配置仍保持兼容。
|
||||
</Tip>
|
||||
|
||||
@@ -1,8 +1,16 @@
|
||||
---
|
||||
title: 通义千问 Qwen
|
||||
description: 通义千问模型配置
|
||||
description: 通义千问模型配置(文本 / 图像理解 / 图像生成 / 语音识别 / 语音合成 / 向量)
|
||||
---
|
||||
|
||||
通义千问(DashScope / 百炼)是国内覆盖最完整的厂商之一,文本、图像理解、图像生成、语音识别、语音合成与向量能力均可用一份 `dashscope_api_key` 启用。
|
||||
|
||||
<Tip>
|
||||
通过 Web 控制台的「模型管理」页面可一站式配置以下全部能力,无需手动改配置文件。
|
||||
</Tip>
|
||||
|
||||
## 文本对话
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "qwen3.6-plus",
|
||||
@@ -12,16 +20,93 @@ description: 通义千问模型配置
|
||||
|
||||
| 参数 | 说明 |
|
||||
| --- | --- |
|
||||
| `model` | 可填 `qwen3.6-plus`、`qwen3.5-plus`、`qwen3-max`、`qwen-max`、`qwen-plus`、`qwen-turbo`、`qwq-plus` 等 |
|
||||
| `model` | 可填 `qwen3.6-plus`、`qwen3.7-max`、`qwen3.5-plus`、`qwen3-max`、`qwen-max`、`qwen-plus`、`qwen-turbo`、`qwq-plus` 等 |
|
||||
| `dashscope_api_key` | 在 [百炼控制台](https://bailian.console.aliyun.com/?tab=model#/api-key) 创建,参考 [官方文档](https://bailian.console.aliyun.com/?tab=api#/api) |
|
||||
|
||||
也支持 OpenAI 兼容方式接入:
|
||||
## 图像理解
|
||||
|
||||
配置 `dashscope_api_key` 后 Agent 的 Vision 工具会自动调用千问的视觉模型识别图像。`qwen3-max` / `qwen3.5-plus` / `qwen3.6-plus` 等模型本身就是多模态;若主模型是纯文本(如 `qwen-turbo`),会自动回落到 `qwen-vl-max`。
|
||||
|
||||
如需手动指定 Vision 模型:
|
||||
|
||||
```json
|
||||
{
|
||||
"bot_type": "openai",
|
||||
"model": "qwen3.6-plus",
|
||||
"open_ai_api_base": "https://dashscope.aliyuncs.com/compatible-mode/v1",
|
||||
"open_ai_api_key": "YOUR_API_KEY"
|
||||
"tools": {
|
||||
"vision": {
|
||||
"model": "qwen3.6-plus"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
支持模型:`qwen3.6-plus`、`qwen3.5-plus`、`qwen3-max`。
|
||||
|
||||
## 图像生成
|
||||
|
||||
```json
|
||||
{
|
||||
"skills": {
|
||||
"image-generation": {
|
||||
"model": "qwen-image-2.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
可选模型:`qwen-image-2.0`、`qwen-image-2.0-pro`。
|
||||
|
||||
## 语音识别
|
||||
|
||||
```json
|
||||
{
|
||||
"voice_to_text": "dashscope",
|
||||
"voice_to_text_model": "qwen3-asr-flash"
|
||||
}
|
||||
```
|
||||
|
||||
| 参数 | 说明 |
|
||||
| --- | --- |
|
||||
| `voice_to_text` | 设为 `dashscope` 启用通义千问 ASR |
|
||||
| `voice_to_text_model` | 可选,默认 `qwen3-asr-flash` |
|
||||
|
||||
凭证自动复用 `dashscope_api_key`。单段音频建议小于 10MB、时长不超过 300 秒。
|
||||
|
||||
## 语音合成
|
||||
|
||||
```json
|
||||
{
|
||||
"text_to_voice": "dashscope",
|
||||
"text_to_voice_model": "qwen3-tts-flash",
|
||||
"tts_voice_id": "Cherry"
|
||||
}
|
||||
```
|
||||
|
||||
| 参数 | 说明 |
|
||||
| --- | --- |
|
||||
| `text_to_voice_model` | 可选,默认 `qwen3-tts-flash`,覆盖普通话、方言与主流外语 |
|
||||
| `tts_voice_id` | 音色 ID,详见下方常用列表 |
|
||||
|
||||
常用音色示例:
|
||||
|
||||
| 音色 ID | 说明 |
|
||||
| --- | --- |
|
||||
| `Cherry` | 芊悦 · 阳光女声 |
|
||||
| `Serena` | 苏瑶 · 温柔女声 |
|
||||
| `Ethan` | 晨煦 · 阳光男声 |
|
||||
| `Chelsie` | 千雪 · 二次元少女 |
|
||||
| `Dylan` | 北京话 · 晓东 |
|
||||
| `Rocky` | 粤语 · 阿强 |
|
||||
| `Sunny` | 四川话 · 晴儿 |
|
||||
|
||||
完整音色(普通话 / 各地方言 / 双语等)可在 Web 控制台的「模型管理 → 语音合成」下拉框中可视化选择。
|
||||
|
||||
## 向量
|
||||
|
||||
```json
|
||||
{
|
||||
"embedding_provider": "dashscope",
|
||||
"embedding_model": "text-embedding-v4"
|
||||
}
|
||||
```
|
||||
|
||||
默认模型 `text-embedding-v4`。修改 embedding 后需执行 `/memory rebuild-index` 命令重建索引。
|
||||
|
||||
@@ -5,6 +5,7 @@ description: CowAgent 版本更新历史
|
||||
|
||||
| 版本 | 日期 | 说明 |
|
||||
| --- | --- | --- |
|
||||
| [2.0.9](/releases/v2.0.9) | 2026.05.21 | MCP 工具生态接入、模型管理页重构(厂商凭据共享 + 多能力统一调度)、语音系统升级、浏览器持久登录 |
|
||||
| [2.0.8](/releases/v2.0.8) | 2026.05.06 | 飞书渠道全面升级(语音、流式输出和 Markdown、扫码一键接入)、DeepSeek V4 和百度模型新增、定时任务工具增强 |
|
||||
| [2.0.7](/releases/v2.0.7) | 2026.04.22 | 图像生成技能(六厂商自动路由)、新模型支持(Kimi K2.6、Claude Opus 4.7、GLM 5.1)、知识库增强、Web 控制台优化 |
|
||||
| [2.0.6](/releases/v2.0.6) | 2026.04.14 | 项目更名、知识库系统、梦境记忆蒸馏、上下文智能压缩、Web 控制台多会话及多项优化 |
|
||||
|
||||
@@ -11,7 +11,7 @@ description: CowAgent 2.0.7 - 图像生成技能(六厂商自动路由)、
|
||||
- **开箱即用**:配置 API Key 即可使用,无需手动指定模型。也支持在对话中指定特定模型
|
||||
- **灵活控制**:支持 `quality`(画质)、`size`(分辨率,512/1K~4K)、`aspect_ratio`(宽高比)等参数,各厂商自动适配有效值
|
||||
- **图片编辑**:传入已有图片即可进行编辑、风格迁移、多图融合
|
||||
- **Skill 级配置**:支持通过 `config.json` 中的 `skill.image-generation.model` 固定默认模型
|
||||
- **Skill 级配置**:支持通过 `config.json` 中的 `skills.image-generation.model` 固定默认模型
|
||||
|
||||
相关文档:[图像生成技能](https://docs.cowagent.ai/skills/image-generation)
|
||||
|
||||
|
||||
@@ -46,7 +46,7 @@ description: CowAgent 2.0.8 - 飞书渠道全面升级(语音、流式打字
|
||||
|
||||
## 🔧 工具与安全
|
||||
|
||||
- **图像识别模型**:让 `tool.vision.model` 配置真正生效,未配置时自动 fallback #2792 Thanks CNXudiandian
|
||||
- **图像识别模型**:让 `tools.vision.model` 配置真正生效,未配置时自动 fallback #2792 Thanks CNXudiandian
|
||||
- **Bash 安全确认**:仅对工作区外的破坏性删除做二次确认,工作区内常规操作不再打扰
|
||||
|
||||
## 🐛 其他修复
|
||||
|
||||
92
docs/releases/v2.0.9.mdx
Normal file
@@ -0,0 +1,92 @@
|
||||
---
|
||||
title: v2.0.9
|
||||
description: CowAgent 2.0.9 - MCP 工具生态接入、模型管理页重构、语音系统升级、浏览器持久登录
|
||||
---
|
||||
|
||||
## 🧩 MCP 工具生态接入
|
||||
|
||||
新增 **MCP(Model Context Protocol)** 工具集成,CowAgent 从固定工具集扩展为开放可插拔的工具生态。任何兼容 MCP 协议的服务(高德地图、Chrome DevTools、Filesystem、Playwright 等)都可作为工具直接接入 Agent。
|
||||
|
||||
- **零额外依赖**:原生 JSON-RPC 实现,同时支持 `stdio`(本地进程)和 `sse`(远程 URL)两种传输
|
||||
- **兼容主流配置**:兼容 Claude Desktop / Cursor 风格的 `mcpServers` 配置,优先读取 `~/cow/mcp.json`,未配置则回退 `config.json`
|
||||
- **异步启动**:MCP 服务在后台线程启动,不阻塞 Agent 初始化;单个服务失败不影响整体
|
||||
|
||||
相关文档:[MCP 工具](https://docs.cowagent.ai/tools/mcp) · 社区贡献 #2801 Thanks @yangluxin613
|
||||
|
||||
## 🖥️ 模型管理页面重构
|
||||
|
||||
「模型」页面整体重新设计,从原来按 LLM 厂商堆叠的列表,重构为 **厂商凭据 + 能力调度** 两层结构:一处配置厂商凭据,对话、图像、语音、向量、搜索等多个能力共享。
|
||||
|
||||
- **厂商凭据集中管理**:所有支持厂商(OpenAI / Claude / Gemini / DeepSeek / Qwen / 豆包 / Kimi / 智谱 / MiniMax / 千帆 / LinkAI / Custom 等)的 API Key / API Base 在顶部统一维护,编辑后下方所有能力立即生效
|
||||
- **能力卡片**:按主模型、图像理解、图像生成、语音识别、语音合成、向量、联网搜索分卡,每个能力可独立选择厂商和模型,未配置时自动跟随主模型或按默认顺序回退
|
||||
|
||||
### 多厂商联网搜索
|
||||
|
||||
联网搜索升级为多厂商架构,**输出格式统一**:
|
||||
|
||||
- 四家可选:博查(bocha)、百度千帆(qianfan)、智谱(zhipu)、LinkAI
|
||||
- 两种调度策略:`auto`(按 bocha > qianfan > zhipu > linkai 顺序自动选第一个已配置的厂商)/ `fixed`(固定指定厂商)
|
||||
- 配置 ≥2 家且为 `auto` 时,Agent 可在单次调用中临时指定 `provider` 切换搜索源
|
||||
|
||||
### 向量厂商热切换
|
||||
|
||||
向量(Embedding)支持多厂商,告别对 OpenAI 的单一依赖:
|
||||
|
||||
- 原生支持 `openai` / `dashscope` / `doubao` / `zhipu` / `linkai`
|
||||
- **在线重建索引**:切换厂商后执行 `/memory rebuild-index`,无需重启、不会中断当前对话
|
||||
- 梦境日记默认排除在向量索引之外,避免反复出现在检索结果中干扰对话
|
||||
|
||||
## 🎙️ 语音系统升级
|
||||
|
||||
- **TTS 适配更多通道**:个人微信(ilink)、钉钉、企微智能机器人现已原生支持语音回复,开关沿用 `always_reply_voice` / `voice_reply_voice`;触发 TTS 时先发文本气泡再发语音消息,方便对照阅读
|
||||
- **新增 ASR 厂商**:百炼(DashScope)、智谱
|
||||
- **TTS 多厂商重构**:MiniMax / LinkAI / DashScope / 智谱 TTS 在流式合成、长文本切分、错误回退上更稳
|
||||
- **网页麦克风输入**:Web 控制台聊天框新增麦克风按钮,可直接录音发送,自动走 ASR 转文本
|
||||
|
||||
## 🌐 浏览器工具
|
||||
|
||||
浏览器工具支持三种启动模式,告别"每次开会话都得重新登录":
|
||||
|
||||
- **持久化用户配置(默认)**:复用 `~/.cow/browser_profile`,登录一次后下次自动复用登录态
|
||||
- **CDP 模式**:通过 `cdp_endpoint` 附加到手动启动的真实 Chrome,享有完整指纹,适合反爬严格的站点
|
||||
- **Fresh 模式**:每次清空环境,适合做隔离任务
|
||||
|
||||
此外,浏览器被用户中途关闭后下次调用会自动重新拉起,CDP 模式下不会误杀用户的 Chrome 进程。相关文档:[浏览器工具](https://docs.cowagent.ai/tools/browser) #2809
|
||||
|
||||
## 🤖 新模型与模型增强
|
||||
|
||||
- **百度 ERNIE 5.1**:新增 `ernie-5.1` 模型
|
||||
- **DeepSeek V4 reasoning_effort**:DeepSeek V4 系列支持 `reasoning_effort` 配置思考深度
|
||||
- **OpenRouter / Vercel AI Gateway 归因**:调用这两个平台时自动注入归因 Header,平台可正确识别 CowAgent 用量
|
||||
- 修复 MiMo 等思考模型在多轮对话中 `reasoning_content` 丢失的问题
|
||||
|
||||
## 🚀 启动与运行体验
|
||||
|
||||
来自社区的多项体验改进,Thanks @yangluxin613:
|
||||
|
||||
- **自动选端口 + 自动开浏览器**:默认端口被占用时自动切换,启动成功后默认打开控制台
|
||||
- **Ctrl+C 干净退出**:不再打印一长串堆栈
|
||||
- **日志面板**:差异化级别配色、多行日志继承级别、新增级别筛选 Checkbox
|
||||
|
||||
## 🔒 部署与安全
|
||||
|
||||
- **默认仅本机访问**:Web 控制台 `web_host` 默认 `127.0.0.1`,避免无密码情况下被外网直接访问;显式 `0.0.0.0` 且未设密码时给出提示
|
||||
- **前端资源完全本地化**:第三方 CSS / JS 全部本地分发,离线 / 内网环境也能正常加载控制台 #2816 Thanks @TryToMakeUsBetter
|
||||
- **支持文件夹上传**:上传区支持整目录一次性上传,路径校验适配 Windows #2815 Thanks @TryToMakeUsBetter
|
||||
|
||||
## 🛠 其他改进与修复
|
||||
|
||||
- **定时任务防重复执行**:调度器初始化做幂等处理
|
||||
- **工具失败状态持久化**:刷新页面或重载历史时失败的工具调用正确显示失败状态 #2822 Thanks @a1094174619
|
||||
- **企微机器人非法字符**:修复消息中包含非法控制字符导致投递失败的问题 #2810 Thanks @Jacques-Zhao
|
||||
- **飞书文件消息**:飞书通道支持文件消息接收
|
||||
- **工具配置合并**:修复用户自定义工具配置(如 `tools.browser`)被工作区默认值整体覆盖的问题,现按字段合并
|
||||
- 修复单文件上传偶发 TypeError、切换语言后 JS 动态视图未重渲染等问题
|
||||
|
||||
## 📦 升级方式
|
||||
|
||||
源码部署可执行 `cow update` 或 `./run.sh update` 一键升级,或手动拉取代码后重启。详见 [更新升级文档](https://docs.cowagent.ai/guide/upgrade)。
|
||||
|
||||
> ⚠️ 切换向量厂商后,建议执行一次 `/memory rebuild-index`,让历史记忆按新的向量维度重新入库。
|
||||
|
||||
**发布日期**:2026.05.21 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.8...2.0.9)
|
||||
@@ -3,149 +3,87 @@ title: image-generation - 图像生成
|
||||
description: 文生图 / 图生图 / 多图融合,支持多家厂商自动路由与回退
|
||||
---
|
||||
|
||||
通用的图像生成与编辑技能,支持 OpenAI、Gemini、Seedream(火山方舟)、Qwen(百炼)、MiniMax、LinkAI 共六家厂商。不需要手动选模型,脚本会按固定优先级自动挑选已配置的厂商来出图。
|
||||
通用的图像生成与编辑技能,支持 OpenAI、Gemini、Seedream(火山方舟)、Qwen(百炼)、MiniMax、LinkAI 共六家厂商。配好任意一家的 Key 即可使用,配多家可享受自动回退。
|
||||
|
||||
## 模型选择
|
||||
|
||||
`image-generation` 采用「固定优先级 + 自动回退」的策略,配好 Key 就能用:
|
||||
|
||||
1. **优先级顺序**:`OpenAI → Gemini → Seedream → Qwen → MiniMax → LinkAI`
|
||||
2. **没配 Key 的跳过**:只有设了 API Key 的厂商才会参与
|
||||
3. **失败自动切下一家**:遇到 401、模型未开通、网络异常等错误时,会自动试下一个
|
||||
4. **指定模型时前置**:如果明确传了某个模型名,对应厂商会被提到最前面先试
|
||||
|
||||
### 支持的模型
|
||||
## 支持的模型
|
||||
|
||||
| 厂商 | 模型 / 别名 | 特点 |
|
||||
| --- | --- | --- |
|
||||
| OpenAI | `gpt-image-2`、`gpt-image-1` | 通用文生图,高质量、高智能,支持 `quality` 参数控制画质 |
|
||||
| OpenAI | `gpt-image-2`、`gpt-image-1` | 通用文生图,高质量,支持 `quality` 控制画质 |
|
||||
| Gemini Nano Banana | `nano-banana-2`、`nano-banana-pro`、`nano-banana` | 对应 `gemini-3.1-flash`、`gemini-3-pro`、`gemini-2.5-flash` 的图像版本 |
|
||||
| Seedream(火山方舟) | `seedream-5.0-lite`、`seedream-4.5` | 原生 2K–4K,最多 14 张图融合 |
|
||||
| Qwen(百炼) | `qwen-image-2.0`、`qwen-image-2.0-pro` | 擅长中文排版和图文融合 |
|
||||
| MiniMax | `image-01` | 简单快速的图片生成 |
|
||||
| LinkAI | 任意模型 | 通用代理,兜底用 |
|
||||
| MiniMax | `image-01` | 简单快速 |
|
||||
| LinkAI | 任意模型 | 统一网关,作为兜底 |
|
||||
|
||||
<Note>
|
||||
默认情况下 Agent 不会主动选模型,而是走自动路由。如果你想用某个特定模型,直接在对话里说就行,比如「用 seedream 画一只猫」或「用 gpt-image-2 生成海报」。也可以通过下面的「自定义配置」固定默认模型。
|
||||
</Note>
|
||||
## 模型选择
|
||||
|
||||
## 自定义配置
|
||||
默认走「自动路由 + 失败回退」:
|
||||
|
||||
### API Key 配置
|
||||
1. 按 `OpenAI → Gemini → Seedream → Qwen → MiniMax → LinkAI` 顺序选第一个已配置的厂商
|
||||
2. 遇到 401、模型未开通、网络异常等错误时,自动切到下一家
|
||||
3. 用户在对话里指定模型时(如「用 seedream 画一只猫」),对应厂商会被提到最前优先尝试
|
||||
|
||||
至少需要配**一个**厂商的 Key,配多个就能享受自动回退能力。有三种配置方式:
|
||||
|
||||
#### 方式一:已有模型 Key 自动复用
|
||||
|
||||
如果你在 web控制台 或 `config.json` 中配置了对话模型的 Key(比如 `openai_api_key`、`gemini_api_key` 等),启动时这些 Key 会被**自动同步**到对应的环境变量。也就是说,只要你的对话模型能用,图像生成就能直接用同一个 Key,不需要额外配置。
|
||||
|
||||
#### 方式二:在 config.json 中配置
|
||||
|
||||
在 `config.json` 中直接写对应的 Key 字段即可,支持的字段如下:
|
||||
如需固定使用某个模型:
|
||||
|
||||
```json
|
||||
{
|
||||
"openai_api_key": "sk-xxx",
|
||||
"openai_api_base": "https://api.openai.com/v1",
|
||||
"gemini_api_key": "AIza-xxx",
|
||||
"ark_api_key": "xxx",
|
||||
"dashscope_api_key": "sk-xxx",
|
||||
"minimax_api_key": "xxx",
|
||||
"linkai_api_key": "xxx"
|
||||
}
|
||||
```
|
||||
|
||||
修改后需要重启生效。每个 Key 还有对应的 `*_api_base` 字段可以自定义接口地址。
|
||||
|
||||
#### 方式三:对话中直接配置
|
||||
|
||||
在对话里发送 API Key,Agent 会通过 `env_config` 工具自动保存到 `~/cow/.env`,**不需要重启**就能生效。例如:
|
||||
|
||||
```
|
||||
帮我配置 OPENAI_API_KEY 为 sk-xxx
|
||||
```
|
||||
|
||||
或者:
|
||||
|
||||
```
|
||||
设置 ARK_API_KEY 为 xxx
|
||||
```
|
||||
|
||||
### API Key 一览
|
||||
|
||||
| 环境变量 | config.json 字段 | 对应厂商 | 默认 Base URL |
|
||||
| --- | --- | --- | --- |
|
||||
| `OPENAI_API_KEY` | `openai_api_key` | OpenAI | `https://api.openai.com/v1` |
|
||||
| `GEMINI_API_KEY` | `gemini_api_key` | Gemini | `https://generativelanguage.googleapis.com` |
|
||||
| `ARK_API_KEY` | `ark_api_key` | 火山方舟(Seedream) | `https://ark.cn-beijing.volces.com/api/v3` |
|
||||
| `DASHSCOPE_API_KEY` | `dashscope_api_key` | 阿里百炼(Qwen) | `https://dashscope.aliyuncs.com` |
|
||||
| `MINIMAX_API_KEY` | `minimax_api_key` | MiniMax | `https://api.minimaxi.com` |
|
||||
| `LINKAI_API_KEY` | `linkai_api_key` | LinkAI | `https://api.link-ai.tech` |
|
||||
|
||||
|
||||
### 指定默认模型
|
||||
|
||||
如果想让所有图像生成固定走某个厂商的模型,可以在 `config.json` 里加:
|
||||
|
||||
```json
|
||||
"skill": {
|
||||
"skills": {
|
||||
"image-generation": {
|
||||
"model": "seedream-5.0-lite"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
启动时这段配置会被自动转成环境变量 `SKILL_IMAGE_GENERATION_MODEL`,脚本读到后会固定使用这个模型所在的厂商进行生成。
|
||||
## 配置 API Key
|
||||
|
||||
<Tip>
|
||||
推荐通过 [Web 控制台](/channels/web) 的「模型管理」页面配置,配好的对话模型 Key 会被图像生成技能自动复用,无需重复配置。也可手动编辑配置文件或在对话中通过 `env_config` 工具临时设置。
|
||||
</Tip>
|
||||
|
||||
凭证统一复用主模型厂商的 Key:
|
||||
|
||||
| 字段 | 对应厂商 |
|
||||
| --- | --- |
|
||||
| `openai_api_key` | OpenAI |
|
||||
| `gemini_api_key` | Gemini |
|
||||
| `ark_api_key` | 火山方舟(Seedream) |
|
||||
| `dashscope_api_key` | 阿里百炼(Qwen) |
|
||||
| `minimax_api_key` | MiniMax |
|
||||
| `linkai_api_key` | LinkAI |
|
||||
|
||||
|
||||
## 开启和关闭
|
||||
|
||||
`image-generation` 是内置技能,**会根据 API Key 自动调整状态**:
|
||||
技能会根据 API Key 自动调整状态:
|
||||
|
||||
- **Key 已配置**:技能正常可用,Agent 收到画图请求时会直接调用
|
||||
- **Key 未配置**:技能仍然会出现在上下文中(标记为「需要配置」),Agent 会引导用户去配 Key,而不是直接调用失败
|
||||
- **已配置 Key**:Agent 收到画图请求时直接调用
|
||||
- **未配置 Key**:技能仍会出现在上下文中(标记为「需要配置」),Agent 会引导用户去配 Key
|
||||
|
||||
如果想手动控制,也可以用命令:
|
||||
如需手动控制:
|
||||
|
||||
```text
|
||||
/skill disable image-generation # 手动关闭(即使有 Key 也不会被调用)
|
||||
/skill disable image-generation # 关闭
|
||||
/skill enable image-generation # 重新开启
|
||||
```
|
||||
|
||||
终端里对应的命令是 `cow skill disable image-generation` / `cow skill enable image-generation`。
|
||||
终端等价命令:`cow skill disable image-generation` / `cow skill enable image-generation`。
|
||||
|
||||
## 参数
|
||||
|
||||
| 参数 | 类型 | 必填 | 默认 | 说明 |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| `prompt` | string | 是 | — | 图像描述 |
|
||||
| `image_url` | string / list | 否 | null | 编辑用的输入图,支持本地路径或 URL。传多个就是多图融合 |
|
||||
| `quality` | string | 否 | auto | `low` / `medium` / `high`,只有部分厂商支持 |
|
||||
| `size` | string | 否 | auto | `512` / `1K` / `2K` / `3K` / `4K`,也可以写像素值如 `1024x1024` |
|
||||
| `image_url` | string / list | 否 | null | 编辑用的输入图,本地路径或 URL;传列表为多图融合 |
|
||||
| `quality` | string | 否 | auto | `low` / `medium` / `high`,仅部分厂商支持 |
|
||||
| `size` | string | 否 | auto | `512` / `1K` / `2K` / `3K` / `4K`,或像素值如 `1024x1024` |
|
||||
| `aspect_ratio` | string | 否 | null | `1:1` / `3:2` / `2:3` / `16:9` / `9:16` / `21:9`;Gemini 还支持 `1:4` / `4:1` / `1:8` / `8:1` |
|
||||
|
||||
<Warning>
|
||||
**质量越高、分辨率越大,花的钱越多、等的时间越长。**
|
||||
|
||||
- 日常对话和快速预览直接用默认(`auto`),或者 `quality=low` + `size=1K`,大概 20 秒出图
|
||||
- 做海报、用户明确要高清的时候再上 `quality=high` + `size=2K/4K`,可能要等 1~5 分钟,取决于不同模型的速度
|
||||
**质量越高、分辨率越大,耗时和成本越高。** 日常对话用默认(`auto`)或 `quality=low` + `size=1K` 即可,约 20 秒出图;做海报或明确要高清时再上 `high` + `2K/4K`,可能需要 1–5 分钟。
|
||||
</Warning>
|
||||
|
||||
## 输出
|
||||
|
||||
成功时返回:
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "doubao-seedream-5-0-260128",
|
||||
"images": [
|
||||
{"url": "/path/to/output.png"}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
失败时返回 `{ "error": "..." }`。出错后**不要直接重试**——大概率是配置问题(Key 填错、API 地址不对、模型没开通),让用户修好配置再试。
|
||||
|
||||
## 常见用法
|
||||
|
||||
- **文生图**:根据描述生成插画、海报、图标、头像、分镜图等
|
||||
@@ -153,8 +91,8 @@ description: 文生图 / 图生图 / 多图融合,支持多家厂商自动路
|
||||
- **多图融合**:把多张参考图合成一张(换装、角色合影等)
|
||||
|
||||
<Note>
|
||||
- bash 超时建议设 600 秒。单个厂商的 HTTP 超时是 300 秒,但脚本可能依次尝试多个厂商
|
||||
- 输入的图片会自动压缩到 4MB 以内、最长边不超过 4096px
|
||||
- Gemini / Seedream / Qwen / MiniMax 不支持 `quality` 参数,传了也没用
|
||||
- Seedream 默认出 2K 图,`seedream-5.0-lite` 支持到 3K,`seedream-4.5` 支持到 4K
|
||||
- bash 超时建议设 600 秒:单厂商 HTTP 超时 300 秒,脚本可能依次尝试多家
|
||||
- 输入图片自动压缩到 4MB 以内、最长边不超过 4096px
|
||||
- Gemini / Seedream / Qwen / MiniMax 不支持 `quality` 参数
|
||||
- Seedream 默认出 2K 图;`seedream-5.0-lite` 支持到 3K,`seedream-4.5` 支持到 4K
|
||||
</Note>
|
||||
|
||||
@@ -40,7 +40,7 @@ Vision 工具采用多级自动选择 + 自动兜底策略,无需手动配置
|
||||
|
||||
```json
|
||||
{
|
||||
"tool": {
|
||||
"tools": {
|
||||
"vision": {
|
||||
"model": "gpt-4.1"
|
||||
}
|
||||
|
||||
@@ -1,32 +1,51 @@
|
||||
---
|
||||
title: web_search - 联网搜索
|
||||
description: 搜索互联网获取实时信息
|
||||
description: 搜索互联网获取实时信息,支持多个搜索厂商
|
||||
---
|
||||
|
||||
搜索互联网获取实时信息、新闻、研究等内容。支持两个搜索后端,自动选择可用的后端。
|
||||
搜索互联网获取实时信息、新闻、研究等内容。支持博查、百度千帆、智谱、LinkAI 四个后端,配置任意一家即可使用。
|
||||
|
||||
## 依赖
|
||||
<Tip>
|
||||
推荐通过 [Web 控制台](/channels/web) 的「模型管理 → 搜索」面板可视化配置厂商与策略,无需手动编辑配置文件。
|
||||
</Tip>
|
||||
|
||||
需要配置至少一个搜索 API Key(通过 `env_config` 工具或工作空间 `.env` 文件配置):
|
||||
## 厂商
|
||||
|
||||
| 后端 | 环境变量 | 优先级 | 获取方式 |
|
||||
| --- | --- | --- | --- |
|
||||
| 博查搜索 | `BOCHA_API_KEY` | 优先使用 | [博查开放平台](https://open.bochaai.com/) |
|
||||
| LinkAI 搜索 | `LINKAI_API_KEY` | 可选 | [LinkAI 控制台](https://link-ai.tech/console/interface) |
|
||||
| 厂商 | 凭证 | 申请入口 |
|
||||
| --- | --- | --- |
|
||||
| 博查 Bocha | `tools.web_search.bocha_api_key` | [博查开放平台](https://open.bochaai.com/) |
|
||||
| 百度千帆 | 复用 `qianfan_api_key` | [千帆控制台](https://cloud.baidu.com/doc/qianfan/s/2mh4su4uy) |
|
||||
| 智谱 Zhipu | 复用 `zhipu_ai_api_key` | [智谱开放平台](https://docs.bigmodel.cn/cn/guide/tools/web-search) |
|
||||
| LinkAI | 复用 `linkai_api_key` | [LinkAI 控制台](https://link-ai.tech/console/interface) |
|
||||
|
||||
## 参数
|
||||
除博查需要单独的 `bocha_api_key` 外,其他三家直接复用对应模型的 API Key,配好模型即同时获得搜索能力。
|
||||
|
||||
## 路由策略
|
||||
|
||||
```json
|
||||
{
|
||||
"tools": {
|
||||
"web_search": {
|
||||
"strategy": "auto",
|
||||
"provider": ""
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
- `auto`(默认):由 Agent 在已配置的厂商中智能选择,并可在一次任务中多次调用、切换不同厂商以获取更全面的结果;未指定时按 `bocha → qianfan → zhipu → linkai` 顺序兜底。
|
||||
- `fixed`:固定使用 `provider` 指定的厂商;该厂商凭证缺失时自动回落到 auto 顺序。
|
||||
|
||||
## 工具参数
|
||||
|
||||
| 参数 | 类型 | 必填 | 说明 |
|
||||
| --- | --- | --- | --- |
|
||||
| `query` | string | 是 | 搜索关键词 |
|
||||
| `count` | integer | 否 | 返回结果数量(1-50,默认 10) |
|
||||
| `freshness` | string | 否 | 时间范围:`noLimit`、`oneDay`、`oneWeek`、`oneMonth`、`oneYear`,或日期范围如 `2025-01-01..2025-02-01` |
|
||||
| `count` | integer | 否 | 返回结果数量(1–50,默认 10) |
|
||||
| `freshness` | string | 否 | 时间范围:`noLimit`(默认)、`oneDay`、`oneWeek`、`oneMonth`、`oneYear`,或日期范围如 `2025-01-01..2025-02-01` |
|
||||
| `summary` | boolean | 否 | 是否返回页面摘要(默认 false) |
|
||||
|
||||
## 使用场景
|
||||
|
||||
当用户询问最新信息、需要事实核查或获取实时数据时,Agent 会自动调用此工具。
|
||||
| `provider` | string | 否 | `auto` 策略下配置了多个厂商时可见,用于单次切换厂商 |
|
||||
|
||||
<Note>
|
||||
如果未配置任何搜索 API Key,该工具不会被加载。
|
||||
四家凭证均未配置时,该工具不会注册到 Agent。
|
||||
</Note>
|
||||
|
||||
@@ -60,7 +60,7 @@ class ChatGPTBot(Bot, OpenAIImage, OpenAICompatibleBot):
|
||||
"timeout": conf().get("request_timeout", None), # 重试超时时间,在这个时间内,将会自动重试
|
||||
}
|
||||
# 部分模型暂不支持一些参数,特殊处理
|
||||
if conf_model in [const.O1, const.O1_MINI, const.GPT_5, const.GPT_5_MINI, const.GPT_5_NANO]:
|
||||
if conf_model in [const.O1, const.O1_MINI, const.GPT_5, const.GPT_5_MINI, const.GPT_5_NANO, const.GPT_55]:
|
||||
remove_keys = ["temperature", "top_p", "frequency_penalty", "presence_penalty"]
|
||||
for key in remove_keys:
|
||||
self.args.pop(key, None) # 如果键不存在,使用 None 来避免抛出错误
|
||||
|
||||
@@ -38,9 +38,9 @@ class GoogleGeminiBot(Bot):
|
||||
|
||||
@property
|
||||
def model(self):
|
||||
model_name = conf().get("model") or "gemini-3.1-pro-preview"
|
||||
model_name = conf().get("model") or "gemini-3.5-flash"
|
||||
if model_name == "gemini":
|
||||
model_name = "gemini-3.1-pro-preview"
|
||||
model_name = "gemini-3.5-flash"
|
||||
return model_name
|
||||
|
||||
@property
|
||||
|
||||
@@ -89,8 +89,9 @@ class OpenAICompatibleBot:
|
||||
messages[0] = {"role": "system", "content": system_prompt}
|
||||
|
||||
# Build request parameters
|
||||
model_name = kwargs.get("model", api_config.get('model', 'gpt-5.4'))
|
||||
request_params = {
|
||||
"model": kwargs.get("model", api_config.get('model', 'gpt-3.5-turbo')),
|
||||
"model": model_name,
|
||||
"messages": messages,
|
||||
"temperature": kwargs.get("temperature", api_config.get('default_temperature', 0.9)),
|
||||
"top_p": kwargs.get("top_p", api_config.get('default_top_p', 1.0)),
|
||||
@@ -98,6 +99,10 @@ class OpenAICompatibleBot:
|
||||
"presence_penalty": kwargs.get("presence_penalty", api_config.get('default_presence_penalty', 0.0)),
|
||||
"stream": stream
|
||||
}
|
||||
# GPT-5 / GPT-5.5 / o1 series only accept default temperature/top_p and reject penalty params
|
||||
if model_name in ("gpt-5", "gpt-5-mini", "gpt-5-nano", "gpt-5.5", "o1", "o1-mini"):
|
||||
for key in ("temperature", "top_p", "frequency_penalty", "presence_penalty"):
|
||||
request_params.pop(key, None)
|
||||
|
||||
# Add max_tokens if specified
|
||||
if kwargs.get("max_tokens"):
|
||||
|
||||
@@ -1056,6 +1056,38 @@ class CowCliPlugin(Plugin):
|
||||
logger.warning(f"[CowCli] /memory dream sync failed: {e}")
|
||||
return f"❌ 记忆蒸馏失败: {e}"
|
||||
|
||||
@staticmethod
def _resolve_active_embedding():
    """
    Work out which embedding setup the current configuration selects.

    The answer is derived from the latest config values rather than from
    the provider instance cached on a running agent (which may be stale),
    so /memory status and the rebuild-index hints describe what a rebuild
    would actually use after embedding_provider has been changed.

    Returns a (label, model, dim) tuple; any element may be None when it
    cannot be determined.
    """
    from agent.memory.embedding import EMBEDDING_VENDORS
    from config import conf

    def _text(key):
        # Missing, None and empty values all collapse to "".
        return (conf().get(key) or "").strip()

    vendor = _text("embedding_provider").lower()
    model_override = _text("embedding_model")
    try:
        dim_override = int(conf().get("embedding_dimensions") or 0)
    except (TypeError, ValueError):
        # Unparseable dimension setting: treat it as "not configured".
        dim_override = 0

    if vendor:
        # Explicit provider: config overrides win, vendor defaults fill gaps.
        vendor_meta = EMBEDDING_VENDORS.get(vendor) or {}
        model = model_override or vendor_meta.get("default_model")
        dim = dim_override if dim_override > 0 else vendor_meta.get("default_dimensions")
        return vendor, model, dim

    # Legacy auto path: openai is tried before linkai, and both default to
    # text-embedding-3-small (1536).
    legacy_candidates = (
        ("open_ai_api_key", "openai (legacy)"),
        ("linkai_api_key", "linkai (legacy)"),
    )
    for key_name, label in legacy_candidates:
        if _text(key_name):
            return label, "text-embedding-3-small", 1536
    return "(legacy)", None, None
|
||||
|
||||
def _memory_status(self) -> str:
|
||||
"""Show current memory index status."""
|
||||
from agent.memory.embedding import detect_index_dim
|
||||
@@ -1078,15 +1110,14 @@ class CowCliPlugin(Plugin):
|
||||
lines.append(f" Chunks : {chunks} (embedded: {embedded})")
|
||||
lines.append("")
|
||||
|
||||
# Active provider (from running config + provider instance).
|
||||
# Resolve from the latest config so users see what /memory rebuild-index
|
||||
# will actually run as — not what the cached agent was initialized with.
|
||||
cfg_provider, cfg_model, cfg_dim = self._resolve_active_embedding()
|
||||
provider_obj = memory_manager.embedding_provider
|
||||
cfg_provider = (conf().get("embedding_provider") or "").strip().lower() or "(legacy)"
|
||||
if provider_obj is not None:
|
||||
cfg_model = getattr(provider_obj, "model", "?")
|
||||
cfg_dim = getattr(provider_obj, "_dimensions", None) or "?"
|
||||
if cfg_model:
|
||||
lines.append(f" Provider : {cfg_provider}")
|
||||
lines.append(f" Model : {cfg_model}")
|
||||
lines.append(f" Dim : {cfg_dim}")
|
||||
lines.append(f" Dim : {cfg_dim if cfg_dim else '?'}")
|
||||
else:
|
||||
lines.append(" Provider : (未初始化, keyword-only)")
|
||||
|
||||
@@ -1105,7 +1136,6 @@ class CowCliPlugin(Plugin):
|
||||
)
|
||||
|
||||
index_dim = detect_index_dim(memory_manager.storage)
|
||||
cfg_dim = getattr(provider_obj, "_dimensions", None)
|
||||
if index_dim is not None and cfg_dim and index_dim != cfg_dim:
|
||||
warnings.append(
|
||||
f" ⚠️ 索引中存量向量为 {index_dim} 维,与当前配置 {cfg_dim} 维不一致;"
|
||||
@@ -1129,15 +1159,27 @@ class CowCliPlugin(Plugin):
|
||||
)
|
||||
|
||||
memory_manager = agent.memory_manager
|
||||
if memory_manager.embedding_provider is None:
|
||||
|
||||
# Rebuild against the LATEST config: build a fresh provider from
|
||||
# config.json and swap it onto memory_manager. The agent's
|
||||
# conversation_history and other state are untouched.
|
||||
try:
|
||||
from bridge.agent_initializer import AgentInitializer
|
||||
fresh_provider = AgentInitializer(bridge=None, agent_bridge=None) \
|
||||
._init_embedding_provider(memory_manager.config, session_id=session_id)
|
||||
except Exception as e:
|
||||
logger.exception("[CowCli] /memory rebuild-index: build provider failed")
|
||||
return f"⚠️ 无法根据当前配置构造 embedding provider: {e}"
|
||||
|
||||
if fresh_provider is None:
|
||||
return (
|
||||
"⚠️ 当前没有可用的 embedding provider。\n"
|
||||
"请检查 config.json 中的 embedding 相关配置 (provider / api key)。"
|
||||
)
|
||||
memory_manager.embedding_provider = fresh_provider
|
||||
|
||||
provider_obj = memory_manager.embedding_provider
|
||||
model_label = getattr(provider_obj, "model", "?")
|
||||
dim_label = getattr(provider_obj, "dimensions", "?")
|
||||
model_label = getattr(fresh_provider, "model", "?")
|
||||
dim_label = getattr(fresh_provider, "dimensions", "?")
|
||||
|
||||
# SaaS (e_context is None): run synchronously, return final result
|
||||
if e_context is None:
|
||||
@@ -1168,7 +1210,7 @@ class CowCliPlugin(Plugin):
|
||||
threading.Thread(target=_run, daemon=True).start()
|
||||
return (
|
||||
f"🔧 索引重建已启动 (model={model_label}, dim={dim_label})\n\n"
|
||||
f"将清空现有 chunks 并重新 embed 所有记忆文件,完成后会通知你。"
|
||||
f"将重新向量化所有记忆和知识文件,完成后会通知你。"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
|
||||
@@ -1110,7 +1110,7 @@ def main():
|
||||
# Model resolution priority:
|
||||
# 1. Explicit `model` in the call args (agent / user override)
|
||||
# 2. SKILL_IMAGE_GENERATION_MODEL env var (synced from
|
||||
# config["skill"]["image-generation"]["model"] at startup)
|
||||
# config["skills"]["image-generation"]["model"] at startup)
|
||||
# 3. None → fall back to automatic provider routing (try every
|
||||
# provider with a configured API key in global priority order)
|
||||
model = args.get("model") or os.environ.get("SKILL_IMAGE_GENERATION_MODEL") or ""
|
||||
|
||||
@@ -394,7 +394,7 @@ class TestQianfanVisionTool(unittest.TestCase):
|
||||
"open_ai_api_key": "",
|
||||
"linkai_api_key": "",
|
||||
"use_linkai": False,
|
||||
"tool": {},
|
||||
"tools": {},
|
||||
}
|
||||
if values:
|
||||
data.update(values)
|
||||
@@ -424,7 +424,7 @@ class TestQianfanVisionTool(unittest.TestCase):
|
||||
def test_vision_routes_ernie_model_override_to_qianfan(self):
|
||||
fake_conf = self._fake_conf({
|
||||
"qianfan_api_key": "test-qianfan-key",
|
||||
"tool": {"vision": {"model": "ernie-4.5-turbo-vl-32k"}},
|
||||
"tools": {"vision": {"model": "ernie-4.5-turbo-vl-32k"}},
|
||||
})
|
||||
fake_bot = MagicMock()
|
||||
fake_bot.call_vision = MagicMock()
|
||||
|
||||
0
voice/dashscope/__init__.py
Normal file
175
voice/dashscope/dashscope_voice.py
Normal file
@@ -0,0 +1,175 @@
|
||||
# encoding:utf-8
|
||||
"""DashScope voice: qwen3-asr-flash (ASR) + qwen3-tts-flash (TTS)
|
||||
via dashscope.MultiModalConversation."""
|
||||
import datetime
|
||||
import os
|
||||
import random
|
||||
from typing import Optional
|
||||
|
||||
import dashscope
|
||||
import requests
|
||||
from dashscope import MultiModalConversation
|
||||
|
||||
from bridge.reply import Reply, ReplyType
|
||||
from common.log import logger
|
||||
from config import conf
|
||||
from voice import audio_convert
|
||||
from voice.voice import Voice
|
||||
|
||||
|
||||
# Model / voice used when the matching config key is unset or empty.
DEFAULT_ASR_MODEL = "qwen3-asr-flash"
DEFAULT_TTS_MODEL = "qwen3-tts-flash"
DEFAULT_TTS_VOICE = "Cherry"
# Not referenced by the code in this module.
# NOTE(review): presumably the ASR audio-length limit in seconds — confirm.
MAX_DURATION_SECONDS = 300
# Audio files larger than this only trigger a warning before being submitted.
MAX_FILE_BYTES = 10 * 1024 * 1024


class DashScopeVoice(Voice):
    """Voice backend built on ``dashscope.MultiModalConversation``.

    Both directions go through ``MultiModalConversation.call``. The models
    default to ``DEFAULT_ASR_MODEL`` / ``DEFAULT_TTS_MODEL`` and can be
    overridden with the ``voice_to_text_model`` / ``text_to_voice_model``
    config keys; the TTS voice comes from ``tts_voice_id``.
    """

    def __init__(self):
        pass

    def voiceToText(self, voice_file: str):
        """Transcribe an audio file.

        Args:
            voice_file: Path of the audio file to transcribe. AMR/SILK files
                are converted to mp3 first (best effort).

        Returns:
            ``Reply(ReplyType.TEXT, text)`` on success, otherwise a
            ``Reply(ReplyType.ERROR, ...)``. Exceptions are logged and turned
            into an error reply; they never propagate to the caller.
        """
        try:
            voice_file = self._ensure_compatible_format(voice_file)

            # The size check is advisory only: a failing stat must not block
            # the transcription attempt.
            try:
                size = os.path.getsize(voice_file)
            except OSError:
                size = None
            if size is not None and size > MAX_FILE_BYTES:
                logger.warning(
                    f"[DashScopeVoice] audio file {size}B exceeds {MAX_FILE_BYTES}B; "
                    f"qwen3-asr-flash may reject it"
                )

            api_key = conf().get("dashscope_api_key", "")
            if not api_key:
                logger.error("[DashScopeVoice] dashscope_api_key is not configured")
                return Reply(ReplyType.ERROR, "未配置 DashScope API key")
            dashscope.api_key = api_key

            model = conf().get("voice_to_text_model") or DEFAULT_ASR_MODEL
            abs_path = os.path.abspath(voice_file)
            file_uri = f"file://{abs_path}"

            messages = [
                {"role": "user", "content": [{"audio": file_uri}]},
            ]
            response = MultiModalConversation.call(
                model=model,
                # Passed explicitly (as textToVoice does) so the request does
                # not depend solely on the mutable module-level key.
                api_key=api_key,
                messages=messages,
                result_format="message",
                asr_options={"enable_itn": False, "enable_lid": True},
            )

            text = self._extract_text(response)
            if text is None:
                logger.error(f"[DashScopeVoice] voiceToText failed: {response}")
                return Reply(ReplyType.ERROR, "我暂时还无法听清您的语音,请稍后再试吧~")

            logger.info(f"[DashScopeVoice] voiceToText model={model} text={text}")
            return Reply(ReplyType.TEXT, text)
        except Exception as e:
            logger.exception(f"[DashScopeVoice] voiceToText exception: {e}")
            return Reply(ReplyType.ERROR, "我暂时还无法听清您的语音,请稍后再试吧~")

    def textToVoice(self, text: str):
        """Synthesize speech for ``text`` and store it as a local audio file.

        Args:
            text: Text to speak. Empty or whitespace-only text is rejected
                without calling the API.

        Returns:
            ``Reply(ReplyType.VOICE, local_path)`` on success, otherwise a
            ``Reply(ReplyType.ERROR, ...)``. Exceptions are logged and turned
            into an error reply.
        """
        try:
            api_key = conf().get("dashscope_api_key", "")
            if not api_key:
                logger.error("[DashScopeVoice] dashscope_api_key is not configured")
                return Reply(ReplyType.ERROR, "未配置 DashScope API key")
            dashscope.api_key = api_key

            # Nothing to synthesize: skip the network round trip.
            if not text or not text.strip():
                logger.error("[DashScopeVoice] textToVoice called with empty text")
                return Reply(ReplyType.ERROR, "语音合成失败")

            model = conf().get("text_to_voice_model") or DEFAULT_TTS_MODEL
            voice = conf().get("tts_voice_id") or DEFAULT_TTS_VOICE
            response = MultiModalConversation.call(
                model=model,
                api_key=api_key,
                text=text,
                voice=voice,
                stream=False,
            )

            url = self._extract_audio_url(response)
            if not url:
                logger.error(f"[DashScopeVoice] textToVoice failed: {response}")
                return Reply(ReplyType.ERROR, "语音合成失败")

            # _download_audio logs its own failure reason.
            local_path = self._download_audio(url)
            if not local_path:
                return Reply(ReplyType.ERROR, "语音合成失败")

            logger.info(f"[DashScopeVoice] textToVoice model={model} voice={voice} file={local_path}")
            return Reply(ReplyType.VOICE, local_path)
        except Exception as e:
            logger.exception(f"[DashScopeVoice] textToVoice exception: {e}")
            return Reply(ReplyType.ERROR, "语音合成失败")

    @staticmethod
    def _extract_audio_url(response) -> Optional[str]:
        """Return the audio URL from a TTS response, or ``None``.

        ``None`` is returned for a non-200 ``status_code``, a missing or empty
        ``output``/``audio``/``url``, or any unexpected response shape.
        """
        try:
            if getattr(response, "status_code", 200) != 200:
                return None
            audio = response.output.get("audio") if response.output else None
            # ``audio`` is read either as a mapping or as an attribute object.
            if isinstance(audio, dict):
                return audio.get("url") or None
            return getattr(audio, "url", None) or None
        except Exception:
            return None

    @staticmethod
    def _download_audio(url: str) -> Optional[str]:
        """Download ``url`` into ``<cwd>/tmp`` and return the local path.

        The extension is taken from the URL path (query string ignored) and
        falls back to ``.wav`` when missing or not one of the known audio
        extensions.

        Returns:
            The path of the written file, or ``None`` if the download failed
            or returned an empty body (the reason is logged).
        """
        try:
            tmp_dir = os.path.join(os.getcwd(), "tmp")
            os.makedirs(tmp_dir, exist_ok=True)
            ts = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
            ext = os.path.splitext(url.split("?", 1)[0])[1].lower() or ".wav"
            if ext not in (".mp3", ".wav", ".m4a", ".aac", ".opus"):
                ext = ".wav"
            # The timestamp only has second resolution, so a random token keeps
            # concurrent replies from overwriting each other's file.
            dst = os.path.join(tmp_dir, f"dashscope_tts_{ts}_{os.urandom(4).hex()}{ext}")
            resp = requests.get(url, timeout=60)
            resp.raise_for_status()
            if not resp.content:
                # An empty file would otherwise be handed out as a voice reply.
                logger.error("[DashScopeVoice] download audio failed: empty response body")
                return None
            with open(dst, "wb") as f:
                f.write(resp.content)
            return dst
        except Exception as e:
            logger.error(f"[DashScopeVoice] download audio failed: {e}")
            return None

    @staticmethod
    def _ensure_compatible_format(voice_file: str) -> str:
        """Convert AMR/SILK input to mp3; return the path that should be sent.

        Other formats are returned unchanged. If the conversion fails, the
        original path is returned so the caller can still try it.
        """
        # qwen3-asr-flash doesn't accept AMR/SILK; mp3/wav/m4a/aac/opus pass through.
        # NOTE(review): ".sil" is added as another SILK extension — confirm
        # audio_convert.any_to_mp3 handles it; a failure falls back to the
        # original file below.
        if voice_file.lower().endswith((".amr", ".silk", ".slk", ".sil")):
            try:
                mp3_file = os.path.splitext(voice_file)[0] + ".mp3"
                audio_convert.any_to_mp3(voice_file, mp3_file)
                return mp3_file
            except Exception as e:
                logger.warning(f"[DashScopeVoice] mp3 convert failed: {e}")
        return voice_file

    @staticmethod
    def _extract_text(response) -> Optional[str]:
        """Return the transcript from an ASR response, or ``None``.

        The first choice's message ``content`` may be a plain string or a list
        whose items are strings or ``{"text": ...}`` dicts; list items are
        concatenated. ``None`` is returned for a non-200 ``status_code``, an
        empty result, or any unexpected response shape.
        """
        try:
            if getattr(response, "status_code", 200) != 200:
                return None
            choices = response.output.get("choices") or []
            if not choices:
                return None
            content = (choices[0].get("message") or {}).get("content")
            if isinstance(content, str):
                return content.strip() or None
            if isinstance(content, list):
                parts = []
                for item in content:
                    piece = item.get("text") if isinstance(item, dict) else item
                    # Skip non-string parts instead of letting join() raise
                    # and discard the whole transcript.
                    if isinstance(piece, str):
                        parts.append(piece)
                text = "".join(parts).strip()
                return text or None
            return None
        except Exception:
            return None
|
||||
@@ -58,4 +58,12 @@ def create_voice(voice_type):
|
||||
from voice.minimax.minimax_voice import MinimaxVoice
|
||||
|
||||
return MinimaxVoice()
|
||||
elif voice_type == "dashscope":
|
||||
from voice.dashscope.dashscope_voice import DashScopeVoice
|
||||
|
||||
return DashScopeVoice()
|
||||
elif voice_type == "zhipu" or voice_type == "zhipuai":
|
||||
from voice.zhipuai.zhipuai_voice import ZhipuAIVoice
|
||||
|
||||
return ZhipuAIVoice()
|
||||
raise RuntimeError
|
||||
|
||||
@@ -1,16 +1,18 @@
|
||||
"""
|
||||
google voice service
|
||||
"""
|
||||
"""LinkAI voice: Whisper ASR + multi-vendor TTS (OpenAI / Doubao / Baidu)
|
||||
proxied via https://docs.link-ai.tech/platform/api/voice-speech."""
|
||||
import datetime
|
||||
import os
|
||||
import random
|
||||
|
||||
import requests
|
||||
from voice import audio_convert
|
||||
|
||||
from bridge.reply import Reply, ReplyType
|
||||
from common import const
|
||||
from common.log import logger
|
||||
from config import conf
|
||||
from voice import audio_convert
|
||||
from voice.voice import Voice
|
||||
from common import const
|
||||
import os
|
||||
import datetime
|
||||
|
||||
|
||||
class LinkAIVoice(Voice):
|
||||
def __init__(self):
|
||||
@@ -21,8 +23,7 @@ class LinkAIVoice(Voice):
|
||||
try:
|
||||
url = conf().get("linkai_api_base", "https://api.link-ai.tech") + "/v1/audio/transcriptions"
|
||||
headers = {"Authorization": "Bearer " + conf().get("linkai_api_key")}
|
||||
model = None
|
||||
if not conf().get("text_to_voice") or conf().get("voice_to_text") == "openai":
|
||||
# Pin whisper-1: gateway ignores any other ASR model id.
|
||||
model = const.WHISPER_1
|
||||
if voice_file.endswith(".amr"):
|
||||
try:
|
||||
@@ -30,54 +31,59 @@ class LinkAIVoice(Voice):
|
||||
audio_convert.any_to_mp3(voice_file, mp3_file)
|
||||
voice_file = mp3_file
|
||||
except Exception as e:
|
||||
logger.warn(f"[LinkVoice] amr file transfer failed, directly send amr voice file: {format(e)}")
|
||||
file = open(voice_file, "rb")
|
||||
file_body = {
|
||||
"file": file
|
||||
}
|
||||
data = {
|
||||
"model": model
|
||||
}
|
||||
res = requests.post(url, files=file_body, headers=headers, data=data, timeout=(5, 60))
|
||||
if res.status_code == 200:
|
||||
text = res.json().get("text")
|
||||
else:
|
||||
res_json = res.json()
|
||||
logger.error(f"[LinkVoice] voiceToText error, status_code={res.status_code}, msg={res_json.get('message')}")
|
||||
logger.warning(f"[LinkVoice] amr file transfer failed, directly send amr voice file: {e}")
|
||||
with open(voice_file, "rb") as file:
|
||||
res = requests.post(
|
||||
url,
|
||||
files={"file": file},
|
||||
headers=headers,
|
||||
data={"model": model},
|
||||
timeout=(5, 60),
|
||||
)
|
||||
if res.status_code != 200:
|
||||
msg = ""
|
||||
try:
|
||||
msg = res.json().get("message", "")
|
||||
except Exception:
|
||||
pass
|
||||
logger.error(f"[LinkVoice] voiceToText error, status_code={res.status_code}, msg={msg}")
|
||||
return None
|
||||
reply = Reply(ReplyType.TEXT, text)
|
||||
text = res.json().get("text")
|
||||
logger.info(f"[LinkVoice] voiceToText success, text={text}, file name={voice_file}")
|
||||
return Reply(ReplyType.TEXT, text)
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
return None
|
||||
return reply
|
||||
|
||||
def textToVoice(self, text):
|
||||
try:
|
||||
url = conf().get("linkai_api_base", "https://api.link-ai.tech") + "/v1/audio/speech"
|
||||
headers = {"Authorization": "Bearer " + conf().get("linkai_api_key")}
|
||||
model = const.TTS_1
|
||||
if not conf().get("text_to_voice") or conf().get("text_to_voice") in ["openai", const.TTS_1, const.TTS_1_HD]:
|
||||
model = conf().get("text_to_voice_model") or const.TTS_1
|
||||
# Gateway routes by `model` (tts-1 / doubao / baidu) + `voice` from
|
||||
# that engine's catalog. `app_code` is optional workspace override.
|
||||
data = {
|
||||
"model": model,
|
||||
"input": text,
|
||||
"voice": conf().get("tts_voice_id"),
|
||||
"app_code": conf().get("linkai_app_code")
|
||||
"app_code": conf().get("linkai_app_code"),
|
||||
}
|
||||
model = conf().get("text_to_voice_model")
|
||||
if model:
|
||||
data["model"] = model
|
||||
res = requests.post(url, headers=headers, json=data, timeout=(5, 120))
|
||||
if res.status_code == 200:
|
||||
if res.status_code != 200:
|
||||
msg = ""
|
||||
try:
|
||||
msg = res.json().get("message", "")
|
||||
except Exception:
|
||||
pass
|
||||
logger.error(f"[LinkVoice] textToVoice error, status_code={res.status_code}, msg={msg}")
|
||||
return None
|
||||
tmp_file_name = "tmp/" + datetime.datetime.now().strftime('%Y%m%d%H%M%S') + str(random.randint(0, 1000)) + ".mp3"
|
||||
os.makedirs(os.path.dirname(tmp_file_name), exist_ok=True)
|
||||
with open(tmp_file_name, 'wb') as f:
|
||||
f.write(res.content)
|
||||
reply = Reply(ReplyType.VOICE, tmp_file_name)
|
||||
logger.info(f"[LinkVoice] textToVoice success, input={text}, model={model}, voice_id={data.get('voice')}")
|
||||
return reply
|
||||
else:
|
||||
res_json = res.json()
|
||||
logger.error(f"[LinkVoice] textToVoice error, status_code={res.status_code}, msg={res_json.get('message')}")
|
||||
return None
|
||||
logger.info(f"[LinkVoice] textToVoice success, input={text}, voice_id={data.get('voice')}")
|
||||
return Reply(ReplyType.VOICE, tmp_file_name)
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
# reply = Reply(ReplyType.ERROR, "遇到了一点小问题,请稍后再问我吧")
|
||||
return None
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
# encoding:utf-8
|
||||
"""
|
||||
MiniMax TTS voice service
|
||||
"""
|
||||
"""MiniMax TTS via /v1/t2a_v2 (SSE stream, hex-encoded mp3 chunks)."""
|
||||
import datetime
|
||||
import json
|
||||
import random
|
||||
import requests
|
||||
|
||||
@@ -12,24 +11,12 @@ from config import conf
|
||||
from voice.voice import Voice
|
||||
|
||||
|
||||
MINIMAX_TTS_VOICES = [
|
||||
"English_Graceful_Lady",
|
||||
"English_Insightful_Speaker",
|
||||
"English_radiant_girl",
|
||||
"English_Persuasive_Man",
|
||||
"English_Lucky_Robot",
|
||||
"English_expressive_narrator",
|
||||
"Chinese_Warm_Woman",
|
||||
"Chinese_Gentle_Man",
|
||||
]
|
||||
|
||||
|
||||
class MinimaxVoice(Voice):
|
||||
def __init__(self):
|
||||
self.api_key = conf().get("minimax_api_key")
|
||||
self.api_base = conf().get("minimax_api_base") or "https://api.minimax.io"
|
||||
# Strip trailing /v1 if present so we can always append /v1/t2a_v2
|
||||
self.api_base = self.api_base.rstrip("/")
|
||||
# Mainland endpoint matches `sk-api-0-...` keys; override via
|
||||
# `minimax_api_base` for international (api.minimax.io) workspaces.
|
||||
self.api_base = (conf().get("minimax_api_base") or "https://api.minimaxi.com").rstrip("/")
|
||||
if self.api_base.endswith("/v1"):
|
||||
self.api_base = self.api_base[:-3]
|
||||
|
||||
@@ -68,12 +55,14 @@ class MinimaxVoice(Voice):
|
||||
response = requests.post(url, headers=headers, json=payload, stream=True, timeout=60)
|
||||
response.raise_for_status()
|
||||
|
||||
# Parse SSE stream and collect hex-encoded audio chunks
|
||||
# MiniMax returns HTTP 200 even on errors; capture base_resp for diagnostics.
|
||||
audio_chunks = []
|
||||
buffer = ""
|
||||
last_base_resp = None
|
||||
event_count = 0
|
||||
for raw in response.iter_lines():
|
||||
if not raw:
|
||||
continue
|
||||
event_count += 1
|
||||
line = raw.decode("utf-8") if isinstance(raw, bytes) else raw
|
||||
if not line.startswith("data:"):
|
||||
continue
|
||||
@@ -81,16 +70,31 @@ class MinimaxVoice(Voice):
|
||||
if not json_str or json_str == "[DONE]":
|
||||
continue
|
||||
try:
|
||||
import json
|
||||
event_data = json.loads(json_str)
|
||||
audio_hex = event_data.get("data", {}).get("audio")
|
||||
if audio_hex:
|
||||
audio_chunks.append(bytes.fromhex(audio_hex))
|
||||
except Exception:
|
||||
continue
|
||||
base_resp = event_data.get("base_resp") or {}
|
||||
if base_resp:
|
||||
last_base_resp = base_resp
|
||||
audio_hex = (event_data.get("data") or {}).get("audio")
|
||||
if audio_hex:
|
||||
try:
|
||||
audio_chunks.append(bytes.fromhex(audio_hex))
|
||||
except Exception as e:
|
||||
logger.warning(f"[MINIMAX] skip bad audio hex chunk: {e}")
|
||||
|
||||
if not audio_chunks:
|
||||
logger.error("[MINIMAX] TTS returned no audio data")
|
||||
ct = response.headers.get("Content-Type", "")
|
||||
if last_base_resp and last_base_resp.get("status_code") not in (None, 0):
|
||||
logger.error(
|
||||
f"[MINIMAX] TTS failed: status_code={last_base_resp.get('status_code')}, "
|
||||
f"status_msg={last_base_resp.get('status_msg')}, model={model}, voice_id={voice_id}"
|
||||
)
|
||||
else:
|
||||
logger.error(
|
||||
f"[MINIMAX] TTS returned no audio data, model={model}, voice_id={voice_id}, "
|
||||
f"url={url}, http={response.status_code}, content_type={ct!r}, events={event_count}"
|
||||
)
|
||||
return Reply(ReplyType.ERROR, "语音合成失败,未获取到音频数据")
|
||||
|
||||
audio_data = b"".join(audio_chunks)
|
||||
|
||||
@@ -31,7 +31,8 @@ class OpenaiVoice(Voice):
|
||||
"file": file,
|
||||
}
|
||||
data = {
|
||||
"model": "whisper-1",
|
||||
# Override via `voice_to_text_model` (e.g. fall back to whisper-1).
|
||||
"model": conf().get("voice_to_text_model") or "gpt-4o-mini-transcribe",
|
||||
}
|
||||
response = requests.post(url, headers=headers, files=files, data=data)
|
||||
response_data = response.json()
|
||||
|
||||
0
voice/zhipuai/__init__.py
Normal file
173
voice/zhipuai/zhipuai_voice.py
Normal file
@@ -0,0 +1,173 @@
|
||||
# encoding:utf-8
|
||||
"""ZhipuAI voice: glm-asr-2512 (ASR) + glm-tts (TTS) via BigModel REST API."""
|
||||
import datetime
|
||||
import os
|
||||
import random
|
||||
|
||||
import requests
|
||||
|
||||
from bridge.reply import Reply, ReplyType
|
||||
from common.log import logger
|
||||
from config import conf
|
||||
from voice import audio_convert
|
||||
from voice.voice import Voice
|
||||
|
||||
|
||||
# Model / voice / endpoint used when the matching config key is unset or empty.
DEFAULT_ASR_MODEL = "glm-asr-2512"
DEFAULT_TTS_MODEL = "glm-tts"
DEFAULT_TTS_VOICE = "tongtong"
DEFAULT_API_BASE = "https://open.bigmodel.cn/api/paas/v4"
# Audio files larger than this only trigger a warning before upload.
MAX_FILE_BYTES = 25 * 1024 * 1024
# Passed as the ``timeout`` of every requests call in this module.
REQUEST_TIMEOUT = (5, 60)


class ZhipuAIVoice(Voice):
    """Voice backend for the ZhipuAI (BigModel) REST API.

    Speech recognition posts to ``{api_base}/audio/transcriptions`` and speech
    synthesis to ``{api_base}/audio/speech``. ``api_base`` comes from the
    ``zhipu_ai_api_base`` config key (default ``DEFAULT_API_BASE``); models and
    voice can be overridden with ``voice_to_text_model``,
    ``text_to_voice_model`` and ``tts_voice_id``.
    """

    def __init__(self):
        pass

    def voiceToText(self, voice_file: str):
        """Transcribe an audio file.

        Args:
            voice_file: Path of the audio file. Anything that is not
                ``.wav``/``.mp3`` is converted to mp3 first (best effort).

        Returns:
            ``Reply(ReplyType.TEXT, text)`` on success, otherwise a
            ``Reply(ReplyType.ERROR, ...)``. Exceptions are logged and turned
            into an error reply; they never propagate to the caller.
        """
        try:
            voice_file = self._ensure_compatible_format(voice_file)

            # The size check is advisory only: a failing stat must not block
            # the upload attempt.
            try:
                size = os.path.getsize(voice_file)
            except OSError:
                size = None
            if size is not None and size > MAX_FILE_BYTES:
                logger.warning(
                    f"[ZhipuAIVoice] audio file {size}B exceeds {MAX_FILE_BYTES}B; "
                    f"glm-asr-2512 may reject it"
                )

            api_key = conf().get("zhipu_ai_api_key", "")
            if not api_key:
                logger.error("[ZhipuAIVoice] zhipu_ai_api_key is not configured")
                return Reply(ReplyType.ERROR, "未配置 ZhipuAI API key")

            api_base = (conf().get("zhipu_ai_api_base") or DEFAULT_API_BASE).rstrip("/")
            url = f"{api_base}/audio/transcriptions"
            model = conf().get("voice_to_text_model") or DEFAULT_ASR_MODEL
            headers = {"Authorization": f"Bearer {api_key}"}
            data = {"model": model, "stream": "false"}

            # Keep the file open only for the duration of the upload.
            with open(voice_file, "rb") as f:
                files = {"file": (os.path.basename(voice_file), f)}
                response = requests.post(
                    url, headers=headers, files=files, data=data, timeout=REQUEST_TIMEOUT
                )

            if response.status_code != 200:
                logger.error(
                    f"[ZhipuAIVoice] voiceToText failed: status={response.status_code} "
                    f"body={response.text[:500]}"
                )
                return Reply(ReplyType.ERROR, "我暂时还无法听清您的语音,请稍后再试吧~")

            payload = response.json()
            text = (payload.get("text") or "").strip()
            if not text:
                logger.error(f"[ZhipuAIVoice] voiceToText empty text: {payload}")
                return Reply(ReplyType.ERROR, "我暂时还无法听清您的语音,请稍后再试吧~")

            logger.info(f"[ZhipuAIVoice] voiceToText model={model} text={text}")
            return Reply(ReplyType.TEXT, text)
        except Exception as e:
            logger.exception(f"[ZhipuAIVoice] voiceToText exception: {e}")
            return Reply(ReplyType.ERROR, "我暂时还无法听清您的语音,请稍后再试吧~")

    def textToVoice(self, text: str):
        """Synthesize speech for ``text`` and store it under ``tmp/``.

        Args:
            text: Text to speak. Empty or whitespace-only text is rejected
                without calling the API.

        Returns:
            ``Reply(ReplyType.VOICE, file_name)`` on success, otherwise a
            ``Reply(ReplyType.ERROR, ...)``. Exceptions are logged and turned
            into an error reply.
        """
        try:
            api_key = conf().get("zhipu_ai_api_key", "")
            if not api_key:
                logger.error("[ZhipuAIVoice] zhipu_ai_api_key is not configured")
                return Reply(ReplyType.ERROR, "未配置 ZhipuAI API key")

            # Nothing to synthesize: skip the network round trip.
            if not text or not text.strip():
                logger.error("[ZhipuAIVoice] textToVoice called with empty text")
                return Reply(ReplyType.ERROR, "语音合成失败,请稍后再试")

            api_base = (conf().get("zhipu_ai_api_base") or DEFAULT_API_BASE).rstrip("/")
            url = f"{api_base}/audio/speech"
            model = conf().get("text_to_voice_model") or DEFAULT_TTS_MODEL
            voice_id = conf().get("tts_voice_id") or DEFAULT_TTS_VOICE

            payload = {
                "model": model,
                "input": text,
                "voice": voice_id,
                "response_format": "wav",
                "speed": 1.0,
                "volume": 1.0,
            }
            headers = {
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            }
            response = requests.post(
                url, headers=headers, json=payload, timeout=REQUEST_TIMEOUT
            )

            if response.status_code != 200:
                logger.error(
                    f"[ZhipuAIVoice] textToVoice failed: status={response.status_code} "
                    f"body={response.text[:500]} model={model} voice={voice_id}"
                )
                return Reply(ReplyType.ERROR, "语音合成失败,请稍后再试")

            # Some errors come back as JSON / SSE with HTTP 200.
            # Media types are case-insensitive, so compare in lower case.
            ct = response.headers.get("Content-Type", "")
            ct_lower = ct.lower()
            if "application/json" in ct_lower or "text/event-stream" in ct_lower:
                try:
                    err = response.json()
                except Exception:
                    err = {"raw": response.text[:500]}
                logger.error(
                    f"[ZhipuAIVoice] textToVoice unexpected text response "
                    f"(content_type={ct}): {err}"
                )
                return Reply(ReplyType.ERROR, "语音合成失败,请稍后再试")

            audio_bytes = response.content
            if not audio_bytes:
                # An empty file would otherwise be handed out as a voice reply.
                logger.error(
                    f"[ZhipuAIVoice] textToVoice returned an empty body "
                    f"model={model} voice={voice_id}"
                )
                return Reply(ReplyType.ERROR, "语音合成失败,请稍后再试")

            # Trust the bytes over the requested format; fall back to "wav",
            # which is what the request asked for.
            ext = self._sniff_audio_ext(audio_bytes) or "wav"

            # The timestamp only has second resolution, so a random token keeps
            # concurrent replies from overwriting each other's file.
            file_name = (
                "tmp/" + datetime.datetime.now().strftime("%Y%m%d%H%M%S")
                + os.urandom(4).hex() + "." + ext
            )
            os.makedirs(os.path.dirname(file_name), exist_ok=True)
            with open(file_name, "wb") as f:
                f.write(audio_bytes)
            logger.info(
                f"[ZhipuAIVoice] textToVoice model={model} voice={voice_id} "
                f"file={file_name} bytes={len(audio_bytes)} ext={ext}"
            )
            return Reply(ReplyType.VOICE, file_name)
        except Exception as e:
            logger.exception(f"[ZhipuAIVoice] textToVoice exception: {e}")
            return Reply(ReplyType.ERROR, "语音合成失败,请稍后再试")

    @staticmethod
    def _sniff_audio_ext(data: bytes) -> str:
        """Detect audio container by magic bytes; returns '' on unknown.

        Recognizes WAV (RIFF/WAVE), MP3 (ID3 tag or an MPEG Layer III frame
        header), Ogg and FLAC. Inputs shorter than 12 bytes are reported as
        unknown.
        """
        if len(data) < 12:
            return ""
        head = data[:12]
        if head[:4] == b"RIFF" and head[8:12] == b"WAVE":
            return "wav"
        # MPEG audio frame header: 11 sync bits, 2 version bits, 2 layer bits,
        # 1 CRC bit. Accept any valid version with layer III, with or without
        # CRC, rather than only a few hard-coded second bytes.
        second = head[1]
        is_mp3_frame = (
            head[0] == 0xFF
            and (second & 0xE0) == 0xE0   # frame sync
            and (second & 0x18) != 0x08   # version id 01 is reserved
            and (second & 0x06) == 0x02   # layer III
        )
        if head[:3] == b"ID3" or is_mp3_frame:
            return "mp3"
        if head[:4] == b"OggS":
            return "ogg"
        if head[:4] == b"fLaC":
            return "flac"
        return ""

    @staticmethod
    def _ensure_compatible_format(voice_file: str) -> str:
        """Return a ``.wav``/``.mp3`` path for ``voice_file``.

        Files already in one of those formats are returned unchanged; anything
        else is converted to mp3 next to the original. If the conversion
        fails, the original path is returned so the caller can still try it.
        """
        # glm-asr-2512 only accepts .wav / .mp3
        if voice_file.lower().endswith((".mp3", ".wav")):
            return voice_file
        try:
            mp3_file = os.path.splitext(voice_file)[0] + ".mp3"
            audio_convert.any_to_mp3(voice_file, mp3_file)
            return mp3_file
        except Exception as e:
            logger.warning(f"[ZhipuAIVoice] mp3 convert failed: {e}")
            return voice_file
|
||||