mirror of
https://github.com/zhayujie/chatgpt-on-wechat.git
synced 2026-06-02 00:57:41 +08:00
feat(voice): rework TTS/ASR stack and unify tool/skill config schema
This commit is contained in:
@@ -44,6 +44,7 @@ CREATE TABLE IF NOT EXISTS messages (
|
||||
role TEXT NOT NULL,
|
||||
content TEXT NOT NULL,
|
||||
created_at INTEGER NOT NULL,
|
||||
extras TEXT NOT NULL DEFAULT '',
|
||||
UNIQUE (session_id, seq)
|
||||
);
|
||||
|
||||
@@ -67,6 +68,12 @@ _MIGRATION_ADD_CONTEXT_START_SEQ = """
|
||||
ALTER TABLE sessions ADD COLUMN context_start_seq INTEGER NOT NULL DEFAULT 0;
|
||||
"""
|
||||
|
||||
# Generic JSON sidecar for per-message attachments (TTS audio URL, future use).
|
||||
# Always optional — readers must tolerate missing column / empty / invalid JSON.
|
||||
_MIGRATION_ADD_MSG_EXTRAS = """
|
||||
ALTER TABLE messages ADD COLUMN extras TEXT NOT NULL DEFAULT '';
|
||||
"""
|
||||
|
||||
DEFAULT_MAX_AGE_DAYS: int = 30
|
||||
|
||||
|
||||
@@ -169,20 +176,26 @@ def _group_into_display_turns(
|
||||
cur_rest: List[tuple] = []
|
||||
started = False
|
||||
|
||||
for role, raw_content, created_at in rows:
|
||||
for role, raw_content, created_at, raw_extras in rows:
|
||||
try:
|
||||
content = json.loads(raw_content)
|
||||
except Exception:
|
||||
content = raw_content
|
||||
try:
|
||||
extras = json.loads(raw_extras) if raw_extras else {}
|
||||
if not isinstance(extras, dict):
|
||||
extras = {}
|
||||
except Exception:
|
||||
extras = {}
|
||||
|
||||
if role == "user" and _is_visible_user_message(content):
|
||||
if started:
|
||||
groups.append((cur_user, cur_rest))
|
||||
cur_user = (content, created_at)
|
||||
cur_user = (content, created_at, extras)
|
||||
cur_rest = []
|
||||
started = True
|
||||
else:
|
||||
cur_rest.append((role, content, created_at))
|
||||
cur_rest.append((role, content, created_at, extras))
|
||||
|
||||
if started:
|
||||
groups.append((cur_user, cur_rest))
|
||||
@@ -195,7 +208,7 @@ def _group_into_display_turns(
|
||||
for user_row, rest in groups:
|
||||
# User turn
|
||||
if user_row:
|
||||
content, created_at = user_row
|
||||
content, created_at, _u_extras = user_row
|
||||
text = _extract_display_text(content)
|
||||
if text:
|
||||
turns.append({"role": "user", "content": text, "created_at": created_at})
|
||||
@@ -206,8 +219,11 @@ def _group_into_display_turns(
|
||||
tool_results: Dict[str, str] = {}
|
||||
final_text = ""
|
||||
final_ts: Optional[int] = None
|
||||
merged_extras: Dict[str, Any] = {}
|
||||
|
||||
for role, content, created_at in rest:
|
||||
for role, content, created_at, extras in rest:
|
||||
if role == "assistant" and isinstance(extras, dict):
|
||||
merged_extras.update(extras)
|
||||
if role == "user":
|
||||
tool_results.update(_extract_tool_results(content))
|
||||
elif role == "assistant":
|
||||
@@ -256,6 +272,8 @@ def _group_into_display_turns(
|
||||
"steps": steps,
|
||||
"created_at": final_ts or (user_row[1] if user_row else 0),
|
||||
}
|
||||
if merged_extras:
|
||||
turn["extras"] = merged_extras
|
||||
turns.append(turn)
|
||||
|
||||
return turns
|
||||
@@ -411,13 +429,15 @@ class ConversationStore:
|
||||
content = json.dumps(
|
||||
msg.get("content", ""), ensure_ascii=False
|
||||
)
|
||||
extras_obj = msg.get("extras") or {}
|
||||
extras = json.dumps(extras_obj, ensure_ascii=False) if extras_obj else ""
|
||||
conn.execute(
|
||||
"""
|
||||
INSERT OR IGNORE INTO messages
|
||||
(session_id, seq, role, content, created_at)
|
||||
VALUES (?, ?, ?, ?, ?)
|
||||
(session_id, seq, role, content, created_at, extras)
|
||||
VALUES (?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
(session_id, next_seq, role, content, now),
|
||||
(session_id, next_seq, role, content, now, extras),
|
||||
)
|
||||
next_seq += 1
|
||||
|
||||
@@ -651,6 +671,55 @@ class ConversationStore:
|
||||
logger.info(f"[ConversationStore] Pruned {deleted} expired sessions")
|
||||
return deleted
|
||||
|
||||
def attach_extras_to_last_assistant(
    self,
    session_id: str,
    extras: Dict[str, Any],
) -> Optional[int]:
    """
    Merge ``extras`` into the latest assistant message of a session.

    Used by post-processing (e.g. TTS) that needs to annotate an already
    persisted bot reply with attachments such as audio URLs.

    The stored ``extras`` column is decoded as JSON; an empty, invalid or
    non-object value is treated as ``{}``. Keys from ``extras`` overwrite
    existing keys of the same name.

    Args:
        session_id: Session whose most recent assistant message (highest
            ``seq``) is updated.
        extras: Mapping to merge into the stored extras. An empty mapping
            is a no-op.

    Returns:
        The message seq that was updated, or ``None`` if ``extras`` is
        empty, no assistant message exists, or the update could not be
        applied (including failure to open the database connection).
    """
    if not extras:
        return None

    with self._lock:
        # Opened inside the try so that a connection failure is reported
        # as ``None`` (the documented contract) instead of propagating.
        conn = None
        try:
            conn = self._connect()
            row = conn.execute(
                """
                SELECT seq, extras FROM messages
                WHERE session_id = ? AND role = 'assistant'
                ORDER BY seq DESC LIMIT 1
                """,
                (session_id,),
            ).fetchone()
            if not row:
                return None

            seq, raw = row
            # Tolerate legacy / corrupt values: anything that is not a
            # JSON object is replaced by a fresh dict.
            try:
                merged = json.loads(raw) if raw else {}
            except Exception:
                merged = {}
            if not isinstance(merged, dict):
                merged = {}

            merged.update(extras)
            conn.execute(
                "UPDATE messages SET extras = ? WHERE session_id = ? AND seq = ?",
                (json.dumps(merged, ensure_ascii=False), session_id, seq),
            )
            conn.commit()
            return seq
        except Exception as e:
            logger.warning(f"[ConversationStore] attach_extras failed: {e}")
            return None
        finally:
            # conn stays None when _connect() itself raised.
            if conn is not None:
                conn.close()
|
||||
|
||||
def load_history_page(
|
||||
self,
|
||||
session_id: str,
|
||||
@@ -698,15 +767,31 @@ class ConversationStore:
|
||||
).fetchone()
|
||||
ctx_start = ctx_row[0] if ctx_row else 0
|
||||
|
||||
rows = conn.execute(
|
||||
"""
|
||||
SELECT seq, role, content, created_at
|
||||
FROM messages
|
||||
WHERE session_id = ?
|
||||
ORDER BY seq ASC
|
||||
""",
|
||||
(session_id,),
|
||||
).fetchall()
|
||||
# extras column is added by migration; tolerate older DBs that
|
||||
# might miss it by falling back to a NULL literal.
|
||||
try:
|
||||
rows = conn.execute(
|
||||
"""
|
||||
SELECT seq, role, content, created_at, extras
|
||||
FROM messages
|
||||
WHERE session_id = ?
|
||||
ORDER BY seq ASC
|
||||
""",
|
||||
(session_id,),
|
||||
).fetchall()
|
||||
except sqlite3.OperationalError:
|
||||
rows = [
|
||||
(seq, role, content, created_at, "")
|
||||
for (seq, role, content, created_at) in conn.execute(
|
||||
"""
|
||||
SELECT seq, role, content, created_at
|
||||
FROM messages
|
||||
WHERE session_id = ?
|
||||
ORDER BY seq ASC
|
||||
""",
|
||||
(session_id,),
|
||||
).fetchall()
|
||||
]
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
@@ -719,13 +804,16 @@ class ConversationStore:
|
||||
include_thinking = False
|
||||
|
||||
# Strip seq for display grouping, but record max seq per visible user group
|
||||
plain_rows = [(role, content, created_at) for _seq, role, content, created_at in rows]
|
||||
plain_rows = [
|
||||
(role, content, created_at, extras_raw)
|
||||
for _seq, role, content, created_at, extras_raw in rows
|
||||
]
|
||||
visible = _group_into_display_turns(plain_rows, include_thinking=include_thinking)
|
||||
|
||||
# Build a mapping: find the seq of each visible user message to annotate context boundary.
|
||||
# Walk through rows to find visible user message seqs in order.
|
||||
visible_user_seqs: List[int] = []
|
||||
for seq, role, raw_content, _ts in rows:
|
||||
for seq, role, raw_content, _ts, _extras in rows:
|
||||
if role != "user":
|
||||
continue
|
||||
try:
|
||||
@@ -911,6 +999,18 @@ class ConversationStore:
|
||||
except Exception as e:
|
||||
logger.warning(f"[ConversationStore] Migration (context_start_seq) failed: {e}")
|
||||
|
||||
msg_cols = {
|
||||
row[1]
|
||||
for row in conn.execute("PRAGMA table_info(messages)").fetchall()
|
||||
}
|
||||
if "extras" not in msg_cols:
|
||||
try:
|
||||
conn.execute(_MIGRATION_ADD_MSG_EXTRAS)
|
||||
conn.commit()
|
||||
logger.info("[ConversationStore] Migrated: added messages.extras column")
|
||||
except Exception as e:
|
||||
logger.warning(f"[ConversationStore] Migration (extras) failed: {e}")
|
||||
|
||||
def _connect(self) -> sqlite3.Connection:
|
||||
conn = sqlite3.connect(str(self._db_path), timeout=10)
|
||||
conn.execute("PRAGMA journal_mode=WAL")
|
||||
|
||||
@@ -3,7 +3,7 @@ Vision tool - Analyze images using Vision API.
|
||||
Supports local files (auto base64-encoded) and HTTP URLs.
|
||||
|
||||
Provider resolution:
|
||||
- tool.vision.model (if set) means "prefer this model first; fall back to
|
||||
- tools.vision.model (if set) means "prefer this model first; fall back to
|
||||
other configured providers if it fails". The model name is mapped to its
|
||||
native provider (e.g. doubao-* → Doubao, kimi-* → Moonshot, gpt-* →
|
||||
OpenAI/LinkAI). That provider is tried first, then the standard auto
|
||||
@@ -60,7 +60,7 @@ _DISCOVERABLE_MODELS = [
|
||||
]
|
||||
|
||||
# Model name prefix → discoverable provider display_name.
|
||||
# Used to auto-route tool.vision.model to its native provider.
|
||||
# Used to auto-route tools.vision.model to its native provider.
|
||||
# Matched case-insensitively; longest prefix wins.
|
||||
_MODEL_PREFIX_TO_PROVIDER = [
|
||||
("doubao-", "Doubao"),
|
||||
@@ -154,7 +154,7 @@ class Vision(BaseTool):
|
||||
|
||||
# Default model is only used as a last-resort placeholder for providers
|
||||
# whose VisionProvider.model_override is None (e.g. raw OpenAI provider
|
||||
# when the user did not configure tool.vision.model).
|
||||
# when the user did not configure tools.vision.model).
|
||||
return self._call_with_fallback(providers, DEFAULT_MODEL, question, image_content)
|
||||
|
||||
def _call_with_fallback(self, providers: List[VisionProvider], model: str,
|
||||
@@ -193,12 +193,12 @@ class Vision(BaseTool):
|
||||
"""
|
||||
Build an ordered list of providers to try.
|
||||
|
||||
Semantics of `tool.vision.model`:
|
||||
Semantics of `tools.vision.model`:
|
||||
"Prefer this model first; fall back to other configured providers
|
||||
if it fails."
|
||||
|
||||
Order:
|
||||
1. The provider that natively serves `tool.vision.model` (if any
|
||||
1. The provider that natively serves `tools.vision.model` (if any
|
||||
and its API key is configured) — using the user-specified model
|
||||
name verbatim.
|
||||
2. Auto-discovery chain as fallback:
|
||||
@@ -213,7 +213,7 @@ class Vision(BaseTool):
|
||||
user_model = self._resolve_user_vision_model()
|
||||
providers: List[VisionProvider] = []
|
||||
|
||||
# Step 1: preferred provider derived from tool.vision.model
|
||||
# Step 1: preferred provider derived from tools.vision.model
|
||||
if user_model:
|
||||
preferred = self._route_by_model_name(user_model)
|
||||
if preferred:
|
||||
@@ -251,11 +251,11 @@ class Vision(BaseTool):
|
||||
|
||||
@staticmethod
|
||||
def _resolve_user_vision_model() -> Optional[str]:
|
||||
"""Read tool.vision.model from config; return None if unset/blank."""
|
||||
tool_conf = conf().get("tool", {})
|
||||
if not isinstance(tool_conf, dict):
|
||||
"""Read tools.vision.model (singular ``tool`` kept as runtime fallback)."""
|
||||
tools_conf = conf().get("tools") or conf().get("tool") or {}
|
||||
if not isinstance(tools_conf, dict):
|
||||
return None
|
||||
vision_conf = tool_conf.get("vision", {})
|
||||
vision_conf = tools_conf.get("vision", {})
|
||||
if not isinstance(vision_conf, dict):
|
||||
return None
|
||||
m = vision_conf.get("model")
|
||||
@@ -303,7 +303,7 @@ class Vision(BaseTool):
|
||||
self._append_provider(providers, lambda: self._build_linkai_provider(user_model))
|
||||
if providers:
|
||||
return providers
|
||||
logger.warning(f"[Vision] tool.vision.model='{user_model}' looks like an OpenAI "
|
||||
logger.warning(f"[Vision] tools.vision.model='{user_model}' looks like an OpenAI "
|
||||
f"model but neither OPENAI_API_KEY nor LINKAI_API_KEY is configured.")
|
||||
return None # fall through to auto
|
||||
|
||||
@@ -317,7 +317,7 @@ class Vision(BaseTool):
|
||||
continue
|
||||
api_key = conf().get(config_key, "")
|
||||
if not api_key or not api_key.strip():
|
||||
logger.warning(f"[Vision] tool.vision.model='{user_model}' routes to "
|
||||
logger.warning(f"[Vision] tools.vision.model='{user_model}' routes to "
|
||||
f"'{display_name}' but '{config_key}' is not configured. "
|
||||
f"Falling back to auto-discovery.")
|
||||
return None # fall through to auto
|
||||
@@ -452,8 +452,8 @@ class Vision(BaseTool):
|
||||
if not self._main_bot_supports_vision(bot):
|
||||
return None
|
||||
|
||||
# Use the configured main model name; do NOT inject tool.vision.model
|
||||
# here, because by the time we reach this branch the tool.vision.model
|
||||
# Use the configured main model name; do NOT inject tools.vision.model
|
||||
# here, because by the time we reach this branch the tools.vision.model
|
||||
# routing has already been attempted (and either matched the main bot
|
||||
# or failed to find a provider).
|
||||
main_model_name = conf().get("model") or None
|
||||
|
||||
Reference in New Issue
Block a user