From b80c3fe5a8122b2201eb460b06fa00dfdb6100dd Mon Sep 17 00:00:00 2001 From: zhayujie Date: Tue, 5 May 2026 14:15:25 +0800 Subject: [PATCH] feat(feishu): enhance #2791 with cardkit streaming + ASR fixes - rewrite streaming reply to official cardkit v2.0 API (default on, auto-fallback) - fix Whisper hallucination: bump ASR sample rate to 16k, pass language=zh - fix lock-over-IO and tmp file cleanup from #2791 - drop deprecated feishu_bot_name; quiet unknown-key warnings - docs: cardkit permission and feishu_stream_reply usage --- README.md | 8 +- channel/feishu/feishu_channel.py | 323 ++++++++++++++++++++++++------- config-template.json | 3 +- config.py | 11 +- docs/channels/feishu.mdx | 16 +- docs/en/channels/feishu.mdx | 18 +- docs/ja/channels/feishu.mdx | 18 +- voice/audio_convert.py | 2 +- voice/openai/openai_voice.py | 14 +- 9 files changed, 304 insertions(+), 109 deletions(-) diff --git a/README.md b/README.md index e0529359..edb5cc02 100644 --- a/README.md +++ b/README.md @@ -726,14 +726,14 @@ Coding Plan 是各厂商推出的编程包月套餐,所有厂商均可通过 O 飞书支持两种事件接收模式:WebSocket 长连接(推荐)和 Webhook。 -**方式一:WebSocket 模式(推荐,无需公网 IP)** +**方式一:WebSocket 模式(默认,无需公网 IP)** ```json { "channel_type": "feishu", "feishu_app_id": "APP_ID", "feishu_app_secret": "APP_SECRET", - "feishu_event_mode": "websocket" + "feishu_stream_reply": true } ``` @@ -746,11 +746,13 @@ Coding Plan 是各厂商推出的编程包月套餐,所有厂商均可通过 O "feishu_app_secret": "APP_SECRET", "feishu_token": "VERIFICATION_TOKEN", "feishu_event_mode": "webhook", - "feishu_port": 9891 + "feishu_port": 9891, + "feishu_stream_reply": true } ``` - `feishu_event_mode`: 事件接收模式,`websocket`(推荐)或 `webhook` +- `feishu_stream_reply`: 是否开启流式打字机回复,需开通 `cardkit:card:write` 权限且飞书客户端 ≥ 7.20 - WebSocket 模式需安装依赖:`pip3 install lark-oapi` 详细步骤和参数说明参考 [飞书接入](https://docs.cowagent.ai/channels/feishu) diff --git a/channel/feishu/feishu_channel.py b/channel/feishu/feishu_channel.py index e9be8a3a..f3104044 100644 --- a/channel/feishu/feishu_channel.py +++ b/channel/feishu/feishu_channel.py @@ -507,109 +507,283 @@ class FeiShuChanel(ChatChannel): def _make_feishu_stream_callback(self, context, access_token): """ - Create an on_event callback for streaming text replies via Feishu message edit API. - First message_update event triggers a placeholder send to get a message_id; - subsequent events throttle-update that message in place. - agent_end does a final update and marks context["feishu_streamed"] = True - to suppress the duplicate send() call from chat_channel. + 基于飞书官方"流式更新卡片"API 实现打字机回复。 + + 流程: + 1. message_update 首次到达 → POST /cardkit/v1/cards 创建带 streaming_mode 的卡片实体, + 随后用 POST /im/v1/messages(或 reply)以 card_id 把卡片发出去 + 2. 后续 message_update → PUT /cardkit/v1/cards/{id}/elements/{eid}/content + 传入"当前轮"的全量文本,飞书平台自动计算增量并以打字机效果上屏 + (流式模式下不受 10 QPS 限制) + 3. message_end(一轮 LLM 输出结束,且本轮触发了工具调用)→ 把 current 累计到 committed + 并加入分隔符;下一轮 message_update 又从空白开始,避免多轮内容串到一起 + 4. agent_end → 用 final_response 强制覆盖卡片,再 PATCH /cardkit/v1/cards/{id}/settings + 关闭 streaming_mode,标记 context["feishu_streamed"]=True 让 chat_channel 跳过普通 send() + + 前提条件: + - 机器人已开通 cardkit:card:write 权限 + - 飞书客户端 7.20+ + + 失败降级: + - 创建卡片实体失败(缺权限、网络等)→ 不设置 feishu_streamed 标记,让 chat_channel + 走普通文本回复路径,用户收到完整回复但无打字机效果,并打 warning 日志 """ import time as _time - import threading as _threading - streamed_text = [""] - message_id = [None] + # 共享状态(受 lock 保护) + committed_text = [""] # 已结束轮次的累积内容(含分隔符) + current_text = [""] # 当前轮 LLM 输出的累积内容 + card_id = [None] # 创建出来的卡片实体 ID + message_id = [None] # 卡片发送后的消息 ID(仅日志用) last_update_time = [0.0] - lock = _threading.Lock() + # 占位发送是同步进行的,但用一个 in-flight 标记防止并发的多条 message_update + # 事件各自触发一次创建+发送,导致发出多张卡片。 + init_in_flight = [False] + # 一旦初始化失败就长期标记为 disabled,本次回复不再尝试任何流式调用 + disabled = [False] + lock = threading.Lock() msg = context.get("msg") is_group = context.get("isgroup", False) receiver = context.get("receiver") receive_id_type = context.get("receive_id_type", "open_id") - interval_s = conf().get("feishu_stream_interval_ms", 500) / 1000.0 + # 后端推流间隔与客户端打字机渲染参数:飞书原生 streaming_config 默认值经验证 + # 已能在大部分场景下取得平滑的打字机效果,无需暴露给用户配置。 + interval_s = 0.3 + print_freq_ms = 70 + print_step = 2 + print_strategy = "fast" headers = { "Authorization": "Bearer " + access_token, - "Content-Type": "application/json", + "Content-Type": "application/json; charset=utf-8", } + # 卡片中富文本组件的 element_id,后续所有 PUT 流式更新都打到这个组件 + ELEMENT_ID = "stream_md" + # 操作序号,每次 PUT 必须严格递增(飞书要求) + sequence = [0] - def _make_card(text): - # update_multi=true is required by Feishu for PATCH updates to be visible to all recipients. + def _next_sequence(): + sequence[0] += 1 + return sequence[0] + + def _build_card_json(): + """卡片 JSON 2.0 结构 + streaming_mode + 单 markdown 组件""" return json.dumps({ - "config": {"update_multi": True}, - "elements": [ - { - "tag": "div", - "text": {"tag": "lark_md", "content": text}, - } - ], - }) + "schema": "2.0", + "config": { + "streaming_mode": True, + "summary": {"content": "[正在生成回复...]"}, + "streaming_config": { + "print_frequency_ms": {"default": print_freq_ms}, + "print_step": {"default": print_step}, + "print_strategy": print_strategy, + }, + }, + "body": { + "elements": [ + { + "tag": "markdown", + "content": "...", + "element_id": ELEMENT_ID, + } + ], + }, + # 注意:JSON 2.0 不支持自定义 fallback 字段(传入会报错)。 + # 客户端 < 7.20 时,飞书会自动展示"请升级客户端"占位,无需配置。 + }, ensure_ascii=False) - def _send_placeholder(): - can_reply = is_group and msg and hasattr(msg, 'msg_id') and msg.msg_id - placeholder_content = _make_card("...") + def _create_and_send_card(): + """同步执行:创建卡片实体 → 发送消息。任意一步失败则 disabled=True 触发降级""" try: - if can_reply: - url = f"https://open.feishu.cn/open-apis/im/v1/messages/{msg.msg_id}/reply" - data = {"msg_type": "interactive", "content": placeholder_content} - res = requests.post(url=url, headers=headers, json=data, timeout=(5, 10)) - else: - url = "https://open.feishu.cn/open-apis/im/v1/messages" - params = {"receive_id_type": receive_id_type} - data = { - "receive_id": receiver, - "msg_type": "interactive", - "content": placeholder_content, - } - res = requests.post(url=url, headers=headers, params=params, json=data, timeout=(5, 10)) - res_json = res.json() - if res_json.get("code") == 0: - mid = res_json["data"]["message_id"] - message_id[0] = mid - logger.info(f"[FeiShu] Stream: placeholder sent, message_id={mid}") - else: - logger.error(f"[FeiShu] Stream: placeholder failed: {res_json}") - except Exception as e: - logger.error(f"[FeiShu] Stream: placeholder exception: {e}") - - def _patch_message(text): - if not message_id[0]: - return - # PATCH updates interactive (card) messages; no edit-count limit per Feishu docs. - url = f"https://open.feishu.cn/open-apis/im/v1/messages/{message_id[0]}" - data = {"content": _make_card(text)} - try: - res = requests.patch(url=url, headers=headers, json=data, timeout=(5, 10)) + # 步骤 1: 创建卡片实体 + create_url = "https://open.feishu.cn/open-apis/cardkit/v1/cards" + create_body = {"type": "card_json", "data": _build_card_json()} + res = requests.post( + create_url, headers=headers, json=create_body, timeout=(5, 10) + ) res_json = res.json() if res_json.get("code") != 0: - logger.warning(f"[FeiShu] Stream: patch failed: {res_json}") + logger.warning( + f"[FeiShu] Stream: create card failed " + f"(code={res_json.get('code')}, msg={res_json.get('msg')}). " + f"本次回复已自动降级为普通文本回复(一次性返回完整内容)。" + f"如需开启流式打字机效果与完整 Markdown 渲染,请到飞书开放平台 " + f"https://open.feishu.cn/app 给机器人开通 cardkit:card:write 权限" + f"(创建与更新卡片)并重新发布版本,同时确保飞书客户端 >= 7.20。" + ) + with lock: + disabled[0] = True + return + cid = res_json["data"]["card_id"] + with lock: + card_id[0] = cid + + # 步骤 2: 通过 card_id 发送消息(群聊优先用 reply,单聊直接 send) + content_payload = json.dumps( + {"type": "card", "data": {"card_id": cid}}, ensure_ascii=False + ) + can_reply = is_group and msg and hasattr(msg, "msg_id") and msg.msg_id + if can_reply: + send_url = ( + f"https://open.feishu.cn/open-apis/im/v1/messages/" + f"{msg.msg_id}/reply" + ) + send_body = {"msg_type": "interactive", "content": content_payload} + send_res = requests.post( + send_url, headers=headers, json=send_body, timeout=(5, 10) + ) + else: + send_url = "https://open.feishu.cn/open-apis/im/v1/messages" + params = {"receive_id_type": receive_id_type} + send_body = { + "receive_id": receiver, + "msg_type": "interactive", + "content": content_payload, + } + send_res = requests.post( + send_url, headers=headers, params=params, json=send_body, + timeout=(5, 10), + ) + send_json = send_res.json() + if send_json.get("code") != 0: + logger.warning( + f"[FeiShu] Stream: send card failed: {send_json}. 降级为普通文本。" + ) + with lock: + disabled[0] = True + return + mid = send_json["data"]["message_id"] + with lock: + message_id[0] = mid + logger.info( + f"[FeiShu] Stream: card created and sent, " + f"card_id={cid}, message_id={mid}" + ) except Exception as e: - logger.warning(f"[FeiShu] Stream: patch exception: {e}") + logger.warning( + f"[FeiShu] Stream: create/send card exception: {e}. 降级为普通文本。" + ) + with lock: + disabled[0] = True + finally: + with lock: + init_in_flight[0] = False + + def _stream_update_text(full_text): + """PUT 流式更新文本组件。content 必须是当前组件的全量文本。""" + with lock: + cid = card_id[0] + if not cid: + return + url = ( + f"https://open.feishu.cn/open-apis/cardkit/v1/cards/" + f"{cid}/elements/{ELEMENT_ID}/content" + ) + body = { + "content": full_text, + "sequence": _next_sequence(), + } + try: + res = requests.put(url, headers=headers, json=body, timeout=(5, 10)) + res_json = res.json() + if res_json.get("code") != 0: + logger.warning( + f"[FeiShu] Stream: update text failed: {res_json}" + ) + except Exception as e: + logger.warning(f"[FeiShu] Stream: update text exception: {e}") + + def _close_streaming_mode(): + """关闭流式模式(卡片转入"普通"状态,可被转发,摘要不再显示[生成中...])""" + with lock: + cid = card_id[0] + if not cid: + return + url = ( + f"https://open.feishu.cn/open-apis/cardkit/v1/cards/" + f"{cid}/settings" + ) + settings_payload = json.dumps( + {"config": {"streaming_mode": False}}, ensure_ascii=False + ) + body = { + "settings": settings_payload, + "sequence": _next_sequence(), + } + try: + res = requests.patch(url, headers=headers, json=body, timeout=(5, 10)) + res_json = res.json() + if res_json.get("code") != 0: + logger.warning( + f"[FeiShu] Stream: close streaming_mode failed: {res_json}" + ) + except Exception as e: + logger.warning( + f"[FeiShu] Stream: close streaming_mode exception: {e}" + ) def on_event(event: dict): event_type = event.get("type") data = event.get("data", {}) + # 一旦降级,本次回复不再做任何流式操作 + with lock: + if disabled[0]: + return + if event_type == "message_update": delta = data.get("delta", "") if not delta: return + + # 第一段:判断是否需要初始化(创建卡片 + 发送) + need_init = False with lock: - # 在锁内发送占位消息,防止并发的 message_update 事件各自触发一次占位发送, - # 导致发出多条 "..." 占位消息。 - if message_id[0] is None: - _send_placeholder() - streamed_text[0] += delta + if card_id[0] is None and not init_in_flight[0]: + init_in_flight[0] = True + need_init = True + + if need_init: + _create_and_send_card() + # 初始化失败已标记 disabled,下次循环直接 return + with lock: + if disabled[0]: + return + + # 第二段:累加当前轮文本,按节流推送(锁内只读写状态) + should_push = False + snapshot = "" + with lock: + current_text[0] += delta now = _time.time() - if now - last_update_time[0] >= interval_s: + if card_id[0] and (now - last_update_time[0] >= interval_s): last_update_time[0] = now - _patch_message(streamed_text[0]) + snapshot = committed_text[0] + current_text[0] + should_push = True + + if should_push: + _stream_update_text(snapshot) + + elif event_type == "message_end": + # 一轮 LLM 输出结束。如果本轮触发了工具调用,把当前轮内容定型到 committed + # 并加分隔符;否则当前轮就是最终内容(agent_end 会处理)。 + tool_calls = data.get("tool_calls", []) or [] + if tool_calls: + with lock: + if current_text[0].strip(): + committed_text[0] += current_text[0].rstrip() + "\n\n---\n\n" + current_text[0] = "" elif event_type == "agent_end": + # 用 final_response 强制覆盖整张卡片:丢弃中间累积,避免拼接错误。 final_response = data.get("final_response", "") - with lock: - if message_id[0] and final_response: - _patch_message(str(final_response)) - context["feishu_streamed"] = True + if final_response: + final_text = str(final_response) + # 标记 streamed 让 chat_channel 跳过 send() + context["feishu_streamed"] = True + _stream_update_text(final_text) + _close_streaming_mode() return on_event @@ -864,12 +1038,6 @@ class FeiShuChanel(ChatChannel): f"[FeiShu] upload audio response, status={upload_response.status_code}, res={upload_response.content}") response_data = upload_response.json() if response_data.get("code") == 0: - # 若进行了格式转换,上传成功后清理临时 opus 文件,避免磁盘堆积 - if upload_path != audio_path and os.path.exists(upload_path): - try: - os.remove(upload_path) - except Exception as e: - logger.warning(f"[FeiShu] Failed to remove temp opus file {upload_path}: {e}") return response_data.get("data").get("file_key") else: logger.error(f"[FeiShu] upload audio failed: {response_data}") @@ -877,6 +1045,13 @@ class FeiShuChanel(ChatChannel): except Exception as e: logger.error(f"[FeiShu] upload audio exception: {e}") return None + finally: + # 无论上传成功与否都清理转换产生的临时 opus 文件,避免失败路径下磁盘堆积。 + if upload_path != audio_path and os.path.exists(upload_path): + try: + os.remove(upload_path) + except Exception as e: + logger.warning(f"[FeiShu] Failed to remove temp opus file {upload_path}: {e}") def _upload_file_url(self, file_url, access_token): """ diff --git a/config-template.json b/config-template.json index 8f6d9f7a..97517ae8 100644 --- a/config-template.json +++ b/config-template.json @@ -24,8 +24,7 @@ "linkai_app_code": "", "feishu_app_id": "", "feishu_app_secret": "", - "feishu_bot_name": "", - "feishu_stream_reply": false, + "feishu_stream_reply": true, "dingtalk_client_id": "", "dingtalk_client_secret": "", "wecom_bot_id": "", diff --git a/config.py b/config.py index a95b542d..955e93cd 100644 --- a/config.py +++ b/config.py @@ -142,12 +142,13 @@ available_setting = { "wechatcomapp_agent_id": "", # 企业微信app的agent_id "wechatcomapp_aes_key": "", # 企业微信app的aes_key # 飞书配置 - "feishu_port": 80, # 飞书bot监听端口 + "feishu_port": 80, # 飞书bot监听端口,仅webhook模式需要 "feishu_app_id": "", # 飞书机器人应用APP Id "feishu_app_secret": "", # 飞书机器人APP secret - "feishu_token": "", # 飞书 verification token - "feishu_bot_name": "", # 飞书机器人的名字 + "feishu_token": "", # 飞书 verification token,仅webhook模式需要 "feishu_event_mode": "websocket", # 飞书事件接收模式: webhook(HTTP服务器) 或 websocket(长连接) + # 飞书流式回复(基于官方 cardkit 流式卡片 API,需要机器人开通 cardkit:card:write 权限,且飞书客户端 7.20+) + "feishu_stream_reply": True, # 是否开启流式回复(打字机效果)。失败/老客户端自动降级为非流式或升级提示 # 钉钉配置 "dingtalk_client_id": "", # 钉钉机器人Client ID "dingtalk_client_secret": "", # 钉钉机器人Client Secret @@ -228,13 +229,13 @@ class Config(dict): def __getitem__(self, key): # 跳过以下划线开头的注释字段 if not key.startswith("_") and key not in available_setting: - logger.warning("[Config] key '{}' not in available_setting, may not take effect".format(key)) + logger.debug("[Config] key '{}' not in available_setting, may not take effect".format(key)) return super().__getitem__(key) def __setitem__(self, key, value): # 跳过以下划线开头的注释字段 if not key.startswith("_") and key not in available_setting: - logger.warning("[Config] key '{}' not in available_setting, may not take effect".format(key)) + logger.debug("[Config] key '{}' not in available_setting, may not take effect".format(key)) return super().__setitem__(key, value) def get(self, key, default=None): diff --git a/docs/channels/feishu.mdx b/docs/channels/feishu.mdx index 575aa483..286721b7 100644 --- a/docs/channels/feishu.mdx +++ b/docs/channels/feishu.mdx @@ -24,7 +24,7 @@ description: 将 CowAgent 接入飞书应用 点击 **权限管理**,复制以下权限配置,粘贴到 **权限配置** 下方的输入框内,全选筛选出来的权限,点击 **批量开通** 并确认: ``` -im:message,im:message.group_at_msg,im:message.group_at_msg:readonly,im:message.p2p_msg,im:message.p2p_msg:readonly,im:message:send_as_bot,im:resource +im:message,im:message.group_at_msg,im:message.group_at_msg:readonly,im:message.p2p_msg,im:message.p2p_msg:readonly,im:message:send_as_bot,im:resource,cardkit:card:write ``` @@ -42,15 +42,17 @@ im:message,im:message.group_at_msg,im:message.group_at_msg:readonly,im:message.p "channel_type": "feishu", "feishu_app_id": "YOUR_APP_ID", "feishu_app_secret": "YOUR_APP_SECRET", - "feishu_bot_name": "YOUR_BOT_NAME" + "feishu_stream_reply": true } ``` -| 参数 | 说明 | -| --- | --- | -| `feishu_app_id` | 飞书机器人应用 App ID | -| `feishu_app_secret` | 飞书机器人 App Secret | -| `feishu_bot_name` | 飞书机器人名称(创建应用时设置),群聊中使用依赖此配置 | +| 参数 | 说明 | 默认值 | +| --- | --- | --- | +| `feishu_app_id` | 飞书机器人应用 App ID | - | +| `feishu_app_secret` | 飞书机器人 App Secret | - | +| `feishu_stream_reply` | 是否开启流式打字机回复,关闭则一次性返回完整文本 | `true` | + +> **流式回复要求**:需要为机器人开通 `cardkit:card:write` 权限,且接收方飞书客户端版本 ≥ 7.20。低版本客户端会显示 "请升级客户端" 占位提示;权限未开通时会自动降级为普通文本回复。 配置完成后启动项目。 diff --git a/docs/en/channels/feishu.mdx b/docs/en/channels/feishu.mdx index de90da04..17b30842 100644 --- a/docs/en/channels/feishu.mdx +++ b/docs/en/channels/feishu.mdx @@ -24,9 +24,11 @@ In **Add App Capabilities**, add **Bot** capability to the app: Click **Permission Management**, paste the following permission string into the input box below **Permission Configuration**, select all filtered permissions, click **Batch Enable** and confirm: ``` -im:message,im:message.group_at_msg,im:message.group_at_msg:readonly,im:message.p2p_msg,im:message.p2p_msg:readonly,im:message:send_as_bot,im:resource +im:message,im:message.group_at_msg,im:message.group_at_msg:readonly,im:message.p2p_msg,im:message.p2p_msg:readonly,im:message:send_as_bot,im:resource,cardkit:card:write ``` +`cardkit:card:write` is used for streaming typewriter replies (creating and updating streaming cards). You can skip it if streaming is not needed. + ## 2. Project Configuration @@ -42,15 +44,17 @@ im:message,im:message.group_at_msg,im:message.group_at_msg:readonly,im:message.p "channel_type": "feishu", "feishu_app_id": "YOUR_APP_ID", "feishu_app_secret": "YOUR_APP_SECRET", - "feishu_bot_name": "YOUR_BOT_NAME" + "feishu_stream_reply": true } ``` -| Parameter | Description | -| --- | --- | -| `feishu_app_id` | Feishu bot App ID | -| `feishu_app_secret` | Feishu bot App Secret | -| `feishu_bot_name` | Bot name (set when creating the app), required for group chat usage | +| Parameter | Description | Default | +| --- | --- | --- | +| `feishu_app_id` | Feishu bot App ID | - | +| `feishu_app_secret` | Feishu bot App Secret | - | +| `feishu_stream_reply` | Enable streaming typewriter reply (powered by Feishu cardkit streaming card API). Disable to return the full text at once. | `true` | + +> **Streaming requirements**: requires `cardkit:card:write` permission on the bot, and recipient Feishu client version ≥ 7.20. Older clients will see a "please upgrade" placeholder; if the permission is missing, replies automatically fall back to plain text. Start the project after configuration is complete. diff --git a/docs/ja/channels/feishu.mdx b/docs/ja/channels/feishu.mdx index 50ea8efe..0b4c9cae 100644 --- a/docs/ja/channels/feishu.mdx +++ b/docs/ja/channels/feishu.mdx @@ -24,9 +24,11 @@ description: CowAgent を Feishu アプリケーションに統合する **権限管理**をクリックし、**権限設定**の下の入力欄に以下の権限文字列を貼り付け、フィルタされたすべての権限を選択し、**一括有効化**をクリックして確認します: ``` -im:message,im:message.group_at_msg,im:message.group_at_msg:readonly,im:message.p2p_msg,im:message.p2p_msg:readonly,im:message:send_as_bot,im:resource +im:message,im:message.group_at_msg,im:message.group_at_msg:readonly,im:message.p2p_msg,im:message.p2p_msg:readonly,im:message:send_as_bot,im:resource,cardkit:card:write ``` +`cardkit:card:write` はストリーミングタイプライター応答(ストリーミングカードの作成と更新)に使用されます。ストリーミングが不要な場合は省略できます。 + ## 2. プロジェクト設定 @@ -42,15 +44,17 @@ im:message,im:message.group_at_msg,im:message.group_at_msg:readonly,im:message.p "channel_type": "feishu", "feishu_app_id": "YOUR_APP_ID", "feishu_app_secret": "YOUR_APP_SECRET", - "feishu_bot_name": "YOUR_BOT_NAME" + "feishu_stream_reply": true } ``` -| パラメータ | 説明 | -| --- | --- | -| `feishu_app_id` | Feishu Bot の App ID | -| `feishu_app_secret` | Feishu Bot の App Secret | -| `feishu_bot_name` | Bot 名(アプリ作成時に設定)、グループチャットで使用する際に必要 | +| パラメータ | 説明 | デフォルト値 | +| --- | --- | --- | +| `feishu_app_id` | Feishu Bot の App ID | - | +| `feishu_app_secret` | Feishu Bot の App Secret | - | +| `feishu_stream_reply` | ストリーミングタイプライター応答を有効にするか(Feishu 公式 cardkit ストリーミングカード API を使用)。無効化するとテキストを一括で返します。 | `true` | + +> **ストリーミングの要件**: Bot に `cardkit:card:write` 権限を付与する必要があり、受信者の Feishu クライアントバージョンが 7.20 以上である必要があります。古いバージョンのクライアントでは「クライアントをアップグレードしてください」というプレースホルダーが表示されます。権限が付与されていない場合は、自動的に通常のテキスト応答にフォールバックします。 設定完了後、プロジェクトを起動します。 diff --git a/voice/audio_convert.py b/voice/audio_convert.py index f48c19f6..0601f863 100644 --- a/voice/audio_convert.py +++ b/voice/audio_convert.py @@ -79,7 +79,7 @@ def any_to_wav(any_path, wav_path): audio = AudioSegment.from_file(any_path, parameters=["-nostdin"]) # AudioSegment 是不可变对象:set_frame_rate/set_channels 返回新对象,不修改原对象。 # 必须将返回值重新赋给 audio,否则修改不会生效。 - audio = audio.set_frame_rate(8000) # 百度语音转写支持8000采样率, pcm_s16le, 单通道语音识别 + audio = audio.set_frame_rate(16000) audio = audio.set_channels(1) audio.export(wav_path, format="wav", codec='pcm_s16le') diff --git a/voice/openai/openai_voice.py b/voice/openai/openai_voice.py index d48e4b4f..3ffa00aa 100644 --- a/voice/openai/openai_voice.py +++ b/voice/openai/openai_voice.py @@ -35,10 +35,18 @@ class OpenaiVoice(Voice): } response = requests.post(url, headers=headers, files=files, data=data) response_data = response.json() - text = response_data['text'] - reply = Reply(ReplyType.TEXT, text) - logger.info("[Openai] voiceToText text={} voice file name={}".format(text, voice_file)) + if response.status_code != 200 or "text" not in response_data: + logger.error( + f"[Openai] voiceToText failed: status={response.status_code}, " + f"resp={response_data}" + ) + reply = Reply(ReplyType.ERROR, "我暂时还无法听清您的语音,请稍后再试吧~") + else: + text = response_data["text"] + reply = Reply(ReplyType.TEXT, text) + logger.info("[Openai] voiceToText text={} voice file name={}".format(text, voice_file)) except Exception as e: + logger.error(f"[Openai] voiceToText exception: {e}", exc_info=True) reply = Reply(ReplyType.ERROR, "我暂时还无法听清您的语音,请稍后再试吧~") finally: return reply