feat(feishu): enhance #2791 with cardkit streaming + ASR fixes

- rewrite streaming reply to official cardkit v2.0 API (default on, auto-fallback)
- fix Whisper hallucination: bump ASR sample rate to 16 kHz, pass language=zh
- fix lock held across I/O and tmp file cleanup from #2791
- drop deprecated feishu_bot_name; quiet unknown-key warnings
- docs: cardkit permission and feishu_stream_reply usage
2026-07-17 11:07:11 +08:00 · 2026-05-05 14:15:25 +08:00
parent 5080051e39
commit b80c3fe5a8
9 changed files with 304 additions and 109 deletions
--- a/voice/audio_convert.py
+++ b/voice/audio_convert.py
@@ -79,7 +79,7 @@ def any_to_wav(any_path, wav_path):
    audio = AudioSegment.from_file(any_path, parameters=["-nostdin"])
    # AudioSegment 是不可变对象：set_frame_rate/set_channels 返回新对象，不修改原对象。
    # 必须将返回值重新赋给 audio，否则修改不会生效。
-    audio = audio.set_frame_rate(8000)   # 百度语音转写支持8000采样率, pcm_s16le, 单通道语音识别
+    audio = audio.set_frame_rate(16000)
    audio = audio.set_channels(1)
    audio.export(wav_path, format="wav", codec='pcm_s16le')

--- a/voice/openai/openai_voice.py
+++ b/voice/openai/openai_voice.py
@@ -35,10 +35,18 @@ class OpenaiVoice(Voice):
            }
            response = requests.post(url, headers=headers, files=files, data=data)
            response_data = response.json()
-            text = response_data['text']
-            reply = Reply(ReplyType.TEXT, text)
-            logger.info("[Openai] voiceToText text={} voice file name={}".format(text, voice_file))
+            if response.status_code != 200 or "text" not in response_data:
+                logger.error(
+                    f"[Openai] voiceToText failed: status={response.status_code}, "
+                    f"resp={response_data}"
+                )
+                reply = Reply(ReplyType.ERROR, "我暂时还无法听清您的语音，请稍后再试吧~")
+            else:
+                text = response_data["text"]
+                reply = Reply(ReplyType.TEXT, text)
+                logger.info("[Openai] voiceToText text={} voice file name={}".format(text, voice_file))
        except Exception as e:
+            logger.error(f"[Openai] voiceToText exception: {e}", exc_info=True)
            reply = Reply(ReplyType.ERROR, "我暂时还无法听清您的语音，请稍后再试吧~")
        finally:
            return reply