feat(feishu): enhance #2791 with cardkit streaming + ASR fixes

- rewrite streaming reply to official cardkit v2.0 API (default on, auto-fallback)
- fix Whisper hallucination: bump ASR sample rate to 16k, pass language=zh
- fix lock-over-IO and tmp file cleanup from #2791
- drop deprecated feishu_bot_name; quiet unknown-key warnings
- docs: cardkit permission and feishu_stream_reply usage
This commit is contained in:
zhayujie
2026-05-05 14:15:25 +08:00
parent 5080051e39
commit b80c3fe5a8
9 changed files with 304 additions and 109 deletions

View File

@@ -79,7 +79,7 @@ def any_to_wav(any_path, wav_path):
audio = AudioSegment.from_file(any_path, parameters=["-nostdin"])
# AudioSegment 是不可变对象,set_frame_rate/set_channels 返回新对象,不修改原对象。
# 必须将返回值重新赋给 audio,否则修改不会生效。
audio = audio.set_frame_rate(8000) # 百度语音转写支持8000采样率, pcm_s16le, 单通道语音识别
audio = audio.set_frame_rate(16000)
audio = audio.set_channels(1)
audio.export(wav_path, format="wav", codec='pcm_s16le')

View File

@@ -35,10 +35,18 @@ class OpenaiVoice(Voice):
}
response = requests.post(url, headers=headers, files=files, data=data)
response_data = response.json()
text = response_data['text']
reply = Reply(ReplyType.TEXT, text)
logger.info("[Openai] voiceToText text={} voice file name={}".format(text, voice_file))
if response.status_code != 200 or "text" not in response_data:
logger.error(
f"[Openai] voiceToText failed: status={response.status_code}, "
f"resp={response_data}"
)
reply = Reply(ReplyType.ERROR, "我暂时还无法听清您的语音,请稍后再试吧~")
else:
text = response_data["text"]
reply = Reply(ReplyType.TEXT, text)
logger.info("[Openai] voiceToText text={} voice file name={}".format(text, voice_file))
except Exception as e:
logger.error(f"[Openai] voiceToText exception: {e}", exc_info=True)
reply = Reply(ReplyType.ERROR, "我暂时还无法听清您的语音,请稍后再试吧~")
finally:
return reply