mirror of
https://github.com/zhayujie/chatgpt-on-wechat.git
synced 2026-06-02 00:57:41 +08:00
feat(feishu): enhance #2791 with cardkit streaming + ASR fixes
- rewrite streaming reply to official cardkit v2.0 API (default on, auto-fallback)
- fix Whisper hallucination: bump ASR sample rate to 16k, pass language=zh
- fix lock-over-IO and tmp file cleanup from #2791
- drop deprecated feishu_bot_name; quiet unknown-key warnings
- docs: cardkit permission and feishu_stream_reply usage
This commit is contained in:
@@ -79,7 +79,7 @@ def any_to_wav(any_path, wav_path):
|
||||
audio = AudioSegment.from_file(any_path, parameters=["-nostdin"])
|
||||
# AudioSegment 是不可变对象:set_frame_rate/set_channels 返回新对象,不修改原对象。
|
||||
# 必须将返回值重新赋给 audio,否则修改不会生效。
|
||||
audio = audio.set_frame_rate(8000) # 百度语音转写支持8000采样率, pcm_s16le, 单通道语音识别
|
||||
audio = audio.set_frame_rate(16000)
|
||||
audio = audio.set_channels(1)
|
||||
audio.export(wav_path, format="wav", codec='pcm_s16le')
|
||||
|
||||
|
||||
@@ -35,10 +35,18 @@ class OpenaiVoice(Voice):
|
||||
}
|
||||
response = requests.post(url, headers=headers, files=files, data=data)
|
||||
response_data = response.json()
|
||||
text = response_data['text']
|
||||
reply = Reply(ReplyType.TEXT, text)
|
||||
logger.info("[Openai] voiceToText text={} voice file name={}".format(text, voice_file))
|
||||
if response.status_code != 200 or "text" not in response_data:
|
||||
logger.error(
|
||||
f"[Openai] voiceToText failed: status={response.status_code}, "
|
||||
f"resp={response_data}"
|
||||
)
|
||||
reply = Reply(ReplyType.ERROR, "我暂时还无法听清您的语音,请稍后再试吧~")
|
||||
else:
|
||||
text = response_data["text"]
|
||||
reply = Reply(ReplyType.TEXT, text)
|
||||
logger.info("[Openai] voiceToText text={} voice file name={}".format(text, voice_file))
|
||||
except Exception as e:
|
||||
logger.error(f"[Openai] voiceToText exception: {e}", exc_info=True)
|
||||
reply = Reply(ReplyType.ERROR, "我暂时还无法听清您的语音,请稍后再试吧~")
|
||||
finally:
|
||||
return reply
|
||||
|
||||
Reference in New Issue
Block a user