mirror of
https://github.com/zhayujie/chatgpt-on-wechat.git
synced 2026-06-02 00:57:41 +08:00
百度语音转写支持8000采样率, pcm_s16le编码, 单通道语音的组合
This commit is contained in:
@@ -64,7 +64,9 @@ def any_to_wav(any_path, wav_path):
|
|||||||
if any_path.endswith(".sil") or any_path.endswith(".silk") or any_path.endswith(".slk"):
|
if any_path.endswith(".sil") or any_path.endswith(".silk") or any_path.endswith(".slk"):
|
||||||
return sil_to_wav(any_path, wav_path)
|
return sil_to_wav(any_path, wav_path)
|
||||||
audio = AudioSegment.from_file(any_path)
|
audio = AudioSegment.from_file(any_path)
|
||||||
audio.export(wav_path, format="wav")
|
audio.set_frame_rate(8000) # 百度语音转写支持8000采样率, pcm_s16le, 单通道语音识别
|
||||||
|
audio.set_channels(1)
|
||||||
|
audio.export(wav_path, format="wav", codec='pcm_s16le')
|
||||||
|
|
||||||
|
|
||||||
def any_to_sil(any_path, sil_path):
|
def any_to_sil(any_path, sil_path):
|
||||||
|
|||||||
@@ -62,7 +62,7 @@ class BaiduVoice(Voice):
|
|||||||
# 识别本地文件
|
# 识别本地文件
|
||||||
logger.debug("[Baidu] voice file name={}".format(voice_file))
|
logger.debug("[Baidu] voice file name={}".format(voice_file))
|
||||||
pcm = get_pcm_from_wav(voice_file)
|
pcm = get_pcm_from_wav(voice_file)
|
||||||
res = self.client.asr(pcm, "pcm", 16000, {"dev_pid": self.dev_id})
|
res = self.client.asr(pcm, "pcm", 8000, {"dev_pid": self.dev_id})
|
||||||
if res["err_no"] == 0:
|
if res["err_no"] == 0:
|
||||||
logger.info("百度语音识别到了:{}".format(res["result"]))
|
logger.info("百度语音识别到了:{}".format(res["result"]))
|
||||||
text = "".join(res["result"])
|
text = "".join(res["result"])
|
||||||
|
|||||||
Reference in New Issue
Block a user