fix: get correct audio format in pytts

This commit is contained in:
lanvent
2023-04-01 20:58:06 +08:00
parent 1545a9f262
commit 18aa5ce75c
6 changed files with 49 additions and 39 deletions

5
app.py
View File

@@ -1,5 +1,6 @@
# encoding:utf-8 # encoding:utf-8
import os
from config import conf, load_config from config import conf, load_config
from channel import channel_factory from channel import channel_factory
from common.log import logger from common.log import logger
@@ -13,6 +14,10 @@ def run():
# create channel # create channel
channel_name=conf().get('channel_type', 'wx') channel_name=conf().get('channel_type', 'wx')
if channel_name == 'wxy':
os.environ['WECHATY_LOG']="warn"
# os.environ['WECHATY_PUPPET_SERVICE_ENDPOINT'] = '127.0.0.1:9001'
channel = channel_factory.create_channel(channel_name) channel = channel_factory.create_channel(channel_name)
if channel_name in ['wx','wxy']: if channel_name in ['wx','wxy']:
PluginManager().load_plugins() PluginManager().load_plugins()

View File

@@ -194,14 +194,17 @@ class ChatChannel(Channel):
'channel': self, 'context': context, 'reply': reply})) 'channel': self, 'context': context, 'reply': reply}))
reply = e_context['reply'] reply = e_context['reply']
if not e_context.is_pass() and reply and reply.type: if not e_context.is_pass() and reply and reply.type:
logger.debug('[WX] ready to send reply: {} to {}'.format(reply, context)) logger.debug('[WX] ready to send reply: {}, context: {}'.format(reply, context))
self._send(reply, context) self._send(reply, context)
def _send(self, reply: Reply, context: Context, retry_cnt = 0): def _send(self, reply: Reply, context: Context, retry_cnt = 0):
try: try:
self.send(reply, context) self.send(reply, context)
except Exception as e: except Exception as e:
logger.error('[WX] sendMsg error: {}'.format(e)) logger.error('[WX] sendMsg error: {}'.format(str(e)))
if isinstance(e, NotImplementedError):
return
logger.exception(e)
if retry_cnt < 2: if retry_cnt < 2:
time.sleep(3+3*retry_cnt) time.sleep(3+3*retry_cnt)
self._send(reply, context, retry_cnt+1) self._send(reply, context, retry_cnt+1)

View File

@@ -20,7 +20,7 @@ from channel.wechat.wechaty_message import WechatyMessage
from common.log import logger from common.log import logger
from config import conf from config import conf
try: try:
from voice.audio_convert import mp3_to_sil from voice.audio_convert import any_to_sil
except Exception as e: except Exception as e:
pass pass
@@ -35,14 +35,12 @@ class WechatyChannel(ChatChannel):
pass pass
def startup(self): def startup(self):
asyncio.run(self.main())
async def main(self):
config = conf() config = conf()
token = config.get('wechaty_puppet_service_token') token = config.get('wechaty_puppet_service_token')
os.environ['WECHATY_PUPPET_SERVICE_TOKEN'] = token os.environ['WECHATY_PUPPET_SERVICE_TOKEN'] = token
os.environ['WECHATY_LOG']="warn" asyncio.run(self.main())
# os.environ['WECHATY_PUPPET_SERVICE_ENDPOINT'] = '127.0.0.1:9001'
async def main(self):
self.bot = Wechaty() self.bot = Wechaty()
self.bot.on('login', self.on_login) self.bot.on('login', self.on_login)
self.bot.on('message', self.on_message) self.bot.on('message', self.on_message)
@@ -72,18 +70,9 @@ class WechatyChannel(ChatChannel):
logger.info('[WX] sendMsg={}, receiver={}'.format(reply, receiver)) logger.info('[WX] sendMsg={}, receiver={}'.format(reply, receiver))
elif reply.type == ReplyType.VOICE: elif reply.type == ReplyType.VOICE:
voiceLength = None voiceLength = None
if reply.content.endswith('.mp3'): file_path = reply.content
mp3_file = reply.content sil_file = os.path.splitext(file_path)[0] + '.sil'
sil_file = os.path.splitext(mp3_file)[0] + '.sil' voiceLength = any_to_sil(file_path, sil_file)
voiceLength = mp3_to_sil(mp3_file, sil_file)
try:
os.remove(mp3_file)
except Exception as e:
pass
elif reply.content.endswith('.sil'):
sil_file = reply.content
else:
raise Exception('voice file must be mp3 or sil format')
# 发送语音 # 发送语音
t = int(time.time()) t = int(time.time())
msg = FileBox.from_file(sil_file, name=str(t) + '.sil') msg = FileBox.from_file(sil_file, name=str(t) + '.sil')
@@ -91,6 +80,7 @@ class WechatyChannel(ChatChannel):
msg.metadata['voiceLength'] = voiceLength msg.metadata['voiceLength'] = voiceLength
asyncio.run_coroutine_threadsafe(receiver.say(msg),loop).result() asyncio.run_coroutine_threadsafe(receiver.say(msg),loop).result()
try: try:
os.remove(file_path)
os.remove(sil_file) os.remove(sil_file)
except Exception as e: except Exception as e:
pass pass

View File

@@ -1,8 +1,8 @@
import shutil
import wave import wave
import pysilk import pysilk
from pydub import AudioSegment from pydub import AudioSegment
def get_pcm_from_wav(wav_path): def get_pcm_from_wav(wav_path):
""" """
从 wav 文件中读取 pcm 从 wav 文件中读取 pcm
@@ -13,6 +13,30 @@ def get_pcm_from_wav(wav_path):
wav = wave.open(wav_path, "rb") wav = wave.open(wav_path, "rb")
return wav.readframes(wav.getnframes()) return wav.readframes(wav.getnframes())
def any_to_wav(any_path, wav_path):
    """
    Convert an audio file of any supported format into a wav file.

    The input format is chosen from the file extension (case-insensitive):

    - ``.wav``: the file is copied to ``wav_path`` unchanged.
    - ``.sil`` / ``.silk`` / ``.slk``: delegated to ``sil_to_wav``.
    - anything else: loaded with ``AudioSegment.from_file`` and exported
      as wav.

    :param any_path: path of the source audio file
    :param wav_path: path the wav file is written to
    :return: the result of ``sil_to_wav`` for silk input, otherwise None
    """
    lowered_path = any_path.lower()
    if lowered_path.endswith('.wav'):
        try:
            shutil.copy2(any_path, wav_path)
        except shutil.SameFileError:
            # Source and destination are the same file, so the wav is
            # already in place; copying onto itself must not be an error.
            pass
        return
    if lowered_path.endswith(('.sil', '.silk', '.slk')):
        return sil_to_wav(any_path, wav_path)
    audio = AudioSegment.from_file(any_path)
    audio.export(wav_path, format="wav")
def any_to_sil(any_path, sil_path):
    """
    Convert an audio file of any supported format into a sil (silk) file.

    The input format is chosen from the file extension (case-insensitive):

    - ``.sil`` / ``.silk`` / ``.slk``: the file is copied to ``sil_path``
      unchanged and a fixed value of 10000 is returned.
    - ``.wav``: delegated to ``pcm_to_sil``.
    - ``.mp3``: delegated to ``mp3_to_sil``.

    :param any_path: path of the source audio file
    :param sil_path: path the sil file is written to
    :return: the voice length reported by the converter (milliseconds for
        ``pcm_to_sil`` / ``mp3_to_sil``); 10000 for input that is already silk
    :raises NotImplementedError: if the extension is not one of the above
    """
    lowered_path = any_path.lower()
    if lowered_path.endswith(('.sil', '.silk', '.slk')):
        try:
            shutil.copy2(any_path, sil_path)
        except shutil.SameFileError:
            # Callers may derive sil_path from any_path by swapping the
            # extension, which yields the same path for a '.sil' input.
            # The file is already where it needs to be, so this is fine.
            pass
        # NOTE(review): 10000 is a hard-coded value, presumably a placeholder
        # length in ms because the real duration is not computed here.
        return 10000
    if lowered_path.endswith('.wav'):
        return pcm_to_sil(any_path, sil_path)
    if lowered_path.endswith('.mp3'):
        return mp3_to_sil(any_path, sil_path)
    raise NotImplementedError("Not support file type: {}".format(any_path))
def mp3_to_wav(mp3_path, wav_path): def mp3_to_wav(mp3_path, wav_path):
""" """
@@ -21,18 +45,7 @@ def mp3_to_wav(mp3_path, wav_path):
audio = AudioSegment.from_mp3(mp3_path) audio = AudioSegment.from_mp3(mp3_path)
audio.export(wav_path, format="wav") audio.export(wav_path, format="wav")
def any_to_wav(any_path, wav_path): def pcm_to_sil(pcm_path, silk_path):
"""
把任意格式转成wav文件
"""
if any_path.endswith('.wav'):
return
if any_path.endswith('.sil') or any_path.endswith('.silk') or any_path.endswith('.slk'):
return sil_to_wav(any_path, wav_path)
audio = AudioSegment.from_file(any_path)
audio.export(wav_path, format="wav")
def pcm_to_silk(pcm_path, silk_path):
""" """
wav 文件转成 silk wav 文件转成 silk
return 声音长度,毫秒 return 声音长度,毫秒
@@ -60,7 +73,6 @@ def mp3_to_sil(mp3_path, silk_path):
f.write(silk_data) f.write(silk_data)
return audio.duration_seconds * 1000 return audio.duration_seconds * 1000
def sil_to_wav(silk_path, wav_path, rate: int = 24000): def sil_to_wav(silk_path, wav_path, rate: int = 24000):
""" """
silk 文件转 wav silk 文件转 wav

View File

@@ -56,7 +56,7 @@ class AzureVoice(Voice):
return reply return reply
def textToVoice(self, text): def textToVoice(self, text):
fileName = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3' fileName = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.wav'
audio_config = speechsdk.AudioConfig(filename=fileName) audio_config = speechsdk.AudioConfig(filename=fileName)
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=self.speech_config, audio_config=audio_config) speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=self.speech_config, audio_config=audio_config)
result = speech_synthesizer.speak_text(text) result = speech_synthesizer.speak_text(text)

View File

@@ -25,12 +25,12 @@ class PyttsVoice(Voice):
def textToVoice(self, text): def textToVoice(self, text):
try: try:
mp3File = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3' wavFile = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.wav'
self.engine.save_to_file(text, mp3File) self.engine.save_to_file(text, wavFile)
self.engine.runAndWait() self.engine.runAndWait()
logger.info( logger.info(
'[Pytts] textToVoice text={} voice file name={}'.format(text, mp3File)) '[Pytts] textToVoice text={} voice file name={}'.format(text, wavFile))
reply = Reply(ReplyType.VOICE, mp3File) reply = Reply(ReplyType.VOICE, wavFile)
except Exception as e: except Exception as e:
reply = Reply(ReplyType.ERROR, str(e)) reply = Reply(ReplyType.ERROR, str(e))
finally: finally: