Compare commits

...

19 Commits

Author SHA1 Message Date
lanvent
7458a6298f feat: add trigger_by_self option 2023-04-03 23:58:19 +08:00
lanvent
b0f54bb8b7 fix: dirty message including at and prefix 2023-04-03 23:53:58 +08:00
lanvent
acddadc406 feat: add convert pcm32 to pcm16 2023-04-03 22:55:39 +08:00
lanvent
b74274b96b fix: old code in hello plugin 2023-04-03 02:00:33 +08:00
lanvent
49ba278316 fix: use english filename 2023-04-02 16:50:11 +08:00
lanvent
388058467c fix: delete same file twice 2023-04-02 14:55:45 +08:00
lanvent
cf25bd7869 feat: itchat show qrcode using viewer 2023-04-02 14:45:38 +08:00
lanvent
02a95345aa fix: add more qrcode api 2023-04-02 14:13:38 +08:00
lanvent
6076e2ed0a fix: voice longer than 60s cannot be sent 2023-04-02 12:29:10 +08:00
lanvent
cec674cb47 update qrcode 2023-04-02 04:44:08 +08:00
Jianglang
c5a90823fa Update README.md 2023-04-02 04:30:40 +08:00
Jianglang
18d82bc1f0 Update README.md 2023-04-02 04:23:13 +08:00
lanvent
a68af990ea update Readme.md 2023-04-02 04:19:50 +08:00
lanvent
e71c600d10 feat: new itchat qrcode generator 2023-04-02 03:46:09 +08:00
lanvent
d7f1f7182c feat: add always_reply_voice option 2023-04-01 22:27:11 +08:00
lanvent
dfb2e460b4 fix: voice length bug in wechaty 2023-04-01 21:58:55 +08:00
lanvent
5badef8ba9 fix: correct sample rate when convert to silk 2023-04-01 20:59:52 +08:00
lanvent
18aa5ce75c fix: get correct audio format in pytts 2023-04-01 20:58:06 +08:00
lanvent
1545a9f262 feat: support azure voice 2023-04-01 16:36:27 +08:00
18 changed files with 271 additions and 74 deletions

View File

@@ -90,6 +90,13 @@ pip3 install -r requirements.txt
参考[#415](https://github.com/zhayujie/chatgpt-on-wechat/issues/415)
使用`azure`语音功能需安装依赖:
```bash
pip3 install azure-cognitiveservices-speech
```
> 目前默认发布的镜像和`railway`部署,都基于`alpine`,无法安装`azure`的依赖。若有需求请自行基于[`debian`](https://github.com/zhayujie/chatgpt-on-wechat/blob/master/docker/Dockerfile.debian.latest)打包。
参考[文档](https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/quickstarts/setup-platform?pivots=programming-language-python&tabs=linux%2Cubuntu%2Cdotnet%2Cjre%2Cmaven%2Cnodejs%2Cmac%2Cpypi)
## 配置
配置文件的模板在根目录的`config-template.json`中,需复制该模板创建最终生效的 `config.json` 文件:

5
app.py
View File

@@ -1,5 +1,6 @@
# encoding:utf-8
import os
from config import conf, load_config
from channel import channel_factory
from common.log import logger
@@ -13,6 +14,10 @@ def run():
# create channel
channel_name=conf().get('channel_type', 'wx')
if channel_name == 'wxy':
os.environ['WECHATY_LOG']="warn"
# os.environ['WECHATY_PUPPET_SERVICE_ENDPOINT'] = '127.0.0.1:9001'
channel = channel_factory.create_channel(channel_name)
if channel_name in ['wx','wxy']:
PluginManager().load_plugins()

View File

@@ -38,7 +38,7 @@ class ChatChannel(Channel):
if first_in: # context首次传入时receiver是None根据类型设置receiver
config = conf()
cmsg = context['msg']
if cmsg.from_user_id == self.user_id:
if cmsg.from_user_id == self.user_id and not config.get('trigger_by_self', False):
logger.debug("[WX]self message skipped")
return None
if context["isgroup"]:
@@ -70,17 +70,21 @@ class ChatChannel(Channel):
# 校验关键字
match_prefix = check_prefix(content, conf().get('group_chat_prefix'))
match_contain = check_contain(content, conf().get('group_chat_keyword'))
flag = False
if match_prefix is not None or match_contain is not None:
flag = True
if match_prefix:
content = content.replace(match_prefix, '', 1).strip()
elif context['msg'].is_at and not conf().get("group_at_off", False):
logger.info("[WX]receive group at, continue")
if context['msg'].is_at:
logger.info("[WX]receive group at")
if not conf().get("group_at_off", False):
flag = True
pattern = f'@{self.name}(\u2005|\u0020)'
content = re.sub(pattern, r'', content)
elif context["origin_ctype"] == ContextType.VOICE:
logger.info("[WX]receive group voice, checkprefix didn't match")
return None
else:
if not flag:
if context["origin_ctype"] == ContextType.VOICE:
logger.info("[WX]receive group voice, but checkprefix didn't match")
return None
else: # 单聊
match_prefix = check_prefix(content, conf().get('single_chat_prefix'))
@@ -98,14 +102,14 @@ class ChatChannel(Channel):
else:
context.type = ContextType.TEXT
context.content = content
if 'desire_rtype' not in context and conf().get('always_reply_voice'):
context['desire_rtype'] = ReplyType.VOICE
elif context.type == ContextType.VOICE:
if 'desire_rtype' not in context and conf().get('voice_reply_voice'):
context['desire_rtype'] = ReplyType.VOICE
return context
# 处理消息 TODO: 如果wechaty解耦此处逻辑可以放置到父类
def _handle(self, context: Context):
if context is None or not context.content:
return
@@ -143,9 +147,11 @@ class ChatChannel(Channel):
# 删除临时文件
try:
os.remove(file_path)
os.remove(wav_path)
if wav_path != file_path:
os.remove(wav_path)
except Exception as e:
logger.warning("[WX]delete temp file error: " + str(e))
pass
# logger.warning("[WX]delete temp file error: " + str(e))
if reply.type == ReplyType.TEXT:
new_context = self._compose_context(
@@ -194,14 +200,17 @@ class ChatChannel(Channel):
'channel': self, 'context': context, 'reply': reply}))
reply = e_context['reply']
if not e_context.is_pass() and reply and reply.type:
logger.debug('[WX] ready to send reply: {} to {}'.format(reply, context))
logger.debug('[WX] ready to send reply: {}, context: {}'.format(reply, context))
self._send(reply, context)
def _send(self, reply: Reply, context: Context, retry_cnt = 0):
try:
self.send(reply, context)
except Exception as e:
logger.error('[WX] sendMsg error: {}'.format(e))
logger.error('[WX] sendMsg error: {}'.format(str(e)))
if isinstance(e, NotImplementedError):
return
logger.exception(e)
if retry_cnt < 2:
time.sleep(3+3*retry_cnt)
self._send(reply, context, retry_cnt+1)

View File

@@ -64,6 +64,37 @@ def _check(func):
return func(self, cmsg)
return wrapper
#可用的二维码生成接口
#https://api.qrserver.com/v1/create-qr-code/?size=400x400&data=https://www.abc.com
#https://api.isoyu.com/qr/?m=1&e=L&p=20&url=https://www.abc.com
def qrCallback(uuid, status, qrcode):
    """
    Show the WeChat login QR code; passed to ``itchat.auto_login`` as ``qrCallback``.

    Only acts when ``status == '0'``. It then:
      1. tries to open the QR image in an image viewer through PIL (best effort),
      2. prints several web URLs that render the login link as a QR code,
      3. prints the login link as an ASCII QR code on the terminal.

    :param uuid: login uuid, used to build ``https://login.weixin.qq.com/l/{uuid}``
    :param status: status string supplied by the caller
    :param qrcode: QR image bytes, opened with ``PIL.Image.open``
    """
    # NOTE(review): assumes every step below is guarded by status == '0' — confirm.
    if status != '0':
        return

    try:
        from PIL import Image
        img = Image.open(io.BytesIO(qrcode))
        # Hand the viewer call to the thread pool instead of running it inline.
        thread_pool.submit(img.show, "QRCode")
    except Exception as e:
        # Best effort only: the URL list and ASCII output below still let the user log in.
        logger.debug("[WX]show qrcode image failed: {}".format(e))

    # Aliased so that the module does not rebind the `qrcode` parameter.
    import qrcode as qrcode_lib

    url = f"https://login.weixin.qq.com/l/{uuid}"
    qr_api1 = "https://api.isoyu.com/qr/?m=1&e=L&p=20&url={}".format(url)
    # The size separator must be an ASCII "x"; the previous "×" was a mis-encoded character.
    qr_api2 = "https://api.qrserver.com/v1/create-qr-code/?size=400x400&data={}".format(url)
    qr_api3 = "https://api.pwmqr.com/qrcode/create/?url={}".format(url)
    qr_api4 = "https://my.tv.sohu.com/user/a/wvideo/getQRCode.do?text={}".format(url)
    print("You can also scan QRCode in any website below:")
    print(qr_api3)
    print(qr_api4)
    print(qr_api2)
    print(qr_api1)

    qr = qrcode_lib.QRCode(border=1)
    qr.add_data(url)
    qr.make(fit=True)
    qr.print_ascii(invert=True)
@singleton
class WechatChannel(ChatChannel):
def __init__(self):
@@ -76,13 +107,13 @@ class WechatChannel(ChatChannel):
# login by scan QRCode
hotReload = conf().get('hot_reload', False)
try:
itchat.auto_login(enableCmdQR=2, hotReload=hotReload)
itchat.auto_login(enableCmdQR=2, hotReload=hotReload, qrCallback=qrCallback)
except Exception as e:
if hotReload:
logger.error("Hot reload failed, try to login without hot reload")
itchat.logout()
os.remove("itchat.pkl")
itchat.auto_login(enableCmdQR=2, hotReload=hotReload)
itchat.auto_login(enableCmdQR=2, hotReload=hotReload, qrCallback=qrCallback)
else:
raise e
self.user_id = itchat.instance.storageClass.userName

View File

@@ -20,7 +20,7 @@ from channel.wechat.wechaty_message import WechatyMessage
from common.log import logger
from config import conf
try:
from voice.audio_convert import mp3_to_sil
from voice.audio_convert import any_to_sil
except Exception as e:
pass
@@ -35,14 +35,12 @@ class WechatyChannel(ChatChannel):
pass
def startup(self):
asyncio.run(self.main())
async def main(self):
config = conf()
token = config.get('wechaty_puppet_service_token')
os.environ['WECHATY_PUPPET_SERVICE_TOKEN'] = token
os.environ['WECHATY_LOG']="warn"
# os.environ['WECHATY_PUPPET_SERVICE_ENDPOINT'] = '127.0.0.1:9001'
asyncio.run(self.main())
async def main(self):
self.bot = Wechaty()
self.bot.on('login', self.on_login)
self.bot.on('message', self.on_message)
@@ -72,18 +70,12 @@ class WechatyChannel(ChatChannel):
logger.info('[WX] sendMsg={}, receiver={}'.format(reply, receiver))
elif reply.type == ReplyType.VOICE:
voiceLength = None
if reply.content.endswith('.mp3'):
mp3_file = reply.content
sil_file = os.path.splitext(mp3_file)[0] + '.sil'
voiceLength = mp3_to_sil(mp3_file, sil_file)
try:
os.remove(mp3_file)
except Exception as e:
pass
elif reply.content.endswith('.sil'):
sil_file = reply.content
else:
raise Exception('voice file must be mp3 or sil format')
file_path = reply.content
sil_file = os.path.splitext(file_path)[0] + '.sil'
voiceLength = int(any_to_sil(file_path, sil_file))
if voiceLength >= 60000:
voiceLength = 60000
logger.info('[WX] voice too long, length={}, set to 60s'.format(voiceLength))
# 发送语音
t = int(time.time())
msg = FileBox.from_file(sil_file, name=str(t) + '.sil')
@@ -91,7 +83,9 @@ class WechatyChannel(ChatChannel):
msg.metadata['voiceLength'] = voiceLength
asyncio.run_coroutine_threadsafe(receiver.say(msg),loop).result()
try:
os.remove(sil_file)
os.remove(file_path)
if sil_file != file_path:
os.remove(sil_file)
except Exception as e:
pass
logger.info('[WX] sendVoice={}, receiver={}'.format(reply.content, receiver))
@@ -123,7 +117,6 @@ class WechatyChannel(ChatChannel):
return
logger.debug('[WX] message:{}'.format(cmsg))
room = msg.room() # 获取消息来自的群聊. 如果消息不是来自群聊, 则返回None
isgroup = room is not None
ctype = cmsg.ctype
context = self._compose_context(ctype, cmsg.content, isgroup=isgroup, msg=cmsg)

View File

@@ -25,6 +25,7 @@ available_setting = {
"group_name_white_list": ["ChatGPT测试群", "ChatGPT测试群2"], # 开启自动回复的群名称列表
"group_name_keyword_white_list": [], # 开启自动回复的群名称关键词列表
"group_chat_in_one_session": ["ChatGPT测试群"], # 支持会话上下文共享的群名称
"trigger_by_self": False, # 是否允许机器人触发
"image_create_prefix": ["", "", ""], # 开启图片回复的前缀
# chatgpt会话参数
@@ -47,16 +48,21 @@ available_setting = {
"speech_recognition": False, # 是否开启语音识别
"group_speech_recognition": False, # 是否开启群组语音识别
"voice_reply_voice": False, # 是否使用语音回复语音需要设置对应语音合成引擎的api key
"voice_to_text": "openai", # 语音识别引擎支持openai,google
"text_to_voice": "baidu", # 语音合成引擎,支持baidu,google,pytts(offline)
"always_reply_voice": False, # 是否一直使用语音回复
"voice_to_text": "openai", # 语音识别引擎,支持openai,google,azure
"text_to_voice": "baidu", # 语音合成引擎支持baidu,google,pytts(offline),azure
# baidu api配置, 使用百度语音识别和语音合成时需要
# baidu 语音api配置 使用百度语音识别和语音合成时需要
"baidu_app_id": "",
"baidu_api_key": "",
"baidu_secret_key": "",
# 1536普通话(支持简单的英文识别) 1737英语 1637粤语 1837四川话 1936普通话远场
"baidu_dev_pid": "1536",
# azure 语音api配置 使用azure语音识别和语音合成时需要
"azure_voice_api_key": "",
"azure_voice_region": "japaneast",
# 服务时间限制目前支持itchat
"chat_time_module": False, # 是否开启服务时间限制
"chat_start_time": "00:00", # 服务开始时间

View File

@@ -0,0 +1,32 @@
# Debian-based image (python:3.10-slim) that additionally installs the
# azure-cognitiveservices-speech package on top of requirements.txt.
FROM python:3.10-slim

LABEL maintainer="foo@bar.com"
ARG TZ='Asia/Shanghai'

ARG CHATGPT_ON_WECHAT_VER

ENV BUILD_PREFIX=/app

ADD . ${BUILD_PREFIX}

# Install system packages and Python dependencies in one layer; the apt lists
# are removed and pip caching is disabled so they do not end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends bash \
        ffmpeg espeak \
    && rm -rf /var/lib/apt/lists/* \
    && cd ${BUILD_PREFIX} \
    && cp config-template.json config.json \
    && /usr/local/bin/python -m pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt \
    && pip install --no-cache-dir azure-cognitiveservices-speech

WORKDIR ${BUILD_PREFIX}

ADD docker/entrypoint.sh /entrypoint.sh

# Make the entrypoint executable and run the application as an unprivileged user.
RUN chmod +x /entrypoint.sh \
    && groupadd -r noroot \
    && useradd -r -g noroot -s /bin/bash -d /home/noroot noroot \
    && chown -R noroot:noroot ${BUILD_PREFIX}

USER noroot

# Use the copy that was marked executable above instead of the relative
# docker/entrypoint.sh, whose mode depends on the build context.
ENTRYPOINT ["/entrypoint.sh"]

View File

@@ -1,4 +1,4 @@
#!/bin/bash
cd .. && docker build -f Dockerfile \
cd .. && docker build -f docker/Dockerfile.latest \
-t zhayujie/chatgpt-on-wechat .

View File

@@ -1,6 +1,6 @@
## 插件化初衷
之前未插件化的代码耦合程度高,如果要定制一些个性化功能(如流量控制、接入`NovelAI`画图平台等),需要了解代码主体,避免影响到其他的功能。在实现多个功能后,不但无法调整功能的优先级顺序,功能配置项也会变得非常混乱。
之前未插件化的代码耦合程度高,如果要定制一些个性化功能(如流量控制、接入`NovelAI`画图平台等),需要了解代码主体,避免影响到其他的功能。多个功能同时存在时,无法调整功能的优先级顺序,功能配置项也非常混乱。
此时插件化应声而出。
@@ -11,7 +11,7 @@
- [x] 插件化能够自由开关和调整优先级。
- [x] 每个插件可在插件文件夹内维护独立的配置文件,方便代码的测试和调试,可以在独立的仓库开发插件。
PS: 插件目前支持`itchat`
PS: 插件目前支持`itchat`和`wechaty`
## 插件化实现
@@ -101,7 +101,7 @@ PS: 插件目前仅支持`itchat`
根据`Context`和回复`Reply`的类型,对回复的内容进行装饰。目前的装饰有以下两种:
- `TEXT`文本回复根据是否在群聊中来决定是艾特接收方还是添加回复的前缀。
- `TEXT`文本回复:如果这次消息需要的回复是`VOICE`,进行文字转语音回复之后再次装饰。 否则根据是否在群聊中来决定是艾特接收方还是添加回复的前缀。
- `INFO``ERROR`类型,会在消息前添加对应的系统提示字样。
@@ -110,8 +110,11 @@ PS: 插件目前仅支持`itchat`
```python
if reply.type == ReplyType.TEXT:
reply_text = reply.content
if context.get('desire_rtype') == ReplyType.VOICE:
reply = super().build_text_to_voice(reply.content)
return self._decorate_reply(context, reply)
if context['isgroup']:
reply_text = '@' + context['msg']['ActualNickName'] + ' ' + reply_text.strip()
reply_text = '@' + context['msg'].actual_user_nickname + ' ' + reply_text.strip()
reply_text = conf().get("group_chat_reply_prefix", "")+reply_text
else:
reply_text = conf().get("single_chat_reply_prefix", "")+reply_text
@@ -213,11 +216,11 @@ class Hello(Plugin):
if content == "Hello":
reply = Reply()
reply.type = ReplyType.TEXT
msg = e_context['context']['msg']
msg:ChatMessage = e_context['context']['msg']
if e_context['context']['isgroup']:
reply.content = "Hello, " + msg['ActualNickName'] + " from " + msg['User'].get('NickName', "Group")
reply.content = f"Hello, {msg.actual_user_nickname} from {msg.from_user_nickname}"
else:
reply.content = "Hello, " + msg['User'].get('NickName', "My friend")
reply.content = f"Hello, {msg.from_user_nickname}"
e_context['reply'] = reply
e_context.action = EventAction.BREAK_PASS # 事件结束并跳过处理context的默认逻辑
if content == "End":

View File

@@ -2,6 +2,7 @@
from bridge.context import ContextType
from bridge.reply import Reply, ReplyType
from channel.chat_message import ChatMessage
import plugins
from plugins import *
from common.log import logger
@@ -24,11 +25,11 @@ class Hello(Plugin):
if content == "Hello":
reply = Reply()
reply.type = ReplyType.TEXT
msg = e_context['context']['msg']
msg:ChatMessage = e_context['context']['msg']
if e_context['context']['isgroup']:
reply.content = "Hello, " + msg['ActualNickName'] + " from " + msg['User'].get('NickName', "Group")
reply.content = f"Hello, {msg.actual_user_nickname} from {msg.from_user_nickname}"
else:
reply.content = "Hello, " + msg['User'].get('NickName', "My friend")
reply.content = f"Hello, {msg.from_user_nickname}"
e_context['reply'] = reply
e_context.action = EventAction.BREAK_PASS # 事件结束并跳过处理context的默认逻辑

View File

@@ -7,6 +7,7 @@ PyQRCode>=1.2.1
pysilk>=0.0.1
pysilk_mod>=1.6.0
pyttsx3>=2.90
qrcode>=7.4.2
requests>=2.28.2
webuiapi>=0.6.2
wechaty>=0.10.7

View File

@@ -1,7 +1,23 @@
import shutil
import wave
import pysilk
from pydub import AudioSegment
# Sample rates supported when converting slk to wav.
sil_supports = [8000, 12000, 16000, 24000, 32000, 44100, 48000]


def find_closest_sil_supports(sample_rate):
    """
    Return the entry of ``sil_supports`` that is closest to ``sample_rate``.

    A supported rate is returned unchanged. When two supported rates are
    equally close, the one listed first in ``sil_supports`` is returned.
    """
    # min() needs no "large enough" sentinel, so arbitrarily high input rates
    # still map to a real supported rate instead of falling through to 0.
    return min(sil_supports, key=lambda rate: abs(rate - sample_rate))
def get_pcm_from_wav(wav_path):
"""
@@ -13,6 +29,30 @@ def get_pcm_from_wav(wav_path):
wav = wave.open(wav_path, "rb")
return wav.readframes(wav.getnframes())
def any_to_wav(any_path, wav_path):
    """
    Convert an audio file of any format to a wav file.

    - ``.wav`` input is copied to ``wav_path``; nothing happens when both
      paths refer to the same file.
    - ``.sil`` / ``.silk`` / ``.slk`` input is converted with ``sil_to_wav``
      and that call's result is returned.
    - Any other input is loaded with ``AudioSegment.from_file`` and exported
      as wav.
    """
    if any_path.endswith('.wav'):
        try:
            shutil.copy2(any_path, wav_path)
        except shutil.SameFileError:
            # Callers derive wav_path from any_path, so the input may already
            # be the target file; there is nothing to copy in that case.
            pass
        return
    if any_path.endswith(('.sil', '.silk', '.slk')):
        return sil_to_wav(any_path, wav_path)
    audio = AudioSegment.from_file(any_path)
    audio.export(wav_path, format="wav")
def any_to_sil(any_path, sil_path):
    """
    Convert an audio file to a sil (silk) file.

    - ``.sil`` / ``.silk`` / ``.slk`` input is copied to ``sil_path`` (nothing
      is copied when both paths refer to the same file) and the fixed value
      10000 is returned; the real duration is not computed for this case.
    - ``.wav`` input is converted with ``pcm_to_sil``.
    - ``.mp3`` input is converted with ``mp3_to_sil``.

    Returns the value produced by the branch that handled the file.
    Raises ``NotImplementedError`` for any other file extension.
    """
    if any_path.endswith(('.sil', '.silk', '.slk')):
        try:
            shutil.copy2(any_path, sil_path)
        except shutil.SameFileError:
            # Callers derive sil_path from any_path, so the input may already
            # be the target file; there is nothing to copy in that case.
            pass
        return 10000
    if any_path.endswith('.wav'):
        return pcm_to_sil(any_path, sil_path)
    if any_path.endswith('.mp3'):
        return mp3_to_sil(any_path, sil_path)
    raise NotImplementedError("Not support file type: {}".format(any_path))
def mp3_to_wav(mp3_path, wav_path):
"""
@@ -21,46 +61,40 @@ def mp3_to_wav(mp3_path, wav_path):
audio = AudioSegment.from_mp3(mp3_path)
audio.export(wav_path, format="wav")
def any_to_wav(any_path, wav_path):
"""
把任意格式转成wav文件
"""
if any_path.endswith('.wav'):
return
if any_path.endswith('.sil') or any_path.endswith('.silk') or any_path.endswith('.slk'):
return sil_to_wav(any_path, wav_path)
audio = AudioSegment.from_file(any_path)
audio.export(wav_path, format="wav")
def pcm_to_silk(pcm_path, silk_path):
def pcm_to_sil(pcm_path, silk_path):
"""
wav 文件转成 silk
return 声音长度,毫秒
"""
audio = AudioSegment.from_wav(pcm_path)
wav_data = audio.raw_data
rate = find_closest_sil_supports(audio.frame_rate)
# Convert to PCM_s16
pcm_s16 = audio.set_sample_width(2)
pcm_s16 = pcm_s16.set_frame_rate(rate)
wav_data = pcm_s16.raw_data
silk_data = pysilk.encode(
wav_data, data_rate=audio.frame_rate, sample_rate=audio.frame_rate)
wav_data, data_rate=rate, sample_rate=rate)
with open(silk_path, "wb") as f:
f.write(silk_data)
return audio.duration_seconds * 1000
def mp3_to_sil(mp3_path, silk_path):
"""
mp3 文件转成 silk
return 声音长度,毫秒
"""
audio = AudioSegment.from_mp3(mp3_path)
wav_data = audio.raw_data
silk_data = pysilk.encode(
wav_data, data_rate=audio.frame_rate, sample_rate=audio.frame_rate)
rate = find_closest_sil_supports(audio.frame_rate)
# Convert to PCM_s16
pcm_s16 = audio.set_sample_width(2)
pcm_s16 = pcm_s16.set_frame_rate(rate)
wav_data = pcm_s16.raw_data
silk_data = pysilk.encode(wav_data, data_rate=rate, sample_rate=rate)
# Save the silk file
with open(silk_path, "wb") as f:
f.write(silk_data)
return audio.duration_seconds * 1000
def sil_to_wav(silk_path, wav_path, rate: int = 24000):
"""
silk 文件转 wav

View File

@@ -0,0 +1,68 @@
"""
azure voice service
"""
import json
import os
import time
import azure.cognitiveservices.speech as speechsdk
from bridge.reply import Reply, ReplyType
from common.log import logger
from common.tmp_dir import TmpDir
from voice.voice import Voice
from config import conf
"""
Azure voice
主目录设置文件中需填写azure_voice_api_key和azure_voice_region
查看可用的 voice https://speech.microsoft.com/portal/voicegallery
"""
class AzureVoice(Voice):
    """
    Voice implementation backed by the Azure Speech SDK.

    The API key and region come from the main configuration
    (``azure_voice_api_key`` / ``azure_voice_region``); the synthesis voice
    name and recognition language come from ``config.json`` next to this file.
    """

    def __init__(self):
        """
        Load (or create) the local ``config.json`` and build the speech config.

        Any failure is logged and ignored, in which case ``self.speech_config``
        is left unset.
        """
        try:
            curdir = os.path.dirname(__file__)
            config_path = os.path.join(curdir, "config.json")
            if not os.path.exists(config_path):
                # No config file yet: write one with the default settings.
                config = {"speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural", "speech_recognition_language": "zh-CN"}
                with open(config_path, "w", encoding="utf-8") as fw:
                    json.dump(config, fw, indent=4)
            else:
                with open(config_path, "r", encoding="utf-8") as fr:
                    config = json.load(fr)
            self.api_key = conf().get('azure_voice_api_key')
            self.api_region = conf().get('azure_voice_region')
            self.speech_config = speechsdk.SpeechConfig(subscription=self.api_key, region=self.api_region)
            self.speech_config.speech_synthesis_voice_name = config["speech_synthesis_voice_name"]
            self.speech_config.speech_recognition_language = config["speech_recognition_language"]
        except Exception as e:
            # logger.warn is a deprecated alias; use logger.warning.
            logger.warning("AzureVoice init failed: %s, ignore " % e)

    def voiceToText(self, voice_file):
        """
        Run one-shot speech recognition on ``voice_file``.

        Returns a TEXT ``Reply`` with the recognized text, or an ERROR
        ``Reply`` when the result reason is not ``RecognizedSpeech``.
        """
        audio_config = speechsdk.AudioConfig(filename=voice_file)
        speech_recognizer = speechsdk.SpeechRecognizer(speech_config=self.speech_config, audio_config=audio_config)
        result = speech_recognizer.recognize_once()
        if result.reason == speechsdk.ResultReason.RecognizedSpeech:
            logger.info('[Azure] voiceToText voice file name={} text={}'.format(voice_file, result.text))
            reply = Reply(ReplyType.TEXT, result.text)
        else:
            logger.error('[Azure] voiceToText error, result={}'.format(result))
            reply = Reply(ReplyType.ERROR, "抱歉,语音识别失败")
        return reply

    def textToVoice(self, text):
        """
        Synthesize ``text`` into a wav file in the temporary directory.

        Returns a VOICE ``Reply`` holding the file path, or an ERROR ``Reply``
        when the result reason is not ``SynthesizingAudioCompleted``.
        """
        # The file name has one-second resolution, so two replies created
        # within the same second share a name.
        fileName = TmpDir().path() + 'reply-' + str(int(time.time())) + '.wav'
        audio_config = speechsdk.AudioConfig(filename=fileName)
        speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=self.speech_config, audio_config=audio_config)
        result = speech_synthesizer.speak_text(text)
        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
            logger.info(
                '[Azure] textToVoice text={} voice file name={}'.format(text, fileName))
            reply = Reply(ReplyType.VOICE, fileName)
        else:
            logger.error('[Azure] textToVoice error, result={}'.format(result))
            reply = Reply(ReplyType.ERROR, "抱歉,语音合成失败")
        return reply

View File

@@ -0,0 +1,4 @@
{
"speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural",
"speech_recognition_language": "zh-CN"
}

View File

@@ -80,7 +80,7 @@ class BaiduVoice(Voice):
result = self.client.synthesis(text, self.lang, self.ctp, {
'spd': self.spd, 'pit': self.pit, 'vol': self.vol, 'per': self.per})
if not isinstance(result, dict):
fileName = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
fileName = TmpDir().path() + 'reply-' + str(int(time.time())) + '.mp3'
with open(fileName, 'wb') as f:
f.write(result)
logger.info(

View File

@@ -34,7 +34,7 @@ class GoogleVoice(Voice):
return reply
def textToVoice(self, text):
try:
mp3File = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
mp3File = TmpDir().path() + 'reply-' + str(int(time.time())) + '.mp3'
tts = gTTS(text=text, lang='zh')
tts.save(mp3File)
logger.info(

View File

@@ -25,12 +25,12 @@ class PyttsVoice(Voice):
def textToVoice(self, text):
try:
mp3File = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
self.engine.save_to_file(text, mp3File)
wavFile = TmpDir().path() + 'reply-' + str(int(time.time())) + '.wav'
self.engine.save_to_file(text, wavFile)
self.engine.runAndWait()
logger.info(
'[Pytts] textToVoice text={} voice file name={}'.format(text, mp3File))
reply = Reply(ReplyType.VOICE, mp3File)
'[Pytts] textToVoice text={} voice file name={}'.format(text, wavFile))
reply = Reply(ReplyType.VOICE, wavFile)
except Exception as e:
reply = Reply(ReplyType.ERROR, str(e))
finally:

View File

@@ -20,4 +20,7 @@ def create_voice(voice_type):
elif voice_type == 'pytts':
from voice.pytts.pytts_voice import PyttsVoice
return PyttsVoice()
elif voice_type == 'azure':
from voice.azure.azure_voice import AzureVoice
return AzureVoice()
raise RuntimeError