mirror of
https://github.com/zhayujie/chatgpt-on-wechat.git
synced 2026-06-02 09:48:22 +08:00
Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
66a81cd47c | ||
|
|
81edd13470 | ||
|
|
7a94745b8a | ||
|
|
06b02f5df8 | ||
|
|
83136e3142 |
@@ -14,6 +14,15 @@ class Context:
|
||||
self.type = type
|
||||
self.content = content
|
||||
self.kwargs = kwargs
|
||||
|
||||
def __contains__(self, key):
    """Support ``key in context``.

    The keys 'type' and 'content' are considered present only when the
    corresponding attribute is not None; every other key is looked up
    in ``self.kwargs``.
    """
    if key == 'type':
        return self.type is not None
    if key == 'content':
        return self.content is not None
    # Anything else lives in the free-form keyword dictionary.
    return key in self.kwargs
|
||||
|
||||
def __getitem__(self, key):
|
||||
if key == 'type':
|
||||
return self.type
|
||||
@@ -21,6 +30,12 @@ class Context:
|
||||
return self.content
|
||||
else:
|
||||
return self.kwargs[key]
|
||||
|
||||
def get(self, key, default=None):
    """Return ``self[key]``, or ``default`` when the lookup raises KeyError.

    Only KeyError is handled; any other exception raised by the item
    lookup propagates to the caller.
    """
    try:
        value = self[key]
    except KeyError:
        value = default
    return value
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
if key == 'type':
|
||||
|
||||
@@ -19,6 +19,7 @@ from common.log import logger
|
||||
from common.tmp_dir import TmpDir
|
||||
from config import conf
|
||||
from common.time_check import time_checker
|
||||
from common.expired_dict import ExpiredDict
|
||||
from plugins import *
|
||||
try:
|
||||
from voice.audio_convert import mp3_to_wav
|
||||
@@ -53,12 +54,26 @@ def handler_group_voice(msg):
|
||||
WechatChannel().handle_group_voice(msg)
|
||||
return None
|
||||
|
||||
def _check(func):
    """Decorate a message handler so duplicate and stale messages are dropped.

    The returned wrapper has the signature ``wrapper(self, msg)`` and:

    * returns None without calling ``func`` when ``msg['MsgId']`` is already
      in ``self.receivedMsgs``;
    * otherwise records the message in ``self.receivedMsgs`` under its id;
    * returns None without calling ``func`` when the ``hot_reload`` setting
      is enabled and ``msg['CreateTime']`` is more than 60 seconds old;
    * otherwise returns ``func(self, msg)``.

    ``functools.wraps`` keeps the handler's ``__name__``, ``__doc__`` and
    ``__wrapped__`` visible through the wrapper, which matters because the
    handlers are wrapped by more than one decorator.
    """
    import functools  # local import so the block does not depend on file-level imports

    @functools.wraps(func)
    def wrapper(self, msg):
        msgId = msg['MsgId']
        if msgId in self.receivedMsgs:
            logger.info("Wechat message {} already received, ignore".format(msgId))
            return
        # Record the message before the age check so a later duplicate of a
        # skipped history message is also ignored.
        self.receivedMsgs[msgId] = msg
        create_time = msg['CreateTime']  # message creation time
        # Skip history messages older than one minute (only with hot_reload on).
        if conf().get('hot_reload') == True and int(create_time) < int(time.time()) - 60:
            logger.debug("[WX]history message {} skipped".format(msgId))
            return
        return func(self, msg)

    return wrapper
|
||||
|
||||
|
||||
class WechatChannel(Channel):
|
||||
def __init__(self):
    """Create the channel with no user identity and an empty message cache."""
    # Cache of received messages keyed by message id; the _check decorator
    # consults it to drop duplicates. The argument equals 60*60*24
    # (presumably a lifetime in seconds - confirm against ExpiredDict).
    self.receivedMsgs = ExpiredDict(24 * 60 * 60)
    # Both identity fields start as None here and are assigned elsewhere.
    self.nickName = None
    self.userName = None
|
||||
|
||||
def startup(self):
|
||||
|
||||
@@ -90,7 +105,11 @@ class WechatChannel(Channel):
|
||||
# isgroup: 是否是群聊
|
||||
# receiver: 需要回复的对象
|
||||
# msg: itchat的原始消息对象
|
||||
# origin_ctype: 原始消息类型,用于私聊语音消息时,避免匹配前缀
|
||||
# desire_rtype: 希望回复类型,TEXT类型是文本回复,VOICE类型是语音回复
|
||||
|
||||
@time_checker
|
||||
@_check
|
||||
def handle_voice(self, msg):
|
||||
if conf().get('speech_recognition') != True:
|
||||
return
|
||||
@@ -106,11 +125,12 @@ class WechatChannel(Channel):
|
||||
else:
|
||||
other_user_id = from_user_id
|
||||
if from_user_id == other_user_id:
|
||||
context = Context(ContextType.VOICE,msg['FileName'])
|
||||
context.kwargs = {'isgroup': False, 'msg': msg, 'receiver': other_user_id, 'session_id': other_user_id}
|
||||
thread_pool.submit(self.handle, context).add_done_callback(thread_pool_callback)
|
||||
context = self._compose_context(ContextType.VOICE, msg['FileName'], isgroup=False, msg=msg, receiver=other_user_id, session_id=other_user_id)
|
||||
if context:
|
||||
thread_pool.submit(self.handle, context).add_done_callback(thread_pool_callback)
|
||||
|
||||
@time_checker
|
||||
@_check
|
||||
def handle_text(self, msg):
|
||||
logger.debug("[WX]receive text msg: " + json.dumps(msg, ensure_ascii=False))
|
||||
content = msg['Text']
|
||||
@@ -124,41 +144,20 @@ class WechatChannel(Channel):
|
||||
other_user_id = to_user_id
|
||||
else:
|
||||
other_user_id = from_user_id
|
||||
create_time = msg['CreateTime'] # 消息时间
|
||||
match_prefix = check_prefix(content, conf().get('single_chat_prefix'))
|
||||
if conf().get('hot_reload') == True and int(create_time) < int(time.time()) - 60: # 跳过1分钟前的历史消息
|
||||
logger.debug("[WX]history message skipped")
|
||||
return
|
||||
if "」\n- - - - - - - - - - - - - - -" in content:
|
||||
logger.debug("[WX]reference query skipped")
|
||||
return
|
||||
if match_prefix:
|
||||
content = content.replace(match_prefix, '', 1).strip()
|
||||
elif match_prefix is None:
|
||||
return
|
||||
context = Context()
|
||||
context.kwargs = {'isgroup': False, 'msg': msg,
|
||||
'receiver': other_user_id, 'session_id': other_user_id}
|
||||
|
||||
img_match_prefix = check_prefix(content, conf().get('image_create_prefix'))
|
||||
if img_match_prefix:
|
||||
content = content.replace(img_match_prefix, '', 1).strip()
|
||||
context.type = ContextType.IMAGE_CREATE
|
||||
else:
|
||||
context.type = ContextType.TEXT
|
||||
|
||||
context.content = content
|
||||
thread_pool.submit(self.handle, context).add_done_callback(thread_pool_callback)
|
||||
|
||||
context = self._compose_context(ContextType.TEXT, content, isgroup=False, msg=msg, receiver=other_user_id, session_id=other_user_id)
|
||||
if context:
|
||||
thread_pool.submit(self.handle, context).add_done_callback(thread_pool_callback)
|
||||
|
||||
@time_checker
|
||||
@_check
|
||||
def handle_group(self, msg):
|
||||
logger.debug("[WX]receive group msg: " + json.dumps(msg, ensure_ascii=False))
|
||||
group_name = msg['User'].get('NickName', None)
|
||||
group_id = msg['User'].get('UserName', None)
|
||||
create_time = msg['CreateTime'] # 消息时间
|
||||
if conf().get('hot_reload') == True and int(create_time) < int(time.time()) - 60: # 跳过1分钟前的历史消息
|
||||
logger.debug("[WX]history group message skipped")
|
||||
return
|
||||
if not group_name:
|
||||
return ""
|
||||
origin_content = msg['Content']
|
||||
@@ -172,13 +171,74 @@ class WechatChannel(Channel):
|
||||
if "」\n- - - - - - - - - - - - - - -" in content:
|
||||
logger.debug("[WX]reference query skipped")
|
||||
return ""
|
||||
config = conf()
|
||||
match_prefix = (msg['IsAt'] and not config.get("group_at_off", False)) or check_prefix(origin_content, config.get('group_chat_prefix')) \
|
||||
or check_contain(origin_content, config.get('group_chat_keyword'))
|
||||
if ('ALL_GROUP' in config.get('group_name_white_list') or group_name in config.get('group_name_white_list') or check_contain(group_name, config.get('group_name_keyword_white_list'))) and match_prefix:
|
||||
context = Context()
|
||||
context.kwargs = { 'isgroup': True, 'msg': msg, 'receiver': group_id}
|
||||
|
||||
config = conf()
|
||||
group_name_white_list = config.get('group_name_white_list', [])
|
||||
group_name_keyword_white_list = config.get('group_name_keyword_white_list', [])
|
||||
|
||||
if any([group_name in group_name_white_list, 'ALL_GROUP' in group_name_white_list, check_contain(group_name, group_name_keyword_white_list)]):
|
||||
group_chat_in_one_session = conf().get('group_chat_in_one_session', [])
|
||||
session_id = msg['ActualUserName']
|
||||
if any([group_name in group_chat_in_one_session, 'ALL_GROUP' in group_chat_in_one_session]):
|
||||
session_id = group_id
|
||||
context = self._compose_context(ContextType.TEXT, content, isgroup=True, msg=msg, receiver=group_id, session_id=session_id)
|
||||
if context:
|
||||
thread_pool.submit(self.handle, context).add_done_callback(thread_pool_callback)
|
||||
|
||||
@time_checker
|
||||
@_check
|
||||
def handle_group_voice(self, msg):
    """Queue a group voice message for handling when its group is allowed.

    Returns None when group speech recognition is disabled or the group is
    not whitelisted, and "" when the message carries no group name. When a
    context is composed successfully it is submitted to the thread pool.
    """
    if conf().get('group_speech_recognition', False) != True:
        return
    logger.debug("[WX]receive voice for group msg: " + msg['FileName'])
    group_name = msg['User'].get('NickName', None)
    group_id = msg['User'].get('UserName', None)
    # Validate the group name.
    if not group_name:
        return ""

    config = conf()
    name_white_list = config.get('group_name_white_list', [])
    keyword_white_list = config.get('group_name_keyword_white_list', [])
    # All three checks are evaluated eagerly, before any of them is tested.
    name_listed = group_name in name_white_list
    all_groups_allowed = 'ALL_GROUP' in name_white_list
    keyword_matched = check_contain(group_name, keyword_white_list)
    if not (name_listed or all_groups_allowed or keyword_matched):
        return

    shared_session_groups = conf().get('group_chat_in_one_session', [])
    # Default to one session per speaker; switch to one session per group
    # when the group (or ALL_GROUP) is configured to share a session.
    session_id = msg['ActualUserName']
    group_shares_session = group_name in shared_session_groups
    every_group_shares = 'ALL_GROUP' in shared_session_groups
    if group_shares_session or every_group_shares:
        session_id = group_id

    context = self._compose_context(ContextType.VOICE, msg['FileName'], isgroup=True, msg=msg, receiver=group_id, session_id=session_id)
    if context:
        thread_pool.submit(self.handle, context).add_done_callback(thread_pool_callback)
|
||||
|
||||
def _compose_context(self, ctype: ContextType, content, **kwargs):
|
||||
context = Context(ctype, content)
|
||||
context.kwargs = kwargs
|
||||
if 'origin_ctype' not in context:
|
||||
context['origin_ctype'] = ctype
|
||||
|
||||
if ctype == ContextType.TEXT:
|
||||
if context["isgroup"]: # 群聊
|
||||
# 校验关键字
|
||||
match_prefix = check_prefix(content, conf().get('group_chat_prefix'))
|
||||
match_contain = check_contain(content, conf().get('group_chat_keyword'))
|
||||
if match_prefix is not None or match_contain is not None:
|
||||
# 判断如果匹配到自定义前缀,则返回过滤掉前缀+空格后的内容,用于实现类似自定义+前缀触发生成AI图片的功能
|
||||
if match_prefix:
|
||||
content = content.replace(match_prefix, '', 1).strip()
|
||||
elif context['msg']['IsAt'] and not conf().get("group_at_off", False):
|
||||
logger.info("[WX]receive group at, continue")
|
||||
elif context["origin_ctype"] == ContextType.VOICE:
|
||||
logger.info("[WX]receive group voice, checkprefix didn't match")
|
||||
return None
|
||||
else:
|
||||
return None
|
||||
else: # 单聊
|
||||
match_prefix = check_prefix(content, conf().get('single_chat_prefix'))
|
||||
if match_prefix: # 判断如果匹配到自定义前缀,则返回过滤掉前缀+空格后的内容
|
||||
content = content.replace(match_prefix, '', 1).strip()
|
||||
elif context["origin_ctype"] == ContextType.VOICE: # 如果源消息是私聊的语音消息,不匹配前缀,直接返回
|
||||
pass
|
||||
else:
|
||||
return None
|
||||
img_match_prefix = check_prefix(content, conf().get('image_create_prefix'))
|
||||
if img_match_prefix:
|
||||
content = content.replace(img_match_prefix, '', 1).strip()
|
||||
@@ -186,44 +246,11 @@ class WechatChannel(Channel):
|
||||
else:
|
||||
context.type = ContextType.TEXT
|
||||
context.content = content
|
||||
|
||||
group_chat_in_one_session = conf().get('group_chat_in_one_session', [])
|
||||
if ('ALL_GROUP' in group_chat_in_one_session or
|
||||
group_name in group_chat_in_one_session or
|
||||
check_contain(group_name, group_chat_in_one_session)):
|
||||
context['session_id'] = group_id
|
||||
else:
|
||||
context['session_id'] = msg['ActualUserName']
|
||||
|
||||
thread_pool.submit(self.handle, context).add_done_callback(thread_pool_callback)
|
||||
|
||||
def handle_group_voice(self, msg):
|
||||
if conf().get('group_speech_recognition', False) != True:
|
||||
return
|
||||
logger.debug("[WX]receive voice for group msg: " + msg['FileName'])
|
||||
group_name = msg['User'].get('NickName', None)
|
||||
group_id = msg['User'].get('UserName', None)
|
||||
create_time = msg['CreateTime'] # 消息时间
|
||||
if conf().get('hot_reload') == True and int(create_time) < int(time.time()) - 60: #跳过1分钟前的历史消息
|
||||
logger.debug("[WX]history group voice skipped")
|
||||
return
|
||||
# 验证群名
|
||||
if not group_name:
|
||||
return ""
|
||||
if ('ALL_GROUP' in conf().get('group_name_white_list') or group_name in conf().get('group_name_white_list') or check_contain(group_name, conf().get('group_name_keyword_white_list'))):
|
||||
context = Context(ContextType.VOICE,msg['FileName'])
|
||||
context.kwargs = {'isgroup': True, 'msg': msg, 'receiver': group_id}
|
||||
|
||||
group_chat_in_one_session = conf().get('group_chat_in_one_session', [])
|
||||
if ('ALL_GROUP' in group_chat_in_one_session or
|
||||
group_name in group_chat_in_one_session or
|
||||
check_contain(group_name, group_chat_in_one_session)):
|
||||
context['session_id'] = group_id
|
||||
else:
|
||||
context['session_id'] = msg['ActualUserName']
|
||||
|
||||
thread_pool.submit(self.handle, context).add_done_callback(thread_pool_callback)
|
||||
|
||||
elif context.type == ContextType.VOICE:
|
||||
if 'desire_rtype' not in context and conf().get('voice_reply_voice'):
|
||||
context['desire_rtype'] = ReplyType.VOICE
|
||||
return context
|
||||
|
||||
# 统一的发送函数,每个Channel自行实现,根据reply的type字段发送不同类型的消息
|
||||
def send(self, reply: Reply, receiver, retry_cnt = 0):
|
||||
try:
|
||||
@@ -257,23 +284,29 @@ class WechatChannel(Channel):
|
||||
self.send(reply, receiver, retry_cnt + 1)
|
||||
|
||||
# 处理消息 TODO: 如果wechaty解耦,此处逻辑可以放置到父类
|
||||
def handle(self, context):
|
||||
if not context.content:
|
||||
return
|
||||
|
||||
reply = Reply()
|
||||
|
||||
def handle(self, context: Context):
    """Run one context through the reply pipeline: generate, decorate, send.

    Does nothing when ``context`` is None or its content is falsy.
    """
    if context is None:
        return
    if not context.content:
        return
    logger.debug('[WX] ready to handle context: {}'.format(context))

    # Step 1: build the reply.
    generated = self._generate_reply(context)
    logger.debug('[WX] ready to decorate reply: {}'.format(generated))

    # Step 2: decorate the reply.
    decorated = self._decorate_reply(context, generated)

    # Step 3: send the reply.
    self._send_reply(context, decorated)
|
||||
|
||||
def _generate_reply(self, context: Context, reply: Reply = Reply()) -> Reply:
|
||||
e_context = PluginManager().emit_event(EventContext(Event.ON_HANDLE_CONTEXT, {
|
||||
'channel': self, 'context': context, 'reply': reply}))
|
||||
reply = e_context['reply']
|
||||
if not e_context.is_pass():
|
||||
logger.debug('[WX] ready to handle context: type={}, content={}'.format(context.type, context.content))
|
||||
if context.type == ContextType.TEXT or context.type == ContextType.IMAGE_CREATE: # 文字和图片消息
|
||||
if context.type == ContextType.TEXT or context.type == ContextType.IMAGE_CREATE: # 文字和图片消息
|
||||
reply = super().build_reply_content(context.content, context)
|
||||
elif context.type == ContextType.VOICE: # 语音消息
|
||||
elif context.type == ContextType.VOICE: # 语音消息
|
||||
msg = context['msg']
|
||||
mp3_path = TmpDir().path() + context.content
|
||||
msg.download(mp3_path)
|
||||
@@ -281,7 +314,7 @@ class WechatChannel(Channel):
|
||||
wav_path = os.path.splitext(mp3_path)[0] + '.wav'
|
||||
try:
|
||||
mp3_to_wav(mp3_path=mp3_path, wav_path=wav_path)
|
||||
except Exception as e: # 转换失败,直接使用mp3,对于某些api,mp3也可以识别
|
||||
except Exception as e: # 转换失败,直接使用mp3,对于某些api,mp3也可以识别
|
||||
logger.warning("[WX]mp3 to wav error, use mp3 path. " + str(e))
|
||||
wav_path = mp3_path
|
||||
# 语音识别
|
||||
@@ -293,50 +326,30 @@ class WechatChannel(Channel):
|
||||
except Exception as e:
|
||||
logger.warning("[WX]delete temp file error: " + str(e))
|
||||
|
||||
if reply.type != ReplyType.ERROR and reply.type != ReplyType.INFO:
|
||||
content = reply.content # 语音转文字后,将文字内容作为新的context
|
||||
context.type = ContextType.TEXT
|
||||
if context["isgroup"]: # 群聊
|
||||
# 校验关键字
|
||||
match_prefix = check_prefix(content, conf().get('group_chat_prefix'))
|
||||
match_contain = check_contain(content, conf().get('group_chat_keyword'))
|
||||
if match_prefix is not None or match_contain is not None:
|
||||
# 判断如果匹配到自定义前缀,则返回过滤掉前缀+空格后的内容,用于实现类似自定义+前缀触发生成AI图片的功能
|
||||
if match_prefix:
|
||||
content = content.replace(match_prefix, '', 1).strip()
|
||||
else:
|
||||
logger.info("[WX]receive voice, checkprefix didn't match")
|
||||
return
|
||||
else: # 单聊
|
||||
match_prefix = check_prefix(content, conf().get('single_chat_prefix'))
|
||||
if match_prefix: # 判断如果匹配到自定义前缀,则返回过滤掉前缀+空格后的内容
|
||||
content = content.replace(match_prefix, '', 1).strip()
|
||||
|
||||
img_match_prefix = check_prefix(content, conf().get('image_create_prefix'))
|
||||
if img_match_prefix:
|
||||
content = content.replace(img_match_prefix, '', 1).strip()
|
||||
context.type = ContextType.IMAGE_CREATE
|
||||
if reply.type == ReplyType.TEXT:
|
||||
new_context = self._compose_context(
|
||||
ContextType.TEXT, reply.content, **context.kwargs)
|
||||
if new_context:
|
||||
reply = self._generate_reply(new_context)
|
||||
else:
|
||||
context.type = ContextType.TEXT
|
||||
context.content = content
|
||||
reply = super().build_reply_content(context.content, context)
|
||||
if reply.type == ReplyType.TEXT:
|
||||
if conf().get('voice_reply_voice'):
|
||||
reply = super().build_text_to_voice(reply.content)
|
||||
return
|
||||
else:
|
||||
logger.error('[WX] unknown context type: {}'.format(context.type))
|
||||
return
|
||||
return reply
|
||||
|
||||
logger.debug('[WX] ready to decorate reply: {}'.format(reply))
|
||||
|
||||
# reply的包装步骤
|
||||
def _decorate_reply(self, context: Context, reply: Reply) -> Reply:
|
||||
if reply and reply.type:
|
||||
e_context = PluginManager().emit_event(EventContext(Event.ON_DECORATE_REPLY, {
|
||||
'channel': self, 'context': context, 'reply': reply}))
|
||||
reply = e_context['reply']
|
||||
desire_rtype = context.get('desire_rtype')
|
||||
if not e_context.is_pass() and reply and reply.type:
|
||||
if reply.type == ReplyType.TEXT:
|
||||
reply_text = reply.content
|
||||
if desire_rtype == ReplyType.VOICE:
|
||||
reply = super().build_text_to_voice(reply.content)
|
||||
return self._decorate_reply(context, reply)
|
||||
if context['isgroup']:
|
||||
reply_text = '@' + context['msg']['ActualNickName'] + ' ' + reply_text.strip()
|
||||
reply_text = conf().get("group_chat_reply_prefix", "")+reply_text
|
||||
@@ -350,8 +363,11 @@ class WechatChannel(Channel):
|
||||
else:
|
||||
logger.error('[WX] unknown reply type: {}'.format(reply.type))
|
||||
return
|
||||
if desire_rtype and desire_rtype != reply.type and reply.type not in [ReplyType.ERROR, ReplyType.INFO]:
|
||||
logger.warning('[WX] desire_rtype: {}, but reply type: {}'.format(context.get('desire_rtype'), reply.type))
|
||||
return reply
|
||||
|
||||
# reply的发送步骤
|
||||
def _send_reply(self, context: Context, reply: Reply):
|
||||
if reply and reply.type:
|
||||
e_context = PluginManager().emit_event(EventContext(Event.ON_SEND_REPLY, {
|
||||
'channel': self, 'context': context, 'reply': reply}))
|
||||
@@ -360,6 +376,7 @@ class WechatChannel(Channel):
|
||||
logger.debug('[WX] ready to send reply: {} to {}'.format(reply, context['receiver']))
|
||||
self.send(reply, context['receiver'])
|
||||
|
||||
|
||||
def check_prefix(content, prefix_list):
|
||||
for prefix in prefix_list:
|
||||
if content.startswith(prefix):
|
||||
|
||||
@@ -70,6 +70,8 @@ available_setting = {
|
||||
|
||||
# chatgpt指令自定义触发词
|
||||
"clear_memory_commands": ['#清除记忆'], # 重置会话指令
|
||||
|
||||
# channel配置
|
||||
"channel_type": "wx", # 通道类型,支持wx,wxy和terminal
|
||||
|
||||
|
||||
|
||||
55
voice/baidu/README.md
Normal file
55
voice/baidu/README.md
Normal file
@@ -0,0 +1,55 @@
|
||||
## 说明
|
||||
百度语音识别与合成参数说明
|
||||
百度语音需要以下依赖;如果运行时出现问题,多半是缺少其中某个依赖:
|
||||
pip install baidu-aip
|
||||
pip install pydub
|
||||
pip install pysilk
|
||||
还有ffmpeg,不同系统安装方式不同
|
||||
|
||||
系统中收到的语音文件为mp3格式(wx)或者sil格式(wxy),如果要识别需要转换为pcm格式,转换后的文件为16k采样率,单声道,16bit的pcm文件
|
||||
发送时又需要(wx)转换为mp3格式,转换后的文件为16k采样率,单声道,16bit的pcm文件,(wxy)转换为sil格式,还要计算声音长度,发送时需要带上声音长度
|
||||
这些事情都在audio_convert.py中封装了,直接调用即可
|
||||
|
||||
|
||||
参数说明
|
||||
识别参数
|
||||
https://ai.baidu.com/ai-doc/SPEECH/Vk38lxily
|
||||
合成参数
|
||||
https://ai.baidu.com/ai-doc/SPEECH/Gk38y8lzk
|
||||
|
||||
## 使用说明
|
||||
分两个地方配置
|
||||
|
||||
1、`def voiceToText(self, voice_file)` 函数中调用百度语音识别API的 `asr(参数)` 接口,其参数在CHATGPT-ON-WECHAT工程目录下的`config.json`文件和config.py文件中配置。
|
||||
参数 是否必填 描述
|
||||
app_id 必填 应用的APPID
|
||||
api_key 必填 应用的APIKey
|
||||
secret_key 必填 应用的SecretKey
|
||||
dev_pid 必填 语言选择,填写语言对应的dev_pid值
|
||||
|
||||
2、`def textToVoice(self, text)` 函数中调用百度语音合成API的 `synthesis(参数)` 接口,其参数在本目录下的`config.json`文件中进行配置。
|
||||
参数 是否必填 描述
|
||||
tex 必填 合成的文本,使用UTF-8编码,请注意文本长度必须小于1024字节
|
||||
lan 必填 固定值zh。语言选择,目前只有中英文混合模式,填写固定值zh(对应本目录`config.json`中的`lang`字段)
|
||||
spd 选填 语速,取值0-15,默认为5中语速
|
||||
pit 选填 音调,取值0-15,默认为5中语调
|
||||
vol 选填 音量,取值0-15,默认为5中音量(取值为0时为音量最小值,并非为无声)
|
||||
per(基础音库) 选填 度小宇=1,度小美=0,度逍遥(基础)=3,度丫丫=4
|
||||
per(精品音库) 选填 度逍遥(精品)=5003,度小鹿=5118,度博文=106,度小童=110,度小萌=111,度米朵=103,度小娇=5
|
||||
aue 选填 3为mp3格式(默认); 4为pcm-16k;5为pcm-8k;6为wav(内容同pcm-16k); 注意aue=4或者6是语音识别要求的格式,但是音频内容不是语音识别要求的自然人发音,所以识别效果会受影响。
|
||||
|
||||
关于per参数的说明:注意您购买的是哪个音库,就填写哪个音库的参数,否则会报错。如果您购买的是基础音库,那么per参数只能填写0、1、3、4;如果您购买的是精品音库,那么per参数只能填写5003、5118、106、110、111、103、5,其他的都会报错。
|
||||
### 配置文件
|
||||
|
||||
将文件夹中`config.json.template`复制为`config.json`。
|
||||
|
||||
``` json
|
||||
{
|
||||
"lang": "zh",
|
||||
"ctp": 1,
|
||||
"spd": 5,
|
||||
"pit": 5,
|
||||
"vol": 5,
|
||||
"per": 0
|
||||
}
|
||||
```
|
||||
@@ -2,6 +2,8 @@
|
||||
"""
|
||||
baidu voice service
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from aip import AipSpeech
|
||||
from bridge.reply import Reply, ReplyType
|
||||
@@ -21,29 +23,47 @@ from config import conf
|
||||
- 1837:四川话
|
||||
要使用本模块, 首先到 yuyin.baidu.com 注册一个开发者账号,
|
||||
之后创建一个新应用, 然后在应用管理的"查看key"中获得 API Key 和 Secret Key
|
||||
填入 config.json 中.
|
||||
baidu_app_id: ''
|
||||
baidu_api_key: ''
|
||||
baidu_secret_key: ''
|
||||
baidu_dev_pid: '1536'
|
||||
"""
|
||||
然后在 config.json 中填入这两个值, 以及 app_id, dev_pid
|
||||
"""
|
||||
|
||||
|
||||
class BaiduVoice(Voice):
|
||||
APP_ID = conf().get('baidu_app_id')
|
||||
API_KEY = conf().get('baidu_api_key')
|
||||
SECRET_KEY = conf().get('baidu_secret_key')
|
||||
DEV_ID = conf().get('baidu_dev_pid')
|
||||
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
|
||||
|
||||
def __init__(self):
    """Load the synthesis settings and create the Baidu speech client.

    Synthesis settings (lang, ctp, spd, pit, vol, per) are read from
    ``config.json`` next to this module. If that file does not exist it is
    created with the default values. Keys missing from an existing file
    fall back to the same defaults, so a partial file no longer aborts
    initialisation with a KeyError.

    Credentials and the recognition language (baidu_app_id, baidu_api_key,
    baidu_secret_key, baidu_dev_pid) come from the project configuration.

    Any exception raised during initialisation is logged and swallowed. In
    that case the attributes assigned after the failing statement,
    including ``self.client``, are left unset.
    """
    default_conf = {"lang": "zh", "ctp": 1, "spd": 5,
                    "pit": 5, "vol": 5, "per": 0}
    try:
        curdir = os.path.dirname(__file__)
        config_path = os.path.join(curdir, "config.json")
        bconf = dict(default_conf)
        if not os.path.exists(config_path):
            # No local config file yet: write one with the defaults.
            with open(config_path, "w", encoding="utf-8") as fw:
                json.dump(bconf, fw, indent=4)
        else:
            with open(config_path, "r", encoding="utf-8") as fr:
                # Values from the file override the defaults.
                bconf.update(json.load(fr))

        self.app_id = conf().get('baidu_app_id')
        self.api_key = conf().get('baidu_api_key')
        self.secret_key = conf().get('baidu_secret_key')
        self.dev_id = conf().get('baidu_dev_pid')
        self.lang = bconf["lang"]
        self.ctp = bconf["ctp"]
        self.spd = bconf["spd"]
        self.pit = bconf["pit"]
        self.vol = bconf["vol"]
        self.per = bconf["per"]

        self.client = AipSpeech(self.app_id, self.api_key, self.secret_key)
    except Exception as e:
        # Best effort by design: a broken voice config must not stop startup.
        logger.warning("BaiduVoice init failed: %s, ignore " % e)
|
||||
|
||||
|
||||
def voiceToText(self, voice_file):
|
||||
# 识别本地文件
|
||||
logger.debug('[Baidu] voice file name={}'.format(voice_file))
|
||||
pcm = get_pcm_from_wav(voice_file)
|
||||
res = self.client.asr(pcm, "pcm", 16000, {"dev_pid": self.DEV_ID})
|
||||
res = self.client.asr(pcm, "pcm", 16000, {"dev_pid": self.dev_id})
|
||||
if res["err_no"] == 0:
|
||||
logger.info("百度语音识别到了:{}".format(res["result"]))
|
||||
text = "".join(res["result"])
|
||||
@@ -57,9 +77,8 @@ class BaiduVoice(Voice):
|
||||
return reply
|
||||
|
||||
def textToVoice(self, text):
|
||||
result = self.client.synthesis(text, 'zh', 1, {
|
||||
'spd': 5, 'pit': 5, 'vol': 5, 'per': 111
|
||||
})
|
||||
result = self.client.synthesis(text, self.lang, self.ctp, {
|
||||
'spd': self.spd, 'pit': self.pit, 'vol': self.vol, 'per': self.per})
|
||||
if not isinstance(result, dict):
|
||||
fileName = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
|
||||
with open(fileName, 'wb') as f:
|
||||
|
||||
8
voice/baidu/config.json.template
Normal file
8
voice/baidu/config.json.template
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"lang": "zh",
|
||||
"ctp": 1,
|
||||
"spd": 5,
|
||||
"pit": 5,
|
||||
"vol": 5,
|
||||
"per": 0
|
||||
}
|
||||
Reference in New Issue
Block a user