增加百度语音识别

2026-07-19 21:07:28 +08:00 · 2023-03-27 14:40:19 +08:00
parent b6684fe7a3
commit c2ea6214a9
2 changed files with 94 additions and 52 deletions
--- a/config.py
+++ b/config.py
@@ -8,9 +8,11 @@ from common.log import logger
 available_setting = {
    # openai api配置
    "open_ai_api_key": "",  # openai api key
-    "open_ai_api_base": "https://api.openai.com/v1", # openai apibase，当use_azure_chatgpt为true时，需要设置对应的api base
+    # openai apibase，当use_azure_chatgpt为true时，需要设置对应的api base
+    "open_ai_api_base": "https://api.openai.com/v1",
    "proxy": "",  # openai使用的代理
-    "model": "gpt-3.5-turbo", # chatgpt模型， 当use_azure_chatgpt为true时，其名称为Azure上model deployment名称
+    # chatgpt模型， 当use_azure_chatgpt为true时，其名称为Azure上model deployment名称
+    "model": "gpt-3.5-turbo",
    "use_azure_chatgpt": False,  # 是否使用azure的chatgpt

    # Bot触发配置
@@ -49,9 +51,11 @@ available_setting ={
    "text_to_voice": "baidu",  # 语音合成引擎，支持baidu和google

    # baidu api的配置， 使用百度语音识别和语音合成时需要
-    'baidu_app_id': "",
-    'baidu_api_key': "",
-    'baidu_secret_key': "",
+    "baidu_app_id": "",
+    "baidu_api_key": "",
+    "baidu_secret_key": "",
+    # 1536普通话(支持简单的英文识别) 1537普通话(纯中文识别) 1737英语 1637粤语 1837四川话 1936普通话远场
+    "baidu_dev_pid": "1536",

    # 服务时间限制，目前支持itchat
    "chat_time_module": False,  # 是否开启服务时间限制
@@ -70,6 +74,7 @@ available_setting ={

 }

+
 class Config(dict):
    def __getitem__(self, key):
        if key not in available_setting:
@@ -89,8 +94,10 @@ class Config(dict):
        except Exception as e:
            raise e

+
 config = Config()

+
 def load_config():
    global config
    config_path = "./config.json"
@@ -109,7 +116,8 @@ def load_config():
    for name, value in os.environ.items():
        name = name.lower()
        if name in available_setting:
-            logger.info("[INIT] override config by environ args: {}={}".format(name, value))
+            logger.info(
+                "[INIT] override config by environ args: {}={}".format(name, value))
            try:
                config[name] = eval(value)
            except:
@@ -118,7 +126,6 @@ def load_config():
    logger.info("[INIT] load config: {}".format(config))


-
 def get_root():
    return os.path.dirname(os.path.abspath(__file__))

--- a/voice/baidu/baidu_voice.py
+++ b/voice/baidu/baidu_voice.py
@@ -8,19 +8,53 @@ from bridge.reply import Reply, ReplyType
 from common.log import logger
 from common.tmp_dir import TmpDir
 from voice.voice import Voice
+from voice.audio_convert import get_pcm_from_wav
 from config import conf
+"""
+    百度的语音识别API.
+    dev_pid:
+        - 1936：普通话远场
+        - 1536：普通话(支持简单的英文识别)
+        - 1537：普通话(纯中文识别)
+        - 1737：英语
+        - 1637：粤语
+        - 1837：四川话
+    要使用本模块, 首先到 yuyin.baidu.com 注册一个开发者账号,
+    之后创建一个新应用, 然后在应用管理的"查看key"中获得 AppID、API Key 和 Secret Key
+    填入 config.json 中.
+        baidu_app_id: ''
+        baidu_api_key: ''
+        baidu_secret_key: ''
+        baidu_dev_pid: '1536'
+"""
+

 class BaiduVoice(Voice):
    APP_ID = conf().get('baidu_app_id')
    API_KEY = conf().get('baidu_api_key')
    SECRET_KEY = conf().get('baidu_secret_key')
+    DEV_ID = conf().get('baidu_dev_pid')
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    def __init__(self):
        pass

    def voiceToText(self, voice_file):
-        pass
+        # 识别本地文件
+        logger.debug('[Baidu] voice file name={}'.format(voice_file))
+        pcm = get_pcm_from_wav(voice_file)
+        res = self.client.asr(pcm, "pcm", 16000, {"dev_pid": self.DEV_ID})
+        if res["err_no"] == 0:
+            logger.info("百度语音识别到了：{}".format(res["result"]))
+            text = "".join(res["result"])
+            reply = Reply(ReplyType.TEXT, text)
+        else:
+            logger.info("百度语音识别出错了: {}".format(res["err_msg"]))
+            if res["err_msg"] == "request pv too much":
+                logger.info("  出现这个原因很可能是你的百度语音服务调用量超出限制，或未开通付费")
+            reply = Reply(ReplyType.ERROR,
+                          "百度语音识别出错了；{0}".format(res["err_msg"]))
+        return reply

    def textToVoice(self, text):
        result = self.client.synthesis(text, 'zh', 1, {
@@ -30,7 +64,8 @@ class BaiduVoice(Voice):
            fileName = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
            with open(fileName, 'wb') as f:
                f.write(result)
-            logger.info('[Baidu] textToVoice text={} voice file name={}'.format(text, fileName))
+            logger.info(
+                '[Baidu] textToVoice text={} voice file name={}'.format(text, fileName))
            reply = Reply(ReplyType.VOICE, fileName)
        else:
            logger.error('[Baidu] textToVoice error={}'.format(result))