解决百度语音合成的一些问题和参数化设置 (#676)

* 解决百度语音合成的一些问题和参数化设置
* 补充百度语音说明
2026-07-19 12:47:25 +08:00 · 2023-03-30 14:59:52 +08:00
parent 83136e3142
commit 06b02f5df8
3 changed files with 98 additions and 16 deletions
--- a/voice/baidu/baidu_voice.py
+++ b/voice/baidu/baidu_voice.py
@@ -2,6 +2,8 @@
 """
 baidu voice service
 """
+import json
+import os
 import time
 from aip import AipSpeech
 from bridge.reply import Reply, ReplyType
@@ -21,29 +23,47 @@ from config import conf
        - 1837：四川话
    要使用本模块, 首先到 yuyin.baidu.com 注册一个开发者账号,
    之后创建一个新应用, 然后在应用管理的"查看key"中获得 API Key 和 Secret Key
-    填入 config.json 中.
-        baidu_app_id: ''
-        baidu_api_key: ''
-        baidu_secret_key: ''
-        baidu_dev_pid: '1536'
-"""
+    然后在 config.json 中填入这两个值 (baidu_api_key, baidu_secret_key), 以及 baidu_app_id, baidu_dev_pid
+    """


 class BaiduVoice(Voice):
-    APP_ID = conf().get('baidu_app_id')
-    API_KEY = conf().get('baidu_api_key')
-    SECRET_KEY = conf().get('baidu_secret_key')
-    DEV_ID = conf().get('baidu_dev_pid')
-    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    def __init__(self):
-        pass
+        try:
+            curdir = os.path.dirname(__file__)
+            config_path = os.path.join(curdir, "config.json")
+            bconf = None
+            if not os.path.exists(config_path): #如果没有配置文件，创建本地配置文件
+                bconf = { "lang": "zh", "ctp": 1, "spd": 5,
+                         "pit": 5, "vol": 5, "per": 0}
+                with open(config_path, "w") as fw:
+                    json.dump(bconf, fw, indent=4)
+            else:
+                with open(config_path, "r") as fr:
+                    bconf = json.load(fr)
+                    
+            self.app_id = conf().get('baidu_app_id')
+            self.api_key = conf().get('baidu_api_key')
+            self.secret_key = conf().get('baidu_secret_key')
+            self.dev_id = conf().get('baidu_dev_pid')
+            self.lang = bconf["lang"]
+            self.ctp = bconf["ctp"]
+            self.spd = bconf["spd"]
+            self.pit = bconf["pit"]
+            self.vol = bconf["vol"]
+            self.per = bconf["per"]
+            
+            self.client = AipSpeech(self.app_id, self.api_key, self.secret_key)
+        except Exception as e:
+            logger.warn("BaiduVoice init failed: %s, ignore " % e)

+        
    def voiceToText(self, voice_file):
        # 识别本地文件
        logger.debug('[Baidu] voice file name={}'.format(voice_file))
        pcm = get_pcm_from_wav(voice_file)
-        res = self.client.asr(pcm, "pcm", 16000, {"dev_pid": self.DEV_ID})
+        res = self.client.asr(pcm, "pcm", 16000, {"dev_pid": self.dev_id})
        if res["err_no"] == 0:
            logger.info("百度语音识别到了：{}".format(res["result"]))
            text = "".join(res["result"])
@@ -57,9 +77,8 @@ class BaiduVoice(Voice):
        return reply

    def textToVoice(self, text):
-        result = self.client.synthesis(text, 'zh', 1, {
-            'spd': 5, 'pit': 5, 'vol': 5, 'per': 111
-        })
+        result = self.client.synthesis(text, self.lang, self.ctp, {
+            'spd': self.spd, 'pit': self.pit, 'vol': self.vol, 'per': self.per})
        if not isinstance(result, dict):
            fileName = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
            with open(fileName, 'wb') as f: