class TencentVoice(Voice):
    """Speech recognition (ASR) and speech synthesis (TTS) via Tencent Cloud.

    The credentials and the TTS voice id are read from a ``config.json`` file
    located in the same directory as this module. Recognised keys are
    ``secret_id``, ``secret_key`` and the optional ``voice_type``.
    """

    # Region handed to both the ASR and the TTS client.
    REGION = "ap-guangzhou"

    def __init__(self):
        super().__init__()
        self.secret_id = None
        self.secret_key = None
        # Default TTS voice id; the original source annotates 1003 as a
        # "customer-service female voice".
        self.voice_type = 1003
        self._load_config()

    def _load_config(self):
        """Load credentials and the voice id from the local ``config.json``.

        Loading is best-effort: any problem is logged and the instance keeps
        its defaults, so construction never raises because of a bad config.
        """
        config_path = os.path.join(os.path.dirname(__file__), "config.json")
        try:
            # Explicit UTF-8 so the file parses the same way on every
            # platform, regardless of the locale's default encoding.
            with open(config_path, "r", encoding="utf-8") as f:
                config = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            logger.error(f"[Tencent] Failed to load config: {e}")
            return

        if not isinstance(config, dict):
            logger.error("[Tencent] Failed to load config: top-level JSON value must be an object")
            return

        self.secret_id = config.get("secret_id")
        self.secret_key = config.get("secret_key")
        self.voice_type = config.get("voice_type", self.voice_type)
        if not self.secret_id or not self.secret_key:
            logger.error("[Tencent] Missing credentials in config.json")

    def _credential(self):
        """Return an SDK credential object built from the loaded keys.

        Raises:
            ValueError: if ``secret_id`` or ``secret_key`` is not configured.
                Callers invoke this inside their ``try`` block, so the error
                surfaces as a regular ERROR reply with a clear cause instead
                of an opaque SDK failure.
        """
        if not self.secret_id or not self.secret_key:
            raise ValueError("secret_id / secret_key missing in config.json")
        return credential.Credential(self.secret_id, self.secret_key)

    def setup(self, config):
        """No-op kept for backward compatibility; ``config`` is ignored."""
        pass

    def voiceToText(self, voice_file):
        """Transcribe an audio file with Tencent sentence recognition.

        Args:
            voice_file: path of the audio file to upload. The request always
                declares the format as ``wav``.

        Returns:
            ``Reply(ReplyType.TEXT, text)`` on success, otherwise
            ``Reply(ReplyType.ERROR, message)``. No exception propagates.
        """
        try:
            client = asr_client.AsrClient(self._credential(), self.REGION)

            with open(voice_file, "rb") as f:
                audio_data = f.read()

            req = asr_models.SentenceRecognitionRequest()
            req.ProjectId = 0
            req.SubServiceType = 2
            req.EngSerViceType = "16k_zh"
            req.SourceType = 1  # audio is sent inline in ``Data``
            req.VoiceFormat = "wav"
            req.UsrAudioKey = "voice_recognition"
            # The API takes the audio base64-encoded, while DataLen is the
            # length of the raw (un-encoded) bytes.
            req.Data = base64.b64encode(audio_data).decode("utf-8")
            req.DataLen = len(audio_data)

            resp = client.SentenceRecognition(req)

            if resp.Result:
                logger.info("[Tencent] Voice to text success: {}".format(resp.Result))
                return Reply(ReplyType.TEXT, resp.Result)

            logger.warning("[Tencent] Voice to text failed")
            return Reply(ReplyType.ERROR, "腾讯语音识别失败")
        except Exception as e:
            # Boundary handler: the channel expects a Reply, never an exception.
            logger.error("[Tencent] Voice to text error: {}".format(e))
            return Reply(ReplyType.ERROR, "腾讯语音识别出错:{}".format(str(e)))

    def textToVoice(self, text):
        """Synthesise ``text`` to an mp3 file with Tencent TTS.

        Args:
            text: the text to speak.

        Returns:
            ``Reply(ReplyType.VOICE, file_path)`` pointing at the generated
            file inside the temporary directory, otherwise
            ``Reply(ReplyType.ERROR, message)``. No exception propagates.
        """
        import uuid  # local import: only needed for the per-request session id

        try:
            client = tts_client.TtsClient(self._credential(), self.REGION)

            req = tts_models.TextToVoiceRequest()
            req.Text = text
            # A uuid avoids the collisions a one-second timestamp produces
            # when several requests are issued within the same second.
            req.SessionId = str(uuid.uuid4())
            req.Volume = 5
            req.Speed = 0
            req.ProjectId = 0
            req.ModelType = 1
            req.PrimaryLanguage = 1
            req.SampleRate = 16000
            req.VoiceType = self.voice_type

            response = client.TextToVoice(req)

            if response.Audio:
                fileName = TmpDir().path() + "reply-" + str(int(time.time())) + "-" + str(hash(text) & 0x7FFFFFFF) + ".mp3"
                # The service returns the audio base64-encoded.
                with open(fileName, "wb") as f:
                    f.write(base64.b64decode(response.Audio))
                logger.info("[Tencent] textToVoice text={} voice file name={}".format(text, fileName))
                return Reply(ReplyType.VOICE, fileName)

            logger.error("[Tencent] textToVoice failed")
            return Reply(ReplyType.ERROR, "腾讯语音合成失败")
        except Exception as e:
            # Boundary handler: the channel expects a Reply, never an exception.
            logger.error("[Tencent] Text to voice error: {}".format(e))
            return Reply(ReplyType.ERROR, "腾讯语音合成出错:{}".format(str(e)))
"tencent": - from voice.telent.tencent_voice import TencentVoice + from voice.tencent.tencent_voice import TencentVoice return TencentVoice() + elif voice_type == "funasr": + from voice.funasr.fun_voice import FunVoice + + return FunVoice() raise RuntimeError diff --git a/voice/telent/config.json.template b/voice/telent/config.json.template deleted file mode 100644 index 0e5526a0..00000000 --- a/voice/telent/config.json.template +++ /dev/null @@ -1,5 +0,0 @@ -{ - "voice_type": 1003, # 客服女声 - "tencent_secret_id": "YOUR_SECRET_ID", - "tencent_secret_key": "YOUR_SECRET_KEY" -} diff --git a/voice/tencent/config.json.template b/voice/tencent/config.json.template new file mode 100644 index 00000000..91a035cb --- /dev/null +++ b/voice/tencent/config.json.template @@ -0,0 +1,5 @@ +{ + "voice_type": 1003, # 客服女声 + "secret_id": "YOUR_SECRET_ID", + "secret_key": "YOUR_SECRET_KEY" +} diff --git a/voice/telent/tencent_voice.py b/voice/tencent/tencent_voice.py similarity index 100% rename from voice/telent/tencent_voice.py rename to voice/tencent/tencent_voice.py From 814b6753c24dc707c872ed43de0016cd65560469 Mon Sep 17 00:00:00 2001 From: gaojia Date: Wed, 26 Mar 2025 17:33:39 +0800 Subject: [PATCH 3/4] =?UTF-8?q?=E5=88=A0=E9=99=A4=E9=85=8D=E7=BD=AE?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E4=B8=AD=E7=9A=84=E6=B3=A8=E9=87=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- voice/tencent/config.json.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/voice/tencent/config.json.template b/voice/tencent/config.json.template index 91a035cb..210d95fb 100644 --- a/voice/tencent/config.json.template +++ b/voice/tencent/config.json.template @@ -1,5 +1,5 @@ { - "voice_type": 1003, # 客服女声 + "voice_type": 1003, "secret_id": "YOUR_SECRET_ID", "secret_key": "YOUR_SECRET_KEY" } From ead5f9926b0a086a671c6be0ee4cdf1ab47a4a45 Mon Sep 17 00:00:00 2001 From: gaojia Date: Thu, 27 Mar 2025 10:13:38 +0800 Subject: [PATCH 4/4] 
=?UTF-8?q?=E5=88=A0=E9=99=A4funasr?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- voice/factory.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/voice/factory.py b/voice/factory.py index e707c637..8562f634 100644 --- a/voice/factory.py +++ b/voice/factory.py @@ -54,8 +54,4 @@ def create_voice(voice_type): from voice.tencent.tencent_voice import TencentVoice return TencentVoice() - elif voice_type == "funasr": - from voice.funasr.fun_voice import FunVoice - - return FunVoice() raise RuntimeError