GUI_Utils/minimax_tts_gui.py

580 lines
22 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
PySide6 文本转 MP3 GUI 脚本 (仅支持 Minimax API v2)
支持配置记忆功能,将语音设置、保存目录和文件名格式保存到 config.ini。
"""
import sys
import http.client
import json
import base64
import os
import random
import string
import configparser
from pathlib import Path
from PySide6.QtGui import QFont
from PySide6.QtWidgets import (
QApplication,
QWidget,
QVBoxLayout,
QHBoxLayout,
QTextEdit,
QLineEdit,
QPushButton,
QComboBox,
QSlider,
QLabel,
QFileDialog,
QMessageBox,
QGroupBox,
QGridLayout,
QInputDialog,
)
from PySide6.QtCore import Qt, Signal, QObject, QThread, QDir
# --- Configuration management ---
CONFIG_FILE = "config.ini"


class ConfigManager:
    """Load and persist application settings in an INI file.

    Three sections are used: ``API`` (the API key), ``VoiceSettings`` and
    ``OutputSettings``.  Every setter writes the file back immediately.

    Args:
        config_file: Path of the INI file.  A relative path is resolved
            against the current working directory.
    """

    def __init__(self, config_file=CONFIG_FILE):
        self.config_file = config_file
        # Interpolation is disabled so that values containing '%' (for
        # example a directory named "100%done") are stored verbatim; with
        # the default BasicInterpolation such values raise ValueError on set.
        self.config = configparser.ConfigParser(interpolation=None)
        self.load_config()

    def load_config(self):
        """Read the config file, creating a default one if it is missing.

        If the file exists but cannot be read or parsed, the error is
        printed and the file is replaced with default values.
        """
        if not os.path.exists(self.config_file):
            self.create_default_config()
            return
        try:
            # Always read as UTF-8 so non-ASCII paths survive a round trip.
            with open(self.config_file, 'r', encoding='utf-8') as f:
                self.config.read_file(f)
        except (OSError, UnicodeError, configparser.Error) as e:
            print(f"❌ 加载配置文件 '{self.config_file}' 失败: {e}")
            # Drop anything that was parsed before the failure so the
            # defaults below are not mixed with half-read sections.
            self.config.clear()
            self.create_default_config()

    def create_default_config(self):
        """Populate all sections with default values and write the file."""
        print(f" 配置文件 '{self.config_file}' 不存在,正在创建默认配置。")
        self.config['API'] = {'api_key': ''}
        self.config['VoiceSettings'] = {
            'voice_id': 'iheytang-jasmine',
            'speed': '1.0',
            'vol': '100',
            'pitch': '0',
            'emotion': 'happy',
        }
        self.config['OutputSettings'] = {
            'save_directory': os.path.expanduser('~'),
            'filename_format': '(voice_id)_(random_8)',
        }
        self.save_config()

    def save_config(self):
        """Write the current settings to ``config_file``.

        I/O errors are printed rather than raised so that a read-only
        location does not crash the application.
        """
        try:
            with open(self.config_file, 'w', encoding='utf-8') as configfile:
                self.config.write(configfile)
            print(f"✅ 配置已保存到 '{self.config_file}'")
        except OSError as e:
            print(f"❌ 保存配置文件 '{self.config_file}' 失败: {e}")

    def _read_section(self, section):
        """Return the options of *section* as a dict, or ``{}`` if absent."""
        if section in self.config:
            return dict(self.config.items(section))
        return {}

    def _update_section(self, section, settings):
        """Merge *settings* into *section* (values stringified) and save."""
        if section not in self.config:
            self.config[section] = {}
        for key, value in settings.items():
            # ConfigParser only accepts string values.
            self.config[section][key] = str(value)
        self.save_config()

    def get_api_key(self):
        """Return the stored API key, or an empty string if none is set."""
        return self.config.get('API', 'api_key', fallback='')

    def set_api_key(self, api_key):
        """Store *api_key* and save the file."""
        self._update_section('API', {'api_key': api_key})

    def get_voice_settings(self):
        """Return the ``VoiceSettings`` section as a dict of strings."""
        return self._read_section('VoiceSettings')

    def set_voice_settings(self, settings):
        """Merge *settings* into ``VoiceSettings`` and save the file."""
        self._update_section('VoiceSettings', settings)

    def get_output_settings(self):
        """Return the ``OutputSettings`` section as a dict of strings."""
        return self._read_section('OutputSettings')

    def set_output_settings(self, settings):
        """Merge *settings* into ``OutputSettings`` and save the file."""
        self._update_section('OutputSettings', settings)
# --- Minimax API v2 helpers ---
DEFAULT_VOICE_ID_V2 = 'jasmine_52025'  # default voice id for the v2 endpoint


def call_minimax_api_v2(
    text, voice_id=DEFAULT_VOICE_ID_V2, speed=1.0, vol=1.0, pitch=0, emotion="happy", api_key="",
    timeout=60,
):
    """Request speech synthesis from the Minimax ``/v1/t2a_v2`` endpoint.

    Args:
        text: Text to synthesise.
        voice_id: Voice identifier sent in ``voice_setting``.
        speed: Speech rate sent in ``voice_setting``.
        vol: Volume sent in ``voice_setting``.
        pitch: Pitch offset sent in ``voice_setting``.
        emotion: Emotion label sent in ``voice_setting``.
        api_key: Bearer token used for the ``Authorization`` header.
        timeout: Socket timeout in seconds, so a stalled connection cannot
            block the calling thread forever.

    Returns:
        The ``data.audio`` string from the JSON response (hex encoded, as
        requested via ``output_format``), or ``None`` when the key is
        missing, the request fails, or the response carries no audio.
        Failures are reported on stdout.
    """
    if not api_key:
        print("❌ Minimax API Key 未设置。")
        return None

    payload = json.dumps(
        {
            "model": "speech-02-hd",
            "text": text,
            "stream": False,
            "language_boost": "auto",
            "output_format": "hex",
            "voice_setting": {
                "voice_id": voice_id,
                "speed": speed,
                "vol": vol,
                "pitch": pitch,
                "emotion": emotion,
            },
            "audio_setting": {"sample_rate": 32000, "bitrate": 128000, "format": "mp3"},
        }
    )
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    # Constructing the connection does not open a socket; that happens in
    # request(), inside the try block below.
    conn = http.client.HTTPSConnection("api.minimaxi.com", timeout=timeout)
    try:
        print("🔄 正在调用 Minimax API v2...")
        conn.request("POST", "/v1/t2a_v2", payload, headers)
        res = conn.getresponse()
        data = res.read()

        if res.status != 200:
            print(f"❌ API v2 调用失败: {res.status} {res.reason}")
            try:
                error_details = json.loads(data.decode('utf-8'))
                print(f"响应内容: {error_details}")
            except ValueError:
                # Covers both invalid JSON and invalid UTF-8.
                print(f"响应内容: {data.decode('utf-8', errors='ignore')}")
            return None

        response_data = json.loads(data.decode("utf-8"))
        # "data" may be absent or null on an application-level error, so it
        # must be checked for being a dict before it is indexed.
        audio_section = response_data.get("data") if isinstance(response_data, dict) else None
        if isinstance(audio_section, dict) and audio_section.get("audio"):
            return audio_section["audio"]

        print("❌ API v2 响应中没有音频数据。")
        print(f"响应内容: {response_data}")
        return None
    except Exception as e:
        # Boundary handler: network, TLS, timeout and decoding errors are all
        # reported the same way and turned into a None result.
        print(f"❌ API v2 调用异常: {str(e)}")
        return None
    finally:
        conn.close()
# --- Audio data helpers ---
def fix_base64_padding(b64_string):
    """Return *b64_string* with '=' padding normalised to a multiple of four.

    Any existing trailing '=' characters are removed first, then exactly as
    many as needed are appended.
    """
    unpadded = b64_string.rstrip("=")
    missing = -len(unpadded) % 4
    return unpadded + "=" * missing
def validate_mp3_format(audio_data):
    """Heuristically check whether *audio_data* starts like an MP3 stream.

    The data is accepted when it begins with an ID3v2 tag or with an MPEG
    audio Layer III frame header (any MPEG version, with or without CRC).
    Only the first bytes are inspected; the rest of the stream is not parsed.

    Args:
        audio_data: Bytes-like audio payload.

    Returns:
        ``True`` if the prefix looks like MP3, ``False`` otherwise (including
        payloads shorter than 10 bytes).
    """
    if len(audio_data) < 10:
        return False
    if audio_data.startswith(b"ID3"):
        return True

    first, second = audio_data[0], audio_data[1]
    # Frame header: 11 sync bits set, then 2 version bits, then 2 layer bits.
    has_sync = first == 0xFF and (second & 0xE0) == 0xE0
    version_bits = (second >> 3) & 0x03  # 0b01 is reserved
    layer_bits = (second >> 1) & 0x03    # 0b01 means Layer III
    return has_sync and version_bits != 0x01 and layer_bits == 0x01
def clean_base64_data(base64_string):
    """Strip wrapping quotes and whitespace noise from an encoded payload.

    One pair of surrounding double quotes is removed if present, then real
    newlines, carriage returns and spaces, and finally the literal escape
    sequences ``\\n``, ``\\r`` and ``\\t``.
    """
    cleaned = base64_string
    if cleaned.startswith('"') and cleaned.endswith('"'):
        cleaned = cleaned[1:-1]
    # Order matters: real whitespace goes first, escaped sequences second.
    for noise in ("\n", "\r", " ", "\\n", "\\r", "\\t"):
        cleaned = cleaned.replace(noise, "")
    return cleaned
def is_hex_data(data_string):
    """Return True when *data_string* is non-empty and purely hexadecimal."""
    if not data_string:
        return False
    # Subset test: every character must be one of 0-9, a-f, A-F.
    return set(data_string) <= set(string.hexdigits)
def hex_to_bytes(hex_string):
    """Convert a hexadecimal string to bytes.

    Args:
        hex_string: String of hexadecimal digits.

    Returns:
        The decoded bytes.

    Raises:
        ValueError: If *hex_string* is not valid hexadecimal (for example an
            odd number of digits).  The underlying error is attached as the
            cause so the original reason is not lost.
    """
    try:
        return bytes.fromhex(hex_string)
    except ValueError as exc:
        raise ValueError("无效的十六进制格式") from exc
def decode_audio_data(data_string):
    """Decode an audio payload given as hexadecimal or base64 text.

    The payload is cleaned first.  If it consists only of hexadecimal digits
    it is decoded as hex; otherwise it is decoded as strict base64.

    Args:
        data_string: Encoded audio payload.

    Returns:
        The decoded audio bytes.

    Raises:
        ValueError: If the payload is empty after cleaning, is malformed hex,
            or is not valid base64.
    """
    cleaned_data = clean_base64_data(data_string)
    if not cleaned_data:
        # An empty payload would otherwise decode to an empty "audio" file.
        raise ValueError("无法解码音频数据,请检查数据格式")

    if is_hex_data(cleaned_data):
        print("📝 检测到十六进制格式数据")
        return hex_to_bytes(cleaned_data)

    print("📝 尝试 base64 解码")
    try:
        # validate=True rejects characters outside the base64 alphabet
        # instead of silently discarding them and returning garbage.
        return base64.b64decode(fix_base64_padding(cleaned_data), validate=True)
    except ValueError as exc:
        # binascii.Error is a ValueError subclass, so this covers it too.
        raise ValueError("无法解码音频数据,请检查数据格式") from exc
def save_mp3_file(audio_data, output_path):
    """Write *audio_data* to *output_path* in binary mode.

    A warning is printed when the data does not look like MP3, but the file
    is written regardless.

    Returns:
        ``True`` on success; ``False`` if any exception occurred (the error
        is printed).
    """
    try:
        looks_like_mp3 = validate_mp3_format(audio_data)
        if not looks_like_mp3:
            print("⚠️ 警告: 解码的数据可能不是有效的 MP3 格式")
        with open(output_path, "wb") as sink:
            sink.write(audio_data)
    except Exception as err:
        print(f"❌ 保存文件失败: {str(err)}")
        return False
    else:
        return True
# --- GUI components ---
class Worker(QObject):
    """Run one text-to-MP3 conversion and report the result through signals.

    Signals:
        finished(bool, str): Emitted once with a success flag and a message.
        progress(str): Emitted with a status message before each stage.
    """

    finished = Signal(bool, str)  # success, message
    progress = Signal(str)  # status message

    def __init__(self, text, output_path, voice_id, speed, vol, pitch, emotion, api_key):
        super().__init__()
        self.text = text
        self.output_path = output_path
        self.voice_id = voice_id
        self.speed = speed
        self.vol = vol
        self.pitch = pitch
        self.emotion = emotion
        self.api_key = api_key

    def run(self):
        """Call the API, decode the audio, save it, then emit ``finished``."""
        try:
            outcome = self._synthesize_and_save()
        except ValueError as problem:
            outcome = (False, f"数据处理错误: {problem}")
        except Exception as problem:
            outcome = (False, f"发生未知错误: {str(problem)}")
        self.finished.emit(*outcome)

    def _synthesize_and_save(self):
        """Run the three stages and return a ``(success, message)`` pair."""
        self.progress.emit("🔄 正在调用 Minimax API v2...")
        encoded_audio = call_minimax_api_v2(
            self.text, self.voice_id, self.speed, self.vol, self.pitch, self.emotion, self.api_key
        )
        if encoded_audio is None:
            return False, "API 调用失败,请检查日志信息或 API Key。"

        self.progress.emit("🔄 正在解码音频数据...")
        audio_bytes = decode_audio_data(encoded_audio)

        self.progress.emit(f"💾 正在保存音频到: {self.output_path}")
        if not save_mp3_file(audio_bytes, self.output_path):
            return False, "保存 MP3 文件失败,请检查文件权限和路径。"
        return True, f"✅ 成功生成 MP3 文件: {self.output_path}"
class TextToMP3_GUI(QWidget):
    """Main window: text input, voice parameters, output options, convert button.

    Settings are loaded from and saved to the given config manager.  The
    conversion itself runs in a ``Worker`` moved to a ``QThread`` so the UI
    stays responsive.

    Args:
        config_manager: Object providing ``get_api_key``,
            ``get_voice_settings``, ``set_voice_settings``,
            ``get_output_settings`` and ``set_output_settings``.
    """

    # Template used when the file-name field is left blank.  "(voice_id)" and
    # "(random_8)" are the two placeholders substituted at conversion time.
    DEFAULT_FILENAME_FORMAT = '(voice_id)_(random_8)'
    # Characters that are replaced in generated file names so that a voice id
    # or template cannot introduce path separators or reserved characters.
    _UNSAFE_FILENAME_CHARS = '\\/:*?"<>|'

    def __init__(self, config_manager):
        super().__init__()
        self.config_manager = config_manager
        self.api_key = self.config_manager.get_api_key()
        self.worker_thread = None
        self.worker = None
        self.setWindowTitle("文本转 MP3 (Minimax v2)")
        self.setGeometry(100, 100, 700, 550)
        self.setMinimumSize(600, 450)
        self.init_ui()
        self.load_settings_from_config()

    # ------------------------------------------------------------------ UI
    def init_ui(self):
        """Build all widgets and lay them out top to bottom."""
        main_layout = QVBoxLayout(self)
        main_layout.addWidget(self._build_input_group())
        main_layout.addWidget(self._build_voice_group())
        main_layout.addWidget(self._build_output_group())
        main_layout.addLayout(self._build_action_row())

    def _build_input_group(self):
        """Create the group box holding the text editor."""
        input_group = QGroupBox("输入文本")
        input_layout = QVBoxLayout()
        self.text_edit = QTextEdit()
        self.text_edit.setPlaceholderText("在此输入您想要转换为语音的文本...")
        self.text_edit.setFont(QFont("Arial", 11))
        self.text_edit.setFixedHeight(150)
        input_layout.addWidget(self.text_edit)
        input_group.setLayout(input_layout)
        return input_group

    def _build_voice_group(self):
        """Create the group box with voice id, sliders and emotion selector."""
        params_group = QGroupBox("语音设置 (Minimax v2)")
        params_layout = QGridLayout()

        params_layout.addWidget(QLabel("语音 ID:"), 0, 0, 1, 1, Qt.AlignRight)
        self.voice_id_edit = QLineEdit()
        self.voice_id_edit.setPlaceholderText("如: jasmine_52025")
        params_layout.addWidget(self.voice_id_edit, 0, 1, 1, 2)

        # Slider value 5..20 is shown and sent as 0.5..2.0.
        self.speed_slider, self.speed_label = self._add_slider_row(
            params_layout, 1, "语速:", 5, 20, "1.0", self._format_speed
        )
        self.vol_slider, self.vol_label = self._add_slider_row(
            params_layout, 2, "音量:", 0, 100, "100%", self._format_volume
        )
        self.pitch_slider, self.pitch_label = self._add_slider_row(
            params_layout, 3, "音调:", -10, 10, "0", self._format_pitch
        )

        params_layout.addWidget(QLabel("情感:"), 4, 0, 1, 1, Qt.AlignRight)
        self.emotion_combo = QComboBox()
        self.emotion_combo.addItems(["happy", "sad", "angry", "fearful", "surprised", "neutral"])
        params_layout.addWidget(self.emotion_combo, 4, 1, 1, 2)

        params_group.setLayout(params_layout)
        return params_group

    def _add_slider_row(self, grid, row, caption, minimum, maximum, initial_text, render):
        """Add a captioned slider plus value label to *grid*; return both widgets."""
        grid.addWidget(QLabel(caption), row, 0, 1, 1, Qt.AlignRight)
        slider = QSlider(Qt.Horizontal)
        slider.setRange(minimum, maximum)
        value_label = QLabel(initial_text)
        slider.valueChanged.connect(lambda value: value_label.setText(render(value)))
        row_layout = QHBoxLayout()
        row_layout.addWidget(slider)
        row_layout.addWidget(value_label)
        grid.addLayout(row_layout, row, 1, 1, 2)
        return slider, value_label

    @staticmethod
    def _format_speed(value):
        """Render a speed slider value (tenths) as text, e.g. 15 -> '1.5'."""
        return f"{value / 10.0:.1f}"

    @staticmethod
    def _format_volume(value):
        """Render a volume slider value as a percentage."""
        return f"{value}%"

    @staticmethod
    def _format_pitch(value):
        """Render a pitch slider value as text."""
        return f"{value}"

    def _build_output_group(self):
        """Create the group box with the save directory and file-name format."""
        output_group = QGroupBox("输出设置")
        output_layout = QVBoxLayout()

        output_h_layout = QHBoxLayout()
        self.output_dir_label = QLabel("保存目录:")
        self.output_dir_path_edit = QLineEdit()
        self.output_dir_path_edit.setPlaceholderText("选择保存 MP3 文件的目录")
        self.browse_dir_button = QPushButton("浏览...")
        self.browse_dir_button.clicked.connect(self.browse_directory)
        output_h_layout.addWidget(self.output_dir_label)
        output_h_layout.addWidget(self.output_dir_path_edit)
        output_h_layout.addWidget(self.browse_dir_button)
        output_layout.addLayout(output_h_layout)

        filename_layout = QHBoxLayout()
        filename_layout.addWidget(QLabel("文件名格式:"), alignment=Qt.AlignRight)
        self.filename_format_edit = QLineEdit()
        self.filename_format_edit.setPlaceholderText("如: (voice_id)_(random_8)")
        filename_layout.addWidget(self.filename_format_edit)
        output_layout.addLayout(filename_layout)

        output_group.setLayout(output_layout)
        return output_group

    def _build_action_row(self):
        """Create the bottom row with the status label and convert button."""
        status_layout = QHBoxLayout()
        self.status_label = QLabel("等待输入...")
        self.status_label.setFont(QFont("Arial", 10))
        self.status_label.setStyleSheet("color: gray;")
        status_layout.addWidget(self.status_label, 1)

        self.convert_button = QPushButton("开始转换")
        self.convert_button.setFont(QFont("Arial", 12, QFont.Bold))
        self.convert_button.setStyleSheet(
            "QPushButton { background-color: #4CAF50; color: white; } QPushButton:pressed { background-color: #45a049; }")
        self.convert_button.clicked.connect(self.start_conversion)
        status_layout.addWidget(self.convert_button)
        return status_layout

    # ------------------------------------------------------------ settings
    @staticmethod
    def _apply_slider_value(slider, raw_value, default, scale=1):
        """Set *slider* from a config string, tolerating bad or out-of-range input."""
        try:
            scaled = float(raw_value) * scale
        except (TypeError, ValueError):
            scaled = default * scale
        if scaled != scaled:  # NaN compares unequal to itself
            scaled = default * scale
        # Clamp before rounding so infinities and huge values stay valid.
        bounded = min(max(scaled, slider.minimum()), slider.maximum())
        slider.setValue(round(bounded))

    def _refresh_value_labels(self):
        """Make every slider label show its slider's current value.

        Needed because ``valueChanged`` is not emitted when ``setValue`` is
        called with the value the slider already has, which would leave the
        label showing its construction-time text.
        """
        self.speed_label.setText(self._format_speed(self.speed_slider.value()))
        self.vol_label.setText(self._format_volume(self.vol_slider.value()))
        self.pitch_label.setText(self._format_pitch(self.pitch_slider.value()))

    def load_settings_from_config(self):
        """Fill the widgets from the stored voice and output settings."""
        config_voice = self.config_manager.get_voice_settings()
        self.voice_id_edit.setText(config_voice.get('voice_id', DEFAULT_VOICE_ID_V2))
        self._apply_slider_value(self.speed_slider, config_voice.get('speed', '1.0'), 1.0, scale=10)
        self._apply_slider_value(self.vol_slider, config_voice.get('vol', '100'), 100)
        self._apply_slider_value(self.pitch_slider, config_voice.get('pitch', '0'), 0)
        self._refresh_value_labels()

        # Fall back to 'happy' when the stored emotion is not in the list.
        emotion_index = self.emotion_combo.findText(config_voice.get('emotion', 'happy'))
        if emotion_index != -1:
            self.emotion_combo.setCurrentIndex(emotion_index)
        else:
            self.emotion_combo.setCurrentText('happy')

        config_output = self.config_manager.get_output_settings()
        self.output_dir_path_edit.setText(config_output.get('save_directory', os.path.expanduser('~')))
        self.filename_format_edit.setText(
            config_output.get('filename_format', self.DEFAULT_FILENAME_FORMAT)
        )

    def save_settings_to_config(self):
        """Persist the current widget values through the config manager."""
        settings = {
            'voice_id': self.voice_id_edit.text(),
            'speed': self._format_speed(self.speed_slider.value()),
            'vol': str(self.vol_slider.value()),
            'pitch': str(self.pitch_slider.value()),
            'emotion': self.emotion_combo.currentText(),
        }
        self.config_manager.set_voice_settings(settings)
        output_settings = {
            'save_directory': self.output_dir_path_edit.text(),
            'filename_format': self.filename_format_edit.text(),
        }
        self.config_manager.set_output_settings(output_settings)

    def browse_directory(self):
        """Let the user pick the save directory and persist the choice."""
        directory = QFileDialog.getExistingDirectory(
            self, "选择保存目录", self.output_dir_path_edit.text() or QDir.homePath()
        )
        if directory:
            self.output_dir_path_edit.setText(directory)
            # Save immediately so the choice survives even without converting.
            self.save_settings_to_config()

    # ------------------------------------------------------------ filenames
    def generate_random_string(self, length=8):
        """Return *length* random lowercase letters and digits."""
        characters = string.ascii_lowercase + string.digits
        return ''.join(random.choice(characters) for _ in range(length))

    def get_output_filename(self):
        """Build the output file name from the template in the UI.

        "(voice_id)" and "(random_8)" are substituted, characters that are
        unsafe in file names are replaced with "_", and ".mp3" is appended
        when missing.  A blank template falls back to the default format.
        """
        template = self.filename_format_edit.text().strip() or self.DEFAULT_FILENAME_FORMAT
        voice_id = self.voice_id_edit.text() or "default_voice"
        filename = template.replace("(voice_id)", voice_id).replace(
            "(random_8)", self.generate_random_string()
        )
        filename = ''.join(
            '_' if ch in self._UNSAFE_FILENAME_CHARS else ch for ch in filename
        )
        if not filename.lower().endswith(".mp3"):
            filename += ".mp3"
        return filename

    # ----------------------------------------------------------- conversion
    def start_conversion(self):
        """Validate the inputs, then start the conversion in a worker thread."""
        text_to_convert = self.text_edit.toPlainText().strip()
        if not text_to_convert:
            QMessageBox.warning(self, "输入错误", "请输入要转换的文本!")
            return

        output_dir = self.output_dir_path_edit.text().strip()
        if not output_dir:
            QMessageBox.warning(self, "输出错误", "请选择 MP3 文件的保存目录!")
            return
        # Checked up front so a bad path is caught before the API is called.
        if not Path(output_dir).is_dir():
            QMessageBox.warning(self, "输出错误", "保存目录不存在,请重新选择!")
            return

        # All cheap validation happens before the overwrite prompt so the
        # user is not asked to confirm something that cannot proceed anyway.
        voice_id = self.voice_id_edit.text()
        if not voice_id:
            QMessageBox.warning(self, "输入错误", "请提供语音 ID")
            return
        if not self.api_key:
            QMessageBox.critical(self, "API Key 错误", "Minimax API Key 未设置,请在程序启动时配置。")
            return

        output_path = Path(output_dir) / self.get_output_filename()
        if output_path.exists():
            reply = QMessageBox.question(
                self, "文件已存在", f"文件 '{output_path.name}' 已存在。是否覆盖?",
                QMessageBox.Yes | QMessageBox.No, QMessageBox.No
            )
            if reply == QMessageBox.No:
                return

        speed = self.speed_slider.value() / 10.0
        # NOTE(review): the Minimax T2A v2 reference documents
        # voice_setting.vol as a float in (0, 10] where 1.0 is normal
        # loudness; the slider is a percentage, so 100% is sent as 1.0 and
        # 0% is raised to the smallest positive step. Confirm against the
        # current API reference.
        vol = max(self.vol_slider.value(), 1) / 100.0
        pitch = self.pitch_slider.value()
        emotion = self.emotion_combo.currentText()

        # Remember the values actually used for this conversion.
        self.save_settings_to_config()

        self.convert_button.setEnabled(False)
        self.browse_dir_button.setEnabled(False)
        self._show_status("🔄 准备中...", "orange")

        self.worker = Worker(
            text_to_convert, str(output_path), voice_id, speed, vol, pitch, emotion, self.api_key
        )
        self.worker.progress.connect(self.update_status)
        self.worker.finished.connect(self.on_conversion_finished)
        self.worker_thread = QThread()
        self.worker.moveToThread(self.worker_thread)
        self.worker_thread.started.connect(self.worker.run)
        self.worker_thread.start()

    def _show_status(self, message, color):
        """Show *message* in the status label using the given CSS colour."""
        self.status_label.setText(message)
        self.status_label.setStyleSheet(f"color: {color};")

    def update_status(self, message):
        """Show a progress message, colouring it by its leading marker.

        Messages containing "✅" are green and "❌" red; "🔄" and "💾"
        progress messages are orange.  Other messages keep the current colour.
        """
        self.status_label.setText(message)
        if "✅" in message:
            self.status_label.setStyleSheet("color: green;")
        elif "❌" in message:
            self.status_label.setStyleSheet("color: red;")
        elif "🔄" in message or "💾" in message:
            self.status_label.setStyleSheet("color: orange;")

    def on_conversion_finished(self, success, message):
        """Report the result, re-enable the controls and stop the thread."""
        # Colour by the success flag: failure messages carry no marker.
        self._show_status(message, "green" if success else "red")
        if success:
            QMessageBox.information(self, "转换成功", message)
        else:
            QMessageBox.critical(self, "转换失败", message)

        self.convert_button.setEnabled(True)
        self.browse_dir_button.setEnabled(True)
        if self.worker_thread:
            self.worker_thread.quit()
            self.worker_thread.wait()
        self.worker_thread = None
        self.worker = None
def main():
    """Start the application.

    The API key is resolved in this order: the value stored in the config
    file, the ``MINIMAX_API_KEY`` environment variable, then a one-time
    password-style prompt.  Only a key typed into the prompt is written to
    the config file.

    SECURITY: credentials must never be embedded in source code.  Any key
    that was ever committed to version control should be treated as
    compromised and rotated with the provider.
    """
    config_manager = ConfigManager()

    # QApplication must exist before any dialog can be shown.
    app = QApplication(sys.argv)

    api_key = config_manager.get_api_key().strip()
    if not api_key:
        api_key = os.environ.get("MINIMAX_API_KEY", "").strip()
    if not api_key:
        entered, accepted = QInputDialog.getText(
            None, "Minimax API Key", "请输入 Minimax API Key:", QLineEdit.EchoMode.Password
        )
        if accepted and entered.strip():
            api_key = entered.strip()
            config_manager.set_api_key(api_key)

    window = TextToMP3_GUI(config_manager)
    # Pass the resolved key explicitly so an environment-provided key works
    # without being persisted to disk.
    window.api_key = api_key
    window.show()
    sys.exit(app.exec())


if __name__ == "__main__":
    main()