ADD 添加音乐生成接口

This commit is contained in:
kyj@bowong.ai 2025-07-17 11:28:40 +08:00
parent 2910e30446
commit 8f499ef092
11 changed files with 318 additions and 17 deletions

66
capacity_summary.md Normal file
View File

@ -0,0 +1,66 @@
# 已接入大模型能力汇总(生图/生视频/生音乐)
API通道:
- dev -- 稳定通道
- canary -- 滚动更新通道
### ComfyUI 自定义节点 Git: [链接](https://e.coding.net/g-ldyi2063/dev/ComfyUI-CustomNode.git)
## Midjourney
----
### 生图 **| ComfyUI节点可用✅**
- ComfyUI节点位置: 不忘科技-自定义节点🚩/图片/Midjourney/Prompt修图
- 同步API
- dev: [链接](https://bowongai-dev--bowong-ai-video-gemini-fastapi-webapp.modal.run/docs#/Midjourney%20API/generate_image_api_mj_router_sync_generate_image_post)
- canary: [链接](https://bowongai-dev--text-video-agent-fastapi-app.modal.run/docs#/Midjourney%E5%9B%BE%E7%89%87%E7%94%9F%E6%88%90/generate_image_api_api_mj_generate_image_post)
- 异步API
- dev: [链接](https://bowongai-dev--bowong-ai-video-gemini-fastapi-webapp.modal.run/docs#/Midjourney%20API/async_gen_image_mj_router_async_generate_image_post)
- canary: [链接](https://bowongai-dev--text-video-agent-fastapi-app.modal.run/docs#/Midjourney%E5%9B%BE%E7%89%87%E7%94%9F%E6%88%90/async_gen_image_api_mj_async_generate_image_post)
### 反推生图Prompt **| ComfyUI节点可用✅**
- ComfyUI节点位置: 不忘科技-自定义节点🚩/图片/Midjourney/反推生图关键词
- dev: [链接](https://bowongai-dev--bowong-ai-video-gemini-fastapi-webapp.modal.run/docs#/Midjourney%20API/desc_img_by_file_mj_router_sync_describe_image_file_post)
- canary: [链接](https://bowongai-dev--text-video-agent-fastapi-app.modal.run/docs#/Midjourney%E5%9B%BE%E7%89%87%E7%94%9F%E6%88%90/desc_img_by_file_api_mj_sync_file_img_describe_post)
### 生视频
- 同步API
- canary: [链接](https://bowongai-dev--text-video-agent-fastapi-app.modal.run/docs#/%E6%9E%81%E6%A2%A6%E8%A7%86%E9%A2%91%E7%94%9F%E6%88%90api/generate_video_api_api_jm_generate_video_post)
- 异步API
- canary: [链接](https://bowongai-dev--text-video-agent-fastapi-app.modal.run/docs#/%E6%9E%81%E6%A2%A6%E8%A7%86%E9%A2%91%E7%94%9F%E6%88%90api/async_gen_video_api_jm_async_generate_video_post)
## 即梦
----
### 生视频 **| ComfyUI节点可用✅**
- ComfyUI节点位置: 不忘科技-自定义节点🚩/视频/即梦/Prompt生视频
- 异步API
- dev: [链接](https://bowongai-dev--bowong-ai-video-gemini-fastapi-webapp.modal.run/docs#/%E5%8D%B3%E6%A2%A6API/submit_video_task_jm_router_jm_submit_task_post)
- canary: [链接](https://bowongai-dev--text-video-agent-fastapi-app.modal.run/docs#/%E6%9E%81%E6%A2%A6%E8%A7%86%E9%A2%91%E7%94%9F%E6%88%90api/async_gen_video_api_jm_async_generate_video_post)
- 同步API
- canary: [链接](https://bowongai-dev--text-video-agent-fastapi-app.modal.run/docs#/%E6%9E%81%E6%A2%A6%E8%A7%86%E9%A2%91%E7%94%9F%E6%88%90api/generate_video_api_api_jm_generate_video_post)
## Google VEO
----
### 生视频
- 异步API
- canary: [链接](https://bowongai-dev--text-video-agent-fastapi-app.modal.run/docs#/veo%E8%A7%86%E9%A2%91%E7%94%9F%E6%88%90/submit_video_generation_api_veo_submit_post)
## Google Gemini
----
### 生图 **| ComfyUI节点可用✅**
- ComfyUI节点位置: 不忘科技-自定义节点🚩/图片/Gemini图像编辑/自定义Prompt修改图片
- 异步API
- dev: [链接](https://bowongai-dev--bowong-ai-video-gemini-fastapi-webapp.modal.run/docs#/Google/clothes_mark_google_image_edit_custom_post)
## Minimax 海螺
----
### 生图
- 同步API
- dev: [链接](https://bowongai-dev--bowong-ai-video-gemini-fastapi-webapp.modal.run/docs#/%E6%B5%B7%E8%9E%BAAPI/generate_image_hl_hl_router_hl_router_generate_image_post)
### 生音乐
- 同步API
- dev: [链接](https://bowongai-dev--bowong-ai-video-gemini-fastapi-webapp.modal.run/docs#/%E6%B5%B7%E8%9E%BAAPI/generate_music_hl_hl_router_sync_generate_music_post)

View File

@ -6,7 +6,7 @@ from sentry_sdk.integrations.fastapi import FastApiIntegration
from fastapi.middleware.cors import CORSMiddleware
from .utils.KVCache import MediaSourceKVCache
from .router import ffmpeg, cache, google, task, jm_router, mj_router
from .router import ffmpeg, cache, google, task, jm_router, mj_router, hl_router
from .models.settings.cluster import WorkerConfig
config = WorkerConfig()
@ -90,3 +90,4 @@ web_app.include_router(google.router)
web_app.include_router(task.router)
web_app.include_router(jm_router.router)
web_app.include_router(mj_router.router)
web_app.include_router(hl_router.router)

View File

@ -110,8 +110,15 @@ class AWSSettings(BaseSettings):
env_file = str(ENV_FILE)
extra = "ignore"
class MiniMaxSettings(BaseSettings):
    """Settings holding the credentials used to call the MiniMax API."""

    # MiniMax access key; defaults to an empty string when nothing is configured.
    # NOTE(review): with the prefix below this is presumably read from MINIMAX_API_KEY — confirm.
    api_key: str = Field(default="", description="MiniMax 访问密钥")

    class Config:
        # Settings-source configuration: variable prefix, env file location, and
        # how keys that do not match a declared field are treated.
        env_prefix = "MINIMAX_"
        env_file = str(ENV_FILE)
        extra = "ignore"
midjourney_settings = MidjourneySettings()
cos_settings = COSSettings()
langfuse_settings = LangfuseSettings()
aws_settings = AWSSettings()
aws_settings = AWSSettings()
minimax_settings = MiniMaxSettings()

View File

@ -0,0 +1,135 @@
import base64
import json
import os
import tempfile
import uuid
from typing import Optional
import loguru
import modal
import requests
from fastapi import APIRouter, UploadFile, Form, File, Depends, HTTPException
from fastapi.responses import FileResponse
from BowongModalFunctions.config.settings import minimax_settings
from BowongModalFunctions.middleware.authorization import verify_token
from BowongModalFunctions.models.responses.models import ModalTaskResponse
from BowongModalFunctions.models.settings.cluster import WorkerConfig
router = APIRouter(prefix='/hl_router', tags=['海螺API'])
config = WorkerConfig()
def image_generation(image_file: UploadFile, prompt: str, aspect_ratio: str) -> dict:
    """Generate one image with the MiniMax ``image-01`` model from a reference picture.

    Args:
        image_file: Uploaded picture; its bytes are base64-encoded and sent as the
            ``character`` subject reference.
        prompt: Text prompt forwarded to the model.
        aspect_ratio: Aspect ratio string forwarded unchanged to the API.

    Returns:
        The decoded JSON body returned by the MiniMax image generation endpoint.

    Raises:
        requests.HTTPError: The endpoint answered with a non-2xx HTTP status.
        RuntimeError: The body carries a ``base_resp`` whose ``status_code`` is not 0.
    """
    encoded_image = base64.b64encode(image_file.file.read()).decode('utf-8')
    payload = json.dumps({
        "model": "image-01",
        "prompt": prompt,
        "subject_reference": [
            {
                "type": "character",
                "image_file": f"data:image/jpeg;base64,{encoded_image}"
            }
        ],
        "aspect_ratio": aspect_ratio,
        "n": 1
    })
    headers = {
        'Authorization': f'Bearer {minimax_settings.api_key}',
        'Content-Type': 'application/json'
    }
    response = requests.post("https://api.minimaxi.com/v1/image_generation", headers=headers, data=payload,
                             timeout=150)
    response.raise_for_status()
    result = response.json()
    # Same business-status check music_generation applies: an HTTP 200 body can
    # still describe a failure, so do not hand it back to the caller as a success.
    # A body without base_resp is passed through as before.
    base_resp = result.get("base_resp") or {}
    status_code = base_resp.get("status_code", 0)
    if status_code != 0:
        raise RuntimeError(f"status_code={status_code}, status_msg={base_resp.get('status_msg')}")
    return result
def music_generation(lyrics=None, refer_voice=None, refer_instrumental=None):
    """Request a song from the MiniMax ``music-01`` model.

    Args:
        lyrics: Lyrics text; omitted from the request when empty.
        refer_voice: Reference voice ID; omitted from the request when empty.
        refer_instrumental: Reference instrumental ID; omitted from the request when empty.

    Returns:
        The decoded JSON body of a successful response (``base_resp.status_code == 0``).

    Raises:
        requests.HTTPError: The endpoint answered with a non-2xx HTTP status.
        RuntimeError: ``base_resp.status_code`` is missing or not 0.
    """
    headers = {
        'Authorization': f'Bearer {minimax_settings.api_key}',
        'Content-Type': 'application/json'
    }
    payload = {
        "model": "music-01",
        "audio_setting": {
            "sample_rate": 44100,
            "bitrate": 256000,
            "format": "mp3"
        }
    }
    # Only send the optional fields that were actually provided (same key order as before).
    optional_fields = {
        "lyrics": lyrics,
        "refer_voice": refer_voice,
        "refer_instrumental": refer_instrumental,
    }
    payload.update({key: value for key, value in optional_fields.items() if value})
    body = json.dumps(payload)
    loguru.logger.info("music generation payload: {}".format(body))
    response = requests.request("POST", "https://api.minimaxi.com/v1/music_generation", headers=headers, data=body,
                                timeout=150)
    response.raise_for_status()
    # Decode once and reuse: the body carries the whole audio track, so a second
    # response.json() call would re-parse a potentially large document.
    resp = response.json()
    base_resp = resp.get("base_resp") or {}
    status_code = base_resp.get("status_code")
    if status_code != 0:
        # Include the provider's message so the failure is diagnosable from the error alone.
        raise RuntimeError(f"status_code={status_code}, status_msg={base_resp.get('status_msg')}")
    return resp
@router.post('/async/upload/music', summary="上传人声/伴奏/音乐", dependencies=[Depends(verify_token)])
async def upload_music_hl(music: UploadFile = File(description="参考音频--10秒以上10分钟以内, 与目标对应"),
                          purpose: str = Form(default="voice",
                                              description="目标分类 voice(人声)/instrumental(伴奏)/song(人声+伴奏)")):
    """Store an uploaded reference audio file and spawn the ``music_upload`` Modal function.

    The upload is written under ``<S3_mount_dir>/upload/`` with a random UUID name,
    then the Modal function is spawned with the stored path, the upload's content
    type and ``purpose``.

    Returns:
        ModalTaskResponse carrying the spawned function call ID as ``taskId``.
    """
    # splitext copes with a missing filename and with names that have no dot, and
    # never returns an extension containing a path separator.
    extension = os.path.splitext(music.filename or "")[1]
    file_path = os.path.join(config.S3_mount_dir, "upload/{}{}".format(uuid.uuid4(), extension))
    with open(file_path, "wb") as file:
        file.write(music.file.read())
    fn = modal.Function.from_name(config.modal_app_name, "music_upload",
                                  environment_name=config.modal_environment)
    fn_call = fn.spawn(file_path, music.content_type, purpose)
    return ModalTaskResponse(success=True, taskId=fn_call.object_id)
@router.post('/sync/generate/image', summary="生成图片", dependencies=[Depends(verify_token)])
async def generate_image_hl(prompt: str = Form(..., description="图片生成提示词"),
                            image_file: UploadFile = File(description="样貌参考图片"),
                            aspect_ratio: str = Form(default="9:16", description="图片宽高比")):
    """Generate an image from a prompt and a reference picture.

    Returns:
        Whatever ``image_generation`` returns for the given inputs.

    Raises:
        HTTPException: 500 with the underlying error text when generation fails.
    """
    # Imported here so the handler is self-contained and needs no file-level import change.
    from fastapi.concurrency import run_in_threadpool

    try:
        # image_generation makes a blocking HTTP request (timeout 150 s); running it
        # in the thread pool keeps the event loop free for other requests.
        return await run_in_threadpool(image_generation, image_file, prompt, aspect_ratio)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.post('/sync/generate/music', summary="生成音乐",
             description="refer_voice、refer_instrumental至少填写1个, 传refer_instrumental可不传lyrics",
             dependencies=[Depends(verify_token)])
async def generate_music_hl(lyrics: Optional[str] = Form(default=None, description="歌词"),
                            refer_voice: Optional[str] = Form(default=None, description="参考音色ID, 需要先上传"),
                            refer_instrumental: Optional[str] = Form(default=None,
                                                                     description="参考伴奏ID, 需要先上传")):
    """Generate a song, store it as an mp3 under the S3 mount and return its CDN URL.

    At least one of ``refer_voice`` / ``refer_instrumental`` is required, and
    ``lyrics`` is required whenever ``refer_voice`` is given.

    Returns:
        ``{"audio_url": ...}`` pointing at the stored mp3.

    Raises:
        HTTPException: 400 for invalid parameter combinations or when the returned
            audio is not valid hex; 500 for any other failure.
    """
    # Imported here so the handler is self-contained and needs no file-level import change.
    from fastapi.concurrency import run_in_threadpool

    # Parameter problems are the caller's mistake: answer 400 before doing any
    # work, instead of letting the generic handler below report them as 500.
    if not refer_voice and not refer_instrumental:
        raise HTTPException(status_code=400,
                            detail="生成音频时出错: refer_voice、refer_instrumental至少填写1个!")
    if refer_voice and not lyrics:
        raise HTTPException(status_code=400, detail="生成音频时出错: refer_voice需传lyrics")

    try:
        # music_generation makes a blocking HTTP request (timeout 150 s); running it
        # in the thread pool keeps the event loop free for other requests.
        result = await run_in_threadpool(music_generation, lyrics, refer_voice, refer_instrumental)
        hex_data = result["data"]["audio"]
        try:
            # The audio arrives as a hex string; decode it to raw bytes.
            audio_data = bytes.fromhex(hex_data)
        except ValueError as e:
            # Scoped to the hex decoding only, so unrelated ValueErrors (e.g. a JSON
            # decode failure upstream) are no longer mislabelled as invalid hex.
            raise HTTPException(status_code=400, detail="无效的HEX编码") from e
        audio_file = f"dev/audios/{uuid.uuid4()}.mp3"
        with open(os.path.join(config.S3_mount_dir, audio_file), "wb") as file:
            file.write(audio_data)
        return {"audio_url": "https://cdn.roasmax.cn/" + audio_file}
    except HTTPException:
        # Keep the status code chosen above instead of rewrapping it as 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"生成音频时出错: {str(e)}") from e

View File

@ -18,7 +18,7 @@ import asyncio
from BowongModalFunctions.middleware.authorization import verify_token
router = APIRouter(prefix='/jm_router', tags=['Jm_router'])
router = APIRouter(prefix='/jm_router', tags=['即梦API'])
api_key = os.getenv("JM_API_KEY")

View File

@ -14,13 +14,14 @@ import uuid
from typing import Optional
import httpx
from fastapi import APIRouter, HTTPException, Form, UploadFile, File
from fastapi import APIRouter, HTTPException, Form, UploadFile, File, Depends
from loguru import logger as mj_logger
from BowongModalFunctions.config.settings import midjourney_settings
from BowongModalFunctions.middleware.authorization import verify_token
from BowongModalFunctions.utils.MediaUtils import MediaUtils
router = APIRouter(prefix='/mj_router', tags=['Mj_router'])
router = APIRouter(prefix='/mj_router', tags=['Midjourney API'])
api_key = midjourney_settings.api_key
@ -162,7 +163,7 @@ async def query_describe_result(job_id: str):
return await query_task_result(job_id, "describe")
async def poll_task_result(job_id: str, task_type: str, max_wait_time: int = 120, poll_interval: int = 2):
async def poll_task_result(job_id: str, task_type: str, max_wait_time: int = 150, poll_interval: int = 2):
"""
通用轮询任务结果函数
@ -322,7 +323,7 @@ async def describe_image_sync(image_url: str, max_wait_time: int = 120, poll_int
# API路由
@router.post("/sync/generate/image", summary="同步生成图片")
@router.post("/sync/generate/image", summary="同步生成图片", dependencies=[Depends(verify_token)], deprecated=True)
async def generate_image_api(
prompt: str = Form(..., description="图片生成提示词"),
img_file: Optional[UploadFile] = File(None, description="样貌参考图片"),
@ -331,7 +332,7 @@ async def generate_image_api(
):
"""生成图片接口"""
try:
if bool(img_file):
if isinstance(img_file, UploadFile):
file_name = f'{uuid.uuid4()}.{img_file.filename.rsplit(".", 1)[-1]}'
with open(file_name, "wb") as f:
f.write(img_file.file.read())
@ -348,7 +349,7 @@ async def generate_image_api(
raise HTTPException(status_code=500, detail=f"服务器内部错误: {str(e)}")
@router.post("/sync/describe/image", summary="图片URL获取图像描述")
@router.post("/sync/describe/image", summary="图片URL获取图像描述", dependencies=[Depends(verify_token)])
async def describe_image_api(
image_url: str = Form(..., description="图片URL地址"),
max_wait_time: int = Form(120, description="最大等待时间(秒)"),
@ -362,7 +363,7 @@ async def describe_image_api(
raise HTTPException(status_code=500, detail=f"服务器内部错误: {str(e)}")
@router.post('/sync/describe/image/file', summary='上传图片获取图像描述')
@router.post('/sync/describe/image/file', summary='上传图片获取图像描述', dependencies=[Depends(verify_token)])
async def desc_img_by_file(
img_file: UploadFile = File(..., description='上传的图片'),
max_wait_time: int = Form(120, description="最大等待时间(秒)"),
@ -384,11 +385,11 @@ async def desc_img_by_file(
raise HTTPException(status_code=500, detail=f"服务器内部错误: {str(e)}")
@router.post('/async/generate/image', summary='异步提交生图任务')
@router.post('/async/generate/image', summary='异步提交生图任务', dependencies=[Depends(verify_token)])
async def async_gen_image(prompt: str,
img_file: Optional[UploadFile] = File(None, description="样貌参考图片"),
):
if bool(img_file):
if isinstance(img_file, UploadFile):
file_name = f'{uuid.uuid4()}.{img_file.filename.rsplit(".", 1)[-1]}'
with open(file_name, "wb") as f:
f.write(img_file.file.read())
@ -402,6 +403,6 @@ async def async_gen_image(prompt: str,
return await submit_gen_image_task(prompt)
@router.get('/async/query/status', summary='异步查询任务状态')
@router.get('/async/query/status', summary='异步查询任务状态', dependencies=[Depends(verify_token)])
async def async_query_status(task_id: str):
    """Look up a task by ID, delegating to ``query_task_result`` with task type ``'image'``."""
    return await query_task_result(task_id, 'image')

View File

@ -1,15 +1,48 @@
import json
from typing import cast
import modal
from fastapi import APIRouter, Depends, HTTPException
from fastapi import APIRouter, Depends, HTTPException, Response
from loguru import logger
from starlette import status
from ..middleware.authorization import verify_token, nakama_login
from ..models.requests.models import NakamaLogin
from ..models.responses.models import ModalTaskCancelResponse, NakamaJWTResponse
from ..models.responses.models import ModalTaskCancelResponse, NakamaJWTResponse, BaseFFMPEGTaskStatusResponse
from ..utils.ModalUtils import ModalUtils
router = APIRouter(prefix="/task", tags=["Tasks"])
# Response-header documentation attached to the task status route: each header
# is described as a plain string value.
sentry_header_schema = {
    header_name: {
        "description": header_description,
        "schema": {"type": "string"},
    }
    for header_name, header_description in (
        ("x-trace-id", "Sentry Transaction ID"),
        ("x-baggage", "Sentry Transaction baggage"),
    )
}
@router.get(
    "/{task_id}",
    summary="查询任务状态/结果",
    responses={
        status.HTTP_200_OK: {"description": "", "headers": sentry_header_schema},
    },
)
async def get_task_status(task_id: str, response: Response) -> BaseFFMPEGTaskStatusResponse:
    """Fetch the status of a Modal task and report it as a task status response.

    When the task carries transaction info, its trace ID and baggage are copied
    into the ``x-trace-id`` / ``x-baggage`` response headers. The task results
    are returned as a single JSON-encoded string inside ``results``.
    """
    logger.info(f"Get task [{task_id}]")
    task_info = await ModalUtils.get_modal_task_status(task_id)

    transaction = task_info.transaction
    if transaction:
        response.headers["x-trace-id"] = transaction.x_trace_id
        response.headers["x-baggage"] = transaction.x_baggage

    return BaseFFMPEGTaskStatusResponse(
        taskId=task_id,
        status=task_info.status,
        code=cast(int, task_info.error_code.value),
        error=task_info.error_reason,
        results=[json.dumps(task_info.results, ensure_ascii=False)],
    )
@router.get("/cancel/{task_id}", summary="终止任务",
description="终止任务, 无论是正在排队还是真正运行",
dependencies=[Depends(verify_token)])

View File

@ -0,0 +1,17 @@
import os
import importlib
import pkgutil
# Directory that contains this package's modules.
package_dir = os.path.dirname(__file__)

# Import every module found directly in the package directory, bind it in the
# package namespace under its own name, and record that name in __all__
# (created the first time a module is registered).
for _, module_name, _ in pkgutil.iter_modules([package_dir]):
    module = importlib.import_module(f'.{module_name}', package=__name__)
    globals()[module_name] = module
    globals().setdefault('__all__', []).append(module_name)

View File

@ -0,0 +1,41 @@
from ..video import app, config, downloader_image
with (downloader_image.imports()):
import os
import loguru
import modal
import requests
from BowongModalFunctions.config.settings import minimax_settings
@app.function(timeout=600, region="us", max_containers=config.video_downloader_concurrency,
              volumes={
                  config.S3_mount_dir: modal.CloudBucketMount(
                      bucket_name=config.S3_bucket_name,
                      secret=modal.Secret.from_name("aws-s3-secret",
                                                    environment_name=config.modal_environment),
                  ),
              }, )
def music_upload(file: str, content_type, purpose: str):
    """Upload a stored audio file to the MiniMax ``music_upload`` endpoint.

    Args:
        file: Path of the audio file to upload.
        content_type: MIME type sent for the multipart file part.
        purpose: Value sent as the ``purpose`` form field.

    Returns:
        A ``(result, None)`` tuple, where ``result`` holds the ``voice_id`` and
        ``instrumental_id`` from the response (``None`` for whichever is absent).

    Raises:
        requests.HTTPError: The endpoint answered with a non-2xx HTTP status.
        RuntimeError: ``base_resp.status_code`` in the response is not 0.
    """
    headers = {
        'Authorization': f'Bearer {minimax_settings.api_key}'
    }
    payload = {
        "purpose": purpose
    }
    loguru.logger.info(f"music upload file_name {file} file_type: {content_type}")
    # Open the file in a context manager so the handle is released even when the
    # request fails; previously it was never closed.
    with open(file, "rb") as audio:
        files = [
            ('file', (os.path.basename(file), audio, content_type)),
        ]
        response = requests.request("POST", "https://api.minimaxi.com/v1/music_upload", headers=headers,
                                    data=payload,
                                    files=files,
                                    timeout=590)
    response.raise_for_status()
    resp = response.json()
    base_resp = resp["base_resp"]
    if base_resp["status_code"] != 0:
        # Include the provider's message so the failure is diagnosable from the error alone.
        raise RuntimeError(f"status_code={base_resp['status_code']}, status_msg={base_resp.get('status_msg')}")
    return {"voice_id": resp.get("voice_id"), "instrumental_id": resp.get("instrumental_id")}, None

View File

@ -8,9 +8,7 @@ with (downloader_image.imports()):
import json
from google.genai import types
from loguru import logger
import uuid
from BowongModalFunctions.utils.HTTPUtils import GoogleAuthUtils
from BowongModalFunctions.router.google import SUPPORTED_IMAGE_TYPES
gemini_region = [
"global"

View File

@ -9,6 +9,7 @@ downloader_image = (
.add_local_file("../simhei.ttf", "/usr/share/fonts/simhei.ttf", copy=True)
.run_commands("chmod -R 777 /usr/share/fonts")
.env(dotenv_values("../.runtime.env"))
.add_local_file("../.env", "/root/.env")
.add_local_python_source('cluster')
.add_local_python_source('BowongModalFunctions')
)
@ -107,6 +108,7 @@ with downloader_image.imports():
# 导入sub app function
from .video_apps import *
from .image_apps import *
from .audio_apps import *