diff --git a/capacity_summary.md b/capacity_summary.md new file mode 100644 index 0000000..402d9e8 --- /dev/null +++ b/capacity_summary.md @@ -0,0 +1,66 @@ +# 已接入大模型能力汇总(生图/生视频/生音乐) + + API通道: + dev--稳定通道 + canary--滚动更新通道 + +### ComfyUI 自定义节点 Git: [链接](https://e.coding.net/g-ldyi2063/dev/ComfyUI-CustomNode.git) + +## Midjourney + +---- + +### 生图 **| ComfyUI节点可用✅** + - ComfyUI节点位置: 不忘科技-自定义节点🚩/图片/Midjourney/Prompt修图 + - 同步API + - dev: [链接](https://bowongai-dev--bowong-ai-video-gemini-fastapi-webapp.modal.run/docs#/Midjourney%20API/generate_image_api_mj_router_sync_generate_image_post) + - canary: [链接](https://bowongai-dev--text-video-agent-fastapi-app.modal.run/docs#/Midjourney%E5%9B%BE%E7%89%87%E7%94%9F%E6%88%90/generate_image_api_api_mj_generate_image_post) + - 异步API + - dev: [链接](https://bowongai-dev--bowong-ai-video-gemini-fastapi-webapp.modal.run/docs#/Midjourney%20API/async_gen_image_mj_router_async_generate_image_post) + - canary: [链接](https://bowongai-dev--text-video-agent-fastapi-app.modal.run/docs#/Midjourney%E5%9B%BE%E7%89%87%E7%94%9F%E6%88%90/async_gen_image_api_mj_async_generate_image_post) +### 反推生图Prompt **| ComfyUI节点可用✅** + - ComfyUI节点位置: 不忘科技-自定义节点🚩/图片/Midjourney/反推生图关键词 + - dev: [链接](https://bowongai-dev--bowong-ai-video-gemini-fastapi-webapp.modal.run/docs#/Midjourney%20API/desc_img_by_file_mj_router_sync_describe_image_file_post) + - canary: [链接](https://bowongai-dev--text-video-agent-fastapi-app.modal.run/docs#/Midjourney%E5%9B%BE%E7%89%87%E7%94%9F%E6%88%90/desc_img_by_file_api_mj_sync_file_img_describe_post) +### 生视频 + - 同步API + - canary: [链接](https://bowongai-dev--text-video-agent-fastapi-app.modal.run/docs#/%E6%9E%81%E6%A2%A6%E8%A7%86%E9%A2%91%E7%94%9F%E6%88%90api/generate_video_api_api_jm_generate_video_post) + - 异步API + - canary: [链接](https://bowongai-dev--text-video-agent-fastapi-app.modal.run/docs#/%E6%9E%81%E6%A2%A6%E8%A7%86%E9%A2%91%E7%94%9F%E6%88%90api/async_gen_video_api_jm_async_generate_video_post) + +## 即梦 + +---- + +### 生视频 **| ComfyUI节点可用✅** + - ComfyUI节点位置: 不忘科技-自定义节点🚩/视频/即梦/Prompt生视频 + - 异步API + - dev: [链接](https://bowongai-dev--bowong-ai-video-gemini-fastapi-webapp.modal.run/docs#/%E5%8D%B3%E6%A2%A6API/submit_video_task_jm_router_jm_submit_task_post) + - canary: [链接](https://bowongai-dev--text-video-agent-fastapi-app.modal.run/docs#/%E6%9E%81%E6%A2%A6%E8%A7%86%E9%A2%91%E7%94%9F%E6%88%90api/async_gen_video_api_jm_async_generate_video_post) + - 同步API + - canary: [链接](https://bowongai-dev--text-video-agent-fastapi-app.modal.run/docs#/%E6%9E%81%E6%A2%A6%E8%A7%86%E9%A2%91%E7%94%9F%E6%88%90api/generate_video_api_api_jm_generate_video_post) + +## Google VEO + +---- +### 生视频 + - 异步API + - canary: [链接](https://bowongai-dev--text-video-agent-fastapi-app.modal.run/docs#/veo%E8%A7%86%E9%A2%91%E7%94%9F%E6%88%90/submit_video_generation_api_veo_submit_post) + +## Google Gemini + +---- +### 生图 **| ComfyUI节点可用✅** + - ComfyUI节点位置: 不忘科技-自定义节点🚩/图片/Gemini图像编辑/自定义Prompt修改图片 + - 异步API + - dev: [链接](https://bowongai-dev--bowong-ai-video-gemini-fastapi-webapp.modal.run/docs#/Google/clothes_mark_google_image_edit_custom_post) + +## Minimax 海螺 + +---- +### 生图 + - 同步API + - dev: [链接](https://bowongai-dev--bowong-ai-video-gemini-fastapi-webapp.modal.run/docs#/%E6%B5%B7%E8%9E%BAAPI/generate_image_hl_hl_router_hl_router_generate_image_post) +### 生音乐 + - 同步API + - dev: [链接](https://bowongai-dev--bowong-ai-video-gemini-fastapi-webapp.modal.run/docs#/%E6%B5%B7%E8%9E%BAAPI/generate_music_hl_hl_router_sync_generate_music_post) \ No newline at end of file diff --git a/src/BowongModalFunctions/api.py b/src/BowongModalFunctions/api.py index f732146..c1bfb9e 100644 --- a/src/BowongModalFunctions/api.py +++ b/src/BowongModalFunctions/api.py @@ -6,7 +6,7 @@ from sentry_sdk.integrations.fastapi import FastApiIntegration from fastapi.middleware.cors import CORSMiddleware from .utils.KVCache import MediaSourceKVCache -from .router import ffmpeg, cache, google, task, jm_router, mj_router +from .router import ffmpeg, cache, google, task, jm_router, mj_router, hl_router from .models.settings.cluster import WorkerConfig config = WorkerConfig() @@ -90,3 +90,4 @@ web_app.include_router(google.router) web_app.include_router(task.router) web_app.include_router(jm_router.router) web_app.include_router(mj_router.router) +web_app.include_router(hl_router.router) diff --git a/src/BowongModalFunctions/config/settings.py b/src/BowongModalFunctions/config/settings.py index 6532f35..27c70f2 100644 --- a/src/BowongModalFunctions/config/settings.py +++ b/src/BowongModalFunctions/config/settings.py @@ -110,8 +110,15 @@ class AWSSettings(BaseSettings): env_file = str(ENV_FILE) extra = "ignore" +class MiniMaxSettings(BaseSettings): + api_key: str = Field(default="", description="MiniMax 访问密钥") + class Config: + env_prefix = "MINIMAX_" + env_file = str(ENV_FILE) + extra = "ignore" midjourney_settings = MidjourneySettings() cos_settings = COSSettings() langfuse_settings = LangfuseSettings() -aws_settings = AWSSettings() \ No newline at end of file +aws_settings = AWSSettings() +minimax_settings = MiniMaxSettings() \ No newline at end of file diff --git a/src/BowongModalFunctions/router/hl_router.py b/src/BowongModalFunctions/router/hl_router.py new file mode 100644 index 0000000..9d48039 --- /dev/null +++ b/src/BowongModalFunctions/router/hl_router.py @@ -0,0 +1,135 @@ +import base64 +import json +import os +import tempfile +import uuid +from typing import Optional + +import loguru +import modal +import requests +from fastapi import APIRouter, UploadFile, Form, File, Depends, HTTPException +from fastapi.responses import FileResponse + +from BowongModalFunctions.config.settings import minimax_settings +from BowongModalFunctions.middleware.authorization import verify_token +from BowongModalFunctions.models.responses.models import ModalTaskResponse +from BowongModalFunctions.models.settings.cluster import WorkerConfig + +router = APIRouter(prefix='/hl_router', tags=['海螺API']) +config = WorkerConfig() + + +def image_generation(image_file: UploadFile, prompt: str, aspect_ratio): + data = base64.b64encode(image_file.file.read()).decode('utf-8') + + payload = json.dumps({ + "model": "image-01", + "prompt": prompt, + "subject_reference": [ + { + "type": "character", + "image_file": f"data:image/jpeg;base64,{data}" + } + ], + "aspect_ratio": aspect_ratio, + "n": 1 + }) + headers = { + 'Authorization': f'Bearer {minimax_settings.api_key}', + 'Content-Type': 'application/json' + } + + response = requests.request("POST", "https://api.minimaxi.com/v1/image_generation", headers=headers, data=payload, + timeout=150) + response.raise_for_status() + return response.json() + + +def music_generation(lyrics=None, refer_voice=None, refer_instrumental=None): + headers = { + 'Authorization': f'Bearer {minimax_settings.api_key}', + 'Content-Type': 'application/json' + } + payload = { + "model": "music-01", + "audio_setting": { + "sample_rate": 44100, + "bitrate": 256000, + "format": "mp3" + } + } + if lyrics: + payload["lyrics"] = lyrics + if refer_voice: + payload["refer_voice"] = refer_voice + if refer_instrumental: + payload["refer_instrumental"] = refer_instrumental + payload = json.dumps(payload) + loguru.logger.info("music generation payload: {}".format(payload)) + response = requests.request("POST", "https://api.minimaxi.com/v1/music_generation", headers=headers, data=payload, + timeout=150) + response.raise_for_status() + resp = response.json() + if resp["base_resp"]["status_code"] == 0: + return response.json() + else: + raise Exception(f"status_code={resp['base_resp']['status_code']}") + + +@router.post('/async/upload/music', summary="上传人声/伴奏/音乐", dependencies=[Depends(verify_token)]) +async def upload_music_hl(music: UploadFile = File(description="参考音频--10秒以上10分钟以内, 与目标对应"), + purpose: str = Form(default="voice", + description="目标分类 voice(人声)/instrumental(伴奏)/song(人声+伴奏)")): + file_path = os.path.join(config.S3_mount_dir, + "upload/{}.{}".format(str(uuid.uuid4()), music.filename.split(".")[-1])) + with open(file_path, "wb") as file: + file.write(music.file.read()) + file.close() + + fn = modal.Function.from_name(config.modal_app_name, "music_upload", + environment_name=config.modal_environment) + fn_call = fn.spawn(file_path, music.content_type, purpose) + return ModalTaskResponse(success=True, taskId=fn_call.object_id) + + +@router.post('/sync/generate/image', summary="生成图片", dependencies=[Depends(verify_token)]) +async def generate_image_hl(prompt: str = Form(..., description="图片生成提示词"), + image_file: UploadFile = File(description="样貌参考图片"), + aspect_ratio=Form(default="9:16", description="图片宽高比")): + try: + return image_generation(image_file, prompt, aspect_ratio) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@router.post('/sync/generate/music', summary="生成音乐", + description="refer_voice、refer_instrumental至少填写1个, 传refer_instrumental可不传lyrics", + dependencies=[Depends(verify_token)]) +async def generate_music_hl(lyrics: Optional[str] = Form(default=None, description="歌词"), + refer_voice: Optional[str] = Form(default=None, description="参考音色ID, 需要先上传"), + refer_instrumental: Optional[str] = Form(default=None, + description="参考伴奏ID, 需要先上传")): + try: + if not refer_voice and not refer_instrumental: + raise Exception("refer_voice、refer_instrumental至少填写1个!") + if refer_voice and not lyrics: + raise Exception("refer_voice需传lyrics") + result = music_generation(lyrics, refer_voice, refer_instrumental) + hex_data = result["data"]["audio"] + # 将HEX编码的字符串转换为二进制数据 + audio_data = bytes.fromhex(hex_data) + + audio_file = f"dev/audios/{uuid.uuid4()}.mp3" + with open(os.path.join(config.S3_mount_dir, audio_file), "wb") as file: + # 写入音频数据 + file.write(audio_data) + file.close() + + # 返回文件响应 + return {"audio_url": f"https://cdn.roasmax.cn/" + audio_file} + + except ValueError: + raise HTTPException(status_code=400, detail="无效的HEX编码") + except Exception as e: + raise HTTPException(status_code=500, detail=f"生成音频时出错: {str(e)}") diff --git a/src/BowongModalFunctions/router/jm_router.py b/src/BowongModalFunctions/router/jm_router.py index 762c1c7..db3c885 100644 --- a/src/BowongModalFunctions/router/jm_router.py +++ b/src/BowongModalFunctions/router/jm_router.py @@ -18,7 +18,7 @@ import asyncio from BowongModalFunctions.middleware.authorization import verify_token -router = APIRouter(prefix='/jm_router', tags=['Jm_router']) +router = APIRouter(prefix='/jm_router', tags=['即梦API']) api_key = os.getenv("JM_API_KEY") diff --git a/src/BowongModalFunctions/router/mj_router.py b/src/BowongModalFunctions/router/mj_router.py index b237cec..725555c 100644 --- a/src/BowongModalFunctions/router/mj_router.py +++ b/src/BowongModalFunctions/router/mj_router.py @@ -14,13 +14,14 @@ import uuid from typing import Optional import httpx -from fastapi import APIRouter, HTTPException, Form, UploadFile, File +from fastapi import APIRouter, HTTPException, Form, UploadFile, File, Depends from loguru import logger as mj_logger from BowongModalFunctions.config.settings import midjourney_settings +from BowongModalFunctions.middleware.authorization import verify_token from BowongModalFunctions.utils.MediaUtils import MediaUtils -router = APIRouter(prefix='/mj_router', tags=['Mj_router']) +router = APIRouter(prefix='/mj_router', tags=['Midjourney API']) api_key = midjourney_settings.api_key @@ -162,7 +163,7 @@ async def query_describe_result(job_id: str): return await query_task_result(job_id, "describe") -async def poll_task_result(job_id: str, task_type: str, max_wait_time: int = 120, poll_interval: int = 2): +async def poll_task_result(job_id: str, task_type: str, max_wait_time: int = 150, poll_interval: int = 2): """ 通用轮询任务结果函数 @@ -322,7 +323,7 @@ async def describe_image_sync(image_url: str, max_wait_time: int = 120, poll_int # API路由 -@router.post("/sync/generate/image", summary="同步生成图片") +@router.post("/sync/generate/image", summary="同步生成图片", dependencies=[Depends(verify_token)], deprecated=True) async def generate_image_api( prompt: str = Form(..., description="图片生成提示词"), img_file: Optional[UploadFile] = File(None, description="样貌参考图片"), @@ -331,7 +332,7 @@ async def generate_image_api( ): """生成图片接口""" try: - if bool(img_file): + if isinstance(img_file, UploadFile): file_name = f'{uuid.uuid4()}.{img_file.filename.rsplit(".", 1)[-1]}' with open(file_name, "wb") as f: f.write(img_file.file.read()) @@ -348,7 +349,7 @@ async def generate_image_api( raise HTTPException(status_code=500, detail=f"服务器内部错误: {str(e)}") -@router.post("/sync/describe/image", summary="图片URL获取图像描述") +@router.post("/sync/describe/image", summary="图片URL获取图像描述", dependencies=[Depends(verify_token)]) async def describe_image_api( image_url: str = Form(..., description="图片URL地址"), max_wait_time: int = Form(120, description="最大等待时间(秒)"), @@ -362,7 +363,7 @@ async def describe_image_api( raise HTTPException(status_code=500, detail=f"服务器内部错误: {str(e)}") -@router.post('/sync/describe/image/file', summary='上传图片获取图像描述') +@router.post('/sync/describe/image/file', summary='上传图片获取图像描述', dependencies=[Depends(verify_token)]) async def desc_img_by_file( img_file: UploadFile = File(..., description='上传的图片'), max_wait_time: int = Form(120, description="最大等待时间(秒)"), @@ -384,11 +385,11 @@ async def desc_img_by_file( raise HTTPException(status_code=500, detail=f"服务器内部错误: {str(e)}") -@router.post('/async/generate/image', summary='异步提交生图任务') +@router.post('/async/generate/image', summary='异步提交生图任务', dependencies=[Depends(verify_token)]) async def async_gen_image(prompt: str, img_file: Optional[UploadFile] = File(None, description="样貌参考图片"), ): - if bool(img_file): + if isinstance(img_file, UploadFile): file_name = f'{uuid.uuid4()}.{img_file.filename.rsplit(".", 1)[-1]}' with open(file_name, "wb") as f: f.write(img_file.file.read()) @@ -402,6 +403,6 @@ async def async_gen_image(prompt: str, return await submit_gen_image_task(prompt) -@router.get('/async/query/status', summary='异步查询任务状态') +@router.get('/async/query/status', summary='异步查询任务状态', dependencies=[Depends(verify_token)]) async def async_query_status(task_id: str): return await query_task_result(task_id, 'image') diff --git a/src/BowongModalFunctions/router/task.py b/src/BowongModalFunctions/router/task.py index 630c281..9a96701 100644 --- a/src/BowongModalFunctions/router/task.py +++ b/src/BowongModalFunctions/router/task.py @@ -1,15 +1,48 @@ +import json +from typing import cast + import modal -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, Depends, HTTPException, Response from loguru import logger from starlette import status from ..middleware.authorization import verify_token, nakama_login from ..models.requests.models import NakamaLogin -from ..models.responses.models import ModalTaskCancelResponse, NakamaJWTResponse +from ..models.responses.models import ModalTaskCancelResponse, NakamaJWTResponse, BaseFFMPEGTaskStatusResponse +from ..utils.ModalUtils import ModalUtils router = APIRouter(prefix="/task", tags=["Tasks"]) +sentry_header_schema = { + "x-trace-id": { + "description": "Sentry Transaction ID", + "schema": { + "type": "string", + } + }, + "x-baggage": { + "description": "Sentry Transaction baggage", + "schema": { + "type": "string", + } + } +} +@router.get("/{task_id}", summary="查询任务状态/结果", responses={ + status.HTTP_200_OK: { + "description": "", + "headers": sentry_header_schema + }, +}, ) +async def get_task_status(task_id: str, response: Response) -> BaseFFMPEGTaskStatusResponse: + logger.info(f"Get task [{task_id}]") + task_info = await ModalUtils.get_modal_task_status(task_id) + if task_info.transaction: + response.headers["x-trace-id"] = task_info.transaction.x_trace_id + response.headers["x-baggage"] = task_info.transaction.x_baggage + return BaseFFMPEGTaskStatusResponse(taskId=task_id, status=task_info.status, + code=cast(int, task_info.error_code.value), + error=task_info.error_reason, results=[json.dumps(task_info.results, ensure_ascii=False)]) @router.get("/cancel/{task_id}", summary="终止任务", description="终止任务, 无论是正在排队还是真正运行", dependencies=[Depends(verify_token)]) diff --git a/src/cluster/audio_apps/__init__.py b/src/cluster/audio_apps/__init__.py new file mode 100644 index 0000000..807d170 --- /dev/null +++ b/src/cluster/audio_apps/__init__.py @@ -0,0 +1,17 @@ +import os +import importlib +import pkgutil + +# 获取当前包所在的目录 +package_dir = os.path.dirname(__file__) + +# 自动导入所有模块 +for (_, module_name, _) in pkgutil.iter_modules([package_dir]): + # 导入模块 + module = importlib.import_module(f'.{module_name}', package=__name__) + # 将模块添加到当前包的命名空间 + globals()[module_name] = module + # 可选:将模块名添加到 __all__ + if '__all__' not in globals(): + globals()['__all__'] = [] + globals()['__all__'].append(module_name) \ No newline at end of file diff --git a/src/cluster/audio_apps/hailuo_music.py b/src/cluster/audio_apps/hailuo_music.py new file mode 100644 index 0000000..6c19f6d --- /dev/null +++ b/src/cluster/audio_apps/hailuo_music.py @@ -0,0 +1,41 @@ +from ..video import app, config, downloader_image + +with (downloader_image.imports()): + import os + + import loguru + import modal + import requests + from BowongModalFunctions.config.settings import minimax_settings + + + @app.function(timeout=600, region="us", max_containers=config.video_downloader_concurrency, + volumes={ + config.S3_mount_dir: modal.CloudBucketMount( + bucket_name=config.S3_bucket_name, + secret=modal.Secret.from_name("aws-s3-secret", + environment_name=config.modal_environment), + ), + }, ) + def music_upload(file: str, content_type, purpose: str): + headers = { + 'Authorization': f'Bearer {minimax_settings.api_key}' + } + payload = { + "purpose": purpose + } + files = [ + ('file', (file.split(os.sep)[-1], open(file,"rb"), content_type)), + ] + loguru.logger.info(f"music upload file_name {file} file_type: {content_type}") + response = requests.request("POST", "https://api.minimaxi.com/v1/music_upload", headers=headers, data=payload, + files=files, + timeout=590) + response.raise_for_status() + resp = response.json() + if resp["base_resp"]["status_code"] == 0: + voice_id = resp["voice_id"] if "voice_id" in resp else None + instrumental_id = resp["instrumental_id"] if "instrumental_id" in resp else None + return {"voice_id": voice_id, "instrumental_id": instrumental_id}, None + else: + raise Exception(f"status_code={resp['base_resp']['status_code']}") diff --git a/src/cluster/image_apps/image_edit.py b/src/cluster/image_apps/image_edit.py index 63fc7bd..752b709 100644 --- a/src/cluster/image_apps/image_edit.py +++ b/src/cluster/image_apps/image_edit.py @@ -8,9 +8,7 @@ with (downloader_image.imports()): import json from google.genai import types from loguru import logger - import uuid from BowongModalFunctions.utils.HTTPUtils import GoogleAuthUtils - from BowongModalFunctions.router.google import SUPPORTED_IMAGE_TYPES gemini_region = [ "global" diff --git a/src/cluster/video.py b/src/cluster/video.py index c35a651..d599903 100644 --- a/src/cluster/video.py +++ b/src/cluster/video.py @@ -9,6 +9,7 @@ downloader_image = ( .add_local_file("../simhei.ttf", "/usr/share/fonts/simhei.ttf", copy=True) .run_commands("chmod -R 777 /usr/share/fonts") .env(dotenv_values("../.runtime.env")) + .add_local_file("../.env", "/root/.env") .add_local_python_source('cluster') .add_local_python_source('BowongModalFunctions') ) @@ -107,6 +108,7 @@ with downloader_image.imports(): # 导入sub app function from .video_apps import * from .image_apps import * + from .audio_apps import *