modalDeploy/src/BowongModalFunctions/models/web_model.py

432 lines
16 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import uuid
from enum import Enum
from typing import List, Union, Optional, Dict, Any
import pydantic
from pydantic import BaseModel, Field, field_validator, ConfigDict, HttpUrl, computed_field
from .ffmpeg_worker_model import FFMpegSliceSegment
from .media_model import MediaSource, MediaSources, MediaProtocol
from ..config import WorkerConfig
from ..utils.VideoUtils import VideoMetadata
# Module-level worker configuration instance; FFMPEGResult.url reads
# `S3_cdn_endpoint` from it when building CDN links.
config = WorkerConfig()
class TaskStatus(str, Enum):
    # Run states a task can report. Members subclass `str`, so they compare
    # equal to their literal values.
    running = "running"
    failed = "failed"
    success = "success"
    expired = "expired"
class ErrorCode(int, Enum):
    # Numeric result codes. Members subclass `int`, so they compare equal to
    # their raw integer values.
    SUCCESS = 0

    # Caller-side / business-level failures.
    PARAM_ERROR = 10001
    NOT_FOUND = 10002
    UNAUTHORIZED = 10003
    FORBIDDEN = 10004
    BUSINESS_ERROR = 10005

    # Catch-all system failure.
    SYSTEM_ERROR = 99999
class WebhookMethodEnum(str, Enum):
    # HTTP methods accepted for webhook callbacks; values are lowercase strings.
    GET = "get"
    POST = "post"
class SentryTransactionHeader(BaseModel):
    # Sentry tracing values where both fields are optional and default to None.
    x_trace_id: Optional[str] = Field(
        default=None,
        description="Sentry Transaction ID",
    )
    x_baggage: Optional[str] = Field(
        default=None,
        description="Sentry Transaction baggage",
    )
class SentryTransactionInfo(BaseModel):
    # Sentry tracing values where both fields are required.
    x_trace_id: str = Field(
        description="Sentry Transaction ID",
    )
    x_baggage: str = Field(
        description="Sentry Transaction baggage",
    )
class FFMPEGSliceTaskStatusRequest(BaseModel):
    # Request body carrying the id of the task to look up.
    taskId: str = Field(
        description="任务Id",
    )
class ModalTaskResponse(BaseModel):
    # Acknowledgement returned for a submitted task: acceptance flag + task id.
    success: bool = Field(
        description="任务接受成功",
    )
    taskId: str = Field(
        description="任务Id",
    )
class RecordingTaskResponse(BaseModel):
    # Acknowledgement for a recording task; besides the acceptance flag and
    # task id it carries the playback address and the playlist URN.
    success: bool = Field(
        description="任务接受成功",
    )
    taskId: str = Field(
        description="任务Id",
    )
    manifest: str = Field(
        description="播放地址",
    )
    manifest_urn: str = Field(
        description="播放列表URN",
    )
class WebhookNotify(BaseModel):
    # Webhook callback settings: target endpoint, HTTP method and optional
    # extra headers. The schema-level description is supplied explicitly
    # through `json_schema_extra`.
    model_config = ConfigDict(
        json_schema_extra={
            "description": "Webhook返回值与查询ffmpeg任务所获得的json格式一致",
        },
    )

    endpoint: HttpUrl = Field(
        description="Webhook回调端点",
        examples=["https://webhook.example.com?query=123"],
    )
    method: WebhookMethodEnum = Field(
        description="Webhook回调请求方法, 使用POST方法时body与查询ffmpeg任务所获得的json body格式一致",
    )
    headers: Optional[Dict[str, str]] = Field(
        default=None,
        description="Webhook回调附带的Headers",
    )
class FFMPEGResult(BaseModel):
    # One output artifact of an FFMPEG task: its URN, byte size and metadata,
    # plus a computed CDN `url` derived from the URN.
    urn: str = Field(
        description="FFMPEG任务结果urn",
    )
    content_length: int = Field(
        description="媒体资源文件字节大小(Byte)",
    )
    metadata: VideoMetadata = Field(
        description="媒体元数据",
    )

    @computed_field(description="可通过CDN访问的资源链接")
    @property
    def url(self) -> str:
        """Return the CDN link obtained by swapping the ``s3://`` scheme for the CDN endpoint.

        Raises:
            ValueError: if ``urn`` does not start with ``s3://``.
        """
        is_s3_urn = self.urn.startswith("s3://")
        if is_s3_urn:
            cdn_prefix = f"{config.S3_cdn_endpoint}/"
            return self.urn.replace('s3://', cdn_prefix)
        raise ValueError("无法转换非s3前缀协议")
class BaseFFMPEGTaskRequest(BaseModel):
    # Common base for task requests; carries an optional webhook definition.
    webhook: Optional[WebhookNotify] = Field(
        default=None,
        description="Task webhook",
    )
class BaseFFMPEGTaskStatusResponse(BaseModel):
    # Common base for task status responses. Unknown input keys are ignored
    # (extra='ignore').
    model_config = ConfigDict(extra='ignore')

    taskId: str = Field(
        description="任务Id",
    )
    status: TaskStatus = Field(
        description="任务运行状态",
    )
    error: Optional[str] = Field(
        default=None,
        description="任务错误原因",
    )
    code: Optional[int] = Field(
        default=None,
        description="任务错误原因代码",
    )
    task_type: str = Field(
        default="unknown",
        description="任务类型",
    )
    results: Optional[List[Union[FFMPEGResult, Any]]] = Field(
        default=None,
        description="任务运行结果",
    )
class FFMPEGConvertStreamRequest(BaseFFMPEGTaskRequest):
    # Request to convert a stream; a string `media` must parse to an HLS source.
    media: MediaSource = Field(description="待转换的媒体源")

    @field_validator('media', mode='before')
    @classmethod
    def parse_inputs(cls, v: Union[str, MediaSource]) -> MediaSource:
        """Coerce the raw ``media`` input into a ``MediaSource``.

        Strings are parsed with ``MediaSource.from_str`` and must use the HLS
        protocol; ``MediaSource`` instances are returned unchanged.

        Raises:
            ValueError: if a string does not resolve to an HLS source, or the
                input is neither a string nor a ``MediaSource``. Pydantic
                turns this into a regular validation error.
        """
        # ValueError is used because pydantic.ValidationError cannot be
        # constructed from a plain message; raising it would surface as an
        # unrelated TypeError instead of a validation failure.
        if isinstance(v, str):
            media_source = MediaSource.from_str(v)
            if media_source.protocol != MediaProtocol.hls:
                raise ValueError('media只支持hls格式的urn')
            return media_source
        if isinstance(v, MediaSource):
            # NOTE(review): an already-built MediaSource is not checked for
            # the HLS protocol here — confirm whether that is intended.
            return v
        raise ValueError("media格式读取失败")
class FFMPEGConvertStreamResponse(BaseFFMPEGTaskStatusResponse):
    # Status response for stream conversion; adds a single optional result string.
    result: Optional[str] = Field(
        description="任务运行结果",
        default=None,
    )
class FFMPEGSliceRequest(BaseFFMPEGTaskRequest):
    # Request to slice a media source at the given markers.
    media: MediaSource = Field(
        description="待切割的媒体源",
    )
    markers: List[FFMpegSliceSegment] = Field(
        description="切割标记数组",
    )

    @field_validator('media', mode='before')
    @classmethod
    def parse_inputs(cls, v: Union[str, MediaSource]):
        """Parse a string via ``MediaSource.from_str``; pass a ``MediaSource`` through.

        Raises:
            TypeError: for any other input type.
        """
        if isinstance(v, str):
            return MediaSource.from_str(v)
        if isinstance(v, MediaSource):
            return v
        raise TypeError(v)
class FFMPEGSliceTaskStatusResponse(BaseFFMPEGTaskStatusResponse):
    # Status response for slicing; the result is an optional list of strings.
    result: Optional[List[str]] = Field(
        description="任务运行结果",
        default=None,
    )
class FFMPEGConcatRequest(BaseFFMPEGTaskRequest):
    # Request to concatenate several media sources.
    medias: MediaSources = Field(
        description="待合并的媒体源",
    )
class FFMPEGConcatTaskStatusResponse(BaseFFMPEGTaskStatusResponse):
    # Status response for concatenation; adds a single optional result string.
    result: Optional[str] = Field(
        description="任务运行结果",
        default=None,
    )
class FFMPEGExtractAudioRequest(BaseFFMPEGTaskRequest):
    # Request to extract the audio track from a media source.
    media: MediaSource = Field(
        description="待提取音频的媒体源",
    )

    @field_validator('media', mode='before')
    @classmethod
    def parse_inputs(cls, v: Union[str, MediaSource]):
        """Parse a string via ``MediaSource.from_str``; pass a ``MediaSource`` through.

        Raises:
            TypeError: for any other input type.
        """
        if isinstance(v, str):
            return MediaSource.from_str(v)
        if isinstance(v, MediaSource):
            return v
        raise TypeError(v)
class FFMPEGExtractAudioTaskStatusResponse(BaseFFMPEGTaskStatusResponse):
    # Status response for audio extraction; adds a single optional result string.
    result: Optional[str] = Field(
        description="任务运行结果",
        default=None,
    )
class FFMPEGCornerMirrorRequest(BaseFFMPEGTaskRequest):
    # Request for the corner-mirror operation on a media source.
    media: MediaSource = Field(
        description="需要处理的媒体源",
    )

    @field_validator('media', mode='before')
    @classmethod
    def parse_inputs(cls, v: Union[str, MediaSource]):
        """Parse a string via ``MediaSource.from_str``; pass a ``MediaSource`` through.

        Raises:
            TypeError: for any other input type.
        """
        if isinstance(v, str):
            return MediaSource.from_str(v)
        if isinstance(v, MediaSource):
            return v
        raise TypeError(v)
class FFMPEGCornerMirrorTaskStatusResponse(BaseFFMPEGTaskStatusResponse):
    # Status response for corner-mirror; adds a single optional result string.
    result: Optional[str] = Field(
        description="任务运行结果",
        default=None,
    )
class FFMPEGBgmRequest(BaseFFMPEGTaskRequest):
    # Request to add background music to a media source. `bgm_media` is
    # declared with the alias "bgmMedia".
    media: MediaSource = Field(description="需要处理的媒体源")
    bgm_media: MediaSource = Field(description="添加的BGM媒体源", alias="bgmMedia")

    @field_validator('media', mode='before')
    @classmethod
    def parse_media_input(cls, v: Union[str, MediaSource]) -> MediaSource:
        """Parse a string via ``MediaSource.from_str``; pass a ``MediaSource`` through.

        Raises:
            TypeError: for any other input type.
        """
        if isinstance(v, str):
            return MediaSource.from_str(v)
        if isinstance(v, MediaSource):
            return v
        raise TypeError(v)

    @field_validator('bgm_media', mode='before')
    @classmethod
    def parse_bgm_input(cls, v: Union[str, MediaSource]) -> MediaSource:
        """Coerce the BGM input into a ``MediaSource``.

        A string is parsed with ``MediaSource.from_str`` and must have a
        ``wav`` or ``mp3`` file extension. A ``MediaSource`` is returned as is.

        Raises:
            ValueError: if a string input has an unsupported extension
                (reported by Pydantic as a validation error).
            TypeError: for any other input type.
        """
        if isinstance(v, str):
            media = MediaSource.from_str(v)
            if media.file_extension not in ['wav', 'mp3']:
                # pydantic.ValidationError cannot be built from a message;
                # ValueError is what validators are expected to raise.
                raise ValueError("必须使用符合规范的音频格式, 如wav或者mp3")
            # The parsed source must be returned; previously this branch fell
            # through and yielded None, so valid string inputs were rejected.
            return media
        if isinstance(v, MediaSource):
            # NOTE(review): pre-built MediaSource instances skip the extension
            # check — confirm whether that is intended.
            return v
        raise TypeError(v)
class FFMPEGBgmTaskStatusResponse(BaseFFMPEGTaskStatusResponse):
    # Status response for the BGM task; result holds the output URN when set.
    result: Optional[str] = Field(
        description="生成结果的URN",
        default=None,
    )
class FFMPEGZoomLoopRequest(BaseFFMPEGTaskRequest):
    # Request for the zoom-loop effect; duration defaults to 6.0 and zoom to 0.1.
    media: MediaSource = Field(
        description="需要处理的媒体源",
    )
    duration: float = Field(
        default=6.0,
        description="放大缩小一个循环的持续时间秒数",
    )
    zoom: float = Field(
        default=0.1,
        description="放大缩小系数",
    )

    @field_validator('media', mode='before')
    @classmethod
    def parse_inputs(cls, v: Union[str, MediaSource]):
        """Parse a string via ``MediaSource.from_str``; pass a ``MediaSource`` through.

        Raises:
            TypeError: for any other input type.
        """
        if isinstance(v, str):
            return MediaSource.from_str(v)
        if isinstance(v, MediaSource):
            return v
        raise TypeError(v)
class FFMPEGZoomLoopTaskStatusResponse(BaseFFMPEGTaskStatusResponse):
    # Status response for the zoom-loop task; result holds the output URN when set.
    result: Optional[str] = Field(
        description="生成结果的URN",
        default=None,
    )
class FFMPEGOverlayGifRequest(BaseFFMPEGTaskRequest):
    # Request to overlay a GIF effect onto a media source.
    media: MediaSource = Field(description="需要处理的媒体源")
    gif: MediaSource = Field(description="叠加的特效gif")

    @field_validator('media', mode='before')
    @classmethod
    def parse_media_input(cls, v: Union[str, MediaSource]) -> MediaSource:
        """Parse a string via ``MediaSource.from_str``; pass a ``MediaSource`` through.

        Raises:
            TypeError: for any other input type (message names the type).
        """
        if isinstance(v, str):
            return MediaSource.from_str(v)
        if isinstance(v, MediaSource):
            return v
        raise TypeError(f"解析{type(v)}类型不支持")

    @field_validator('gif', mode='before')
    @classmethod
    def parse_fig_input(cls, v: Union[str, MediaSource]) -> MediaSource:
        """Coerce the GIF input into a ``MediaSource``.

        A string is parsed with ``MediaSource.from_str`` and must have the
        ``gif`` file extension. A ``MediaSource`` is returned as is.

        Raises:
            ValueError: if a string input is not a ``.gif`` file (reported by
                Pydantic as a validation error).
            TypeError: for any other input type.
        """
        if isinstance(v, str):
            media = MediaSource.from_str(v)
            if media.file_extension != 'gif':
                # pydantic.ValidationError cannot be built from a message;
                # ValueError is what validators are expected to raise.
                raise ValueError("必须使用.gif文件")
            return media
        if isinstance(v, MediaSource):
            return v
        raise TypeError(v)
class FFMPEGOverlayGifTaskStatusResponse(BaseFFMPEGTaskStatusResponse):
    # Status response for the GIF overlay task; result holds the output URN when set.
    result: Optional[str] = Field(
        description="生成结果的URN",
        default=None,
    )
class FFMPEGSubtitleOverlayRequest(BaseFFMPEGTaskRequest):
    # Request to burn a subtitle file onto a media source, together with the
    # font files the subtitle references.
    media: MediaSource = Field(description="需要处理的媒体源")
    subtitle: MediaSource = Field(description="需要叠加的字幕文件")
    fonts: List[MediaSource] = Field(description="字幕文件内使用到的字体文件")

    @field_validator('media', mode='before')
    @classmethod
    def parse_media_input(cls, v: Union[str, MediaSource]) -> MediaSource:
        """Parse a string via ``MediaSource.from_str``; pass a ``MediaSource`` through.

        Raises:
            TypeError: for any other input type.
        """
        if isinstance(v, str):
            return MediaSource.from_str(v)
        if isinstance(v, MediaSource):
            return v
        raise TypeError(v)

    @field_validator('subtitle', mode='before')
    @classmethod
    def parse_subtitle_input(cls, v: Union[str, MediaSource]) -> MediaSource:
        """Coerce the subtitle input into a ``MediaSource``.

        A string is parsed with ``MediaSource.from_str`` and must have the
        ``ass`` file extension. A ``MediaSource`` is returned as is.

        Raises:
            ValueError: if a string input is not an ``.ass`` file (reported by
                Pydantic as a validation error).
            TypeError: for any other input type.
        """
        if isinstance(v, str):
            media = MediaSource.from_str(v)
            if media.file_extension not in ['ass']:
                # pydantic.ValidationError cannot be built from a message;
                # ValueError is what validators are expected to raise.
                raise ValueError("必须使用标准字幕文件, 如.ass")
            return media
        if isinstance(v, MediaSource):
            return v
        raise TypeError(v)

    @field_validator('fonts', mode='before')
    @classmethod
    def parse_font_input(cls, v: List[Union[str, MediaSource]]) -> List[MediaSource]:
        """Coerce every element of ``fonts`` into a ``MediaSource``.

        String elements are parsed with ``MediaSource.from_str``;
        ``MediaSource`` elements are kept unchanged.

        Raises:
            ValueError: if the input is empty or contains an element that is
                neither a string nor a ``MediaSource`` (reported by Pydantic
                as a validation error).
        """
        if not v:
            raise ValueError("fonts输入为空")
        result = []
        for item in v:
            if isinstance(item, str):
                result.append(MediaSource.from_str(item))
            elif isinstance(item, MediaSource):
                result.append(item)
            else:
                raise ValueError("fonts元素类型错误: 必须是字符串")
        return result
class FFMPEGSubtitleTaskStatusResponse(BaseFFMPEGTaskStatusResponse):
    # Status response for the subtitle overlay task; result holds the output URN when set.
    result: Optional[str] = Field(
        description="生成结果的URN",
        default=None,
    )
class FFMPEGMixBgmWithNoiseReduceRequest(BaseFFMPEGTaskRequest):
    # Request to mix BGM into a media source with noise reduction. Volume
    # factors default to 1.4 (video) and 0.1 (music); the noise sample is
    # optional.
    media: MediaSource = Field(
        description="需要处理的媒体源",
    )
    bgm: MediaSource = Field(
        description="需要添加的BGM媒体源",
    )
    video_volume: float = Field(
        default=1.4,
        description="最终输出的视频音量系数",
    )
    music_volume: float = Field(
        default=0.1,
        description="最终输出的BGM音量系数",
    )
    noise_sample: Optional[MediaSource] = Field(
        default=None,
        description="常考噪音样本如不指定将使用源视频的前2秒作为样本",
    )

    @field_validator('media', mode='before')
    @classmethod
    def parse_media_input(cls, v: Union[str, MediaSource]):
        """Parse a string via ``MediaSource.from_str``; pass a ``MediaSource`` through.

        Raises:
            TypeError: for any other input type.
        """
        if isinstance(v, str):
            return MediaSource.from_str(v)
        if isinstance(v, MediaSource):
            return v
        raise TypeError(v)

    @field_validator('bgm', mode='before')
    @classmethod
    def parse_bgm_input(cls, v: Union[str, MediaSource]):
        """Parse a string via ``MediaSource.from_str``; pass a ``MediaSource`` through.

        Raises:
            TypeError: for any other input type.
        """
        if isinstance(v, str):
            return MediaSource.from_str(v)
        if isinstance(v, MediaSource):
            return v
        raise TypeError(v)

    @field_validator('noise_sample', mode='before')
    @classmethod
    def parse_sample_input(cls, v: Union[None, str, MediaSource]):
        """Normalise the optional noise sample.

        Any falsy input (``None``, empty string, ...) becomes ``None``;
        strings are parsed via ``MediaSource.from_str``; ``MediaSource``
        instances pass through.

        Raises:
            TypeError: for any other truthy input type.
        """
        if not v:
            return None
        if isinstance(v, str):
            return MediaSource.from_str(v)
        if isinstance(v, MediaSource):
            return v
        raise TypeError(v)
class FFMPEGMixBgmWithNoiseReduceStatusResponse(BaseFFMPEGTaskStatusResponse):
    # Status response for the BGM-mix/noise-reduce task; result holds the output URN when set.
    result: Optional[str] = Field(
        description="生成结果的URN",
        default=None,
    )
class FFMPEGVideoLoopFillAudioRequest(BaseFFMPEGTaskRequest):
    # Request pairing a video source with an audio source; no custom
    # validators are declared, so both fields rely on MediaSource's own
    # validation.
    video: MediaSource = Field(
        description="用来填充的视频素材",
    )
    audio: MediaSource = Field(
        description="被填充的音频素材",
    )
class FFMPEGVideoLoopFillAudioResponse(BaseFFMPEGTaskStatusResponse):
    # Status response for the video-loop-fill task; result holds the output URN when set.
    result: Optional[str] = Field(
        description="生成结果的URN",
        default=None,
    )
class FFMPEGExtractFrameRequest(BaseFFMPEGTaskRequest):
    # Request to extract a single frame from a video; frame_index defaults to 1.
    video: MediaSource = Field(
        description="提取帧画面的来源",
    )
    frame_index: int = Field(
        default=1,
        description="提取的第几帧, 从1开始默认为1",
    )

    @field_validator('video', mode='before')
    @classmethod
    def parse_video_input(cls, v: Union[str, MediaSource]):
        """Parse a string via ``MediaSource.from_str``; pass a ``MediaSource`` through.

        Raises:
            TypeError: for any other input type.
        """
        if isinstance(v, str):
            return MediaSource.from_str(v)
        if isinstance(v, MediaSource):
            return v
        raise TypeError(v)
class FFMPEGExtractFrameStatusResponse(BaseFFMPEGTaskStatusResponse):
    # Status response for the frame-extraction task; result holds the output URN when set.
    result: Optional[str] = Field(
        description="生成结果的URN",
        default=None,
    )
class ModalTaskCancelResponse(BaseModel):
    # Result of a cancel request: success flag plus an optional failure reason.
    success: bool = Field(
        description="成功取消任务",
    )
    error: Optional[str] = Field(
        description="失败原因",
        default=None,
    )
class ComfyTaskStatusResponse(BaseModel):
    # Status response for a Comfy task: id and status are required; error,
    # code and result are optional and default to None.
    taskId: str = Field(
        description="任务Id",
    )
    status: TaskStatus = Field(
        description="任务运行状态",
    )
    error: Optional[str] = Field(
        default=None,
        description="任务错误原因",
    )
    code: Optional[int] = Field(
        default=None,
        description="任务错误原因代码",
    )
    result: Optional[str] = Field(
        default=None,
        description="任务运行结果",
    )
class ComfyTaskRequest(BaseFFMPEGTaskRequest):
    # Request for a Comfy task: a video source plus TTS text segments, speaker
    # id and speech speed.
    # `video_path` defaults to None, so it is annotated as Optional to match.
    video_path: Optional[MediaSource] = Field(
        default=None, description="视频源")
    start_time: str = Field(default="00:00:01.600", description="开始时间")
    # default_factory generates a fresh UUID per instance; a plain
    # `default=str(uuid.uuid4())` is evaluated once at import time and would
    # hand the same prefix to every request.
    filename_prefix: str = Field(default_factory=lambda: str(uuid.uuid4()), description="生成文件名前缀")
    tts_text1: str = Field(default="好好看,这是我们家专门为女生定制的背心,", description="tts文本1")
    tts_text2: str = Field(default="", description="tts文本2")
    tts_text3: str = Field(default="", description="tts文本3")
    tts_text4: str = Field(default="", description="tts文本4")
    anchor_id: str = Field(default="dawan", description="讲话人ID")
    speed: float = Field(default=1, description="讲话语速")

    @field_validator('video_path', mode='before')
    @classmethod
    def parse_inputs(cls, v: Union[None, str, MediaSource]) -> Optional[MediaSource]:
        """Coerce the raw ``video_path`` input into a ``MediaSource``.

        ``None`` is accepted (it is the field's default); strings are parsed
        with ``MediaSource.from_str``; ``MediaSource`` instances pass through.

        Raises:
            TypeError: for any other input type.
        """
        if v is None:
            return None
        if isinstance(v, str):
            return MediaSource.from_str(v)
        if isinstance(v, MediaSource):
            return v
        raise TypeError(v)
class FFMPEGStreamRecordRequest(BaseFFMPEGTaskRequest):
    # Request to record a live stream. Defaults: 5 for segment_duration,
    # 300 for recording_timeout and 36000 for monitor_timeout.
    stream_source: str = Field(
        description="直播源地址",
    )
    segment_duration: int = Field(
        description="hls片段时长(秒)",
        default=5,
    )
    recording_timeout: int = Field(
        description="hls流无内容后等待的时长(秒)",
        default=300,
    )
    monitor_timeout: int = Field(
        description="录制监控最大时长(秒), 默认为10小时",
        default=36000,
    )