"""Pydantic request/response models for FFMPEG, recording and Comfy tasks."""
import uuid
from enum import Enum
from typing import List, Union, Optional, Dict, Any

import pydantic
from pydantic import BaseModel, Field, field_validator, ConfigDict, HttpUrl, computed_field

from .ffmpeg_worker_model import FFMpegSliceSegment
from .media_model import MediaSource, MediaSources, MediaProtocol
from ..config import WorkerConfig
from ..utils.VideoUtils import VideoMetadata

# NOTE: classes below are documented with `#` comments instead of docstrings on
# purpose: pydantic copies model/enum docstrings into the generated JSON schema,
# which would change the published API description.

# Module-level worker configuration; read by FFMPEGResult.url.
config = WorkerConfig()


def _coerce_media_source(v: Any) -> MediaSource:
    """Turn a raw validator input into a ``MediaSource``.

    Strings are parsed with ``MediaSource.from_str``; existing ``MediaSource``
    instances are returned unchanged.

    Raises:
        TypeError: if ``v`` is neither a ``str`` nor a ``MediaSource``.
    """
    if isinstance(v, str):
        return MediaSource.from_str(v)
    if isinstance(v, MediaSource):
        return v
    raise TypeError(v)


# Lifecycle states reported for a task.
class TaskStatus(str, Enum):
    running = "running"
    failed = "failed"
    success = "success"
    expired = "expired"


# Numeric result codes.
class ErrorCode(int, Enum):
    SUCCESS = 0
    PARAM_ERROR = 10001
    NOT_FOUND = 10002
    UNAUTHORIZED = 10003
    FORBIDDEN = 10004
    BUSINESS_ERROR = 10005
    SYSTEM_ERROR = 99999


# HTTP methods accepted for webhook callbacks.
class WebhookMethodEnum(str, Enum):
    GET = "get"
    POST = "post"


# Sentry tracing values where both fields are optional.
class SentryTransactionHeader(BaseModel):
    x_trace_id: Optional[str] = Field(description="Sentry Transaction ID", default=None)
    x_baggage: Optional[str] = Field(description="Sentry Transaction baggage", default=None)


# Sentry tracing values where both fields are required.
class SentryTransactionInfo(BaseModel):
    x_trace_id: str = Field(description="Sentry Transaction ID")
    x_baggage: str = Field(description="Sentry Transaction baggage")


# Request carrying the id of the task whose status is queried.
class FFMPEGSliceTaskStatusRequest(BaseModel):
    taskId: str = Field(description="任务Id")


# Acknowledgement returned when a task has been accepted.
class ModalTaskResponse(BaseModel):
    success: bool = Field(description="任务接受成功")
    taskId: str = Field(description="任务Id")


# Acknowledgement for a recording task, including the playlist location.
class RecordingTaskResponse(BaseModel):
    success: bool = Field(description="任务接受成功")
    taskId: str = Field(description="任务Id")
    manifest: str = Field(description="播放地址")
    manifest_urn: str = Field(description="播放列表URN")


# Webhook callback target: endpoint, HTTP method and optional headers.
class WebhookNotify(BaseModel):
    endpoint: HttpUrl = Field(description="Webhook回调端点", examples=["https://webhook.example.com?query=123"])
    method: WebhookMethodEnum = Field(
        description="Webhook回调请求方法, 使用POST方法时body与查询ffmpeg任务所获得的json body格式一致")
    headers: Optional[Dict[str, str]] = Field(description="Webhook回调附带的Headers", default=None)

    model_config = ConfigDict(json_schema_extra={
        "description": "Webhook返回值与查询ffmpeg任务所获得的json格式一致"
    })


# One produced media artifact: its URN, byte size and metadata.
class FFMPEGResult(BaseModel):
    urn: str = Field(description="FFMPEG任务结果urn")
    content_length: int = Field(description="媒体资源文件字节大小(Byte)")
    metadata: VideoMetadata = Field(description="媒体元数据")

    @computed_field(description="可通过CDN访问的资源链接")
    @property
    def url(self) -> str:
        """Return the URN with its ``s3://`` prefix swapped for the CDN endpoint.

        Raises:
            ValueError: if ``urn`` does not start with ``s3://``.
        """
        if not self.urn.startswith("s3://"):
            raise ValueError("无法转换非s3前缀协议")
        # count=1: only the leading scheme is rewritten, never a later
        # "s3://" that happens to appear inside the object key.
        return self.urn.replace('s3://', f"{config.S3_cdn_endpoint}/", 1)


# Common base for task requests: optional webhook notification settings.
class BaseFFMPEGTaskRequest(BaseModel):
    webhook: Optional[WebhookNotify] = Field(description="Task webhook", default=None)


# Common base for task status responses; unknown extra fields are ignored.
class BaseFFMPEGTaskStatusResponse(BaseModel):
    taskId: str = Field(description="任务Id")
    status: TaskStatus = Field(description="任务运行状态")
    error: Optional[str] = Field(description="任务错误原因", default=None)
    code: Optional[int] = Field(description="任务错误原因代码", default=None)
    task_type: str = Field(description="任务类型", default="unknown")
    results: Optional[List[Union[FFMPEGResult, Any]]] = Field(description="任务运行结果", default=None)

    model_config = ConfigDict(extra='ignore')


# Stream conversion request; string inputs must parse to an hls media source.
class FFMPEGConvertStreamRequest(BaseFFMPEGTaskRequest):
    media: MediaSource = Field(description="待转换的媒体源")

    @field_validator('media', mode='before')
    @classmethod
    def parse_inputs(cls, v: Union[str, MediaSource]):
        """Parse ``media``; a string must resolve to the hls protocol.

        Failures are raised as ``ValueError`` so pydantic reports them as a
        regular validation error (``pydantic.ValidationError`` cannot be
        constructed from a plain message).
        """
        if isinstance(v, MediaSource):
            return v
        if not isinstance(v, str):
            raise ValueError("media格式读取失败")
        media_source = MediaSource.from_str(v)
        if media_source.protocol != MediaProtocol.hls:
            raise ValueError('media只支持hls格式的urn')
        return media_source


class FFMPEGConvertStreamResponse(BaseFFMPEGTaskStatusResponse):
    result: Optional[str] = Field(default=None, description="任务运行结果")


# Slice request: a media source plus the list of slice markers.
class FFMPEGSliceRequest(BaseFFMPEGTaskRequest):
    media: MediaSource = Field(description="待切割的媒体源")
    markers: List[FFMpegSliceSegment] = Field(description="切割标记数组")

    @field_validator('media', mode='before')
    @classmethod
    def parse_inputs(cls, v: Union[str, MediaSource]):
        """Accept ``media`` as a string or a ``MediaSource``."""
        return _coerce_media_source(v)


class FFMPEGSliceTaskStatusResponse(BaseFFMPEGTaskStatusResponse):
    result: Optional[List[str]] = Field(default=None, description="任务运行结果")


# Concatenation request over several media sources.
class FFMPEGConcatRequest(BaseFFMPEGTaskRequest):
    medias: MediaSources = Field(description="待合并的媒体源")


class FFMPEGConcatTaskStatusResponse(BaseFFMPEGTaskStatusResponse):
    result: Optional[str] = Field(default=None, description="任务运行结果")


# Audio extraction request.
class FFMPEGExtractAudioRequest(BaseFFMPEGTaskRequest):
    media: MediaSource = Field(description="待提取音频的媒体源")

    @field_validator('media', mode='before')
    @classmethod
    def parse_inputs(cls, v: Union[str, MediaSource]):
        """Accept ``media`` as a string or a ``MediaSource``."""
        return _coerce_media_source(v)


class FFMPEGExtractAudioTaskStatusResponse(BaseFFMPEGTaskStatusResponse):
    result: Optional[str] = Field(default=None, description="任务运行结果")


# Corner-mirror processing request.
class FFMPEGCornerMirrorRequest(BaseFFMPEGTaskRequest):
    media: MediaSource = Field(description="需要处理的媒体源")

    @field_validator('media', mode='before')
    @classmethod
    def parse_inputs(cls, v: Union[str, MediaSource]):
        """Accept ``media`` as a string or a ``MediaSource``."""
        return _coerce_media_source(v)


class FFMPEGCornerMirrorTaskStatusResponse(BaseFFMPEGTaskStatusResponse):
    result: Optional[str] = Field(default=None, description="任务运行结果")


# BGM request: the media to process plus the BGM audio (input alias "bgmMedia").
class FFMPEGBgmRequest(BaseFFMPEGTaskRequest):
    media: MediaSource = Field(description="需要处理的媒体源")
    bgm_media: MediaSource = Field(description="添加的BGM媒体源", alias="bgmMedia")

    @field_validator('media', mode='before')
    @classmethod
    def parse_media_input(cls, v: Union[str, MediaSource]):
        """Accept ``media`` as a string or a ``MediaSource``."""
        return _coerce_media_source(v)

    @field_validator('bgm_media', mode='before')
    @classmethod
    def parse_bgm_input(cls, v: Union[str, MediaSource]):
        """Parse the BGM source; string inputs must have a wav/mp3 extension.

        Raises:
            ValueError: if a string input has another file extension.
            TypeError: if ``v`` is neither a ``str`` nor a ``MediaSource``.
        """
        if isinstance(v, str):
            media = MediaSource.from_str(v)
            if media.file_extension not in ['wav', 'mp3']:
                raise ValueError("必须使用符合规范的音频格式, 如wav或者mp3")
            # Return the parsed source; without this the validator yields None
            # and every valid string input is rejected.
            return media
        elif isinstance(v, MediaSource):
            return v
        else:
            raise TypeError(v)


class FFMPEGBgmTaskStatusResponse(BaseFFMPEGTaskStatusResponse):
    result: Optional[str] = Field(default=None, description="生成结果的URN")


# Zoom-loop request: media plus loop duration and zoom factor.
class FFMPEGZoomLoopRequest(BaseFFMPEGTaskRequest):
    media: MediaSource = Field(description="需要处理的媒体源")
    duration: float = Field(description="放大缩小一个循环的持续时间秒数", default=6.0)
    zoom: float = Field(description="放大缩小系数", default=0.1)

    @field_validator('media', mode='before')
    @classmethod
    def parse_inputs(cls, v: Union[str, MediaSource]):
        """Accept ``media`` as a string or a ``MediaSource``."""
        return _coerce_media_source(v)


class FFMPEGZoomLoopTaskStatusResponse(BaseFFMPEGTaskStatusResponse):
    result: Optional[str] = Field(default=None, description="生成结果的URN")


# GIF overlay request: media plus the gif to overlay.
class FFMPEGOverlayGifRequest(BaseFFMPEGTaskRequest):
    media: MediaSource = Field(description="需要处理的媒体源")
    gif: MediaSource = Field(description="叠加的特效gif")

    @field_validator('media', mode='before')
    @classmethod
    def parse_media_input(cls, v: Union[str, MediaSource]):
        """Accept ``media`` as a string or a ``MediaSource``.

        Kept separate from the shared helper because this validator reports
        the offending type in its own error message.
        """
        if isinstance(v, str):
            return MediaSource.from_str(v)
        elif isinstance(v, MediaSource):
            return v
        else:
            raise TypeError(f"解析{type(v)}类型不支持")

    @field_validator('gif', mode='before')
    @classmethod
    def parse_fig_input(cls, v: Union[str, MediaSource]):
        """Parse the overlay source; string inputs must have a gif extension.

        Raises:
            ValueError: if a string input has another file extension.
            TypeError: if ``v`` is neither a ``str`` nor a ``MediaSource``.
        """
        if isinstance(v, str):
            media = MediaSource.from_str(v)
            if media.file_extension != 'gif':
                raise ValueError("必须使用.gif文件")
            return media
        elif isinstance(v, MediaSource):
            return v
        else:
            raise TypeError(v)


class FFMPEGOverlayGifTaskStatusResponse(BaseFFMPEGTaskStatusResponse):
    result: Optional[str] = Field(default=None, description="生成结果的URN")


# Subtitle overlay request: media, subtitle file and the fonts it uses.
class FFMPEGSubtitleOverlayRequest(BaseFFMPEGTaskRequest):
    media: MediaSource = Field(description="需要处理的媒体源")
    subtitle: MediaSource = Field(description="需要叠加的字幕文件")
    fonts: List[MediaSource] = Field(description="字幕文件内使用到的字体文件")

    @field_validator('media', mode='before')
    @classmethod
    def parse_media_input(cls, v: Union[str, MediaSource]):
        """Accept ``media`` as a string or a ``MediaSource``."""
        return _coerce_media_source(v)

    @field_validator('subtitle', mode='before')
    @classmethod
    def parse_subtitle_input(cls, v: Union[str, MediaSource]):
        """Parse the subtitle source; string inputs must have an ass extension.

        Raises:
            ValueError: if a string input has another file extension.
            TypeError: if ``v`` is neither a ``str`` nor a ``MediaSource``.
        """
        if isinstance(v, str):
            media = MediaSource.from_str(v)
            if media.file_extension not in ['ass']:
                raise ValueError("必须使用标准字幕文件, 如.ass")
            return media
        elif isinstance(v, MediaSource):
            return v
        else:
            raise TypeError(v)

    @field_validator('fonts', mode='before')
    @classmethod
    def parse_font_input(cls, v: List[Union[str, MediaSource]]) -> List[MediaSource]:
        """Parse every font entry into a ``MediaSource``.

        Raises:
            ValueError: if the input is empty, or an element is neither a
                ``str`` nor a ``MediaSource``.
        """
        if not v:
            raise ValueError("fonts输入为空")
        result = []
        for item in v:
            if isinstance(item, str):
                result.append(MediaSource.from_str(item))
            elif isinstance(item, MediaSource):
                result.append(item)
            else:
                raise ValueError("fonts元素类型错误: 必须是字符串")
        return result


class FFMPEGSubtitleTaskStatusResponse(BaseFFMPEGTaskStatusResponse):
    result: Optional[str] = Field(default=None, description="生成结果的URN")


# BGM mixing request with volume factors and an optional noise sample.
class FFMPEGMixBgmWithNoiseReduceRequest(BaseFFMPEGTaskRequest):
    media: MediaSource = Field(description="需要处理的媒体源")
    bgm: MediaSource = Field(description="需要添加的BGM媒体源")
    video_volume: float = Field(description="最终输出的视频音量系数", default=1.4)
    music_volume: float = Field(description="最终输出的BGM音量系数", default=0.1)
    noise_sample: Optional[MediaSource] = Field(description="常考噪音样本,如不指定将使用源视频的前2秒作为样本", default=None)

    @field_validator('media', mode='before')
    @classmethod
    def parse_media_input(cls, v: Union[str, MediaSource]):
        """Accept ``media`` as a string or a ``MediaSource``."""
        return _coerce_media_source(v)

    @field_validator('bgm', mode='before')
    @classmethod
    def parse_bgm_input(cls, v: Union[str, MediaSource]):
        """Accept ``bgm`` as a string or a ``MediaSource``."""
        return _coerce_media_source(v)

    @field_validator('noise_sample', mode='before')
    @classmethod
    def parse_sample_input(cls, v: Union[None, str, MediaSource]):
        """Accept ``noise_sample`` as a string or ``MediaSource``; falsy input becomes ``None``."""
        if not v:
            return None
        return _coerce_media_source(v)


class FFMPEGMixBgmWithNoiseReduceStatusResponse(BaseFFMPEGTaskStatusResponse):
    result: Optional[str] = Field(default=None, description="生成结果的URN")


# Request pairing a video source with an audio source.
class FFMPEGVideoLoopFillAudioRequest(BaseFFMPEGTaskRequest):
    video: MediaSource = Field(description="用来填充的视频素材")
    audio: MediaSource = Field(description="被填充的音频素材")


class FFMPEGVideoLoopFillAudioResponse(BaseFFMPEGTaskStatusResponse):
    result: Optional[str] = Field(default=None, description="生成结果的URN")


# Frame extraction request: video source and frame index (default 1).
class FFMPEGExtractFrameRequest(BaseFFMPEGTaskRequest):
    video: MediaSource = Field(description="提取帧画面的来源")
    frame_index: int = Field(description="提取的第几帧, 从1开始,默认为1", default=1)

    @field_validator('video', mode='before')
    @classmethod
    def parse_video_input(cls, v: Union[str, MediaSource]):
        """Accept ``video`` as a string or a ``MediaSource``."""
        return _coerce_media_source(v)


class FFMPEGExtractFrameStatusResponse(BaseFFMPEGTaskStatusResponse):
    result: Optional[str] = Field(default=None, description="生成结果的URN")


# Result of a task cancellation attempt.
class ModalTaskCancelResponse(BaseModel):
    success: bool = Field(description="成功取消任务")
    error: Optional[str] = Field(default=None, description="失败原因")


# Status response for a Comfy task.
class ComfyTaskStatusResponse(BaseModel):
    taskId: str = Field(description="任务Id")
    status: TaskStatus = Field(description="任务运行状态")
    error: Optional[str] = Field(description="任务错误原因", default=None)
    code: Optional[int] = Field(description="任务错误原因代码", default=None)
    result: Optional[str] = Field(description="任务运行结果", default=None)


# Comfy task request: video source, start time, TTS texts and speaker settings.
class ComfyTaskRequest(BaseFFMPEGTaskRequest):
    # NOTE(review): annotated as MediaSource but defaults to None; pydantic does
    # not validate defaults, so an omitted video_path stays None — confirm
    # whether the field should be Optional or required.
    video_path: MediaSource = Field(
        default=None, description="视频源")
    start_time: str = Field(default="00:00:01.600", description="开始时间")
    # default_factory so each request gets its own UUID; a plain `default=`
    # is evaluated once at import and shared by every instance.
    filename_prefix: str = Field(default_factory=lambda: str(uuid.uuid4()), description="生成文件名前缀")
    tts_text1: str = Field(default="好好看,这是我们家专门为女生定制的背心,", description="tts文本1")
    tts_text2: str = Field(default="", description="tts文本2")
    tts_text3: str = Field(default="", description="tts文本3")
    tts_text4: str = Field(default="", description="tts文本4")
    anchor_id: str = Field(default="dawan", description="讲话人ID")
    speed: float = Field(default=1, description="讲话语速")

    @field_validator('video_path', mode='before')
    @classmethod
    def parse_inputs(cls, v: Union[str, MediaSource]):
        """Accept ``video_path`` as a string or a ``MediaSource``."""
        return _coerce_media_source(v)


# Stream recording request: source address plus segment and timeout settings.
class FFMPEGStreamRecordRequest(BaseFFMPEGTaskRequest):
    stream_source: str = Field(description="直播源地址")
    segment_duration: int = Field(default=5, description="hls片段时长(秒)")
    recording_timeout: int = Field(default=300, description="hls流无内容后等待的时长(秒)")
    monitor_timeout: int = Field(default=36000, description="录制监控最大时长(秒), 默认为10小时")