# mxivideo/python_core/utils/VideoUtils.py

import asyncio
import shutil
import tempfile
from datetime import datetime, timedelta
import aiofiles
import aiohttp
from typing import List, Tuple, Optional, Any
import m3u8
import numpy as np
import json, os
import math
from aiohttp import ClientTimeout
from .TimeUtils import TimeDelta
from ffmpeg import FFmpeg
from ffmpeg.asyncio import FFmpeg as AsyncFFmpeg
import soundfile as sf
import pyloudnorm as pyln
import noisereduce as nr
from pedalboard import (
Pedalboard,
Compressor,
Limiter,
HighpassFilter,
LowpassFilter,
Gain,
Reverb,
Chorus,
Distortion,
)
from pedalboard.io import AudioFile
from loguru import logger
from .PathUtils import FileUtils
from ..models.ffmpeg_tasks.models import FFMpegSliceSegment, FFMPEGSliceOptions, VideoStream, VideoMetadata
class VideoUtils:
"""
python-ffmpeg package docs : https://python-ffmpeg.readthedocs.io/en/stable/
"""
@staticmethod
def ffprobe_video_format(media_path: str) -> VideoStream:
ffprobe = FFmpeg(executable="ffprobe").input(
media_path, print_format="json",
show_streams=None, show_format=None
)
video_metadata = VideoMetadata.model_validate_json(ffprobe.execute())
return video_metadata.streams[0]
@staticmethod
def ffprobe_media_metadata(media_path: str) -> VideoMetadata:
ffprobe = FFmpeg(executable="ffprobe").input(
media_path, print_format="json",
show_streams=None, show_format=None
)
metadata_bytes = ffprobe.execute()
metadata_json = json.loads(metadata_bytes)
logger.info(f"metadata = {json.dumps(metadata_json, ensure_ascii=False)}")
video_metadata = VideoMetadata.model_validate_json(metadata_bytes)
return video_metadata
@staticmethod
def ffprobe_video_duration(media_path: str) -> TimeDelta:
ffprobe_cmd = VideoUtils.ffmpeg_init(use_ffprobe=True)
ffprobe_cmd.input(
media_path, print_format="json",
show_streams=None, show_format=None
)
metadata_json = ffprobe_cmd.execute()
metadata = VideoMetadata.model_validate_json(metadata_json)
return TimeDelta(seconds=metadata.streams[0].duration)
@staticmethod
def ffprobe_audio_duration(media_path: str) -> TimeDelta:
ffprobe_cmd = VideoUtils.ffmpeg_init(use_ffprobe=True)
ffprobe_cmd.input(
media_path, print_format="json",
show_streams=None, show_format=None
)
metadata_json = ffprobe_cmd.execute()
metadata = VideoMetadata.model_validate_json(metadata_json)
return TimeDelta(seconds=metadata.streams[-1].duration)
@staticmethod
async def ffprobe_video_format_async(media_path: str) -> VideoStream:
ffprobe = AsyncFFmpeg(executable="ffprobe").input(
media_path, print_format="json", show_streams=None, show_format=None
)
video_metadata = VideoMetadata.model_validate_json(await ffprobe.execute())
return video_metadata.streams[0]
@staticmethod
def ffprobe_video_size(media_path: str) -> Tuple[int, int]:
"""
:param media_path: local path to video
:return: video_width, video_height
"""
ffprobe = FFmpeg(executable="ffprobe").input(
media_path, print_format="json", show_streams=None, show_format=None
)
video_metadata = VideoMetadata.model_validate_json(ffprobe.execute())
return video_metadata.streams[0].width, video_metadata.streams[0].height
@staticmethod
async def ffprobe_video_size_async(media_path: str) -> Tuple[int, int]:
ffprobe = AsyncFFmpeg(executable="ffprobe").input(
media_path, print_format="json", show_streams=None, show_format=None
)
video_metadata = VideoMetadata.model_validate_json(await ffprobe.execute())
return video_metadata.streams[0].width, video_metadata.streams[0].height
@staticmethod
def noise_reduce(audio_path: str, noise_sample_path: Optional[str] = None,
output_path: Optional[str] = None) -> str:
samplerate = 44100
with AudioFile(audio_path).resampled_to(float(samplerate)) as f:
audio = f.read(f.frames)
if noise_sample_path:
with AudioFile(noise_sample_path).resampled_to(float(samplerate)) as f:
noise_sample = f.read(f.frames)
else:
            # Use the first 2 seconds of the audio as the noise sample
noise_sample_length = min(int(2 * samplerate), audio.shape[0])
noise_sample = audio[:noise_sample_length]
if not output_path:
output_path = FileUtils.file_path_extend(audio_path, "nr")
reduced_noise = nr.reduce_noise(y=audio, y_noise=noise_sample, sr=samplerate,
stationary=True, prop_decrease=0.75, n_std_thresh_stationary=1.5,
n_fft=2048, win_length=1024, hop_length=512, n_jobs=1)
board = Pedalboard(
[
HighpassFilter(cutoff_frequency_hz=150),
LowpassFilter(cutoff_frequency_hz=8000),
Reverb(room_size=0.08, damping=0.7, wet_level=0.08,
dry_level=0.92, width=0.4),
Chorus(rate_hz=0.7, depth=0.12, centre_delay_ms=3.0, mix=0.10),
Distortion(drive_db=3.0),
Compressor(threshold_db=-30, ratio=1.8, attack_ms=20, release_ms=200),
Compressor(threshold_db=-24, ratio=2.2, attack_ms=15, release_ms=180),
Compressor(threshold_db=-18, ratio=1.5, attack_ms=10, release_ms=150),
Gain(gain_db=4),
Limiter(threshold_db=-6, release_ms=200),
]
)
# Convert to float32 if not already
reduced_noise = reduced_noise.astype(np.float32)
# Ensure audio is in the correct range (-1.0 to 1.0)
if np.abs(reduced_noise).max() > 1.0:
reduced_noise = reduced_noise / np.abs(reduced_noise).max()
processed_audio = board(reduced_noise, samplerate)
        # Format handling: reshape to (samples, channels) and keep at most two channels
if len(processed_audio.shape) == 1:
processed_audio = processed_audio.reshape(-1, 1)
elif len(processed_audio.shape) == 2:
if processed_audio.shape[0] < processed_audio.shape[1]:
processed_audio = processed_audio.T
if processed_audio.shape[1] > 2:
processed_audio = processed_audio[:, :2]
        # Loudness normalization
meter = pyln.Meter(samplerate)
min_samples = int(0.4 * samplerate)
if processed_audio.shape[0] < min_samples:
normalized_audio = processed_audio
else:
loudness = meter.integrated_loudness(processed_audio)
safety_factor = 0.7
processed_audio = processed_audio * safety_factor
normalized_audio = pyln.normalize.loudness(
processed_audio, loudness, -16.0
)
max_peak = np.max(np.abs(normalized_audio))
if max_peak > 0.85:
additional_safety_factor = 0.85 / max_peak
normalized_audio = normalized_audio * additional_safety_factor
sf.write(
output_path,
normalized_audio,
samplerate,
format="WAV",
subtype="PCM_16",
)
return output_path
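    # Usage sketch (not from the original source; paths are hypothetical). noise_reduce is
    # synchronous and returns the path of the written 16-bit WAV file:
    #
    #     cleaned_wav = VideoUtils.noise_reduce("/tmp/voice.wav")
    #     cleaned_wav = VideoUtils.noise_reduce("/tmp/voice.wav", noise_sample_path="/tmp/room_tone.wav")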
@staticmethod
def async_ffmpeg_init(use_ffprobe: bool = False, quiet: bool = False) -> AsyncFFmpeg:
if use_ffprobe:
ffmpeg_cmd = AsyncFFmpeg('ffprobe')
else:
ffmpeg_cmd = AsyncFFmpeg().option('y').option('hide_banner')
@ffmpeg_cmd.on("start")
def on_start(arguments: list[str]):
try:
filter_index = arguments.index("-filter_complex")
filter_content = arguments[filter_index + 1]
arguments[filter_index + 1] = f'"{filter_content}"'
args = " ".join(arguments)
logger.info(f"FFmpeg command:{args}")
arguments[filter_index + 1] = filter_content
except ValueError:
args = " ".join(arguments)
logger.info(f"FFmpeg command:{args}")
@ffmpeg_cmd.on("progress")
def on_progress(progress):
if not quiet:
logger.info(f"处理进度: {progress}")
@ffmpeg_cmd.on("completed")
def on_completed():
logger.info(f"FFMpeg task completed.")
@ffmpeg_cmd.on("stderr")
def on_stderr(line: str):
if line.startswith('Error') and ".m3u8" not in line:
logger.error(line)
raise RuntimeError(line)
elif "Output file is empty" in line:
raise RuntimeError("输出是空文件")
else:
if not quiet:
if "Skip" not in line:
logger.warning(line)
return ffmpeg_cmd
@staticmethod
def ffmpeg_init(use_ffprobe: bool = False) -> FFmpeg:
if use_ffprobe:
ffmpeg_cmd = FFmpeg('ffprobe')
else:
ffmpeg_cmd = FFmpeg().option('y').option('hide_banner')
@ffmpeg_cmd.on("start")
def on_start(arguments: list[str]):
try:
filter_index = arguments.index("-filter_complex")
filter_content = arguments[filter_index + 1]
arguments[filter_index + 1] = f'"{filter_content}"'
args = " ".join(arguments)
logger.info(f"FFmpeg command:{args}")
arguments[filter_index + 1] = filter_content
except ValueError:
args = " ".join(arguments)
logger.info(f"FFmpeg command:{args}")
@ffmpeg_cmd.on("progress")
def on_progress(progress):
logger.info(f"处理进度: {progress}")
@ffmpeg_cmd.on("completed")
def on_completed():
logger.info(f"FFMpeg task completed.")
@ffmpeg_cmd.on("stderr")
def on_stderr(line: str):
if line.startswith('Error'):
logger.error(line)
raise RuntimeError(line)
else:
logger.warning(line)
return ffmpeg_cmd
@staticmethod
async def ffmpeg_slice_media(media_path: str, media_markers: List[FFMpegSliceSegment],
options: FFMPEGSliceOptions, is_streams: bool = False,
output_path: Optional[str] = None) -> List[Tuple[str, VideoMetadata]]:
"""
使用本地视频文件按时间段切割出分段视频, 如果是直播流则按时间分段切割HLS视频流_预先多线程下载所有ts
:param media_path: 本地视频路径
:param media_markers: 分段起始结束时间标记
:param options: 输出切割质量选项
:param is_streams: 输入是否为直播流
:param output_path: 最终输出文件路径, 片段会根据指定路径附加_1.mp4 _2.mp4等片段编号
:return: 输出片段的本地路径
"""
ffmpeg_cmd = VideoUtils.async_ffmpeg_init()
if not is_streams:
ffmpeg_cmd.input(media_path)
seek_head = 0
else:
seek_head = media_markers[0].start.total_seconds()
seek_tail = media_markers[-1].end.total_seconds()
duration = seek_tail - seek_head
logger.info(f"Only using {seek_head}s --> {seek_tail}s = {duration}s")
local_m3u8_path, temp_dir, diff = await VideoUtils.convert_m3u8_to_local_source(media_path, head=seek_head,
tail=seek_tail)
logger.info(f"local_playlist: {local_m3u8_path}")
for segment in media_markers:
segment.start = segment.start - timedelta(seconds=seek_head) + diff
segment.end = segment.end - timedelta(seconds=seek_head) + diff
logger.info(f"Only using {seek_head}s --> {seek_tail}s = {duration}s")
ffmpeg_cmd.input(local_m3u8_path,
t=duration,
protocol_whitelist="file,http,https,tcp,tls")
filter_complex: List[str] = []
temp_outputs: List[str] = []
if not output_path:
output_path = FileUtils.file_path_extend(media_path, "slice")
os.makedirs(os.path.dirname(output_path), exist_ok=True)
metadata = VideoUtils.ffprobe_media_metadata(media_path)
for index, marker in enumerate(media_markers):
start = marker.start.total_seconds()
end = marker.end.total_seconds()
            # Apply the requested output resolution, if one was specified
if options.width and options.height:
filter_complex.extend(
[
f"[v:0]trim=start={start}:end={end},scale={options.width}:{options.height},setpts=PTS-STARTPTS[cut{index}]",
f"[a:0]atrim=start={start}:end={end},asetpts=PTS-STARTPTS[acut{index}]",
]
)
else:
filter_complex.extend(
[
f"[v:0]trim=start={start}:end={end},setpts=PTS-STARTPTS[cut{index}]",
f"[a:0]atrim=start={start}:end={end},asetpts=PTS-STARTPTS[acut{index}]",
]
)
ffmpeg_cmd.option('filter_complex', ';'.join(filter_complex))
diff_tolerance = 0.001
for i, marker in enumerate(media_markers):
            if marker.start.total_seconds() > metadata.format.duration or marker.start.total_seconds() < 0:
                raise ValueError(
                    f"Segment {i}: start point {marker.start.total_seconds()}s is outside the video duration range [0-{metadata.format.duration}s]")
            if marker.end.total_seconds() > metadata.format.duration or marker.end.total_seconds() < 0:
                if marker.end.total_seconds() > 0 and math.isclose(marker.end.total_seconds(), metadata.format.duration,
                                                                   rel_tol=diff_tolerance):
                    marker.end = TimeDelta(seconds=metadata.format.duration)
                    logger.warning(
                        f"Segment {i}: end point {marker.end.total_seconds()}s is close to the video duration [0-{metadata.format.duration}s]; clamping to the duration")
                else:
                    raise ValueError(
                        f"Segment {i}: end point {marker.end.total_seconds()}s is outside the video duration range [0-{metadata.format.duration}s]")
segment_output_path = FileUtils.file_path_extend(output_path, str(i))
ffmpeg_options = {
"map": [f"[cut{i}]", f"[acut{i}]"],
"reset_timestamps": "1",
"sc_threshold": "0",
"g": "1",
"force_key_frames": "expr:gte(t,n_forced*1)",
"vcodec": "libx264",
"acodec": "aac",
"crf": options.crf,
"r": options.fps
}
if options.limit_size:
ffmpeg_options["fs"] = options.limit_size
elif options.bit_rate:
ffmpeg_options["b:v"] = options.pretty_bit_rate
ffmpeg_cmd.output(segment_output_path, options=ffmpeg_options)
temp_outputs.append(segment_output_path)
await ffmpeg_cmd.execute()
outputs: List[Tuple[str, VideoMetadata]] = [(output, VideoUtils.ffprobe_media_metadata(output)) for output in
temp_outputs]
return outputs
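    # Usage sketch (not from the original source; paths and option values are hypothetical, and
    # FFMpegSliceSegment / FFMPEGSliceOptions are assumed to accept the fields referenced above):
    #
    #     segments = await VideoUtils.ffmpeg_slice_media(
    #         "/tmp/input.mp4",
    #         [FFMpegSliceSegment(start=TimeDelta(seconds=10), end=TimeDelta(seconds=40))],
    #         FFMPEGSliceOptions(crf=18, fps=30),
    #         output_path="/tmp/sliced.mp4",
    #     )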
@staticmethod
    async def async_download_file(url: str, output_path: Optional[str] = None) -> Optional[str]:
t = 10
while t > 0:
try:
logger.info(f"Downloading {url} to {output_path}")
async with aiohttp.ClientSession(timeout=ClientTimeout(total=60)) as session:
async with session.get(url) as response:
if response.status != 200:
raise Exception(f"Failed to download {url}, status code: {response.status}")
if output_path:
async with aiofiles.open(output_path, 'wb') as f:
await f.write(await response.read())
return output_path
else:
return await response.text()
            except Exception:
                t -= 1
                logger.warning(f"Retrying download of {url} to {output_path}; retries left: {t}")
@staticmethod
async def convert_m3u8_to_local_source(media_stream_url: str,
head: float = 0,
                                           tail: float = 86400,  # use 24 hours as a stand-in for an unbounded duration
                                           temp_dir: Optional[str] = None) -> tuple[str, str, TimeDelta]:
"""
转换m3u8为本地来源
"""
        # Create a temporary directory to store the TS segments
if temp_dir:
os.makedirs(temp_dir, exist_ok=True)
else:
temp_dir = tempfile.mkdtemp()
from m3u8 import SegmentList, Segment
try:
            # 1. Download the m3u8 playlist
playlist = m3u8.load(media_stream_url)
# duration = (tail - head) if head else None
origin_time: datetime = playlist.segments[0].current_program_date_time
logger.info(f"Start Timestamp: {origin_time}")
            # 2. Collect the TS segment URLs within the requested time window
ts_urls: SegmentList[Segment] = SegmentList()
duration = 0
min_head = origin_time + timedelta(seconds=head)
max_head = origin_time + timedelta(seconds=tail)
logger.info(f"min: {min_head}, max: {max_head}")
for segment in playlist.segments:
if min_head - timedelta(seconds=segment.duration) <= segment.current_program_date_time <= max_head:
logger.info(f"duration: {segment.duration}, head: {segment.current_program_date_time}")
duration += segment.duration
ts_urls.append(segment)
if len(ts_urls) > 0:
delta = min_head - ts_urls[0].current_program_date_time
diff = TimeDelta.from_timedelta(delta)
else:
diff = TimeDelta(seconds=0)
logger.info(f"diff = {diff.total_seconds()}")
            # 3. Download the TS segments concurrently
tasks = []
playlist.segments = ts_urls
duration_delta = TimeDelta(seconds=duration)
logger.info(f"count : {len(playlist.segments)}, duration = {duration_delta.toFormatStr()}")
playlist.is_endlist = True
for url in ts_urls:
tasks.append(VideoUtils.async_download_file(url.absolute_uri, f"{temp_dir}/{url.uri}"))
await asyncio.gather(*tasks)
            # 4. Rewrite the m3u8 playlist to point at the local TS segments
local_m3u8_path = os.path.join(temp_dir, "local.m3u8")
playlist.dump(local_m3u8_path)
return local_m3u8_path, temp_dir, diff
except Exception as e:
logger.exception(e)
raise Exception(f"下载TS转换M3U8失败 {e}")
@staticmethod
def purge_temp_ts_dir(temp_dir: str) -> None:
        # Remove the temporary TS files and directory
try:
shutil.rmtree(temp_dir)
except Exception as e:
logger.exception(e)
@staticmethod
async def ffmpeg_convert_stream_media(media_stream_url: str, options: FFMPEGSliceOptions,
output_path: Optional[str] = None) -> tuple[
str, VideoMetadata] | None:
if not output_path:
output_path = FileUtils.file_path_extend(media_stream_url, "convert")
if not output_path.endswith(".mp4"):
output_path = output_path + ".mp4"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
try:
local_m3u8_path, temp_dir, diff = await VideoUtils.convert_m3u8_to_local_source(
media_stream_url=media_stream_url)
            # Merge the TS segments with ffmpeg
ffmpeg_cmd = VideoUtils.async_ffmpeg_init()
ffmpeg_cmd.input(local_m3u8_path,
protocol_whitelist="file,http,https,tcp,tls")
ffmpeg_options = {
"reset_timestamps": "1",
"sc_threshold": "0",
"g": "1",
"force_key_frames": "expr:gte(t,n_forced*1)",
"vcodec": "libx264",
"b:v": options.pretty_bit_rate,
"acodec": "aac",
"crf": options.crf,
"r": options.fps,
}
ffmpeg_cmd.output(output_path, options=ffmpeg_options)
await ffmpeg_cmd.execute()
VideoUtils.purge_temp_ts_dir(temp_dir)
except Exception as e:
logger.exception(f"合并TS失败 {e}")
output: Tuple[str, VideoMetadata] = (output_path, VideoUtils.ffprobe_media_metadata(output_path))
return output
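    # Usage sketch (hypothetical URL and options; FFMPEGSliceOptions is assumed to expose the
    # crf / fps / pretty_bit_rate fields referenced above):
    #
    #     mp4_path, meta = await VideoUtils.ffmpeg_convert_stream_media(
    #         "https://example.com/live/playlist.m3u8", FFMPEGSliceOptions(crf=20, fps=30))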
@staticmethod
async def ffmpeg_concat_medias(media_paths: List[str],
target_width: int = 1080,
target_height: int = 1920,
output_path: Optional[str] = None) -> Tuple[str, VideoMetadata]:
"""
将待处理的视频合并为一个视频
:param media_paths: 待合并的多个视频文件路径
:param target_width: 输出的视频分辨率宽
:param target_height: 输出的视频分辨率高
:param output_path: 指定输出视频路径
:return: 最终合并结果路径,最终合并结果时长
"""
total_videos = len(media_paths)
if total_videos == 0:
raise ValueError("没有可以合并的视频源")
if not output_path:
output_path = FileUtils.file_path_extend(media_paths[0], "concat")
os.makedirs(os.path.dirname(output_path), exist_ok=True)
ffmpeg_cmd = VideoUtils.async_ffmpeg_init()
filter_complex = []
for input_path in media_paths:
ffmpeg_cmd.input(input_path)
        # 2. Normalize format, resolution, and frame rate across all inputs
for i in range(total_videos):
filter_complex.extend(
[
                    # Scale to the target resolution first, then set the frame rate and pixel format
f"[{i}:v]scale={target_width}:{target_height}:force_original_aspect_ratio=decrease,"
f"pad={target_width}:{target_height}:(ow-iw)/2:(oh-ih)/2,"
f"setsar=1:1," # 新增强制设置SAR
f"fps=30,format=yuv420p[v{i}]",
                    # Audio filter: ensure an AAC-compatible output format
                    # f"[{i}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo[a{i}]",
f"[{i}:a]aformat=sample_fmts=s16:sample_rates=44100:channel_layouts=stereo[a{i}]",
]
)
        # 3. Build the concat label chains for the processed video and audio streams
video_streams = "".join(f"[v{i}]" for i in range(total_videos))
audio_streams = "".join(f"[a{i}]" for i in range(total_videos))
        # 4. Concatenate video and audio with the concat filter
filter_complex.extend(
[
f"{video_streams}concat=n={total_videos}:v=1:a=0[vconcated]",
f"{audio_streams}concat=n={total_videos}:v=0:a=1[aconcated]",
]
)
ffmpeg_cmd.output(
output_path,
{
"filter_complex": ";".join(filter_complex),
"map": ["[vconcated]", "[aconcated]"],
"vcodec": "libx264",
"crf": 16,
"r": 30,
"acodec": "aac",
"ar": 44100,
"ac": 2,
"ab": "192k",
},
)
await ffmpeg_cmd.execute()
video_metadata = VideoUtils.ffprobe_media_metadata(output_path)
return output_path, video_metadata
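    # Usage sketch (hypothetical paths): concatenates clips after normalizing them to 1080x1920:
    #
    #     merged, meta = await VideoUtils.ffmpeg_concat_medias(
    #         ["/tmp/a.mp4", "/tmp/b.mp4"], target_width=1080, target_height=1920)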
@staticmethod
async def ffmpeg_extract_audio_async(media_path: str, output_path: Optional[str] = None) -> Tuple[
str, VideoMetadata]:
"""
提取源视频的音频
:param media_path: 待处理的源视频
:param output_path: 指定输出的音频文件路径(可选)
:return: 最终输出音频文件路径,音频文件时长
"""
if not output_path:
            output_path = FileUtils.file_path_change_extension(media_path, 'wav')
os.makedirs(os.path.dirname(output_path), exist_ok=True)
ffprobe_cmd = VideoUtils.async_ffmpeg_init(use_ffprobe=True)
ffprobe_cmd.input(media_path,
v="quiet",
print_format="json",
select_streams="a",
show_entries="stream=codec_type")
audio_check_bytes = await ffprobe_cmd.execute()
audio_check = json.loads(audio_check_bytes)
logger.info(audio_check)
if len(audio_check['streams']) == 0:
raise RuntimeError(f"Media has no audio streams.")
# output_path = f"{output_path_prefix}/extract_audio/outputs/{fn_id}/output.wav"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
ffmpeg_cmd = VideoUtils.async_ffmpeg_init()
ffmpeg_cmd.input(media_path).output(output_path,
map="0:a",
acodec="pcm_s16le",
ar=44100,
ac=1)
await ffmpeg_cmd.execute()
video_metadata = VideoUtils.ffprobe_media_metadata(output_path)
return output_path, video_metadata
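    # Usage sketch (hypothetical paths): extracts a mono 44.1 kHz PCM WAV track:
    #
    #     wav_path, meta = await VideoUtils.ffmpeg_extract_audio_async(
    #         "/tmp/input.mp4", output_path="/tmp/input_audio.wav")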
@staticmethod
async def ffmpeg_mix_bgm(origin_audio_path: str, bgm_audio_path: str, video_volume: float = 1.4,
music_volume: float = 0.1, output_path: Optional[str] = None) -> Tuple[str, VideoMetadata]:
"""
给待处理视频混合BGM
:param origin_audio_path: 待处理的源视频
:param bgm_audio_path: 需要混合的BGM
:param video_volume: 最终输出视频的音量系数
:param music_volume: BGM在源视频音量内占比的音量系数
:param output_path: 指定最终输出的视频路径(可选)
:return: 最终输出视频文件路径,最终输出视频时长
"""
if not output_path:
output_path = FileUtils.file_path_extend(origin_audio_path, "bgm")
os.makedirs(os.path.dirname(output_path), exist_ok=True)
origin_audio_duration = VideoUtils.ffprobe_audio_duration(origin_audio_path)
bgm_duration = VideoUtils.ffprobe_audio_duration(bgm_audio_path)
loops_needed = math.ceil(origin_audio_duration.total_seconds() / bgm_duration.total_seconds())
        ffmpeg_cmd = VideoUtils.async_ffmpeg_init()
ffmpeg_cmd.input(origin_audio_path)
ffmpeg_cmd.input(bgm_audio_path)
        filter_complex = [
            f"[0:a]volume={video_volume}[a1]",
            f"[1:a]aloop=loop={loops_needed}:size={bgm_duration.total_seconds()},volume={music_volume}[a2]",
            "[a1][a2]amix=inputs=2:duration=first[audio]"
        ]
ffmpeg_cmd.output(output_path,
options={"filter_complex": ";".join(filter_complex), },
map="[audio]",
                          acodec='libmp3lame',  # audio codec
                          ar=48000,  # audio sample rate
                          ab='192k',  # audio bitrate
                          ac=2,  # number of audio channels
)
await ffmpeg_cmd.execute()
video_metadata = VideoUtils.ffprobe_media_metadata(output_path)
return output_path, video_metadata
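    # Usage sketch (hypothetical paths): loops the BGM under the voice track and writes an MP3-encoded file:
    #
    #     mixed, meta = await VideoUtils.ffmpeg_mix_bgm(
    #         "/tmp/voice.wav", "/tmp/bgm.mp3", video_volume=1.4, music_volume=0.1)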
@staticmethod
async def ffmpeg_mix_bgm_with_noise_reduce(media_path: str,
bgm_audio_path: str,
video_volume: float = 1.4,
music_volume: float = 0.1,
noise_sample_path: Optional[str] = None,
temp_audio_path: Optional[str] = None,
output_path: Optional[str] = None) -> Tuple[str, VideoMetadata]:
"""
先对待处理的视频音轨降噪再将降噪后的结果添加BGM最终输出降噪过且混合BGM的视频
由于最终视频画面和音轨是同步混合+合成视频,所以处理速度会比分步降噪, 加BGM快
:param media_path: 待处理的原始视频路径
:param bgm_audio_path: 待处理的BGM音频路径
:param video_volume: 最终输出的视频音量系数
:param music_volume: 最终输出的BGM音量系数
:param noise_sample_path: 降噪使用的噪音样本如不指定将使用源视频的前2秒作为样本可选
:param temp_audio_path: 指定暂存音频的路径(可选)
:param output_path: 指定输出视频的路径(可选)
:return: 最终输出视频的路径, 最终输出视频时长
"""
if not output_path:
output_path = FileUtils.file_path_extend(media_path, "bgm_nr")
os.makedirs(os.path.dirname(output_path), exist_ok=True)
if not temp_audio_path:
temp_audio_path = FileUtils.file_path_extend(media_path, "temp")
temp_audio_path = FileUtils.file_path_change_extension(temp_audio_path, "wav")
media_audio, metadata = await VideoUtils.ffmpeg_extract_audio_async(media_path=media_path,
output_path=temp_audio_path)
logger.info(f"media_audio = {media_audio}, metadata = {metadata}")
nr_audio_path = VideoUtils.noise_reduce(audio_path=media_audio, noise_sample_path=noise_sample_path)
logger.info(f"nr_audio_path = {nr_audio_path}")
video_metadata = VideoUtils.ffprobe_video_format(media_path)
origin_audio_duration = VideoUtils.ffprobe_audio_duration(nr_audio_path)
bgm_duration = VideoUtils.ffprobe_audio_duration(bgm_audio_path)
loops_needed = math.ceil(origin_audio_duration.total_seconds() / bgm_duration.total_seconds())
        logger.info(
            f"Looping the {bgm_duration.total_seconds()}s BGM {loops_needed} times to fill {origin_audio_duration.total_seconds()}s of audio")
ffmpeg_cmd = VideoUtils.async_ffmpeg_init()
ffmpeg_cmd.input(media_path)
ffmpeg_cmd.input(nr_audio_path)
ffmpeg_cmd.input(bgm_audio_path)
filter_complex = [
f"[1:a]volume={video_volume}[a1]",
f"[2:a]aloop=loop={loops_needed}:size={bgm_duration.total_seconds()},volume={music_volume}[a2]",
"[a1][a2]amix=inputs=2:duration=first[audio]"
]
ffmpeg_cmd.output(output_path,
options={"filter_complex": ";".join(filter_complex), },
map=["0:v", "[audio]"],
crf=16,
vcodec='libx264',
                          b=video_metadata.video_bitrate,  # video bitrate
                          r=video_metadata.video_frame_rate,  # frame rate
acodec='libmp3lame',
ar=48000, ab='192k', ac=2,
)
await ffmpeg_cmd.execute()
video_metadata = VideoUtils.ffprobe_media_metadata(output_path)
return output_path, video_metadata
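    # Usage sketch (hypothetical paths): denoises the video's audio track and mixes in BGM in one pass:
    #
    #     out_video, meta = await VideoUtils.ffmpeg_mix_bgm_with_noise_reduce(
    #         media_path="/tmp/input.mp4", bgm_audio_path="/tmp/bgm.mp3")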
@staticmethod
async def ffmpeg_overlay_gif(media_path: str, overlay_gif_path: str, output_path: Optional[str] = None) -> Tuple[
str, VideoMetadata]:
"""
将GIF特效叠加到视频上如果视频较长则循环播放GIF
:param media_path: 输入视频路径
:param overlay_gif_path: GIF特效文件路径
:param output_path: 指定输出路径
:return: 输出视频路径, 最终输出视频时长
"""
if not output_path:
output_path = FileUtils.file_path_extend(media_path, "overlay")
os.makedirs(os.path.dirname(output_path), exist_ok=True)
video_metadata = VideoUtils.ffprobe_video_format(media_path)
filter_complex = [
            # Make sure the GIF is decoded correctly and loops
            "[1:v]fps=30,format=rgba[gif]",  # force 30 fps
            # Overlay the GIF on the video, keeping the alpha channel
            "[0:v][gif]overlay=shortest=1:format=auto,setpts=PTS-STARTPTS[v]",
]
ffmpeg_cmd = VideoUtils.async_ffmpeg_init()
ffmpeg_cmd.input(media_path)
        ffmpeg_cmd.input(overlay_gif_path, stream_loop=-1)  # stream_loop makes the GIF loop until the video ends
ffmpeg_cmd.output(output_path,
options={"filter_complex": ";".join(filter_complex), },
map=["[v]", "0:a"],
crf=16,
vcodec='libx264',
                          b=video_metadata.video_bitrate,  # video bitrate
                          r=video_metadata.video_frame_rate,  # frame rate
)
await ffmpeg_cmd.execute()
video_metadata = VideoUtils.ffprobe_media_metadata(output_path)
return output_path, video_metadata
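    # Usage sketch (hypothetical paths; the GIF is expected to carry an alpha channel for a clean overlay):
    #
    #     out_video, meta = await VideoUtils.ffmpeg_overlay_gif("/tmp/input.mp4", "/tmp/sparkle.gif")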
@staticmethod
async def ffmpeg_zoom_loop(media_path: str, duration: float = 6.0, zoom: float = 0.1,
output_path: Optional[str] = None) -> Tuple[str, VideoMetadata]:
"""
视频放大缩小循环特效
:param media_path: 待处理的视频文件路径
:param duration: 视频特效循环时间长度
:param zoom: 视频特效放大缩小系数
:param output_path: 指定输出视频地址(可选)
:return: 最终输出视频地址, 最终输出视频时长
"""
if not output_path:
output_path = FileUtils.file_path_extend(media_path, 'zoomed')
os.makedirs(os.path.dirname(output_path), exist_ok=True)
video_metadata = VideoUtils.ffprobe_video_format(media_path)
        # The abs(sin()) expression doubles the effective oscillation frequency, so double the period to compensate
duration = duration * 2
ffmpeg_cmd = VideoUtils.async_ffmpeg_init()
ffmpeg_cmd.input(media_path).output(output_path,
options={
"vf": f"scale={4 * video_metadata.width}x{4 * video_metadata.height},fps=30,"
f"zoompan=z='1+{zoom}*abs(sin(2*PI*time/{duration}))':"
"x='trunc(iw/2*(1-1/zoom))':"
"y='trunc(ih/2*(1-1/zoom))':"
f"d=1:s={video_metadata.width}x{video_metadata.height}:fps=30"
},
vcodec="libx264",
acodec="copy",
crf=16,
                                            b=video_metadata.video_bitrate,  # video bitrate
                                            r=video_metadata.video_frame_rate,  # frame rate
)
await ffmpeg_cmd.execute()
video_metadata = VideoUtils.ffprobe_media_metadata(output_path)
return output_path, video_metadata
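    # Usage sketch (hypothetical path): one zoom in/out cycle every 6 seconds, up to 10% zoom:
    #
    #     out_video, meta = await VideoUtils.ffmpeg_zoom_loop("/tmp/input.mp4", duration=6.0, zoom=0.1)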
@staticmethod
async def ffmpeg_corner_mirror(media_path: str, mirror_scale_down_size: int = 6,
mirror_from_right: bool = True, mirror_position: tuple[float, float] = (40, 40),
output_path: Optional[str] = None) -> Tuple[str, VideoMetadata]:
"""
对源视频添加镜像小窗特效
:param media_path: 待处理的源视频
:param mirror_scale_down_size: 源视频画面缩放系数
:param mirror_from_right: 小窗原点是否使用右下角
:param mirror_position: 小窗基于原点坐标轴的偏移量
:param output_path: 指定的输出视频路径(可选)
:return: 返回最终输出视频的路径, 最终输出视频时长
"""
if not output_path:
output_path = FileUtils.file_path_extend(media_path, 'mir')
os.makedirs(os.path.dirname(output_path), exist_ok=True)
ffmpeg_cmd = VideoUtils.async_ffmpeg_init()
mirror_x = (
f"main_w-overlay_w-{mirror_position[0]}"
if mirror_from_right
else str(mirror_position[0])
)
filter_complex = [
"[0:v]split[original][mirror]",
f"[mirror]hflip,scale=iw/{mirror_scale_down_size}:-1,format=rgba[flipped]",
"[flipped]split[fm1][fm2]",
f"[fm2]format=gray,geq=lum='255*(1-pow(min(1,2*sqrt(pow(X/W-0.5,2)+pow(Y/H-0.5,2))),1.5))':a='if(lt(pow(X/W-0.5,2)+pow(Y/H-0.5,2),0.15),(1-pow(2*sqrt(pow(X/W-0.5,2)+pow(Y/H-0.5,2)),1.5))*255,0)'[fm2Blur]",
"[fm1][fm2Blur]alphamerge[flipped_blured]",
f"[original][flipped_blured]overlay=x={mirror_x}:y=main_h-overlay_h-{mirror_position[1]}[video]",
]
video_metadata = VideoUtils.ffprobe_video_format(media_path)
ffmpeg_cmd.input(media_path).output(output_path,
options={"filter_complex": ";".join(filter_complex)},
map=["[video]", "0:a"],
vcodec="libx264",
crf=16,
                                            b=video_metadata.video_bitrate,  # video bitrate
                                            r=video_metadata.video_frame_rate  # frame rate
)
await ffmpeg_cmd.execute()
video_metadata = VideoUtils.ffprobe_media_metadata(output_path)
return output_path, video_metadata
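    # Usage sketch (hypothetical path): mirrored window at 1/6 size, offset 40x40 px from the bottom-right corner:
    #
    #     out_video, meta = await VideoUtils.ffmpeg_corner_mirror(
    #         "/tmp/input.mp4", mirror_scale_down_size=6, mirror_from_right=True, mirror_position=(40, 40))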
@staticmethod
async def ffmpeg_subtitle_apply(media_path: str, subtitle_path: Optional[str], embed_subtitle_path: Optional[str],
font_dir: Optional[str], output_path: Optional[str] = None) -> Tuple[
str, VideoMetadata]:
"""
给视频画面叠加字幕需要确保字幕文件为ass字幕并且subtitle文件内设置的字体存在与font_dir文件夹内
:param media_path: 待处理的源视频
:param subtitle_path: ass渲染字幕文件路径
:param embed_subtitle_path: ass/vtt/srt内嵌字幕文件路径
:param font_dir: 字体文件目录路径
:param output_path: 指定输出文件路径(可选)
:return: 返回最终输出视频路径, 最终输出视频时长
"""
if not output_path:
output_path = FileUtils.file_path_extend(media_path, 'sub')
video_metadata = VideoUtils.ffprobe_video_format(media_path)
ffmpeg_cmd = VideoUtils.async_ffmpeg_init()
ffmpeg_cmd.input(media_path)
if embed_subtitle_path:
ffmpeg_cmd.input(embed_subtitle_path)
ffmpeg_options = {
"vcodec": "libx264",
"acodec": "copy",
"crf": 16,
"b": video_metadata.video_bitrate,
"r": video_metadata.video_frame_rate,
"map": ["0:v", "0:a"]
}
if subtitle_path and font_dir:
ffmpeg_options["vf"] = f"subtitles=filename={subtitle_path}:fontsdir={font_dir}"
if embed_subtitle_path:
ffmpeg_options['map'].append("1")
ffmpeg_options['c:s'] = "mov_text"
ffmpeg_options['metadata:s:s:0'] = "language=chi"
ffmpeg_cmd.output(output_path, options=ffmpeg_options)
await ffmpeg_cmd.execute()
video_metadata = VideoUtils.ffprobe_media_metadata(output_path)
return output_path, video_metadata
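    # Usage sketch (hypothetical paths): burns an ASS subtitle track and embeds an SRT stream:
    #
    #     out_video, meta = await VideoUtils.ffmpeg_subtitle_apply(
    #         "/tmp/input.mp4", subtitle_path="/tmp/subs.ass",
    #         embed_subtitle_path="/tmp/subs.srt", font_dir="/tmp/fonts")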
@staticmethod
async def ffmpeg_fill_longest(video_path: str, audio_path: str, output_path: Optional[str] = None) -> Tuple[
str, VideoMetadata]:
"""
用视频循环对齐音频时长,如果短于音频时长则循环填满音频时长,如短于音频时长则裁剪结尾
:param video_path: 使用的视频文件路径
:param audio_path: 匹配的音频文件路径
:param output_path: 指定输出文件地址
:return: 最终输出的文件路径, 最终输出视频详细信息
"""
video_metadata = VideoUtils.ffprobe_video_format(video_path)
audio_duration = VideoUtils.ffprobe_audio_duration(audio_path)
loop_times = 0 if video_metadata.duration > audio_duration.total_seconds() else int(
math.ceil(audio_duration.total_seconds() / video_metadata.duration))
        logger.info(
            f"Video duration = {video_metadata.duration}, audio duration = {audio_duration.total_seconds()}, loops = {loop_times}")
if not output_path:
output_path = FileUtils.file_path_extend(video_path, 'fill')
os.makedirs(os.path.dirname(output_path), exist_ok=True)
ffmpeg_cmd = VideoUtils.async_ffmpeg_init()
ffmpeg_cmd.input(video_path, stream_loop=str(loop_times))
ffmpeg_cmd.input(audio_path)
ffmpeg_cmd.output(output_path,
map=["0:v", "1:a"],
vcodec="copy",
acodec="aac",
shortest=None,
)
await ffmpeg_cmd.execute()
video_metadata = VideoUtils.ffprobe_media_metadata(output_path)
return output_path, video_metadata
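    # Usage sketch (hypothetical paths): loops or trims the video so it matches the narration length:
    #
    #     out_video, meta = await VideoUtils.ffmpeg_fill_longest("/tmp/loop.mp4", "/tmp/narration.wav")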
@staticmethod
async def ffmpeg_extract_frame_image(video_path: str, frame_index: int, seek_time: Optional[TimeDelta] = None,
output_path: Optional[str] = None) -> Tuple[
str, VideoMetadata]:
"""
获取视频的第n帧输出为图片, 并返回图片相关的元数据
"""
if not output_path:
output_path = FileUtils.file_path_extend(video_path, 'cover')
output_path = FileUtils.file_path_change_extension(output_path, 'jpg')
os.makedirs(os.path.dirname(output_path), exist_ok=True)
ffmpeg_cmd = VideoUtils.async_ffmpeg_init()
if seek_time:
ffmpeg_cmd.input(video_path, ss=seek_time.total_seconds())
else:
ffmpeg_cmd.input(video_path)
ffmpeg_cmd.output(output_path, vframes=frame_index)
await ffmpeg_cmd.execute()
image_metadata = VideoUtils.ffprobe_media_metadata(output_path)
return output_path, image_metadata
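    # Usage sketch (hypothetical path): grabs a cover frame one second into the video:
    #
    #     cover, meta = await VideoUtils.ffmpeg_extract_frame_image(
    #         "/tmp/input.mp4", frame_index=1, seek_time=TimeDelta(seconds=1))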
@staticmethod
async def ffmpeg_stream_record_as_hls(stream_url: str,
segments_output_dir: str,
playlist_output_dir: str,
first_segment_duration: float = 2.0,
segment_duration: float = 5.0,
stream_content_timeout: int = 300,
stream_monitor_timeout: int = 36000,
output_file_pattern: str = "%10d.ts"):
os.makedirs(segments_output_dir, exist_ok=True)
ffmpeg_cmd = VideoUtils.async_ffmpeg_init(quiet=True)
# ffmpeg_cmd.option("loglevel", "debug")
ffmpeg_cmd.option("t", stream_monitor_timeout)
ffmpeg_cmd.input(stream_url,
                         protocol_whitelist="file,http,https,tcp,tls",  # for flv input
                         reconnect="1",  # auto reconnect
reconnect_at_eof="1",
reconnect_streamed="1",
reconnect_delay_max="5")
output_playlist = f"{playlist_output_dir}/playlist.m3u8"
ffmpeg_cmd.output(
output_playlist,
f="hls",
hls_init_time=first_segment_duration,
hls_time=segment_duration,
hls_segment_filename=f"{segments_output_dir}/{output_file_pattern}",
hls_segment_type="mpegts",
hls_flags="append_list+independent_segments+program_date_time+split_by_time+discont_start",
hls_playlist_type="event",
hls_list_size=0,
hls_start_number_source="epoch_us",
timeout=stream_content_timeout,
c="copy",
)
await ffmpeg_cmd.execute()
        logger.info("Recording stopped")
return output_playlist
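

# Minimal, self-contained usage sketch (not part of the original module): records a live stream
# into HLS segments using the method above. The URL and directories are hypothetical placeholders.
if __name__ == "__main__":
    async def _demo() -> None:
        playlist = await VideoUtils.ffmpeg_stream_record_as_hls(
            stream_url="https://example.com/live/stream.m3u8",
            segments_output_dir="/tmp/hls/segments",
            playlist_output_dir="/tmp/hls",
            segment_duration=5.0,
        )
        logger.info(f"Recorded playlist: {playlist}")

    asyncio.run(_demo())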