ADD: add two image-editing nodes

kyj@bowong.ai 2025-07-11 16:52:46 +08:00
parent c91e506fd5
commit 1b0f7ea4ae
4 changed files with 295 additions and 75 deletions

__init__.py
View File

@@ -1,7 +1,8 @@
from .nodes.image_face_nodes import FaceDetect, FaceExtract
from .nodes.image_gesture_nodes import JMGestureCorrect
from .nodes.image_nodes import SaveImagePath, LoadNetImg, SaveImageWithOutput
from .nodes.llm_nodes import LLMChat, LLMChatMultiModalImageUpload, LLMChatMultiModalImageTensor, Jinja2RenderTemplate
from .nodes.llm_nodes import LLMChat, LLMChatMultiModalImageUpload, LLMChatMultiModalImageTensor, Jinja2RenderTemplate, \
ModalClothesMask, ModalEditCustom
from .nodes.object_storage_nodes import COSUpload, COSDownload, S3Download, S3Upload, S3UploadURL
from .nodes.text_nodes import StringEmptyJudgement, LoadTextLocal, LoadTextOnline, RandomLineSelector
from .nodes.util_nodes import LogToDB, TaskIdGenerate, TraverseFolder, UnloadAllModels, VodToLocalNode, \
@@ -40,7 +41,9 @@ NODE_CLASS_MAPPINGS = {
"LLMChatMultiModalImageUpload": LLMChatMultiModalImageUpload,
"LLMChatMultiModalImageTensor": LLMChatMultiModalImageTensor,
"Jinja2RenderTemplate": Jinja2RenderTemplate,
"JMGestureCorrect": JMGestureCorrect
"JMGestureCorrect": JMGestureCorrect,
"ModalClothesMask": ModalClothesMask,
"ModalEditCustom": ModalEditCustom
}
NODE_DISPLAY_NAME_MAPPINGS = {
@@ -74,5 +77,7 @@ NODE_DISPLAY_NAME_MAPPINGS = {
"LLMChatMultiModalImageUpload": "多模态LLM调用-图片Path",
"LLMChatMultiModalImageTensor": "多模态LLM调用-图片Tensor",
"Jinja2RenderTemplate": "Jinja2格式Prompt模板渲染",
"JMGestureCorrect": "人物侧身图片转为正面图-即梦"
"JMGestureCorrect": "人物侧身图片转为正面图-即梦",
"ModalClothesMask": "模特指定衣服替换为指定颜色",
"ModalEditCustom": "自定义Prompt修改图片"
}

comfyui_v2.py
View File

@@ -1,4 +1,5 @@
# filename: comfyui_v2.py
import os
import subprocess
import modal
@@ -13,7 +14,7 @@ image = (
.run_commands(
"comfy --skip-prompt install --fast-deps --nvidia --version 0.3.40"
)
.pip_install_from_pyproject("./pyproject.toml")
.pip_install_from_pyproject(os.path.join(os.path.dirname(__file__), "pyproject.toml"))
.run_commands("comfy node install https://e.coding.net/g-ldyi2063/dev/ComfyUI-CustomNode.git", force_build=True)
.run_commands("comfy node install https://github.com/yolain/ComfyUI-Easy-Use.git")
.run_commands("cp -f /root/comfy/ComfyUI/custom_nodes/ComfyUI-CustomNode/ext/nodes_bfl.py /root/comfy/ComfyUI/comfy_api_nodes/nodes_bfl.py")

nodes/llm_nodes.py
View File

@@ -5,18 +5,23 @@ import json
import os
import re
from mimetypes import guess_type
from time import sleep
from typing import Any, Union
import folder_paths
import httpx
import numpy as np
import requests
import torch
from PIL import Image
from jinja2 import Template, StrictUndefined
from loguru import logger
from retry import retry
from ..utils.image_utils import tensor_to_image_bytes, base64_to_tensor
def find_value_recursive(key:str, data:Union[dict, list]) -> str | None | Any:
def find_value_recursive(key: str, data: Union[dict, list]) -> str | None | Any:
if isinstance(data, dict):
if key in data:
return data[key]
@@ -31,6 +36,7 @@ def find_value_recursive(key:str, data:Union[dict, list]) -> str | None | Any:
if result is not None:
return result
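find_value_recursive does a depth-first search and returns the first value stored under the given key anywhere in a nested dict/list structure; the LLM nodes below use it to pull "content" out of a gateway response without hard-coding the response schema. A quick illustration (payload invented for the example):

resp = {"choices": [{"message": {"role": "assistant", "content": "hi"}}]}
assert find_value_recursive("content", resp) == "hi"  # found two levels deep
assert find_value_recursive("missing", resp) is None  # absent keys fall through to None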
def image_tensor_to_base64(image):
pil_image = Image.fromarray(np.clip(255. * image.cpu().numpy().squeeze(), 0, 255).astype(np.uint8))
# Create a BytesIO object to temporarily hold the image data
@@ -47,6 +53,7 @@ def image_tensor_to_base64(image):
return encoded_image
class LLMChat:
"""llm chat"""
@@ -63,8 +70,8 @@ class LLMChat:
"deepseek-v3",
"deepseek-r1"],),
"prompt": ("STRING", {"multiline": True}),
"temperature": ("FLOAT",{"default": 0.7, "min": 0.0, "max": 1.0}),
"max_tokens": ("INT",{"default": 4096, "min":1, "max":65535}),
"temperature": ("FLOAT", {"default": 0.7, "min": 0.0, "max": 1.0}),
"max_tokens": ("INT", {"default": 4096, "min": 1, "max": 65535}),
"timeout": ("INT", {"default": 120, "min": 30, "max": 900}),
}
}
@@ -74,28 +81,28 @@ class LLMChat:
FUNCTION = "chat"
CATEGORY = "不忘科技-自定义节点🚩/LLM"
def chat(self, llm_provider:str, prompt:str, temperature:float, max_tokens:int, timeout:int):
def chat(self, llm_provider: str, prompt: str, temperature: float, max_tokens: int, timeout: int):
@retry(Exception, tries=3, delay=1)
def _chat():
try:
with httpx.Client(timeout=httpx.Timeout(timeout, connect=15)) as session:
resp = session.post("https://gateway.bowong.cc/chat/completions",
headers={
"Content-Type": "application/json",
"Accept": "application/json",
"Authorization": "Bearer auth-bowong7777"
},
json={
"model": llm_provider,
"messages": [
{
"role": "user",
"content": prompt
}
],
"temperature": temperature,
"max_tokens": max_tokens
})
headers={
"Content-Type": "application/json",
"Accept": "application/json",
"Authorization": "Bearer auth-bowong7777"
},
json={
"model": llm_provider,
"messages": [
{
"role": "user",
"content": prompt
}
],
"temperature": temperature,
"max_tokens": max_tokens
})
resp.raise_for_status()
resp = resp.json()
content = find_value_recursive("content", resp)
@@ -103,8 +110,10 @@ class LLMChat:
except Exception as e:
raise Exception("llm调用失败 {}".format(e))
return (content,)
return _chat()
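LLMChat wraps the HTTP exchange in an inner _chat() decorated with @retry(Exception, tries=3, delay=1), so a transient gateway failure is retried up to three times with a one-second pause before the exception surfaces. Stripped of the node plumbing, the request it issues is roughly the sketch below (quick_chat is a hypothetical name, and the final indexing assumes an OpenAI-compatible response shape, which the node itself avoids assuming via find_value_recursive):

import httpx

def quick_chat(prompt: str, model: str = "deepseek-v3", timeout: int = 120) -> str:
    with httpx.Client(timeout=httpx.Timeout(timeout, connect=15)) as session:
        resp = session.post(
            "https://gateway.bowong.cc/chat/completions",
            headers={"Authorization": "Bearer auth-bowong7777"},
            json={
                "model": model,
                "messages": [{"role": "user", "content": prompt}],
                "temperature": 0.7,
                "max_tokens": 4096,
            },
        )
        resp.raise_for_status()
        return resp.json()["choices"][0]["message"]["content"]  # OpenAI-shaped reply assumed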
class LLMChatMultiModalImageUpload:
"""llm chat"""
@@ -119,8 +128,8 @@ class LLMChatMultiModalImageUpload:
"gpt-4.1"],),
"prompt": ("STRING", {"multiline": True}),
"image": (sorted(files), {"image_upload": True}),
"temperature": ("FLOAT",{"default": 0.7, "min": 0.0, "max": 1.0}),
"max_tokens": ("INT",{"default": 4096, "min":1, "max":65535}),
"temperature": ("FLOAT", {"default": 0.7, "min": 0.0, "max": 1.0}),
"max_tokens": ("INT", {"default": 4096, "min": 1, "max": 65535}),
"timeout": ("INT", {"default": 120, "min": 30, "max": 900}),
}
}
@@ -130,7 +139,7 @@ class LLMChatMultiModalImageUpload:
FUNCTION = "chat"
CATEGORY = "不忘科技-自定义节点🚩/LLM"
def chat(self, llm_provider:str, prompt:str, image, temperature:float, max_tokens:int, timeout:int):
def chat(self, llm_provider: str, prompt: str, image, temperature: float, max_tokens: int, timeout: int):
@retry(Exception, tries=3, delay=1)
def _chat():
try:
@@ -140,28 +149,29 @@ class LLMChatMultiModalImageUpload:
base64_encoded_data = base64.b64encode(image_file.read()).decode('utf-8')
with httpx.Client(timeout=httpx.Timeout(timeout, connect=15)) as session:
resp = session.post("https://gateway.bowong.cc/chat/completions",
headers={
"Content-Type": "application/json",
"Accept": "application/json",
"Authorization": "Bearer auth-bowong7777"
},
json={
"model": llm_provider,
"messages": [
{
"role": "user",
"content": [
{"type": "text", "text": prompt},
{
"type": "image_url",
"image_url": {"url":f"data:{mime_type};base64,{base64_encoded_data}"},
},
]
}
],
"temperature": temperature,
"max_tokens": max_tokens
})
headers={
"Content-Type": "application/json",
"Accept": "application/json",
"Authorization": "Bearer auth-bowong7777"
},
json={
"model": llm_provider,
"messages": [
{
"role": "user",
"content": [
{"type": "text", "text": prompt},
{
"type": "image_url",
"image_url": {
"url": f"data:{mime_type};base64,{base64_encoded_data}"},
},
]
}
],
"temperature": temperature,
"max_tokens": max_tokens
})
resp.raise_for_status()
resp = resp.json()
content = find_value_recursive("content", resp)
@@ -170,8 +180,10 @@ class LLMChatMultiModalImageUpload:
# logger.exception("llm调用失败 {}".format(e))
raise Exception("llm调用失败 {}".format(e))
return (content,)
return _chat()
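LLMChatMultiModalImageUpload inlines the uploaded image as a data URL: it sniffs the MIME type from the filename, base64-encodes the file bytes, and prefixes data:<mime>;base64,. A self-contained sketch of that construction (the helper name file_to_data_url is hypothetical, and the fallback default is an assumption, since the node's handling of unknown extensions is not shown):

import base64
from mimetypes import guess_type

def file_to_data_url(path: str) -> str:
    mime_type, _ = guess_type(path)       # e.g. ('image/png', None)
    mime_type = mime_type or "image/png"  # assumed fallback for unknown extensions
    with open(path, "rb") as f:
        payload = base64.b64encode(f.read()).decode("utf-8")
    return f"data:{mime_type};base64,{payload}"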
class LLMChatMultiModalImageTensor:
"""llm chat"""
@@ -183,8 +195,8 @@ class LLMChatMultiModalImageTensor:
"gpt-4.1"],),
"prompt": ("STRING", {"multiline": True}),
"image": ("IMAGE",),
"temperature": ("FLOAT",{"default": 0.7, "min": 0.0, "max": 1.0}),
"max_tokens": ("INT",{"default": 4096, "min":1, "max":65535}),
"temperature": ("FLOAT", {"default": 0.7, "min": 0.0, "max": 1.0}),
"max_tokens": ("INT", {"default": 4096, "min": 1, "max": 65535}),
"timeout": ("INT", {"default": 120, "min": 30, "max": 900}),
}
}
@@ -194,34 +206,35 @@ class LLMChatMultiModalImageTensor:
FUNCTION = "chat"
CATEGORY = "不忘科技-自定义节点🚩/LLM"
def chat(self, llm_provider:str, prompt:str, image:torch.Tensor, temperature:float, max_tokens:int, timeout:int):
def chat(self, llm_provider: str, prompt: str, image: torch.Tensor, temperature: float, max_tokens: int,
timeout: int):
@retry(Exception, tries=3, delay=1)
def _chat():
try:
with httpx.Client(timeout=httpx.Timeout(timeout, connect=15)) as session:
resp = session.post("https://gateway.bowong.cc/chat/completions",
headers={
"Content-Type": "application/json",
"Accept": "application/json",
"Authorization": "Bearer auth-bowong7777"
},
json={
"model": llm_provider,
"messages": [
{
"role": "user",
"content": [
{"type": "text", "text": prompt},
{
"type": "image_url",
"image_url": {"url":image_tensor_to_base64(image)},
},
]
}
],
"temperature": temperature,
"max_tokens": max_tokens
})
headers={
"Content-Type": "application/json",
"Accept": "application/json",
"Authorization": "Bearer auth-bowong7777"
},
json={
"model": llm_provider,
"messages": [
{
"role": "user",
"content": [
{"type": "text", "text": prompt},
{
"type": "image_url",
"image_url": {"url": image_tensor_to_base64(image)},
},
]
}
],
"temperature": temperature,
"max_tokens": max_tokens
})
resp.raise_for_status()
resp = resp.json()
content = find_value_recursive("content", resp)
@@ -230,8 +243,10 @@ class LLMChatMultiModalImageTensor:
# logger.exception("llm调用失败 {}".format(e))
raise Exception("llm调用失败 {}".format(e))
return (content,)
return _chat()
class Jinja2RenderTemplate:
@classmethod
def INPUT_TYPES(s):
@@ -266,4 +281,130 @@ class Jinja2RenderTemplate:
template = Template(template, undefined=StrictUndefined)
# Render the template
return (template.render(kv_map),)
return (template.render(kv_map),)
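Jinja2RenderTemplate constructs the template with undefined=StrictUndefined, so a variable missing from kv_map raises an error instead of silently rendering as an empty string. For example:

from jinja2 import Template, StrictUndefined

tpl = Template("A {{ color }} {{ item }}.", undefined=StrictUndefined)
tpl.render({"color": "red", "item": "dress"})  # 'A red dress.'
tpl.render({"color": "red"})                   # raises jinja2.exceptions.UndefinedError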
class ModalClothesMask:
@classmethod
def INPUT_TYPES(cls):
return {
"required": {
"image": ("IMAGE",),
"mask_color": ("STRING", {"default": "绿色"}),
"clothes_type": ("STRING", {"default": "裤子"}),
"endpoint": ("STRING", {"default": "bowongai-dev--bowong-ai-video-gemini-fastapi-webapp.modal.run"}),
},
}
RETURN_TYPES = ("IMAGE",)
RETURN_NAMES = ("image",)
FUNCTION = "process"
OUTPUT_NODE = False
CATEGORY = "不忘科技-自定义节点🚩/图片/Gemini图像编辑"
def process(self, image: torch.Tensor, mask_color: str, clothes_type: str, endpoint: str):
try:
timeout = 60
logger.info("获取token")
api_key = requests.get(f"https://{endpoint}/google/access-token",
headers={'Authorization': 'Bearer bowong7777'}, timeout=timeout).json()[
"access_token"]
format = "PNG"
logger.info("请求图像编辑")
job_resp = requests.post(f"https://{endpoint}/google/image/clothes_mark",
headers={'x-google-api-key': api_key},
data={
"mark_clothes_type": clothes_type,
"mark_color": mask_color,
},
files={"origin_image": (
'image.' + format.lower(), tensor_to_image_bytes(image, format),
f'image/{format.lower()}')},
timeout=timeout)
job_resp.raise_for_status()
job_resp = job_resp.json()
if not job_resp["success"]:
raise Exception("请求Modal API失败")
job_id = job_resp["taskId"]
wait_time = 240
interval = 3
logger.info("开始轮询任务状态")
for _ in range(0, wait_time, interval):
logger.info("查询任务状态")
result = requests.get(f"https://{endpoint}/google/{job_id}", timeout=timeout)
if result.status_code == 200:
result = result.json()
if result["status"] == "success":
logger.success("任务成功")
image_b64 = json.loads(result["result"])[0]["image_b64"]
image_tensor = base64_to_tensor(image_b64)
return (image_tensor,)
elif "fail" in result["status"].lower():
raise Exception("任务失败")
sleep(interval)
raise Exception("查询任务状态超时")
except Exception as e:
raise Exception(e)
class ModalEditCustom:
@classmethod
def INPUT_TYPES(cls):
return {
"required": {
"image": ("IMAGE",),
"prompt": ("STRING", {"default": "将背景去除,输出原尺寸图片"}),
"endpoint": ("STRING", {"default": "bowongai-dev--bowong-ai-video-gemini-fastapi-webapp.modal.run"}),
},
}
RETURN_TYPES = ("IMAGE",)
RETURN_NAMES = ("image",)
FUNCTION = "process"
OUTPUT_NODE = False
CATEGORY = "不忘科技-自定义节点🚩/图片/Gemini图像编辑"
def process(self, image: torch.Tensor, prompt: str, endpoint: str):
try:
timeout = 60
logger.info("获取token")
api_key = requests.get(f"https://{endpoint}/google/access-token",
headers={'Authorization': 'Bearer bowong7777'}, timeout=timeout).json()[
"access_token"]
format = "PNG"
logger.info("请求图像编辑")
job_resp = requests.post(f"https://{endpoint}/google/image/edit_custom",
headers={'x-google-api-key': api_key},
data={
"prompt": prompt
},
files={"origin_image": (
'image.' + format.lower(), tensor_to_image_bytes(image, format),
f'image/{format.lower()}')},
timeout=timeout)
job_resp.raise_for_status()
job_resp = job_resp.json()
if not job_resp["success"]:
raise Exception("请求Modal API失败")
job_id = job_resp["taskId"]
wait_time = 240
interval = 3
logger.info("开始轮询任务状态")
for _ in range(0, wait_time, interval):
logger.info("查询任务状态")
result = requests.get(f"https://{endpoint}/google/{job_id}", timeout=timeout)
if result.status_code == 200:
result = result.json()
if result["status"] == "success":
logger.success("任务成功")
image_b64 = json.loads(result["result"])[0]["image_b64"]
image_tensor = base64_to_tensor(image_b64)
return (image_tensor,)
elif "fail" in result["status"].lower():
raise Exception("任务失败")
sleep(interval)
raise Exception("查询任务状态超时")
except Exception as e:
raise Exception(e)
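ModalClothesMask and ModalEditCustom differ only in the endpoint path and the form fields they post; the token fetch, job submission, and polling loop are otherwise duplicated verbatim. A hedged sketch of how the shared polling could be factored out (_poll_modal_job is an invented name, not part of the commit):

import json
from time import sleep

import requests

def _poll_modal_job(endpoint: str, job_id: str, wait_time: int = 240,
                    interval: int = 3, timeout: int = 60) -> str:
    for _ in range(0, wait_time, interval):
        result = requests.get(f"https://{endpoint}/google/{job_id}", timeout=timeout)
        if result.status_code == 200:
            body = result.json()
            if body["status"] == "success":
                # "result" holds a JSON string encoding a list of {"image_b64": ...}
                return json.loads(body["result"])[0]["image_b64"]
            if "fail" in body["status"].lower():
                raise Exception("任务失败")
        sleep(interval)
    raise Exception("查询任务状态超时")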

utils/image_utils.py Normal file (73 lines)
View File

@@ -0,0 +1,73 @@
import base64
import io
import torch
from PIL import Image
from torchvision import transforms
def base64_to_tensor(base64_data: str) -> torch.Tensor:
"""
"data:image/xxx;base64,xxx"格式的图像数据转换为PyTorch张量
参数:
base64_data: 图像的Base64编码字符串
返回:
torch.Tensor: 形状为[C, H, W]的张量取值范围为[0, 1]
"""
# Split the data-URL prefix from the raw base64 payload
if ';base64,' in base64_data:
_, encoded = base64_data.split(';base64,', 1)
else:
encoded = base64_data # assume the raw base64 payload was passed directly
# Decode the base64 data
decoded_data = base64.b64decode(encoded)
# Open the image with PIL
image = Image.open(io.BytesIO(decoded_data))
# Convert to RGB (handles PNG alpha channels and WebP)
if image.mode != 'RGB':
image = image.convert('RGB')
# Convert to a PyTorch tensor (transforms is already imported at module level)
transform = transforms.Compose([
transforms.ToTensor() # [H, W, C] -> [C, H, W], normalized to [0, 1]
])
tensor = transform(image)
return tensor.unsqueeze(0).permute(0, 2, 3, 1)
def tensor_to_image_bytes(tensor: torch.Tensor, format: str = 'PNG') -> bytes:
"""
将PyTorch张量转换为图像字节流
参数:
tensor: 形状为[C, H, W]的图像张量取值范围为[0, 1]
format: 图像格式可选'PNG''JPEG'
返回:
bytes: 图像的字节流数据
"""
if tensor.dim() == 4:
if tensor.shape[0] > 1:
print("警告:输入张量包含多个图像,仅使用第一个")
tensor = tensor[0] # take the first image in the batch
tensor = tensor.permute(2, 0, 1)
# Scale to [0, 255] if the tensor is normalized
if tensor.max() <= 1.0:
tensor = tensor * 255
# Convert to a PIL image
image = transforms.ToPILImage()(tensor.byte())
# Encode into an in-memory byte buffer
buffer = io.BytesIO()
image.save(buffer, format=format)
buffer.seek(0) # rewind to the start
return buffer.getvalue()
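A quick round-trip ties the two helpers together and confirms that the ComfyUI-style [B, H, W, C] layout survives encoding (a sanity check, not part of the module):

import base64
import torch

img = torch.rand(1, 64, 64, 3)                 # ComfyUI IMAGE: batch of one
png = tensor_to_image_bytes(img, format="PNG")
b64 = "data:image/png;base64," + base64.b64encode(png).decode()
back = base64_to_tensor(b64)
print(back.shape)                              # torch.Size([1, 64, 64, 3])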