From 1b0f7ea4ae5a329b2619fba921f83fd62b350569 Mon Sep 17 00:00:00 2001
From: "kyj@bowong.ai"
Date: Fri, 11 Jul 2025 16:52:46 +0800
Subject: [PATCH] ADD: two image-editing nodes (ModalClothesMask, ModalEditCustom)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 __init__.py                 |  11 +-
 ext/comfyui_modal_deploy.py |   3 +-
 nodes/llm_nodes.py          | 283 +++++++++++++++++++++++++++---------
 utils/image_utils.py        |  73 ++++++++++
 4 files changed, 295 insertions(+), 75 deletions(-)
 create mode 100644 utils/image_utils.py

diff --git a/__init__.py b/__init__.py
index 8a3485b..61fed63 100644
--- a/__init__.py
+++ b/__init__.py
@@ -1,7 +1,8 @@
 from .nodes.image_face_nodes import FaceDetect, FaceExtract
 from .nodes.image_gesture_nodes import JMGestureCorrect
 from .nodes.image_nodes import SaveImagePath, LoadNetImg, SaveImageWithOutput
-from .nodes.llm_nodes import LLMChat, LLMChatMultiModalImageUpload, LLMChatMultiModalImageTensor, Jinja2RenderTemplate
+from .nodes.llm_nodes import LLMChat, LLMChatMultiModalImageUpload, LLMChatMultiModalImageTensor, Jinja2RenderTemplate, \
+    ModalClothesMask, ModalEditCustom
 from .nodes.object_storage_nodes import COSUpload, COSDownload, S3Download, S3Upload, S3UploadURL
 from .nodes.text_nodes import StringEmptyJudgement, LoadTextLocal, LoadTextOnline, RandomLineSelector
 from .nodes.util_nodes import LogToDB, TaskIdGenerate, TraverseFolder, UnloadAllModels, VodToLocalNode, \
@@ -40,7 +41,9 @@ NODE_CLASS_MAPPINGS = {
     "LLMChatMultiModalImageUpload": LLMChatMultiModalImageUpload,
     "LLMChatMultiModalImageTensor": LLMChatMultiModalImageTensor,
     "Jinja2RenderTemplate": Jinja2RenderTemplate,
-    "JMGestureCorrect": JMGestureCorrect
+    "JMGestureCorrect": JMGestureCorrect,
+    "ModalClothesMask": ModalClothesMask,
+    "ModalEditCustom": ModalEditCustom
 }
 
 NODE_DISPLAY_NAME_MAPPINGS = {
@@ -74,5 +77,7 @@ NODE_DISPLAY_NAME_MAPPINGS = {
     "LLMChatMultiModalImageUpload": "多模态LLM调用-图片Path",
     "LLMChatMultiModalImageTensor": "多模态LLM调用-图片Tensor",
     "Jinja2RenderTemplate": "Jinja2格式Prompt模板渲染",
-    "JMGestureCorrect": "人物侧身图片转为正面图-即梦"
+    "JMGestureCorrect": "人物侧身图片转为正面图-即梦",
+    "ModalClothesMask": "模特指定衣服替换为指定颜色",
+    "ModalEditCustom": "自定义Prompt修改图片"
 }
diff --git a/ext/comfyui_modal_deploy.py b/ext/comfyui_modal_deploy.py
index e86590f..c568519 100644
--- a/ext/comfyui_modal_deploy.py
+++ b/ext/comfyui_modal_deploy.py
@@ -1,4 +1,5 @@
 # 文件名 comfyui_v2.py
+import os
 import subprocess
 
 import modal
@@ -13,7 +14,7 @@ image = (
     .run_commands(
         "comfy --skip-prompt install --fast-deps --nvidia --version 0.3.40"
     )
-    .pip_install_from_pyproject("./pyproject.toml")
+    .pip_install_from_pyproject(os.path.join(os.path.dirname(__file__), "pyproject.toml"))
     .run_commands("comfy node install https://e.coding.net/g-ldyi2063/dev/ComfyUI-CustomNode.git", force_build=True)
     .run_commands("comfy node install https://github.com/yolain/ComfyUI-Easy-Use.git")
     .run_commands("cp -f /root/comfy/ComfyUI/custom_nodes/ComfyUI-CustomNode/ext/nodes_bfl.py /root/comfy/ComfyUI/comfy_api_nodes/nodes_bfl.py")
diff --git a/nodes/llm_nodes.py b/nodes/llm_nodes.py
index 6c0a9a0..9e0105f 100644
--- a/nodes/llm_nodes.py
+++ b/nodes/llm_nodes.py
@@ -5,18 +5,23 @@ import json
 import os
 import re
 from mimetypes import guess_type
+from time import sleep
 from typing import Any, Union
 
 import folder_paths
 import httpx
 import numpy as np
+import requests
 import torch
 from PIL import Image
 from jinja2 import Template, StrictUndefined
+from loguru import logger
 from retry import retry
 
+from ..utils.image_utils import tensor_to_image_bytes, base64_to_tensor
 
-def find_value_recursive(key:str, data:Union[dict, list]) -> str | None | Any:
+
+def find_value_recursive(key: str, data: Union[dict, list]) -> str | None | Any:
     if isinstance(data, dict):
         if key in data:
             return data[key]
@@ -31,6 +36,7 @@ def find_value_recursive(key:str, data:Union[dict, list]) -> str | None | Any:
             if result is not None:
                 return result
 
+
 def image_tensor_to_base64(image):
     pil_image = Image.fromarray(np.clip(255. * image.cpu().numpy().squeeze(), 0, 255).astype(np.uint8))
     # 创建一个BytesIO对象,用于临时存储图像数据
@@ -47,6 +53,7 @@ def image_tensor_to_base64(image):
 
     return encoded_image
 
+
 class LLMChat:
     """llm chat"""
 
@@ -63,8 +70,8 @@ class LLMChat:
                               "deepseek-v3",
                               "deepseek-r1"],),
                 "prompt": ("STRING", {"multiline": True}),
-                "temperature": ("FLOAT",{"default": 0.7, "min": 0.0, "max": 1.0}),
-                "max_tokens": ("INT",{"default": 4096, "min":1, "max":65535}),
+                "temperature": ("FLOAT", {"default": 0.7, "min": 0.0, "max": 1.0}),
+                "max_tokens": ("INT", {"default": 4096, "min": 1, "max": 65535}),
                 "timeout": ("INT", {"default": 120, "min": 30, "max": 900}),
             }
         }
@@ -74,28 +81,28 @@ class LLMChat:
     FUNCTION = "chat"
     CATEGORY = "不忘科技-自定义节点🚩/LLM"
 
-    def chat(self, llm_provider:str, prompt:str, temperature:float, max_tokens:int, timeout:int):
+    def chat(self, llm_provider: str, prompt: str, temperature: float, max_tokens: int, timeout: int):
         @retry(Exception, tries=3, delay=1)
         def _chat():
             try:
                 with httpx.Client(timeout=httpx.Timeout(timeout, connect=15)) as session:
                     resp = session.post("https://gateway.bowong.cc/chat/completions",
-                    headers={
-                        "Content-Type": "application/json",
-                        "Accept": "application/json",
-                        "Authorization": "Bearer auth-bowong7777"
-                    },
-                    json={
-                        "model": llm_provider,
-                        "messages": [
-                            {
-                                "role": "user",
-                                "content": prompt
-                            }
-                        ],
-                        "temperature": temperature,
-                        "max_tokens": max_tokens
-                    })
+                                        headers={
+                                            "Content-Type": "application/json",
+                                            "Accept": "application/json",
+                                            "Authorization": "Bearer auth-bowong7777"
+                                        },
+                                        json={
+                                            "model": llm_provider,
+                                            "messages": [
+                                                {
+                                                    "role": "user",
+                                                    "content": prompt
+                                                }
+                                            ],
+                                            "temperature": temperature,
+                                            "max_tokens": max_tokens
+                                        })
                 resp.raise_for_status()
                 resp = resp.json()
                 content = find_value_recursive("content", resp)
@@ -103,8 +110,10 @@ class LLMChat:
             except Exception as e:
                 raise Exception("llm调用失败 {}".format(e))
             return (content,)
+
         return _chat()
 
+
 class LLMChatMultiModalImageUpload:
     """llm chat"""
 
@@ -119,8 +128,8 @@ class LLMChatMultiModalImageUpload:
                               "gpt-4.1"],),
                 "prompt": ("STRING", {"multiline": True}),
                 "image": (sorted(files), {"image_upload": True}),
-                "temperature": ("FLOAT",{"default": 0.7, "min": 0.0, "max": 1.0}),
-                "max_tokens": ("INT",{"default": 4096, "min":1, "max":65535}),
+                "temperature": ("FLOAT", {"default": 0.7, "min": 0.0, "max": 1.0}),
+                "max_tokens": ("INT", {"default": 4096, "min": 1, "max": 65535}),
                 "timeout": ("INT", {"default": 120, "min": 30, "max": 900}),
             }
         }
@@ -130,7 +139,7 @@ class LLMChatMultiModalImageUpload:
     FUNCTION = "chat"
     CATEGORY = "不忘科技-自定义节点🚩/LLM"
 
-    def chat(self, llm_provider:str, prompt:str, image, temperature:float, max_tokens:int, timeout:int):
+    def chat(self, llm_provider: str, prompt: str, image, temperature: float, max_tokens: int, timeout: int):
         @retry(Exception, tries=3, delay=1)
         def _chat():
             try:
@@ -140,28 +149,29 @@ class LLMChatMultiModalImageUpload:
                     base64_encoded_data = base64.b64encode(image_file.read()).decode('utf-8')
                 with httpx.Client(timeout=httpx.Timeout(timeout, connect=15)) as session:
                     resp = session.post("https://gateway.bowong.cc/chat/completions",
-                    headers={
-                        "Content-Type": "application/json",
-                        "Accept": "application/json",
-                        "Authorization": "Bearer auth-bowong7777"
-                    },
-                    json={
-                        "model": llm_provider,
-                        "messages": [
-                            {
-                                "role": "user",
-                                "content": [
-                                    {"type": "text", "text": prompt},
-                                    {
-                                        "type": "image_url",
-                                        "image_url": {"url":f"data:{mime_type};base64,{base64_encoded_data}"},
-                                    },
-                                ]
-                            }
-                        ],
-                        "temperature": temperature,
-                        "max_tokens": max_tokens
-                    })
+                                        headers={
+                                            "Content-Type": "application/json",
+                                            "Accept": "application/json",
+                                            "Authorization": "Bearer auth-bowong7777"
+                                        },
+                                        json={
+                                            "model": llm_provider,
+                                            "messages": [
+                                                {
+                                                    "role": "user",
+                                                    "content": [
+                                                        {"type": "text", "text": prompt},
+                                                        {
+                                                            "type": "image_url",
+                                                            "image_url": {
+                                                                "url": f"data:{mime_type};base64,{base64_encoded_data}"},
+                                                        },
+                                                    ]
+                                                }
+                                            ],
+                                            "temperature": temperature,
+                                            "max_tokens": max_tokens
+                                        })
                 resp.raise_for_status()
                 resp = resp.json()
                 content = find_value_recursive("content", resp)
@@ -170,8 +180,10 @@ class LLMChatMultiModalImageUpload:
                 # logger.exception("llm调用失败 {}".format(e))
                 raise Exception("llm调用失败 {}".format(e))
             return (content,)
+
         return _chat()
 
+
 class LLMChatMultiModalImageTensor:
     """llm chat"""
 
@@ -183,8 +195,8 @@ class LLMChatMultiModalImageTensor:
                               "gpt-4.1"],),
                 "prompt": ("STRING", {"multiline": True}),
                 "image": ("IMAGE",),
-                "temperature": ("FLOAT",{"default": 0.7, "min": 0.0, "max": 1.0}),
-                "max_tokens": ("INT",{"default": 4096, "min":1, "max":65535}),
+                "temperature": ("FLOAT", {"default": 0.7, "min": 0.0, "max": 1.0}),
+                "max_tokens": ("INT", {"default": 4096, "min": 1, "max": 65535}),
                 "timeout": ("INT", {"default": 120, "min": 30, "max": 900}),
             }
         }
@@ -194,34 +206,35 @@ class LLMChatMultiModalImageTensor:
     FUNCTION = "chat"
     CATEGORY = "不忘科技-自定义节点🚩/LLM"
 
-    def chat(self, llm_provider:str, prompt:str, image:torch.Tensor, temperature:float, max_tokens:int, timeout:int):
+    def chat(self, llm_provider: str, prompt: str, image: torch.Tensor, temperature: float, max_tokens: int,
+             timeout: int):
         @retry(Exception, tries=3, delay=1)
         def _chat():
             try:
                 with httpx.Client(timeout=httpx.Timeout(timeout, connect=15)) as session:
                     resp = session.post("https://gateway.bowong.cc/chat/completions",
-                    headers={
-                        "Content-Type": "application/json",
-                        "Accept": "application/json",
-                        "Authorization": "Bearer auth-bowong7777"
-                    },
-                    json={
-                        "model": llm_provider,
-                        "messages": [
-                            {
-                                "role": "user",
-                                "content": [
-                                    {"type": "text", "text": prompt},
-                                    {
-                                        "type": "image_url",
-                                        "image_url": {"url":image_tensor_to_base64(image)},
-                                    },
-                                ]
-                            }
-                        ],
-                        "temperature": temperature,
-                        "max_tokens": max_tokens
-                    })
+                                        headers={
+                                            "Content-Type": "application/json",
+                                            "Accept": "application/json",
+                                            "Authorization": "Bearer auth-bowong7777"
+                                        },
+                                        json={
+                                            "model": llm_provider,
+                                            "messages": [
+                                                {
+                                                    "role": "user",
+                                                    "content": [
+                                                        {"type": "text", "text": prompt},
+                                                        {
+                                                            "type": "image_url",
+                                                            "image_url": {"url": image_tensor_to_base64(image)},
+                                                        },
+                                                    ]
+                                                }
+                                            ],
+                                            "temperature": temperature,
+                                            "max_tokens": max_tokens
+                                        })
                 resp.raise_for_status()
                 resp = resp.json()
                 content = find_value_recursive("content", resp)
@@ -230,8 +243,10 @@ class LLMChatMultiModalImageTensor:
                 # logger.exception("llm调用失败 {}".format(e))
                 raise Exception("llm调用失败 {}".format(e))
             return (content,)
+
        return _chat()
 
+
 class Jinja2RenderTemplate:
     @classmethod
     def INPUT_TYPES(s):
@@ -266,4 +281,130 @@ class Jinja2RenderTemplate:
         template = Template(template, undefined=StrictUndefined)
 
         # 渲染模板
-        return (template.render(kv_map),)
\ No newline at end of file
+        return (template.render(kv_map),)
+
+
+class ModalClothesMask:
+    @classmethod
+    def INPUT_TYPES(cls):
+        return {
+            "required": {
+                "image": ("IMAGE",),
+                "mask_color": ("STRING", {"default": "绿色"}),
+                "clothes_type": ("STRING", {"default": "裤子"}),
+                "endpoint": ("STRING", {"default": "bowongai-dev--bowong-ai-video-gemini-fastapi-webapp.modal.run"}),
+            },
+        }
+
+    RETURN_TYPES = ("IMAGE",)
+    RETURN_NAMES = ("image",)
+    FUNCTION = "process"
+    OUTPUT_NODE = False
+    CATEGORY = "不忘科技-自定义节点🚩/图片/Gemini图像编辑"
+
+    def process(self, image: torch.Tensor, mask_color: str, clothes_type: str, endpoint: str):
+        try:
+            timeout = 60
+            logger.info("fetching access token")
+            api_key = requests.get(f"https://{endpoint}/google/access-token",
+                                   headers={'Authorization': 'Bearer bowong7777'}, timeout=timeout).json()[
+                "access_token"]
+            img_format = "PNG"
+            logger.info("requesting clothes-mask image edit")
+            job_resp = requests.post(f"https://{endpoint}/google/image/clothes_mark",
+                                     headers={'x-google-api-key': api_key},
+                                     data={
+                                         "mark_clothes_type": clothes_type,
+                                         "mark_color": mask_color,
+                                     },
+                                     files={"origin_image": (
+                                         'image.' + img_format.lower(), tensor_to_image_bytes(image, img_format),
+                                         f'image/{img_format.lower()}')},
+                                     timeout=timeout)
+            job_resp.raise_for_status()
+            job_resp = job_resp.json()
+            if not job_resp["success"]:
+                raise Exception("Modal API request failed")
+            job_id = job_resp["taskId"]
+
+            wait_time = 240
+            interval = 3
+            logger.info("start polling task status")
+            for _ in range(0, wait_time, interval):
+                logger.info("checking task status")
+                result = requests.get(f"https://{endpoint}/google/{job_id}", timeout=timeout)
+                if result.status_code == 200:
+                    result = result.json()
+                    if result["status"] == "success":
+                        logger.success("task succeeded")
+                        image_b64 = json.loads(result["result"])[0]["image_b64"]
+                        image_tensor = base64_to_tensor(image_b64)
+                        return (image_tensor,)
+                    elif "fail" in result["status"].lower():
+                        raise Exception("task failed")
+                sleep(interval)
+            raise Exception("timed out polling task status")
+        except Exception as e:
+            raise Exception(f"ModalClothesMask failed: {e}")
+
+
+class ModalEditCustom:
+    @classmethod
+    def INPUT_TYPES(cls):
+        return {
+            "required": {
+                "image": ("IMAGE",),
+                "prompt": ("STRING", {"default": "将背景去除,输出原尺寸图片"}),
+                "endpoint": ("STRING", {"default": "bowongai-dev--bowong-ai-video-gemini-fastapi-webapp.modal.run"}),
+            },
+        }
+
+    RETURN_TYPES = ("IMAGE",)
+    RETURN_NAMES = ("image",)
+    FUNCTION = "process"
+    OUTPUT_NODE = False
+    CATEGORY = "不忘科技-自定义节点🚩/图片/Gemini图像编辑"
+
+    def process(self, image: torch.Tensor, prompt: str, endpoint: str):
+        try:
+            timeout = 60
+            logger.info("fetching access token")
+            api_key = requests.get(f"https://{endpoint}/google/access-token",
+                                   headers={'Authorization': 'Bearer bowong7777'}, timeout=timeout).json()[
+                "access_token"]
+            img_format = "PNG"
+            logger.info("requesting custom image edit")
+            job_resp = requests.post(f"https://{endpoint}/google/image/edit_custom",
+                                     headers={'x-google-api-key': api_key},
+                                     data={
+                                         "prompt": prompt
+                                     },
+                                     files={"origin_image": (
+                                         'image.' + img_format.lower(), tensor_to_image_bytes(image, img_format),
+                                         f'image/{img_format.lower()}')},
+                                     timeout=timeout)
+            job_resp.raise_for_status()
+            job_resp = job_resp.json()
+            if not job_resp["success"]:
+                raise Exception("Modal API request failed")
+            job_id = job_resp["taskId"]
+
+            wait_time = 240
+            interval = 3
+            logger.info("start polling task status")
+            for _ in range(0, wait_time, interval):
+                logger.info("checking task status")
+                result = requests.get(f"https://{endpoint}/google/{job_id}", timeout=timeout)
+                if result.status_code == 200:
+                    result = result.json()
+                    if result["status"] == "success":
+                        logger.success("task succeeded")
+                        image_b64 = json.loads(result["result"])[0]["image_b64"]
+                        image_tensor = base64_to_tensor(image_b64)
+                        return (image_tensor,)
+                    elif "fail" in result["status"].lower():
+                        raise Exception("task failed")
+                sleep(interval)
+            raise Exception("timed out polling task status")
+        except Exception as e:
+            raise Exception(f"ModalEditCustom failed: {e}")
diff --git a/utils/image_utils.py b/utils/image_utils.py
new file mode 100644
index 0000000..3bc29dd
--- /dev/null
+++ b/utils/image_utils.py
@@ -0,0 +1,73 @@
+import base64
+import io
+
+import torch
+from PIL import Image
+from torchvision import transforms
+
+
+def base64_to_tensor(base64_data: str) -> torch.Tensor:
+    """
+    Convert a "data:image/xxx;base64,xxx" image string to a PyTorch tensor.
+
+    Args:
+        base64_data: Base64-encoded image data, with or without the data-URL prefix.
+
+    Returns:
+        torch.Tensor: tensor of shape [1, H, W, C] (ComfyUI IMAGE format), values in [0, 1].
+    """
+    # split the data-URL prefix from the actual Base64 payload
+    if ';base64,' in base64_data:
+        _, encoded = base64_data.split(';base64,', 1)
+    else:
+        encoded = base64_data  # assume the raw Base64 payload was passed directly
+
+    # decode the Base64 payload
+    decoded_data = base64.b64decode(encoded)
+
+    # open the image with PIL
+    image = Image.open(io.BytesIO(decoded_data))
+
+    # convert to RGB (handles PNG alpha channels and WebP)
+    if image.mode != 'RGB':
+        image = image.convert('RGB')
+
+    # convert to a PyTorch tensor; ToTensor maps [H, W, C] uint8
+    # to [C, H, W] float normalized to [0, 1]
+    transform = transforms.Compose([
+        transforms.ToTensor()
+    ])
+    tensor = transform(image)
+
+    return tensor.unsqueeze(0).permute(0, 2, 3, 1)
+
+
+def tensor_to_image_bytes(tensor: torch.Tensor, format: str = 'PNG') -> bytes:
+    """
+    Convert a PyTorch image tensor to encoded image bytes.
+
+    Args:
+        tensor: [B, H, W, C] ComfyUI IMAGE tensor (or a [C, H, W] tensor), values in [0, 1].
+        format: target image format, e.g. 'PNG' or 'JPEG'.
+
+    Returns:
+        bytes: the encoded image data.
+    """
+    if tensor.dim() == 4:
+        if tensor.shape[0] > 1:
+            print("warning: input batch contains multiple images; only the first is used")
+        tensor = tensor[0]  # take the first image in the batch
+        tensor = tensor.permute(2, 0, 1)  # [H, W, C] -> [C, H, W]
+    # scale to [0, 255] if the tensor is normalized
+    if tensor.max() <= 1.0:
+        tensor = tensor * 255
+
+    # convert to a PIL image
+    image = transforms.ToPILImage()(tensor.byte())
+
+    # serialize to an in-memory byte buffer
+    buffer = io.BytesIO()
+    image.save(buffer, format=format)
+    buffer.seek(0)  # rewind to the start of the buffer
+
+    return buffer.getvalue()
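
Reviewer note: both new nodes share the same submit-then-poll flow against the
Modal service. Below is a minimal standalone sketch of that flow for
smoke-testing the endpoint outside ComfyUI. It assumes the service behaves
exactly as the node code above expects; "input.png" and the prompt text are
illustrative placeholders.

    import json
    import time

    import requests

    ENDPOINT = "bowongai-dev--bowong-ai-video-gemini-fastapi-webapp.modal.run"

    # 1. exchange the service bearer token for a Google API key
    api_key = requests.get(f"https://{ENDPOINT}/google/access-token",
                           headers={"Authorization": "Bearer bowong7777"},
                           timeout=60).json()["access_token"]

    # 2. submit an edit job as a multipart form (prompt + origin_image)
    with open("input.png", "rb") as f:  # placeholder input image
        job = requests.post(f"https://{ENDPOINT}/google/image/edit_custom",
                            headers={"x-google-api-key": api_key},
                            data={"prompt": "remove the background"},
                            files={"origin_image": ("image.png", f, "image/png")},
                            timeout=60).json()
    assert job["success"], "Modal API request failed"

    # 3. poll /google/{taskId} until success, failure, or a 240 s budget runs out
    for _ in range(0, 240, 3):
        resp = requests.get(f"https://{ENDPOINT}/google/{job['taskId']}", timeout=60)
        if resp.status_code == 200:
            body = resp.json()
            if body["status"] == "success":
                # "result" is a JSON string holding a list of {"image_b64": ...}
                image_b64 = json.loads(body["result"])[0]["image_b64"]
                break
            if "fail" in body["status"].lower():
                raise RuntimeError("task failed")
        time.sleep(3)
    else:
        raise RuntimeError("timed out polling task status")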