From 1b0f7ea4ae5a329b2619fba921f83fd62b350569 Mon Sep 17 00:00:00 2001
From: "kyj@bowong.ai"
Date: Fri, 11 Jul 2025 16:52:46 +0800
Subject: [PATCH] ADD: two image-editing nodes (ModalClothesMask, ModalEditCustom)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 __init__.py                 |  11 +-
 ext/comfyui_modal_deploy.py |   3 +-
 nodes/llm_nodes.py          | 283 +++++++++++++++++++++++++++---------
 utils/image_utils.py        |  73 ++++++++++
 4 files changed, 295 insertions(+), 75 deletions(-)
 create mode 100644 utils/image_utils.py

diff --git a/__init__.py b/__init__.py
index 8a3485b..61fed63 100644
--- a/__init__.py
+++ b/__init__.py
@@ -1,7 +1,8 @@
 from .nodes.image_face_nodes import FaceDetect, FaceExtract
 from .nodes.image_gesture_nodes import JMGestureCorrect
 from .nodes.image_nodes import SaveImagePath, LoadNetImg, SaveImageWithOutput
-from .nodes.llm_nodes import LLMChat, LLMChatMultiModalImageUpload, LLMChatMultiModalImageTensor, Jinja2RenderTemplate
+from .nodes.llm_nodes import LLMChat, LLMChatMultiModalImageUpload, LLMChatMultiModalImageTensor, Jinja2RenderTemplate, \
+    ModalClothesMask, ModalEditCustom
 from .nodes.object_storage_nodes import COSUpload, COSDownload, S3Download, S3Upload, S3UploadURL
 from .nodes.text_nodes import StringEmptyJudgement, LoadTextLocal, LoadTextOnline, RandomLineSelector
 from .nodes.util_nodes import LogToDB, TaskIdGenerate, TraverseFolder, UnloadAllModels, VodToLocalNode, \
@@ -40,7 +41,9 @@ NODE_CLASS_MAPPINGS = {
     "LLMChatMultiModalImageUpload": LLMChatMultiModalImageUpload,
     "LLMChatMultiModalImageTensor": LLMChatMultiModalImageTensor,
     "Jinja2RenderTemplate": Jinja2RenderTemplate,
-    "JMGestureCorrect": JMGestureCorrect
+    "JMGestureCorrect": JMGestureCorrect,
+    "ModalClothesMask": ModalClothesMask,
+    "ModalEditCustom": ModalEditCustom
 }
 
 NODE_DISPLAY_NAME_MAPPINGS = {
@@ -74,5 +77,7 @@ NODE_DISPLAY_NAME_MAPPINGS = {
     "LLMChatMultiModalImageUpload": "多模态LLM调用-图片Path",
     "LLMChatMultiModalImageTensor": "多模态LLM调用-图片Tensor",
     "Jinja2RenderTemplate": "Jinja2格式Prompt模板渲染",
-    "JMGestureCorrect": "人物侧身图片转为正面图-即梦"
+    "JMGestureCorrect": "人物侧身图片转为正面图-即梦",
+    "ModalClothesMask": "模特指定衣服替换为指定颜色",
+    "ModalEditCustom": "自定义Prompt修改图片"
 }
diff --git a/ext/comfyui_modal_deploy.py b/ext/comfyui_modal_deploy.py
index e86590f..c568519 100644
--- a/ext/comfyui_modal_deploy.py
+++ b/ext/comfyui_modal_deploy.py
@@ -1,4 +1,5 @@
 # 文件名 comfyui_v2.py
+import os
 import subprocess
 
 import modal
@@ -13,7 +14,7 @@ image = (
     .run_commands(
         "comfy --skip-prompt install --fast-deps --nvidia --version 0.3.40"
     )
-    .pip_install_from_pyproject("./pyproject.toml")
+    .pip_install_from_pyproject(os.path.join(os.path.dirname(__file__), "pyproject.toml"))
     .run_commands("comfy node install https://e.coding.net/g-ldyi2063/dev/ComfyUI-CustomNode.git", force_build=True)
     .run_commands("comfy node install https://github.com/yolain/ComfyUI-Easy-Use.git")
     .run_commands("cp -f /root/comfy/ComfyUI/custom_nodes/ComfyUI-CustomNode/ext/nodes_bfl.py /root/comfy/ComfyUI/comfy_api_nodes/nodes_bfl.py")
diff --git a/nodes/llm_nodes.py b/nodes/llm_nodes.py
index 6c0a9a0..9e0105f 100644
--- a/nodes/llm_nodes.py
+++ b/nodes/llm_nodes.py
@@ -5,18 +5,23 @@ import json
 import os
 import re
 from mimetypes import guess_type
+from time import sleep
 from typing import Any, Union
 
 import folder_paths
 import httpx
 import numpy as np
+import requests
 import torch
 from PIL import Image
 from jinja2 import Template, StrictUndefined
+from loguru import logger
 from retry import retry
 
+from ..utils.image_utils import tensor_to_image_bytes, base64_to_tensor
 
-def find_value_recursive(key:str, data:Union[dict, list]) -> str | None | Any:
+
+def find_value_recursive(key: str, data: Union[dict, list]) -> str | None | Any:
     if isinstance(data, dict):
         if key in data:
             return data[key]
@@ -31,6 +36,7 @@ def find_value_recursive(key:str, data:Union[dict, list]) -> str | None | Any:
             if result is not None:
                 return result
 
+
 def image_tensor_to_base64(image):
     pil_image = Image.fromarray(np.clip(255. * image.cpu().numpy().squeeze(), 0, 255).astype(np.uint8))
     # 创建一个BytesIO对象,用于临时存储图像数据
@@ -47,6 +53,7 @@ def image_tensor_to_base64(image):
 
     return encoded_image
 
+
 class LLMChat:
     """llm chat"""
 
@@ -63,8 +70,8 @@ class LLMChat:
                               "deepseek-v3",
                               "deepseek-r1"],),
                 "prompt": ("STRING", {"multiline": True}),
-                "temperature": ("FLOAT",{"default": 0.7, "min": 0.0, "max": 1.0}),
-                "max_tokens": ("INT",{"default": 4096, "min":1, "max":65535}),
+                "temperature": ("FLOAT", {"default": 0.7, "min": 0.0, "max": 1.0}),
+                "max_tokens": ("INT", {"default": 4096, "min": 1, "max": 65535}),
                 "timeout": ("INT", {"default": 120, "min": 30, "max": 900}),
             }
         }
@@ -74,28 +81,28 @@ class LLMChat:
     FUNCTION = "chat"
     CATEGORY = "不忘科技-自定义节点🚩/LLM"
 
-    def chat(self, llm_provider:str, prompt:str, temperature:float, max_tokens:int, timeout:int):
+    def chat(self, llm_provider: str, prompt: str, temperature: float, max_tokens: int, timeout: int):
         @retry(Exception, tries=3, delay=1)
         def _chat():
             try:
                 with httpx.Client(timeout=httpx.Timeout(timeout, connect=15)) as session:
                     resp = session.post("https://gateway.bowong.cc/chat/completions",
-                    headers={
-                        "Content-Type": "application/json",
-                        "Accept": "application/json",
-                        "Authorization": "Bearer auth-bowong7777"
-                    },
-                    json={
-                        "model": llm_provider,
-                        "messages": [
-                            {
-                                "role": "user",
-                                "content": prompt
-                            }
-                        ],
-                        "temperature": temperature,
-                        "max_tokens": max_tokens
-                    })
+                                        headers={
+                                            "Content-Type": "application/json",
+                                            "Accept": "application/json",
+                                            "Authorization": "Bearer auth-bowong7777"
+                                        },
+                                        json={
+                                            "model": llm_provider,
+                                            "messages": [
+                                                {
+                                                    "role": "user",
+                                                    "content": prompt
+                                                }
+                                            ],
+                                            "temperature": temperature,
+                                            "max_tokens": max_tokens
+                                        })
                 resp.raise_for_status()
                 resp = resp.json()
                 content = find_value_recursive("content", resp)
@@ -103,8 +110,10 @@ class LLMChat:
             except Exception as e:
                 raise Exception("llm调用失败 {}".format(e))
             return (content,)
+
         return _chat()
 
+
 class LLMChatMultiModalImageUpload:
     """llm chat"""
 
@@ -119,8 +128,8 @@ class LLMChatMultiModalImageUpload:
                               "gpt-4.1"],),
                 "prompt": ("STRING", {"multiline": True}),
                 "image": (sorted(files), {"image_upload": True}),
-                "temperature": ("FLOAT",{"default": 0.7, "min": 0.0, "max": 1.0}),
-                "max_tokens": ("INT",{"default": 4096, "min":1, "max":65535}),
+                "temperature": ("FLOAT", {"default": 0.7, "min": 0.0, "max": 1.0}),
+                "max_tokens": ("INT", {"default": 4096, "min": 1, "max": 65535}),
                 "timeout": ("INT", {"default": 120, "min": 30, "max": 900}),
             }
         }
@@ -130,7 +139,7 @@ class LLMChatMultiModalImageUpload:
     FUNCTION = "chat"
     CATEGORY = "不忘科技-自定义节点🚩/LLM"
 
-    def chat(self, llm_provider:str, prompt:str, image, temperature:float, max_tokens:int, timeout:int):
+    def chat(self, llm_provider: str, prompt: str, image, temperature: float, max_tokens: int, timeout: int):
         @retry(Exception, tries=3, delay=1)
         def _chat():
             try:
@@ -140,28 +149,29 @@ class LLMChatMultiModalImageUpload:
                     base64_encoded_data = base64.b64encode(image_file.read()).decode('utf-8')
                 with httpx.Client(timeout=httpx.Timeout(timeout, connect=15)) as session:
                     resp = session.post("https://gateway.bowong.cc/chat/completions",
-                    headers={
-                        "Content-Type": "application/json",
-                        "Accept": "application/json",
-                        "Authorization": "Bearer auth-bowong7777"
-                    },
-                    json={
-                        "model": llm_provider,
-                        "messages": [
-                            {
-                                "role": "user",
-                                "content": [
-                                    {"type": "text", "text": prompt},
-                                    {
-                                        "type": "image_url",
-                                        "image_url": {"url":f"data:{mime_type};base64,{base64_encoded_data}"},
-                                    },
-                                ]
-                            }
-                        ],
-                        "temperature": temperature,
-                        "max_tokens": max_tokens
-                    })
+                                        headers={
+                                            "Content-Type": "application/json",
+                                            "Accept": "application/json",
+                                            "Authorization": "Bearer auth-bowong7777"
+                                        },
+                                        json={
+                                            "model": llm_provider,
+                                            "messages": [
+                                                {
+                                                    "role": "user",
+                                                    "content": [
+                                                        {"type": "text", "text": prompt},
+                                                        {
+                                                            "type": "image_url",
+                                                            "image_url": {
+                                                                "url": f"data:{mime_type};base64,{base64_encoded_data}"},
+                                                        },
+                                                    ]
+                                                }
+                                            ],
+                                            "temperature": temperature,
+                                            "max_tokens": max_tokens
+                                        })
                 resp.raise_for_status()
                 resp = resp.json()
                 content = find_value_recursive("content", resp)
@@ -170,8 +180,10 @@ class LLMChatMultiModalImageUpload:
                 # logger.exception("llm调用失败 {}".format(e))
                 raise Exception("llm调用失败 {}".format(e))
             return (content,)
+
         return _chat()
 
+
 class LLMChatMultiModalImageTensor:
     """llm chat"""
 
@@ -183,8 +195,8 @@ class LLMChatMultiModalImageTensor:
                               "gpt-4.1"],),
                 "prompt": ("STRING", {"multiline": True}),
                 "image": ("IMAGE",),
-                "temperature": ("FLOAT",{"default": 0.7, "min": 0.0, "max": 1.0}),
-                "max_tokens": ("INT",{"default": 4096, "min":1, "max":65535}),
+                "temperature": ("FLOAT", {"default": 0.7, "min": 0.0, "max": 1.0}),
+                "max_tokens": ("INT", {"default": 4096, "min": 1, "max": 65535}),
                 "timeout": ("INT", {"default": 120, "min": 30, "max": 900}),
             }
         }
@@ -194,34 +206,35 @@ class LLMChatMultiModalImageTensor:
     FUNCTION = "chat"
     CATEGORY = "不忘科技-自定义节点🚩/LLM"
 
-    def chat(self, llm_provider:str, prompt:str, image:torch.Tensor, temperature:float, max_tokens:int, timeout:int):
+    def chat(self, llm_provider: str, prompt: str, image: torch.Tensor, temperature: float, max_tokens: int,
+             timeout: int):
         @retry(Exception, tries=3, delay=1)
         def _chat():
             try:
                 with httpx.Client(timeout=httpx.Timeout(timeout, connect=15)) as session:
                     resp = session.post("https://gateway.bowong.cc/chat/completions",
-                    headers={
-                        "Content-Type": "application/json",
-                        "Accept": "application/json",
-                        "Authorization": "Bearer auth-bowong7777"
-                    },
-                    json={
-                        "model": llm_provider,
-                        "messages": [
-                            {
-                                "role": "user",
-                                "content": [
-                                    {"type": "text", "text": prompt},
-                                    {
-                                        "type": "image_url",
-                                        "image_url": {"url":image_tensor_to_base64(image)},
-                                    },
-                                ]
-                            }
-                        ],
-                        "temperature": temperature,
-                        "max_tokens": max_tokens
-                    })
+                                        headers={
+                                            "Content-Type": "application/json",
+                                            "Accept": "application/json",
+                                            "Authorization": "Bearer auth-bowong7777"
+                                        },
+                                        json={
+                                            "model": llm_provider,
+                                            "messages": [
+                                                {
+                                                    "role": "user",
+                                                    "content": [
+                                                        {"type": "text", "text": prompt},
+                                                        {
+                                                            "type": "image_url",
+                                                            "image_url": {"url": image_tensor_to_base64(image)},
+                                                        },
+                                                    ]
+                                                }
+                                            ],
+                                            "temperature": temperature,
+                                            "max_tokens": max_tokens
+                                        })
                 resp.raise_for_status()
                 resp = resp.json()
                 content = find_value_recursive("content", resp)
@@ -230,8 +243,10 @@ class LLMChatMultiModalImageTensor:
                 # logger.exception("llm调用失败 {}".format(e))
                 raise Exception("llm调用失败 {}".format(e))
             return (content,)
+
        return _chat()
 
+
 class Jinja2RenderTemplate:
     @classmethod
     def INPUT_TYPES(s):
@@ -266,4 +281,130 @@ class Jinja2RenderTemplate:
         template = Template(template, undefined=StrictUndefined)
 
         # 渲染模板
-        return (template.render(kv_map),)
\ No newline at end of file
+        return (template.render(kv_map),)
+
+
+class ModalClothesMask:
+    @classmethod
+    def INPUT_TYPES(cls):
+        return {
+            "required": {
+                "image": ("IMAGE",),
+                "mask_color": ("STRING", {"default": "绿色"}),
+                "clothes_type": ("STRING", {"default": "裤子"}),
+                "endpoint": ("STRING", {"default": "bowongai-dev--bowong-ai-video-gemini-fastapi-webapp.modal.run"}),
+            },
+        }
+
+    RETURN_TYPES = ("IMAGE",)
+    RETURN_NAMES = ("image",)
+    FUNCTION = "process"
+    OUTPUT_NODE = False
+    CATEGORY = "不忘科技-自定义节点🚩/图片/Gemini图像编辑"
+
+    def process(self, image: torch.Tensor, mask_color: str, clothes_type: str, endpoint: str):
+        try:
+            timeout = 60
+            logger.info("fetching access token")
+            api_key = requests.get(f"https://{endpoint}/google/access-token",
+                                   headers={'Authorization': 'Bearer bowong7777'}, timeout=timeout).json()[
+                "access_token"]
+            img_format = "PNG"
+            logger.info("requesting clothes-mask image edit")
+            job_resp = requests.post(f"https://{endpoint}/google/image/clothes_mark",
+                                     headers={'x-google-api-key': api_key},
+                                     data={
+                                         "mark_clothes_type": clothes_type,
+                                         "mark_color": mask_color,
+                                     },
+                                     files={"origin_image": (
+                                         'image.' + img_format.lower(), tensor_to_image_bytes(image, img_format),
+                                         f'image/{img_format.lower()}')},
+                                     timeout=timeout)
+            job_resp.raise_for_status()
+            job_resp = job_resp.json()
+            if not job_resp["success"]:
+                raise Exception("Modal API request failed")
+            job_id = job_resp["taskId"]
+
+            wait_time = 240
+            interval = 3
+            logger.info("start polling task status")
+            for _ in range(0, wait_time, interval):
+                logger.info("checking task status")
+                result = requests.get(f"https://{endpoint}/google/{job_id}", timeout=timeout)
+                if result.status_code == 200:
+                    result = result.json()
+                    if result["status"] == "success":
+                        logger.success("task succeeded")
+                        image_b64 = json.loads(result["result"])[0]["image_b64"]
+                        image_tensor = base64_to_tensor(image_b64)
+                        return (image_tensor,)
+                    elif "fail" in result["status"].lower():
+                        raise Exception("task failed")
+                sleep(interval)
+            raise Exception("timed out polling task status")
+        except Exception as e:
+            raise Exception(f"ModalClothesMask failed: {e}")
+
+
+class ModalEditCustom:
+    @classmethod
+    def INPUT_TYPES(cls):
+        return {
+            "required": {
+                "image": ("IMAGE",),
+                "prompt": ("STRING", {"default": "将背景去除,输出原尺寸图片"}),
+                "endpoint": ("STRING", {"default": "bowongai-dev--bowong-ai-video-gemini-fastapi-webapp.modal.run"}),
+            },
+        }
+
+    RETURN_TYPES = ("IMAGE",)
+    RETURN_NAMES = ("image",)
+    FUNCTION = "process"
+    OUTPUT_NODE = False
+    CATEGORY = "不忘科技-自定义节点🚩/图片/Gemini图像编辑"
+
+    def process(self, image: torch.Tensor, prompt: str, endpoint: str):
+        try:
+            timeout = 60
+            logger.info("fetching access token")
+            api_key = requests.get(f"https://{endpoint}/google/access-token",
+                                   headers={'Authorization': 'Bearer bowong7777'}, timeout=timeout).json()[
+                "access_token"]
+            img_format = "PNG"
+            logger.info("requesting custom image edit")
+            job_resp = requests.post(f"https://{endpoint}/google/image/edit_custom",
+                                     headers={'x-google-api-key': api_key},
+                                     data={
+                                         "prompt": prompt
+                                     },
+                                     files={"origin_image": (
+                                         'image.' + img_format.lower(), tensor_to_image_bytes(image, img_format),
+                                         f'image/{img_format.lower()}')},
+                                     timeout=timeout)
+            job_resp.raise_for_status()
+            job_resp = job_resp.json()
+            if not job_resp["success"]:
+                raise Exception("Modal API request failed")
+            job_id = job_resp["taskId"]
+
+            wait_time = 240
+            interval = 3
+            logger.info("start polling task status")
+            for _ in range(0, wait_time, interval):
+                logger.info("checking task status")
+                result = requests.get(f"https://{endpoint}/google/{job_id}", timeout=timeout)
+                if result.status_code == 200:
+                    result = result.json()
+                    if result["status"] == "success":
+                        logger.success("task succeeded")
+                        image_b64 = json.loads(result["result"])[0]["image_b64"]
+                        image_tensor = base64_to_tensor(image_b64)
+                        return (image_tensor,)
+                    elif "fail" in result["status"].lower():
+                        raise Exception("task failed")
+                sleep(interval)
+            raise Exception("timed out polling task status")
+        except Exception as e:
+            raise Exception(f"ModalEditCustom failed: {e}")
diff --git a/utils/image_utils.py b/utils/image_utils.py
new file mode 100644
index 0000000..3bc29dd
--- /dev/null
+++ b/utils/image_utils.py
@@ -0,0 +1,73 @@
+import base64
+import io
+
+import torch
+from PIL import Image
+from torchvision import transforms
+
+
+def base64_to_tensor(base64_data: str) -> torch.Tensor:
+    """
+    Convert a "data:image/xxx;base64,xxx" image string to a PyTorch tensor.
+
+    Args:
+        base64_data: Base64-encoded image data, with or without the data-URL prefix.
+
+    Returns:
+        torch.Tensor: tensor of shape [1, H, W, C] (ComfyUI IMAGE format), values in [0, 1].
+    """
+    # split the data-URL prefix from the actual Base64 payload
+    if ';base64,' in base64_data:
+        _, encoded = base64_data.split(';base64,', 1)
+    else:
+        encoded = base64_data  # assume the raw Base64 payload was passed directly
+
+    # decode the Base64 payload
+    decoded_data = base64.b64decode(encoded)
+
+    # open the image with PIL
+    image = Image.open(io.BytesIO(decoded_data))
+
+    # convert to RGB (handles PNG alpha channels and WebP)
+    if image.mode != 'RGB':
+        image = image.convert('RGB')
+
+    # convert to a PyTorch tensor; ToTensor maps [H, W, C] uint8
+    # to [C, H, W] float normalized to [0, 1]
+    transform = transforms.Compose([
+        transforms.ToTensor()
+    ])
+    tensor = transform(image)
+
+    return tensor.unsqueeze(0).permute(0, 2, 3, 1)
+
+
+def tensor_to_image_bytes(tensor: torch.Tensor, format: str = 'PNG') -> bytes:
+    """
+    Convert a PyTorch image tensor to encoded image bytes.
+
+    Args:
+        tensor: [B, H, W, C] ComfyUI IMAGE tensor (or a [C, H, W] tensor), values in [0, 1].
+        format: target image format, e.g. 'PNG' or 'JPEG'.
+
+    Returns:
+        bytes: the encoded image data.
+    """
+    if tensor.dim() == 4:
+        if tensor.shape[0] > 1:
+            print("warning: input batch contains multiple images; only the first is used")
+        tensor = tensor[0]  # take the first image in the batch
+        tensor = tensor.permute(2, 0, 1)  # [H, W, C] -> [C, H, W]
+    # scale to [0, 255] if the tensor is normalized
+    if tensor.max() <= 1.0:
+        tensor = tensor * 255
+
+    # convert to a PIL image
+    image = transforms.ToPILImage()(tensor.byte())
+
+    # serialize to an in-memory byte buffer
+    buffer = io.BytesIO()
+    image.save(buffer, format=format)
+    buffer.seek(0)  # rewind to the start of the buffer
+
+    return buffer.getvalue()
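
Reviewer note: both new nodes share the same submit-then-poll flow against the
Modal service. Below is a minimal standalone sketch of that flow for
smoke-testing the endpoint outside ComfyUI. It assumes the service behaves
exactly as the node code above expects; "input.png" and the prompt text are
illustrative placeholders.

    import json
    import time

    import requests

    ENDPOINT = "bowongai-dev--bowong-ai-video-gemini-fastapi-webapp.modal.run"

    # 1. exchange the service bearer token for a Google API key
    api_key = requests.get(f"https://{ENDPOINT}/google/access-token",
                           headers={"Authorization": "Bearer bowong7777"},
                           timeout=60).json()["access_token"]

    # 2. submit an edit job as a multipart form (prompt + origin_image)
    with open("input.png", "rb") as f:  # placeholder input image
        job = requests.post(f"https://{ENDPOINT}/google/image/edit_custom",
                            headers={"x-google-api-key": api_key},
                            data={"prompt": "remove the background"},
                            files={"origin_image": ("image.png", f, "image/png")},
                            timeout=60).json()
    assert job["success"], "Modal API request failed"

    # 3. poll /google/{taskId} until success, failure, or a 240 s budget runs out
    for _ in range(0, 240, 3):
        resp = requests.get(f"https://{ENDPOINT}/google/{job['taskId']}", timeout=60)
        if resp.status_code == 200:
            body = resp.json()
            if body["status"] == "success":
                # "result" is a JSON string holding a list of {"image_b64": ...}
                image_b64 = json.loads(body["result"])[0]["image_b64"]
                break
            if "fail" in body["status"].lower():
                raise RuntimeError("task failed")
        time.sleep(3)
    else:
        raise RuntimeError("timed out polling task status")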