ADD 多模态LLM节点增加图片传入类型

This commit is contained in:
kyj@bowong.ai 2025-07-08 13:56:44 +08:00
parent 126f9bfb80
commit 2753a6b8a0
2 changed files with 86 additions and 4 deletions

View File

@ -1,4 +1,4 @@
from .nodes.llm_api import LLMChat, LLMChatMultiModal from .nodes.llm_api import LLMChat, LLMChatMultiModalImageUpload, LLMChatMultiModalImageTensor
from .nodes.compute_video_point import VideoStartPointDurationCompute from .nodes.compute_video_point import VideoStartPointDurationCompute
from .nodes.cos import COSUpload, COSDownload from .nodes.cos import COSUpload, COSDownload
from .nodes.face_detect import FaceDetect from .nodes.face_detect import FaceDetect
@ -66,7 +66,8 @@ NODE_CLASS_MAPPINGS = {
"PlugAndPlayWebhook": PlugAndPlayWebhook, "PlugAndPlayWebhook": PlugAndPlayWebhook,
"SaveImageWithOutput": SaveImageWithOutput, "SaveImageWithOutput": SaveImageWithOutput,
"LLMChat": LLMChat, "LLMChat": LLMChat,
"LLMChatMultiModal": LLMChatMultiModal "LLMChatMultiModalImageUpload": LLMChatMultiModalImageUpload,
"LLMChatMultiModalImageTensor": LLMChatMultiModalImageTensor
} }
# A dictionary that contains the friendly/humanly readable titles for the nodes # A dictionary that contains the friendly/humanly readable titles for the nodes
@ -106,5 +107,6 @@ NODE_DISPLAY_NAME_MAPPINGS = {
"PlugAndPlayWebhook": "Webhook转发器", "PlugAndPlayWebhook": "Webhook转发器",
"SaveImageWithOutput": "保存图片(带输出)", "SaveImageWithOutput": "保存图片(带输出)",
"LLMChat": "LLM调用", "LLMChat": "LLM调用",
"LLMChatMultiModal": "多模态LLM调用" "LLMChatMultiModalImageUpload": "多模态LLM调用-图片Path",
"LLMChatMultiModalImageTensor": "多模态LLM调用-图片Tensor"
} }

View File

@ -1,11 +1,15 @@
# LLM API 通过cloudflare gateway调用llm # LLM API 通过cloudflare gateway调用llm
import base64 import base64
import io
import os import os
import re import re
from mimetypes import guess_type from mimetypes import guess_type
from typing import Any, Union from typing import Any, Union
import httpx import httpx
import numpy as np
import torch
from PIL import Image
from retry import retry from retry import retry
import folder_paths import folder_paths
@ -26,6 +30,22 @@ def find_value_recursive(key:str, data:Union[dict, list]) -> str | None | Any:
if result is not None: if result is not None:
return result return result
def image_tensor_to_base64(image):
    """Convert an image tensor into a base64-encoded PNG data URL.

    The input is expected to hold float pixel values in [0, 1]
    (ComfyUI IMAGE convention); a leading singleton batch dimension
    is dropped by ``squeeze()``.
    # NOTE(review): assumes at most one batch image — confirm with callers.
    """
    # Scale to 0-255, clamp out-of-range values, and cast to uint8.
    pixels = np.clip(image.cpu().numpy().squeeze() * 255., 0, 255).astype(np.uint8)
    pil_img = Image.fromarray(pixels)
    # Serialize the image as PNG into an in-memory buffer.
    buffer = io.BytesIO()
    pil_img.save(buffer, format='PNG')
    # Base64-encode the PNG bytes and wrap them as a data URL.
    b64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
    return "data:image/png;base64," + b64
class LLMChat: class LLMChat:
"""llm chat""" """llm chat"""
@ -85,7 +105,7 @@ class LLMChat:
return (content,) return (content,)
return _chat() return _chat()
class LLMChatMultiModal: class LLMChatMultiModalImageUpload:
"""llm chat""" """llm chat"""
@classmethod @classmethod
@ -150,4 +170,64 @@ class LLMChatMultiModal:
# logger.exception("llm调用失败 {}".format(e)) # logger.exception("llm调用失败 {}".format(e))
raise Exception("llm调用失败 {}".format(e)) raise Exception("llm调用失败 {}".format(e))
return (content,) return (content,)
return _chat()
class LLMChatMultiModalImageTensor:
    """Multi-modal LLM chat node that accepts a ComfyUI IMAGE tensor.

    The tensor is converted to a base64 PNG data URL (via
    ``image_tensor_to_base64``) and sent together with the text prompt
    to the chat-completions gateway.
    """

    @classmethod
    def INPUT_TYPES(s):
        # ComfyUI node input schema: model choice, prompt, image tensor,
        # and sampling/transport limits.
        return {
            "required": {
                "llm_provider": (["gpt-4o-1120",
                                  "gpt-4.1"],),
                "prompt": ("STRING", {"multiline": True}),
                "image": ("IMAGE",),
                "temperature": ("FLOAT", {"default": 0.7, "min": 0.0, "max": 1.0}),
                "max_tokens": ("INT", {"default": 4096, "min": 1, "max": 65535}),
                "timeout": ("INT", {"default": 120, "min": 30, "max": 900}),
            }
        }

    RETURN_TYPES = ("STRING",)
    RETURN_NAMES = ("llm输出",)
    FUNCTION = "chat"
    CATEGORY = "不忘科技-自定义节点🚩/llm"

    def chat(self, llm_provider: str, prompt: str, image: torch.Tensor, temperature: float, max_tokens: int, timeout: int):
        """Send the prompt and image to the LLM gateway.

        Returns a 1-tuple containing the reply text (ComfyUI output
        convention). Retries the whole request up to 3 times on any
        exception (network errors, HTTP errors, malformed responses).
        Raises ``Exception`` with the original error chained when all
        attempts fail.
        """
        @retry(Exception, tries=3, delay=1)
        def _chat():
            try:
                with httpx.Client(timeout=httpx.Timeout(timeout, connect=15)) as session:
                    resp = session.post("https://gateway.bowong.cc/chat/completions",
                                        headers={
                                            "Content-Type": "application/json",
                                            "Accept": "application/json",
                                            "Authorization": "Bearer auth-bowong7777"
                                        },
                                        json={
                                            "model": llm_provider,
                                            "messages": [
                                                {
                                                    "role": "user",
                                                    "content": [
                                                        {"type": "text", "text": prompt},
                                                        {
                                                            "type": "image_url",
                                                            "image_url": {"url": image_tensor_to_base64(image)},
                                                        },
                                                    ]
                                                }
                                            ],
                                            "temperature": temperature,
                                            "max_tokens": max_tokens
                                        })
                    resp.raise_for_status()
                    data = resp.json()
                    content = find_value_recursive("content", data)
                    # Guard: the gateway may answer without a "content"
                    # field; fail with a clear message instead of letting
                    # re.sub crash on None with an opaque TypeError.
                    if content is None:
                        raise ValueError("响应中未找到content字段: {}".format(data))
                    # Collapse runs of blank lines in the reply.
                    content = re.sub(r'\n{2,}', '\n', content)
            except Exception as e:
                # logger.exception("llm调用失败 {}".format(e))
                # Chain the cause so the original traceback is preserved.
                raise Exception("llm调用失败 {}".format(e)) from e
            return (content,)
        return _chat()