ADD 增加多模态LLM节点

This commit is contained in:
kyj@bowong.ai 2025-07-08 11:42:27 +08:00
parent d035f77ae5
commit a8b85a8696
2 changed files with 82 additions and 4 deletions

View File

@ -1,4 +1,4 @@
from .nodes.llm_api import LLMChat from .nodes.llm_api import LLMChat, LLMChatMultiModal
from .nodes.compute_video_point import VideoStartPointDurationCompute from .nodes.compute_video_point import VideoStartPointDurationCompute
from .nodes.cos import COSUpload, COSDownload from .nodes.cos import COSUpload, COSDownload
from .nodes.face_detect import FaceDetect from .nodes.face_detect import FaceDetect
@ -65,7 +65,8 @@ NODE_CLASS_MAPPINGS = {
"RandomLineSelector": RandomLineSelector, "RandomLineSelector": RandomLineSelector,
"PlugAndPlayWebhook": PlugAndPlayWebhook, "PlugAndPlayWebhook": PlugAndPlayWebhook,
"SaveImageWithOutput": SaveImageWithOutput, "SaveImageWithOutput": SaveImageWithOutput,
"LLMChat": LLMChat "LLMChat": LLMChat,
"LLMChatMultiModal": LLMChatMultiModal
} }
# A dictionary that contains the friendly/humanly readable titles for the nodes # A dictionary that contains the friendly/humanly readable titles for the nodes
@ -104,5 +105,6 @@ NODE_DISPLAY_NAME_MAPPINGS = {
"RandomLineSelector": "随机选择一行内容", "RandomLineSelector": "随机选择一行内容",
"PlugAndPlayWebhook": "Webhook转发器", "PlugAndPlayWebhook": "Webhook转发器",
"SaveImageWithOutput": "保存图片(带输出)", "SaveImageWithOutput": "保存图片(带输出)",
"LLMChat": "LLM调用" "LLMChat": "LLM调用",
"LLMChatMultiModal": "多模态LLM调用"
} }

View File

@ -1,10 +1,19 @@
# LLM API 通过cloudflare gateway调用llm # LLM API 通过cloudflare gateway调用llm
import base64
import io
import os
import re import re
from mimetypes import guess_type
from typing import Any, Union from typing import Any, Union
import httpx import httpx
import numpy as np
import torch
from PIL import Image, ImageSequence, ImageOps
from retry import retry from retry import retry
import folder_paths
def find_value_recursive(key:str, data:Union[dict, list]) -> str | None | Any: def find_value_recursive(key:str, data:Union[dict, list]) -> str | None | Any:
if isinstance(data, dict): if isinstance(data, dict):
@ -22,7 +31,7 @@ def find_value_recursive(key:str, data:Union[dict, list]) -> str | None | Any:
return result return result
class LLMChat: class LLMChat:
"""AWS S3下载""" """llm chat"""
@classmethod @classmethod
def INPUT_TYPES(s): def INPUT_TYPES(s):
@ -78,4 +87,71 @@ class LLMChat:
# logger.exception("llm调用失败 {}".format(e)) # logger.exception("llm调用失败 {}".format(e))
raise Exception("llm调用失败 {}".format(e)) raise Exception("llm调用失败 {}".format(e))
return (content,) return (content,)
return _chat()
class LLMChatMultiModal:
    """Multimodal LLM chat node.

    Sends a text prompt plus one image (base64-encoded into a data URL) to an
    OpenAI-compatible chat-completions endpoint behind the bowong gateway and
    returns the reply text as a single STRING output.
    """

    @classmethod
    def INPUT_TYPES(s):
        # Offer every image file in the ComfyUI input directory as a pickable
        # input, with upload support enabled in the UI widget.
        input_dir = folder_paths.get_input_directory()
        files = [f for f in os.listdir(input_dir) if os.path.isfile(os.path.join(input_dir, f))]
        files = folder_paths.filter_files_content_types(files, ["image"])
        return {
            "required": {
                "llm_provider": (["gpt-4o-1120",
                                  "gpt-4.1"],),
                "prompt": ("STRING", {"multiline": True}),
                "image": (sorted(files), {"image_upload": True}),
                "temperature": ("FLOAT", {"default": 0.7, "min": 0.0, "max": 1.0}),
                "max_tokens": ("INT", {"default": 4096, "min": 1, "max": 65535}),
                "timeout": ("INT", {"default": 120, "min": 30, "max": 900}),
            }
        }

    RETURN_TYPES = ("STRING",)
    RETURN_NAMES = ("llm输出",)
    FUNCTION = "chat"
    CATEGORY = "不忘科技-自定义节点🚩/llm"

    def chat(self, llm_provider: str, prompt: str, image, temperature: float, max_tokens: int, timeout: int):
        """Call the gateway with *prompt* and *image*; return a 1-tuple ``(reply,)``.

        Retries up to 3 times with a 1-second delay on any failure.  Raises
        ``Exception`` (with the root cause chained via ``from``) if all
        attempts fail.
        """

        @retry(Exception, tries=3, delay=1)
        def _chat():
            try:
                image_path = folder_paths.get_annotated_filepath(image)
                # guess_type returns (None, None) for unknown extensions; fall
                # back to a generic binary type so the data URL stays well-formed
                # instead of emitting "data:None;base64,...".
                mime_type, _ = guess_type(image_path)
                if mime_type is None:
                    mime_type = "application/octet-stream"
                with open(image_path, "rb") as image_file:
                    base64_encoded_data = base64.b64encode(image_file.read()).decode("utf-8")
                # NOTE(security): bearer token is hard-coded in source; consider
                # moving it to configuration or an environment variable.
                with httpx.Client(timeout=httpx.Timeout(timeout, connect=15)) as session:
                    resp = session.post(
                        "https://gateway.bowong.cc/chat/completions",
                        headers={
                            "Content-Type": "application/json",
                            "Accept": "application/json",
                            "Authorization": "Bearer auth-bowong7777"
                        },
                        json={
                            "model": llm_provider,
                            "messages": [
                                {
                                    "role": "user",
                                    "content": [
                                        {"type": "text", "text": prompt},
                                        {
                                            "type": "image_url",
                                            "image_url": {"url": f"data:{mime_type};base64,{base64_encoded_data}"},
                                        },
                                    ]
                                }
                            ],
                            "temperature": temperature,
                            "max_tokens": max_tokens
                        })
                resp.raise_for_status()
                payload = resp.json()
                content = find_value_recursive("content", payload)
                if content is None:
                    # No "content" key anywhere in the response: fail with a
                    # clear error rather than letting re.sub raise TypeError.
                    raise ValueError("no 'content' field in LLM response")
                # Collapse runs of blank lines in the reply.
                content = re.sub(r'\n{2,}', '\n', content)
            except Exception as e:
                # logger.exception("llm调用失败 {}".format(e))
                # Chain the cause so the root error survives the re-raise.
                raise Exception("llm调用失败 {}".format(e)) from e
            return (content,)

        return _chat()