mixvideo-v2/cargos/tvai-v2/视觉-语言模型配置/ViTamin-XL-384.json

20 lines
430 B
JSON

{
  "embed_dim": 1152,
  "vision_cfg": {
    "timm_model_name": "vitamin_xlarge_384",
    "timm_model_pretrained": false,
    "timm_pool": "",
    "timm_proj": "linear",
    "timm_drop": 0.0,
    "timm_drop_path": 0.1,
    "image_size": 384
  },
  "text_cfg": {
    "context_length": 77,
    "vocab_size": 49408,
    "width": 1152,
    "heads": 16,
    "layers": 27
  },
  "custom_text": true
}