mixvideo-v2/cargos/tvai-v2/视觉-语言模型配置/swin_base_patch4_window7_22...

17 lines
380 B
JSON

{
"embed_dim": 640,
"vision_cfg": {
"timm_model_name": "swin_base_patch4_window7_224",
"timm_model_pretrained": false,
"timm_pool": "",
"timm_proj": "linear",
"image_size": 224
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 640,
"heads": 10,
"layers": 12
}
}