# Build the model with mobilevit_s() (defined outside this snippet).
mvit_s = mobilevit_s()
# Read the downloaded checkpoint file, mapping its tensors onto the CPU.
checkpoint = torch.load("mobilevit_s.pt",map_location='cpu')
# load_state_dict() matches parameters by name. Per the error reported below,
# the model expects keys such as "conv_1.0.weight", "mv2.*", "m_vits.*",
# "conv2.*" and "fc.weight", while the checkpoint provides "conv_1.block.*",
# "layer_N.*", "conv_1x1_exp.*" and "classifier.fc.*", so no key lines up and
# the call raises RuntimeError.
# NOTE(review): presumably the checkpoint was saved from a different MobileViT
# implementation than the one mobilevit_s() builds -- confirm. If so, load it
# with the model definition it was trained with, or remap the keys (and check
# tensor shapes) first; strict=False would only skip the mismatched keys and
# leave the model without pretrained weights.
mvit_s.load_state_dict(checkpoint)
I downloaded the checkpoint from https://github.com/apple/ml-cvnets/blob/main/examples/README-mobilevit.md.
How can I load this checkpoint into the model? Calling load_state_dict raises the following error:
RuntimeError: Error(s) in loading state_dict for MobileViT:
Missing key(s) in state_dict: "conv_1.0.weight", "conv_1.0.bias", "conv_1.1.weight", "conv_1.1.bias", "conv_1.1.running_mean", "conv_1.1.running_var", "mv2.0.conv.0.weight", "mv2.0.conv.1.weight", "mv2.0.conv.1.bias", "mv2.0.conv.1.running_mean", "mv2.0.conv.1.running_var", "mv2.0.conv.3.weight", "mv2.0.conv.4.weight", "mv2.0.conv.4.bias", "mv2.0.conv.4.running_mean", "mv2.0.conv.4.running_var", "mv2.0.conv.6.weight", "mv2.0.conv.8.weight", "mv2.0.conv.8.bias", "mv2.0.conv.8.running_mean", "mv2.0.conv.8.running_var", "mv2.1.conv.0.weight", "mv2.1.conv.1.weight", "mv2.1.conv.1.bias", "mv2.1.conv.1.running_mean", "mv2.1.conv.1.running_var", "mv2.1.conv.3.weight", "mv2.1.conv.4.weight", "mv2.1.conv.4.bias", "mv2.1.conv.4.running_mean", "mv2.1.conv.4.running_var", "mv2.1.conv.6.weight", "mv2.1.conv.8.weight", "mv2.1.conv.8.bias", "mv2.1.conv.8.running_mean", "mv2.1.conv.8.running_var", "mv2.2.conv.0.weight", "mv2.2.conv.1.weight", "mv2.2.conv.1.bias", "mv2.2.conv.1.running_mean", "mv2.2.conv.1.running_var", "mv2.2.conv.3.weight", "mv2.2.conv.4.weight", "mv2.2.conv.4.bias", "mv2.2.conv.4.running_mean", "mv2.2.conv.4.running_var", "mv2.2.conv.6.weight", "mv2.2.conv.8.weight", "mv2.2.conv.8.bias", "mv2.2.conv.8.running_mean", "mv2.2.conv.8.running_var", "mv2.3.conv.0.weight", "mv2.3.conv.1.weight", "mv2.3.conv.1.bias", "mv2.3.conv.1.running_mean", "mv2.3.conv.1.running_var", "mv2.3.conv.3.weight", "mv2.3.conv.4.weight", "mv2.3.conv.4.bias", "mv2.3.conv.4.running_mean", "mv2.3.conv.4.running_var", "mv2.3.conv.6.weight", "mv2.3.conv.8.weight", "mv2.3.conv.8.bias", "mv2.3.conv.8.running_mean", "mv2.3.conv.8.running_var", "mv2.4.conv.0.weight", "mv2.4.conv.1.weight", "mv2.4.conv.1.bias", "mv2.4.conv.1.running_mean", "mv2.4.conv.1.running_var", "mv2.4.conv.3.weight", "mv2.4.conv.4.weight", "mv2.4.conv.4.bias", "mv2.4.conv.4.running_mean", "mv2.4.conv.4.running_var", "mv2.4.conv.6.weight", "mv2.4.conv.8.weight", "mv2.4.conv.8.bias", "mv2.4.conv.8.running_mean", 
"mv2.4.conv.8.running_var", "mv2.5.conv.0.weight", "mv2.5.conv.1.weight", "mv2.5.conv.1.bias", "mv2.5.conv.1.running_mean", "mv2.5.conv.1.running_var", "mv2.5.conv.3.weight", "mv2.5.conv.4.weight", "mv2.5.conv.4.bias", "mv2.5.conv.4.running_mean", "mv2.5.conv.4.running_var", "mv2.5.conv.6.weight", "mv2.5.conv.8.weight", "mv2.5.conv.8.bias", "mv2.5.conv.8.running_mean", "mv2.5.conv.8.running_var", "mv2.6.conv.0.weight", "mv2.6.conv.1.weight", "mv2.6.conv.1.bias", "mv2.6.conv.1.running_mean", "mv2.6.conv.1.running_var", "mv2.6.conv.3.weight", "mv2.6.conv.4.weight", "mv2.6.conv.4.bias", "mv2.6.conv.4.running_mean", "mv2.6.conv.4.running_var", "mv2.6.conv.6.weight", "mv2.6.conv.8.weight", "mv2.6.conv.8.bias", "mv2.6.conv.8.running_mean", "mv2.6.conv.8.running_var", "m_vits.0.conv_1.weight", "m_vits.0.conv_1.bias", "m_vits.0.conv2.weight", "m_vits.0.conv2.bias", "m_vits.0.trans.layers.0.0.ln.weight", "m_vits.0.trans.layers.0.0.ln.bias", "m_vits.0.trans.layers.0.0.fn.to_qkv.weight", "m_vits.0.trans.layers.0.0.fn.to_out.0.weight", "m_vits.0.trans.layers.0.0.fn.to_out.0.bias", "m_vits.0.trans.layers.0.1.ln.weight", "m_vits.0.trans.layers.0.1.ln.bias", "m_vits.0.trans.layers.0.1.fn.net.0.weight", "m_vits.0.trans.layers.0.1.fn.net.0.bias", "m_vits.0.trans.layers.0.1.fn.net.3.weight", "m_vits.0.trans.layers.0.1.fn.net.3.bias", "m_vits.0.trans.layers.1.0.ln.weight", "m_vits.0.trans.layers.1.0.ln.bias", "m_vits.0.trans.layers.1.0.fn.to_qkv.weight", "m_vits.0.trans.layers.1.0.fn.to_out.0.weight", "m_vits.0.trans.layers.1.0.fn.to_out.0.bias", "m_vits.0.trans.layers.1.1.ln.weight", "m_vits.0.trans.layers.1.1.ln.bias", "m_vits.0.trans.layers.1.1.fn.net.0.weight", "m_vits.0.trans.layers.1.1.fn.net.0.bias", "m_vits.0.trans.layers.1.1.fn.net.3.weight", "m_vits.0.trans.layers.1.1.fn.net.3.bias", "m_vits.0.conv3.weight", "m_vits.0.conv3.bias", "m_vits.0.conv4.weight", "m_vits.0.conv4.bias", "m_vits.1.conv_1.weight", "m_vits.1.conv_1.bias", "m_vits.1.conv2.weight", "m_vits.1.conv2.bias", 
"m_vits.1.trans.layers.0.0.ln.weight", "m_vits.1.trans.layers.0.0.ln.bias", "m_vits.1.trans.layers.0.0.fn.to_qkv.weight", "m_vits.1.trans.layers.0.0.fn.to_out.0.weight", "m_vits.1.trans.layers.0.0.fn.to_out.0.bias", "m_vits.1.trans.layers.0.1.ln.weight", "m_vits.1.trans.layers.0.1.ln.bias", "m_vits.1.trans.layers.0.1.fn.net.0.weight", "m_vits.1.trans.layers.0.1.fn.net.0.bias", "m_vits.1.trans.layers.0.1.fn.net.3.weight", "m_vits.1.trans.layers.0.1.fn.net.3.bias", "m_vits.1.trans.layers.1.0.ln.weight", "m_vits.1.trans.layers.1.0.ln.bias", "m_vits.1.trans.layers.1.0.fn.to_qkv.weight", "m_vits.1.trans.layers.1.0.fn.to_out.0.weight", "m_vits.1.trans.layers.1.0.fn.to_out.0.bias", "m_vits.1.trans.layers.1.1.ln.weight", "m_vits.1.trans.layers.1.1.ln.bias", "m_vits.1.trans.layers.1.1.fn.net.0.weight", "m_vits.1.trans.layers.1.1.fn.net.0.bias", "m_vits.1.trans.layers.1.1.fn.net.3.weight", "m_vits.1.trans.layers.1.1.fn.net.3.bias", "m_vits.1.trans.layers.2.0.ln.weight", "m_vits.1.trans.layers.2.0.ln.bias", "m_vits.1.trans.layers.2.0.fn.to_qkv.weight", "m_vits.1.trans.layers.2.0.fn.to_out.0.weight", "m_vits.1.trans.layers.2.0.fn.to_out.0.bias", "m_vits.1.trans.layers.2.1.ln.weight", "m_vits.1.trans.layers.2.1.ln.bias", "m_vits.1.trans.layers.2.1.fn.net.0.weight", "m_vits.1.trans.layers.2.1.fn.net.0.bias", "m_vits.1.trans.layers.2.1.fn.net.3.weight", "m_vits.1.trans.layers.2.1.fn.net.3.bias", "m_vits.1.trans.layers.3.0.ln.weight", "m_vits.1.trans.layers.3.0.ln.bias", "m_vits.1.trans.layers.3.0.fn.to_qkv.weight", "m_vits.1.trans.layers.3.0.fn.to_out.0.weight", "m_vits.1.trans.layers.3.0.fn.to_out.0.bias", "m_vits.1.trans.layers.3.1.ln.weight", "m_vits.1.trans.layers.3.1.ln.bias", "m_vits.1.trans.layers.3.1.fn.net.0.weight", "m_vits.1.trans.layers.3.1.fn.net.0.bias", "m_vits.1.trans.layers.3.1.fn.net.3.weight", "m_vits.1.trans.layers.3.1.fn.net.3.bias", "m_vits.1.conv3.weight", "m_vits.1.conv3.bias", "m_vits.1.conv4.weight", "m_vits.1.conv4.bias", "m_vits.2.conv_1.weight", 
"m_vits.2.conv_1.bias", "m_vits.2.conv2.weight", "m_vits.2.conv2.bias", "m_vits.2.trans.layers.0.0.ln.weight", "m_vits.2.trans.layers.0.0.ln.bias", "m_vits.2.trans.layers.0.0.fn.to_qkv.weight", "m_vits.2.trans.layers.0.0.fn.to_out.0.weight", "m_vits.2.trans.layers.0.0.fn.to_out.0.bias", "m_vits.2.trans.layers.0.1.ln.weight", "m_vits.2.trans.layers.0.1.ln.bias", "m_vits.2.trans.layers.0.1.fn.net.0.weight", "m_vits.2.trans.layers.0.1.fn.net.0.bias", "m_vits.2.trans.layers.0.1.fn.net.3.weight", "m_vits.2.trans.layers.0.1.fn.net.3.bias", "m_vits.2.trans.layers.1.0.ln.weight", "m_vits.2.trans.layers.1.0.ln.bias", "m_vits.2.trans.layers.1.0.fn.to_qkv.weight", "m_vits.2.trans.layers.1.0.fn.to_out.0.weight", "m_vits.2.trans.layers.1.0.fn.to_out.0.bias", "m_vits.2.trans.layers.1.1.ln.weight", "m_vits.2.trans.layers.1.1.ln.bias", "m_vits.2.trans.layers.1.1.fn.net.0.weight", "m_vits.2.trans.layers.1.1.fn.net.0.bias", "m_vits.2.trans.layers.1.1.fn.net.3.weight", "m_vits.2.trans.layers.1.1.fn.net.3.bias", "m_vits.2.trans.layers.2.0.ln.weight", "m_vits.2.trans.layers.2.0.ln.bias", "m_vits.2.trans.layers.2.0.fn.to_qkv.weight", "m_vits.2.trans.layers.2.0.fn.to_out.0.weight", "m_vits.2.trans.layers.2.0.fn.to_out.0.bias", "m_vits.2.trans.layers.2.1.ln.weight", "m_vits.2.trans.layers.2.1.ln.bias", "m_vits.2.trans.layers.2.1.fn.net.0.weight", "m_vits.2.trans.layers.2.1.fn.net.0.bias", "m_vits.2.trans.layers.2.1.fn.net.3.weight", "m_vits.2.trans.layers.2.1.fn.net.3.bias", "m_vits.2.conv3.weight", "m_vits.2.conv3.bias", "m_vits.2.conv4.weight", "m_vits.2.conv4.bias", "conv2.0.weight", "conv2.0.bias", "conv2.1.weight", "conv2.1.bias", "conv2.1.running_mean", "conv2.1.running_var", "fc.weight".
Unexpected key(s) in state_dict: "layer_1.0.block.exp_1x1.block.conv.weight", "layer_1.0.block.exp_1x1.block.norm.weight", "layer_1.0.block.exp_1x1.block.norm.bias", "layer_1.0.block.exp_1x1.block.norm.running_mean", "layer_1.0.block.exp_1x1.block.norm.running_var", "layer_1.0.block.exp_1x1.block.norm.num_batches_tracked", "layer_1.0.block.conv_3x3.block.conv.weight", "layer_1.0.block.conv_3x3.block.norm.weight", "layer_1.0.block.conv_3x3.block.norm.bias", "layer_1.0.block.conv_3x3.block.norm.running_mean", "layer_1.0.block.conv_3x3.block.norm.running_var", "layer_1.0.block.conv_3x3.block.norm.num_batches_tracked", "layer_1.0.block.red_1x1.block.conv.weight", "layer_1.0.block.red_1x1.block.norm.weight", "layer_1.0.block.red_1x1.block.norm.bias", "layer_1.0.block.red_1x1.block.norm.running_mean", "layer_1.0.block.red_1x1.block.norm.running_var", "layer_1.0.block.red_1x1.block.norm.num_batches_tracked", "layer_2.0.block.exp_1x1.block.conv.weight", "layer_2.0.block.exp_1x1.block.norm.weight", "layer_2.0.block.exp_1x1.block.norm.bias", "layer_2.0.block.exp_1x1.block.norm.running_mean", "layer_2.0.block.exp_1x1.block.norm.running_var", "layer_2.0.block.exp_1x1.block.norm.num_batches_tracked", "layer_2.0.block.conv_3x3.block.conv.weight", "layer_2.0.block.conv_3x3.block.norm.weight", "layer_2.0.block.conv_3x3.block.norm.bias", "layer_2.0.block.conv_3x3.block.norm.running_mean", "layer_2.0.block.conv_3x3.block.norm.running_var", "layer_2.0.block.conv_3x3.block.norm.num_batches_tracked", "layer_2.0.block.red_1x1.block.conv.weight", "layer_2.0.block.red_1x1.block.norm.weight", "layer_2.0.block.red_1x1.block.norm.bias", "layer_2.0.block.red_1x1.block.norm.running_mean", "layer_2.0.block.red_1x1.block.norm.running_var", "layer_2.0.block.red_1x1.block.norm.num_batches_tracked", "layer_2.1.block.exp_1x1.block.conv.weight", "layer_2.1.block.exp_1x1.block.norm.weight", "layer_2.1.block.exp_1x1.block.norm.bias", "layer_2.1.block.exp_1x1.block.norm.running_mean", 
"layer_2.1.block.exp_1x1.block.norm.running_var", "layer_2.1.block.exp_1x1.block.norm.num_batches_tracked", "layer_2.1.block.conv_3x3.block.conv.weight", "layer_2.1.block.conv_3x3.block.norm.weight", "layer_2.1.block.conv_3x3.block.norm.bias", "layer_2.1.block.conv_3x3.block.norm.running_mean", "layer_2.1.block.conv_3x3.block.norm.running_var", "layer_2.1.block.conv_3x3.block.norm.num_batches_tracked", "layer_2.1.block.red_1x1.block.conv.weight", "layer_2.1.block.red_1x1.block.norm.weight", "layer_2.1.block.red_1x1.block.norm.bias", "layer_2.1.block.red_1x1.block.norm.running_mean", "layer_2.1.block.red_1x1.block.norm.running_var", "layer_2.1.block.red_1x1.block.norm.num_batches_tracked", "layer_2.2.block.exp_1x1.block.conv.weight", "layer_2.2.block.exp_1x1.block.norm.weight", "layer_2.2.block.exp_1x1.block.norm.bias", "layer_2.2.block.exp_1x1.block.norm.running_mean", "layer_2.2.block.exp_1x1.block.norm.running_var", "layer_2.2.block.exp_1x1.block.norm.num_batches_tracked", "layer_2.2.block.conv_3x3.block.conv.weight", "layer_2.2.block.conv_3x3.block.norm.weight", "layer_2.2.block.conv_3x3.block.norm.bias", "layer_2.2.block.conv_3x3.block.norm.running_mean", "layer_2.2.block.conv_3x3.block.norm.running_var", "layer_2.2.block.conv_3x3.block.norm.num_batches_tracked", "layer_2.2.block.red_1x1.block.conv.weight", "layer_2.2.block.red_1x1.block.norm.weight", "layer_2.2.block.red_1x1.block.norm.bias", "layer_2.2.block.red_1x1.block.norm.running_mean", "layer_2.2.block.red_1x1.block.norm.running_var", "layer_2.2.block.red_1x1.block.norm.num_batches_tracked", "layer_3.0.block.exp_1x1.block.conv.weight", "layer_3.0.block.exp_1x1.block.norm.weight", "layer_3.0.block.exp_1x1.block.norm.bias", "layer_3.0.block.exp_1x1.block.norm.running_mean", "layer_3.0.block.exp_1x1.block.norm.running_var", "layer_3.0.block.exp_1x1.block.norm.num_batches_tracked", "layer_3.0.block.conv_3x3.block.conv.weight", "layer_3.0.block.conv_3x3.block.norm.weight", 
"layer_3.0.block.conv_3x3.block.norm.bias", "layer_3.0.block.conv_3x3.block.norm.running_mean", "layer_3.0.block.conv_3x3.block.norm.running_var", "layer_3.0.block.conv_3x3.block.norm.num_batches_tracked", "layer_3.0.block.red_1x1.block.conv.weight", "layer_3.0.block.red_1x1.block.norm.weight", "layer_3.0.block.red_1x1.block.norm.bias", "layer_3.0.block.red_1x1.block.norm.running_mean", "layer_3.0.block.red_1x1.block.norm.running_var", "layer_3.0.block.red_1x1.block.norm.num_batches_tracked", "layer_3.1.local_rep.conv_3x3.block.conv.weight", "layer_3.1.local_rep.conv_3x3.block.norm.weight", "layer_3.1.local_rep.conv_3x3.block.norm.bias", "layer_3.1.local_rep.conv_3x3.block.norm.running_mean", "layer_3.1.local_rep.conv_3x3.block.norm.running_var", "layer_3.1.local_rep.conv_3x3.block.norm.num_batches_tracked", "layer_3.1.local_rep.conv_1x1.block.conv.weight", "layer_3.1.global_rep.0.pre_norm_mha.0.weight", "layer_3.1.global_rep.0.pre_norm_mha.0.bias", "layer_3.1.global_rep.0.pre_norm_mha.1.qkv_proj.weight", "layer_3.1.global_rep.0.pre_norm_mha.1.qkv_proj.bias", "layer_3.1.global_rep.0.pre_norm_mha.1.out_proj.weight", "layer_3.1.global_rep.0.pre_norm_mha.1.out_proj.bias", "layer_3.1.global_rep.0.pre_norm_ffn.0.weight", "layer_3.1.global_rep.0.pre_norm_ffn.0.bias", "layer_3.1.global_rep.0.pre_norm_ffn.1.weight", "layer_3.1.global_rep.0.pre_norm_ffn.1.bias", "layer_3.1.global_rep.0.pre_norm_ffn.4.weight", "layer_3.1.global_rep.0.pre_norm_ffn.4.bias", "layer_3.1.global_rep.1.pre_norm_mha.0.weight", "layer_3.1.global_rep.1.pre_norm_mha.0.bias", "layer_3.1.global_rep.1.pre_norm_mha.1.qkv_proj.weight", "layer_3.1.global_rep.1.pre_norm_mha.1.qkv_proj.bias", "layer_3.1.global_rep.1.pre_norm_mha.1.out_proj.weight", "layer_3.1.global_rep.1.pre_norm_mha.1.out_proj.bias", "layer_3.1.global_rep.1.pre_norm_ffn.0.weight", "layer_3.1.global_rep.1.pre_norm_ffn.0.bias", "layer_3.1.global_rep.1.pre_norm_ffn.1.weight", "layer_3.1.global_rep.1.pre_norm_ffn.1.bias", 
"layer_3.1.global_rep.1.pre_norm_ffn.4.weight", "layer_3.1.global_rep.1.pre_norm_ffn.4.bias", "layer_3.1.global_rep.2.weight", "layer_3.1.global_rep.2.bias", "layer_3.1.conv_proj.block.conv.weight", "layer_3.1.conv_proj.block.norm.weight", "layer_3.1.conv_proj.block.norm.bias", "layer_3.1.conv_proj.block.norm.running_mean", "layer_3.1.conv_proj.block.norm.running_var", "layer_3.1.conv_proj.block.norm.num_batches_tracked", "layer_3.1.fusion.block.conv.weight", "layer_3.1.fusion.block.norm.weight", "layer_3.1.fusion.block.norm.bias", "layer_3.1.fusion.block.norm.running_mean", "layer_3.1.fusion.block.norm.running_var", "layer_3.1.fusion.block.norm.num_batches_tracked", "layer_4.0.block.exp_1x1.block.conv.weight", "layer_4.0.block.exp_1x1.block.norm.weight", "layer_4.0.block.exp_1x1.block.norm.bias", "layer_4.0.block.exp_1x1.block.norm.running_mean", "layer_4.0.block.exp_1x1.block.norm.running_var", "layer_4.0.block.exp_1x1.block.norm.num_batches_tracked", "layer_4.0.block.conv_3x3.block.conv.weight", "layer_4.0.block.conv_3x3.block.norm.weight", "layer_4.0.block.conv_3x3.block.norm.bias", "layer_4.0.block.conv_3x3.block.norm.running_mean", "layer_4.0.block.conv_3x3.block.norm.running_var", "layer_4.0.block.conv_3x3.block.norm.num_batches_tracked", "layer_4.0.block.red_1x1.block.conv.weight", "layer_4.0.block.red_1x1.block.norm.weight", "layer_4.0.block.red_1x1.block.norm.bias", "layer_4.0.block.red_1x1.block.norm.running_mean", "layer_4.0.block.red_1x1.block.norm.running_var", "layer_4.0.block.red_1x1.block.norm.num_batches_tracked", "layer_4.1.local_rep.conv_3x3.block.conv.weight", "layer_4.1.local_rep.conv_3x3.block.norm.weight", "layer_4.1.local_rep.conv_3x3.block.norm.bias", "layer_4.1.local_rep.conv_3x3.block.norm.running_mean", "layer_4.1.local_rep.conv_3x3.block.norm.running_var", "layer_4.1.local_rep.conv_3x3.block.norm.num_batches_tracked", "layer_4.1.local_rep.conv_1x1.block.conv.weight", "layer_4.1.global_rep.0.pre_norm_mha.0.weight", 
"layer_4.1.global_rep.0.pre_norm_mha.0.bias", "layer_4.1.global_rep.0.pre_norm_mha.1.qkv_proj.weight", "layer_4.1.global_rep.0.pre_norm_mha.1.qkv_proj.bias", "layer_4.1.global_rep.0.pre_norm_mha.1.out_proj.weight", "layer_4.1.global_rep.0.pre_norm_mha.1.out_proj.bias", "layer_4.1.global_rep.0.pre_norm_ffn.0.weight", "layer_4.1.global_rep.0.pre_norm_ffn.0.bias", "layer_4.1.global_rep.0.pre_norm_ffn.1.weight", "layer_4.1.global_rep.0.pre_norm_ffn.1.bias", "layer_4.1.global_rep.0.pre_norm_ffn.4.weight", "layer_4.1.global_rep.0.pre_norm_ffn.4.bias", "layer_4.1.global_rep.1.pre_norm_mha.0.weight", "layer_4.1.global_rep.1.pre_norm_mha.0.bias", "layer_4.1.global_rep.1.pre_norm_mha.1.qkv_proj.weight", "layer_4.1.global_rep.1.pre_norm_mha.1.qkv_proj.bias", "layer_4.1.global_rep.1.pre_norm_mha.1.out_proj.weight", "layer_4.1.global_rep.1.pre_norm_mha.1.out_proj.bias", "layer_4.1.global_rep.1.pre_norm_ffn.0.weight", "layer_4.1.global_rep.1.pre_norm_ffn.0.bias", "layer_4.1.global_rep.1.pre_norm_ffn.1.weight", "layer_4.1.global_rep.1.pre_norm_ffn.1.bias", "layer_4.1.global_rep.1.pre_norm_ffn.4.weight", "layer_4.1.global_rep.1.pre_norm_ffn.4.bias", "layer_4.1.global_rep.2.pre_norm_mha.0.weight", "layer_4.1.global_rep.2.pre_norm_mha.0.bias", "layer_4.1.global_rep.2.pre_norm_mha.1.qkv_proj.weight", "layer_4.1.global_rep.2.pre_norm_mha.1.qkv_proj.bias", "layer_4.1.global_rep.2.pre_norm_mha.1.out_proj.weight", "layer_4.1.global_rep.2.pre_norm_mha.1.out_proj.bias", "layer_4.1.global_rep.2.pre_norm_ffn.0.weight", "layer_4.1.global_rep.2.pre_norm_ffn.0.bias", "layer_4.1.global_rep.2.pre_norm_ffn.1.weight", "layer_4.1.global_rep.2.pre_norm_ffn.1.bias", "layer_4.1.global_rep.2.pre_norm_ffn.4.weight", "layer_4.1.global_rep.2.pre_norm_ffn.4.bias", "layer_4.1.global_rep.3.pre_norm_mha.0.weight", "layer_4.1.global_rep.3.pre_norm_mha.0.bias", "layer_4.1.global_rep.3.pre_norm_mha.1.qkv_proj.weight", "layer_4.1.global_rep.3.pre_norm_mha.1.qkv_proj.bias", 
"layer_4.1.global_rep.3.pre_norm_mha.1.out_proj.weight", "layer_4.1.global_rep.3.pre_norm_mha.1.out_proj.bias", "layer_4.1.global_rep.3.pre_norm_ffn.0.weight", "layer_4.1.global_rep.3.pre_norm_ffn.0.bias", "layer_4.1.global_rep.3.pre_norm_ffn.1.weight", "layer_4.1.global_rep.3.pre_norm_ffn.1.bias", "layer_4.1.global_rep.3.pre_norm_ffn.4.weight", "layer_4.1.global_rep.3.pre_norm_ffn.4.bias", "layer_4.1.global_rep.4.weight", "layer_4.1.global_rep.4.bias", "layer_4.1.conv_proj.block.conv.weight", "layer_4.1.conv_proj.block.norm.weight", "layer_4.1.conv_proj.block.norm.bias", "layer_4.1.conv_proj.block.norm.running_mean", "layer_4.1.conv_proj.block.norm.running_var", "layer_4.1.conv_proj.block.norm.num_batches_tracked", "layer_4.1.fusion.block.conv.weight", "layer_4.1.fusion.block.norm.weight", "layer_4.1.fusion.block.norm.bias", "layer_4.1.fusion.block.norm.running_mean", "layer_4.1.fusion.block.norm.running_var", "layer_4.1.fusion.block.norm.num_batches_tracked", "layer_5.0.block.exp_1x1.block.conv.weight", "layer_5.0.block.exp_1x1.block.norm.weight", "layer_5.0.block.exp_1x1.block.norm.bias", "layer_5.0.block.exp_1x1.block.norm.running_mean", "layer_5.0.block.exp_1x1.block.norm.running_var", "layer_5.0.block.exp_1x1.block.norm.num_batches_tracked", "layer_5.0.block.conv_3x3.block.conv.weight", "layer_5.0.block.conv_3x3.block.norm.weight", "layer_5.0.block.conv_3x3.block.norm.bias", "layer_5.0.block.conv_3x3.block.norm.running_mean", "layer_5.0.block.conv_3x3.block.norm.running_var", "layer_5.0.block.conv_3x3.block.norm.num_batches_tracked", "layer_5.0.block.red_1x1.block.conv.weight", "layer_5.0.block.red_1x1.block.norm.weight", "layer_5.0.block.red_1x1.block.norm.bias", "layer_5.0.block.red_1x1.block.norm.running_mean", "layer_5.0.block.red_1x1.block.norm.running_var", "layer_5.0.block.red_1x1.block.norm.num_batches_tracked", "layer_5.1.local_rep.conv_3x3.block.conv.weight", "layer_5.1.local_rep.conv_3x3.block.norm.weight", 
"layer_5.1.local_rep.conv_3x3.block.norm.bias", "layer_5.1.local_rep.conv_3x3.block.norm.running_mean", "layer_5.1.local_rep.conv_3x3.block.norm.running_var", "layer_5.1.local_rep.conv_3x3.block.norm.num_batches_tracked", "layer_5.1.local_rep.conv_1x1.block.conv.weight", "layer_5.1.global_rep.0.pre_norm_mha.0.weight", "layer_5.1.global_rep.0.pre_norm_mha.0.bias", "layer_5.1.global_rep.0.pre_norm_mha.1.qkv_proj.weight", "layer_5.1.global_rep.0.pre_norm_mha.1.qkv_proj.bias", "layer_5.1.global_rep.0.pre_norm_mha.1.out_proj.weight", "layer_5.1.global_rep.0.pre_norm_mha.1.out_proj.bias", "layer_5.1.global_rep.0.pre_norm_ffn.0.weight", "layer_5.1.global_rep.0.pre_norm_ffn.0.bias", "layer_5.1.global_rep.0.pre_norm_ffn.1.weight", "layer_5.1.global_rep.0.pre_norm_ffn.1.bias", "layer_5.1.global_rep.0.pre_norm_ffn.4.weight", "layer_5.1.global_rep.0.pre_norm_ffn.4.bias", "layer_5.1.global_rep.1.pre_norm_mha.0.weight", "layer_5.1.global_rep.1.pre_norm_mha.0.bias", "layer_5.1.global_rep.1.pre_norm_mha.1.qkv_proj.weight", "layer_5.1.global_rep.1.pre_norm_mha.1.qkv_proj.bias", "layer_5.1.global_rep.1.pre_norm_mha.1.out_proj.weight", "layer_5.1.global_rep.1.pre_norm_mha.1.out_proj.bias", "layer_5.1.global_rep.1.pre_norm_ffn.0.weight", "layer_5.1.global_rep.1.pre_norm_ffn.0.bias", "layer_5.1.global_rep.1.pre_norm_ffn.1.weight", "layer_5.1.global_rep.1.pre_norm_ffn.1.bias", "layer_5.1.global_rep.1.pre_norm_ffn.4.weight", "layer_5.1.global_rep.1.pre_norm_ffn.4.bias", "layer_5.1.global_rep.2.pre_norm_mha.0.weight", "layer_5.1.global_rep.2.pre_norm_mha.0.bias", "layer_5.1.global_rep.2.pre_norm_mha.1.qkv_proj.weight", "layer_5.1.global_rep.2.pre_norm_mha.1.qkv_proj.bias", "layer_5.1.global_rep.2.pre_norm_mha.1.out_proj.weight", "layer_5.1.global_rep.2.pre_norm_mha.1.out_proj.bias", "layer_5.1.global_rep.2.pre_norm_ffn.0.weight", "layer_5.1.global_rep.2.pre_norm_ffn.0.bias", "layer_5.1.global_rep.2.pre_norm_ffn.1.weight", "layer_5.1.global_rep.2.pre_norm_ffn.1.bias", 
"layer_5.1.global_rep.2.pre_norm_ffn.4.weight", "layer_5.1.global_rep.2.pre_norm_ffn.4.bias", "layer_5.1.global_rep.3.weight", "layer_5.1.global_rep.3.bias", "layer_5.1.conv_proj.block.conv.weight", "layer_5.1.conv_proj.block.norm.weight", "layer_5.1.conv_proj.block.norm.bias", "layer_5.1.conv_proj.block.norm.running_mean", "layer_5.1.conv_proj.block.norm.running_var", "layer_5.1.conv_proj.block.norm.num_batches_tracked", "layer_5.1.fusion.block.conv.weight", "layer_5.1.fusion.block.norm.weight", "layer_5.1.fusion.block.norm.bias", "layer_5.1.fusion.block.norm.running_mean", "layer_5.1.fusion.block.norm.running_var", "layer_5.1.fusion.block.norm.num_batches_tracked", "conv_1x1_exp.block.conv.weight", "conv_1x1_exp.block.norm.weight", "conv_1x1_exp.block.norm.bias", "conv_1x1_exp.block.norm.running_mean", "conv_1x1_exp.block.norm.running_var", "conv_1x1_exp.block.norm.num_batches_tracked", "classifier.fc.weight", "classifier.fc.bias", "conv_1.block.conv.weight", "conv_1.block.norm.weight", "conv_1.block.norm.bias", "conv_1.block.norm.running_mean", "conv_1.block.norm.running_var", "conv_1.block.norm.num_batches_tracked".