Hi~
Thanks for sharing your work!
When I tried to train this model, I encountered the following problem:
Can you tell me what the problem is? Thank you so much!
I am using Python 3.6, PyTorch 1.1, and CUDA 10.0.130.
Epoch 1
RuntimeError Traceback (most recent call last)
in
11 for idx, (images, texts, masks) in enumerate(train_generator):
12
---> 13 loss, outputs, _ = model(texts, images, labels=texts, attention_mask=masks)
14
15 sum_loss += loss.item()
~/miniconda3/envs/dual/lib/python3.6/site-packages/torch/nn/modules/module.py in call(self, *input, **kwargs)
491 result = self._slow_forward(*input, **kwargs)
492 else:
--> 493 result = self.forward(*input, **kwargs)
494 for hook in self._forward_hooks.values():
495 hook_result = hook(self, input, result)
~/miniconda3/envs/dual/lib/python3.6/site-packages/transformers/modeling_gpt2.py in forward(self, input_ids, past_key_values, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, labels, use_cache, output_attentions, output_hidden_states, return_dict, **kwargs)
776 output_attentions=output_attentions,
777 output_hidden_states=output_hidden_states,
--> 778 return_dict=return_dict,
779 )
780 hidden_states = transformer_outputs[0]
~/miniconda3/envs/dual/lib/python3.6/site-packages/torch/nn/modules/module.py in call(self, *input, **kwargs)
491 result = self._slow_forward(*input, **kwargs)
492 else:
--> 493 result = self.forward(*input, **kwargs)
494 for hook in self._forward_hooks.values():
495 hook_result = hook(self, input, result)
~/miniconda3/envs/dual/lib/python3.6/site-packages/transformers/modeling_gpt2.py in forward(self, input_ids, past_key_values, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, use_cache, output_attentions, output_hidden_states, return_dict, **kwargs)
651 encoder_attention_mask=encoder_attention_mask,
652 use_cache=use_cache,
--> 653 output_attentions=output_attentions,
654 )
655
~/miniconda3/envs/dual/lib/python3.6/site-packages/torch/nn/modules/module.py in call(self, *input, **kwargs)
491 result = self._slow_forward(*input, **kwargs)
492 else:
--> 493 result = self.forward(*input, **kwargs)
494 for hook in self._forward_hooks.values():
495 hook_result = hook(self, input, result)
~/miniconda3/envs/dual/lib/python3.6/site-packages/transformers/modeling_gpt2.py in forward(self, hidden_states, layer_past, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, use_cache, output_attentions)
289 head_mask=head_mask,
290 use_cache=use_cache,
--> 291 output_attentions=output_attentions,
292 )
293 attn_output = attn_outputs[0] # output_attn: a, present, (attentions)
~/miniconda3/envs/dual/lib/python3.6/site-packages/torch/nn/modules/module.py in call(self, *input, **kwargs)
491 result = self._slow_forward(*input, **kwargs)
492 else:
--> 493 result = self.forward(*input, **kwargs)
494 for hook in self._forward_hooks.values():
495 hook_result = hook(self, input, result)
~/miniconda3/envs/dual/lib/python3.6/site-packages/transformers/modeling_gpt2.py in forward(self, hidden_states, layer_past, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, use_cache, output_attentions)
225 if layer_past is not None:
226 past_key, past_value = layer_past[0].transpose(-2, -1), layer_past[1] # transpose back cf below
--> 227 key = torch.cat((past_key, key), dim=-1)
228 value = torch.cat((past_value, value), dim=-2)
229
RuntimeError: invalid argument 0: Tensors must have same number of dimensions: got 4 and 2 at /tmp/pip-req-build-jh50bw28/aten/src/THC/generic/THCTensorMath.cu:62