import torch
from pathlib import Path
import numpy as np
import h5py
from tbd.module_net import load_tbd_net
from utils.clevr import load_vocab
from utils.generate_programs import load_program_generator, generate_programs
# Load the pretrained TbD network together with its vocabulary, then run the
# program generator over the validation questions, writing programs to disk.
vocab = load_vocab(Path('data/vocab.json'))
tbd_net = load_tbd_net(Path('models/clevr-reg-hres.pt'), vocab)

question_program_generator = load_program_generator(Path('models/program_generator.pt'))
generate_programs(Path('data/val_questions.h5'), question_program_generator,
                  dest_dir=Path('data/val/'), batch_size=128)
# Toggle between NumPy (.npy, memory-mapped) and HDF5 feature storage.
use_np_features = False
if use_np_features:
    # mmap_mode='r' avoids loading the whole feature array into RAM at once.
    features = np.load(str(Path('data/val/val_features.npy')), mmap_mode='r')
else:
    # Open read-only explicitly: h5py's implicit default mode is deprecated,
    # and older h5py versions defaulted to append mode, which could modify
    # (or corrupt) the feature file.
    features = h5py.File(Path('data/val_features.h5'), 'r')['features']

question_np = np.load(Path('data/val/questions.npy'))
image_idx_np = np.load(Path('data/val/image_idxs.npy'))
programs_np = np.load(Path('data/val/programs.npy'))
# Fixed answer vocabulary the model's output layer indexes into:
# colors, shapes, sizes, materials, yes/no, and digit strings '0'-'10'
# (digits in lexicographic order, matching the order used at training time).
answers = ['blue', 'brown', 'cyan', 'gray', 'green', 'purple', 'red', 'yellow',
           'cube', 'cylinder', 'sphere',
           'large', 'small',
           'metal', 'rubber',
           'no', 'yes',
           '0', '1', '10', '2', '3', '4', '5', '6', '7', '8', '9']
# dict(enumerate(...)) is the idiomatic form of dict(zip(range(len(...)), ...)).
pred_idx_to_token = dict(enumerate(answers))
# Output file for predicted answer tokens, one per line. It is deliberately
# kept open across the whole evaluation loop and closed at the end of the
# script; a `with` block around the loop would be cleaner on a refactor.
f = open('predicted_answers.txt', 'w', encoding='utf-8')

def write_preds(preds):
    """Append each prediction in *preds* to the open results file, one per line."""
    # writelines batches the small writes instead of two write() calls per item.
    f.writelines(pred + '\n' for pred in preds)
# Run the model over the validation set in batches, streaming the predicted
# answer tokens to the output file.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
batch_size = 128

for start in range(0, len(programs_np), batch_size):
    image_idx = image_idx_np[start:start + batch_size]
    programs = torch.LongTensor(programs_np[start:start + batch_size]).to(device)

    if use_np_features:
        feats = torch.FloatTensor(np.asarray(features[image_idx])).to(device)
    else:
        # h5py datasets cannot be fancy-indexed with an arbitrary numpy array
        # (index lists must be increasing and may not repeat an element), so
        # each image's feature map is fetched individually and stacked after.
        feats = torch.FloatTensor(
            np.asarray([np.asarray(features[idx]) for idx in image_idx])
        ).to(device)

    # NOTE(review): a size-mismatch RuntimeError inside the module network
    # (e.g. "tensor a (128) must match tensor b (16384)") typically indicates
    # the loaded features do not match the model variant — clevr-reg-hres
    # expects high-resolution features; verify the feature extraction settings.
    outputs = tbd_net(feats, programs)
    _, preds = outputs.max(1)
    preds = [pred_idx_to_token[pred] for pred in preds.detach().to('cpu').numpy()]
    write_preds(preds)

f.close()
# --- Pasted runtime error log (kept for reference; commented out so the file
# --- parses as Python) ---
# Traceback (most recent call last):
#   File "eval.py", line 72, in <module>
#     outputs = tbd_net(feats, programs)
#   File "/home/dengwei/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py", line 491, in __call__
#     result = self.forward(*input, **kwargs)
#   File "/home/dengwei/tbd-nets/tbd/module_net.py", line 195, in forward
#     output = module(feat_input, output)
#   File "/home/dengwei/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py", line 491, in __call__
#     result = self.forward(*input, **kwargs)
#   File "/home/dengwei/tbd-nets/tbd/modules.py", line 92, in forward
#     attended_feats = torch.mul(feats, attn.repeat(1, self.dim, 1, 1))
# RuntimeError: The size of tensor a (128) must match the size of tensor b (16384) at non-singleton dimension 1
# NOTE(review): this size mismatch suggests the feature file does not match
# the loaded model variant (clevr-reg-hres expects high-resolution features) —
# regenerate the features with the matching extraction settings and confirm.