def main(params):
    """Preprocess a caption dataset and extract ResNet features per image.

    Loads the dataset description from ``params['input_json']``, builds the
    vocabulary, assigns splits, encodes the captions, runs every image
    through a pretrained ResNet on the GPU and writes four files:

    * ``<output_h5>_<resnet_type>_label.h5`` -- datasets ``labels``,
      ``label_start_ix``, ``label_end_ix`` and ``label_length`` (uint32).
    * ``<output_h5>_<resnet_type>_fc.h5`` -- dataset ``fc``, float32,
      shape (N, 2048).
    * ``<output_h5>_<resnet_type>_att.h5`` -- dataset ``att``, float32,
      shape (N, 14, 14, 2048).
    * ``params['output_json']`` -- ``ix_to_word`` (1-indexed vocab) and one
      entry per image with its ``split`` and, when present, ``filepath``
      and ``id``.

    Args:
        params: mapping of options. Keys read directly here are
            ``input_json``, ``output_h5``, ``images_root`` and
            ``output_json``. The whole mapping is also forwarded to
            ``build_vocab``, ``assign_splits`` and ``encode_captions``.

    Requires a CUDA device: the model and every image tensor are moved to
    the GPU.
    """
    # Close the input handle deterministically instead of leaking it.
    with open(params['input_json'], 'r') as f_in:
        data = json.load(f_in)

    seed(123)  # make reproducible
    imgs = data["images"]  # NOTE: shuffling of imgs is currently disabled

    prepro_captions(imgs)

    # create the vocab; both tables are 1-indexed
    vocab = build_vocab(imgs, params)
    itow = {ix: word for ix, word in enumerate(vocab, 1)}
    wtoi = {word: ix for ix, word in enumerate(vocab, 1)}

    # assign the splits
    assign_splits(imgs, params)

    # encode captions in large arrays, ready to ship to hdf5 file
    labels, label_start_ix, label_end_ix, label_length = encode_captions(
        imgs, params, wtoi)

    # Local import under a distinct alias so the module is not shadowed by
    # the model instance created from it.
    import misc.resnet as resnet_models

    # Must name the architecture that is actually loaded below, because it
    # is embedded in every output file name ('resnet151' is not a real
    # architecture and mislabelled ResNet-152 features).
    resnet_type = 'resnet152'
    if resnet_type == 'resnet101':
        net = resnet_models.resnet101()
        net.load_state_dict(torch.load('resnet/resnet101.pth'))
    else:
        net = resnet_models.resnet152()
        net.load_state_dict(torch.load('resnet/resnet152.pth'))
    my_resnet = myResnet(net)
    my_resnet.cuda()
    my_resnet.eval()

    # create output h5 files
    N = len(imgs)
    h5_prefix = params['output_h5'] + '_' + resnet_type

    with h5py.File(h5_prefix + '_label.h5', "w") as f_lb:
        f_lb.create_dataset("labels", dtype='uint32', data=labels)
        f_lb.create_dataset("label_start_ix", dtype='uint32',
                            data=label_start_ix)
        f_lb.create_dataset("label_end_ix", dtype='uint32',
                            data=label_end_ix)
        f_lb.create_dataset("label_length", dtype='uint32',
                            data=label_length)

    ### extract features
    # The context managers guarantee both files are closed even if an image
    # fails to load or the forward pass raises.
    with h5py.File(h5_prefix + '_fc.h5', "w") as f_fc, \
            h5py.File(h5_prefix + '_att.h5', "w") as f_att:
        dset_fc = f_fc.create_dataset("fc", (N, 2048), dtype='float32')
        dset_att = f_att.create_dataset("att", (N, 14, 14, 2048),
                                        dtype='float32')
        for i, img in enumerate(imgs):
            # load the image; joining the parts separately keeps the path
            # under images_root even when 'filepath' is empty
            image_path = os.path.join(params['images_root'],
                                      img['filepath'], img['filename'])
            I = skimage.io.imread(image_path)
            # handle grayscale input images by replicating the one channel
            if len(I.shape) == 2:
                I = I[:, :, np.newaxis]
                I = np.concatenate((I, I, I), axis=2)

            # scale to [0, 1] and reorder HWC -> CHW before normalising
            I = I.astype('float32') / 255.0
            I = torch.from_numpy(I.transpose([2, 0, 1])).cuda()
            # volatile=True is the old-style PyTorch way to disable
            # gradient tracking; kept for compatibility with this file.
            I = Variable(preprocess(I), volatile=True)
            tmp_fc, tmp_att = my_resnet(I)

            # write to h5
            dset_fc[i] = tmp_fc.data.cpu().float().numpy()
            dset_att[i] = tmp_att.data.cpu().float().numpy()
            if i % 1000 == 0:
                print('processing %d/%d (%.2f%% done)'
                      % (i, N, i * 100.0 / N))
    print('wrote  %s' % (params['output_h5'],))

    # create output json file
    out = {}
    out['ix_to_word'] = itow  # encode the (1-indexed) vocab
    out['images'] = []
    for img in imgs:
        jimg = {}
        jimg['split'] = img['split']
        if 'filepath' in img:
            jimg['filepath'] = img['filepath']  # copy it over, might need
        if 'id' in img:
            # copy over & maintain an id, if present (e.g. coco ids, useful)
            jimg['id'] = img['id']
        out['images'].append(jimg)

    # Closing the handle explicitly ensures the JSON is flushed to disk.
    with open(params['output_json'], 'w') as f_out:
        json.dump(out, f_out)
    print('wrote  %s' % (params['output_json'],))
import torch
from torch.autograd import Variable
import skimage
import skimage.io
import scipy.misc
from torchvision import transforms as trn

from misc.resnet_utils import myResnet
import misc.resnet as resnet

# Normalisation applied to image tensors before they are fed to the network.
# ToTensor is disabled in this pipeline -- presumably callers convert images
# to CHW float tensors themselves (TODO confirm against callers).
# The mean/std values match the ImageNet channel statistics documented for
# torchvision's pretrained models.
preprocess = trn.Compose([
    trn.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# Build the feature extractor once at import time and park it on the GPU in
# eval mode.
# NOTE: `resnet` is rebound here from the module alias to the model instance.
resnet = resnet.resnet101()
resnet.load_state_dict(
    torch.load('/datadrive/resnet_pretrianed_t7/resnet101.pth'))
my_resnet = myResnet(resnet)
my_resnet.cuda()
my_resnet.eval()


class DataLoaderRaw():
    """Data loader configured from an options mapping."""

    def __init__(self, opt):
        """Store ``opt`` and copy the loader settings onto the instance.

        Args:
            opt: mapping supporting ``.get``. Optional keys and their
                fallbacks: ``coco_json`` (''), ``folder_path`` ('') and
                ``batch_size`` (1).
        """
        self.opt = opt
        # Optional settings: (attribute/key name, fallback when absent).
        optional_settings = (
            ('coco_json', ''),
            ('folder_path', ''),
            ('batch_size', 1),
        )
        for key, fallback in optional_settings:
            setattr(self, key, opt.get(key, fallback))
        # Fixed at one sequence per image; not configurable through opt.
        self.seq_per_img = 1
def main(params):
    """Preprocess a caption dataset and extract ResNet features per image.

    Loads the dataset description from ``params['input_json']``, builds the
    vocabulary, assigns splits, encodes the captions, runs every image
    through a pretrained ResNet on the GPU and writes four files:

    * ``<output_h5>_<resnet_type>_label.h5`` -- datasets ``labels``,
      ``label_start_ix``, ``label_end_ix`` and ``label_length`` (uint32).
    * ``<output_h5>_<resnet_type>_fc.h5`` -- dataset ``fc``, float32,
      shape (N, 2048).
    * ``<output_h5>_<resnet_type>_att.h5`` -- dataset ``att``, float32,
      shape (N, 14, 14, 2048).
    * ``params['output_json']`` -- ``ix_to_word`` (1-indexed vocab) and one
      entry per image with its ``split`` and, when present, ``filepath``
      and ``id``.

    Args:
        params: mapping of options. Keys read directly here are
            ``input_json``, ``output_h5``, ``images_root`` and
            ``output_json``. The whole mapping is also forwarded to
            ``build_vocab``, ``assign_splits`` and ``encode_captions``.

    Requires a CUDA device: the model and every image tensor are moved to
    the GPU.
    """
    # Close the input handle deterministically instead of leaking it.
    with open(params['input_json'], 'r') as f_in:
        data = json.load(f_in)

    seed(123)  # make reproducible
    imgs = data["images"]  # NOTE: shuffling of imgs is currently disabled

    prepro_captions(imgs)

    # create the vocab; both tables are 1-indexed
    vocab = build_vocab(imgs, params)
    itow = {ix: word for ix, word in enumerate(vocab, 1)}
    wtoi = {word: ix for ix, word in enumerate(vocab, 1)}

    # assign the splits
    assign_splits(imgs, params)

    # encode captions in large arrays, ready to ship to hdf5 file
    labels, label_start_ix, label_end_ix, label_length = encode_captions(
        imgs, params, wtoi)

    # Local import under a distinct alias so the module is not shadowed by
    # the model instance created from it.
    import misc.resnet as resnet_models

    # The architecture name is embedded in every output file name, so it
    # must match the weights loaded below.
    resnet_type = 'resnet101'
    if resnet_type == 'resnet101':
        net = resnet_models.resnet101()
        net.load_state_dict(torch.load('resnet/resnet101.pth'))
    else:
        net = resnet_models.resnet152()
        net.load_state_dict(torch.load('resnet/resnet152.pth'))
    my_resnet = myResnet(net)
    my_resnet.cuda()
    my_resnet.eval()

    # create output h5 files
    N = len(imgs)
    h5_prefix = params['output_h5'] + '_' + resnet_type

    with h5py.File(h5_prefix + '_label.h5', "w") as f_lb:
        f_lb.create_dataset("labels", dtype='uint32', data=labels)
        f_lb.create_dataset("label_start_ix", dtype='uint32',
                            data=label_start_ix)
        f_lb.create_dataset("label_end_ix", dtype='uint32',
                            data=label_end_ix)
        f_lb.create_dataset("label_length", dtype='uint32',
                            data=label_length)

    ### extract features
    # The context managers guarantee both files are closed even if an image
    # fails to load or the forward pass raises.
    with h5py.File(h5_prefix + '_fc.h5', "w") as f_fc, \
            h5py.File(h5_prefix + '_att.h5', "w") as f_att:
        dset_fc = f_fc.create_dataset("fc", (N, 2048), dtype='float32')
        dset_att = f_att.create_dataset("att", (N, 14, 14, 2048),
                                        dtype='float32')
        for i, img in enumerate(imgs):
            # load the image; joining the parts separately keeps the path
            # under images_root even when 'filepath' is empty
            image_path = os.path.join(params['images_root'],
                                      img['filepath'], img['filename'])
            I = skimage.io.imread(image_path)
            # handle grayscale input images by replicating the one channel
            if len(I.shape) == 2:
                I = I[:, :, np.newaxis]
                I = np.concatenate((I, I, I), axis=2)

            # scale to [0, 1] and reorder HWC -> CHW before normalising
            I = I.astype('float32') / 255.0
            I = torch.from_numpy(I.transpose([2, 0, 1])).cuda()
            # volatile=True is the old-style PyTorch way to disable
            # gradient tracking; kept for compatibility with this file.
            I = Variable(preprocess(I), volatile=True)
            tmp_fc, tmp_att = my_resnet(I)

            # write to h5
            dset_fc[i] = tmp_fc.data.cpu().float().numpy()
            dset_att[i] = tmp_att.data.cpu().float().numpy()
            if i % 1000 == 0:
                print('processing %d/%d (%.2f%% done)'
                      % (i, N, i * 100.0 / N))
    print('wrote  %s' % (params['output_h5'],))

    # create output json file
    out = {}
    out['ix_to_word'] = itow  # encode the (1-indexed) vocab
    out['images'] = []
    for img in imgs:
        jimg = {}
        jimg['split'] = img['split']
        if 'filepath' in img:
            jimg['filepath'] = img['filepath']  # copy it over, might need
        if 'id' in img:
            # copy over & maintain an id, if present (e.g. coco ids, useful)
            jimg['id'] = img['id']
        out['images'].append(jimg)

    # Closing the handle explicitly ensures the JSON is flushed to disk.
    with open(params['output_json'], 'w') as f_out:
        json.dump(out, f_out)
    print('wrote  %s' % (params['output_json'],))