def test_image_hred_model(config):
    db = vg(config, 'train')
    loaddb = caption_loader(db)
    loader = DataLoader(loaddb,
                        batch_size=3 * config.batch_size,
                        shuffle=True,
                        num_workers=config.num_workers,
                        collate_fn=caption_collate_fn)
    net = ImageHREDModel(config)
    net.train()
    for name, param in net.named_parameters():
        print(name, param.size())
    for cnt, batched in enumerate(loader):
        images = batched['images'].float()
        sent_inds = batched['sent_inds'].long()
        sent_msks = batched['sent_msks'].long()
        img_feats, txt_feats = net(sent_inds, sent_msks, None, images)
        print('images', images.size())
        print('img_feats', img_feats.size())
        print('txt_feats', txt_feats.size())
        loss = net.forward_loss(img_feats, txt_feats)
        print(loss)
        metrics, caches = net.evaluate(img_feats, txt_feats)
        print(metrics)
        break
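# Usage sketch (comments only; the optimizer choice and learning rate are
# assumptions, not part of the original test): in a real training step the
# loss computed above would be backpropagated, e.g.
#   optimizer = torch.optim.Adam(net.parameters(), lr=1e-4)
#   optimizer.zero_grad()
#   loss.backward()
#   optimizer.step()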
def dump_image_features(config):
    output_dir = osp.join(config.data_dir, 'vg', 'global_features')
    maybe_create(output_dir)
    db = vg(config)
    loaddb = caption_loader(db)
    loader = DataLoader(loaddb,
                        batch_size=1,
                        shuffle=False,
                        num_workers=0,
                        collate_fn=caption_collate_fn)
    net = ImageEncoder(config)
    if config.cuda:
        net = net.cuda()
    net.eval()
    for cnt, batched in enumerate(loader):
        images = batched['images'].float()
        if config.cuda:
            images = images.cuda()
        indices = batched['image_inds']
        image_index = int(indices[0])
        output_path = osp.join(output_dir, str(image_index).zfill(12) + '.npy')
        features = net(images).squeeze().cpu().data.numpy()
        assert len(features) == 2048
        pickle_save(output_path, features)
        print(cnt, image_index)
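# Read-back sketch (hypothetical helper, not in the original file): the dump
# above writes one feature file per image via pickle_save despite the .npy
# suffix, so unpickling is the safe assumption; if pickle_save actually wraps
# np.save, use np.load(path) instead. Assumes `osp` and `pickle` are imported
# as in the surrounding module.
def load_image_feature(output_dir, image_index):
    path = osp.join(output_dir, str(image_index).zfill(12) + '.npy')
    with open(path, 'rb') as fid:
        features = pickle.load(fid)  # 2048-d global feature, per the assert above
    return features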
def test_region_grounding_model(config):
    db = vg(config, 'test')
    loaddb = region_loader(db)
    loader = DataLoader(loaddb,
                        batch_size=3 * config.batch_size,
                        shuffle=True,
                        num_workers=config.num_workers,
                        collate_fn=region_collate_fn)
    net = RegionGroundingModel(config)
    if config.pretrained is not None:
        pretrained_path = osp.join(config.data_dir,
                                   'caches/region_grounding_ckpts',
                                   config.pretrained + '.pkl')
        states = torch.load(pretrained_path,
                            map_location=lambda storage, loc: storage)
        net.load_state_dict(states['state_dict'], strict=False)
    net.train()
    for name, param in net.named_parameters():
        print(name, param.size())
    for cnt, batched in enumerate(loader):
        scene_inds = batched['scene_inds'].long()
        sent_inds = batched['sent_inds'].long()
        sent_msks = batched['sent_msks'].long()
        region_feats = batched['region_feats'].float()
        region_clses = batched['region_clses'].long()
        region_masks = batched['region_masks'].float()
        img_feats, masked_feats, txt_feats, subspace_masks, sample_logits, sample_indices = \
            net(scene_inds, sent_inds, sent_msks,
                None, None, None,
                region_feats, region_clses, region_masks,
                config.explore_mode)
        if config.instance_dim > 1:
            print(sample_indices[0])
            # print('sample_logits', sample_logits.size())
            # print('sample_indices', sample_indices.size())
        txt_masks = txt_feats.new_ones(txt_feats.size(0), txt_feats.size(1))
        losses = net.final_loss(img_feats, masked_feats, region_masks,
                                txt_feats, txt_masks,
                                sample_logits, sample_indices)
        print('losses', losses.size(), torch.mean(losses))
        if config.subspace_alignment_mode > 0:
            metrics, cache_results = net.evaluate(masked_feats, region_masks, txt_feats)
        else:
            metrics, cache_results = net.evaluate(img_feats, region_masks, txt_feats)
        print('metrics', metrics)
        print('txt_feats', txt_feats.size())
        print('img_feats', img_feats.size())
        break
def create_text_reference_html(config):
    config_html = HTML()
    config_table = config_html.table(border='1')
    testdb = vg(config, 'val')
    for i in range(len(testdb.scenedb)):
        scene = testdb.scenedb[i]
        image_index = scene['image_index']
        path = "http://www.cs.virginia.edu/~ft3ex/data/language_vision/val_image_htmls/%d.html" % image_index
        r = config_table.tr
        c = r.td()
        c.a('%04d' % i, href='%s' % path)
    html_file = open('reference.html', 'w')
    print(config_table, file=html_file)
    html_file.close()
def create_img_reference_html(config):
    config_html = HTML()
    config_table = config_html.table(border='1')
    testdb = vg(config, 'test')
    for i in range(len(testdb.scenedb)):
        scene = testdb.scenedb[i]
        image_index = scene['image_index']
        img_path = "http://www.cs.virginia.edu/~ft3ex/data/vg/VG_100K/%d.jpg" % image_index
        html_path = "http://www.cs.virginia.edu/~ft3ex/data/language_vision/test_image_htmls/%d.html" % image_index
        r = config_table.tr
        a = r.a(href='%s' % html_path)
        a.img(src='%s' % img_path, height='150')
    html_file = open('img_reference.html', 'w')
    print(config_table, file=html_file)
    html_file.close()
def test_image_encoder(config):
    db = vg(config, 'test')
    loaddb = caption_loader(db)
    loader = DataLoader(loaddb,
                        batch_size=3 * config.batch_size,
                        shuffle=True,
                        num_workers=config.num_workers,
                        collate_fn=caption_collate_fn)
    net = ImageEncoder(config)
    for cnt, batched in enumerate(loader):
        images = batched['images'].float()
        print('images', images.size())
        feats = net(images)
        print('features', feats.size())
        break
def test_caption_loader(config):
    db = vg(config, 'train')
    # db = coco(config, 'train')
    loaddb = caption_loader(db)
    output_dir = osp.join(config.model_dir, 'test_caption_dataloader')
    maybe_create(output_dir)
    loader = DataLoader(loaddb,
                        batch_size=config.batch_size,
                        shuffle=True,
                        num_workers=config.num_workers,
                        collate_fn=caption_collate_fn)
    start = time()
    plt.switch_backend('agg')
    for cnt, batched in enumerate(loader):
        sent_inds = batched['sent_inds'].long()
        sent_msks = batched['sent_msks'].long()
        images = batched['images'].float()
        captions = batched['captions']
        print('sent_inds', sent_inds.size())
        print('sent_msks', sent_msks.size())
        print('images', images.size())
        for i in range(config.batch_size):
            color = cv2.imread(
                db.color_path_from_index(batched['image_inds'][i]),
                cv2.IMREAD_COLOR)
            out_path = osp.join(output_dir, '%d.png' % batched['image_inds'][i])
            fig = plt.figure(figsize=(32, 16))
            for j in range(min(config.max_turns, 10)):
                plt.subplot(2, 5, j + 1)
                plt.title(captions[i][j] + '\n' +
                          ' '.join([str(x.data.item()) for x in sent_inds[i, j]]) + '\n' +
                          ' '.join([str(x.data.item()) for x in sent_msks[i, j]]))
                plt.imshow(color[:, :, ::-1])
                plt.axis('off')
            fig.savefig(out_path, bbox_inches='tight')
            plt.close(fig)
        print('------------------')
        if cnt == 2:
            break
    print("Time", time() - start)
def test_region_model(config):
    db = vg(config, 'test')
    loaddb = region_loader(db)
    loader = DataLoader(loaddb,
                        batch_size=3 * config.batch_size,
                        shuffle=True,
                        num_workers=config.num_workers,
                        collate_fn=region_collate_fn)
    net = RegionModel(config)
    net.train()
    for name, param in net.named_parameters():
        print(name, param.size())
    for cnt, batched in enumerate(loader):
        start = time()
        scene_inds = batched['scene_inds'].long()[:config.batch_size]
        sent_inds = batched['sent_inds'].long()[:config.batch_size]
        sent_msks = batched['sent_msks'].long()[:config.batch_size]
        region_feats = batched['region_feats'].float()[:config.batch_size]
        region_clses = batched['region_clses'].long()[:config.batch_size]
        region_masks = batched['region_masks'].float()[:config.batch_size]
        src_region_feats = batched['region_feats'].float()[config.batch_size:2 * config.batch_size]
        src_region_clses = batched['region_clses'].long()[config.batch_size:2 * config.batch_size]
        src_region_masks = batched['region_masks'].float()[config.batch_size:2 * config.batch_size]
        img_feats, masked_feats, txt_feats, subspace_masks, sample_logits, sample_indices = \
            net(scene_inds, sent_inds, sent_msks,
                src_region_feats, src_region_clses, src_region_masks,
                region_feats, region_clses, region_masks,
                config.explore_mode)
        print('img_feats', img_feats.size())
        print('txt_feats', txt_feats.size())
        if config.subspace_alignment_mode > 0:
            print('masked_feats', masked_feats.size())
            print('subspace_masks', subspace_masks.size())
        if config.instance_dim > 1:
            print('sample_logits', sample_logits.size())
            print('sample_indices', sample_indices.size())
        print('time:', time() - start)
        break
def main_rnn(config):
    testdb = vg(config, 'test')
    trainer = RegionGroundingTrainer(config)
    with open(osp.join(testdb.cache_dir,
                       'img_features/vg_rnn_1280_img_features.pkl'), 'rb') as fid:
        data_ = pickle.load(fid)
        all_img_feats = data_['feats']
        all_img_masks = data_['masks']
    all_img_feats = torch.from_numpy(all_img_feats).float()
    all_img_masks = torch.from_numpy(all_img_masks).float()
    if config.cuda:
        all_img_feats = all_img_feats.cuda()
        all_img_masks = all_img_masks.cuda()
    print('all_img_feats', all_img_feats.size())
    print('all_img_masks', all_img_masks.size())
    count = 0
    all_captions = []
    while count < 10:
        print('Please input the query:\n')
        query = input()
        r, top5_img_inds = trainer.net.demo_step(query, all_captions,
                                                 all_img_feats, all_img_masks,
                                                 testdb)
        top5_imgs = []
        for x in top5_img_inds:
            cur_img = cv2.imread(testdb.color_path_from_index(x), cv2.IMREAD_COLOR)
            cur_img, _, _ = create_squared_image(cur_img)
            cur_img = cv2.resize(cur_img, (500, 500))
            top5_imgs.append(cur_img)
        fig = plt.figure(figsize=(32, 8))
        plt.suptitle(query, fontsize=20)
        for i in range(len(top5_imgs)):
            cur_img = top5_imgs[i]
            plt.subplot(1, 5, i + 1)
            plt.imshow(cur_img[:, :, ::-1].astype(np.uint8))
            plt.axis('off')
        plt.show()
        count += 1
        print('turn:', count)
def check_region_clses(config):
    db = vg(config, 'train')
    loaddb = region_loader(db)
    loader = DataLoader(loaddb,
                        batch_size=config.batch_size,
                        shuffle=True,
                        num_workers=config.num_workers,
                        collate_fn=region_collate_fn)
    min_index = 1000000
    max_index = -1
    for cnt, batched in enumerate(loader):
        region_clses = batched['region_clses'].long()
        min_index = min(min_index, torch.min(region_clses).item())
        max_index = max(max_index, torch.max(region_clses).item())
        if cnt % 1000 == 0:  # report progress every 1000 iterations
            print('iter:', cnt)
    print('min_index', min_index)
    print('max_index', max_index)
def test_image_model(config):
    db = vg(config, 'test')
    loaddb = caption_loader(db)
    loader = DataLoader(loaddb,
                        batch_size=config.batch_size,
                        shuffle=True,
                        num_workers=config.num_workers,
                        collate_fn=caption_collate_fn)
    net = ImageModel(config)
    for cnt, batched in enumerate(loader):
        images = batched['images'].float()
        sent_inds = batched['sent_inds'].long()
        sent_msks = batched['sent_msks'].long()
        img_feats, txt_feats = net(sent_inds, sent_msks, None, images)
        print('images', images.size())
        print('img_feats', img_feats.size())
        print('txt_feats', txt_feats.size())
        break
def test_grounding_loss(config):
    db = vg(config, 'test')
    loaddb = region_loader(db)
    loader = DataLoader(loaddb,
                        batch_size=3 * config.batch_size,
                        shuffle=True,
                        num_workers=config.num_workers,
                        collate_fn=region_collate_fn)
    net = RegionModel(config)
    criterion = GroundingLoss(config)
    for cnt, batched in enumerate(loader):
        scene_inds = batched['scene_inds'].long()[:config.batch_size]
        sent_inds = batched['sent_inds'].long()[:config.batch_size]
        sent_msks = batched['sent_msks'].long()[:config.batch_size]
        region_feats = batched['region_feats'].float()[:config.batch_size]
        region_clses = batched['region_clses'].long()[:config.batch_size]
        region_masks = batched['region_masks'].float()[:config.batch_size]
        src_region_feats = batched['region_feats'].float()[config.batch_size:2 * config.batch_size]
        src_region_clses = batched['region_clses'].long()[config.batch_size:2 * config.batch_size]
        src_region_masks = batched['region_masks'].float()[config.batch_size:2 * config.batch_size]
        img_feats, masked_feats, txt_feats, subspace_masks, sample_logits, sample_indices = \
            net(scene_inds, sent_inds, sent_msks,
                src_region_feats, src_region_clses, src_region_masks,
                region_feats, region_clses, region_masks,
                config.explore_mode)
        # compare the fast and the debug similarity implementations on the
        # unmasked features; their outputs should match
        masked_feats = img_feats
        sim1 = criterion.compute_batch_mutual_similarity(masked_feats,
                                                         region_masks,
                                                         txt_feats)
        sim2 = criterion.debug_compute_batch_mutual_similarity(masked_feats,
                                                               region_masks,
                                                               txt_feats)
        print('sim1', sim1.size())
        print('sim2', sim2.size())
        print('diff', torch.sum(torch.abs(sim1 - sim2)))
        txt_masks = txt_feats.new_ones(txt_feats.size(0), txt_feats.size(1))
        losses = criterion.forward_loss(masked_feats, region_masks,
                                        txt_feats, txt_masks,
                                        config.loss_reduction_mode)
        print('losses', losses.size())
        break
def test_paragraph_model(config):
    db = vg(config, 'test')
    loaddb = paragraph_loader(db)
    loader = DataLoader(loaddb,
                        batch_size=3 * config.batch_size,
                        shuffle=True,
                        num_workers=config.num_workers,
                        collate_fn=paragraph_collate_fn)
    net = ParagraphModel(config)
    net.train()
    for name, param in net.named_parameters():
        print(name, param.size())
    for cnt, batched in enumerate(loader):
        start = time()
        scene_inds = batched['scene_inds'].long()[:config.batch_size]
        sent_inds = batched['sent_inds'].long()[:config.batch_size]
        sent_msks = batched['sent_msks'].long()[:config.batch_size]
        region_feats = batched['region_feats'].float()[:config.batch_size]
        region_clses = batched['region_clses'].long()[:config.batch_size]
        region_masks = batched['region_masks'].float()[:config.batch_size]
        img_feats, txt_feats = net(sent_inds, sent_msks,
                                   region_feats, region_clses, region_masks)
        losses = net.loss(img_feats, region_masks, txt_feats.unsqueeze(1))
        print('losses', losses.size(), torch.mean(losses))
        metrics, cache_results = net.evaluate(img_feats, region_masks,
                                              txt_feats.unsqueeze(1))
        print('metrics', metrics)
        print('sent_inds', sent_inds.size())
        print('sent_msks', sent_msks.size())
        print('region_feats', region_feats.size())
        print('region_clses', region_clses.size())
        print('region_masks', region_masks.size())
        print('img_feats', img_feats.size())
        print('txt_feats', txt_feats.size())
        print('time:', time() - start)
        break
def test_text_encoder(config):
    db = vg(config, 'test')
    loaddb = region_loader(db)
    loader = DataLoader(loaddb,
                        batch_size=3 * config.batch_size,
                        shuffle=True,
                        num_workers=config.num_workers,
                        collate_fn=region_collate_fn)
    net = TextEncoder(config)
    for cnt, batched in enumerate(loader):
        sent_inds = batched['sent_inds'].long()
        sent_msks = batched['sent_msks'].float()
        bsize, slen, fsize = sent_inds.size()
        print('sent_inds', sent_inds.size())
        print('sent_msks', sent_msks.size())
        f1, f2, h = net(sent_inds.view(bsize * slen, fsize),
                        sent_msks.view(bsize * slen, fsize))
        print(f1.size(), f2.size(), h.size())
        break
def test_region_encoder(config):
    db = vg(config, 'test')
    loaddb = region_loader(db)
    loader = DataLoader(loaddb,
                        batch_size=3 * config.batch_size,
                        shuffle=True,
                        num_workers=config.num_workers,
                        collate_fn=region_collate_fn)
    net = RegionEncoder(config)
    for cnt, batched in enumerate(loader):
        region_feats = batched['region_feats'].float()
        region_clses = batched['region_clses'].long()
        print('region_feats', region_feats.size())
        print('region_clses', region_clses.size())
        img_feats, masked_feats, mm = net(region_feats, region_clses)
        print('img_feats', img_feats.size())
        if config.subspace_alignment_mode > 0:
            print('masked_feats', masked_feats.size())
            print('mm', mm.size())
        break
def create_html_per_image(config):
    testdb = vg(config, 'test')
    image_folder_name = 'test_image_htmls'
    maybe_create(image_folder_name)
    for i in range(len(testdb.scenedb)):
        scene = testdb.scenedb[i]
        all_meta_regions = [scene['regions'][x]
                            for x in sorted(list(scene['regions'].keys()))]
        captions = [x['caption'] for x in all_meta_regions[:config.max_turns]]
        image_index = scene['image_index']
        path = "http://www.cs.virginia.edu/~ft3ex/data/vg/VG_100K/%d.jpg" % image_index
        config_html = HTML()
        config_table = config_html.table(border='1')
        r = config_table.tr
        c1 = r.td()
        c1.img(src='%s' % path, height='700')
        c2 = r.td()
        for j in range(len(captions)):
            c2.p(captions[j])
        html_file = open(osp.join(image_folder_name, '%d.html' % image_index), 'w')
        print(config_table, file=html_file)
        html_file.close()
        print(i)
def test_model(config):
    testdb = vg(config, 'test')
    trainer = RegionGroundingTrainer(config)
    trainer.test(testdb)
def test_response_gen(config):
    s = time()
    db = vg(config)
    html_folder_name = 'template_htmls'
    maybe_create(html_folder_name)
    with open('candidates.json', 'r') as fp:
        candidates = json.load(fp)
    captioner = RelativeCaptioner(db)
    for k, v in candidates.items():
        target_scene = db.scenedb[v['src']]
        decoy_scenes = [db.scenedb[x] for x in v['top5']]

        unmention_candidates = captioner.collect_unmentioned_candidates(
            target_scene, decoy_scenes)
        captions_1 = []
        if len(unmention_candidates) > 0:
            cap1 = caption_1(
                unmention_candidates[np.random.randint(0, len(unmention_candidates))],
                target_scene)
            if cap1 is not None:
                captions_1.append(cap1)
            cap2 = caption_2(
                unmention_candidates[np.random.randint(0, len(unmention_candidates))],
                target_scene)
            if cap2 is not None:
                captions_1.append(cap2)
            cap3 = caption_3(
                unmention_candidates[np.random.randint(0, len(unmention_candidates))],
                target_scene)
            if cap3 is not None:
                captions_1.append(cap3)

        captions_2 = []
        mention_candidates = captioner.collect_mentioned_candidates(
            target_scene, decoy_scenes)
        if len(mention_candidates) > 0:
            cap1 = caption_1(
                mention_candidates[np.random.randint(0, len(mention_candidates))],
                target_scene)
            if cap1 is not None:
                captions_2.append(cap1)
            cap2 = caption_2(
                mention_candidates[np.random.randint(0, len(mention_candidates))],
                target_scene)
            if cap2 is not None:
                captions_2.append(cap2)
            cap3 = caption_3(
                mention_candidates[np.random.randint(0, len(mention_candidates))],
                target_scene)
            if cap3 is not None:
                captions_2.append(cap3)
            cap4 = caption_4(
                mention_candidates[np.random.randint(0, len(mention_candidates))],
                target_scene)
            if cap4 is not None:
                captions_2.append(cap4)
            cap5 = caption_5(
                mention_candidates[np.random.randint(0, len(mention_candidates))],
                target_scene)
            if cap5 is not None:
                captions_2.append(cap5)

        # query_path = "http://www.cs.virginia.edu/~ft3ex/data/vg/VG_100K/%d.jpg" % v['src']
        # top5_paths = ["http://www.cs.virginia.edu/~ft3ex/data/vg/VG_100K/%d.jpg" % x for x in v['top5']]
        query_path = "file:///Users/fuwentan/datasets/vg/VG_100K/%d.jpg" % v['src']
        top5_paths = [
            "file:///Users/fuwentan/datasets/vg/VG_100K/%d.jpg" % x
            for x in v['top5']
        ]

        config_html = HTML()
        config_table = config_html.table(border='1')
        r1 = config_table.tr
        c1 = r1.td(colspan="2")
        for j in range(len(captions_1)):
            c1.p(captions_1[j])
        c2 = r1.td()
        c2.img(src='%s' % query_path, height='200')
        c3 = r1.td(colspan="2")
        for j in range(len(captions_2)):
            c3.p(captions_2[j])
        r2 = config_table.tr
        for j in range(5):
            c2_r2_c = r2.td()
            c2_r2_c.img(src='%s' % top5_paths[j], height='200')

        html_file = open(
            osp.join(html_folder_name, '%d_%d.html' % (v['src'], v['turn'])), 'w')
        print(config_table, file=html_file)
        html_file.close()
        print(k)
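# Refactor sketch (an assumption: caption_1..caption_5 all share the
# signature caption_k(candidate, target_scene) -> str-or-None): the five
# near-identical blocks above could be collapsed into one loop.
def gather_captions(caption_fns, candidates, target_scene):
    # draw one random candidate per captioner and keep the non-empty captions
    captions = []
    for fn in caption_fns:
        cand = candidates[np.random.randint(0, len(candidates))]
        cap = fn(cand, target_scene)
        if cap is not None:
            captions.append(cap)
    return captions
# e.g. captions_2 = gather_captions(
#     [caption_1, caption_2, caption_3, caption_4, caption_5],
#     mention_candidates, target_scene)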
elif args.vg_dataset == "vg6":
    vg_version = "1600-400-400"
    vg_split = imdb_vg_name
    set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]',
                'ANCHOR_RATIOS', '[0.25, 0.5, 1, 2, 4]']
elif args.vg_dataset == "vg_bm":
    vg_version = "150-50-50"
    vg_split = imdb_vg_name
    set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]',
                'ANCHOR_RATIOS', '[0.25, 0.5, 1, 2, 4]']
imdb_vg = vg(vg_version, vg_split)


def bbox_proposal_fast(obj_prob, att_prob, rois):
    batch_size = obj_prob.size(0)
    # get the top obj cls, excluding the background class
    max_obj_prob, max_obj_clss = obj_prob[:, :, 1:].max(2)
    # get the top att cls, excluding the background class
    max_att_prob, max_att_clss = att_prob[:, :, 1:].max(2)
    # get the top rel cls, excluding the background class
    # max_rel_scores, max_rel_ind = rel_prob[:, :, 1:].max(2)
    # compute the final score, B x N
    obj_att_scores = max_obj_prob * max_att_prob
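# Continuation sketch (an assumption, not part of the original fragment):
# one plausible way to turn the combined B x N scores into proposals is to
# keep the top-K boxes per image. K and the roi layout (B x N x D) are
# hypothetical here; assumes `torch` is imported as in the surrounding module.
def topk_proposals(obj_att_scores, rois, K=100):
    top_scores, top_inds = obj_att_scores.topk(K, dim=1)  # B x K
    top_rois = torch.gather(
        rois, 1, top_inds.unsqueeze(2).expand(-1, -1, rois.size(2)))  # B x K x D
    return top_scores, top_rois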
def test_region_loader(config):
    db = vg(config, 'train')
    # db = coco(config, 'train')
    loaddb = region_loader(db)
    loader = DataLoader(loaddb,
                        batch_size=config.batch_size,
                        shuffle=True,
                        num_workers=config.num_workers,
                        collate_fn=region_collate_fn)
    output_dir = osp.join(config.model_dir, 'test_region_loader')
    maybe_create(output_dir)
    start = time()
    plt.switch_backend('agg')
    for cnt, batched in enumerate(loader):
        print('scene_inds', batched['scene_inds'])
        sent_inds = batched['sent_inds'].long()
        sent_msks = batched['sent_msks'].long()
        widths = batched['widths']
        heights = batched['heights']
        captions = batched['captions']
        region_boxes = batched['region_boxes'].float()
        region_feats = batched['region_feats'].float()
        region_clses = batched['region_clses'].long()
        region_masks = batched['region_masks'].long()
        print('sent_inds', sent_inds.size())
        print('sent_msks', sent_msks.size())
        print('region_boxes', region_boxes.size())
        print('region_feats', region_feats.size())
        print('region_clses', region_clses.size())
        print('region_masks', region_masks.size())
        print('clses', torch.min(region_clses), torch.max(region_clses))
        print('widths', widths)
        print('heights', heights)
        for i in range(len(sent_inds)):
            entry = {}
            image_index = batched['image_inds'][i]
            entry['width'] = widths[i]
            entry['height'] = heights[i]
            nr = torch.sum(region_masks[i])
            entry['region_boxes'] = xyxys_to_xywhs(
                region_boxes[i, :nr].cpu().data.numpy())
            color = cv2.imread(db.color_path_from_index(image_index),
                               cv2.IMREAD_COLOR)
            color, _, _ = create_squared_image(color)
            out_path = osp.join(output_dir, '%d.png' % image_index)
            layouts = db.render_regions_as_output(
                entry,
                bg=cv2.resize(color,
                              (config.visu_size[0], config.visu_size[0]))[:, :, ::-1])
            fig = plt.figure(figsize=(32, 16))
            for j in range(min(14, len(layouts))):
                plt.subplot(3, 5, j + 1)
                if j < config.max_turns:
                    plt.title(captions[i][j] + '\n' +
                              ' '.join([str(x.data.item()) for x in sent_inds[i, j]]) + '\n' +
                              ' '.join([str(x.data.item()) for x in sent_msks[i, j]]))
                plt.imshow(layouts[j].astype(np.uint8))
                plt.axis('off')
            plt.subplot(3, 5, 15)
            plt.imshow(color[:, :, ::-1])
            plt.axis('off')
            fig.savefig(out_path, bbox_inches='tight')
            plt.close(fig)
        print('------------------')
        if cnt == 2:
            break
    print("Time", time() - start)
def overfit_model(config):
    valdb = vg(config, 'val')
    valdb.scenedb = valdb.scenedb[:31]
    trainer = ImageHREDTrainer(config)
    trainer.train(valdb, valdb, valdb)
def train_model(config):
    traindb = vg(config, 'train')
    valdb = vg(config, 'val')
    trainer = ImageHREDTrainer(config)
    trainer.train(traindb, valdb, valdb)
def overfit_model(config):
    valdb = vg(config, 'val')
    valdb.scenedb = valdb.scenedb[:31]
    trainer = RegionGroundingTrainer(config)
    trainer.train(valdb, valdb, valdb)
# Set up coco_2015_<split>
for year in ['2015']:
    for split in ['test', 'test-dev']:
        name = 'coco_{}_{}'.format(year, split)
        __sets[name] = (lambda split=split, year=year: coco(split, year))

# Set up vg_<version>_<split>
# for version in ['1600-400-20']:
#     for split in ['minitrain', 'train', 'minival', 'val', 'test']:
#         name = 'vg_{}_{}'.format(version, split)
#         __sets[name] = (lambda split=split, version=version: vg(version, split))
for version in ['150-50-20', '150-50-50', '500-150-80', '750-250-150',
                '1750-700-450', '1600-400-20']:
    for split in ['minitrain', 'smalltrain', 'train', 'minival', 'smallval',
                  'val', 'test']:
        name = 'vg_{}_{}'.format(version, split)
        __sets[name] = (lambda split=split, version=version: vg(version, split))

# Set up imagenet_<split>
for split in ['train', 'val', 'val1', 'val2', 'test']:
    name = 'imagenet_{}'.format(split)
    devkit_path = 'data/imagenet/ILSVRC/devkit'
    data_path = 'data/imagenet/ILSVRC'
    __sets[name] = (lambda split=split, devkit_path=devkit_path,
                    data_path=data_path: imagenet(split, devkit_path, data_path))

# Set up wider_face_<split>
for split in ['train', 'val', 'test']:
    name = 'wider_face_{}'.format(split)
    __sets[name] = (lambda split=split: wider_face(split))

# Set up MI3_<split>
for split in ['train', 'val', 'test']:
    name = 'MI3_{}'.format(split)
    __sets[name] = (lambda split=split: mi3(split))
def test_vg_dataset(config):
    s = time()
    db = vg(config, 'train')
    # minimal smoke check: report dataset size and load time
    print('scenes:', len(db.scenedb))
    print('load time:', time() - s)
def dump_trained_features(config):
    traindb = vg(config, 'train')
    trainer = RegionGroundingTrainer(config)
    trainer.test(traindb)
def test_model(config):
    testdb = vg(config, 'test')
    trainer = ImageHREDTrainer(config)
    trainer.test(testdb)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

from datasets.vg import vg
from datasets.clevr import clevr
from datasets.visualgenome import visualgenome
from datasets.vrd import vrd

__sets = {}

# Set up visual_genome_<split>
for split in ['train', 'validation', 'test']:
    name = 'visual_genome_{}'.format(split)
    __sets[name] = (lambda split=split: vg(split))

# Set up clevr_<split>
for split in ['train', 'val', 'test']:
    name = 'clevr_{}'.format(split)
    __sets[name] = (lambda split=split: clevr(split))

# Set up visualgenome_<split>
for split in ['train', 'validation', 'test']:
    name = 'visualgenome_{}'.format(split)
    __sets[name] = (lambda split=split: visualgenome(split))

# Set up vrd_<split>
for split in ['train', 'validation', 'test']:
    name = 'vrd_{}'.format(split)
    __sets[name] = (lambda split=split: vrd(split))
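# Lookup sketch: each loop above registers a zero-argument constructor under
# a '<dataset>_<split>' key, so a dataset is instantiated by name, e.g.
#   imdb = __sets['visual_genome_train']()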
def dump_trained_features(config):
    traindb = vg(config, 'train')
    trainer = ImageHREDTrainer(config)
    trainer.test(traindb)
# Set up vg_<version>_<split>
# for version in ['1600-400-20']:
#     for split in ['minitrain', 'train', 'minival', 'val', 'test']:
#         name = 'vg_{}_{}'.format(version, split)
#         __sets[name] = (lambda split=split, version=version: vg(version, split))
for version in ['150-50-20', '150-50-50', '500-150-80', '750-250-150',
                '1750-700-450', '1600-400-20']:
    for split in ['minitrain', 'smalltrain', 'train', 'minival', 'smallval',
                  'val', 'test']:
        name = 'vg_{}_{}'.format(version, split)
        __sets[name] = (lambda split=split, version=version: vg(version, split))

# Set up imagenet_<split>
for split in ['train', 'val', 'val1', 'val2', 'test']:
    name = 'imagenet_{}'.format(split)
    devkit_path = 'data/imagenet/ILSVRC/devkit'
    data_path = 'data/imagenet/ILSVRC'
    __sets[name] = (lambda split=split, devkit_path=devkit_path,
                    data_path=data_path: imagenet(split, devkit_path, data_path))


def get_imdb(name):
    """Get an imdb (image database) by name."""
    if name not in __sets:
        raise KeyError('Unknown dataset: {}'.format(name))
    return __sets[name]()
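# Example: with the registrations above, a dataset resolves by name, e.g.
#   imdb = get_imdb('vg_150-50-50_train')
# which calls vg('150-50-50', 'train').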