def load_pretrained_model(opt):
    """Build a DCN model, optionally restore its weights, and put it in eval mode.

    Args:
        opt: Mapping of options. Keys read here: ``"data_info"`` (path given
            to ``torch.load``; the loaded object must provide ``"word2idx"``,
            ``"idx2word"`` and ``"idx2ans"``), ``"gpus"`` (sequence of device
            ids), ``"word_vecs"`` (forwarded to
            ``LargeEmbedding.load_pretrained_vectors``) and ``"train_from"``
            (checkpoint path; a falsy value skips weight restoration).

    Returns:
        A ``(model, idx2ans, word2idx)`` tuple.
    """
    data_info = torch.load(opt['data_info'])
    word2idx = data_info["word2idx"]
    idx2word = data_info["idx2word"]
    idx2ans = data_info["idx2ans"]

    print("Building model...")
    word_embedded = LargeEmbedding(len(idx2word), 300, padding_idx=0,
                                   devices=opt['gpus'])
    word_embedded.load_pretrained_vectors(opt['word_vecs'])

    # NOTE(review): 2187 is hard-coded; presumably the number of candidate
    # answers the checkpoint was trained with -- confirm against len(idx2ans).
    model = DCN(opt, 2187)

    dict_checkpoint = opt['train_from']
    if dict_checkpoint:
        print("Loading model from checkpoint at %s" % dict_checkpoint)
        # Keep the restored tensors on the CPU instead of the device they were
        # saved from; load_state_dict copies them into the model's parameters
        # and the model is moved to the requested GPU below.
        checkpoint = torch.load(dict_checkpoint,
                                map_location=lambda storage, loc: storage)
        print(checkpoint.keys())
        model.load_state_dict(checkpoint['model'])

    if len(opt['gpus']) >= 1:
        model.cuda(opt['gpus'][0])
    # The embedding is attached after the state dict is loaded and after the
    # model is moved, so neither operation touches it.
    model.word_embedded = word_embedded
    model.eval()
    return model, idx2ans, word2idx
def main(opt):
    """Generate answers for every (image, question) pair in the test split.

    Args:
        opt: Options object. Attributes read here: ``data_path``,
            ``data_name``, ``img_path``, ``img_type``, ``batch_size``,
            ``num_workers``, ``gpus``, ``word_vectors``, ``arch``
            (``"DCN"`` or ``"DCNWithRCNN"``), ``resume`` (checkpoint path;
            falsy to skip restoring weights) and ``ensemble``.

    Raises:
        ValueError: If ``opt.arch`` is not one of the supported names.
    """
    print("Constructing the dataset...")
    testset = VQADataset(opt.data_path, opt.data_name, "test", opt.img_path,
                         opt.img_type, "test")
    testLoader = DataLoader(testset, batch_size=opt.batch_size, shuffle=False,
                            drop_last=False, num_workers=opt.num_workers,
                            pin_memory=True, collate_fn=default_collate,
                            batch_sampler=BatchSampler)
    idx2word = testset.idx2word
    idx2ans = testset.idx2ans

    print("Building model...")
    word_embedded = LargeEmbedding(len(idx2word), 300, padding_idx=0,
                                   devices=opt.gpus)
    word_embedded.load_pretrained_vectors(opt.word_vectors)

    num_ans = testset.ans_pool.shape[0]
    if opt.arch == "DCNWithRCNN":
        model = DCNWithRCNN(opt, num_ans)
    elif opt.arch == "DCN":
        model = DCN(opt, num_ans)
    else:
        # Without this branch `model` stays unbound and the failure only
        # surfaces later as an UnboundLocalError.
        raise ValueError(
            "Unsupported architecture %r; expected 'DCN' or 'DCNWithRCNN'"
            % (opt.arch,))

    dict_checkpoint = opt.resume
    if dict_checkpoint:
        print("Loading model from checkpoint at %s" % dict_checkpoint)
        # Keep the restored tensors on the CPU instead of the device they were
        # saved from; load_state_dict copies them into the model's parameters.
        checkpoint = torch.load(dict_checkpoint,
                                map_location=lambda storage, loc: storage)
        model.load_state_dict(checkpoint["state_dict"])

    if len(opt.gpus) >= 1:
        model.cuda(opt.gpus[0])
    # Attached after loading/moving so neither operation touches the embedding.
    model.word_embedded = word_embedded

    print("Generating answers...")
    # NOTE(review): model.eval() is not called here; confirm that answer()
    # switches the model to evaluation mode.
    with torch.no_grad():
        answer(testLoader, model, idx2ans, opt, ensemble=opt.ensemble)
def main(opt):
    """Generate answers for every (image, question) pair in the test split.

    Args:
        opt: Options object. Attributes read here: ``data_path``,
            ``data_name``, ``seq_per_img``, ``img_name``, ``size_scale``,
            ``use_h5py``, ``use_rcnn``, ``batch``, ``num_workers``,
            ``use_thread``, ``gpus``, ``word_vectors``, ``predict_type``,
            ``train_from`` (checkpoint path; falsy to skip restoring weights)
            and ``ensemble``.
    """
    print("Constructing the dataset...")
    if opt.use_rcnn:
        testset = RCNN_Dataset(opt.data_path, opt.data_name, "test",
                               opt.seq_per_img)
    else:
        testset = Dataset(opt.data_path, opt.data_name, "test",
                          opt.seq_per_img, opt.img_name, opt.size_scale,
                          use_h5py=opt.use_h5py)
    testLoader = DataLoader(testset, batch_size=opt.batch, shuffle=False,
                            num_workers=opt.num_workers, pin_memory=True,
                            drop_last=False, use_thread=opt.use_thread)
    idx2word = testset.idx2word
    idx2ans = testset.idx2ans
    ans_pool = torch.from_numpy(testset.ans_pool)

    print("Building model...")
    word_embedded = LargeEmbedding(len(idx2word), 300, padding_idx=0,
                                   devices=opt.gpus)
    word_embedded.load_pretrained_vectors(opt.word_vectors)

    if opt.predict_type in ["sum_attn", "cat_attn", "prod_attn"]:
        # These prediction types only need the number of candidate answers.
        num_ans = ans_pool.size(0)
        if opt.use_rcnn:
            model = DCNWithRCNN(opt, num_ans)
        else:
            model = DCN(opt, num_ans)
    else:
        # Every other prediction type receives the embedded answer pool and a
        # mask that is 1.0 wherever the pool entry is non-zero.
        # NOTE(review): `volatile=True` is deprecated since PyTorch 0.4 in
        # favour of torch.no_grad(); left untouched because only `.data` is
        # consumed here.
        ans = word_embedded(
            Variable(ans_pool.cuda(opt.gpus[0]), volatile=True)).data
        ans_mask = ans_pool.ne(0).float()
        if opt.use_rcnn:
            model = DCNWithRCNNAns(opt, ans, ans_mask)
        else:
            model = DCNWithAns(opt, ans, ans_mask)

    dict_checkpoint = opt.train_from
    if dict_checkpoint:
        print("Loading model from checkpoint at %s" % dict_checkpoint)
        # Keep the restored tensors on the CPU instead of the device they were
        # saved from; load_state_dict copies them into the model's parameters.
        checkpoint = torch.load(dict_checkpoint,
                                map_location=lambda storage, loc: storage)
        model.load_state_dict(checkpoint["model"])

    if len(opt.gpus) >= 1:
        model.cuda(opt.gpus[0])
    # Attached after loading/moving so neither operation touches the embedding.
    model.word_embedded = word_embedded

    print("Generating answers...")
    with torch.cuda.device(opt.gpus[0]):
        answer(testLoader, model, idx2ans, opt, ensemble=opt.ensemble)
use_thread=opt['use_thread']) idx2word = testset.idx2word idx2ans = testset.idx2ans ans_pool = testset.ans_pool ans_pool = torch.from_numpy(ans_pool) print("Building model...") word_embedded = LargeEmbedding(len(idx2word), 300, padding_idx=0, devices=opt['gpus']) word_embedded.load_pretrained_vectors(opt['word_vectors']) num_ans = ans_pool.size(0) model = DCN(opt, num_ans) dict_checkpoint = opt['train_from'] if dict_checkpoint: print("Loading model from checkpoint at %s" % dict_checkpoint) checkpoint = torch.load(dict_checkpoint) model.load_state_dict(checkpoint["model"]) if len(opt['gpus']) >= 1: model.cuda(opt['gpus'][0]) model.word_embedded = word_embedded print("Generating answers...") with torch.cuda.device(opt['gpus'][0]): answer(testLoader, model, idx2ans, opt)
def main(opt):
    """Train a DCN-family model on the train (or trainval) split.

    Args:
        opt: Options object. Attributes read here: ``seed``, ``trainval``,
            ``data_path``, ``data_name``, ``seq_per_img``, ``img_name``,
            ``size_scale``, ``use_h5py``, ``use_rcnn``, ``batch_size``,
            ``shuffle``, ``num_workers``, ``pin_memory``, ``drop_last``,
            ``use_thread``, ``gpus``, ``word_vectors``, ``predict_type``,
            ``train_from`` (checkpoint path; falsy to start from scratch),
            ``lr``, ``weight_decay``, ``record_step``, ``step_size`` and
            ``gamma``.

    Exits the process via ``sys.exit`` when training is interrupted with
    Ctrl-C.
    """
    Initializer.manual_seed(opt.seed)

    def _load_split(split):
        """Build the dataset for `split` and the loader that iterates it."""
        if opt.use_rcnn:
            dataset = RCNN_Dataset(opt.data_path, opt.data_name, split,
                                   opt.seq_per_img)
        else:
            dataset = Dataset(opt.data_path, opt.data_name, split,
                              opt.seq_per_img, opt.img_name, opt.size_scale,
                              use_h5py=opt.use_h5py)
        loader = DataLoader(dataset, batch_size=opt.batch_size,
                            shuffle=opt.shuffle, num_workers=opt.num_workers,
                            pin_memory=opt.pin_memory,
                            drop_last=opt.drop_last,
                            use_thread=opt.use_thread)
        return dataset, loader

    print("Constructing the dataset...")
    if opt.trainval == 0:
        trainset, trainLoader = _load_split("train")
        _, valLoader = _load_split("val")
    else:
        # Training on train+val leaves nothing to validate on.
        trainset, trainLoader = _load_split("trainval")
        valLoader = None

    idx2word = trainset.idx2word
    ans_pool = torch.from_numpy(trainset.ans_pool)

    print("Building model...")
    word_embedded = LargeEmbedding(len(idx2word), 300, padding_idx=0,
                                   devices=opt.gpus)
    word_embedded.load_pretrained_vectors(opt.word_vectors)

    if opt.predict_type in ["sum_attn", "cat_attn", "prod_attn"]:
        # These prediction types only need the number of candidate answers.
        num_ans = ans_pool.size(0)
        if opt.use_rcnn:
            model = DCNWithRCNN(opt, num_ans)
        else:
            model = DCN(opt, num_ans)
    else:
        # Every other prediction type receives the embedded answer pool and a
        # mask that is 1.0 wherever the pool entry is non-zero.
        # NOTE(review): `volatile=True` is deprecated since PyTorch 0.4 in
        # favour of torch.no_grad(); left untouched because only `.data` is
        # consumed here.
        ans = word_embedded(
            Variable(ans_pool.cuda(opt.gpus[0]), volatile=True)).data
        ans_mask = ans_pool.ne(0).float()
        if opt.use_rcnn:
            model = DCNWithRCNNAns(opt, ans, ans_mask)
        else:
            model = DCNWithAns(opt, ans, ans_mask)

    criterion = BinaryLoss()
    evaluation = Accuracy()

    dict_checkpoint = opt.train_from
    if dict_checkpoint:
        print("Loading model from checkpoint at %s" % dict_checkpoint)
        # Keep the restored tensors on the CPU instead of the device they were
        # saved from; load_state_dict copies them into the model's parameters.
        checkpoint = torch.load(dict_checkpoint,
                                map_location=lambda storage, loc: storage)
        model.load_state_dict(checkpoint["model"])

    if len(opt.gpus) >= 1:
        model.cuda(opt.gpus[0])
    if len(opt.gpus) > 1:
        model = nn.DataParallel(model, opt.gpus, dim=0)
    # Attached last: with several GPUs this lands on the DataParallel wrapper,
    # not on the wrapped module.
    model.word_embedded = word_embedded

    trainable = [p for p in model.parameters() if p.requires_grad]
    optimizer = Adam(trainable, lr=opt.lr, weight_decay=opt.weight_decay,
                     record_step=opt.record_step)
    scheduler = lr_scheduler.StepLR(optimizer, opt.step_size, gamma=opt.gamma)
    optim_wrapper = OptimWrapper(optimizer, scheduler)

    # Report the parameter count, leaving out anything whose name starts with
    # "resnet", "word_embedded" or "ans".
    core = model.module if len(opt.gpus) > 1 else model
    skipped_prefixes = ("resnet", "word_embedded", "ans")
    nparams = [param.numel() for name, param in core.named_parameters()
               if not name.startswith(skipped_prefixes)]
    print("* Number of parameters: %d" % sum(nparams))

    # Presumably drops the reference to the loaded checkpoint before training.
    checkpoint = None

    timer = Timer()
    timer.tic()
    try:
        with torch.cuda.device(opt.gpus[0]):
            trainModel(trainLoader, valLoader, model, criterion, evaluation,
                       optim_wrapper, opt)
    except KeyboardInterrupt:
        print("It toke %.2f hours to train the network"
              % (timer.toc() / 3600))
        sys.exit("Training interrupted")
    print("It toke %.2f hours to train the network" % (timer.toc() / 3600))
valset = None valLoader = None idx2word = trainset.idx2word ans_pool = trainset.ans_pool ans_pool = torch.from_numpy(ans_pool) print("Building model...") word_embedded = LargeEmbedding(len(idx2word), 300, padding_idx=0, devices=opt['gpus']) word_embedded.load_pretrained_vectors(opt['word_vectors']) num_ans = ans_pool.size(0) model = DCN(opt, num_ans) criterion = BinaryLoss() evaluation = Accuracy() dict_checkpoint = opt['train_from'] if dict_checkpoint: print("Loading model from checkpoint at %s" % dict_checkpoint) checkpoint = torch.load(dict_checkpoint) model.load_state_dict(checkpoint["model"]) if len(opt['gpus']) >= 1: model.cuda(opt['gpus'][0]) if len(opt['gpus']) > 1: model = nn.DataParallel(model, opt['gpus'], dim=0)
def main(opt):
    """Train a DCN / DCNWithRCNN model, optionally resuming from a checkpoint.

    Args:
        opt: Options object. Attributes read here: ``directory``, ``resume``
            (checkpoint file name relative to ``directory``; falsy to start
            from scratch), ``overwrite``, ``seed``, ``trainval``,
            ``data_path``, ``data_name``, ``img_path``, ``img_type``,
            ``batch_size``, ``num_workers``, ``gpus``, ``word_vectors`` and
            ``arch`` (``"DCN"`` or ``"DCNWithRCNN"``). ``opt.resume`` is
            rewritten in place to the joined path, and when ``opt.overwrite``
            is set the whole object is replaced by the checkpoint's
            ``"args"`` entry.

    Raises:
        ValueError: If ``opt.arch`` is not one of the supported names.

    Exits the process via ``sys.exit`` when training is interrupted with
    Ctrl-C.
    """
    print(">> Creating saving folder if it does not exist: {}".format(
        opt.directory))
    if not os.path.exists(opt.directory):
        os.makedirs(opt.directory)

    checkpoint = None
    if opt.resume:
        opt.resume = os.path.join(opt.directory, opt.resume)
        # A missing file is not an error: training simply starts from scratch.
        if os.path.isfile(opt.resume):
            print(">>>> Loading checkpoint {}".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            if opt.overwrite:
                opt = checkpoint["args"]
                print(">>>> Overwrite args...")

    Initializer.manual_seed(opt.seed)

    def _load_split(split, training):
        """Build the dataset for `split` and its loader.

        Training loaders shuffle and drop the last incomplete batch;
        evaluation loaders do neither.
        """
        dataset = VQADataset(opt.data_path, opt.data_name, split,
                             opt.img_path, opt.img_type, "trainval")
        loader = DataLoader(dataset, batch_size=opt.batch_size,
                            shuffle=training, drop_last=training,
                            num_workers=opt.num_workers, pin_memory=True,
                            collate_fn=default_collate,
                            batch_sampler=BatchSampler)
        return dataset, loader

    print(">> Constructing the dataset...")
    if opt.trainval == 0:
        trainset, trainLoader = _load_split("train", True)
        _, valLoader = _load_split("val", False)
    else:
        # Training on train+val leaves nothing to validate on.
        trainset, trainLoader = _load_split("trainval", True)
        valLoader = None

    print(">> Building model...")
    word_embedded = LargeEmbedding(len(trainset.idx2word), 300, padding_idx=0,
                                   devices=opt.gpus)
    word_embedded.load_pretrained_vectors(opt.word_vectors)

    idx2ans = trainset.idx2ans
    num_ans = trainset.ans_pool.shape[0]
    if opt.arch == "DCNWithRCNN":
        model = DCNWithRCNN(opt, num_ans)
    elif opt.arch == "DCN":
        model = DCN(opt, num_ans)
    else:
        # Without this branch `model` stays unbound and the failure only
        # surfaces later as an UnboundLocalError.
        raise ValueError(
            "Unsupported architecture {!r}; expected 'DCN' or 'DCNWithRCNN'"
            .format(opt.arch))
    criterion = nn.BCEWithLogitsLoss(reduction="sum")

    if len(opt.gpus) >= 1:
        model = model.cuda(opt.gpus[0])
        criterion = criterion.cuda(opt.gpus[0])

    if checkpoint is not None:
        model.load_state_dict(checkpoint["state_dict"])

    if len(opt.gpus) > 1:
        model = nn.DataParallel(model, opt.gpus, dim=0)
    # Attached last: with several GPUs this lands on the DataParallel wrapper,
    # not on the wrapped module.
    model.word_embedded = word_embedded

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = FixedAdam(opt, params)
    scheduler = StepScheduler(opt, optimizer)

    # The reported count is taken from the wrapped module when DataParallel
    # is in use.
    core = model.module if len(opt.gpus) > 1 else model
    print(">> Number of trained parameters:",
          sum(param.numel() for param in core.parameters()
              if param.requires_grad))

    if checkpoint is not None:
        optimizer.load_state_dict(checkpoint["optimizer"])
        scheduler.load_state_dict(checkpoint["lr_scheduler"])
        print(">>>> Loaded checkpoint: {} - epoch {}".format(
            opt.resume, checkpoint["last_epoch"]))

    timer = TimeMeter()
    timer.reset()
    try:
        with torch.cuda.device(opt.gpus[0]):
            trainModel(trainLoader, valLoader, model, criterion, optimizer,
                       scheduler, checkpoint, idx2ans, opt)
    except KeyboardInterrupt:
        sys.exit("Training interrupted")
    print("It toke %.2f hours to train the network"
          % (timer.elapsed_time / 3600))