def preprocess_answer(answer):
    """Normalize an answer string the same way the VQA evaluation server does.

    Applies the official VQAEval punctuation and digit/article normalization,
    then strips commas from the result.
    """
    # VQAEval expects a vqa object exposing getQuesIds(); a minimal stub with
    # that attribute is sufficient for the text-processing helpers used here.
    dummy_vqa = lambda: None
    dummy_vqa.getQuesIds = lambda: None
    vqa_eval = VQAEval(dummy_vqa, None)

    cleaned = vqa_eval.processPunctuation(answer)
    cleaned = vqa_eval.processDigitArticle(cleaned)
    return cleaned.replace(',', '')
def after_epoch(self, is_training):
    """Score the epoch's accumulated predictions with the official VQA evaluator.

    Dumps `self.results` to `self.dump_path`, runs VQAEval against the
    ground-truth annotation/question files, stores the resulting accuracy
    dict on `self.accuracy`, and clears the prediction buffer.

    NOTE(review): `is_training` is unused in the visible body — confirm
    whether evaluation should be skipped during training.
    """
    if len(self.results):
        # Persist predictions so the evaluator can load them back from disk.
        dump_json(self.dump_path, self.results)
        vqa = VQA(self.ann_file, self.ques_file)
        vqaRes = vqa.loadRes(self.dump_path, self.ques_file)
        # create vqaEval object by taking vqa and vqaRes
        vqaEval = VQAEval(vqa, vqaRes, n=2)  # n = decimal places of accuracy
        vqaEval.evaluate()
        self.accuracy = vqaEval.accuracy
        self.results = []
def prepare_questions(questions, rvqa=False):
    """Tokenize and normalize questions from a given question json in the
    usual VQA format.

    Yields one normalized question at a time: lowercased, last character
    dropped, special characters removed, and runs of '-' replaced by a space.
    """
    if not rvqa:
        # Standard VQA json: question strings live under the 'questions' key.
        questions = [q['question'] for q in questions['questions']]
    for question in questions:
        # [:-1] drops the final character — assumes every question ends
        # with '?'; TODO confirm for the rvqa input path.
        question = question.lower()[:-1]
        question = _special_chars.sub('', question)
        question = re.sub(r'-+', ' ', question)
        yield question


# VQAEval requires a vqa object with a getQuesIds attribute; a stub is enough
# because only the text-processing helpers are used below.
dummy_vqa = lambda: None
dummy_vqa.getQuesIds = lambda: None
vqa_eval = VQAEval(dummy_vqa, None)


def process_answers(answer):
    # Collapse newline/tab whitespace, trim, then apply the official VQA
    # punctuation and digit/article normalization.
    answer = answer.replace('\n', ' ')
    answer = answer.replace('\t', ' ')
    answer = answer.strip()
    answer = vqa_eval.processPunctuation(answer)
    answer = vqa_eval.processDigitArticle(answer)
    return answer


def prepare_answers(answers_json):
    """ Normalize answers from a given answer json in the usual VQA format. """
    # NOTE(review): this function appears truncated in the visible source —
    # `answers` is built but never processed or returned; verify against the
    # full file (presumably each answer list is mapped through process_answers).
    answers = [[a['answer'] for a in ans_dict['answers']]
               for ans_dict in answers_json['annotations']]
import sys
import utils.utils as utils
from vqa_eval.PythonHelperTools.vqaTools.vqa import VQA
from vqa_eval.PythonEvaluationTools.vqaEvaluation.vqaEval import VQAEval

# Ground-truth question/annotation files for the validation split; the
# prediction file to score is given on the command line.
quesFile = utils.path_for(val=True, question=True)
annFile = utils.path_for(val=True, answer=True)
resFile = sys.argv[1]

# Build the ground-truth object and load the predictions against it.
vqa = VQA(annFile, quesFile)
vqaRes = vqa.loadRes(resFile, quesFile)

# n is precision of accuracy (number of places after decimal), default is 2
vqaEval = VQAEval(vqa, vqaRes, n=2)
vqaEval.evaluate()

# Report overall and per-type accuracies.
print("Overall Accuracy is: {:.2f}\n".format(vqaEval.accuracy['overall']))
print("Per Question Type Accuracy is the following:")
for quesType, acc in vqaEval.accuracy['perQuestionType'].items():
    print("{} : {:.2f}".format(quesType, acc))
print("\n")
print("Per Answer Type Accuracy is the following:")
for ansType, acc in vqaEval.accuracy['perAnswerType'].items():
    print("{} : {:.2f}".format(ansType, acc))
def main(cfg):
    """Train and validate the combined DNC question-encoder/classifier VQA model.

    `cfg` is the parsed configuration dict; it is mutated in place to record
    the question/answer vocabulary sizes discovered from the training data.
    Writes tensorboard scalars, a validation-prediction json, and per-epoch
    checkpoints.
    """
    gpu_id = cfg["hyperparameters"]["gpu_id"]

    # Device selection: gpu_id >= 0 picks that CUDA device, -1 means CPU.
    if gpu_id >= 0:
        assert torch.cuda.is_available()
        device = torch.device("cuda:" + str(gpu_id))
        print("Using GPU {} | {}".format(gpu_id,
                                         torch.cuda.get_device_name(gpu_id)))
    elif gpu_id == -1:
        device = torch.device("cpu")
        print("Using the CPU")
    else:
        raise NotImplementedError(
            "Device ID {} not recognized. gpu_id = 0, 1, 2 etc. Use -1 for CPU"
            .format(gpu_id))

    data_loader = get_loader(
        input_dir=cfg["paths"]["input"],
        input_vqa_train="train.npy",
        input_vqa_valid="valid.npy",
        max_qst_length=cfg["hyperparameters"]["max_input_length"],
        max_num_ans=cfg["hyperparameters"]["max_num_answers"],
        batch_size=cfg["hyperparameters"]["batch_size"],
        num_workers=6)

    qst_vocab_size = data_loader['train'].dataset.qst_vocab.vocab_size
    ans_vocab_size = data_loader['train'].dataset.ans_vocab.vocab_size
    ans_list = data_loader['train'].dataset.ans_vocab.word_list
    ans_unk_idx = data_loader['train'].dataset.ans_vocab.unk2idx
    # Record vocabulary sizes so the model can size its embedding/output layers.
    cfg["hyperparameters"]["qst_vocab_size"] = qst_vocab_size
    cfg["hyperparameters"]["ans_vocab_size"] = ans_vocab_size

    _set_seed(cfg["hyperparameters"]["seed"])

    model = VqaModelDncQC(cfg).to(device)
    if cfg["hyperparameters"]["finetune"]:
        model.load_pretrained_weights(cfg["hyperparameters"]["fc_flag"])
        model.check_successul_weight_loading(cfg["hyperparameters"]["fc_flag"])
    net_name = "Advanced_dnc"

    criterion = nn.CrossEntropyLoss()

    # Per-parameter groups: the two DNCs get their own learning rates; the
    # remaining groups inherit the global lr passed to the optimizer.
    dnc_q_params = {
        "params": model.qst_encoder.dnc_q.parameters(),
        "lr": cfg["dnc_q"]["lr"]
    }
    dnc_c_params = {"params": model.dnc.parameters(), "lr": cfg["dnc_c"]["lr"]}
    embed_params = {"params": model.qst_encoder.word2vec.parameters()}
    img_encoder_params = {"params": model.img_encoder.fc.parameters()}
    fc_1_params = {"params": model.fc_1.parameters()}
    fc_2_params = {"params": model.fc_2.parameters()}
    # Shared by all optimizer choices (previously duplicated three times).
    param_groups = [
        dnc_q_params, dnc_c_params, embed_params, img_encoder_params,
        fc_1_params, fc_2_params
    ]
    if cfg["hyperparameters"]["optimizer"] == "adam":
        optimizer = optim.Adam(
            param_groups,
            lr=cfg["hyperparameters"]["lr"],
            weight_decay=cfg["hyperparameters"]["weight_decay"])
    elif cfg["hyperparameters"]["optimizer"] == "sgd":
        optimizer = optim.SGD(
            param_groups,
            lr=cfg["hyperparameters"]["lr"],
            weight_decay=cfg["hyperparameters"]["weight_decay"])
    elif cfg["hyperparameters"]["optimizer"] == "rmsprop":
        optimizer = optim.RMSprop(
            param_groups,
            lr=cfg["hyperparameters"]["lr"],
            weight_decay=cfg["hyperparameters"]["weight_decay"])
    else:
        raise ValueError("<{}> is not supported.".format(
            cfg["hyperparameters"]["optimizer"]))

    print("Training " + net_name)
    print("Using " + cfg["hyperparameters"]["optimizer"])

    scheduler = lr_scheduler.StepLR(
        optimizer,
        step_size=cfg["hyperparameters"]["lr_reduce_after"],
        gamma=cfg["hyperparameters"]["lr_decay_rate"])
    summary_writer = SummaryWriter(logdir=cfg["logging"]["tensorboard_dir"])

    tr_iter = 0
    val_iter = 0
    lr = 0
    lr_dnc_q = 0
    lr_dnc_c = 0
    for epoch in range(cfg["hyperparameters"]["num_epochs"]):
        for phase in ["train", "valid"]:
            # Reset DNC hidden state at the start of each phase. The MLP-type
            # classifier DNC carries (mhx, rv) only; the full type also has chx.
            (chx_q, mhx_q, rv_q) = (None, None, None)
            if cfg["dnc_c"]["type"] == "MLP":
                (mhx_c, rv_c) = model.dnc.init_hidden(
                    None, cfg["hyperparameters"]["batch_size"], False)
            else:
                (chx_c, mhx_c, rv_c) = (None, None, None)

            running_loss = 0.0
            dataloader = data_loader[phase]
            batch_step_size = len(
                dataloader.dataset) / cfg["hyperparameters"]["batch_size"]
            if phase == "train":
                model.train()
            else:
                model.eval()
                val_predictions = []

            pbar = tqdm(dataloader)
            # BUGFIX: "Epcoh" -> "Epoch" in the progress-bar description.
            pbar.set_description("{} | Epoch {}/{}".format(
                phase, epoch, cfg["hyperparameters"]["num_epochs"]))
            for batch_idx, batch_sample in enumerate(pbar):
                image = batch_sample['image'].to(device)
                question = batch_sample['question'].to(device)
                label = batch_sample['answer_label'].to(device)
                multi_choice = batch_sample[
                    'answer_multi_choice']  # not tensor, list.

                # The last (short) batch needs a freshly sized hidden state.
                if image.size(0) != cfg["hyperparameters"]["batch_size"]:
                    (chx_q, mhx_q, rv_q) = (None, None, None)
                    if cfg["dnc_c"]["type"] == "MLP":
                        (mhx_c, rv_c) = model.dnc.init_hidden(
                            None, image.size(0), False)
                    else:
                        (chx_c, mhx_c, rv_c) = (None, None, None)

                optimizer.zero_grad()
                with torch.set_grad_enabled(phase == "train"):
                    if cfg["dnc_c"]["type"] == "MLP":
                        output, (chx_q, mhx_q, rv_q), v_q, (mhx_c, rv_c), \
                            v_c = model(image,
                                        question,
                                        chx_q=chx_q,
                                        mhx_q=mhx_q,
                                        rv_q=rv_q,
                                        mhx_c=mhx_c,
                                        rv_c=rv_c)
                    else:
                        output, (chx_q, mhx_q, rv_q), v_q, \
                            (chx_c, mhx_c, rv_c), v_c = model(image,
                                                              question,
                                                              chx_q=chx_q,
                                                              mhx_q=mhx_q,
                                                              rv_q=rv_q,
                                                              chx_c=chx_c,
                                                              mhx_c=mhx_c,
                                                              rv_c=rv_c)
                    _, pred = torch.max(output, 1)  # [batch_size]
                    loss = criterion(output, label)

                    if phase == 'train':
                        loss.backward()
                        if cfg["hyperparameters"]["add_noise"]:
                            # Gradient-noise regularization (annealed by step).
                            for p in model.parameters():
                                if p.grad is not None:
                                    p.grad += get_gaussian_noise(
                                        tr_iter, 1.0, 0.55, p.grad.shape,
                                        device)
                        if cfg["hyperparameters"]["use_clip_grad"]:
                            nn.utils.clip_grad_norm_(
                                model.parameters(),
                                cfg["hyperparameters"]["clip_value"])
                        optimizer.step()
                        # Group order matches param_groups above.
                        lr_dnc_q = optimizer.param_groups[0]["lr"]
                        lr_dnc_c = optimizer.param_groups[1]["lr"]
                        lr = optimizer.param_groups[2]["lr"]
                        dict_lr = {
                            "DNC_Q": lr_dnc_q,
                            "DNC_C": lr_dnc_c,
                            "Rest": lr
                        }
                        summary_writer.add_scalars("lr",
                                                   dict_lr,
                                                   global_step=tr_iter)
                    else:
                        # Collect answers for the official VQA evaluation.
                        question_ids = batch_sample["question_id"].tolist()
                        pred = pred.tolist()
                        pred = [ans_list[i] for i in pred]
                        for id_, ans in zip(question_ids, pred):
                            val_predictions.append({
                                "question_id": id_,
                                "answer": ans
                            })

                # Detach carried DNC state so the graph does not grow across
                # batches (truncated BPTT).
                mhx_q = {
                    k: (v.detach() if isinstance(v, var) else v)
                    for k, v in mhx_q.items()
                }
                mhx_c = {
                    k: (v.detach() if isinstance(v, var) else v)
                    for k, v in mhx_c.items()
                }
                if cfg["dnc_c"]["type"] == "MLP":
                    rv_c = rv_c.detach()

                running_loss += loss.item()
                summary_writer.add_scalar(
                    "Loss/" + phase + "_Batch",
                    loss.item(),
                    global_step=tr_iter if phase == "train" else val_iter)
                # BUGFIX: was compared against "mlp" (lowercase) while every
                # other check uses "MLP", so the influence scalars were never
                # logged.
                if v_c is not None and cfg["dnc_c"]["type"] == "MLP":
                    inf_dict = {
                        "Memory": v_c["memory_influence"],
                        "Controller": v_c["controller_influence"]
                    }
                    summary_writer.add_scalars(
                        "Influences/" + phase + "_Batch",
                        inf_dict,
                        global_step=tr_iter if phase == "train" else val_iter)

                if phase == "train":
                    tr_iter += 1
                else:
                    val_iter += 1

            if phase == "train":
                scheduler.step()

            # Log the average loss over the epoch.
            epoch_loss = running_loss / batch_step_size
            summary_writer.add_scalar("Loss/" + phase + "_Epoch",
                                      epoch_loss,
                                      global_step=epoch)

            if phase == "valid":
                # Dump predictions and score them with the official evaluator.
                valFile = os.path.join(cfg["logging"]["results_dir"],
                                       "val_res.json")
                with open(valFile, 'w') as f:
                    json.dump(val_predictions, f)
                annFile = cfg["paths"]["json_a_path_val"]
                quesFile = cfg["paths"]["json_q_path_val"]
                vqa = VQA(annFile, quesFile)
                vqaRes = vqa.loadRes(valFile, quesFile)
                vqaEval = VQAEval(vqa, vqaRes, n=2)
                vqaEval.evaluate()
                acc_overall = vqaEval.accuracy['overall']
                summary_writer.add_scalar("Acc/overall_" + phase + "_Epoch",
                                          acc_overall,
                                          global_step=epoch)

        # Save the model check points once per epoch.
        _save_checkpoint(net_name, model, optimizer, epoch, tr_iter, val_iter,
                         lr, cfg)
def main(cfg):
    """Train and validate the baseline / DNC-C / DNC-Q VQA models.

    Exactly one of cfg["hyperparameters"]["use_dnc_c"] / ["use_dnc_q"] may be
    set (both false selects the baseline model). `cfg` is mutated in place to
    record the vocabulary sizes discovered from the training data. Writes
    tensorboard scalars, a validation-prediction json, and per-epoch
    checkpoints.
    """
    gpu_id = cfg["hyperparameters"]["gpu_id"]

    # Device selection: gpu_id >= 0 picks that CUDA device, -1 means CPU.
    if gpu_id >= 0:
        assert torch.cuda.is_available()
        device = torch.device("cuda:" + str(gpu_id))
        print("Using GPU {} | {}".format(gpu_id,
                                         torch.cuda.get_device_name(gpu_id)))
    elif gpu_id == -1:
        device = torch.device("cpu")
        print("Using the CPU")
    else:
        raise NotImplementedError(
            "Device ID {} not recognized. gpu_id = 0, 1, 2 etc. Use -1 for CPU"
            .format(gpu_id))

    data_loader = get_loader(
        input_dir=cfg["paths"]["input"],
        input_vqa_train="train.npy",
        input_vqa_valid="valid.npy",
        max_qst_length=cfg["hyperparameters"]["max_input_length"],
        max_num_ans=cfg["hyperparameters"]["max_num_answers"],
        batch_size=cfg["hyperparameters"]["batch_size"],
        num_workers=6)

    qst_vocab_size = data_loader['train'].dataset.qst_vocab.vocab_size
    ans_vocab_size = data_loader['train'].dataset.ans_vocab.vocab_size
    ans_list = data_loader['train'].dataset.ans_vocab.word_list
    ans_unk_idx = data_loader['train'].dataset.ans_vocab.unk2idx
    # Record vocabulary sizes so the model can size its embedding/output layers.
    cfg["hyperparameters"]["qst_vocab_size"] = qst_vocab_size
    cfg["hyperparameters"]["ans_vocab_size"] = ans_vocab_size

    # The two DNC variants are mutually exclusive.
    assert not (cfg["hyperparameters"]["use_dnc_c"]
                and cfg["hyperparameters"]["use_dnc_q"])
    _set_seed(cfg["hyperparameters"]["seed"])

    if cfg["hyperparameters"]["use_dnc_c"]:
        model = VqaModelDncC(cfg).to(device)
        net_name = "dnc_C_" + str(cfg["dnc"]["number"])
    elif cfg["hyperparameters"]["use_dnc_q"]:
        model = VqaModelDncQ(cfg).to(device)
        net_name = "dnc_Q"
    else:
        model = VqaModel(cfg).to(device)
        net_name = "Baseline"

    criterion = nn.CrossEntropyLoss()

    # Per-parameter groups: the DNC gets its own learning rate; the remaining
    # groups inherit the global lr passed to the optimizer.
    if cfg["hyperparameters"]["use_dnc_c"]:
        dnc_params = {
            "params": model.dnc.parameters(),
            "lr": cfg["dnc_c"]["lr"]
        }
        img_encoder_params = {"params": model.img_encoder.fc.parameters()}
        qst_encoder_params = {"params": model.qst_encoder.fc.parameters()}
        if cfg["hyperparameters"]["optimizer"] == "adam":
            optimizer = optim.Adam(
                [dnc_params, img_encoder_params, qst_encoder_params],
                lr=cfg["hyperparameters"]["lr"],
                weight_decay=cfg["hyperparameters"]["weight_decay"])
        elif cfg["hyperparameters"]["optimizer"] == "sgd":
            optimizer = optim.SGD(
                [dnc_params, img_encoder_params, qst_encoder_params],
                lr=cfg["hyperparameters"]["lr"],
                weight_decay=cfg["hyperparameters"]["weight_decay"])
        else:
            # BUGFIX: previously fell through and left `optimizer` unbound,
            # raising NameError later instead of a clear error here.
            raise ValueError("<{}> is not supported.".format(
                cfg["hyperparameters"]["optimizer"]))
    elif cfg["hyperparameters"]["use_dnc_q"]:
        dnc_params = {
            "params": model.qst_encoder.dnc_q.parameters(),
            "lr": cfg["dnc_q"]["lr"]
        }
        embed_params = {"params": model.qst_encoder.word2vec.parameters()}
        img_encoder_params = {"params": model.img_encoder.fc.parameters()}
        fc1_params = {"params": model.fc1.parameters()}
        fc2_params = {"params": model.fc2.parameters()}
        if cfg["hyperparameters"]["optimizer"] == "adam":
            optimizer = optim.Adam(
                [
                    dnc_params, embed_params, img_encoder_params, fc1_params,
                    fc2_params
                ],
                lr=cfg["hyperparameters"]["lr"],
                weight_decay=cfg["hyperparameters"]["weight_decay"])
        elif cfg["hyperparameters"]["optimizer"] == "sgd":
            optimizer = optim.SGD(
                [
                    dnc_params, embed_params, img_encoder_params, fc1_params,
                    fc2_params
                ],
                lr=cfg["hyperparameters"]["lr"],
                weight_decay=cfg["hyperparameters"]["weight_decay"])
        else:
            # BUGFIX: same unbound-`optimizer` fall-through as above.
            raise ValueError("<{}> is not supported.".format(
                cfg["hyperparameters"]["optimizer"]))
    else:
        # Baseline: single group, Adam only (historical behavior).
        params = list(model.img_encoder.fc.parameters()) \
            + list(model.qst_encoder.parameters()) \
            + list(model.fc1.parameters()) \
            + list(model.fc2.parameters())
        optimizer = optim.Adam(params, lr=cfg["hyperparameters"]["lr"])

    print("Training " + net_name)

    scheduler = lr_scheduler.StepLR(
        optimizer,
        step_size=cfg["hyperparameters"]["lr_reduce_after"],
        gamma=cfg["hyperparameters"]["lr_decay_rate"])
    summary_writer = SummaryWriter(logdir=cfg["logging"]["tensorboard_dir"])

    tr_iter = 0
    val_iter = 0
    lr = 0
    lr_dnc = 0
    for epoch in range(cfg["hyperparameters"]["num_epochs"]):
        for phase in ['train', 'valid']:
            # Reset DNC hidden state at the start of each phase. dnc number 1
            # is the (h, mem) implementation; number 0 carries (mem, rv).
            if cfg["hyperparameters"]["use_dnc_c"]:
                if cfg["dnc"]["number"] == 1:
                    model.dnc.update_batch_size(
                        cfg["hyperparameters"]["batch_size"])
                    h, mem = model.dnc.reset()
                elif cfg["dnc"]["number"] == 0:
                    (mem, rv) = model.dnc.init_hidden(
                        None, cfg["hyperparameters"]["batch_size"], True)
                else:
                    # BUGFIX: cfg["dnc"]["number"] is an int (compared to 0/1
                    # above); concatenating it to a str raised TypeError
                    # instead of the intended ValueError.
                    raise ValueError("No dnc number " +
                                     str(cfg["dnc"]["number"]))
            if cfg["hyperparameters"]["use_dnc_q"]:
                (chx, mhx, rv) = (None, None, None)

            running_loss = 0.0
            dataloader = data_loader[phase]
            batch_step_size = len(
                dataloader.dataset) / cfg["hyperparameters"]["batch_size"]
            if phase == 'train':
                model.train()
            else:
                model.eval()
                val_predictions = []

            pbar = tqdm(dataloader)
            # BUGFIX: "Epcoh" -> "Epoch" in the progress-bar description.
            pbar.set_description("{} | Epoch {}/{}".format(
                phase, epoch, cfg["hyperparameters"]["num_epochs"]))
            for batch_idx, batch_sample in enumerate(pbar):
                image = batch_sample['image'].to(device)
                question = batch_sample['question'].to(device)
                label = batch_sample['answer_label'].to(device)
                multi_choice = batch_sample[
                    'answer_multi_choice']  # not tensor, list.

                # The last (short) batch needs a freshly sized hidden state.
                if image.size(0) != cfg["hyperparameters"]["batch_size"]:
                    if cfg["hyperparameters"]["use_dnc_c"]:
                        if cfg["dnc"]["number"] == 1:
                            model.dnc.update_batch_size(image.size(0))
                            h, mem = model.dnc.reset()
                        elif cfg["dnc"]["number"] == 0:
                            (mem, rv) = model.dnc.init_hidden(
                                None, image.size(0), False)
                        else:
                            # BUGFIX: str() as above.
                            raise ValueError("No dnc number " +
                                             str(cfg["dnc"]["number"]))
                    if cfg["hyperparameters"]["use_dnc_q"]:
                        (chx, mhx, rv) = (None, None, None)

                optimizer.zero_grad()
                with torch.set_grad_enabled(phase == 'train'):
                    if cfg["hyperparameters"]["use_dnc_c"]:
                        if cfg["dnc"]["number"] == 1:
                            output, h, mem = model(image,
                                                   question,
                                                   h=h,
                                                   mem=mem)
                        elif cfg["dnc"]["number"] == 0:
                            output, (mem, rv), v = model(image,
                                                         question,
                                                         mem=mem,
                                                         rv=rv)
                    elif cfg["hyperparameters"]["use_dnc_q"]:
                        output, (chx, mhx, rv), v = model(image,
                                                          question,
                                                          chx=chx,
                                                          mhx=mhx,
                                                          rv=rv)
                    else:
                        output = model(
                            image,
                            question)  # [batch_size, ans_vocab_size=1000]
                    _, pred = torch.max(output, 1)  # [batch_size]
                    loss = criterion(output, label)

                    if phase == 'train':
                        loss.backward()
                        if cfg["hyperparameters"]["use_clip_grad"]:
                            nn.utils.clip_grad_norm_(
                                model.parameters(),
                                cfg["hyperparameters"]["clip_value"])
                        optimizer.step()
                        # Log learning rates; group order matches construction.
                        if cfg["hyperparameters"]["use_dnc_c"]:
                            lr_dnc = optimizer.param_groups[0]["lr"]
                            lr = optimizer.param_groups[1]["lr"]
                            dict_lr = {"DNC": lr_dnc, "Rest": lr}
                            summary_writer.add_scalars("lr",
                                                       dict_lr,
                                                       global_step=tr_iter)
                        elif cfg["hyperparameters"]["use_dnc_q"]:
                            lr_dnc = optimizer.param_groups[0]["lr"]
                            lr = optimizer.param_groups[1]["lr"]
                            dict_lr = {"DNC": lr_dnc, "Rest": lr}
                            summary_writer.add_scalars("lr",
                                                       dict_lr,
                                                       global_step=tr_iter)
                        else:
                            lr = optimizer.param_groups[0]["lr"]
                            summary_writer.add_scalar("lr",
                                                      lr,
                                                      global_step=tr_iter)
                    else:
                        # Collect answers for the official VQA evaluation.
                        question_ids = batch_sample["question_id"].tolist()
                        pred = pred.tolist()
                        pred = [ans_list[i] for i in pred]
                        for id_, ans in zip(question_ids, pred):
                            val_predictions.append({
                                "question_id": id_,
                                "answer": ans
                            })

                # Detach carried DNC state so the graph does not grow across
                # batches (truncated BPTT).
                if cfg["hyperparameters"]["use_dnc_c"]:
                    if cfg["dnc"]["number"] == 1:
                        mem = repackage_hidden(mem)
                    elif cfg["dnc"]["number"] == 0:
                        mem = {
                            k: (v.detach() if isinstance(v, var) else v)
                            for k, v in mem.items()
                        }
                        rv = rv.detach()
                elif cfg["hyperparameters"]["use_dnc_q"]:
                    mhx = {
                        k: (v.detach() if isinstance(v, var) else v)
                        for k, v in mhx.items()
                    }

                running_loss += loss.item()
                summary_writer.add_scalar(
                    "Loss/" + phase + "_Batch",
                    loss.item(),
                    global_step=tr_iter if phase == "train" else val_iter)

                if phase == "train":
                    tr_iter += 1
                else:
                    val_iter += 1

            if phase == "train":
                scheduler.step()

            # Log the average loss over the epoch.
            epoch_loss = running_loss / batch_step_size
            summary_writer.add_scalar("Loss/" + phase + "_Epoch",
                                      epoch_loss,
                                      global_step=epoch)

            if phase == "valid":
                # Dump predictions and score them with the official evaluator.
                valFile = os.path.join(cfg["logging"]["results_dir"],
                                       "val_res.json")
                with open(valFile, 'w') as f:
                    json.dump(val_predictions, f)
                annFile = cfg["paths"]["json_a_path_val"]
                quesFile = cfg["paths"]["json_q_path_val"]
                vqa = VQA(annFile, quesFile)
                vqaRes = vqa.loadRes(valFile, quesFile)
                vqaEval = VQAEval(vqa, vqaRes, n=2)
                vqaEval.evaluate()
                acc_overall = vqaEval.accuracy['overall']
                summary_writer.add_scalar("Acc/overall_" + phase + "_Epoch",
                                          acc_overall,
                                          global_step=epoch)

        # Save the model check points once per epoch.
        _save_checkpoint(net_name, model, optimizer, epoch, tr_iter, val_iter,
                         lr, cfg)