def __init__(self, config):
    """Configure the loss from the ``"model"`` section of ``config``.

    Args:
        config: dict with a ``"model"`` entry. ``tau`` (default 1) and
            ``loss_reduce`` (default True) are looked up there through
            ``utils.get_value_from_dict``.
    """
    super(KLDLoss, self).__init__()  # Must call super __init__()

    model_options = config["model"]
    self.tau = utils.get_value_from_dict(model_options, "tau", 1)
    self.reduce = utils.get_value_from_dict(
        model_options, "loss_reduce", True)
    # Not configurable: this flag is always switched on.
    self.apply_softmax_on_teacher = True
def __init__(self, config, verbose=True):
    """Create the SimpleMLP sub-networks and its loss criterion.

    Args:
        config: dict whose ``"model"`` entry carries the options read here
            and is handed to every building block.
        verbose: forwarded unchanged to the parent constructor.
    """
    # Must call super __init__()
    super(SimpleMLP, self).__init__(config, verbose)
    self.classname = "SimpleMLP"

    model_cfg = config["model"]
    self.use_gpu = utils.get_value_from_dict(model_cfg, "use_gpu", True)
    loss_reduce = utils.get_value_from_dict(model_cfg, "loss_reduce", True)

    # options for applying l2-norm
    self.apply_l2_norm = utils.get_value_from_dict(
        model_cfg, "apply_l2_norm", False)

    # build layers
    self.img_emb_net = building_blocks.ResBlock2D(model_cfg, "img_emb")
    self.qst_emb_net = building_blocks.QuestionEmbedding(model_cfg)
    self.classifier = building_blocks.MLP(model_cfg, "answer")
    self.criterion = nn.CrossEntropyLoss(reduce=loss_reduce)

    # set layer names (all and to be updated); the two attributes get
    # independent list objects, as before
    layer_names = ("img_emb_net", "qst_emb_net", "classifier", "criterion")
    self.model_list = list(layer_names)
    self.models_to_update = list(layer_names)

    self.config = config
def __init__(self, config):
    """Assemble the attention encoder of the stacked attention module.

    Args:
        config: dict of SAN options (``num_stacks``, ``qst_emb_dim``,
            ``img_emb_dim``, ``att_emb_dim``, ``att_dropout_prob``); missing
            keys use the defaults given in the lookups below.

    Raises:
        AssertionError: if ``num_stacks`` is not positive.
    """
    super(StackedAttention, self).__init__()  # Must call super __init__()

    # get SAN configurations
    read = utils.get_value_from_dict
    self.num_stacks = read(config, "num_stacks", 2)
    self.qst_feat_dim = read(config, "qst_emb_dim", 256)
    img_feat_dim = read(config, "img_emb_dim", 256)
    self.att_emb_dim = read(config, "att_emb_dim", 512)
    att_dropout_prob = read(config, "att_dropout_prob", 0.5)

    assert self.num_stacks > 0, \
        "# of stacks {} < 1.".format(self.num_stacks)

    # build layers: [dropout] -> 1x1 conv -> ReLU -> [dropout] -> 1x1 conv
    use_dropout = att_dropout_prob > 0
    fused_dim = img_feat_dim + self.qst_feat_dim

    modules = []
    if use_dropout:
        modules.append(nn.Dropout(p=att_dropout_prob))
    modules.append(nn.Conv2d(fused_dim, self.att_emb_dim, 1, 1))
    modules.append(nn.ReLU())
    if use_dropout:
        modules.append(nn.Dropout(p=att_dropout_prob))
    modules.append(nn.Conv2d(self.att_emb_dim, self.num_stacks, 1, 1))

    self.att_encoder = nn.Sequential(*modules)
    self.att_softmax = nn.Softmax(dim=2)
def __init__(self, config):
    """Build an ensemble of ``num_models`` networks of a single model type.

    When ``use_knowledge_distillation`` is enabled, an additional list of
    base models is created from ``base_model_ckpt_path`` and put in eval
    mode. The base models are kept in a plain Python list
    (``self.base_model``), while the ensemble members live in
    ``self.net_list`` (an ``nn.ModuleList``).

    Args:
        config: dict whose ``"model"`` entry holds the options read below.
            ``base_model_ckpt_path`` is indexed per model, so it is
            presumably a list of checkpoint paths -- TODO confirm.

    Raises:
        AssertionError: if knowledge distillation is requested without
            ``base_model_ckpt_path``.
    """
    super(Ensemble, self).__init__(config)  # Must call super __init__()

    self.classname = "ENSEMBLE"
    model_config = config["model"]

    # options for loading model
    self.base_model_type = utils.get_value_from_dict(
        model_config, "base_model_type", "san")
    self.M = cmf.get_model(self.base_model_type)
    self.use_gpu = utils.get_value_from_dict(model_config, "use_gpu", True)
    self.num_models = utils.get_value_from_dict(
        model_config, "num_models", 5)

    # options if use knowledge distillation
    self.use_knowledge_distillation = utils.get_value_from_dict(
        model_config, "use_knowledge_distillation", False)
    base_model_ckpt_path = utils.get_value_from_dict(
        model_config, "base_model_ckpt_path", "None")

    if self.use_knowledge_distillation:
        assert base_model_ckpt_path != "None", \
            "checkpoint path for base model should be given"

        # build and load base models
        self.base_model = []
        for i, ckpt_path in enumerate(base_model_ckpt_path):
            base_config = copy.deepcopy(config)
            # The option is read from config["model"], so it has to be
            # disabled there; the top-level key alone has no effect.
            base_config["model"]["use_knowledge_distillation"] = False
            # Kept for backward compatibility with the previous behaviour.
            base_config["use_knowledge_distillation"] = False

            self.base_model.append(self.M(base_config))
            self.base_model[i].load_checkpoint(ckpt_path)
            if self.use_gpu and torch.cuda.is_available():
                self.base_model[i].cuda()
            self.base_model[i].eval()  # set to eval mode for base model
            self.logger["train"].info(
                "{}th base-net is initialized from {}".format(i, ckpt_path))

    # build specialized models
    self.net_list = nn.ModuleList()
    for m in range(self.num_models):
        self.net_list.append(self.M(config))
        # load pre-trained base models if exist
        if base_model_ckpt_path != "None":
            self.net_list[m].load_checkpoint(base_model_ckpt_path[m])
            self.logger["train"].info(
                "{}th net is initialized from {}".format(
                    m, base_model_ckpt_path[m]))

    self.criterion = building_blocks.EnsembleLoss(model_config)

    # set is_main_net flag of base networks as False
    for m in range(self.num_models):
        self.net_list[m].set_is_main_net(False)

    # set models to update
    self.model_list = ["net_list", "criterion"]
    self.models_to_update = ["net_list", "criterion"]
def __init__(self, config):
    """Read the loader options and load the encoded question/answer files.

    The configuration is printed as indented JSON, then the hdf5 file is
    loaded to obtain ``max_time_steps`` and the JSON file is kept on the
    instance as ``self.json_file``.

    Args:
        config: dict of loader options; missing keys use the defaults given
            in the lookups below.
    """
    # get configurations
    print(json.dumps(config, indent=4))

    default_hdf5_path = (
        "data/CLEVR_v1.0/preprocess/encoded_qa/vocab_train_raw/"
        + "all_questions_use_zero_token/qa_train.h5")
    default_json_path = (
        "data/CLEVR_v1.0/preprocess/encoded_qa/vocab_train_raw/"
        + "all_questions_use_zero_token/qa_train.json")

    self.hdf5_path = utils.get_value_from_dict(
        config, "encoded_hdf5_path", default_hdf5_path)
    self.json_path = utils.get_value_from_dict(
        config, "encoded_json_path", default_json_path)
    self.img_size = utils.get_value_from_dict(config, "img_size", 224)
    self.batch_size = utils.get_value_from_dict(config, "batch_size", 32)
    self.use_img = utils.get_value_from_dict(config, "use_img", False)
    self.use_gpu = utils.get_value_from_dict(config, "use_gpu", True)

    if not self.use_img:
        # pre-computed features are used instead of raw images
        self.feat_dir = utils.get_value_from_dict(
            config, "feat_dir", "data/CLEVR_v1.0/feats")
    else:
        self.img_dir = utils.get_value_from_dict(
            config, "img_dir", "data/CLEVR_v1.0/images")
        preprocessing_steps = [
            trn.Resize(self.img_size),
            trn.CenterCrop(self.img_size),
            trn.ToTensor(),
            trn.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]
        self.prepro = trn.Compose(preprocessing_steps)

    # load hdf5 file including question_labels, question_length,
    # answer_labels
    hdf5_file = io_utils.load_hdf5(self.hdf5_path)
    self.max_time_steps = hdf5_file["question_labels"].shape[1]

    # load json file including woti, itow, atoi, itoa, splits, vocab_info,
    # question_ids, image_filenames
    self.json_file = io_utils.load_json(self.json_path)

    # set path of pre-computed assignments
    # NOTE: DEPRECATED
    self.assignment_path = utils.get_value_from_dict(
        config, "assignment_path", "")

    # set path of pre-computed logits of base models
    self.base_logits_path = utils.get_value_from_dict(
        config, "base_logits_path", "")

    self.fetching_answer_option = "simple"
    self.vis_mode = config.get("vis_mode", False)
def __init__(self, config, name=""):
    """Create the 2D embedding layer unless only L2-normalisation is wanted.

    Args:
        config: dict of options; every key is looked up with the prefix
            ``name + "_"`` when ``name`` is non-empty.
        name: optional prefix for the option keys.

    Raises:
        AssertionError: if ``only_l2_norm`` is True while ``apply_l2_norm``
            is False.
    """
    super(Embedding2D, self).__init__()  # Must call super __init__()

    if name != "":
        name = name + "_"
    key = name + "emb2d_"

    # get configuration
    inp_dim = utils.get_value_from_dict(config, key + "inp_dim", 1024)
    out_dim = utils.get_value_from_dict(config, key + "out_dim", 256)
    dropout_prob = utils.get_value_from_dict(
        config, key + "dropout_prob", 0.0)
    nonlinear = utils.get_value_from_dict(
        config, key + "nonlinear_fn", "None")
    batchnorm = utils.get_value_from_dict(
        config, key + "use_batchnorm", False)
    self.apply_l2_norm = utils.get_value_from_dict(
        config, key + "apply_l2_norm", False)
    self.only_l2_norm = utils.get_value_from_dict(
        config, key + "only_l2_norm", False)

    contradictory = (self.apply_l2_norm == False) \
        and (self.only_l2_norm == True)
    assert not contradictory, \
        "You set only_l2_norm as True, but also set apply_l2_norm as False"

    # define layers
    if self.only_l2_norm:
        return
    self.embedding_2d = get_conv2d(
        inp_dim, out_dim, 1, 1,
        dropout=dropout_prob,
        nonlinear=nonlinear,
        use_batchnorm=batchnorm)
def __init__(self, config, name=""):
    """Create the word embedding layer through ``get_embedding``.

    Args:
        config: dict of options; every key is looked up with the prefix
            ``name + "_"`` when ``name`` is non-empty.
        name: optional prefix for the option keys.
    """
    super(WordEmbedding, self).__init__()  # Must call super __init__()

    if name != "":
        name = name + "_"
    key = name + "word_emb_"

    # get configuration
    read = utils.get_value_from_dict
    inp_dim = read(config, key + "vocab_size", 256)
    out_dim = read(config, key + "dim", 52)
    dropout_prob = read(config, key + "dropout_prob", 0)
    nonlinear = read(config, key + "nonlinear_fn", "ReLU")

    # set layers
    self.word_emb = get_embedding(
        inp_dim, out_dim, dropout=dropout_prob, nonlinear=nonlinear)
def __init__(self, config, name=""):
    """Create ``num_blocks`` residual blocks and an optional downsample path.

    Args:
        config: dict of options; keys are looked up with the prefix
            ``name + "_"`` when ``name`` is non-empty, except
            ``use_attention_transfer``, which is read without a prefix.
        name: optional prefix for the option keys.
    """
    super(ResBlock2D, self).__init__()  # Must call super __init__()

    if name != "":
        name = name + "_"
    block_key = name + "res_block_2d_"

    # get configuration
    read = utils.get_value_from_dict
    inp_dim = read(config, block_key + "inp_dim", 1024)
    out_dim = read(config, block_key + "out_dim", 1024)
    hidden_dim = read(config, block_key + "hidden_dim", 512)
    self.num_blocks = read(config, name + "num_blocks", 1)
    self.use_downsample = read(config, name + "use_downsample", False)
    self.use_attention_transfer = read(
        config, "use_attention_transfer", False)

    # set layers
    if self.use_downsample:
        projection = nn.Conv2d(
            inp_dim, out_dim, kernel_size=1, stride=1, bias=False)
        self.downsample = nn.Sequential(projection, nn.BatchNorm2d(out_dim))

    self.blocks = nn.ModuleList()
    for block_idx in range(self.num_blocks):
        self.blocks.append(get_res_block_2d(inp_dim, out_dim, hidden_dim))
        # with downsampling, every block after the first one takes
        # out_dim channels as its input
        if self.use_downsample and block_idx == 0:
            inp_dim = out_dim
def factory_model(config, M, dset, ckpt_path, epoch=None):
    """Build a network of type ``M`` and optionally restore a checkpoint.

    Args:
        config: dict whose ``"model"`` entry provides ``use_gpu`` and,
            optionally, ``version`` and ``apply_curriculum_learning_after``.
        M: model class (or factory) called as ``M(config)``.
        dset: dataset handed to ``net.bring_loader_info``.
        ckpt_path: path of the checkpoint to load; an empty value skips
            loading.
        epoch: epoch of the checkpoint, used only to decide whether
            curriculum learning must be re-applied. When omitted it is taken
            from the trailing ``_<epoch>`` part of the checkpoint file name
            (the same rule ``train()`` uses), or 0 if no checkpoint is given.

    Returns:
        The constructed network.

    Raises:
        AssertionError: if the checkpoint file does not exist.
        ValueError: if ``epoch`` has to be inferred and the checkpoint file
            name does not end in an integer.
    """
    net = M(config)
    net.bring_loader_info(dset)

    # ship network to use gpu
    if config["model"]["use_gpu"]:
        net.gpu_mode()

    # load checkpoint
    # NOTE(review): nesting was reconstructed from a whitespace-collapsed
    # source; loading is skipped for "IE" ensembles, mirroring main().
    has_ckpt = len(ckpt_path) > 0
    if has_ckpt:
        if not (net.classname == "ENSEMBLE"
                and config["model"]["version"] == "IE"):
            assert os.path.exists(ckpt_path), \
                "Checkpoint does not exists ({})".format(ckpt_path)
            net.load_checkpoint(ckpt_path)

    # If checkpoint is already applied with curriculum learning
    apply_cc_after = utils.get_value_from_dict(
        config["model"], "apply_curriculum_learning_after", -1)
    if apply_cc_after > 0:
        # `epoch` used to be an undefined name here; resolve it lazily so
        # configurations without curriculum learning never need it.
        if epoch is None:
            if has_ckpt:
                epoch = int(
                    utils.get_filename_from_path(ckpt_path).split("_")[-1])
            else:
                epoch = 0
        if epoch >= apply_cc_after:
            net.apply_curriculum_learning()

    return net
def __init__(self, config, name=""):
    """Create the multi-layer perceptron through ``get_mlp``.

    Args:
        config: dict of options; every key is looked up with the prefix
            ``name + "_"`` when ``name`` is non-empty.
        name: optional prefix for the option keys.
    """
    super(MLP, self).__init__()  # Must call super __init__()

    if name != "":
        name = name + "_"
    key = name + "mlp_"

    # get configuration
    read = utils.get_value_from_dict
    inp_dim = read(config, key + "inp_dim", 256)
    out_dim = read(config, key + "out_dim", 52)
    dropout_prob = read(config, key + "dropout_prob", 0)
    hidden_dim = read(config, key + "hidden_dim", (1024,))
    use_batchnorm = read(config, key + "use_batchnorm", False)
    nonlinear = read(config, key + "nonlinear_fn", "ReLU")

    # set layers
    self.mlp_1d = get_mlp(
        inp_dim, out_dim, hidden_dim,
        dropout=dropout_prob,
        nonlinear=nonlinear,
        use_batchnorm=use_batchnorm)
def train(config):
    """Train the network described by ``config``, evaluating periodically.

    Builds train/test loaders, creates the network (optionally resuming
    from ``config["model"]["checkpoint_path"]``), then runs the epoch loop:
    forward/update per batch, periodic status printing and visualisation,
    and per-epoch checkpointing, result saving and evaluation.

    Relies on the module-level names ``M`` (model class) and ``logger``.

    Args:
        config: dict with the ``train_loader``, ``test_loader``, ``model``,
            ``optimize``, ``misc`` and ``evaluation`` sections read below.
    """
    # ---- Build data loader ----
    dsets = {}
    dsets["train"] = dataset.DataSet(config["train_loader"])
    dsets["test"] = dataset.DataSet(config["test_loader"])
    L = {}
    L["train"] = data.DataLoader(
        dsets["train"],
        batch_size=config["train_loader"]["batch_size"],
        num_workers=config["misc"]["num_workers"],
        shuffle=True, collate_fn=dataset.collate_fn)
    # The test loader is only used for evaluation, so it is not shuffled
    # (same as the evaluation loaders built in ensemble() and main()).
    L["test"] = data.DataLoader(
        dsets["test"],
        batch_size=config["test_loader"]["batch_size"],
        num_workers=config["misc"]["num_workers"],
        shuffle=False, collate_fn=dataset.collate_fn)
    config = M.override_config_from_loader(config, dsets["train"])

    # ---- Build network ----
    net = M(config)
    net.bring_loader_info(dsets)
    logger["train"].info(str(net))
    apply_cc_after = utils.get_value_from_dict(
        config["model"], "apply_curriculum_learning_after", -1)

    # load checkpoint if exists
    if len(config["model"]["checkpoint_path"]) > 0:
        net.load_checkpoint(config["model"]["checkpoint_path"])
        # the epoch is the last "_"-separated token of the file name
        start_epoch = int(
            utils.get_filename_from_path(
                config["model"]["checkpoint_path"]).split("_")[-1])
        # If checkpoint use curriculum learning
        if (apply_cc_after > 0) and (start_epoch >= apply_cc_after):
            net.apply_curriculum_learning()
    else:
        start_epoch = 0

    # ship network to use gpu
    if config["model"]["use_gpu"]:
        net.gpu_mode()

    # Prepare tensorboard
    net.create_tensorboard_summary(config["misc"]["tensorboard_dir"])

    # ---- Run training network ----
    ii = 0  # global iteration counter (not reset between epochs)
    tm = timer.Timer()  # tm: timer
    iter_per_epoch = dsets["train"].get_iter_per_epoch()
    min_lr = config["optimize"].get("min_lr", 0.0002)
    for epoch in range(start_epoch, config["optimize"]["num_epoch"]):
        net.train_mode()  # set network as train mode
        net.reset_status()  # initialize status
        for batch in L["train"]:
            data_load_duration = tm.get_duration()

            # maintain sample data to observe learning status
            if ii == 0:
                sample_data = dsets["train"].get_samples(5)
                # TODO: get samples from both training/test set
                # test_sample_data = dsets["test"].get_samples(5))

            # Forward and update the network
            # Note that the 1st and 2nd item of outputs from forward() should
            # be loss and logits. The others would change depending on the
            # network
            tm.reset()
            lr = utils.adjust_lr(
                ii + 1, iter_per_epoch, config["optimize"], min_lr)
            outputs = net.forward_update(batch, lr)
            run_duration = tm.get_duration()

            # Compute status for current batch: loss, evaluation scores, etc
            net.compute_status(outputs[1], batch[0][-1])

            # print learning status
            if (ii + 1) % config["misc"]["print_every"] == 0:
                net.print_status(epoch + 1, ii + 1)
                txt = "fetching for {:.3f}s, optimizing for {:.3f}s, lr = {:.5f}"
                logger["train"].debug(
                    txt.format(data_load_duration, run_duration, lr))
                logger["train"].info("\n")

            # visualize results
            if (config["misc"]["vis_every"] > 0) \
                    and ((ii + 1) % config["misc"]["vis_every"] == 0):
                if config["misc"]["model_type"] == "ensemble":
                    net.save_results(
                        sample_data, "iteration_{}".format(ii + 1),
                        mode="train")

            ii += 1
            tm.reset()

            # debug mode: leave the epoch whenever the global iteration
            # counter reaches a multiple of 100
            if config["misc"]["debug"]:
                if ii % 100 == 0:
                    break
        # epoch done

        # save network every epoch
        net.save_checkpoint(epoch + 1)

        # visualize results
        net.save_results(
            sample_data, "epoch_{:03d}".format(epoch + 1), mode="train")

        # print status (metric) accumulated over each epoch
        net.print_counters_info(epoch + 1, logger_name="epoch", mode="Train")

        # validate network
        if (epoch + 1) % config["evaluation"]["every_eval"] == 0:
            cmf.evaluate(config, L["test"], net, epoch,
                         logger_name="epoch", mode="Valid")

        # curriculum learning
        if (apply_cc_after >= 0) and ((epoch + 1) == apply_cc_after):
            net.apply_curriculum_learning()

        # reset reference time to compute duration of loading data
        tm.reset()
def ensemble(config):
    """Evaluate an ensemble of separately trained networks.

    Every checkpoint in ``config["checkpoint_paths"]`` is loaded with its
    matching entry of ``config["config_paths"]``. For each softmax
    temperature the per-model, top1-max, top1-avg and oracle accuracies are
    accumulated over the test loader and printed, and the predicted answers
    are written to ``results/ensemble_predictions/<config["out"]>.json``.

    Relies on the module-level name ``M`` (model class).

    Args:
        config: dict with the keys read below (``test_loader``,
            ``num_workers``, ``assignment_path``, ``checkpoint_paths``,
            ``config_paths``, ``use_gpu``, ``save_logits``, ``out``).
    """
    # ---- Build data loader ----
    dset = dataset.DataSet(config["test_loader"])
    L = data.DataLoader(
        dset, batch_size=config["test_loader"]["batch_size"],
        num_workers=config["num_workers"],
        shuffle=False, collate_fn=dataset.collate_fn)

    # ---- Load assignments if exists ----
    # NOTE: the loaded assignments are not used anywhere below; the load is
    # kept so that a bad assignment file still fails early.
    if config["assignment_path"] != "None":
        assignment_file = io_utils.load_hdf5(
            config["assignment_path"], verbose=False)
        assignments = assignment_file["assignments"][:]  # noqa: F841

    # ---- Build network ----
    nets = []
    for i, ckpt_path in enumerate(config["checkpoint_paths"]):
        net_config = io_utils.load_yaml(config["config_paths"][i])
        net_config = M.override_config_from_loader(net_config, dset)
        net = M(net_config)
        net.bring_loader_info(dset)
        apply_cc_after = utils.get_value_from_dict(
            net_config["model"], "apply_curriculum_learning_after", -1)

        # load checkpoint; the epoch is the last "_"-separated token of the
        # checkpoint file name
        net.load_checkpoint(ckpt_path)
        start_epoch = int(
            utils.get_filename_from_path(ckpt_path).split("_")[-1])
        # If checkpoint use curriculum learning
        if (apply_cc_after > 0) and (start_epoch >= apply_cc_after):
            net.apply_curriculum_learning()
        nets.append(net)

    # ship network to use gpu
    if config["use_gpu"]:
        for net in nets:
            net.gpu_mode()
    for net in nets:
        net.eval_mode()

    # initialize counters for different tau
    metrics = ["top1-avg", "top1-max", "oracle"]
    metrics.extend("M{}".format(i) for i in range(len(nets)))
    tau = [1.0, 1.2, 1.5, 2.0, 5.0, 10.0, 50.0, 100.0]
    counters = OrderedDict()
    for T in tau:
        tau_name = "tau-" + str(T)
        counters[tau_name] = OrderedDict()
        for mt in metrics:
            counters[tau_name][mt] = accumulator.Accumulator(mt)

    # ---- Run evaluation ----
    itoa = dset.get_itoa()
    predictions = []
    for batch in tqdm(L):
        B = batch[0][0].size(0)
        # the ground truth is the last input item, or its first element
        # when that item is a list
        if isinstance(batch[0][-1], list):
            gt = batch[0][-1][0]
        else:
            gt = batch[0][-1]

        # Forward networks
        prob_list = [net.evaluate(batch)[1] for net in nets]  # m*[B,A]

        if config["save_logits"]:
            pass  # TODO: saving logits is not implemented

        for T in tau:
            tau_name = "tau-" + str(T)
            probs = [net_utils.get_data(F.softmax(logits / T, dim=1))
                     for logits in prob_list]  # m*[B,A]

            # count correct numbers for each model
            for i, model_probs in enumerate(probs):
                val, idx = model_probs.max(dim=1)
                correct = torch.eq(idx, gt)
                num_correct = torch.sum(correct)
                counters[tau_name]["M{}".format(i)].add(num_correct, B)

                # a sample counts for the oracle if any model is correct
                if i == 0:
                    oracle_correct = correct
                else:
                    oracle_correct = oracle_correct + correct

            # top1-max accuracy for ensemble
            ens_probs, ens_idx = torch.stack(probs, 0).max(0)  # [B,A]
            max_val, max_idx = ens_probs.max(dim=1)
            num_correct = torch.sum(torch.eq(max_idx, gt))
            counters[tau_name]["top1-max"].add(num_correct, B)

            # top1-avg accuracy for ensemble
            ens_probs = sum(probs)  # [B,A]
            max_val, max_idx = ens_probs.max(dim=1)
            num_correct = torch.sum(torch.eq(max_idx, gt))
            counters[tau_name]["top1-avg"].add(num_correct, B)

            # oracle accuracy for ensemble
            num_oracle_correct = torch.sum(torch.ge(oracle_correct, 1))
            counters[tau_name]["oracle"].add(num_oracle_correct, B)

        # attach predictions
        # NOTE(review): nesting was reconstructed from a whitespace-collapsed
        # source. Predictions are attached once per batch, using the
        # top1-avg indices of the last temperature -- confirm against the
        # original layout.
        for i, qid in enumerate(batch[1]):
            predictions.append({
                "question_id": qid,
                "answer": utils.label2string(itoa, max_idx[i])
            })
    # epoch done

    # print accuracy
    for cnt_k, cnt_v in counters.items():
        txt = cnt_k + " " + "".join(
            ", {} = {:.5f}".format(v.get_name(), v.get_average())
            for v in cnt_v.values())
        print(txt)

    save_dir = os.path.join("results", "ensemble_predictions")
    io_utils.check_and_create_dir(save_dir)
    io_utils.write_json(
        os.path.join(save_dir, config["out"] + ".json"), predictions)
def main(params):
    """Evaluate saved checkpoints or compute selection values for one.

    In ``"eval"`` mode a fresh network is built for every epoch from
    ``start_epoch`` to ``end_epoch`` (inclusive, stepping by
    ``epoch_stride``) and passed to ``cmf.evaluate``. In ``"selection"``
    mode only the ``start_epoch`` checkpoint is loaded and passed to
    ``cmf.get_selection_values``.

    Relies on the module-level name ``M`` (model class).

    Args:
        params: dict of run parameters (``dataset``, ``model_type``,
            ``exp``, ``loader_config_path``, ``num_workers``, ``mode``,
            ``start_epoch``, and for eval mode ``end_epoch`` and
            ``epoch_stride``). ``params["config_path"]`` is set here.
    """
    # load configuration of pre-trained models
    exp_path = os.path.join(
        "results", params["dataset"], params["model_type"], params["exp"])
    config_path = os.path.join(exp_path, "config.yml")
    config = io_utils.load_yaml(config_path)
    params["config_path"] = config_path
    config = M.override_config_from_params(config, params)
    config["exp_path"] = exp_path
    cmf.create_save_dirs(config["misc"])

    # create logger
    logger_path = os.path.join(config["exp_path"], "evaluation.log")
    logger = io_utils.get_logger("Evaluate", log_file_path=logger_path)

    # ---- Build data loader ----
    loader_config = io_utils.load_yaml(params["loader_config_path"])
    dset = dataset.DataSet(loader_config)
    L = data.DataLoader(
        dset,
        batch_size=loader_config["batch_size"],
        num_workers=params["num_workers"],
        shuffle=False,
        collate_fn=dataset.collate_fn)
    config = M.override_config_from_loader(config, dset)

    ckpt_name = "checkpoint_epoch_{:03d}.pkl"
    run_mode = params["mode"]

    if run_mode == "eval":
        # ---- Evaluating networks ----
        first_epoch = params["start_epoch"]
        last_epoch = params["end_epoch"]
        stride = params["epoch_stride"]
        sample_data = dset.get_samples(5)

        for epoch in range(first_epoch, last_epoch + 1, stride):
            # ---- Build network ----
            net = M(config)
            net.bring_loader_info(dset)

            # ship network to use gpu
            if config["model"]["use_gpu"]:
                net.gpu_mode()

            # load checkpoint (skipped for "IE" ensembles)
            if net.classname != "ENSEMBLE" \
                    or config["model"]["version"] != "IE":
                ckpt_path = os.path.join(
                    exp_path, "checkpoints", ckpt_name.format(epoch))
                assert os.path.exists(ckpt_path), \
                    "Checkpoint does not exists ({})".format(ckpt_path)
                net.load_checkpoint(ckpt_path)

            # If checkpoint is already applied with curriculum learning
            # NOTE(review): nesting was reconstructed from a
            # whitespace-collapsed source; this check is placed at loop
            # level -- confirm it is not meant to sit inside the branch above.
            apply_cc_after = utils.get_value_from_dict(
                config["model"], "apply_curriculum_learning_after", -1)
            if (apply_cc_after > 0) and (epoch >= apply_cc_after):
                net.apply_curriculum_learning()

            cmf.evaluate(config, L, net, epoch - 1, logger_name="eval",
                         mode="Evaluation", verbose_every=100)

    elif run_mode == "selection":
        epoch = params["start_epoch"]

        # ---- Build network ----
        net = M(config)
        net.bring_loader_info(dset)

        # ship network to use gpu
        if config["model"]["use_gpu"]:
            net.gpu_mode()

        # load checkpoint
        ckpt_path = os.path.join(
            exp_path, "checkpoints", ckpt_name.format(epoch))
        assert os.path.exists(ckpt_path), \
            "Checkpoint does not exists ({})".format(ckpt_path)
        net.load_checkpoint(ckpt_path)

        # If checkpoint use curriculum learning
        apply_cc_after = utils.get_value_from_dict(
            config["model"], "apply_curriculum_learning_after", -1)
        if (apply_cc_after > 0) and (epoch >= apply_cc_after):
            net.apply_curriculum_learning()

        cmf.get_selection_values(config, L, net, epoch - 1,
                                 logger_name="eval", mode="Evaluation",
                                 verbose_every=100)
def __init__(self, config):
    """Read the ensemble-loss options and initialise the iteration counter.

    Args:
        config: dict of options; missing keys use the defaults given in the
            lookups below. ``att_transfer_beta`` is only read when
            ``use_attention_transfer`` is enabled, and the assignment
            criterion is only created when ``use_assignment_model`` is
            enabled.
    """
    super(EnsembleLoss, self).__init__()  # Must call super __init__()
    self.logger = io_utils.get_logger("Train")

    read = utils.get_value_from_dict

    # common options
    self.version = read(config, "version", "KD-MCL")
    self.use_gpu = read(config, "use_gpu", True)
    self.m = read(config, "num_models", 5)
    self.num_labels = read(config, "num_labels", 28)
    self.print_every = 20
    self.log_every = 500

    # options for computing assignments
    self.k = read(config, "num_overlaps", 2)
    self.tau = read(config, "tau", -1)
    self.beta = read(config, "beta", 0.75)

    # options for margin-MCL
    self.margin_threshold = read(config, "margin_threshold", 1.0)
    self.use_logit = read(config, "margin_in_logit", True)

    # options for attention transfer
    self.use_attention_transfer = read(
        config, "use_attention_transfer", False)
    if self.use_attention_transfer:
        self.att_transfer_beta = read(config, "att_transfer_beta", 1000)

    # options for assignment model
    self.use_assignment_model = read(config, "use_assignment_model", False)
    if self.use_assignment_model:
        self.assignment_criterion = nn.CrossEntropyLoss()

    self.iteration = 0
def __init__(self, config, name=""):
    """Create the word lookup table and the RNN that encodes questions.

    Args:
        config: dict of options; keys are looked up with the prefix
            ``name + "_"`` when ``name`` is non-empty, except ``use_gpu``,
            which is read without a prefix.
        name: optional prefix for the option keys.

    Raises:
        AssertionError: if ``rnn_type`` is neither ``"GRU"`` nor ``"LSTM"``.
    """
    super(QuestionEmbedding, self).__init__()  # Must call super __init__()

    if name != "":
        name = name + "_"

    # get configurations
    self.use_gpu = utils.get_value_from_dict(config, "use_gpu", True)

    # options for word embedding
    word_emb_dim = utils.get_value_from_dict(
        config, name + "word_emb_dim", 300)
    padding_idx = utils.get_value_from_dict(
        config, name + "word_emb_padding_idx", 0)
    self.apply_nonlinear = utils.get_value_from_dict(
        config, name + "apply_word_emb_nonlinear", False)
    self.word_emb_dropout_prob = utils.get_value_from_dict(
        config, name + "word_emb_dropout_prob", 0.0)

    # options for rnn
    self.rnn_type = utils.get_value_from_dict(
        config, name + "rnn_type", "LSTM")
    self.num_layers = utils.get_value_from_dict(
        config, name + "rnn_num_layers", 2)
    self.rnn_dim = utils.get_value_from_dict(
        config, name + "rnn_hidden_dim", 256)
    rnn_dropout_prob = utils.get_value_from_dict(
        config, name + "rnn_dropout_prob", 0)
    self.bidirectional = utils.get_value_from_dict(
        config, name + "bidirectional", False)
    vocab_size = utils.get_value_from_dict(config, name + "vocab_size", 10)

    # The message is formatted as a whole: previously `.format` was applied
    # only to the second string literal, so the "{}" placeholder was never
    # replaced by the offending RNN type.
    assert self.rnn_type in ("GRU", "LSTM"), \
        "Not supported RNN type: {} (neither GRU or LSTM)".format(
            self.rnn_type)

    # word embedding layers
    self.lookuptable = nn.Embedding(
        vocab_size, word_emb_dim, padding_idx=padding_idx)
    if self.word_emb_dropout_prob > 0:
        self.word_emb_dropout = nn.Dropout(self.word_emb_dropout_prob)

    # RNN layers: nn.GRU or nn.LSTM, selected by name
    rnn_class = getattr(nn, self.rnn_type)
    self.rnn = rnn_class(
        input_size=word_emb_dim,
        hidden_size=self.rnn_dim,
        num_layers=self.num_layers,
        bias=True,
        batch_first=True,
        dropout=rnn_dropout_prob,
        bidirectional=self.bidirectional)