def pretrain_reid(self):
    """Pre-train the re-identification branch with a classification loss.

    Runs epochs ``start_ep .. cfg.pre_reid_num_epochs - 1`` (resuming from
    ``cfg.pre_reid_resume_ep`` when ``cfg.pre_reid_resume`` is set), logging
    per-step and per-epoch losses, optionally evaluating on the validation
    set, and saving checkpoints periodically.

    NOTE(review): this function uses Python-2 ``print`` statements and the
    legacy ``Variable`` API — it only runs under Python 2 / old PyTorch.
    """
    start_ep = cfg.pre_reid_resume_ep if cfg.pre_reid_resume else 0
    for ep in range(start_ep, cfg.pre_reid_num_epochs):
        # Force all BN layers to use global mean and variance
        utils.may_set_mode(self.modules_optims, 'eval')
        # Enable dropout
        #utils.may_set_mode(self.reidTop.dropout, 'train')
        epoch_done = False
        ep_losses = []
        ep_st = time.time()
        step = 0
        while not epoch_done:
            step += 1
            step_st = time.time()
            # next_batch() signals epoch end via its last return value.
            ims, im_names, labels, ims_mirrored, epoch_done = \
                self.train_set.next_batch()
            # TVT presumably moves tensors to the configured device — confirm.
            ims = TVT(Variable(torch.from_numpy(ims).float()))
            labels = TVT(Variable(torch.LongTensor(labels)))
            logits = self.googlenet(ims)
            loss = self.reid_criterion(logits, labels)
            self.optimReID.zero_grad()
            loss.backward()
            self.optimReID.step()
            ep_losses.append(utils.to_scalar(loss))
            # Step logs
            # NOTE(review): `step` was already incremented at loop start, so
            # `step + 1` here prints a number one higher than the step count.
            if step % cfg.pre_reid_log_steps == 0:
                print '[Step {}/Ep {}], [{:.2f}s], [loss: {}]'.format(
                    step + 1, ep + 1, time.time() - step_st,
                    utils.to_scalar(loss))
        # Epoch logs
        print '===========> [Epoch {}], [{:.2f}s], [ep_avg_loss: {}]'.format(
            ep + 1, time.time() - ep_st, np.mean(ep_losses))
        # validation
        if cfg.train_val_part == 'train':
            self.val_set.eval_single_query(True)
            self.val_set.eval_multi_query(False)
        # epoch saving
        if (ep + 1) % cfg.pre_reid_epochs_per_saving_ckpt == 0 \
                or ep + 1 == cfg.pre_reid_num_epochs:
            utils.may_save_modules_optims_state_dicts(
                self.modules_optims,
                cfg.pre_reid_ckpt_saving_tmpl.format(ep + 1))
    # Shut down the background data-loading threads when training finishes.
    self.train_set.stop_prefetching_threads()
    if cfg.train_val_part == 'train':
        self.val_set.stop_prefetching_threads()
def train(epoch, train_loader, DEVICE):
    """Train the VAE (module-level ``vmodel``/``opt``) for one epoch.

    Loss is a discretized mixture-of-logistics reconstruction term plus a
    KL term on the latent distribution.  Returns the per-term mean
    ``[kl_loss, dmll_loss]`` over the epoch as a numpy array.

    FIX: the progress log used a Python-2-only ``print`` statement even
    though the rest of the function is Python-3 style (``.to(DEVICE)``);
    converted to a ``print()`` call, which behaves identically.
    """
    print("starting epoch {}".format(epoch))
    train_loss = []
    kl_weight = 1  # KL term weight; currently fixed (no annealing)
    for batch_idx, (data, _) in enumerate(train_loader):
        start_time = time.time()
        x = Variable(data, requires_grad=False).to(DEVICE)
        opt.zero_grad()
        x_di = vmodel(x)
        # Reconstruction target is rescaled from [0, 1] to [-1, 1].
        dmll_loss = discretized_mix_logistic_loss(
            x_di, 2 * x - 1, nr_mix=nr_mix, use_cuda=args.cuda)
        kl_loss = kl_weight * latent_loss(vmodel.z_mean, vmodel.z_sigma)
        loss = dmll_loss + kl_loss
        loss.backward()
        opt.step()
        train_loss.append(to_scalar([kl_loss, dmll_loss]))
        if not batch_idx % 10:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {} Time: {}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / float(len(train_loader)),
                np.asarray(train_loss).mean(0),
                time.time() - start_time))
    return np.asarray(train_loss).mean(0)
def train_epoch(data_train, model, optimizer, criterion, args):
    """Train a sequence tagger for one epoch, one sentence at a time.

    Each document contributes one (tokens, pos_tags, labels) triple per
    sentence; tokens/POS tags/labels are mapped to indices via the vocab
    dicts on ``args`` and fed as column vectors.

    BUG FIX: gradients were never zeroed — ``loss.backward()`` accumulated
    gradients across every sentence of the whole epoch, so each
    ``optimizer.step()`` applied the sum of all previous gradients.  Added
    ``optimizer.zero_grad()`` at the start of each step.
    """
    model.train()
    epoch_loss = 0
    for doc_train in tqdm.tqdm(data_train):
        for tokens, pos_tags, labels in zip(doc_train.sents,
                                            doc_train.pos_tags,
                                            doc_train.token_labels):
            assert len(tokens) == len(labels)
            tokens = [args.word2idx[i] for i in tokens]
            pos_tags = [args.pos2idx[i] for i in pos_tags]
            labels = [args.label2idx[i] for i in labels]
            # Shape (seq_len, 1) columns for the model; labels stay one-dim
            # because NLL loss expects a 1-D target.
            tokens = Variable(torch.LongTensor(np.array([tokens]).transpose()))
            pos_tags = Variable(torch.LongTensor(np.array([pos_tags]).transpose()))
            labels = Variable(torch.LongTensor(np.array(labels).transpose()))
            if args.cuda:
                tokens, pos_tags, labels = [
                    tokens.cuda(), pos_tags.cuda(), labels.cuda()]
            optimizer.zero_grad()  # FIX: reset grads before each backward pass
            scores = model(tokens, pos_tags)
            loss = criterion(scores, labels)
            loss.backward()
            optimizer.step()
            epoch_loss += utils.to_scalar(loss)
    print('epoch loss:{}'.format(epoch_loss))
def train(model, params, optimizer, q_a_data, q_target_data, answer_data):
    """Run one shuffled training epoch of the knowledge-tracing model.

    Returns ``(mean_loss, accuracy, auc)`` computed over the model's
    filtered (non-padding) predictions.
    """
    bs = params.batch_size

    def _column(mat):
        # (batch, seq) -> (batch*seq, 1) via chunk/cat/permute, preserving
        # the exact row-major layout the model expects.
        pieces = torch.chunk(mat, bs, 0)
        return torch.cat([pieces[i] for i in range(bs)], 1).permute(1, 0)

    n_batches = int(math.floor(len(q_a_data) / bs))
    # One shared permutation keeps the three arrays row-aligned.
    order = np.random.permutation(q_a_data.shape[0])
    q_a_data = q_a_data[order]
    q_target_data = q_target_data[order]
    answer_data = answer_data[order]

    model.train()
    running_loss = 0
    collected_preds, collected_targets = [], []
    for b in range(n_batches):
        sl = slice(b * bs, (b + 1) * bs)
        # Decode answer ids back to 0/1 correctness (padding maps below 0).
        correctness = np.floor((answer_data[sl, :] - 1) / params.n_question)
        next_q = utils.variable(torch.LongTensor(q_target_data[sl, :]), params.gpu)
        history = utils.variable(torch.LongTensor(q_a_data[sl, :]), params.gpu)
        correctness = utils.variable(torch.FloatTensor(correctness), params.gpu)
        model.zero_grad()
        loss, kept_pred, kept_target = model(
            history, _column(next_q), _column(correctness))
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), params.max_grad_norm)
        optimizer.step()
        running_loss += utils.to_scalar(loss)
        collected_preds.append(np.asarray(kept_pred.data.tolist()))
        collected_targets.append(np.asarray(kept_target.data.tolist()))

    all_pred = np.concatenate(collected_preds, axis=0)
    all_target = np.concatenate(collected_targets, axis=0)
    auc = metrics.roc_auc_score(all_target, all_pred)
    # Threshold probabilities at 0.5 for accuracy.
    all_pred[all_pred >= 0.5] = 1.0
    all_pred[all_pred < 0.5] = 0.0
    accuracy = metrics.accuracy_score(all_target, all_pred)
    return running_loss / n_batches, accuracy, auc
def train(epoch_num, model, params, optimizer, q_data, qa_data):
    """Train the DKVMN-style model for one epoch (no shuffling).

    ``qa_data`` encodes (question, answer) pairs; correctness is recovered
    as ``floor((qa - 1) / n_question)``.  Returns
    ``(mean_loss, accuracy, auc)`` over the filtered predictions.

    FIX: ``nn.utils.clip_grad_norm`` is deprecated and removed in modern
    PyTorch; replaced with the in-place ``clip_grad_norm_`` used by the
    sibling training functions in this file.
    """
    N = int(math.floor(len(q_data) / params.batch_size))
    pred_list = []
    target_list = []
    epoch_loss = 0
    model.train()
    for idx in range(N):
        q_one_seq = q_data[idx * params.batch_size:(idx + 1) * params.batch_size, :]
        qa_batch_seq = qa_data[idx * params.batch_size:(idx + 1) * params.batch_size, :]
        target = qa_data[idx * params.batch_size:(idx + 1) * params.batch_size, :]
        # Map combined qa ids back to 0/1 correctness (padding goes negative).
        target = (target - 1) / params.n_question
        target = np.floor(target)
        # NOTE: `varible` is the (misspelled) helper name this project exports.
        input_q = utils.varible(torch.LongTensor(q_one_seq), params.gpu)
        input_qa = utils.varible(torch.LongTensor(qa_batch_seq), params.gpu)
        target = utils.varible(torch.FloatTensor(target), params.gpu)
        # Flatten (batch, seq) -> (batch*seq, 1) in row-major order.
        target_to_1d = torch.chunk(target, params.batch_size, 0)
        target_1d = torch.cat([target_to_1d[i] for i in range(params.batch_size)], 1)
        target_1d = target_1d.permute(1, 0)
        model.zero_grad()
        loss, filtered_pred, filtered_target = model.forward(input_q, input_qa, target_1d)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), params.maxgradnorm)
        optimizer.step()
        epoch_loss += utils.to_scalar(loss)
        pred_list.append(np.asarray(filtered_pred.data.tolist()))
        target_list.append(np.asarray(filtered_target.data.tolist()))
    all_pred = np.concatenate(pred_list, axis=0)
    all_target = np.concatenate(target_list, axis=0)
    auc = metrics.roc_auc_score(all_target, all_pred)
    all_pred[all_pred >= 0.5] = 1.0
    all_pred[all_pred < 0.5] = 0.0
    accuracy = metrics.accuracy_score(all_target, all_pred)
    return epoch_loss / N, accuracy, auc
def test(model, params, optimizer, q_data, qa_data):
    """Evaluate the model; returns ``(mean_loss, accuracy, auc, f1)``.

    ``optimizer`` is unused but kept for signature compatibility with the
    matching ``train`` function.

    FIX: evaluation ran with autograd enabled — ``model.eval()`` alone does
    not disable gradient tracking.  The loop is now wrapped in
    ``torch.no_grad()``; returned values are unchanged, memory use drops.
    """
    N = int(math.floor(len(q_data) / params.batch_size))
    pred_list = []
    target_list = []
    epoch_loss = 0
    model.eval()
    with torch.no_grad():
        for idx in range(N):
            q_one_seq = q_data[idx * params.batch_size:(idx + 1) * params.batch_size, :]
            qa_batch_seq = qa_data[idx * params.batch_size:(idx + 1) * params.batch_size, :]
            target = qa_data[idx * params.batch_size:(idx + 1) * params.batch_size, :]
            # Decode qa ids to 0/1 correctness (padding goes negative).
            target = (target - 1) / params.n_question
            target = np.floor(target)
            input_q = utils.varible(torch.LongTensor(q_one_seq), params.gpu)    # shape 32,200
            input_qa = utils.varible(torch.LongTensor(qa_batch_seq), params.gpu)  # shape 32,200
            target = utils.varible(torch.FloatTensor(target), params.gpu)       # shape 32,200
            # Flatten (batch, seq) -> (batch*seq, 1) in row-major order.
            target_to_1d = torch.chunk(target, params.batch_size, 0)
            target_1d = torch.cat(
                [target_to_1d[i] for i in range(params.batch_size)], 1)
            target_1d = target_1d.permute(1, 0)
            loss, filtered_pred, filtered_target, _ = model.forward(
                input_q, input_qa, target_1d)
            pred_list.append(np.asarray(filtered_pred.data.tolist()))
            target_list.append(np.asarray(filtered_target.data.tolist()))
            epoch_loss += utils.to_scalar(loss)
    all_pred = np.concatenate(pred_list, axis=0)
    all_target = np.concatenate(target_list, axis=0)
    auc = metrics.roc_auc_score(all_target, all_pred)
    all_pred[all_pred >= 0.5] = 1.0
    all_pred[all_pred < 0.5] = 0.0
    accuracy = metrics.accuracy_score(all_target, all_pred)
    f1 = metrics.f1_score(all_target, all_pred)
    return epoch_loss / N, accuracy, auc, f1
def test(model, params, optimizer, q_data, qa_data, a_data):
    """Evaluate the model (variant with explicit answer sequences).

    Returns ``(mean_loss, accuracy, auc)``.  ``optimizer`` is unused but
    kept for signature compatibility with the matching ``train``.

    FIX: wrapped the loop in ``torch.no_grad()`` — ``model.eval()`` alone
    does not disable gradient tracking; results are unchanged.
    """
    N = int(math.floor(len(q_data) / params.batch_size))
    pred_list = []
    target_list = []
    epoch_loss = 0
    model.eval()
    with torch.no_grad():
        for idx in range(N):
            q_one_seq = q_data[idx * params.batch_size:(idx + 1) * params.batch_size, :]
            qa_batch_seq = qa_data[idx * params.batch_size:(idx + 1) * params.batch_size, :]
            a_batch_seq = a_data[idx * params.batch_size:(idx + 1) * params.batch_size, :]
            target = qa_data[idx * params.batch_size:(idx + 1) * params.batch_size, :]
            # Decode qa ids to 0/1 correctness (padding goes negative).
            target = (target - 1) / params.n_question
            target = np.floor(target)
            input_q = utils.variable(torch.LongTensor(q_one_seq), params.gpu)
            input_qa = utils.variable(torch.LongTensor(qa_batch_seq), params.gpu)
            input_a = utils.variable(torch.LongTensor(a_batch_seq), params.gpu)
            target = utils.variable(torch.FloatTensor(target), params.gpu)
            # Flatten (batch, seq) -> (batch*seq, 1) in row-major order.
            target_to_1d = torch.chunk(target, params.batch_size, 0)
            target_1d = torch.cat(
                [target_to_1d[i] for i in range(params.batch_size)], 1)
            target_1d = target_1d.permute(1, 0)
            loss, filtered_pred, filtered_target = model.forward(
                input_q, input_qa, input_a, target_1d)
            pred_list.append(np.asarray(filtered_pred.data.tolist()))
            target_list.append(np.asarray(filtered_target.data.tolist()))
            epoch_loss += utils.to_scalar(loss)
    all_pred = np.concatenate(pred_list, axis=0)
    all_target = np.concatenate(target_list, axis=0)
    auc = metrics.roc_auc_score(all_target, all_pred)
    all_pred[all_pred >= 0.5] = 1.0
    all_pred[all_pred < 0.5] = 0.0
    accuracy = metrics.accuracy_score(all_target, all_pred)
    return epoch_loss / N, accuracy, auc
def train(num_epochs, model, params, optimizer, q_data, qa_data):
    """Train the model for one epoch (no shuffling).

    ``num_epochs`` is unused in the body but kept for signature
    compatibility with callers.  Returns ``(mean_loss, accuracy, auc)``.

    FIX: ``nn.utils.clip_grad_norm`` is deprecated and removed in modern
    PyTorch; replaced with the in-place ``clip_grad_norm_``.
    """
    N = len(q_data) // params.batch_size
    pred_list = []
    target_list = []
    epoch_loss = 0
    # turn the status of model to the train status
    model.train()
    for idx in range(N):
        q_one_seq = q_data[idx * params.batch_size:(idx + 1) * params.batch_size, :]
        qa_batch_seq = qa_data[idx * params.batch_size:(idx + 1) * params.batch_size, :]
        target = qa_data[idx * params.batch_size:(idx + 1) * params.batch_size, :]
        # Decode qa ids to 0/1 correctness (padding goes negative).
        target = (target - 1) / params.n_question
        target = np.floor(target)
        input_q = utils.variable(torch.LongTensor(q_one_seq), params.gpu)
        input_qa = utils.variable(torch.LongTensor(qa_batch_seq), params.gpu)
        target = utils.variable(torch.FloatTensor(target), params.gpu)
        # Flatten (batch, seq) -> (batch*seq, 1) in row-major order.
        target_to_1d = torch.chunk(target, params.batch_size, 0)
        target_1d = torch.cat(
            [target_to_1d[i] for i in range(params.batch_size)], 1)
        target_1d = target_1d.permute(1, 0)
        model.zero_grad()
        loss, filtered_pred, filtered_target = model.forward(
            input_q, input_qa, target_1d)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), params.maxgradnorm)
        optimizer.step()
        epoch_loss += utils.to_scalar(loss)
        pred_list.append(np.asarray(filtered_pred.data.tolist()))
        target_list.append(np.asarray(filtered_target.data.tolist()))
    all_pred = np.concatenate(pred_list, axis=0)
    all_target = np.concatenate(target_list, axis=0)
    auc = metrics.roc_auc_score(all_target, all_pred)
    all_pred[all_pred >= 0.5] = 1.0
    all_pred[all_pred < 0.5] = 0.0
    accuracy = metrics.accuracy_score(all_target, all_pred)
    return epoch_loss / N, accuracy, auc
def training_epoch_end(self, outputs, prefix='train'):
    """Aggregate per-step outputs into epoch-level loss and metrics.

    Computes a size-weighted mean of the step losses, asks the dataset for
    its epoch metrics, stashes a float-only copy on ``self`` as
    ``_{prefix}_results``, and returns the loss plus a ``log`` dict for
    the Lightning logger.
    """
    step_losses = torch.stack([o['loss'] for o in outputs])
    step_sizes = torch.tensor([o['size'] for o in outputs],
                              device=step_losses.device)
    # Weight by batch size so unequal batches contribute proportionally.
    weighted_loss = (step_losses * step_sizes).sum() / step_sizes.sum()

    epoch_metrics = self.dataset.metrics_epoch(
        [o['out'] for o in outputs], [o['target'] for o in outputs])

    results = {f'{prefix}_loss': weighted_loss}
    for name, value in epoch_metrics.items():
        results[f'{prefix}_{name}'] = value

    # PL prefers torch.Tensor while we prefer float
    as_floats = {key: to_scalar(val) for key, val in results.items()}
    setattr(self, f'_{prefix}_results', as_floats)
    if getattr(self.train_args, 'verbose', False):
        print(f'{prefix} set results:', as_floats)
    return {f'{prefix}_loss': weighted_loss, 'log': results}
def test(model, params, optimizer, q_a_data, q_target_data, answer_data,
         repeated_time_gap, past_trail_counts, seq_time_gap):
    """Evaluate the temporal knowledge-tracing model.

    Besides question/answer histories, the model consumes three temporal
    feature arrays (repeated time gap, past trial counts, sequence time
    gap).  Returns ``(mean_loss, accuracy, auc)``.  ``optimizer`` is
    unused but kept for signature compatibility.
    """
    bs = params.batch_size

    def _column(mat):
        # (batch, seq) -> (batch*seq, 1) via chunk/cat/permute, preserving
        # the exact row-major layout the model expects.
        pieces = torch.chunk(mat, bs, 0)
        return torch.cat([pieces[i] for i in range(bs)], 1).permute(1, 0)

    n_batches = int(math.floor(len(q_a_data) / bs))
    running_loss = 0
    collected_preds, collected_targets = [], []
    model.eval()
    for b in range(n_batches):
        sl = slice(b * bs, (b + 1) * bs)
        gap_batch = utils.variable(
            torch.FloatTensor(repeated_time_gap[sl, :]), params.gpu)
        trail_batch = utils.variable(
            torch.FloatTensor(past_trail_counts[sl, :]), params.gpu)
        seq_gap_batch = utils.variable(
            torch.FloatTensor(seq_time_gap[sl, :]), params.gpu)
        # Decode answer ids to 0/1 correctness (padding goes negative).
        correctness = np.floor((answer_data[sl, :] - 1) / params.n_question)
        next_q = utils.variable(torch.LongTensor(q_target_data[sl, :]), params.gpu)
        history = utils.variable(torch.LongTensor(q_a_data[sl, :]), params.gpu)
        correctness = utils.variable(torch.FloatTensor(correctness), params.gpu)
        loss, kept_pred, kept_target = model.forward(
            history, _column(next_q), _column(correctness),
            gap_batch, trail_batch, seq_gap_batch)
        collected_preds.append(np.asarray(kept_pred.data.tolist()))
        collected_targets.append(np.asarray(kept_target.data.tolist()))
        running_loss += utils.to_scalar(loss)

    all_pred = np.concatenate(collected_preds, axis=0)
    all_target = np.concatenate(collected_targets, axis=0)
    auc = metrics.roc_auc_score(all_target, all_pred)
    all_pred[all_pred >= 0.5] = 1.0
    all_pred[all_pred < 0.5] = 0.0
    accuracy = metrics.accuracy_score(all_target, all_pred)
    return running_loss / n_batches, accuracy, auc
def test(epoch, test_loader, do_use_cuda, save_img_path=None):
    """Evaluate the VQ-VAE (module-level ``vmodel``) on the test set.

    Returns the per-term mean ``[reconstruction, codebook, commitment]``
    loss.  If ``save_img_path`` is given, also plots original vs.
    reconstruction for one sample of the LAST batch and saves the figure.
    """
    test_loss = []
    for batch_idx, (data, _) in enumerate(test_loader):
        start_time = time.time()
        if do_use_cuda:
            x = Variable(data, requires_grad=False).cuda()
        else:
            x = Variable(data, requires_grad=False)
        x_d, z_e_x, z_q_x, latents = vmodel(x)
        # Reconstruction term: target rescaled from [0, 1] to [-1, 1].
        loss_1 = discretized_mix_logistic_loss(x_d, 2 * x - 1,
                                               use_cuda=do_use_cuda)
        # Codebook loss (pull codes toward encoder outputs).
        loss_2 = F.mse_loss(z_q_x, z_e_x.detach())
        # Commitment loss, weighted 0.25 as in the VQ-VAE paper setup.
        loss_3 = .25 * F.mse_loss(z_e_x, z_q_x.detach())
        test_loss.append(to_scalar([loss_1, loss_2, loss_3]))
    test_loss_mean = np.asarray(test_loss).mean(0)
    if save_img_path is not None:
        # Uses x / x_d from the last loop iteration.
        x_tilde = sample_from_discretized_mix_logistic(x_d, nr_logistic_mix)
        idx = 0
        x_cat = torch.cat([x[idx], x_tilde[idx]], 0)
        images = x_cat.cpu().data
        # Samples come out in [-1, 1]; map back to original pixel range.
        pred = (((np.array(x_tilde.cpu().data)[0, 0] + 1.0) / 2.0)
                * float(max_pixel - min_pixel)) + min_pixel
        # input x is between 0 and 1
        real = (np.array(x.cpu().data)[0, 0]
                * float(max_pixel - min_pixel)) + min_pixel
        f, ax = plt.subplots(1, 3, figsize=(10, 3))
        ax[0].imshow(real, vmin=0, vmax=max_pixel)
        ax[0].set_title("original")
        ax[1].imshow(pred, vmin=0, vmax=max_pixel)
        ax[1].set_title("pred epoch %s test loss %s" % (
            epoch, np.mean(test_loss_mean)))
        ax[2].imshow((pred - real)**2, cmap='gray')
        ax[2].set_title("error")
        f.tight_layout()
        plt.savefig(save_img_path)
        plt.close()
        print("saving example image")
        print("rsync -avhp [email protected]://%s"
              % os.path.abspath(save_img_path))
    return test_loss_mean
def train(epoch, train_loader, do_use_cuda):
    """Train the VQ-VAE (module-level ``vmodel``/``opt``) for one epoch.

    Performs the VQ-VAE three-part backward: reconstruction loss through
    the decoder, straight-through gradient copy from quantized codes to
    the encoder output, then codebook and commitment losses.  Returns the
    per-term mean ``[loss_1, loss_2, loss_3]`` over the epoch.

    FIX: the progress log used a Python-2-only ``print`` statement;
    converted to a ``print()`` call (identical output, Py2/Py3 compatible).
    """
    print("starting epoch {}".format(epoch))
    train_loss = []
    for batch_idx, (data, _) in enumerate(train_loader):
        start_time = time.time()
        if do_use_cuda:
            x = Variable(data, requires_grad=False).cuda()
        else:
            x = Variable(data, requires_grad=False)
        opt.zero_grad()
        x_d, z_e_x, z_q_x, latents = vmodel(x)
        # with bigger model - latents is 64, 6, 6
        # Keep z_q_x's grad so it can be copied straight-through to z_e_x.
        z_q_x.retain_grad()
        # going into dml - x should be bt 0 and 1
        loss_1 = discretized_mix_logistic_loss(x_d, 2 * x - 1,
                                               use_cuda=do_use_cuda)
        loss_1.backward(retain_graph=True)
        # The embedding must not receive the reconstruction gradient.
        vmodel.embedding.zero_grad()
        # Straight-through estimator: route the decoder gradient at the
        # quantized codes back into the encoder output.
        z_e_x.backward(z_q_x.grad, retain_graph=True)
        # Codebook loss (pull codes toward encoder outputs).
        loss_2 = F.mse_loss(z_q_x, z_e_x.detach())
        loss_2.backward(retain_graph=True)
        # Commitment loss, weighted 0.25.
        loss_3 = .25 * F.mse_loss(z_e_x, z_q_x.detach())
        loss_3.backward()
        opt.step()
        train_loss.append(to_scalar([loss_1, loss_2, loss_3]))
        if not batch_idx % 100:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {} Time: {}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / float(len(train_loader)),
                np.asarray(train_loss).mean(0),
                time.time() - start_time))
    return np.asarray(train_loss).mean(0)
def test(epoch, test_loader, DEVICE, save_img_path=None):
    """Evaluate the VAE (module-level ``vmodel``) on the test set.

    Returns the per-term mean ``[kl_loss, dmll_loss]`` over all batches.
    ``epoch`` and ``save_img_path`` are currently unused (the example-image
    plotting this function once did has been disabled).
    """
    kl_weight = 1  # KL term weight; fixed, matching train()
    per_batch = []
    for batch, _ in test_loader:
        inputs = Variable(batch, requires_grad=False).to(DEVICE)
        recon = vmodel(inputs)
        # Reconstruction target rescaled from [0, 1] to [-1, 1].
        rec_term = discretized_mix_logistic_loss(
            recon, 2 * inputs - 1, nr_mix=nr_mix, use_cuda=args.cuda)
        kl_term = kl_weight * latent_loss(vmodel.z_mean, vmodel.z_sigma)
        loss = rec_term + kl_term  # combined loss, kept for parity with train()
        per_batch.append(to_scalar([kl_term, rec_term]))
    return np.asarray(per_batch).mean(0)
def train_epoch(data_train, model, optimizer, criterion_t, criterion_i, args):
    """Jointly train trigger tagging and interaction (edge) classification.

    For each sentence: predicts trigger labels, then builds candidate
    trigger pairs either from gold triggers (``args.pred_edge_with_gold``)
    or from the just-predicted triggers (``args.pred_edge_with_pred``) and
    classifies the edges.  Total loss is a weighted sum of both terms.
    """
    model.train()
    epoch_loss = 0
    # data_train = remove_neg_data(data_train)
    # pdb.set_trace()
    # all_data: corpus_ids, corpus_tokens, corpus_pos_tags, corpus_trigger_labels, corpus_interaction_idxs, corpus_interaction_labels
    for d in tqdm.tqdm(data_train):
        model.zero_grad()
        tokens = d[1]
        pos_tags = d[2]
        trigger_labels = d[3]
        assert len(tokens) == len(trigger_labels)
        # Map surface forms to vocabulary indices.
        tokens = [args.word2idx[i] for i in tokens]
        pos_tags = [args.pos2idx[i] for i in pos_tags]
        trigger_labels = [args.triggerlabel2idx[i] for i in trigger_labels]
        # Column vectors of shape (seq_len, 1) for the model.
        tokens = Variable(torch.LongTensor(np.array([tokens]).transpose()))
        pos_tags = Variable(torch.LongTensor(np.array([pos_tags]).transpose()))
        trigger_labels = Variable(
            torch.LongTensor(np.array(trigger_labels).transpose())
        )  # labels have to be one-dim for NLL loss
        if args.cuda:
            tokens, pos_tags, trigger_labels = [
                tokens.cuda(), pos_tags.cuda(), trigger_labels.cuda()
            ]
        # first predict for triggers
        scores_trigger = model(tokens, pos_tags, pair_idxs=None, task='trigger')
        loss_trigger = criterion_t(scores_trigger, trigger_labels)
        # second predict edges, there are two cases
        if args.pred_edge_with_gold:
            # in this case, just use the gold pairs and predict the edge
            pair_idxs = d[4]
            interaction_labels = d[5]
            assert len(pair_idxs) == len(interaction_labels)
            # only select Theme and Cause edges
            # this is to exclude the Site ... args
            # pair_idxs = [pair_idxs[i] for i in range(len(pair_idxs)) if interaction_labels[i] not in interaction_ignore_types]
            # interaction_labels = [interaction_labels[i] for i in range(len(interaction_labels)) if interaction_labels[i] not in interaction_ignore_types]
            # we construct the pairs using gold trigger labels
            # note that there can be None pairs
            pair_idxs, interaction_labels = construct_pairs(
                y_preds=[args.triggerlabel2idx[i] for i in d[3]],
                gold_pair_idxs=d[4],
                gold_int_labels=d[5],
                gold_trigger_labels=d[3],
                args=args,
                test=False)
        elif args.pred_edge_with_pred:
            # in this case, first construct the pairs with predicted triggers, pairs:(T, E), (T, T)
            # returned pair_idxs and ineteraction_labels can be empty
            y_preds = scores_trigger.max(dim=1, keepdim=False)[1].tolist()
            # we construct the pairs using predicted triggers
            pair_idxs, interaction_labels = construct_pairs(
                y_preds=y_preds,
                gold_pair_idxs=d[4],
                gold_int_labels=d[5],
                gold_trigger_labels=d[3],
                args=args,
                test=False)
        assert len(pair_idxs) == len(interaction_labels)
        # NOTE(review): using pdb.set_trace() as the assert *message* means
        # the debugger starts while evaluating the message when the assert
        # fails — deliberate debugging aid, but surprising.
        assert set(interaction_labels).intersection(
            set(interaction_ignore_types)) == set(
                []), pdb.set_trace()
        #print(interaction_labels)
        interaction_labels = [
            args.interactionlabel2idx[i] for i in interaction_labels
        ]
        interaction_labels = Variable(
            torch.LongTensor(np.array(interaction_labels).transpose()))
        if args.cuda:
            interaction_labels = interaction_labels.cuda()
        loss_interaction = 0
        if len(pair_idxs) > 0:
            # Only compute loss for those sentences which have interactions
            scores_interaction = model(tokens, pos_tags, pair_idxs,
                                       task='interaction')
            loss_interaction = criterion_i(scores_interaction,
                                           interaction_labels)
        # Weighted combination of the two task losses.
        loss = args.trigger_w * loss_trigger + args.interaction_w * loss_interaction
        loss.backward()
        optimizer.step()
        epoch_loss += utils.to_scalar(loss)
    print('epoch loss:{}'.format(epoch_loss))
def test(model, params, optimizer, q_a_data, q_target_data, answer_data):
    """Evaluate on ragged sequences, zero-padding each batch to its longest
    history.

    The three inputs are sequences of per-student lists (not rectangular
    arrays); every batch is padded to the longest ``q_a`` sequence in that
    batch.  Returns ``(mean_loss, accuracy, auc)``.  ``optimizer`` is
    unused but kept for signature compatibility.
    """
    bs = params.batch_size

    def _column(mat):
        # (batch, seq) -> (batch*seq, 1) via chunk/cat/permute, preserving
        # the exact row-major layout the model expects.
        pieces = torch.chunk(mat, bs, 0)
        return torch.cat([pieces[i] for i in range(bs)], 1).permute(1, 0)

    def _pad(seqs, width):
        # Right-pad each variable-length sequence with zeros to `width`.
        out = np.zeros((bs, width))
        for row, seq in enumerate(seqs):
            out[row, :len(seq)] = seq
        return out

    n_batches = int(math.floor(len(q_a_data) / bs))
    running_loss = 0
    collected_preds, collected_targets = [], []
    model.eval()
    for b in range(n_batches):
        sl = slice(b * bs, (b + 1) * bs)
        history_seqs = q_a_data[sl]
        next_q_seqs = q_target_data[sl]
        answer_seqs = answer_data[sl]
        width = max(len(s) for s in history_seqs)
        history_arr = _pad(history_seqs, width)
        next_q_arr = _pad(next_q_seqs, width)
        answer_arr = _pad(answer_seqs, width)
        # Decode answer ids to 0/1 correctness (padding goes negative).
        correctness = np.floor((answer_arr - 1) / params.n_question)
        next_q = utils.variable(torch.LongTensor(next_q_arr), params.gpu)
        history = utils.variable(torch.LongTensor(history_arr), params.gpu)
        correctness = utils.variable(torch.FloatTensor(correctness), params.gpu)
        loss, kept_pred, kept_target = model.forward(
            history, _column(next_q), _column(correctness))
        collected_preds.append(np.asarray(kept_pred.data.tolist()))
        collected_targets.append(np.asarray(kept_target.data.tolist()))
        running_loss += utils.to_scalar(loss)

    all_pred = np.concatenate(collected_preds, axis=0)
    all_target = np.concatenate(collected_targets, axis=0)
    auc = metrics.roc_auc_score(all_target, all_pred)
    all_pred[all_pred >= 0.5] = 1.0
    all_pred[all_pred < 0.5] = 0.0
    accuracy = metrics.accuracy_score(all_target, all_pred)
    return running_loss / n_batches, accuracy, auc
def train(model, epoch, params, optimizer, q_data, q_target_data, qa_data):
    """One shuffled training epoch with periodic learning-rate decay.

    Returns ``(mean_loss, accuracy, auc)`` over the filtered predictions.
    (Comments translated from Chinese.)
    """
    N = int(math.floor(len(q_data) / params.batch_size))
    # shuffle data
    shuffle_index = np.random.permutation(q_data.shape[0])
    q_data = q_data[shuffle_index]  # (question, answer) pairs used as input
    q_target_data = q_target_data[shuffle_index]  # id of the next question
    qa_data = qa_data[shuffle_index]  # (question, answer) pair of the next step
    pred_list = []
    target_list = []
    epoch_loss = 0
    model.train()
    for idx in range(N):
        q_target_seq = q_target_data[idx * params.batch_size:(idx + 1) *
                                     params.batch_size, :]  # next-question ids
        qa_batch_seq = q_data[idx * params.batch_size:(idx + 1) *
                              params.batch_size, :]  # model input
        target = qa_data[idx * params.batch_size:(idx + 1) *
                         params.batch_size, :]  # answer of the next question
        target = (target - 1) / params.n_question
        # 1 = correct, 0 = wrong, negative values mark zero-padded positions
        target = np.floor(target)
        input_q_target = utils.variable(torch.LongTensor(q_target_seq),
                                        params.gpu)
        input_x = utils.variable(torch.LongTensor(qa_batch_seq), params.gpu)
        target = utils.variable(torch.FloatTensor(target), params.gpu)
        # Flatten (batch, seq) -> (batch*seq, 1) in row-major order.
        target_to_1d = torch.chunk(target, params.batch_size, 0)
        target_1d = torch.cat(
            [target_to_1d[i] for i in range(params.batch_size)], 1)
        target_1d = target_1d.permute(1, 0)
        input_q_target_to_1d = torch.chunk(input_q_target, params.batch_size,
                                           0)
        input_q_target_1d = torch.cat(
            [input_q_target_to_1d[i] for i in range(params.batch_size)], 1)
        input_q_target_1d = input_q_target_1d.permute(1, 0)
        model.zero_grad()
        # e.g. encoded answers 10x800, question ids 8000x1, answers 8000x1
        loss, filtered_pred, filtered_target = model(
            input_x, input_q_target_1d, target_1d)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), params.max_grad_norm)
        optimizer.step()
        epoch_loss += utils.to_scalar(loss)
        right_target = np.asarray(filtered_target.data.tolist())
        right_pred = np.asarray(filtered_pred.data.tolist())
        pred_list.append(right_pred)
        target_list.append(right_target)
    all_pred = np.concatenate(pred_list, axis=0)
    all_target = np.concatenate(target_list, axis=0)
    if (epoch + 1) % params.decay_epoch == 0:
        # NOTE(review): new_lr is computed from init_lr each time, so the LR
        # jumps to init_lr * lr_decay once and never decays further — confirm
        # whether compounding decay was intended.
        new_lr = params.init_lr * params.lr_decay
        if new_lr < params.final_lr:
            new_lr = params.final_lr
        utils.adjust_learning_rate(optimizer, new_lr)
    auc = metrics.roc_auc_score(all_target, all_pred)
    all_pred[all_pred >= 0.5] = 1.0
    all_pred[all_pred < 0.5] = 0.0
    accuracy = metrics.accuracy_score(all_target, all_pred)
    # f1 = metrics.f1_score(all_target, all_pred)
    return epoch_loss / N, accuracy, auc
# One training epoch of the NER model (script-level fragment: `epoch_loss`,
# `tot_length`, `packer`, `crit_ner`, `crit_lm`, `optimizer` etc. are
# defined outside this snippet).
ner_model.train()
for f_f, f_p, b_f, b_p, w_f, tg_v, mask_v, len_v in tqdm(
        itertools.chain.from_iterable(dataset_loader), mininterval=2,
        desc=' - Tot it %d (epoch %d)' % (tot_length, args.start_epoch),
        leave=False, file=sys.stdout):
    # What exactly are f_f, f_p, b_f, b_p, w_f, tg_v, mask_v, len_v?
    # (translated from the original Chinese comment; presumably forward/
    # backward character features & positions, word features, tag vector,
    # mask and lengths — confirm against the data packer.)
    #Ex. for i in tqdm(range(1000)):
    f_f, f_p, b_f, b_p, w_f, tg_v, mask_v = packer.repack_vb(
        f_f, f_p, b_f, b_p, w_f, tg_v, mask_v, len_v)
    ner_model.zero_grad()
    scores = ner_model(f_f, f_p, b_f, b_p, w_f)
    loss = crit_ner(scores, tg_v, mask_v)
    epoch_loss += utils.to_scalar(loss)
    if args.co_train:
        # Co-training: add forward/backward language-model losses.
        # Shift positions/targets by one step for next/previous-word LM.
        cf_p = f_p[0:-1, :].contiguous()
        cb_p = b_p[1:, :].contiguous()
        cf_y = w_f[1:, :].contiguous()
        cb_y = w_f[0:-1, :].contiguous()
        cfs, _ = ner_model.word_pre_train_forward(f_f, cf_p)
        loss = loss + args.lambda0 * crit_lm(cfs, cf_y.view(-1))
        cbs, _ = ner_model.word_pre_train_backward(b_f, cb_p)
        loss = loss + args.lambda0 * crit_lm(cbs, cb_y.view(-1))
    loss.backward()
    # NOTE(review): clip_grad_norm is the deprecated (pre-1.0) spelling;
    # modern PyTorch uses clip_grad_norm_.
    nn.utils.clip_grad_norm(ner_model.parameters(), args.clip_grad)
    optimizer.step()
epoch_loss /= tot_length
# update lr
def train(model, params, optimizer, q_data, qa_data, a_data):
    """One shuffled training epoch (variant with explicit answer input).

    Returns ``(mean_loss, accuracy, auc)`` and prints the wall-clock epoch
    time.  (Comments translated from Chinese.)
    """
    N = int(math.floor(len(q_data) / params.batch_size))  # number of batches
    # shuffle data
    shuffle_index = np.random.permutation(q_data.shape[0])
    q_data = q_data[shuffle_index]
    qa_data = qa_data[shuffle_index]
    a_data = a_data[shuffle_index]
    pred_list = []
    target_list = []
    epoch_loss = 0
    model.train()
    start = time.time()
    for idx in range(N):
        q_one_seq = q_data[idx * params.batch_size:(idx + 1) *
                           params.batch_size, :]
        qa_batch_seq = qa_data[idx * params.batch_size:(idx + 1) *
                               params.batch_size, :]
        a_batch_seq = a_data[idx * params.batch_size:(idx + 1) *
                             params.batch_size, :]
        target = qa_data[idx * params.batch_size:(idx + 1) *
                         params.batch_size, :]
        # Decode combined qa ids to 0/1 correctness (padding goes negative).
        target = (target - 1) / params.n_question
        target = np.floor(target)  # round down
        input_q = utils.variable(torch.LongTensor(q_one_seq), params.gpu)
        input_qa = utils.variable(torch.LongTensor(qa_batch_seq), params.gpu)
        input_a = utils.variable(torch.LongTensor(a_batch_seq), params.gpu)
        target = utils.variable(torch.FloatTensor(target), params.gpu)
        # Flatten (batch, seq) -> (batch*seq, 1) in row-major order.
        target_to_1d = torch.chunk(target, params.batch_size, 0)
        target_1d = torch.cat(
            [target_to_1d[i] for i in range(params.batch_size)], 1)
        target_1d = target_1d.permute(1, 0)  # transpose dimensions
        model.zero_grad()
        loss, filtered_pred, filtered_target = model(input_q, input_qa,
                                                     input_a, target_1d)
        loss.backward()  # one backward pass per batch
        nn.utils.clip_grad_norm_(model.parameters(), params.max_grad_norm)
        optimizer.step()
        epoch_loss += utils.to_scalar(loss)
        # print("training : batch " + str(idx) + " finished!")
        right_target = np.asarray(filtered_target.data.tolist())
        right_pred = np.asarray(filtered_pred.data.tolist())
        pred_list.append(right_pred)
        target_list.append(right_target)
    all_pred = np.concatenate(pred_list, axis=0)
    all_target = np.concatenate(target_list, axis=0)
    # if (idx + 1) % params.decay_epoch == 0:
    #     utils.adjust_learning_rate(optimizer, params.init_lr * params.lr_decay)
    #     print('lr: ', params.init_lr / (1 + 0.75))
    auc = metrics.roc_auc_score(all_target, all_pred)
    all_pred[all_pred >= 0.5] = 1.0
    all_pred[all_pred < 0.5] = 0.0
    accuracy = metrics.accuracy_score(all_target, all_pred)
    # f1 = metrics.f1_score(all_target, all_pred)
    end = time.time()
    print("epoch time:" + str(end - start))
    return epoch_loss / N, accuracy, auc