Exemplo n.º 1
0
 def on_valid_end(self, eval_result, metric_key, optimizer, better_result):
     """Log validation metrics to fitlog and, on improvement, re-test the extra datasets.

     `metric_key` and `optimizer` are part of the callback signature but unused here.
     """
     if better_result:
         # Annotate a copy so the caller's result dict is not mutated.
         eval_result = deepcopy(eval_result)
         eval_result['step'] = self.step
         eval_result['epoch'] = self.epoch
         fitlog.add_best_metric(eval_result)
     fitlog.add_metric(eval_result, step=self.step, epoch=self.epoch)
     if not self.testers:
         return
     for key, tester in self.testers.items():
         try:
             eval_result = tester.test()
             if self.verbose != 0:
                 self.pbar.write("FitlogCallback evaluation on {}:".format(key))
                 self.pbar.write(tester._format_eval_results(eval_result))
             fitlog.add_metric(eval_result, name=key,
                               step=self.step, epoch=self.epoch)
             if better_result:
                 fitlog.add_best_metric(eval_result, name=key)
         except Exception as e:
             self.pbar.write(
                 "Exception happens when evaluate on DataSet named `{}`.".format(key))
             raise e
Exemplo n.º 2
0
def test(
    C,
    logger,
    dataset,
    models,
    loss_func,
    generator,
    mode="valid",
    epoch_id=0,
    run_name="0",
    need_generated=False,
):
    """Run one evaluation pass over `dataset` and log micro/macro F1 to fitlog.

    Returns (micro_f1, macro_f1, avg_loss) — plus the generated text when
    `need_generated` is true.
    """
    device, batch_size, batch_numb, models = before_test(
        C, logger, dataset, models)

    progress = tqdm(range(batch_numb), ncols=70)
    avg_loss = 0
    generated = ""
    for batch_id in progress:
        start = batch_id * batch_size
        data = dataset[start:start + batch_size]
        sents, ents, anss, data_ent = get_data_from_batch(
            data, device=tc.device(C.device))

        # Inference only — no gradients needed.
        with tc.no_grad():
            model, preds, loss, partial_generated = get_output(
                C, logger, models, device, loss_func, generator, sents, ents,
                anss, data_ent)
        generated = generated + partial_generated
        avg_loss = avg_loss + float(loss) / len(models)

        progress.set_description_str("(Test )Epoch {0}".format(epoch_id))
        progress.set_postfix_str("loss = %.4f (avg = %.4f)" %
                                 (float(loss), avg_loss / (batch_id + 1)))

    micro_f1, macro_f1 = get_evaluate(C, logger, mode, generated, generator,
                                      dataset)

    logger.log(
        "-----Epoch {} tested. Micro F1 = {:.2f}% , Macro F1 = {:.2f}% , loss = {:.4f}"
        .format(epoch_id, micro_f1, macro_f1, avg_loss / batch_numb))
    logger.log("\n")

    fitlog.add_metric(micro_f1,
                      step=epoch_id,
                      name="({0})micro f1".format(run_name))
    fitlog.add_metric(macro_f1,
                      step=epoch_id,
                      name="({0})macro f1".format(run_name))

    if need_generated:
        return micro_f1, macro_f1, avg_loss, generated
    return micro_f1, macro_f1, avg_loss
Exemplo n.º 3
0
 def on_valid_end(self, eval_result, metric_key, optimizer, better_result):
     """Record validation metrics; on a new best, also promote the cached tester metrics.

     `metric_key` and `optimizer` are part of the callback signature but unused here.
     """
     if better_result:
         # Copy before annotating so the original dict stays untouched.
         eval_result = deepcopy(eval_result)
         eval_result['step'] = self.step
         eval_result['epoch'] = self.epoch
         fitlog.add_best_metric(eval_result)
     fitlog.add_metric(eval_result, step=self.step, epoch=self.epoch)
     if better_result:
         # _save_metrics was filled by on_valid_begin for each extra dataset.
         for name, saved_result in self._save_metrics.items():
             fitlog.add_best_metric(saved_result, name=name)
Exemplo n.º 4
0
Arquivo: train.py Projeto: FFTYYY/Poem
def train(model , train_data , test_data):
	"""Train `model` on `train_data`, validating on `test_data` after every epoch.

	The best (lowest valid-loss) model is pickled to C.model_save and metrics
	go to fitlog. Returns the in-memory model from the last epoch run.
	KeyboardInterrupt is caught so training can be stopped manually.
	"""
	train_iter = DataSetIter(train_data , batch_size = C.batch_size)
	test_iter  = DataSetIter(test_data  , batch_size = C.batch_size)

	# ignore_index=0: index 0 is treated as padding and excluded from the loss
	loss_func = nn.CrossEntropyLoss(ignore_index = 0)
	optim = tc.optim.Adam(params = model.parameters() , lr = C.lr , weight_decay = C.weight_decay)	
	scheduler = get_cosine_schedule_with_warmup(
		optim , 
		num_warmup_steps = C.warmup ,
		num_training_steps = train_iter.num_batches * C.epoch_number , 
	)

	# -1 sentinels mean "no epoch has finished yet"
	best_test_loss 	= -1
	best_test_epoch = -1
	best_step 		= -1
	try:
		for epoch_n in range(C.epoch_number):
			tra_loss = run(model , train_iter , loss_func , epoch_n , optim , scheduler , True)
			tes_loss = run(model , test_iter , loss_func , epoch_n , None , None , False)

			logger.log ("Epoch %d ended. Train loss = %.4f , Valid loss = %.4f" % (
				epoch_n , tra_loss , tes_loss ,
			))
			fitlog.add_metric(
				tes_loss , 
				step = train_iter.num_batches * (epoch_n + 1) , 
				epoch = epoch_n , 
				name = "valid loss"
			)

			if best_test_epoch < 0 or tes_loss < best_test_loss:
				best_test_loss = tes_loss
				best_test_epoch = epoch_n
				# NOTE(review): fitlog_loss_step appears to be a module-level
				# step counter keyed by loss name — confirm it exists globally.
				best_step = fitlog_loss_step["train loss"]

				fitlog.add_best_metric(best_test_loss , name = "loss")
				with open(C.model_save , "wb") as fil:# temporarily save the best model so far
					pickle.dump(model , fil)
				fitlog.add_hyper(name = "best_step" , value =  "%d / %d" % (
					best_step ,
					train_iter.num_batches * C.epoch_number , 
				))

	except KeyboardInterrupt: # manual early stop
		pass

	logger.log ("Train end.")
	logger.log ("Got best valid loss %.4f in epoch %d" % (best_test_loss , best_test_epoch))

	return model
Exemplo n.º 5
0
def train(step, model, data_loader, optim, device):
    """Run one training epoch, then log mean perplexity and accuracy to fitlog."""
    model.train()
    total, correct = 0, 0
    losses = []
    progress = tqdm(total=len(data_loader))
    for src, trg in data_loader:
        loss, batch_total, batch_correct = train_step(src, trg, model, optim, device)
        total += batch_total
        correct += batch_correct
        losses.append(loss)
        progress.update(1)
        progress.set_postfix({'accuracy': batch_correct / batch_total, "loss": loss, "ppl": math.exp(loss)})
    progress.close()
    avg_loss = mean(losses)
    print("training epoch {} ||  ppl {} ||accuracy {} || loss  {}".format(step, math.exp(avg_loss), (correct / total),
                                                                          avg_loss))
    fitlog.add_loss(math.exp(avg_loss), step=step, name='train')
    fitlog.add_metric(correct / total, step=step, name='train accuracy')
Exemplo n.º 6
0
 def on_valid_begin(self):
     """Evaluate every extra tester before validation and cache the results.

     The cached results are later promoted to best metrics by on_valid_end.
     """
     if not self.testers:
         return
     for name, tester in self.testers.items():
         try:
             result = tester.test()
             if self.verbose != 0:
                 self.pbar.write("FitlogCallback evaluation on {}:".format(name))
                 self.pbar.write(tester._format_eval_results(result))
             fitlog.add_metric(result, name=name,
                               step=self.step, epoch=self.epoch)
             self._save_metrics[name] = result
         except Exception as e:
             self.pbar.write(
                 "Exception happens when evaluate on DataSet named `{}`.".format(name))
             raise e
Exemplo n.º 7
0
def valid_one_epoch(fold,
                    epoch,
                    model,
                    criterion,
                    optimizer,
                    dataloader,
                    device,
                    scheduler=None):
    """Validate `model` for one epoch, log dice to fitlog, and save best weights.

    Uses the module globals `min_loss`, `max_dice`, `save_dir`. `criterion`,
    `optimizer` and `scheduler` are accepted for signature parity with the
    training loop but are unused here.
    """
    global min_loss, max_dice, save_dir
    model.eval()

    img_labels, img_preds = [], []
    total_loss, length = .0, 0

    pbar = tqdm(dataloader)
    # Fix: run inference under no_grad() — model.eval() alone does not stop
    # autograd from recording the forward pass, which wastes GPU memory
    # (and kept grad history on the accumulated predictions).
    with torch.no_grad():
        for step, (imgs, labels) in enumerate(pbar):
            imgs = imgs.to(device)
            labels = labels.to(device)

            preds = model(imgs)
            img_preds.append(preds.sigmoid())
            img_labels.append(labels)

    preds = torch.cat(img_preds)
    labels = torch.cat(img_labels).type_as(preds)

    dice = calc_dice(preds, labels, args.thersh)
    fitlog.add_metric({"val": {f"fold_{fold}_dice": dice}}, step=epoch)
    if not save_dir:
        # Keep checkpoints inside the fitlog run folder.
        save_dir = fitlog.get_log_folder(absolute=True)
    if dice > max_dice:
        max_dice = dice
        fitlog.add_best_metric({"val": {f"fold_{fold}_dice": max_dice}})
        torch.save(
            model.state_dict(),
            f'{save_dir}/{args.structure}_{args.encoder}_fold{fold}_best.pth')

    print(f'fold {fold} epoch {epoch}, valid dice {dice:.4f}')
Exemplo n.º 8
0
def eval(step, model, data_loader, best_loss, device):
    """Run one evaluation epoch; save the model when mean loss improves.

    Returns the (possibly updated) running best loss.
    """
    model.eval()
    total, correct = 0, 0
    losses = []
    progress = tqdm(total=len(data_loader))
    for src, trg in data_loader:
        loss, batch_total, batch_correct = eval_step(src, trg, model, device)
        total += batch_total
        correct += batch_correct
        losses.append(loss)
        progress.update(1)
    progress.close()
    avg_loss = mean(losses)
    fitlog.add_loss(math.exp(avg_loss), step=step, name='eval')
    fitlog.add_metric(correct / total, step=step, name='eval accuracy')
    if best_loss > avg_loss:
        torch.save(model.state_dict(), './model/best_model.pkl')
        best_loss = avg_loss
        print("saving to best_model.pkl")
    print("eval epoch {} ||  ppl {} ||accuracy {} || loss  {} ||best loss {}".format(step, math.exp(avg_loss),
                                                                                     correct / total, avg_loss,
                                                                                     best_loss))
    return best_loss
Exemplo n.º 9
0
def valid_one_epoch(fold,
                    epoch,
                    model,
                    criterion,
                    optimizer,
                    dataloader,
                    device,
                    scheduler=None):
    """Validate `model` for one epoch, log accuracy to fitlog, save best weights.

    Uses the module globals `min_loss`, `max_acc`, `save_dir`. `criterion`,
    `optimizer` and `scheduler` are accepted for signature parity with the
    training loop but are unused here.
    """
    global min_loss, max_acc, save_dir
    model.eval()

    img_labels, img_preds = [], []
    total_loss, length = .0, 0

    pbar = tqdm(dataloader)
    # Fix: run inference under no_grad() — model.eval() alone does not stop
    # autograd from recording the forward pass, which wastes GPU memory.
    with torch.no_grad():
        for step, (imgs, labels) in enumerate(pbar):
            imgs = imgs.to(device)
            labels = labels.to(device)

            preds = model(imgs)
            # Multi-label prediction via a 0.5 sigmoid threshold.
            img_preds.append((preds.sigmoid() > 0.5).detach().cpu().numpy())
            img_labels.append(labels.detach().cpu().numpy())

    img_preds = np.concatenate(img_preds)
    img_labels = np.concatenate(img_labels)
    acc = (img_preds == img_labels).mean()
    fitlog.add_metric({"val": {f"fold_{fold}_acc": acc}}, step=epoch)
    save_dir = fitlog.get_log_folder(absolute=True)
    if acc > max_acc:
        max_acc = acc
        fitlog.add_best_metric({"val": {f"fold_{fold}_acc": max_acc}})
        torch.save(model.state_dict(),
                   f'{save_dir}/{args.model}_fold{fold}_best.pth')

    print(f'fold {fold} epoch {epoch}, valid acc {acc:.4f}')
Exemplo n.º 10
0
    def _train(self, criterion, optimizer, train_data_loader, val_data_loader,
               test_data_loader):
        """Train with optional regularization and adversarial losses.

        Logs hyperparameters and metrics to fitlog, evaluates on the val and
        test loaders after every epoch, and keeps only the checkpoint of the
        best validation accuracy on disk. Returns the path of that checkpoint.
        """
        fitlog.add_hyper({
            "model_name": self.opt.model_name,
            "dataset": self.opt.dataset,
            'resplit': self.opt.resplit,
            "domain": self.opt.domain,
            "aug": self.opt.aug,
            "adv": self.opt.adv,
            "aux": self.opt.aux,
            "adv_aux": self.opt.adv_aux,
            'chg': self.opt.chg
        })

        max_val_acc = 0
        max_val_f1 = 0
        global_step = 0
        last_model_path = None
        # model_path =None
        path = None

        # NOTE(review): pgd and k are created but never used below — possibly
        # left over from the commented-out PGD adversarial-training loop.
        pgd = PGD(self.model)
        k = 3
        for epoch in range(self.opt.num_epoch):
            logger.info('>' * 100)
            logger.info('epoch: {}'.format(epoch))
            n_correct, n_total, loss_total = 0, 0, 0
            # switch model to training mode
            self.model.train()
            for i_batch, sample_batched in enumerate(train_data_loader):
                global_step += 1
                # clear gradient accumulators
                optimizer.zero_grad()

                inputs = [
                    sample_batched[col].to(self.opt.device)
                    for col in self.opt.inputs_cols
                ]
                # NOTE(review): both branches are identical — the
                # 'bert_multi_target' special case appears to be a stub.
                if self.opt.model_name == 'bert_multi_target':
                    targets = sample_batched['polarity'].to(self.opt.device)
                else:
                    targets = sample_batched['polarity'].to(self.opt.device)

                # Models in reg_list return extra regularization losses and
                # the BERT word-level output needed for adversarial training.
                if self.opt.model_name in reg_list:
                    aux_cls_logeits, outputs, reg_can_loss, reg_aux_loss, bert_word_output, reg_chg_loss = self.model(
                        inputs, None)
                else:
                    # NOTE(review): bert_word_output is NOT set on this path;
                    # if opt.adv > 0 with a non-reg_list model, the adversarial
                    # branch below raises NameError — confirm intended usage.
                    outputs = self.model(inputs)
                    reg_can_loss = 0
                    reg_aux_loss = 0
                    reg_chg_loss = 0

                # Combine the main CE loss with the weighted auxiliary losses.
                loss_1 = criterion(outputs, targets)
                loss_2 = reg_can_loss
                loss_3 = reg_aux_loss
                loss_4 = reg_chg_loss

                weighted_loss_2 = loss_2 * self.opt.can
                weighted_loss_3 = loss_3 * self.opt.aux
                weighted_loss_4 = loss_4 * self.opt.chg

                loss = 1 * loss_1 + weighted_loss_2 + weighted_loss_3 + weighted_loss_4

                # Adversarial perturbation loss, scaled by opt.adv; base loss
                # depends on whether the auxiliary loss drives the attack.
                if self.opt.adv > 0:
                    if self.opt.adv_aux == 1:
                        loss_adv = self._loss_adv(weighted_loss_3,
                                                  bert_word_output,
                                                  criterion,
                                                  inputs,
                                                  targets,
                                                  p_mult=self.opt.adv)
                    else:
                        loss_adv = self._loss_adv(loss,
                                                  bert_word_output,
                                                  criterion,
                                                  inputs,
                                                  targets,
                                                  p_mult=self.opt.adv)
                    loss += loss_adv
                else:
                    loss_adv = 0
                loss.backward()

                # pgd.backup_grad()
                #     for t in range(K):
                #         pgd.attack(is_first_attack=(t==0)) # add adversarial perturbation to the embedding; back up param.data on the first attack
                #         if t != K-1:
                #             model.zero_grad()
                #         else:
                #             pgd.restore_grad()
                #         loss_adv = model(batch_input, batch_label)
                #         loss_adv.backward() # backprop, accumulating the adversarial gradients on top of the normal grad
                #     pgd.restore() # restore the embedding parameters

                optimizer.step()

                n_correct += (torch.argmax(outputs,
                                           -1) == targets).sum().item()
                n_total += len(outputs)
                loss_total += loss.item() * len(outputs)
                if global_step % self.opt.log_step == 0:
                    train_acc = n_correct / n_total
                    train_loss = loss_total / n_total
                    logger.info(
                        'loss_total: {:.4f}, acc: {:.4f},loss_main: {:.4f},reg_can_loss: {:.4f},loss_adv: {:.4f},reg_aux_loss {:.4f},reg_chg_loss {:.4f}'
                        .format(train_loss, train_acc, loss_1, weighted_loss_2,
                                loss_adv, weighted_loss_3, weighted_loss_4))
                    # NOTE(review): the inner braces build a *set* containing
                    # one formatted string, not a metric dict — fitlog likely
                    # expects {"Train": {name: value}}; confirm this logs as
                    # intended.
                    fitlog.add_metric(
                        {
                            "Train": {
                                'loss_total: {:.4f}, acc: {:.4f},loss_main: {:.4f},reg_can_loss: {:.4f},loss_adv: {:.4f},reg_aux_loss {:.4f},reg_chg_loss {:.4f}'
                                .format(train_loss, train_acc, loss_1,
                                        weighted_loss_2, loss_adv,
                                        weighted_loss_3, weighted_loss_4)
                            }
                        },
                        step=global_step)
            val_acc, val_f1 = self._evaluate_acc_f1(val_data_loader)
            test_acc, test_f1 = self._evaluate_acc_f1(test_data_loader)

            logger.info('> val_acc: {:.4f}, val_f1: {:.4f}'.format(
                val_acc, val_f1))
            logger.info('> test_acc: {:.4f}, test_f1: {:.4f}'.format(
                test_acc, test_f1))

            if val_acc > max_val_acc:
                max_val_acc = val_acc
                if not os.path.exists('state_dict'):
                    os.mkdir('state_dict')
                model_path = 'state_dict/{0}_{1}_doamin-{2}_can{3}_aug{4}_adv{5}_aux{6}_val_acc{7}_resplit{8}'.format(
                    self.opt.model_name, self.opt.dataset, self.opt.domain,
                    self.opt.can, self.opt.aug, self.opt.adv, self.opt.aux,
                    round(val_acc, 4), self.opt.resplit)
                bert_path = 'state_dict/{0}_{1}_doamin-{2}_can{3}_aug{4}_adv{5}_aux{6}_val_acc{7}_resplit{8}_bert'.format(
                    self.opt.model_name, self.opt.dataset, self.opt.domain,
                    self.opt.can, self.opt.aug, self.opt.adv, self.opt.aux,
                    round(val_acc, 4), self.opt.resplit)

                # fitlog.add_hyper({"model_name":self.opt.model_name,"dataset":self.opt.dataset,'resplit':self.opt.resplit,"domain":self.opt.domain,"aug":self.opt.aug,"adv":self.opt.adv,"aux":self.opt.aux})

                fitlog.add_metric(
                    {"val": {
                        "val_acc": val_acc,
                        "val_f1": val_f1
                    }},
                    step=global_step)
                fitlog.add_metric(
                    {"test": {
                        "test_acc": test_acc,
                        "test_f1": test_f1
                    }},
                    step=global_step)

                fitlog.add_best_metric(
                    {"val": {
                        "val_acc": val_acc,
                        "val_f1": val_f1
                    }})
                fitlog.add_best_metric(
                    {"test": {
                        "test_acc": test_acc,
                        "test_f1": test_f1
                    }})

                # Delete the previous checkpoint so only the best survives.
                # NOTE(review): last_bert_path is unbound on the first removal
                # if the first best happens before it is ever assigned — the
                # guard on last_model_path shields it in practice; confirm.
                if last_model_path != None:
                    os.remove(last_model_path)
                    if self.opt.model_name not in ['lcf_bert']:
                        os.remove(last_bert_path)
                last_model_path = model_path
                last_bert_path = bert_path
                torch.save(self.model.state_dict(), model_path)
                if self.opt.model_name not in ['lcf_bert']:
                    torch.save(self.model.bert.state_dict(), bert_path)
                logger.info('>> saved: {}'.format(model_path))

                # max_val_f1 = val_f1
            if val_f1 > max_val_f1:
                max_val_f1 = val_f1
                # fitlog.add_metric(acc,name="Acc",step=step)

        # NOTE(review): model_path is unbound if validation accuracy never
        # improves over 0 — confirm callers tolerate that.
        return model_path
Exemplo n.º 11
0
    def fit(self):
        """Full training loop (MXNet/Gluon): train for conf.epochs, validate
        each epoch, save best/last checkpoints, and log everything to fitlog.
        """
        last_miou = .0  # record the best validation mIoU
        loss_step = 0  # step count
        for epoch in range(self.conf.epochs):
            train_loss = .0
            start = time.time()
            for i, (data, target) in enumerate(self.train_iter):
                # Scatter the batch across all configured devices.
                gpu_datas = split_and_load(data, ctx_list=self.ctx)
                gpu_targets = split_and_load(target, ctx_list=self.ctx)
                with autograd.record():
                    loss_gpu = [
                        self.criterion(*self.net(gpu_data), gpu_target)
                        for gpu_data, gpu_target in zip(
                            gpu_datas, gpu_targets)
                    ]
                for loss in loss_gpu:
                    autograd.backward(loss)
                self.trainer.step(self.conf.bs_train)
                nd.waitall()  # block until async GPU ops finish before reading losses
                loss_temp = .0
                for losses in loss_gpu:
                    loss_temp += losses.sum().asscalar()
                train_loss += (loss_temp / self.conf.bs_train)
                # log every n batch
                # add loss to draw curve, train_loss <class numpy.float64>
                # Log densely early on, sparsely after 5000 logged points.
                interval = 5 if loss_step < 5000 else 50
                if (i % interval == 0) or (i + 1 == len(self.train_iter)):
                    fitlog.add_loss(name='loss',
                                    value=round(train_loss / (i + 1), 5),
                                    step=loss_step)
                    loss_step += 1
                    self.logger.info(
                        "Epoch %d, batch %d, training loss %.5f." %
                        (epoch, i, train_loss / (i + 1)))
            # log each epoch
            self.logger.info(
                ">>>>>> Epoch %d complete, time cost: %.1f sec. <<<<<<" %
                (epoch, time.time() - start))
            # validation each epoch
            if self.val:
                pixel_acc, mean_iou = self._validation()
                self.logger.info(
                    "Epoch %d validation, PixelAccuracy: %.4f, mIoU: %.4f." %
                    (epoch, pixel_acc, mean_iou))
                fitlog.add_metric(value=mean_iou, step=epoch, name='mIoU')
                fitlog.add_metric(value=pixel_acc, step=epoch, name='PA')
                if mean_iou > last_miou:
                    f_name = self._save_model(tag='best')
                    self.logger.info(
                        "Epoch %d mIoU: %.4f > %.4f(previous), save model: %s"
                        % (epoch, mean_iou, last_miou, f_name))
                    last_miou = mean_iou

        # save the final-epoch params
        f_name = self._save_model(tag='last')
        self.logger.info(">>>>>> Training complete, save model: %s. <<<<<<" %
                         f_name)
        # record
        fitlog.add_best_metric(value=round(last_miou, 4), name='mIoU')
        fitlog.add_other(value=self.id, name='record_id')
        fitlog.add_other(value=self.num_train, name='train')
        fitlog.add_other(value=self.num_val, name='val')
Exemplo n.º 12
0
    def _train(self, criterion, optimizer):
        """Train with periodic evaluation and epoch-level early stopping.

        Evaluates every opt.log_step steps; stops after opt.early_stop
        consecutive epochs without any acc/f1 improvement. Returns
        (max_test_acc, max_test_f1).
        """
        max_test_acc = 0
        max_test_f1 = 0
        global_step = 0
        continue_not_increase = 0  # consecutive epochs without improvement
        for epoch in range(self.opt.num_epoch):
            print(">" * 100)
            print("epoch: ", epoch)
            n_correct, n_total = 0, 0
            increase_flag = False
            for i_batch, sample_batched in enumerate(self.train_data_loader):
                global_step += 1

                # switch model to training mode, clear gradient accumulators
                self.model.train()
                optimizer.zero_grad()

                inputs = [
                    sample_batched[col].to(self.opt.device)
                    for col in self.opt.inputs_cols
                ]
                targets = sample_batched["polarity"].to(self.opt.device)

                outputs = self.model(inputs)
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()

                if global_step % self.opt.log_step == 0:
                    # NOTE(review): train accuracy only accumulates on logging
                    # steps, so it samples 1/log_step of the batches — confirm
                    # this is intentional rather than a misplaced block.
                    n_correct += (torch.argmax(outputs,
                                               -1) == targets).sum().item()
                    n_total += len(outputs)
                    train_acc = n_correct / n_total

                    test_acc, test_f1 = self._evaluate_acc_f1()
                    ################fitlog code####################
                    fitlog.add_metric(test_acc, name="acc", step=global_step)
                    fitlog.add_metric(test_f1, name="f1", step=global_step)
                    ################fitlog code####################
                    if test_acc > max_test_acc:
                        increase_flag = True
                        fitlog.add_best_metric(test_acc, "acc")
                        max_test_acc = test_acc
                    if test_f1 > max_test_f1:
                        increase_flag = True
                        max_test_f1 = test_f1
                        fitlog.add_best_metric(max_test_f1, "f1")
                        # Persist only when we also beat the cross-run best f1.
                        if self.opt.save and test_f1 > self.global_f1:
                            self.global_f1 = test_f1
                            torch.save(
                                self.model.state_dict(),
                                "state_dict/" + self.opt.model_name + "_" +
                                self.opt.dataset + ".pkl",
                            )
                            print(">>> best model saved.")
                    print(
                        "loss: {:.4f}, acc: {:.4f}, test_acc: {:.4f}, test_f1: {:.4f}"
                        .format(loss.item(), train_acc, test_acc, test_f1))
            if increase_flag == False:
                if continue_not_increase >= self.opt.early_stop:
                    print("early stop.")
                    break
                continue_not_increase += 1
            else:
                continue_not_increase = 0
        return max_test_acc, max_test_f1
Exemplo n.º 13
0
 # Fragment of an epoch loop — the enclosing function's `def` is not visible.
 # Assumes `model`, `loss_function`, `optimizer`, `train_loader`,
 # `test_loader`, `num` (epoch index), `correct`, `total` are defined by the
 # surrounding scope — TODO confirm against the full file.
 losses=[]
 for i,(data,label) in enumerate(train_loader):
     data=data.cuda()
     label=label.cuda()
     predict=model(data)
     loss=loss_function(predict,label)
     predict_label=torch.argmax(predict,1)
     correct+=(predict_label==label).cpu().sum().item()
     total+=label.size()[0]
     # Backward/step/zero each batch; note zero_grad runs AFTER step here.
     loss.backward()
     optimizer.step()
     optimizer.zero_grad()
     losses.append(loss.item())
 # fitlog.add_metric(list(mean(losses)),step=num,name='train_loss')
 fitlog.add_loss(mean(losses),step=num,name='train_loss')
 fitlog.add_metric({"train":{"acc":correct/total}},step=num)
 print("current_epoch_loss:{}".format(str(mean(losses))))
 print("current_epoch_accuracy:{}".format(correct/total))
 # Reset counters before the evaluation pass over test_loader.
 total=0
 correct=0
 losses=[]
 # NOTE(review): no model.eval()/torch.no_grad() around this evaluation loop —
 # gradients are tracked and dropout/batchnorm stay in train mode; verify.
 for i,(data,label) in enumerate(test_loader):
     data=data.cuda()
     label=label.cuda()
     predict=model(data)
     loss=loss_function(predict,label)
     predict_label=torch.argmax(predict,1)
     correct+=(predict_label==label).cpu().sum().item()
     total+=label.size()[0]
     losses.append(loss.item())
     print("predict",predict_label)
Exemplo n.º 14
0
def train(args, train_dataset, model, test_dataset):
    '''Train the model, evaluating on `test_dataset` every `args.logging_steps` steps.

    Best acc/f1 seen so far are pushed to fitlog as best metrics.
    Returns (global_step, mean training loss, list of per-evaluation results).
    '''
    tb_writer = SummaryWriter()

    args.train_batch_size = args.per_gpu_train_batch_size
    train_sampler = RandomSampler(train_dataset)
    collate_fn = get_collate_fn(args)
    train_dataloader = DataLoader(train_dataset, sampler=train_sampler,
                                  batch_size=args.train_batch_size,
                                  collate_fn=collate_fn)

    # Total optimization steps: either an explicit budget (args.max_steps,
    # which also overrides the epoch count) or derived from the epoch count.
    if args.max_steps > 0:
        t_total = args.max_steps
        args.num_train_epochs = args.max_steps // (
            len(train_dataloader) // args.gradient_accumulation_steps) + 1
    else:
        t_total = len(
            train_dataloader) // args.gradient_accumulation_steps * args.num_train_epochs

    if args.embedding_type in ('bert', 'roberta'):
        optimizer = get_bert_optimizer(args, model)
    else:
        parameters = filter(lambda param: param.requires_grad, model.parameters())
        optimizer = torch.optim.Adam(parameters, lr=args.learning_rate)

    # Train
    logger.info("***** Running training *****")
    logger.info("  Num examples = %d", len(train_dataset))
    logger.info("  Num Epochs = %d", args.num_train_epochs)
    logger.info("  Instantaneous batch size per GPU = %d",
                args.per_gpu_train_batch_size)
    logger.info("  Gradient Accumulation steps = %d",
                args.gradient_accumulation_steps)
    logger.info("  Total optimization steps = %d", t_total)
    print("Total steps:", t_total)
    global_step = 0
    tr_loss = 0.0
    all_eval_results = []
    model.zero_grad()
    best_acc = 0
    best_f1 = 0
    with tqdm(total=args.num_train_epochs, desc='Epoch') as pbar:
        for _ in range(int(args.num_train_epochs)):
            pbar.update()
            for step, batch in enumerate(train_dataloader):
                model.train()
                batch = tuple(t.to(args.device) for t in batch)

                inputs, labels = get_input_from_batch(args, batch)
                logit = model(**inputs)
                loss = F.cross_entropy(logit, labels)

                if args.gradient_accumulation_steps > 1:
                    loss = loss / args.gradient_accumulation_steps

                loss.backward()
                torch.nn.utils.clip_grad_norm_(
                    model.parameters(), args.max_grad_norm)

                tr_loss += loss.item()
                optimizer.step()
                model.zero_grad()
                global_step += 1

                # Log metrics
                if args.logging_steps > 0 and global_step % args.logging_steps == 0:
                    results, eval_loss = evaluate(args, test_dataset, model)
                    all_eval_results.append(results)
                    if results['acc'] > best_acc:
                        best_acc = results['acc']
                        best_f1 = results['f1']
                        pbar.write(f"Step:{global_step} acc:{round(best_acc, 4)}, f1:{round(best_f1, 4)}")
                        fitlog.add_best_metric({'acc': best_acc, 'f1': best_f1, 'step': global_step})

                    fitlog.add_metric(name='f1', value=results['f1'], step=global_step)
                    fitlog.add_metric(name='acc', value=results['acc'], step=global_step)

                # Fix: the original called epoch_iterator.close() on both of
                # these breaks, but epoch_iterator was never defined (its
                # creation was commented out), raising NameError whenever the
                # max_steps budget was hit.
                if args.max_steps > 0 and global_step > args.max_steps:
                    break
            if args.max_steps > 0 and global_step > args.max_steps:
                break

    tb_writer.close()
    return global_step, tr_loss / global_step, all_eval_results
Exemplo n.º 15
0
        # Fragment of a training loop — the loop header and enclosing function
        # are not visible. Assumes `score`, `predict`, `loss`, `optimizer`,
        # `losss`, `acc`, `total`, `i`, `model`, `dev_loader` come from the
        # surrounding scope — TODO confirm against the full file.
        # print("score:{}".format(score.requires_grad))
        # print("predict:{}".format(predict.requires_grad))
        loss.backward()
        optimizer.step()
        # for each in zip(score,predict):
        #     tr.write(str(each)+'\n')
        # print(loss.grad)
        losss.append(loss.item())
        # tmp=zip(score,torch.argmax(predict,-1))
        # print(tmp)
        # Count exact matches between targets and argmax predictions.
        acc += (score == torch.argmax(predict, -1)).cpu().sum().item()
        total += score.size(0)

    # End-of-epoch reporting: epoch index `i`, mean loss, accuracy.
    print("training epoch %s accuracy is %s loss is %s " %
          (str(i), str(acc / total), str(np.mean(losss))))
    fitlog.add_metric(np.mean(losss), name="loss", step=i)
    fitlog.add_metric(acc / total, name='acc', step=i)
    # if num %100==0:
    # torch.save(model.state_dict(), "model.bin")
    # Evaluation pass over the dev set under no_grad.
    losss2 = []
    acc2 = 0
    total2 = 0
    model.eval()
    with torch.no_grad():
        for num, (score, data) in tqdm(enumerate(dev_loader)):
            batch_size = score.size(0)
            embedding_size = 300  # NOTE(review): hard-coded and unused here — verify
            score = score.cuda()
            data = data.cuda()
            predict = model(data)
            # print(predict.shape)