Пример #1
0
def test(model, data_loader, criterion, posp, max_fpr):
    """Score ``model`` on ``data_loader`` and report AUC figures plus the mean loss.

    The model is switched to eval mode for the pass and back to train mode
    before returning. Each batch is moved to CUDA and run through the model
    with the ``'tgt'`` flag; the per-sample loss from ``criterion`` is weighted
    by ``posp`` where the label is 1 and by ``1 - posp`` where it is 0.

    Args:
        model: callable returning a 2-tuple whose first item is the score tensor.
        data_loader: iterable of ``(ids, values, seqlength, label, seq_mask)``.
        criterion: loss applied as ``criterion(scores, label)``.
        posp: weight applied to positive samples.
        max_fpr: forwarded to ``roc_auc_score`` for the partial AUC.

    Returns:
        ``(partial_auc, mean_loss, full_auc)`` where ``mean_loss`` is whatever
        ``Averager.item()`` reports (presumably the running mean -- confirm
        against the ``Averager`` definition).
    """
    model.eval()
    running_loss = Averager()
    y_true = []
    y_score = []
    pos_weight = torch.FloatTensor([posp]).cuda()
    unit = torch.FloatTensor([1]).cuda()
    neg_weight = unit - pos_weight

    with torch.no_grad():
        for batch in data_loader:
            ids, values, seqlength, label, seq_mask = batch
            ids = ids.cuda()
            values = values.cuda()
            label = label.cuda().float()
            seq_mask = seq_mask.cuda()

            scores, _ = model(ids, values, seqlength, seq_mask, 'tgt')

            # Per-sample weight: pos_weight for label 1, neg_weight for label 0.
            sample_weight = pos_weight * label + neg_weight * (unit - label)
            weighted_loss = sample_weight * criterion(scores, label)
            running_loss.add(torch.mean(weighted_loss).item())

            y_true += label.tolist()
            y_score += scores.tolist()

    model.train()

    partial_auc = roc_auc_score(y_true, y_score, max_fpr=max_fpr)
    mean_loss = running_loss.item()
    full_auc = roc_auc_score(y_true, y_score)
    return partial_auc, mean_loss, full_auc
def validation(model, ctc_criterion, attn_criterion, evaluation_loader, ctc_converter, attn_converter, opt):
    """Evaluate a model with a CTC head and an attention head on ``evaluation_loader``.

    Every parameter of ``model`` has ``requires_grad`` set to ``False`` and it
    is NOT restored here; the caller must re-enable gradients before training.

    Args:
        model: invoked as ``model(image, text_for_pred)``; must return
            ``(ctc_preds, attn_preds)``.
        ctc_criterion: called as
            ``ctc_criterion(log_probs, targets, input_lengths, target_lengths)``.
        attn_criterion: called on the flattened attention logits and targets.
        evaluation_loader: iterable of ``(image_tensors, labels)`` batches.
        ctc_converter: provides ``encode(labels)`` and ``decode(indices, lengths)``.
        attn_converter: provides ``encode(labels)`` and ``decode(indices, lengths)``.
        opt: options object; ``batch_max_length`` and ``ctc_weight`` are read.

    Returns:
        ``(valid_loss, accuracy, ctc_accuracy, norm_ED, attn_preds_str,
        attn_labels, infer_time, length_of_data)``. Accuracies are percentages
        over all samples; ``norm_ED`` is the *sum* of per-sample edit distances
        divided by ground-truth length; ``attn_preds_str`` / ``attn_labels``
        come from the last batch only (empty lists if the loader is empty).
    """
    for p in model.parameters():
        p.requires_grad = False

    n_correct = 0
    ctc_correct = 0
    norm_ED = 0
    length_of_data = 0
    infer_time = 0
    valid_loss_avg = Averager()
    # Pre-bound so that an empty loader yields empty lists instead of an
    # UnboundLocalError at the return statement.
    attn_preds_str, attn_labels = [], []

    for image_tensors, labels in evaluation_loader:
        batch_size = image_tensors.size(0)
        length_of_data = length_of_data + batch_size
        image = image_tensors.to(device)
        # Decode for the maximum length; the input text is an all-zero placeholder.
        length_for_pred = torch.IntTensor([opt.batch_max_length] * batch_size).to(device)
        text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length + 1).fill_(0).to(device)

        ctc_text_for_loss, ctc_length_for_loss = ctc_converter.encode(labels)
        attn_text_for_loss, attn_length_for_loss = attn_converter.encode(labels)

        # Only the forward pass is counted as inference time.
        start_time = time.time()
        ctc_preds, attn_preds = model(image, text_for_pred)
        forward_time = time.time() - start_time

        # ---- CTC head ----
        ctc_preds = ctc_preds.log_softmax(2)
        preds_size = torch.IntTensor([ctc_preds.size(1)] * batch_size)
        ctc_preds = ctc_preds.permute(1, 0, 2)  # to use CTCLoss format
        ctc_cost = ctc_criterion(ctc_preds, ctc_text_for_loss, preds_size, ctc_length_for_loss)
        # Greedy decoding: take the arg-max index per step, then map to characters.
        _, preds_index = ctc_preds.max(2)
        preds_index = preds_index.transpose(1, 0).contiguous().view(-1)
        ctc_preds_str = ctc_converter.decode(preds_index.data, preds_size.data)

        # ---- attention head ----
        attn_preds = attn_preds[:, :attn_text_for_loss.shape[1] - 1, :]
        target = attn_text_for_loss[:, 1:]  # without [GO] symbol
        attn_cost = attn_criterion(attn_preds.contiguous().view(-1, attn_preds.shape[-1]),
                                   target.contiguous().view(-1))
        _, attn_preds_index = attn_preds.max(2)
        attn_preds_str = attn_converter.decode(attn_preds_index, length_for_pred)
        attn_labels = attn_converter.decode(attn_text_for_loss[:, 1:], attn_length_for_loss)

        cost = opt.ctc_weight * ctc_cost + (1.0 - opt.ctc_weight) * attn_cost
        infer_time += forward_time
        valid_loss_avg.add(cost)

        # ---- accuracy bookkeeping ----
        for pred, gt, attn_pred, attn_gt in zip(ctc_preds_str, labels, attn_preds_str, attn_labels):
            # Prune after the end-of-sentence token.
            # NOTE(review): str.find returns -1 when '[s]' is absent, in which
            # case this slice silently drops the last character.
            attn_pred = attn_pred[:attn_pred.find('[s]')]
            attn_gt = attn_gt[:attn_gt.find('[s]')]

            if pred == gt:
                ctc_correct += 1
            if attn_pred == attn_gt:
                n_correct += 1
            # An empty ground truth used to raise ZeroDivisionError; normalise
            # by at least 1 so such a sample contributes its raw edit distance.
            norm_ED += edit_distance(attn_pred, attn_gt) / max(len(attn_gt), 1)

    if length_of_data > 0:
        accuracy = n_correct / float(length_of_data) * 100
        ctc_accuracy = ctc_correct / float(length_of_data) * 100
    else:
        # Nothing was evaluated; report 0% rather than dividing by zero.
        accuracy = 0.0
        ctc_accuracy = 0.0

    return valid_loss_avg.val(), accuracy, ctc_accuracy, norm_ED, attn_preds_str, attn_labels, infer_time, length_of_data
Пример #3
0
                                           dim=1,
                                           largest=True,
                                           sorted=True)

            p = cfg.progalambda * (allExtraproto[topkindex] * topsim.unsqueeze(2).expand(cfg.way, cfg.topk, 1600)).sum(dim=1)/cfg.topk \
                    + (1 - cfg.progalambda) * proto
        else:
            p = proto

        logits = euclidean_metric(model(data_query), p)

        label = torch.arange(cfg.way).repeat(cfg.query)
        label = label.type(torch.cuda.LongTensor)

        acc = count_acc(logits, label)
        ave_acc.add(acc)
        print('batch {}: {:.2f}({:.2f})'.format(i,
                                                ave_acc.item() * 100,
                                                acc * 100))

        allacc.append(acc)

        x = None
        p = None
        logits = None

    allacc = np.array(allacc)
    torch.save(allacc, cfg.result + '/allacc')

    mean, std, conf_intveral = CI(allacc)
Пример #4
0
def train(opt):
    """Train the recognition model configured by ``opt``, validating periodically.

    Builds the training dataset and a validation loader, constructs
    ``Model(opt)`` with a CTC or attention label converter (selected by
    ``'CTC' in opt.Prediction``), then runs an open-ended iteration loop.
    Every ``opt.valInterval`` iterations the running loss is logged,
    ``validation`` is run, and the weights with the best accuracy / lowest
    norm_ED are saved under ``./saved_models/{opt.experiment_name}/``.

    Side effects visible in this function: ``opt`` is mutated
    (``select_data``, ``batch_ratio``, ``num_class`` and, when ``opt.rgb`` is
    set, ``input_channel``); ``opt.txt`` and ``log_train.txt`` are appended
    to; checkpoints are written; and the process is terminated with
    ``sys.exit()`` once the iteration counter equals ``opt.num_iter``, so the
    function never returns normally.
    """
    # dataset preparation
    # Both options arrive as '-'-separated strings and are replaced by lists.
    opt.select_data = opt.select_data.split('-')
    opt.batch_ratio = opt.batch_ratio.split('-')
    train_dataset = Batch_Balanced_Dataset(opt)

    AlignCollate_valid = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD)
    valid_dataset = hierarchical_dataset(root=opt.valid_data, opt=opt)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=opt.batch_size,
        shuffle=True,  # 'True' to check training progress with validation function.
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_valid, pin_memory=True)
    print('-' * 80)

    """ model configuration """
    # The converter turns label strings into index tensors and back.
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial, opt.input_channel, opt.output_channel,
          opt.hidden_size, opt.num_class, opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
          opt.SequenceModeling, opt.Prediction)

    # weight initialization: biases -> 0, weights -> Kaiming normal.
    for name, param in model.named_parameters():
        if 'localization_fc2' in name:
            print(f'Skip {name} as it is already initialized')
            continue
        try:
            if 'bias' in name:
                init.constant_(param, 0.0)
            elif 'weight' in name:
                init.kaiming_normal_(param)
        except Exception as e:  # for batchnorm.
            # Fallback when the initializer rejects the parameter: weights are
            # filled with 1 and the error is otherwise ignored.
            if 'weight' in name:
                param.data.fill_(1)
            continue

    # data parallel for multi-GPU
    model = torch.nn.DataParallel(model).cuda()
    model.train()
    if opt.continue_model != '':
        if opt.without_prediction:
            load_model_without_prediction(opt.continue_model, model)
            print(f'loading pretrained model from {opt.continue_model}, without prediction layer')
        else:
            print(f'loading pretrained model from {opt.continue_model}')
            model.load_state_dict(torch.load(opt.continue_model))
    print("Model:")
    print(model)

    """ setup loss """
    if 'CTC' in opt.Prediction:
        criterion = torch.nn.CTCLoss(zero_infinity=True).cuda()
    else:
        criterion = torch.nn.CrossEntropyLoss(ignore_index=0).cuda()  # ignore [GO] token = ignore index 0
    # loss averager
    loss_avg = Averager()

    # Collect only the parameters that require gradients (and count them).
    filtered_parameters = []
    params_num = []
    for p in filter(lambda p: p.requires_grad, model.parameters()):
        filtered_parameters.append(p)
        params_num.append(np.prod(p.size()))
    print('Trainable params num : ', sum(params_num))
    # [print(name, p.numel()) for name, p in filter(lambda p: p[1].requires_grad, model.named_parameters())]

    # setup optimizer
    if opt.adam:
        optimizer = optim.Adam(filtered_parameters, lr=opt.lr, betas=(opt.beta1, 0.999))
    else:
        optimizer = optim.Adadelta(filtered_parameters, lr=opt.lr, rho=opt.rho, eps=opt.eps)
    print("Optimizer:")
    print(optimizer)

    """ final options """
    # print(opt)
    # Dump every option to opt.txt (append mode) and echo it to stdout.
    with open(f'./saved_models/{opt.experiment_name}/opt.txt', 'a') as opt_file:
        opt_log = '------------ Options -------------\n'
        args = vars(opt)
        for k, v in args.items():
            opt_log += f'{str(k)}: {str(v)}\n'
        opt_log += '---------------------------------------\n'
        print(opt_log)
        opt_file.write(opt_log)

    """ start training """
    # NOTE(review): start_iter is always 0 here, even when continuing from a
    # saved model -- the message below will therefore always report 0.
    start_iter = 0
    if opt.continue_model != '':
        print(f'continue to train, start_iter: {start_iter}')

    start_time = time.time()
    best_accuracy = -1
    best_norm_ED = 1e+6
    i = start_iter

    while True:
        # train part
        # validation() switches requires_grad off, so it is re-enabled each iteration.
        for p in model.parameters():
            p.requires_grad = True

        image_tensors, labels = train_dataset.get_batch()
        image = image_tensors.cuda()
        text, length = converter.encode(labels)
        batch_size = image.size(0)

        if 'CTC' in opt.Prediction:
            preds = model(image, text).log_softmax(2)
            # Every sample uses the full output length as its CTC input length.
            preds_size = torch.IntTensor([preds.size(1)] * batch_size)
            preds = preds.permute(1, 0, 2)  # to use CTCLoss format
            cost = criterion(preds, text, preds_size, length)

        else:
            preds = model(image, text)
            target = text[:, 1:]  # without [GO] Symbol
            cost = criterion(preds.view(-1, preds.shape[-1]), target.contiguous().view(-1))

        model.zero_grad()
        cost.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip)  # gradient clipping with 5 (Default)
        optimizer.step()

        loss_avg.add(cost)

        # validation part
        if i % opt.valInterval == 0:
            elapsed_time = time.time() - start_time
            logging.info(f'[{i}/{opt.num_iter}] Loss: {loss_avg.val():0.5f} elapsed_time: {elapsed_time:0.5f}')
            # for log
            with open(f'./saved_models/{opt.experiment_name}/log_train.txt', 'a') as log:
                log.write(f'[{i}/{opt.num_iter}] Loss: {loss_avg.val():0.5f} elapsed_time: {elapsed_time:0.5f}\n')
                loss_avg.reset()

                model.eval()
                # `preds` and `labels` are rebound to the validation outputs here.
                valid_loss, current_accuracy, current_norm_ED, preds, labels, infer_time, length_of_data = validation(
                    model, criterion, valid_loader, converter, opt)
                model.train()

                # Show the first five prediction / ground-truth pairs.
                for pred, gt in zip(preds[:5], labels[:5]):
                    if 'Attn' in opt.Prediction:
                        # Cut everything from the '[s]' token onwards.
                        pred = pred[:pred.find('[s]')]
                        gt = gt[:gt.find('[s]')]
                    print(f'{pred:20s}, gt: {gt:20s},   {str(pred == gt)}')
                    log.write(f'{pred:20s}, gt: {gt:20s},   {str(pred == gt)}\n')

                valid_log = f'[{i}/{opt.num_iter}] valid loss: {valid_loss:0.5f}'
                valid_log += f' accuracy: {current_accuracy:0.3f}, norm_ED: {current_norm_ED:0.2f}'
                log.write(valid_log + '\n')

                # keep best accuracy model
                if current_accuracy > best_accuracy:
                    best_accuracy = current_accuracy
                    torch.save(model.state_dict(), f'./saved_models/{opt.experiment_name}/mtl_best_accuracy.pth')
                # keep lowest norm_ED model
                if current_norm_ED < best_norm_ED:
                    best_norm_ED = current_norm_ED
                    torch.save(model.state_dict(), f'./saved_models/{opt.experiment_name}/best_norm_ED.pth')
                best_model_log = f'best_accuracy: {best_accuracy:0.3f}, best_norm_ED: {best_norm_ED:0.2f}'
                logging.info(best_model_log)
                log.write(best_model_log + '\n')

        # save a checkpoint every 50000 iterations.
        if (i + 1) % 50000 == 0:
            torch.save(
                model.state_dict(), f'./saved_models/{opt.experiment_name}/iter_{i+1}.pth')

        # Training ends by terminating the process, not by returning.
        if i == opt.num_iter:
            logging.info('end the training')
            sys.exit()
        i += 1
Пример #5
0
def train(opt):
    """Jointly train a CTC + attention recognition model, validating periodically.

    Builds the training dataset and a validation loader, constructs
    ``MyModel(opt)`` wrapped in ``DataParallel``, and optimises the weighted
    sum ``opt.ctc_weight * ctc_loss + (1 - opt.ctc_weight) * attn_loss``.
    Every ``opt.valInterval`` iterations the running loss is logged,
    ``mtl_validation`` is run under ``torch.no_grad()``, and the weights with
    the best accuracy are saved below ``opt.outPath/opt.experiment_name``.

    Side effects visible in this function: ``opt`` is mutated (``num_class``,
    ``ctc_num_class`` and, when ``opt.rgb`` is set, ``input_channel``);
    ``opt.txt`` and ``log_train.txt`` are appended to; checkpoints are
    written; and the process is terminated with ``sys.exit()`` once the
    iteration counter equals ``opt.num_iter``, so the function never returns
    normally.

    Args:
        opt: options namespace (image size, character set, optimiser
            settings, paths, intervals, ...).
    """
    train_dataset = Batch_Dataset(opt)
    AlignCollate_valid = AlignCollate(imgH=opt.imgH,
                                      imgW=opt.imgW,
                                      keep_ratio_with_pad=opt.PAD)
    valid_dataset = LmdbDataset(root=opt.valid_data, opt=opt)
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=int(opt.workers),
                                               collate_fn=AlignCollate_valid,
                                               pin_memory=True)
    print('-' * 80)

    # ---- model configuration ----
    ctc_converter = CTCLabelConverter(opt.character, opt.subword)
    attn_converter = AttnLabelConverter(opt.character, opt.subword,
                                        opt.batch_max_length)

    opt.num_class = len(attn_converter.character)
    opt.ctc_num_class = len(ctc_converter.character)
    print("ctc num class {}".format(len(ctc_converter.character)))
    print("attention num class {}".format(len(attn_converter.character)))

    if opt.rgb:
        opt.input_channel = 3

    model = MyModel(opt)

    # weight initialization: biases -> 0, weights -> Kaiming normal.
    for name, param in model.named_parameters():
        if 'localization_fc2' in name:
            # The named field must be supplied by keyword; passing `name`
            # positionally raised KeyError('name') here.
            print('Skip {name} as it is already initialized'.format(name=name))
            continue
        try:
            if 'bias' in name:
                init.constant_(param, 0.0)
            elif 'weight' in name:
                init.kaiming_normal_(param)
        except Exception:
            # Deliberate best-effort fallback when the initializer rejects the
            # parameter: weights are filled with 1 and the error is ignored.
            if 'weight' in name:
                param.data.fill_(1)
            continue

    model = torch.nn.DataParallel(model).to(device)

    model.train()
    if opt.continue_model != '':
        print('loading pretrained model from {}'.format(opt.continue_model))
        model.load_state_dict(torch.load(opt.continue_model))

    # ---- setup loss ----
    ctc_criterion = torch.nn.CTCLoss(zero_infinity=True).to(device)
    attn_criterion = torch.nn.CrossEntropyLoss(ignore_index=0).to(device)

    loss_avg = Averager()

    # Collect only the parameters that require gradients (and count them).
    filtered_parameters = []
    params_num = []
    for p in filter(lambda p: p.requires_grad, model.parameters()):
        filtered_parameters.append(p)
        params_num.append(np.prod(p.size()))
    print('Trainable params num : ', sum(params_num))

    if opt.adam:
        optimizer = optim.Adam(filtered_parameters,
                               lr=opt.lr,
                               betas=(opt.beta1, 0.999))
    else:
        optimizer = optim.Adadelta(filtered_parameters,
                                   lr=opt.lr,
                                   rho=opt.rho,
                                   eps=opt.eps)
    print("Optimizer:")
    print(optimizer)

    # ---- final options: dump every option to opt.txt and echo to stdout ----
    with open(osj(opt.outPath, '{}/opt.txt'.format(opt.experiment_name)),
              'a') as opt_file:
        opt_log = '------------ Options -------------\n'
        args = vars(opt)
        for k, v in args.items():
            opt_log += '{}: {}\n'.format(str(k), str(v))
        opt_log += '---------------------------------------\n'
        print(opt_log)
        opt_file.write(opt_log)

    # ---- start training ----
    # NOTE(review): start_iter is always 0 here, even when continuing from a
    # saved model -- the message below will therefore always report 0.
    start_iter = 0
    if opt.continue_model != '':
        print('continue to train, start_iter: {}'.format(start_iter))

    start_time = time.time()
    best_accuracy = -1
    i = start_iter

    while True:
        # train part
        # Re-enabled every iteration in case validation switched gradients off.
        for p in model.parameters():
            p.requires_grad = True

        image_tensors, labels = train_dataset.get_batch()
        image = image_tensors.to(device)

        ctc_text, ctc_length = ctc_converter.encode(labels)
        attn_text, attn_length = attn_converter.encode(labels)
        batch_size = image.size(0)

        # ctc loss
        ctc_preds, attn_preds = model(image, attn_text)
        ctc_preds = ctc_preds.log_softmax(2)
        # Every sample uses the full output length as its CTC input length.
        preds_size = torch.IntTensor([ctc_preds.size(1)] * batch_size)
        ctc_preds = ctc_preds.permute(1, 0, 2)  # to use CTCLoss format
        ctc_cost = ctc_criterion(ctc_preds, ctc_text, preds_size, ctc_length)

        # attn loss
        target = attn_text[:, 1:]  # drop the first (start) position
        attn_cost = attn_criterion(attn_preds.view(-1, attn_preds.shape[-1]),
                                   target.contiguous().view(-1))
        cost = opt.ctc_weight * ctc_cost + (1.0 - opt.ctc_weight) * attn_cost

        model.zero_grad()
        cost.backward()
        torch.nn.utils.clip_grad_norm_(
            model.parameters(),
            opt.grad_clip)  # gradient clipping with 5 (Default)
        optimizer.step()
        loss_avg.add(cost)

        # validation part
        if i % opt.valInterval == 0:
            elapsed_time = time.time() - start_time
            logging.info('[{}/{}] Loss: {:0.5f} elapsed_time: {:0.5f}'.format(
                i, opt.num_iter, loss_avg.val(), elapsed_time))
            # for log
            with open(
                    osj(opt.outPath,
                        '{}/log_train.txt'.format(opt.experiment_name)),
                    'a') as log:
                log.write(
                    '[{}/{}] Loss: {:0.5f} elapsed_time: {:0.5f}\n'.format(
                        i, opt.num_iter, loss_avg.val(), elapsed_time))
                loss_avg.reset()

                model.eval()
                with torch.no_grad():
                    # `preds` and `labels` are rebound to the validation outputs.
                    valid_loss, current_accuracy, ctc_accuracy, current_norm_ED, preds, labels, infer_time, length_of_data \
                        = mtl_validation(model, ctc_criterion, attn_criterion, valid_loader, ctc_converter, attn_converter, opt)
                model.train()

                # Show the first five prediction / ground-truth pairs, cut at '[s]'.
                for pred, gt in zip(preds[:5], labels[:5]):
                    pred = pred[:pred.find('[s]')]
                    gt = gt[:gt.find('[s]')]
                    print('{:20s}, gt: {:20s},   {}'.format(
                        pred, gt, str(pred == gt)))
                    log.write('{:20s}, gt: {:20s},   {}\n'.format(
                        pred, gt, str(pred == gt)))

                valid_log = '[{}/{}] valid loss: {:0.5f}'.format(
                    i, opt.num_iter, valid_loss)
                valid_log += ' accuracy: {:0.3f}'.format(current_accuracy)

                log.write(valid_log + '\n')

                # save best accuracy model
                if current_accuracy > best_accuracy:
                    best_accuracy = current_accuracy
                    torch.save(
                        model.state_dict(),
                        osj(opt.outPath, '{}/best_accuracy.pth'.format(
                            opt.experiment_name)))

                best_model_log = 'best_accuracy: {:0.3f}'.format(best_accuracy)
                logging.info(best_model_log)
                log.write(best_model_log + '\n')

        # save a checkpoint every 50000 iterations.
        if (i + 1) % 50000 == 0:
            torch.save(
                model.state_dict(),
                osj(opt.outPath,
                    '{}/iter_{}.pth'.format(opt.experiment_name, i + 1)))

        # Training ends by terminating the process, not by returning.
        if i == opt.num_iter:
            logging.info('end the training')
            sys.exit()
        i += 1
Пример #6
0
def validation(model, criterion, evaluation_loader, converter, opt):
    """Evaluate ``model`` on ``evaluation_loader`` with a CTC or attention head.

    The branch is chosen by ``'CTC' in opt.Prediction``. Every parameter of
    ``model`` has ``requires_grad`` set to ``False`` and it is NOT restored
    here; the caller must re-enable gradients before training.

    Args:
        model: invoked as ``model(image, text_for_pred)`` (CTC) or
            ``model(image, text_for_pred, is_train=False)`` (attention).
        criterion: CTC-style or cross-entropy-style loss matching the branch.
        evaluation_loader: iterable of ``(image_tensors, labels)`` batches.
        converter: provides ``encode(labels)`` and ``decode(indices, lengths)``.
        opt: options object; ``batch_max_length`` and ``Prediction`` are read.

    Returns:
        ``(valid_loss, accuracy, norm_ED, preds_str, labels, infer_time,
        length_of_data)``. ``accuracy`` is a percentage over all samples;
        ``norm_ED`` is the *sum* of per-sample edit distances divided by
        ground-truth length; ``preds_str`` / ``labels`` come from the last
        batch only (empty lists if the loader is empty).
    """
    for p in model.parameters():
        p.requires_grad = False

    n_correct = 0
    norm_ED = 0
    length_of_data = 0
    infer_time = 0
    valid_loss_avg = Averager()
    # Pre-bound so that an empty loader yields empty lists instead of an
    # UnboundLocalError at the return statement.
    preds_str, labels = [], []

    for image_tensors, labels in evaluation_loader:
        batch_size = image_tensors.size(0)
        length_of_data = length_of_data + batch_size
        with torch.no_grad():
            image = image_tensors.cuda()
            # Decode for the maximum length; the input text is an all-zero placeholder.
            length_for_pred = torch.cuda.IntTensor([opt.batch_max_length] *
                                                   batch_size)
            text_for_pred = torch.cuda.LongTensor(
                batch_size, opt.batch_max_length + 1).fill_(0)

            text_for_loss, length_for_loss = converter.encode(labels)

        # Only the forward pass is counted as inference time.
        start_time = time.time()
        if 'CTC' in opt.Prediction:
            preds = model(image, text_for_pred).log_softmax(2)
            forward_time = time.time() - start_time

            # Evaluation loss for the CTC decoder.
            preds_size = torch.IntTensor([preds.size(1)] * batch_size)
            preds = preds.permute(1, 0, 2)  # to use CTCLoss format
            cost = criterion(preds, text_for_loss, preds_size, length_for_loss)

            # Greedy decoding: arg-max index per step, then map to characters.
            _, preds_index = preds.max(2)
            preds_index = preds_index.transpose(1, 0).contiguous().view(-1)
            preds_str = converter.decode(preds_index.data, preds_size.data)

        else:
            preds = model(image, text_for_pred, is_train=False)
            forward_time = time.time() - start_time

            preds = preds[:, :text_for_loss.shape[1] - 1, :]
            target = text_for_loss[:, 1:]  # without [GO] symbol
            cost = criterion(preds.contiguous().view(-1, preds.shape[-1]),
                             target.contiguous().view(-1))

            # Greedy decoding: arg-max index per step, then map to characters.
            _, preds_index = preds.max(2)
            preds_str = converter.decode(preds_index, length_for_pred)
            # In this branch the ground truth is re-decoded from the encoded text.
            labels = converter.decode(text_for_loss[:, 1:], length_for_loss)

        infer_time += forward_time
        valid_loss_avg.add(cost)

        # ---- accuracy bookkeeping ----
        for pred, gt in zip(preds_str, labels):
            if 'Attn' in opt.Prediction:
                # Prune after the end-of-sentence token.
                # NOTE(review): str.find returns -1 when '[s]' is absent, in
                # which case this slice silently drops the last character.
                pred = pred[:pred.find('[s]')]
                gt = gt[:gt.find('[s]')]

            if pred == gt:
                n_correct += 1
            # An empty ground truth used to raise ZeroDivisionError; normalise
            # by at least 1 so such a sample contributes its raw edit distance.
            norm_ED += edit_distance(pred, gt) / max(len(gt), 1)

    if length_of_data > 0:
        accuracy = n_correct / float(length_of_data) * 100
    else:
        # Nothing was evaluated; report 0% rather than dividing by zero.
        accuracy = 0.0

    return valid_loss_avg.val(
    ), accuracy, norm_ED, preds_str, labels, infer_time, length_of_data
Пример #7
0
                                               sorted=True)

                p = p + cfg.progalambda * (
                    allExtraproto[topkindex] * topsim.unsqueeze(2).expand(
                        cfg.train_way, cfg.topk, 1600)).sum(dim=1) / cfg.topk

            label = torch.arange(cfg.train_way).repeat(cfg.query)
            label = label.type(torch.cuda.LongTensor)

            logits = euclidean_metric(model(data_query), proto)
            loss = F.cross_entropy(logits, label)
            acc = count_acc(logits, label)
            print('epoch {}, train {}/{}, loss={:.4f} acc={:.4f}'.format(
                epoch, i, len(train_loader), loss.item(), acc))

            tl.add(loss.item())
            ta.add(acc)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            p = None
            proto = None
            logits = None
            loss = None

        tl = tl.item()
        ta = ta.item()

        torch.cuda.empty_cache()
Пример #8
0
def train(model,
          optimizer,
          src_loader,
          tgt_loader,
          valid_loader,
          criterion,
          log_interval=1000,
          val_interval=50,
          posp=1,
          nagp=0.5,
          params_cls=0.5,
          params_da=0.5,
          da_type='cmmd',
          max_fpr=0.01):
    """Train ``model`` on source and target batches with a domain-adaptation loss.

    Each step draws one source and one target batch and minimises
    ``params_cls * src_loss + tgt_loss + params_da * lambd * da_loss``, where
    ``lambd = 2 / (1 + exp(-5 * i / len(src_loader))) - 1`` ramps up with the
    step counter ``i``. The loop runs for at most ``len(src_loader) * 20 - 1``
    steps and stops early when ``Stoper.add(auc_head)`` returns a truthy value.

    Module-level ``max_auc``, ``max_auchead`` and ``min_loss`` are updated, and
    the whole model is saved to ``{save_dir}/tmp.pt`` whenever the partial AUC
    on ``valid_loader`` improves.

    Args:
        model: called as ``model(ids, values, seqlength, seq_mask, domain)``;
            must return 2 values for ``'src'`` and 3 values for ``'tgt'``.
        optimizer: optimiser stepping ``model``'s parameters.
        src_loader, tgt_loader: loaders yielding
            ``(ids, values, seqlength, label, seq_mask)``.
        valid_loader: loader passed to ``test`` for validation.
        criterion: per-sample loss, called as ``criterion(pred, label)``.
        log_interval: running averages are printed every this many steps.
        val_interval: validation is run every this many steps.
        posp: loss weight for samples with label 1.
        nagp: loss weight for samples with label 0.
        params_cls: weight of the source classification loss.
        params_da: weight of the domain-adaptation loss.
        da_type: one of ``'cmmd'``, ``'mmd'``, ``'coral'``, ``'euclidian'``,
            ``'c_euclidian'``, ``'nometric'``, ``'ced'``.
        max_fpr: forwarded to ``test`` for the partial AUC.

    Raises:
        ValueError: if ``da_type`` is not one of the supported names.
    """
    global max_auc
    global max_auchead
    global min_loss
    posp = torch.FloatTensor([posp]).cuda()
    nagp = torch.FloatTensor([nagp]).cuda()
    one = torch.FloatTensor([1]).cuda()

    iter_src = iter(src_loader)
    iter_tgt = iter(tgt_loader)
    num_iter = len(src_loader)
    stoper = Stoper()

    avg_all_loss = Averager()
    avg_src_loss = Averager()
    avg_tgt_loss = Averager()
    avg_da_loss = Averager()
    start_time = time.time()
    for i in range(1, num_iter * 20):
        model.train()
        # Built-in next() is the portable way to advance an iterator; the
        # `.next()` method is not part of the Python 3 iterator protocol.
        src_ids, src_values, src_seqlength, src_label, src_seq_mask = next(
            iter_src)
        src_ids, src_values, src_label = src_ids.cuda(), src_values.cuda(
        ), src_label.cuda().float()
        src_seq_mask = src_seq_mask.cuda()
        # Restart a loader's iterator whenever the step count hits a multiple
        # of its length.
        if i % len(src_loader) == 0:
            iter_src = iter(src_loader)
        if i % len(tgt_loader) == 0:
            iter_tgt = iter(tgt_loader)

        # Per-sample weight: posp for label 1, nagp for label 0.
        src_p = posp * src_label + nagp * (one - src_label)
        src_y, src_fea_LSTM = model(src_ids, src_values, src_seqlength,
                                    src_seq_mask, 'src')
        src_loss = torch.mean(src_p * criterion(src_y, src_label))

        tgt_ids, tgt_values, tgt_seqlength, tgt_label, tgt_seq_mask = next(
            iter_tgt)
        tgt_ids, tgt_values, tgt_label = tgt_ids.cuda(), tgt_values.cuda(
        ), tgt_label.cuda().float()
        tgt_seq_mask = tgt_seq_mask.cuda()

        tgt_p = posp * tgt_label + nagp * (one - tgt_label)
        tgt_y, tgt_fea_LSTM, tgt_spey = model(tgt_ids, tgt_values,
                                              tgt_seqlength, tgt_seq_mask,
                                              'tgt')
        tgt_loss = torch.mean(tgt_p * criterion(tgt_y, tgt_label))

        # Domain-adaptation loss between source and target features.
        if da_type == 'cmmd':
            da_loss = cmmd(src_fea_LSTM, tgt_fea_LSTM, src_label.long(),
                           tgt_label.long())
        elif da_type == 'mmd':
            da_loss = mmd_rbf_noaccelerate(src_fea_LSTM, tgt_fea_LSTM)
        elif da_type == 'coral':
            da_loss = coral(src_fea_LSTM, tgt_fea_LSTM)
        elif da_type == 'euclidian':
            da_loss = euclidian(src_fea_LSTM, tgt_fea_LSTM)
        elif da_type == 'c_euclidian':
            da_loss = c_euclidian(src_fea_LSTM, tgt_fea_LSTM, src_label.long(),
                                  tgt_label.long())
        elif da_type == 'nometric':
            da_loss = nometric(src_fea_LSTM, tgt_fea_LSTM)
        elif da_type == 'ced':
            da_loss = ced(src_fea_LSTM, tgt_fea_LSTM, src_label.long(),
                          tgt_label.long())
        else:
            # Previously fell through and failed later with an opaque
            # UnboundLocalError on `da_loss`.
            raise ValueError(f'unknown da_type: {da_type!r}')

        # Ramp-up factor for the adaptation term.
        lambd = 2 / (1 + math.exp((-5 * i) / (len(src_loader)))) - 1
        loss = params_cls * src_loss + tgt_loss + params_da * lambd * da_loss
        model.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()
        avg_all_loss.add(loss.item())
        avg_src_loss.add(src_loss.item())
        avg_tgt_loss.add(tgt_loss.item())
        avg_da_loss.add(da_loss.item())
        if (i + 1) % log_interval == 0:
            print(
                'step: {}, loss: {:.4f}, src_loss: {:.4f}, tgt_loss: {:.4f}, {}_loss:, {:.4f}, lambda: {}'
                .format(i + 1, avg_all_loss.item(), avg_src_loss.item(),
                        avg_tgt_loss.item(), da_type, avg_da_loss.item(),
                        lambd))
            # Start fresh running averages for the next logging window.
            avg_all_loss = Averager()
            avg_src_loss = Averager()
            avg_tgt_loss = Averager()
            avg_da_loss = Averager()

        if (i + 1) % val_interval == 0:
            end_time = time.time()
            print('train time (s):', end_time - start_time)
            start_time = time.time()
            # NOTE: `loss` is rebound to the validation loss from here on.
            auc_head, loss, auc = test(model, valid_loader, criterion, posp,
                                       max_fpr)
            if loss < min_loss:
                min_loss = loss
            if auc > max_auc:
                max_auc = auc
            if auc_head > max_auchead:
                # Checkpoint only when the partial AUC improves.
                torch.save(model, f'{save_dir}/tmp.pt')
                max_auchead = auc_head
            print(
                'dev ---  auchead: {:.4f}, max_auchead: {:.4f}, auc: {:.4f}, max_auc: {:.4f}, loss: {:.4f}, minloss: {:.4f}'
                .format(auc_head, max_auchead, auc, max_auc, loss, min_loss))
            end_time = time.time()
            print('dev time (s):', end_time - start_time)
            start_time = time.time()
            if stoper.add(auc_head):
                print('training end')
                break