Example #1
def test(model, data_loader, criterion, posp, max_fpr):
    model.eval()
    targets, predicts = list(), list()
    loss = Averager()
    posp = torch.FloatTensor([posp]).cuda()
    one = torch.FloatTensor([1]).cuda()
    with torch.no_grad():
        for j, (ids, values, seqlength, label,
                seq_mask) in enumerate(data_loader):
            ids, values = ids.cuda(), values.cuda()
            label = label.cuda().float()
            seq_mask = seq_mask.cuda()
            y, _ = model(ids, values, seqlength, seq_mask, 'tgt')
            p = posp * label + (one - posp) * (one - label)
            loss.add(torch.mean(p * criterion(y, label)).item())
            targets.extend(label.tolist())
            predicts.extend(y.tolist())
    model.train()
    return (roc_auc_score(targets, predicts, max_fpr=max_fpr),
            loss.item(),
            roc_auc_score(targets, predicts))


def validation(model, ctc_criterion, attn_criterion, evaluation_loader, ctc_converter, attn_converter, opt):
    """ validation or evaluation """
    for p in model.parameters():
        p.requires_grad = False
    n_correct = 0
    norm_ED = 0
    length_of_data = 0
    infer_time = 0
    valid_loss_avg = Averager()
    ctc_correct = 0
    for i, (image_tensors, labels) in enumerate(evaluation_loader):
        batch_size = image_tensors.size(0)
        length_of_data = length_of_data + batch_size
        #image = image_tensors.cuda()
        image = image_tensors.to(device)
        length_for_pred = torch.IntTensor([opt.batch_max_length] * batch_size).to(device)
        text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length + 1).fill_(0).to(device)

        ctc_text_for_loss, ctc_length_for_loss = ctc_converter.encode(labels)
        attn_text_for_loss, attn_length_for_loss = attn_converter.encode(labels)

        start_time = time.time()
        ctc_preds, attn_preds = model(image, text_for_pred)
        forward_time = time.time() - start_time
        # ctc
        ctc_preds = ctc_preds.log_softmax(2)
        # Calculate evaluation loss for CTC decoder.
        preds_size = torch.IntTensor([ctc_preds.size(1)] * batch_size)
        ctc_preds = ctc_preds.permute(1, 0, 2)  # to use CTCloss format
        ctc_cost = ctc_criterion(ctc_preds, ctc_text_for_loss, preds_size, ctc_length_for_loss)
        # Select max probability (greedy decoding) then decode index to character
        _, preds_index = ctc_preds.max(2)
        preds_index = preds_index.transpose(1, 0).contiguous().view(-1)
        ctc_preds_str = ctc_converter.decode(preds_index.data, preds_size.data)

        # attention
        attn_preds = attn_preds[:, :attn_text_for_loss.shape[1] - 1, :]
        target = attn_text_for_loss[:, 1:]  # without [GO] Symbol
        attn_cost = attn_criterion(attn_preds.contiguous().view(-1, attn_preds.shape[-1]), target.contiguous().view(-1))
        # select max probability (greedy decoding) then decode index to character
        _, attn_preds_index = attn_preds.max(2)
        attn_preds_str = attn_converter.decode(attn_preds_index, length_for_pred)
        attn_labels = attn_converter.decode(attn_text_for_loss[:, 1:], attn_length_for_loss)

        cost = opt.ctc_weight * ctc_cost + (1.0 - opt.ctc_weight) * attn_cost
        infer_time += forward_time
        valid_loss_avg.add(cost)
        # calculate accuracy.
        #for attn_pred, attn_gt in zip(attn_preds_str, attn_labels):
        for pred, gt, attn_pred, attn_gt in zip(ctc_preds_str, labels, attn_preds_str, attn_labels):
            attn_pred = attn_pred[:attn_pred.find('[s]')]  # prune after "end of sentence" token ([s])
            attn_gt = attn_gt[:attn_gt.find('[s]')]

            if pred == gt:
                ctc_correct += 1
            if attn_pred == attn_gt:
                n_correct += 1
            norm_ED += edit_distance(attn_pred, attn_gt) / len(attn_gt)

    accuracy = n_correct / float(length_of_data) * 100
    ctc_accuracy = ctc_correct / float(length_of_data) * 100

    return valid_loss_avg.val(), accuracy, ctc_accuracy, norm_ED, attn_preds_str, attn_labels, infer_time, length_of_data
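# All of the snippets on this page rely on an Averager helper that is not shown
# here. The class below is only a minimal sketch consistent with the calls used
# in these examples (add / val / item / reset); the real implementations in the
# source projects may differ (e.g. they may weight by batch size).
class Averager(object):
    """Keeps a running average of scalar values (floats or 0-dim tensors)."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.sum = 0.0
        self.n = 0

    def add(self, v):
        # accept either a Python float or a torch scalar tensor
        if hasattr(v, 'item'):
            v = float(v.item())
        self.sum += v
        self.n += 1

    def val(self):
        return self.sum / self.n if self.n > 0 else 0.0

    # some snippets call .item() instead of .val(); treat them as synonyms here
    item = val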
Example #3
    propagate_loader = DataLoader(dataset=trainset,
                                  batch_size=1280,
                                  shuffle=True,
                                  num_workers=24,
                                  pin_memory=True)

    premodel = torch.load(cfg.load)
    # model = Convnet()

    model = copyModel(torch.load(cfg.load), Convnet()).cuda()
    # model = copyModel(Convnet(), torch.load(cfg.load)).cuda()
    model.eval()

    allacc = []

    ave_acc = Averager()

    allExtraData = []
    with torch.no_grad():
        for i, batch in enumerate(propagate_loader, 1):
            ext_data, _ = [_.cuda() for _ in batch]
            allExtraData.append(ext_data)
    allExtraData = torch.cat(allExtraData)

    if cfg.progalambda > 0:
        with torch.no_grad():
            n = 1280
            allExtraproto = []
            index = torch.randperm(
                allExtraData.shape[0])[:int(allExtraData.shape[0] / 10)]
            extraDatatemp = allExtraData[index]
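# copyModel is not defined in this snippet. A plausible minimal sketch (an
# assumption, not the original code): copy every parameter from the loaded
# checkpoint model into a freshly constructed Convnet whenever the name and
# shape match, then return the target model.
def copyModel(src_model, dst_model):
    src_state = src_model.state_dict()
    dst_state = dst_model.state_dict()
    matched = {k: v for k, v in src_state.items()
               if k in dst_state and v.shape == dst_state[k].shape}
    dst_state.update(matched)
    dst_model.load_state_dict(dst_state)
    return dst_model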
Example #4
def train(opt):
    """ dataset preparation """
    opt.select_data = opt.select_data.split('-')
    opt.batch_ratio = opt.batch_ratio.split('-')
    train_dataset = Batch_Balanced_Dataset(opt)

    AlignCollate_valid = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD)
    valid_dataset = hierarchical_dataset(root=opt.valid_data, opt=opt)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=opt.batch_size,
        shuffle=True,  # 'True' to check training progress with validation function.
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_valid, pin_memory=True)
    print('-' * 80)

    """ model configuration """
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial, opt.input_channel, opt.output_channel,
          opt.hidden_size, opt.num_class, opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
          opt.SequenceModeling, opt.Prediction)

    # weight initialization
    for name, param in model.named_parameters():
        if 'localization_fc2' in name:
            print(f'Skip {name} as it is already initialized')
            continue
        try:
            if 'bias' in name:
                init.constant_(param, 0.0)
            elif 'weight' in name:
                init.kaiming_normal_(param)
        except Exception as e:  # for batchnorm.
            if 'weight' in name:
                param.data.fill_(1)
            continue

    # data parallel for multi-GPU
    model = torch.nn.DataParallel(model).cuda()
    model.train()
    if opt.continue_model != '':
        if opt.without_prediction:
            load_model_without_prediction(opt.continue_model, model)
            print(f'loading pretrained model from {opt.continue_model}, without prediction layer')
        else:
            print(f'loading pretrained model from {opt.continue_model}')
            model.load_state_dict(torch.load(opt.continue_model))
    print("Model:")
    print(model)

    """ setup loss """
    if 'CTC' in opt.Prediction:
        criterion = torch.nn.CTCLoss(zero_infinity=True).cuda()
    else:
        criterion = torch.nn.CrossEntropyLoss(ignore_index=0).cuda()  # ignore [GO] token = ignore index 0
    # loss averager
    loss_avg = Averager()

    # keep only the parameters that require gradients (i.e. those updated by gradient descent)
    filtered_parameters = []
    params_num = []
    for p in filter(lambda p: p.requires_grad, model.parameters()):
        filtered_parameters.append(p)
        params_num.append(np.prod(p.size()))
    print('Trainable params num : ', sum(params_num))
    # [print(name, p.numel()) for name, p in filter(lambda p: p[1].requires_grad, model.named_parameters())]

    # setup optimizer
    if opt.adam:
        optimizer = optim.Adam(filtered_parameters, lr=opt.lr, betas=(opt.beta1, 0.999))
    else:
        optimizer = optim.Adadelta(filtered_parameters, lr=opt.lr, rho=opt.rho, eps=opt.eps)
    print("Optimizer:")
    print(optimizer)

    """ final options """
    # print(opt)
    with open(f'./saved_models/{opt.experiment_name}/opt.txt', 'a') as opt_file:
        opt_log = '------------ Options -------------\n'
        args = vars(opt)
        for k, v in args.items():
            opt_log += f'{str(k)}: {str(v)}\n'
        opt_log += '---------------------------------------\n'
        print(opt_log)
        opt_file.write(opt_log)

    """ start training """
    start_iter = 0
    if opt.continue_model != '':
        print(f'continue to train, start_iter: {start_iter}')

    start_time = time.time()
    best_accuracy = -1
    best_norm_ED = 1e+6
    i = start_iter

    while True:
        # train part
        for p in model.parameters():
            p.requires_grad = True

        image_tensors, labels = train_dataset.get_batch()
        image = image_tensors.cuda()
        text, length = converter.encode(labels)
        batch_size = image.size(0)

        if 'CTC' in opt.Prediction:
            preds = model(image, text).log_softmax(2)
            preds_size = torch.IntTensor([preds.size(1)] * batch_size)
            preds = preds.permute(1, 0, 2)  # to use CTCLoss format
            cost = criterion(preds, text, preds_size, length)

        else:
            preds = model(image, text)
            target = text[:, 1:]  # without [GO] Symbol
            cost = criterion(preds.view(-1, preds.shape[-1]), target.contiguous().view(-1))

        model.zero_grad()
        cost.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip)  # gradient clipping with 5 (Default)
        optimizer.step()

        loss_avg.add(cost)

        # validation part
        if i % opt.valInterval == 0:
            elapsed_time = time.time() - start_time
            logging.info(f'[{i}/{opt.num_iter}] Loss: {loss_avg.val():0.5f} elapsed_time: {elapsed_time:0.5f}')
            # for log
            with open(f'./saved_models/{opt.experiment_name}/log_train.txt', 'a') as log:
                log.write(f'[{i}/{opt.num_iter}] Loss: {loss_avg.val():0.5f} elapsed_time: {elapsed_time:0.5f}\n')
                loss_avg.reset()

                model.eval()
                valid_loss, current_accuracy, current_norm_ED, preds, labels, infer_time, length_of_data = validation(
                    model, criterion, valid_loader, converter, opt)
                model.train()

                for pred, gt in zip(preds[:5], labels[:5]):
                    if 'Attn' in opt.Prediction:
                        pred = pred[:pred.find('[s]')]
                        gt = gt[:gt.find('[s]')]
                    print(f'{pred:20s}, gt: {gt:20s},   {str(pred == gt)}')
                    log.write(f'{pred:20s}, gt: {gt:20s},   {str(pred == gt)}\n')

                valid_log = f'[{i}/{opt.num_iter}] valid loss: {valid_loss:0.5f}'
                valid_log += f' accuracy: {current_accuracy:0.3f}, norm_ED: {current_norm_ED:0.2f}'
                log.write(valid_log + '\n')

                # keep best accuracy model
                if current_accuracy > best_accuracy:
                    best_accuracy = current_accuracy
                    torch.save(model.state_dict(), f'./saved_models/{opt.experiment_name}/mtl_best_accuracy.pth')
                if current_norm_ED < best_norm_ED:
                    best_norm_ED = current_norm_ED
                    torch.save(model.state_dict(), f'./saved_models/{opt.experiment_name}/best_norm_ED.pth')
                best_model_log = f'best_accuracy: {best_accuracy:0.3f}, best_norm_ED: {best_norm_ED:0.2f}'
                logging.info(best_model_log)
                log.write(best_model_log + '\n')

        # save a checkpoint every 50000 iterations.
        if (i + 1) % 50000 == 0:
            torch.save(
                model.state_dict(), f'./saved_models/{opt.experiment_name}/iter_{i+1}.pth')

        if i == opt.num_iter:
            logging.info('end the training')
            sys.exit()
        i += 1
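# Standalone illustration (not taken from the training script above) of the tensor
# layout the CTC branch prepares: torch.nn.CTCLoss expects log-probabilities shaped
# (T, N, C) plus per-sample input and target lengths. All sizes below are dummies.
import torch

T, N, C, S = 26, 4, 37, 10  # time steps, batch, classes (class 0 = CTC blank), max label length
log_probs = torch.randn(N, T, C).log_softmax(2).permute(1, 0, 2)  # (N, T, C) -> (T, N, C)
targets = torch.randint(1, C, (N, S), dtype=torch.long)           # label indices, never the blank
input_lengths = torch.full((N,), T, dtype=torch.long)
target_lengths = torch.randint(1, S + 1, (N,), dtype=torch.long)

ctc_criterion = torch.nn.CTCLoss(zero_infinity=True)
print(ctc_criterion(log_probs, targets, input_lengths, target_lengths).item())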
Example #5
def train(opt):
    #logging.info(opt)
    train_dataset = Batch_Dataset(opt)
    AlignCollate_valid = AlignCollate(imgH=opt.imgH,
                                      imgW=opt.imgW,
                                      keep_ratio_with_pad=opt.PAD)
    valid_dataset = LmdbDataset(root=opt.valid_data, opt=opt)
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=int(opt.workers),
                                               collate_fn=AlignCollate_valid,
                                               pin_memory=True)
    print('-' * 80)
    """ model configuration """

    ctc_converter = CTCLabelConverter(opt.character, opt.subword)
    attn_converter = AttnLabelConverter(opt.character, opt.subword,
                                        opt.batch_max_length)

    opt.num_class = len(attn_converter.character)
    opt.ctc_num_class = len(ctc_converter.character)
    print("ctc num class {}".format(len(ctc_converter.character)))
    print("attention num class {}".format(len(attn_converter.character)))

    if opt.rgb:
        opt.input_channel = 3

    model = MyModel(opt)

    # weight initialization
    for name, param in model.named_parameters():
        if 'localization_fc2' in name:
            print('Skip {} as it is already initialized'.format(name))
            continue
        try:
            if 'bias' in name:
                init.constant_(param, 0.0)
            elif 'weight' in name:
                init.kaiming_normal_(param)
        except Exception as e:
            if 'weight' in name:
                param.data.fill_(1)
            continue

    model = torch.nn.DataParallel(model).to(device)

    model.train()
    if opt.continue_model != '':
        print('loading pretrained model from {}'.format(opt.continue_model))
        model.load_state_dict(torch.load(opt.continue_model))
    """ setup loss """
    ctc_criterion = torch.nn.CTCLoss(zero_infinity=True).to(device)
    attn_criterion = torch.nn.CrossEntropyLoss(ignore_index=0).to(device)

    loss_avg = Averager()
    filtered_parameters = []
    params_num = []
    for p in filter(lambda p: p.requires_grad, model.parameters()):
        filtered_parameters.append(p)
        params_num.append(np.prod(p.size()))
    print('Trainable params num : ', sum(params_num))

    if opt.adam:
        optimizer = optim.Adam(filtered_parameters,
                               lr=opt.lr,
                               betas=(opt.beta1, 0.999))
    else:
        optimizer = optim.Adadelta(filtered_parameters,
                                   lr=opt.lr,
                                   rho=opt.rho,
                                   eps=opt.eps)
    print("Optimizer:")
    print(optimizer)
    """ final options """
    with open(osj(opt.outPath, '{}/opt.txt'.format(opt.experiment_name)),
              'a') as opt_file:
        opt_log = '------------ Options -------------\n'
        args = vars(opt)
        for k, v in args.items():
            opt_log += '{}: {}\n'.format(str(k), str(v))
        opt_log += '---------------------------------------\n'
        print(opt_log)
        opt_file.write(opt_log)
    """ start training """
    start_iter = 0
    if opt.continue_model != '':
        print('continue to train, start_iter: {}'.format(start_iter))

    start_time = time.time()
    best_accuracy = -1
    i = start_iter

    while True:
        # train part
        for p in model.parameters():
            p.requires_grad = True

        image_tensors, labels = train_dataset.get_batch()
        image = image_tensors.to(device)

        ctc_text, ctc_length = ctc_converter.encode(labels)
        attn_text, attn_length = attn_converter.encode(labels)
        batch_size = image.size(0)
        # ctc loss
        ctc_preds, attn_preds = model(image, attn_text)
        ctc_preds = ctc_preds.log_softmax(2)
        preds_size = torch.IntTensor([ctc_preds.size(1)] * batch_size)
        ctc_preds = ctc_preds.permute(1, 0, 2)
        ctc_cost = ctc_criterion(ctc_preds, ctc_text, preds_size, ctc_length)
        # attn loss
        target = attn_text[:, 1:]
        attn_cost = attn_criterion(attn_preds.view(-1, attn_preds.shape[-1]),
                                   target.contiguous().view(-1))
        cost = opt.ctc_weight * ctc_cost + (1.0 - opt.ctc_weight) * attn_cost

        model.zero_grad()
        cost.backward()
        torch.nn.utils.clip_grad_norm_(
            model.parameters(),
            opt.grad_clip)  # gradient clipping with 5 (Default)
        optimizer.step()
        loss_avg.add(cost)
        # validation part
        if i % opt.valInterval == 0:
            elapsed_time = time.time() - start_time
            logging.info('[{}/{}] Loss: {:0.5f} elapsed_time: {:0.5f}'.format(
                i, opt.num_iter, loss_avg.val(), elapsed_time))
            # for log
            with open(
                    osj(opt.outPath,
                        '{}/log_train.txt'.format(opt.experiment_name)),
                    'a') as log:
                log.write(
                    '[{}/{}] Loss: {:0.5f} elapsed_time: {:0.5f}\n'.format(
                        i, opt.num_iter, loss_avg.val(), elapsed_time))
                loss_avg.reset()

                model.eval()
                with torch.no_grad():
                    valid_loss, current_accuracy, ctc_accuracy, current_norm_ED, preds, labels, infer_time, length_of_data \
                        = mtl_validation(model, ctc_criterion, attn_criterion, valid_loader, ctc_converter, attn_converter, opt)
                model.train()

                for pred, gt in zip(preds[:5], labels[:5]):
                    pred = pred[:pred.find('[s]')]
                    gt = gt[:gt.find('[s]')]
                    print('{:20s}, gt: {:20s},   {}'.format(
                        pred, gt, str(pred == gt)))
                    log.write('{:20s}, gt: {:20s},   {}\n'.format(
                        pred, gt, str(pred == gt)))

                valid_log = '[{}/{}] valid loss: {:0.5f}'.format(
                    i, opt.num_iter, valid_loss)
                valid_log += ' accuracy: {:0.3f}'.format(current_accuracy)

                log.write(valid_log + '\n')

                # save best accuracy model
                if current_accuracy > best_accuracy:
                    best_accuracy = current_accuracy
                    torch.save(
                        model.state_dict(),
                        osj(opt.outPath, '{}/best_accuracy.pth'.format(
                            opt.experiment_name)))

                best_model_log = 'best_accuracy: {:0.3f}'.format(best_accuracy)
                logging.info(best_model_log)
                log.write(best_model_log + '\n')

        if (i + 1) % 50000 == 0:
            torch.save(
                model.state_dict(),
                osj(opt.outPath,
                    '{}/iter_{}.pth'.format(opt.experiment_name, i + 1)))

        if i == opt.num_iter:
            logging.info('end the training')
            sys.exit()
        i += 1
Example #6
def validation(model, criterion, evaluation_loader, converter, opt):
    """ validation or evaluation """
    for p in model.parameters():
        p.requires_grad = False

    n_correct = 0
    norm_ED = 0
    length_of_data = 0
    infer_time = 0
    valid_loss_avg = Averager()

    for i, (image_tensors, labels) in enumerate(evaluation_loader):
        batch_size = image_tensors.size(0)
        length_of_data = length_of_data + batch_size
        with torch.no_grad():
            image = image_tensors.cuda()
            # For max length prediction
            length_for_pred = torch.cuda.IntTensor([opt.batch_max_length] *
                                                   batch_size)
            text_for_pred = torch.cuda.LongTensor(
                batch_size, opt.batch_max_length + 1).fill_(0)

            text_for_loss, length_for_loss = converter.encode(labels)

        start_time = time.time()
        if 'CTC' in opt.Prediction:
            preds = model(image, text_for_pred).log_softmax(2)
            forward_time = time.time() - start_time

            # Calculate evaluation loss for CTC decoder.
            preds_size = torch.IntTensor([preds.size(1)] * batch_size)
            preds = preds.permute(1, 0, 2)  # to use CTCloss format
            cost = criterion(preds, text_for_loss, preds_size, length_for_loss)

            # Select max probability (greedy decoding) then decode index to character
            _, preds_index = preds.max(2)
            preds_index = preds_index.transpose(1, 0).contiguous().view(-1)
            preds_str = converter.decode(preds_index.data, preds_size.data)

        else:
            preds = model(image, text_for_pred, is_train=False)
            forward_time = time.time() - start_time

            preds = preds[:, :text_for_loss.shape[1] - 1, :]
            target = text_for_loss[:, 1:]  # without [GO] Symbol
            cost = criterion(preds.contiguous().view(-1, preds.shape[-1]),
                             target.contiguous().view(-1))

            # select max probability (greedy decoding) then decode index to character
            _, preds_index = preds.max(2)
            preds_str = converter.decode(preds_index, length_for_pred)
            labels = converter.decode(text_for_loss[:, 1:], length_for_loss)

        infer_time += forward_time
        valid_loss_avg.add(cost)

        # calculate accuracy.
        for pred, gt in zip(preds_str, labels):
            if 'Attn' in opt.Prediction:
                pred = pred[:pred.find(
                    '[s]')]  # prune after "end of sentence" token ([s])
                gt = gt[:gt.find('[s]')]

            if pred == gt:
                n_correct += 1
            norm_ED += edit_distance(pred, gt) / len(gt)

    accuracy = n_correct / float(length_of_data) * 100

    return (valid_loss_avg.val(), accuracy, norm_ED, preds_str, labels,
            infer_time, length_of_data)
Example #7
        allExtraData = []
        with torch.no_grad():
            for i, batch in enumerate(propagate_loader, 1):
                ext_data, _ = [_.cuda() for _ in batch]
                allExtraData.append(ext_data)
        allExtraData = torch.cat(allExtraData)

    timer = Timer()

    for epoch in range(1, cfg.max_epoch + 1):
        # torch.cuda.empty_cache()
        lr_scheduler.step()

        model.train()

        tl = Averager()
        ta = Averager()

        for i, batch in enumerate(train_loader, 1):
            # time.sleep(100)
            torch.cuda.empty_cache()
            data, _ = [_.cuda() for _ in batch]
            p = cfg.shot * cfg.train_way
            data_shot, data_query = data[:p], data[p:]

            proto = model(data_shot)
            proto = proto.reshape(cfg.shot, cfg.train_way, -1).mean(dim=0)

            query_proto = model(data_query)

            p = (1 - cfg.progalambda) * proto
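# The snippet above cuts off mid-computation. In prototypical-network code the usual
# next step (shown here only as a generic sketch, not the original continuation) is to
# score every query embedding against every class prototype with a negative squared
# Euclidean distance and train with cross-entropy over the cfg.train_way classes.
def euclidean_logits(query, proto):
    # query: (n_query, dim), proto: (n_way, dim) -> logits: (n_query, n_way)
    diff = query.unsqueeze(1) - proto.unsqueeze(0)
    return -diff.pow(2).sum(dim=2)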
Example #8
def train(model,
          optimizer,
          src_loader,
          tgt_loader,
          valid_loader,
          criterion,
          log_interval=1000,
          val_interval=50,
          posp=1,
          nagp=0.5,
          params_cls=0.5,
          params_da=0.5,
          da_type='cmmd',
          max_fpr=0.01):
    global max_auc
    global max_auchead
    global min_loss
    posp = torch.FloatTensor([posp]).cuda()
    nagp = torch.FloatTensor([nagp]).cuda()
    one = torch.FloatTensor([1]).cuda()

    iter_src = iter(src_loader)
    iter_tgt = iter(tgt_loader)
    num_iter = len(src_loader)
    stoper = Stoper()

    avg_all_loss = Averager()
    avg_src_loss = Averager()
    avg_tgt_loss = Averager()
    avg_da_loss = Averager()
    start_time = time.time()
    for i in range(1, num_iter * 20):
        model.train()
        src_ids, src_values, src_seqlength, src_label, src_seq_mask = next(iter_src)
        src_ids, src_values = src_ids.cuda(), src_values.cuda()
        src_label = src_label.cuda().float()
        src_seq_mask = src_seq_mask.cuda()
        if i % len(src_loader) == 0:
            iter_src = iter(src_loader)
        if i % len(tgt_loader) == 0:
            iter_tgt = iter(tgt_loader)

        src_p = posp * src_label + nagp * (one - src_label)
        src_y, src_fea_LSTM = model(src_ids, src_values, src_seqlength,
                                    src_seq_mask, 'src')
        src_loss = torch.mean(
            src_p * criterion(src_y, src_label)
        )  # + torch.mean(src_p * criterion(src_spey, src_label))

        tgt_ids, tgt_values, tgt_seqlength, tgt_label, tgt_seq_mask = next(iter_tgt)
        tgt_ids, tgt_values = tgt_ids.cuda(), tgt_values.cuda()
        tgt_label = tgt_label.cuda().float()
        tgt_seq_mask = tgt_seq_mask.cuda()
        # print(tgt_seqlength, tgt_label)

        tgt_p = posp * tgt_label + nagp * (one - tgt_label)
        tgt_y, tgt_fea_LSTM, tgt_spey = model(tgt_ids, tgt_values,
                                              tgt_seqlength, tgt_seq_mask,
                                              'tgt')
        tgt_loss = torch.mean(
            tgt_p * criterion(tgt_y, tgt_label)
        )  # + 0.5 * torch.mean(tgt_p * criterion(tgt_spey, tgt_label))
        if da_type == 'cmmd':
            da_loss = cmmd(src_fea_LSTM, tgt_fea_LSTM, src_label.long(),
                           tgt_label.long())
        elif da_type == 'mmd':
            da_loss = mmd_rbf_noaccelerate(src_fea_LSTM, tgt_fea_LSTM)
        elif da_type == 'coral':
            da_loss = coral(src_fea_LSTM, tgt_fea_LSTM)
        elif da_type == 'euclidian':
            da_loss = euclidian(src_fea_LSTM, tgt_fea_LSTM)
        elif da_type == 'c_euclidian':
            da_loss = c_euclidian(src_fea_LSTM, tgt_fea_LSTM, src_label.long(),
                                  tgt_label.long())
        elif da_type == 'nometric':
            da_loss = nometric(src_fea_LSTM, tgt_fea_LSTM)
        elif da_type == 'ced':
            da_loss = ced(src_fea_LSTM, tgt_fea_LSTM, src_label.long(),
                          tgt_label.long())
        # ramp the domain-adaptation weight from 0 toward 1 over training (DANN-style schedule)
        lambd = 2 / (1 + math.exp((-5 * i) / len(src_loader))) - 1
        loss = params_cls * src_loss + tgt_loss + params_da * lambd * da_loss
        model.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()
        avg_all_loss.add(loss.item())
        avg_src_loss.add(src_loss.item())
        avg_tgt_loss.add(tgt_loss.item())
        avg_da_loss.add(da_loss.item())
        if (i + 1) % log_interval == 0:
            print(
                'step: {}, loss: {:.4f}, src_loss: {:.4f}, tgt_loss: {:.4f}, {}_loss: {:.4f}, lambda: {}'
                .format(i + 1, avg_all_loss.item(), avg_src_loss.item(),
                        avg_tgt_loss.item(), da_type, avg_da_loss.item(),
                        lambd))
            avg_all_loss = Averager()
            avg_src_loss = Averager()
            avg_tgt_loss = Averager()
            avg_da_loss = Averager()

        if (i + 1) % val_interval == 0:
            end_time = time.time()
            print('train time (s):', end_time - start_time)
            start_time = time.time()
            auc_head, loss, auc = test(model, valid_loader, criterion, posp,
                                       max_fpr)
            if loss < min_loss:
                min_loss = loss
            if auc > max_auc:
                max_auc = auc
            if auc_head > max_auchead:
                torch.save(model, f'{save_dir}/tmp.pt')
                max_auchead = auc_head
            print(
                'dev ---  auchead: {:.4f}, max_auchead: {:.4f}, auc: {:.4f}, max_auc: {:.4f}, loss: {:.4f}, minloss: {:.4f}'
                .format(auc_head, max_auchead, auc, max_auc, loss, min_loss))
            end_time = time.time()
            print('dev time (s):', end_time - start_time)
            start_time = time.time()
            if stoper.add(auc_head):
                print('training end')
                break
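# Stoper is not shown on this page. A minimal early-stopping sketch consistent with how
# it is called above (add() returns True when training should stop); the patience value
# and the exact stopping rule are assumptions.
class Stoper(object):
    def __init__(self, patience=10):
        self.patience = patience
        self.best = float('-inf')
        self.bad_steps = 0

    def add(self, metric):
        # reset the counter on improvement, otherwise count a bad validation step
        if metric > self.best:
            self.best = metric
            self.bad_steps = 0
        else:
            self.bad_steps += 1
        return self.bad_steps >= self.patience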
Example #9
    def transform(self, stu, que, ans) -> pd.DataFrame:
        """
        Main method to calculate and preprocess students' features and append textual embeddings

        :param stu: students dataframe with preprocessed textual columns
        :param que: questions dataframe with preprocessed textual columns
        :param ans: answers dataframe with preprocessed textual columns
        :return: dataframe of students' ids, timestamps and model-friendly students' features after that timestamp
        """
        stu['students_state'] = stu['students_location'].apply(
            lambda s: str(s).split(', ')[-1])

        que['questions_body_length'] = que['questions_body'].apply(
            lambda s: len(str(s)))
        ans['answers_body_length'] = ans['answers_body'].apply(
            lambda s: len(str(s)))

        # prepare all the dataframes needed for iteration
        que_change = stu.merge(que,
                               left_on='students_id',
                               right_on='questions_author_id')
        ans_change = que_change.merge(ans, left_on='questions_id', right_on='answers_question_id') \
            .rename(columns={'answers_date_added': 'students_time'})

        # add new columns used to determine which type of change each stacked DataFrame row corresponds to
        ans_change['change_type'] = 'answer'
        que_change['change_type'] = 'question'
        que_change = que_change.rename(
            columns={'questions_date_added': 'students_time'})

        # stack the two DataFrames to form the resulting one for iteration
        df = pd.concat([que_change, ans_change], ignore_index=True,
                       sort=True).sort_values('students_time')

        # data is a dict mapping a student's id to a list of feature snapshots;
        # each list contains dicts mapping feature name to its value at a particular moment
        data = {}
        avgs = {}

        for i, row in stu.iterrows():
            cur_stu = row['students_id']

            # DEFAULT CASE
            # student's feature values before they asked any questions
            if cur_stu not in data:
                new = {
                    'students_questions_asked': 0,
                    'students_previous_question_time':
                    row['students_date_joined']
                }
                for feature in ['students_time'
                                ] + self.features['numerical']['mean']:
                    new[feature] = None
                data[cur_stu] = [new]
                avgs[cur_stu] = {
                    feature: Averager()
                    for feature in self.features['numerical']['mean']
                }

        for i, row in df.iterrows():
            cur_stu = row['students_id']

            # features on previous timestamp
            prv = data[cur_stu][-1]
            new = prv.copy()

            new['students_time'] = row['students_time']

            # UPDATE RULES
            # if the current change is a new question, update question-dependent features
            if row['change_type'] == 'question':
                new['students_questions_asked'] += 1
                new['students_previous_question_time'] = row[
                    'questions_date_added']
                new['students_average_question_body_length'] = row[
                    'questions_body_length']
            # if a new answer is added, update answer-dependent features
            else:
                new['students_average_answer_body_length'] = row[
                    'answers_body_length']
                new['students_average_answer_amount'] = new['students_average_answer_amount'] + 1 \
                    if new['students_average_answer_amount'] is not None else 1

            # NORMALIZE AVERAGE FEATURES
            for feature in ['students_average_question_body_length'] if row['change_type'] == 'question' else \
                    ['students_average_answer_body_length', 'students_average_answer_amount']:
                avgs[cur_stu][feature].upd(new[feature])
                new[feature] = avgs[cur_stu][feature].get()

            data[cur_stu].append(new)

        # construct a DataFrame out of dict of list of feature dicts
        df = pd.DataFrame([{
            **f,
            **{
                'students_id': id
            }
        } for (id, fs) in data.items() for f in fs])

        df = df.merge(stu, on='students_id')
        # launch feature pre-processing
        self.preprocess(df)

        # re-order the columns
        df = df[['students_id', 'students_time'] + self.features['all']]

        return df
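# This example uses an Averager with a different interface (upd / get) from the training
# snippets above. A minimal running-mean sketch consistent with these calls; the helper
# in the original project may differ.
class Averager:
    def __init__(self):
        self.sum = 0.0
        self.cnt = 0

    def upd(self, value):
        # accumulate a new observation of the feature
        self.sum += value
        self.cnt += 1

    def get(self):
        # current running mean (None before the first update)
        return self.sum / self.cnt if self.cnt else None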