Example #1
0
    def fit(self, train_loader, valid_loader, start_epoch=0, max_epochs=200):

        best_acc = 0.
        bar = IncrementalBar(max=max_epochs - start_epoch)
        for e in range(start_epoch, max_epochs):
            bar.message = '{:>5.2f}%'.format(bar.percent)
            bar.suffix = '{}/{} [{}<{}\t{:.2f}it/s]'.format(
                bar.index, bar.max, bar.elapsed_td, bar.eta_td, bar.avg)
            bar.next()
            if e in self.milestones[:3]:
                schedule_lr(self.optimizer)  # decay the learning rate at each milestone
            self.train(train_loader, self.model, self.criterion,
                       self.optimizer, e)

            accuracy, best_threshold, roc_curve_tensor = self.evaluate(
                self.conf, *valid_loader['agedb_30'])
            self.board_val('agedb_30', accuracy, best_threshold,
                           roc_curve_tensor, e)
            if accuracy > best_acc:
                best_acc = accuracy
                save_checkpoint(self.model, self.optimizer, self.conf,
                                best_acc, e)
        bar.finish()
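Each snippet in this listing steps the learning rate by calling a schedule_lr helper whose definition is not shown. A minimal sketch of what such a helper could look like, assuming a simple multiplicative decay over all parameter groups (the exact factor used in the original code is not shown):

def schedule_lr(optimizer, factor=0.1):
    # Hypothetical helper: multiply the learning rate of every parameter group by
    # `factor`, e.g. factor=0.1 divides the LR by 10 at each milestone/stage.
    for param_group in optimizer.param_groups:
        param_group['lr'] *= factor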
Example #2
    DISP_FREQ = len(train_loader) // 100  # frequency to display training loss & acc

    NUM_EPOCH_WARM_UP = NUM_EPOCH // 25  # use the first 1/25 of the epochs to warm up
    NUM_BATCH_WARM_UP = len(train_loader) * NUM_EPOCH_WARM_UP  # total number of warm-up batches
    batch = 0  # batch index
    lambda_t = 0.3
    L1_LOSS = nn.L1Loss()
    MSE_LOSS = nn.MSELoss()
    for epoch in range(NUM_EPOCH):  # start training process

        # Adjust the LR at each training stage after warm-up; you can also adjust the LR
        # manually (with a slight modification) once a plateau is observed.
        if epoch in STAGES[:3]:
            schedule_lr(OPTIMIZER)

        BACKBONE.eval()  # BACKBONE stays in eval mode; only MaskNet is put in train mode
        MaskNet.train()
        losses = AverageMeter()
        l1_losses = AverageMeter()
        mse_losses = AverageMeter()

        for inputs, labels in tqdm(iter(train_loader)):

            if (epoch + 1 <= NUM_EPOCH_WARM_UP) and (
                    batch + 1 <= NUM_BATCH_WARM_UP
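The snippet above breaks off inside the warm-up check. A common pattern at this point is a per-batch linear ramp of the learning rate over the first NUM_BATCH_WARM_UP batches; a sketch under that assumption, using a hypothetical warm_up_lr helper and a base learning rate LR:

def warm_up_lr(batch, num_batch_warm_up, init_lr, optimizer):
    # Hypothetical helper: ramp the LR linearly from ~0 up to init_lr across the warm-up batches.
    for param_group in optimizer.param_groups:
        param_group['lr'] = batch * init_lr / num_batch_warm_up

# Inside the batch loop, the truncated condition would then typically gate a call such as:
#     if (epoch + 1 <= NUM_EPOCH_WARM_UP) and (batch + 1 <= NUM_BATCH_WARM_UP):
#         warm_up_lr(batch + 1, NUM_BATCH_WARM_UP, LR, OPTIMIZER)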
Example #3
0
def train(ds_train, ds_adapt, args):
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    print('='*30)
    print('USE_CUDA SET TO: {}'.format(use_cuda))
    print('CUDA AVAILABLE?: {}'.format(torch.cuda.is_available()))
    print('='*30)
    device = torch.device("cuda" if use_cuda else "cpu")

    writer = SummaryWriter(comment=os.path.basename(args.cfg))
    num_classes = ds_train.num_classes

    if args.model_type == 'XTDNN':
        generator = XTDNN()
    elif args.model_type == 'ETDNN':
        generator = ETDNN()
    elif args.model_type == 'FTDNN':
        generator = FTDNN()

    if args.loss_type == 'adm':
        classifier = AMSMLoss(512, num_classes)
    elif args.loss_type == 'adacos':
        classifier = AdaCos(512, num_classes)
    elif args.loss_type == 'l2softmax':
        classifier = L2SoftMax(512, num_classes)
    elif args.loss_type == 'softmax':
        classifier = SoftMax(512, num_classes)
    elif args.loss_type == 'xvec':
        classifier = XVecHead(512, num_classes)
    elif args.loss_type == 'arcface':
        classifier = ArcFace(512, num_classes)
    elif args.loss_type == 'sphereface':
        classifier = SphereFace(512, num_classes)


    generator.train()
    classifier.train()

    generator = generator.to(device)
    classifier = classifier.to(device)

    if args.resume_checkpoint != 0:
        model_str = os.path.join(args.model_dir, '{}_{}.pt')
        for model, modelstr in [(generator, 'g'), (classifier, 'c')]:
            model.load_state_dict(torch.load(model_str.format(modelstr, args.resume_checkpoint)))

    if args.use_dropadapt and args.use_dropclass:
        model_str = os.path.join(args.model_dir, '{}_adapt_start.pt')
        for model, modelstr in [(generator, 'g'), (classifier, 'c')]:
            model_path = model_str.format(modelstr)
            assert os.path.isfile(model_path), "Couldn't find [g|c]_adapt_start.pt models in {}".format(args.model_dir)
            model.load_state_dict(torch.load(model_path))

    optimizer = torch.optim.SGD(
        [{'params': generator.parameters(), 'lr': args.lr},
         {'params': classifier.parameters(), 'lr': args.lr * args.classifier_lr_mult}],
        momentum=args.momentum)

    if args.label_smooth_type == 'None':
        criterion = nn.CrossEntropyLoss()
    elif args.label_smooth_type == 'disturb':
        criterion = DisturbLabelLoss(device, disturb_prob=args.label_smooth_prob)
    elif args.label_smooth_type == 'uniform':
        criterion = LabelSmoothingLoss(smoothing=args.label_smooth_prob)

    iterations = 0

    total_loss = 0
    running_loss = [np.nan for _ in range(500)]

    best_vc1_eer = (-1, 1.0)
    best_sitw_eer = (-1, 1.0)

    if os.path.isfile(args.results_pkl):
        rpkl = pickle.load(open(args.results_pkl, "rb"))
        if args.test_data_vc1:
            v1eers = [(rpkl[key]['vc1_eer'], key) for key in rpkl]
            bestvc1 = min(v1eers)
            best_vc1_eer = (bestvc1[1], bestvc1[0])
        if args.test_data_sitw:
            sitweers = [(rpkl[key]['sitw_eer'], key) for key in rpkl]
            bestsitw = min(sitweers)
            best_sitw_eer = (bestsitw[1], bestsitw[0])
    else:
        rpkl = OrderedDict({})

    if args.multi_gpu:
        dpp_generator = nn.DataParallel(generator).to(device)

    data_generator = ds_train.get_batches(batch_size=args.batch_size, max_seq_len=args.max_seq_len)

    if args.use_dropclass:
        classifier.drop()
    else:
        classifier.nodrop()

    if args.model_type == 'FTDNN':
        drop_indexes = np.linspace(0, 1, args.num_iterations)
        drop_sch = ([0, 0.5, 1], [0, 0.5, 0])
        drop_schedule = np.interp(drop_indexes, drop_sch[0], drop_sch[1])

    for iterations in range(1, args.num_iterations + 1):
        if iterations > args.num_iterations:
            break
        if iterations in args.scheduler_steps:
            schedule_lr(optimizer, factor=args.scheduler_lambda)
        if iterations <= args.resume_checkpoint:
            print('Skipping iteration {}'.format(iterations))
            print('Skipping iteration {}'.format(iterations), file=open(args.log_file, "a"))
            continue

        if args.model_type == 'FTDNN':
            generator.set_dropout_alpha(drop_schedule[iterations-1])

        if args.use_dropclass and not args.drop_per_batch and not args.use_dropadapt:
            if iterations % args.its_per_drop == 0 or iterations == 1:
                ds_train, classifier = drop_classes(ds_train, classifier, num_drop=args.num_drop)
                if args.reset_affine_each_it:
                    classifier.reset_parameters()

        if args.use_dropclass and args.use_dropadapt:
            if iterations % args.its_per_drop == 0 or iterations == 2:
                # iteration 2 (rather than 1) is used so that one batch has already run and
                # 'reserved' CUDA memory; doing this at iterations == 1 fails
                if args.dropadapt_random:
                    ds_train, classifier = drop_adapt_random(classifier, ds_train, num_drop=args.num_drop)
                else:
                    with torch.no_grad():
                        print('------ [{}/{}] classes remaining'.format(len(classifier.rem_classes), classifier.n_classes))
                        print('------ Aggregating training class probs on {}'.format(args.ds_adapt))
                        full_probs = aggregate_probs(ds_adapt, generator, classifier, device,
                                                        batch_size=300, max_seq_len=args.max_seq_len, uniform=args.dropadapt_uniform_agg)
                        np.save(os.path.join(args.model_dir, 'probs_{}.npy'.format(iterations)), full_probs)
                        print('------ Dropping ~{} more classes from the next {} training steps'.format(args.num_drop, args.its_per_drop))
                        if args.dropadapt_combine:
                            print('------ Combining least probable classes into one...')
                            ds_train, classifier = drop_adapt_combine(full_probs, classifier, ds_train, num_drop=args.num_drop)
                        else:
                            if args.dropadapt_onlydata:
                                ds_train = drop_adapt_onlydata(full_probs, ds_train, num_drop=args.num_drop)
                            else:
                                ds_train, classifier = drop_adapt(full_probs, classifier, ds_train, num_drop=args.num_drop)
                        print('------ [{}/{}] classes remaining'.format(len(classifier.rem_classes), classifier.n_classes))
                        np.save(os.path.join(args.model_dir, 'remclasses_{}.npy'.format(iterations)), classifier.rem_classes)
                        del full_probs

        feats, iden = next(data_generator)

        if args.drop_per_batch and args.use_dropclass:
            classifier = drop_per_batch(iden, classifier)
            if args.reset_affine_each_it:
                classifier.reset_parameters()

        feats = feats.to(device)

        if args.use_dropclass:
            iden = classifier.get_mini_labels(iden).to(device)
        else:
            iden = torch.LongTensor(iden).to(device)

        if args.multi_gpu:
            embeds = dpp_generator(feats)
        else:
            embeds = generator(feats)

        if args.loss_type == 'softmax':
            preds = classifier(embeds)
        else:
            preds = classifier(embeds, iden)

        loss = criterion(preds, iden)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if args.model_type == 'FTDNN':
            generator.step_ftdnn_layers()

        running_loss.pop(0)
        running_loss.append(loss.item())
        rmean_loss = np.nanmean(np.array(running_loss))

        if iterations % 10 == 0:
            msg = "{}: {}: [{}/{}] \t C-Loss:{:.4f}, AvgLoss:{:.4f}, lr: {}, bs: {}".format(
                args.model_dir,
                time.ctime(),
                iterations,
                args.num_iterations,
                loss.item(),
                rmean_loss,
                get_lr(optimizer),
                len(feats))
            print(msg)
            print(msg, file=open(args.log_file, "a"))

        writer.add_scalar('class loss', loss.item(), iterations)
        writer.add_scalar('Avg loss', rmean_loss, iterations)

        if iterations % args.checkpoint_interval == 0:
            for model, modelstr in [(generator, 'g'), (classifier, 'c')]:
                model.eval().cpu()
                cp_filename = "{}_{}.pt".format(modelstr, iterations)
                cp_model_path = os.path.join(args.model_dir, cp_filename)
                torch.save(model.state_dict(), cp_model_path)
                model.to(device).train()

            rpkl[iterations] = {}

            if args.test_data_vc1:
                vc1_eer = test(generator, ds_test_vc1, device)
                print('EER on VoxCeleb1: {}'.format(vc1_eer))
                print('EER on VoxCeleb1: {}'.format(vc1_eer), file=open(args.log_file, "a"))
                writer.add_scalar('vc1_eer', vc1_eer, iterations)
                if vc1_eer < best_vc1_eer[1]:
                    best_vc1_eer = (iterations, vc1_eer)
                print('Best VC1 EER: {}'.format(best_vc1_eer))
                print('Best VC1 EER: {}'.format(best_vc1_eer), file=open(args.log_file, "a"))
                rpkl[iterations]['vc1_eer'] = vc1_eer

            if args.test_data_sitw:
                sitw_eer = test_nosil(generator, ds_test_sitw, device)
                print('EER on SITW(DEV): {}'.format(sitw_eer))
                print('EER on SITW(DEV): {}'.format(sitw_eer), file=open(args.log_file, "a"))
                writer.add_scalar('sitw_eer', sitw_eer, iterations)
                if sitw_eer < best_sitw_eer[1]:
                    best_sitw_eer = (iterations, sitw_eer)
                print('Best SITW(DEV) EER: {}'.format(best_sitw_eer))
                print('Best SITW(DEV) EER: {}'.format(best_sitw_eer), file=open(args.log_file, "a"))
                rpkl[iterations]['sitw_eer'] = sitw_eer
            
            pickle.dump(rpkl, open(args.results_pkl, "wb"))

    # ---- Final model saving -----
    for model, modelstr in [(generator, 'g'), (classifier, 'c')]:
        model.eval().cpu()
        cp_filename = "final_{}_{}.pt".format(modelstr, iterations)
        cp_model_path = os.path.join(args.model_dir, cp_filename)
        torch.save(model.state_dict(), cp_model_path)
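Both train() functions log the current learning rate through a get_lr helper that is not included in the snippets. A minimal sketch, assuming it simply reads the LR of each parameter group:

def get_lr(optimizer):
    # Hypothetical helper: return the current learning rate(s) of the optimizer.
    return [param_group['lr'] for param_group in optimizer.param_groups]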
Example #4
0
def train(ds_train):
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    print('=' * 30)
    print('USE_CUDA SET TO: {}'.format(use_cuda))
    print('CUDA AVAILABLE?: {}'.format(torch.cuda.is_available()))
    print('=' * 30)
    device = torch.device("cuda" if use_cuda else "cpu")

    writer = SummaryWriter(comment=os.path.basename(args.cfg))

    if args.model_type == 'XTDNN':
        generator = XTDNN(features_per_frame=args.input_dim, embed_features=args.embedding_dim)
    elif args.model_type == 'ETDNN':
        generator = ETDNN(features_per_frame=args.input_dim, embed_features=args.embedding_dim)
    elif args.model_type == 'FTDNN':
        generator = FTDNN(in_dim=args.input_dim, embedding_dim=args.embedding_dim)

    generator.train()
    generator = generator.to(device)

    model_dict = {'generator': {'model': generator, 'lr_mult': 1., 'loss_weight': None}}
    clf_head_dict = {k: {'model': None, 'lr_mult': lr_mult, 'loss_weight': loss_weight} for k, lr_mult, loss_weight in
                     zip(args.classifier_heads, args.classifier_lr_mults, args.classifier_loss_weights)}

    num_cls_per_task = [ds_train.num_classes[t] for t in args.classifier_heads]

    for clf_target, clf_type, num_classes, clf_smooth_type in zip(args.classifier_heads, 
                                                                    args.classifier_types, 
                                                                    num_cls_per_task, 
                                                                    args.classifier_smooth_types):
        if clf_type == 'adm':
            clf = AMSMLoss(args.embedding_dim, num_classes)
        elif clf_type == 'adacos':
            clf = AdaCos(args.embedding_dim, num_classes)
        elif clf_type == 'l2softmax':
            clf = L2SoftMax(args.embedding_dim, num_classes)
        elif clf_type == 'softmax':
            clf = SoftMax(args.embedding_dim, num_classes)
        elif clf_type == 'xvec':
            clf = XVecHead(args.embedding_dim, num_classes)
        elif clf_type == 'xvec_regression':
            clf = XVecHead(args.embedding_dim, 1)
        elif clf_type == 'xvec_uncertain':
            clf = XVecHeadUncertain(args.embedding_dim, num_classes)
        elif clf_type == 'arcface':
            clf = ArcFace(args.embedding_dim, num_classes)
        elif clf_type == 'sphereface':
            clf = SphereFace(args.embedding_dim, num_classes)
        else:
            raise ValueError('Classifier type {} not found'.format(clf_type))

        if clf_head_dict[clf_target]['loss_weight'] >= 0.0:
            clf_head_dict[clf_target]['model'] = clf.train().to(device)
        else:
            # GRL for negative loss weight
            abs_lw = np.abs(clf_head_dict[clf_target]['loss_weight'])
            clf_head_dict[clf_target]['model'] = nn.Sequential(
                                                                GradientReversal(lambda_=abs_lw),
                                                                clf
                                                                ).train().to(device)
            clf_head_dict[clf_target]['loss_weight'] = 1.0 # this is lambda_ in the GRL

        if clf_smooth_type == 'none':
            if clf_target.endswith('regression'):
                clf_smooth = nn.SmoothL1Loss()
            else:
                clf_smooth = nn.CrossEntropyLoss()
        elif clf_smooth_type == 'twoneighbour':
            clf_smooth = TwoNeighbourSmoothingLoss(smoothing=args.label_smooth_prob)
        elif clf_smooth_type == 'uniform':
            clf_smooth = LabelSmoothingLoss(smoothing=args.label_smooth_prob)
        elif clf_smooth_type == 'disturb':
            clf_smooth = DisturbLabelLoss(device, disturb_prob=args.label_smooth_prob)
        else:
            raise ValueError('Smooth type not found: {}'.format(clf_smooth_type))
        
        clf_head_dict[clf_target]['criterion'] = clf_smooth

    model_dict.update(clf_head_dict)

    if args.classifier_loss_weighting_type == 'uncertainty_kendall':
        model_dict['loss_aggregator'] = {
                                            'model': MultiTaskUncertaintyLossKendall(len(args.classifier_heads)).to(device),
                                            'lr_mult': 1.,
                                            'loss_weight': None
                                        }
    if args.classifier_loss_weighting_type == 'uncertainty_liebel':
        model_dict['loss_aggregator'] = {
                                            'model': MultiTaskUncertaintyLossLiebel(len(args.classifier_heads)).to(device),
                                            'lr_mult': 1.,
                                            'loss_weight': None
                                        }

    if args.resume_checkpoint != 0:
        model_str = os.path.join(args.model_dir, '{}_{}.pt')
        for m in model_dict:
            model_dict[m]['model'].load_state_dict(torch.load(model_str.format(m, args.resume_checkpoint)))

    optimizer = torch.optim.SGD(
        [{'params': model_dict[m]['model'].parameters(), 'lr': args.lr * model_dict[m]['lr_mult']} for m in model_dict],
        momentum=args.momentum)


    iterations = 0

    total_loss = 0
    running_loss = [np.nan for _ in range(500)]

    non_spk_clf_heads = [a for a in args.classifier_heads if a != 'speaker']
    
    best_test_eer = (-1, 1.0)
    best_test_dcf = (-1, 1.0)
    best_acc = {k: (-1, 0.0) for k in non_spk_clf_heads}

    if os.path.isfile(args.results_pkl) and args.resume_checkpoint != 0:
        rpkl = pickle.load(open(args.results_pkl, "rb"))
        keylist = list(rpkl.keys())

        if args.test_data:
            test_eers = [(rpkl[key]['test_eer'], key) for key in rpkl]
            best_teer = min(test_eers)
            best_test_eer = (best_teer[1], best_teer[0])

            test_dcfs = [(rpkl[key]['test_dcf'], key) for key in rpkl]
            besttest_dcf = min(test_dcfs)
            best_test_dcf = (besttest_dcf[1], besttest_dcf[0])

    else:
        rpkl = OrderedDict({})

    if args.multi_gpu:
        dpp_generator = nn.DataParallel(generator).to(device)

    data_generator = ds_train.get_batches(batch_size=args.batch_size, max_seq_len=args.max_seq_len)

    if args.model_type == 'FTDNN':
        drop_indexes = np.linspace(0, 1, args.num_iterations)
        drop_sch = ([0, 0.5, 1], [0, 0.5, 0])
        drop_schedule = np.interp(drop_indexes, drop_sch[0], drop_sch[1])

    for iterations in range(1, args.num_iterations + 1):
        if iterations > args.num_iterations:
            break
        if iterations in args.scheduler_steps:
            schedule_lr(optimizer, factor=args.scheduler_lambda)
        if iterations <= args.resume_checkpoint:
            print('Skipping iteration {}'.format(iterations), file=open(args.log_file, "a"))
            continue

        if args.model_type == 'FTDNN':
            if args.dropout:
                generator.set_dropout_alpha(drop_schedule[iterations - 1])

        feats, labels = next(data_generator)
        feats = feats.to(device)

        if args.multi_gpu:
            embeds = dpp_generator(feats)
        else:
            embeds = generator(feats)

        total_loss = 0
        losses = []

        loss_tensors = []

        for m in args.classifier_heads:
            lab = labels[m].to(device)
            if m == 'rec':
                preds = model_dict[m]['model'](embeds)
            else:
                preds = model_dict[m]['model'](embeds, lab)
            loss = model_dict[m]['criterion'](preds, lab)
            if args.classifier_loss_weighting_type == 'none':
                total_loss += loss * model_dict[m]['loss_weight']
            else:
                loss_tensors.append(loss)
            losses.append(round(loss.item(), 4))

        if args.classifier_loss_weighting_type.startswith('uncertainty'):
            # stack (rather than rebuild) the losses so the graph is kept and gradients
            # still flow back through each task head
            loss_tensors = torch.stack(loss_tensors).to(device)
            total_loss = model_dict['loss_aggregator']['model'](loss_tensors)

        if args.classifier_loss_weighting_type == 'dwa':
            # Dynamic Weight Averaging: weight each task loss by the ratio of its two previous values
            if iterations < 4:
                loss_t_1 = np.ones(len(loss_tensors))
                for l in loss_tensors:
                    total_loss += l
            else:
                dwa_w = loss_t_1/loss_t_2
                K = len(loss_tensors)
                per_task_weight = torch.FloatTensor(dwa_w/args.dwa_temperature) #lambda_k
                per_task_weight = torch.nn.functional.softmax(per_task_weight, dim=0) * K
                per_task_weight = per_task_weight.numpy()
                for l, w in zip(loss_tensors, per_task_weight):
                    total_loss += l * w

            loss_t_2 = loss_t_1.copy()
            loss_t_1 = torch.FloatTensor(loss_tensors).detach().cpu().numpy()

            
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        if args.model_type == 'FTDNN':
            generator.step_ftdnn_layers()

        running_loss.pop(0)
        running_loss.append(total_loss.item())
        rmean_loss = np.nanmean(np.array(running_loss))

        if iterations % 10 == 0:
            msg = "{}: {}: [{}/{}] \t C-Loss:{:.4f}, AvgLoss:{:.4f}, losses: {}, lr: {}, bs: {}".format(
                args.model_dir,
                time.ctime(),
                iterations,
                args.num_iterations,
                total_loss.item(),
                rmean_loss,
                losses,
                get_lr(optimizer),
                len(feats))
            print(msg)
            print(msg, file=open(args.log_file, "a"))

        writer.add_scalar('combined loss', total_loss.item(), iterations)
        writer.add_scalar('Avg loss', rmean_loss, iterations)

        if iterations % args.checkpoint_interval == 0:
            for m in model_dict:
                model_dict[m]['model'].eval().cpu()
                cp_filename = "{}_{}.pt".format(m, iterations)
                cp_model_path = os.path.join(args.model_dir, cp_filename)
                torch.save(model_dict[m]['model'].state_dict(), cp_model_path)
                model_dict[m]['model'].to(device).train()

            if args.test_data:
                rpkl, best_test_eer, best_test_dcf = eval_step(model_dict, device, ds_test, iterations, rpkl, writer,
                                                           best_test_eer, best_test_dcf, best_acc)

    # ---- Final model saving -----
    for m in model_dict:
        model_dict[m]['model'].eval().cpu()
        cp_filename = "final_{}_{}.pt".format(m, iterations)
        cp_model_path = os.path.join(args.model_dir, cp_filename)
        torch.save(model_dict[m]['model'].state_dict(), cp_model_path)
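Example #4 can aggregate the per-head losses with MultiTaskUncertaintyLossKendall, whose implementation is not shown. A sketch of the homoscedastic-uncertainty weighting of Kendall et al. (2018) that such a module typically implements; the parametrisation below (one learnable log-variance per task) is an assumption, not the repository's actual code:

import torch
import torch.nn as nn

class MultiTaskUncertaintyLossKendall(nn.Module):
    # Learns one log-variance per task; each loss is weighted by exp(-log_var) and
    # log_var is added as a regulariser so the weights cannot collapse to zero.
    def __init__(self, num_tasks):
        super().__init__()
        self.log_vars = nn.Parameter(torch.zeros(num_tasks))

    def forward(self, losses):
        # losses: 1-D tensor of per-task losses, e.g. torch.stack(loss_tensors)
        return torch.sum(torch.exp(-self.log_vars) * losses + self.log_vars)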