Example #1
def DUN_batch_time(net,
                   savefile,
                   dset,
                   data_dir,
                   batch_size=256,
                   cuda=True,
                   gpu=None,
                   MC_samples=0,
                   workers=4,
                   big_data=False):
    _, _, val_loader, _, _, _ = \
        get_image_loader(dset, batch_size, cuda=cuda, workers=workers, distributed=False, data_dir=data_dir)

    net.load(savefile)

    times = []
    for i, (images, _) in enumerate(val_loader):
        if big_data and i > 5:  # for large datasets, only time a handful of batches
            break

        data_time = time.time()

        _ = net.fast_predict(images).data

        batch_time = time.time() - data_time
        if not (big_data and i == 0):  # skip the first (warm-up) batch on large datasets
            times.append(batch_time)

    return np.mean(times)
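A possible call site for this timing helper (a sketch only; `dun_net`, the checkpoint path, and the dataset name are illustrative assumptions, not taken from the code above):

# `dun_net` is assumed to be an already-built DUN wrapper exposing .load() and .fast_predict().
mean_time = DUN_batch_time(dun_net,
                           savefile='saves/DUN/checkpoint.pth.tar',
                           dset='CIFAR10',
                           data_dir='../data',
                           batch_size=256,
                           cuda=True)
print('mean prediction time per batch: %.4f s' % mean_time)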
Example #2
def ensemble_batch_time(model,
                        savefile_list,
                        dset,
                        data_dir,
                        batch_size=256,
                        cuda=True,
                        gpu=None,
                        MC_samples=0,
                        workers=4,
                        big_data=False):
    _, _, val_loader, _, _, _ = \
        get_image_loader(dset, batch_size, cuda=cuda, workers=workers, distributed=False, data_dir=data_dir)

    times = []
    for i, (images, target) in enumerate(val_loader):
        if big_data and i > 5:  # for large datasets, only time a handful of batches
            break

        data_time = time.time()
        if cuda:
            # move the batch to the requested device (gpu=None falls back to the current CUDA device)
            images = images.cuda(gpu, non_blocking=True)
            target = target.cuda(gpu, non_blocking=True)

        _ = ensemble_time_preds(model, savefile_list, images, gpu)

        batch_time = time.time() - data_time
        if not (big_data and i == 0):  # skip the first (warm-up) batch on large datasets
            times.append(batch_time)

    return np.mean(times)
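When run on GPU, the wall-clock timings above may include asynchronous CUDA launches. A minimal sketch of a stricter per-batch timer, assuming one wants to synchronise the device before and after the forward pass (the helper name `timed_forward` is hypothetical, not part of the code above):

import time

import torch


def timed_forward(predict_fn, batch, cuda=True):
    # Synchronise so previously queued CUDA work is not attributed to this batch.
    if cuda:
        torch.cuda.synchronize()
    start = time.time()
    out = predict_fn(batch)
    if cuda:
        torch.cuda.synchronize()  # wait for the forward pass to actually finish
    return out, time.time() - start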
Example #3
def DUN_test_stats(net,
                   savefile,
                   dset,
                   data_dir,
                   corruption=None,
                   rotation=None,
                   batch_size=256,
                   cuda=True,
                   gpu=None,
                   MC_samples=0,
                   workers=4,
                   d_posterior=None):
    assert not (corruption is not None and rotation is not None)
    if corruption is None and rotation is None:
        _, _, val_loader, _, _, _ = \
            get_image_loader(dset, batch_size, cuda=cuda, workers=workers, distributed=False, data_dir=data_dir)
    elif corruption is not None:
        val_loader = load_corrupted_dataset(dset,
                                            severity=corruption,
                                            data_dir=data_dir,
                                            batch_size=batch_size,
                                            cuda=cuda,
                                            workers=workers)
    elif rotation is not None:
        val_loader = rotate_load_dataset(dset,
                                         rotation,
                                         data_dir=data_dir,
                                         batch_size=batch_size,
                                         cuda=cuda,
                                         workers=workers)

    net.load(savefile)

    if d_posterior is not None:
        net.prob_model.current_posterior = d_posterior

    prob_vec, target_vec = get_preds_targets_DUN(net, val_loader)

    brier = class_brier(y=target_vec, probs=prob_vec, log_probs=None)
    err = class_err(y=target_vec, model_out=prob_vec)
    ll = class_ll(y=target_vec, probs=prob_vec, log_probs=None, eps=1e-40)
    ece = class_ECE(y=target_vec, probs=prob_vec, log_probs=None, nbins=10)

    return err, ll, brier, ece
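An illustrative sweep over corruption severities using this function; `dun_net`, the checkpoint path, and the dataset name are assumptions for the sketch, not taken from the code above:

# Evaluate a loaded DUN checkpoint under increasing corruption severity.
for severity in [1, 2, 3, 4, 5]:
    err, ll, brier, ece = DUN_test_stats(dun_net,
                                         savefile='saves/DUN/model_best.pth.tar',
                                         dset='CIFAR10',
                                         data_dir='../data',
                                         corruption=severity)
    print('severity %d: err %.3f, ll %.3f, brier %.3f, ece %.3f' %
          (severity, err, ll, brier, ece))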
Example #4
def train_loop(net,
               dname,
               data_dir,
               epochs=90,
               workers=4,
               resume='',
               savedir='./',
               save_all_epochs=False,
               q_nograd_its=0,
               batch_size=256):
    mkdir(savedir)
    global best_err1  # module-level best validation error, assumed to be initialised elsewhere

    # Load data here:
    _, train_loader, val_loader, _, _, Ntrain = \
        get_image_loader(dname, batch_size, cuda=True, workers=workers, distributed=False, data_dir=data_dir)

    net.N_train = Ntrain

    start_epoch = 0

    marginal_loglike = np.zeros(epochs)
    train_loss = np.zeros(epochs)
    dev_loss = np.zeros(epochs)

    err_train = np.zeros(epochs)
    err_dev = np.zeros(epochs)

    # optionally resume from a checkpoint
    if resume:
        if os.path.isfile(resume):
            print("=> loading checkpoint '{}'".format(resume))
            start_epoch, best_err1 = net.load(resume)
            print("=> loaded checkpoint '{}' (epoch {})".format(
                resume, start_epoch))
        else:
            print("=> no checkpoint found at '{}'".format(resume))

        candidate_progress_file = resume.split('/')
        candidate_progress_file = '/'.join(
            candidate_progress_file[:-1]) + '/stats_array.pkl'

        if os.path.isfile(candidate_progress_file):
            print("=> found progress file at '{}'".format(
                candidate_progress_file))
            try:
                marginal_loglike, err_train, train_loss, err_dev, dev_loss = \
                    load_object(candidate_progress_file)
                print("=> Loaded progress file at '{}'".format(
                    candidate_progress_file))
            except Exception:
                print("=> Unable to load progress file at '{}'".format(
                    candidate_progress_file))
        else:
            print("=> NOT found progress file at '{}'".format(
                candidate_progress_file))

    if q_nograd_its > 0:
        net.prob_model.q_logits.requires_grad = False

    for epoch in range(start_epoch, epochs):
        if q_nograd_its > 0 and epoch == q_nograd_its:
            net.prob_model.q_logits.requires_grad = True

        tic = time.time()
        nb_samples = 0
        for x, y in train_loader:
            marg_loglike_estimate, minus_loglike, err = net.fit(x, y)

            marginal_loglike[epoch] += marg_loglike_estimate * x.shape[0]
            err_train[epoch] += err * x.shape[0]
            train_loss[epoch] += minus_loglike * x.shape[0]
            nb_samples += len(x)

        marginal_loglike[epoch] /= nb_samples
        train_loss[epoch] /= nb_samples
        err_train[epoch] /= nb_samples

        toc = time.time()

        # ---- print
        print('\n depth approx posterior',
              net.prob_model.current_posterior.data.cpu().numpy())
        print(
            "it %d/%d, ELBO/evidence %.4f, pred minus loglike = %f, err = %f" %
            (epoch, epochs, marginal_loglike[epoch], train_loss[epoch],
             err_train[epoch]),
            end="")
        cprint('r', '   time: %f seconds\n' % (toc - tic))

        net.update_lr()

        # ---- dev
        tic = time.time()
        nb_samples = 0
        for x, y in val_loader:
            minus_loglike, err = net.eval(x, y)

            dev_loss[epoch] += minus_loglike * x.shape[0]
            err_dev[epoch] += err * x.shape[0]
            nb_samples += len(x)

        dev_loss[epoch] /= nb_samples
        err_dev[epoch] /= nb_samples

        toc = time.time()

        cprint('g',
               '     pred minus loglike = %f, err = %f\n' %
               (dev_loss[epoch], err_dev[epoch]),
               end="")
        cprint('g', '    time: %f seconds\n' % (toc - tic))

        filename = 'checkpoint.pth.tar'
        if save_all_epochs:
            filename = str(epoch) + '_' + filename
        net.save(os.path.join(savedir, filename), best_err1)
        if err_dev[epoch] < best_err1:
            best_err1 = err_dev[epoch]
            cprint('b', 'best top1 dev err: %f' % err_dev[epoch])
            shutil.copyfile(os.path.join(savedir, filename),
                            os.path.join(savedir, 'model_best.pth.tar'))

        all_results = [
            marginal_loglike, err_train, train_loss, err_dev, dev_loss
        ]
        save_object(all_results, os.path.join(savedir, 'stats_array.pkl'))
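A hedged example of invoking this training loop; the network object, dataset name, and directory layout below are placeholders, not taken from the code above:

# `dun_net` is assumed to be a DUN wrapper exposing .fit(), .eval(), .save(), .load() and
# .update_lr(); the module-level best_err1 is assumed to be initialised (e.g. to np.inf).
train_loop(dun_net,
           dname='CIFAR10',
           data_dir='../data',
           epochs=90,
           savedir='./saves/DUN_CIFAR10',
           q_nograd_its=0,
           batch_size=256)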
Example #5
                        row_to_add_proto["number"] = num_repeat

                    model_path = ("checkpoint.pth.tar" if best_or_last == "last" else "model_best.pth.tar")
                    if method != "ensemble":
                        savefile = folder / model_path
                    else:
                        model_indices = range(num_repeat*n_samples, (num_repeat + 1)*n_samples)
                        savefile = [folder.parent / "_".join(folder_split[:-1] +
                                                             [str(filt_ensembles_df["number"][int(model_idx)])])
                                    / model_path for model_idx in model_indices]

                    row_to_add_proto["n_samples"] = n_samples

                    if method == "DUN" and savefile not in no_train_posteriors.keys() and use_no_train_post:
                        _, train_loader, _, _, _, Ntrain = \
                            get_image_loader(dataset, batch_size=args.batch_size, cuda=True, workers=workers,
                                             data_dir=data_dir, distributed=False)

                        model_obj.load(savefile)
                        model_obj.N_train = Ntrain
                        notrain_post, _ = model_obj.get_exact_d_posterior(train_loader, train_bn=False,
                                                                          logposterior=False)
                        no_train_posteriors[savefile] = notrain_post.data

                    kwargs = {}
                    if method == "DUN" and use_no_train_post:
                        kwargs = {"d_posterior": no_train_posteriors[savefile]}

                    # all measurements of err, ll, ece, brier
                    for rotation, corruption in [(0, 0)] + corruptions[dataset] + rotations[dataset]:
                        row_to_add = row_to_add_proto.copy()
                        row_to_add.update({"rotation": rotation, "corruption": corruption})
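A worked instance of the ensemble-index arithmetic used above, with illustrative values:

# With 5 samples per ensemble and repeat index 1, the second disjoint
# group of checkpoints (members 5..9) is selected.
n_samples, num_repeat = 5, 1
assert list(range(num_repeat * n_samples, (num_repeat + 1) * n_samples)) == [5, 6, 7, 8, 9]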
Example #6
def ensemble_test_stats(model,
                        savefile_list,
                        dset,
                        data_dir,
                        corruption=None,
                        rotation=None,
                        batch_size=256,
                        cuda=True,
                        gpu=None,
                        MC_samples=0,
                        workers=4,
                        iterate=False):
    assert not (corruption is not None and rotation is not None)
    if corruption is None and rotation is None:
        _, _, val_loader, _, _, _ = \
            get_image_loader(dset, batch_size, cuda=cuda, workers=workers, distributed=False, data_dir=data_dir)
    elif corruption is not None:
        val_loader = load_corrupted_dataset(dset,
                                            severity=corruption,
                                            data_dir=data_dir,
                                            batch_size=batch_size,
                                            cuda=cuda,
                                            workers=workers)
    elif rotation is not None:
        val_loader = rotate_load_dataset(dset,
                                         rotation,
                                         data_dir=data_dir,
                                         batch_size=batch_size,
                                         cuda=cuda,
                                         workers=workers)

    logprob_vec, target_vec = ensemble_get_preds_targets(model,
                                                         savefile_list,
                                                         val_loader,
                                                         cuda=cuda,
                                                         gpu=gpu,
                                                         return_vector=iterate)
    if iterate:
        brier_vec = []
        err_vec = []
        ll_vec = []
        ece_vec = []

        for n_samples in range(1, logprob_vec.shape[1] + 1):
            comb_logprobs = torch.logsumexp(logprob_vec[:, :n_samples, :],
                                            dim=1,
                                            keepdim=False) - np.log(n_samples)

            brier_vec.append(
                class_brier(y=target_vec, log_probs=comb_logprobs, probs=None))
            err_vec.append(class_err(y=target_vec, model_out=comb_logprobs))
            ll_vec.append(
                class_ll(y=target_vec,
                         log_probs=comb_logprobs,
                         probs=None,
                         eps=1e-40))
            ece_vec.append(
                class_ECE(y=target_vec,
                          log_probs=comb_logprobs,
                          probs=None,
                          nbins=10))
        return err_vec, ll_vec, brier_vec, ece_vec

    brier = class_brier(y=target_vec, log_probs=logprob_vec, probs=None)
    err = class_err(y=target_vec, model_out=logprob_vec)
    ll = class_ll(y=target_vec, log_probs=logprob_vec, probs=None, eps=1e-40)
    ece = class_ECE(y=target_vec, log_probs=logprob_vec, probs=None, nbins=10)
    return err, ll, brier, ece
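A self-contained check of the log-sum-exp combination used in the `iterate` branch above: averaging member probabilities is done stably in log space as logsumexp(log p) - log(K). The tensor shapes here are illustrative.

import math

import torch

# (batch, ensemble members, classes) member log-probabilities
member_logp = torch.log_softmax(torch.randn(8, 5, 10), dim=-1)
K = member_logp.shape[1]
ens_logp = torch.logsumexp(member_logp, dim=1) - math.log(K)
# Equivalent to the log of the mean member probability, computed without underflow.
assert torch.allclose(ens_logp.exp(), member_logp.exp().mean(dim=1), atol=1e-6)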
Example #7
def train_loop(model, dname, data_dir, epochs=90, workers=4, gpu=None, resume='', weight_decay=1e-4,
               savedir='./', milestones=None, MC_samples=1, batch_size=256):
    mkdir(savedir)
    global best_acc1  # module-level best top-1 validation accuracy, assumed to be initialised elsewhere

    if gpu is not None:  # single-GPU training
        print("Use GPU: {} for training".format(gpu))
        torch.cuda.set_device(gpu)
        model = model.cuda(gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss(reduction='mean').cuda(gpu)

    # NOTE: lr and momentum are not defined in this snippet; they are assumed to be
    # module-level training settings of the surrounding code.
    optimizer = torch.optim.SGD(model.parameters(), lr,
                                momentum=momentum,
                                weight_decay=weight_decay)
    if milestones is None:  # if milestones are not specified, set to impossible value so LR is never decayed.
        milestones = [epochs + 1]
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=0.1)

    tr_acc1_vec = []
    tr_acc5_vec = []
    tr_loss_vec = []
    acc1_vec = []
    acc5_vec = []
    loss_vec = []

    start_epoch = 0
    # optionally resume from a checkpoint
    if resume:
        if os.path.isfile(resume):
            print("=> loading checkpoint '{}'".format(resume))
            if gpu is None:
                checkpoint = torch.load(resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(gpu)
                checkpoint = torch.load(resume, map_location=loc)
            start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            if gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            scheduler.load_state_dict(checkpoint['scheduler'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(resume))

        candidate_progress_file = resume.split('/')
        candidate_progress_file = '/'.join(candidate_progress_file[:-1]) + '/stats_array.pkl'

        if os.path.isfile(candidate_progress_file):
            print("=> found progress file at '{}'".format(candidate_progress_file))
            try:
                tr_acc1_vec, tr_acc5_vec, tr_loss_vec, acc1_vec, acc5_vec, loss_vec = \
                    load_object(candidate_progress_file)
                print("=> Loaded progress file at '{}'".format(candidate_progress_file))
            except Exception:
                print("=> Unable to load progress file at '{}'".format(candidate_progress_file))
        else:
            print("=> NOT found progress file at '{}'".format(candidate_progress_file))

    cudnn.benchmark = True

    _, train_loader, val_loader, _, _, _ = \
        get_image_loader(dname, batch_size, cuda=True, workers=workers, distributed=False, data_dir=data_dir)

    for epoch in range(start_epoch, epochs):

        # train for one epoch and update lr scheduler setting
        tr_acc1, tr_acc5, tr_loss = train(train_loader, model, criterion, optimizer, epoch, gpu)
        print('used lr: %f' % optimizer.param_groups[0]["lr"])
        scheduler.step()

        tr_acc1_vec.append(tr_acc1)
        tr_acc5_vec.append(tr_acc5)
        tr_loss_vec.append(tr_loss)

        # evaluate on validation set
        acc1, acc5, loss = validate(val_loader, model, criterion, gpu, MC_samples=MC_samples)

        acc1_vec.append(acc1)
        acc5_vec.append(acc5)
        loss_vec.append(loss)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_acc1': best_acc1,
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict(),
        }, is_best, savedir=savedir)

        all_results = [tr_acc1_vec, tr_acc5_vec, tr_loss_vec, acc1_vec, acc5_vec, loss_vec]
        save_object(all_results, os.path.join(savedir, 'stats_array.pkl'))
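A possible invocation of this SGD training loop; the model object, device index, milestones, and save directory below are illustrative assumptions:

# `resnet` is assumed to be a standard torchvision-style classifier; the module-level
# best_acc1, lr, and momentum are assumed to be defined before calling.
train_loop(resnet,
           dname='CIFAR10',
           data_dir='../data',
           epochs=90,
           gpu=0,
           weight_decay=1e-4,
           milestones=[40, 70],
           savedir='./saves/SGD_CIFAR10',
           batch_size=256)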