Example #1
def optimize_para(wrapper, param, target, criterion, num_step, save_prefix=None, res=False):
    """
    wrapper: image = wrapper(z / w / w+): an interface for a generator forward pass.
    param: z / w / w+
    target: (1, C, H, W)
    criterion: loss(pred, target)
    """
    param = param.to(device).requires_grad_()
    optimizer = FullBatchLBFGS([param], lr=.1, line_search='Wolfe')
    iter_count = [0]
    def closure():
        optimizer.zero_grad()
        # forward pass through the generator and compare against the target
        image = wrapper(param)
        loss = criterion(image, target)
        if iter_count[0] % 250 == 0 and save_prefix is not None:
            # visualization code
            print('iter count {} loss {:.4f}'.format(iter_count[0], loss.item()))
            iter_result = image.data.clamp_(-1, 1)
            save_images(iter_result, save_prefix + '_%d' % iter_count[0])
        return loss

    loss = closure()
    loss.backward()
    while iter_count[0] <= num_step:
        options = {'closure': closure, 'max_ls': 10}
        loss, _, lr, _, F_eval, G_eval, _, _ = optimizer.step(options)
        iter_count[0] += 1
    image = wrapper(param)
    return param, image
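A minimal call sketch for optimize_para (the generator, target tensor, and MSE criterion below are illustrative assumptions, not part of the original example; torch and the global device are assumed to be available as above):
# Hypothetical usage: project a target image into a generator's latent space.
# `generator` and `target_image` are assumed names for illustration only.
z0 = torch.randn(1, 512, device=device)            # initial latent code z
wrapper = lambda z: generator(z)                    # generator forward-pass interface
criterion = torch.nn.functional.mse_loss            # loss(pred, target)
z_opt, recon = optimize_para(wrapper, z0, target_image, criterion,
                             num_step=1000, save_prefix='proj')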
Example #2
def train(train_x,
          train_y,
          n_devices,
          output_device,
          checkpoint_size,
          preconditioner_size,
          n_training_iter,
):
    likelihood = gpytorch.likelihoods.GaussianLikelihood().to(output_device)
    model = ExactGPModel(train_x, train_y, likelihood,
                         n_devices, output_device).to(output_device)
    model.train()
    likelihood.train()
    
    optimizer = FullBatchLBFGS(model.parameters(), lr=0.1)
    # "Loss" for GPs - the marginal log likelihood
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    print("Here 1")
    
    with gpytorch.beta_features.checkpoint_kernel(checkpoint_size), gpytorch.settings.max_preconditioner_size(preconditioner_size):

        def closure():
            print("Zeroing Grads")
            optimizer.zero_grad()
            print("Zeroed Grads")
            output = model(train_x)
            print("Computed output")
            loss = -mll(output, train_y)
            print("Ran closure")
            return loss

        loss = closure()
        loss.backward()

        print("Ran backward")

        for i in range(n_training_iter):
            options = {'closure': closure, 'current_loss': loss, 'max_ls': 10}
            print("Begin Optim")
            loss, _, _, _, _, _, _, fail = optimizer.step(options)
            
            print('Iter %d/%d - Loss: %.3f   lengthscale: %.3f   noise: %.3f' % (
                i + 1, n_training_iter, loss.item(),
                model.covar_module.module.base_kernel.lengthscale.item(),
                model.likelihood.noise.item()
            ))
            
            if fail:
                print('Convergence reached!')
                break
    
    print(f"Finished training on {train_x.size(0)} data points using {n_devices} GPUs.")
    return model, likelihood
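A hedged usage sketch for the train routine above (the data tensors, device handle, and size settings are assumptions for illustration, not from the source):
# Illustrative call, assuming torch/gpytorch are imported and train_x/train_y already live on output_device.
output_device = torch.device('cuda:0')
model, likelihood = train(train_x, train_y,
                          n_devices=torch.cuda.device_count(),
                          output_device=output_device,
                          checkpoint_size=10000,
                          preconditioner_size=100,
                          n_training_iter=20)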
Example #3
    def train_gp_model_LBFGS(self, train_x, train_y):
        # Use full-batch L-BFGS optimizer
        # optimizer = FullBatchLBFGS(self.parameters(), lr=1)
        optimizer = FullBatchLBFGS(self.parameters(), lr=1E-1)
        # Access parameters: self.likelihood.noise_covar.raw_noise
        self.likelihood.noise_covar.initialize(raw_noise=0.)
        self.mean_module.initialize(constant=0.)
        para_to_check = self.covar_module.base_kernel
        para_to_check.initialize(raw_lengthscale=0.)
        # self.covar_module.initialize(raw_lengthscale=0.)

        # "Loss" for GPs - the marginal log likelihood
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self)

        # define closure
        def closure():
            optimizer.zero_grad()
            output = self(train_x.double())
            loss = -mll(output, train_y.double()).sum()
            return loss

        loss = closure()
        loss.backward()

        training_iter = 20
        for i in range(training_iter):

            # perform step and update curvature
            # optimizer.zero_grad()
            options = {
                'closure': closure,
                'current_loss': loss,
                'max_ls': 20,
                'eta': 2
            }
            loss, g_new, lr, _, F_eval, G_eval, desc_dir, fail = optimizer.step(
                options)
            if self.verbose:
                logger.info(
                    'Iter %d/%d - Loss: %.3f - LR: %.3f - Func Evals: %0.0f - Grad Evals: %0.0f - fail: %0.0f'
                    % (i + 1, training_iter, loss.item(), lr, F_eval, G_eval,
                       fail))
                # logger.info(str(g_new))
            if torch.isnan(para_to_check.raw_lengthscale.data):
                # logger.warning('NaN detected')
                # self.covar_module.initialize(raw_lengthscale=1E-6)
                para_to_check.initialize(raw_lengthscale=1E-6)
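A short usage sketch for train_gp_model_LBFGS (the GP class holding the method and the data tensors are assumed for illustration):
# Hypothetical usage; `MyExactGP` stands in for whatever gpytorch ExactGP subclass defines this method.
gp = MyExactGP(train_x, train_y, gpytorch.likelihoods.GaussianLikelihood()).double()
gp.train()
gp.likelihood.train()
gp.train_gp_model_LBFGS(train_x, train_y)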
Example #4
    buildPhi_basic = gprh.buildPhi(m, type=meastype, tun=tun1)

if integral:
    closure_basic = gprh.gp_closure(model_basic, meastype, buildPhi_basic, lossfu_basic, n, dom_points, train_y)
else:
    closure_basic = gprh.gp_closure(model_basic, meastype, buildPhi_basic, lossfu_basic, n, dom_points, train_y, train_x=train_x)
loss_basic = closure_basic()

for i in range(training_iterations_basic):

    options = {'line_search': True, 'closure': closure_basic, 'max_ls': 3, 'ls_debug': False, 'inplace': False, 'interpolate': False,
               'eta': 3, 'c1': 1e-4, 'decrease_lr_on_max_ls': 0.1, 'increase_lr_on_min_ls': 5}

    optimiser_basic.zero_grad() # zero gradients
    loss_basic.backward()  # Backprop derivatives
    loss_basic, lr, ls_step = optimiser_basic.step(options=options) # compute new loss

    # print
    gprh.optiprint(i, training_iterations_basic, loss_basic.item(), lr, ls_step, model_basic, buildPhi_basic.L)

if integral:
    test_f = gprh.compute_and_save(model_basic, meastype, dataname, train_y, n, X, Y, Z,
            ntx, nty, test_x, dom_points, m, dim, mt, noise_std, lossfu_basic, buildPhi_basic, optimiser_basic, training_iterations_basic,
             joint=True, x0=x0, unitvecs=unitvecs, Rlim=Rlim, rec_fbp=rec_fbp, err_fbp=err_fbp, basic=True)
if point:
    test_f = gprh.compute_and_save(model_basic, meastype, dataname, train_y, n, X, Y, Z,
        ntx, nty, test_x, dom_points, m, dim, mt, noise_std, lossfu_basic, buildPhi_basic, optimiser_basic, training_iterations_basic,
         joint=True, train_x=train_x, basic=True)

try:  # since plotting might produce an error on remote machines
    vmin = 0
Example #5
    # model.scale2*=1.2
    options = {
        'closure': closure,
        'max_ls': 5,
        'ls_debug': False,
        'inplace': False,
        'interpolate': False,
        'eta': 3,
        'c1': 1e-4,
        'decrease_lr_on_max_ls': 0.1,
        'increase_lr_on_min_ls': 5
    }

    optimiser.zero_grad()  # zero gradients
    loss.backward()  # propagate derivatives
    loss, lr, ls_iters = optimiser.step(options=options)  # compute new loss

    # print
    gprh.optiprint(i, training_iterations, loss.item(), lr, ls_iters, model, L)

# update phi
phi, sq_lambda, L = buildPhi.getphi(model,
                                    m,
                                    n,
                                    mt,
                                    train_x=train_x,
                                    dom_points=dom_points)

# now make predictions
test_f, cov_f = model(y_train=train_y,
                      phi=phi,
Example #6
# Use full-batch L-BFGS optimizer
optimizer = FullBatchLBFGS(model.parameters())

# "Loss" for GPs - the marginal log likelihood
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)


# define closure
def closure():
    optimizer.zero_grad()
    output = model(train_x)
    loss = -mll(output, train_y)
    return loss


loss = closure()
loss.backward()

training_iter = 10
for i in range(training_iter):

    # perform step and update curvature
    options = {'closure': closure, 'current_loss': loss, 'max_ls': 10}
    loss, _, lr, _, F_eval, G_eval, _, _ = optimizer.step(options)

    print(
        'Iter %d/%d - Loss: %.3f - LR: %.3f - Func Evals: %0.0f - Grad Evals: %0.0f - Log-Lengthscale: %.3f - Log_Noise: %.3f'
        % (i + 1, training_iter, loss.item(), lr, F_eval, G_eval,
           model.covar_module.base_kernel.log_lengthscale.item(),
           model.likelihood.log_noise.item()))
Example #7
            ops = opfun(X_train[subsmpl])

            if (torch.cuda.is_available()):
                tgts = torch.from_numpy(
                    y_train[subsmpl]).cuda().long().squeeze()
            else:
                tgts = torch.from_numpy(y_train[subsmpl]).long().squeeze()

            loss_fn += F.cross_entropy(ops, tgts) * (len(subsmpl) / no_samples)

        return loss_fn

    # perform line search step
    options = {'closure': closure, 'current_loss': obj}
    obj, grad, lr, _, _, _, _, _ = optimizer.step(options)

    # compute statistics
    model.eval()
    train_loss, test_loss, test_acc = compute_stats(X_train,
                                                    y_train,
                                                    X_test,
                                                    y_test,
                                                    opfun,
                                                    accfun,
                                                    ghost_batch=128)

    # print data
    print('Iter:', n_iter + 1, 'lr:', lr, 'Training Loss:', train_loss,
          'Test Loss:', test_loss, 'Test Accuracy:', test_acc)
Example #8
    def train_gp_model_LBFGS_SGP(self, train_x, train_y):
        # Use full-batch L-BFGS optimizer
        # optimizer = FullBatchLBFGS(self.parameters(), lr=1)
        train_dataset = TensorDataset(train_x, train_y)
        train_loader = DataLoader(train_dataset,
                                  batch_size=int(
                                      max(min(train_y.size(0) / 100, 1E4),
                                          100)),
                                  shuffle=True)
        self.train()
        self.likelihood.train()

        optimizer = FullBatchLBFGS(self.parameters(), lr=1E-1)
        # Access parameters: self.likelihood.noise_covar.raw_noise
        # self.likelihood.noise_covar.initialize(raw_noise=0.)
        # self.mean_module.initialize(constant=0.)
        # para_to_check = self.covar_module.base_kernel
        # para_to_check.initialize(raw_lengthscale=0.)
        # self.covar_module.initialize(raw_lengthscale=0.)

        # "Loss" for GPs - the marginal log likelihood
        mll = gpytorch.mlls.VariationalELBO(self.likelihood,
                                            self,
                                            num_data=train_y.size(0),
                                            combine_terms=False).cuda()
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                         milestones=[3, 5],
                                                         gamma=0.1)

        # define closure

        training_iter = 20
        num_epochs = 2
        for i in range(num_epochs):
            scheduler.step()
            for minibatch_i, (x_batch, y_batch) in enumerate(train_loader):
                # define closure
                def closure():
                    optimizer.zero_grad()
                    # output = self(train_x.double())
                    # loss = -mll(output, train_y.double()).sum()
                    with gpytorch.settings.use_toeplitz(False):
                        output = self(x_batch.double())
                        log_lik, kl_div, log_prior = mll(
                            output, y_batch.double())
                        loss = -(log_lik - kl_div + log_prior).sum()

                    return loss

                loss = closure()
                loss.backward()
                # perform step and update curvature
                # optimizer.zero_grad()
                options = {
                    'closure': closure,
                    'current_loss': loss,
                    'max_ls': 1,
                    'eta': 2
                }
                loss, g_new, lr, _, F_eval, G_eval, desc_dir, fail = optimizer.step(
                    options)

                if self.verbose:
                    logger.info(
                        'Iter %d[%d/%d] - Loss: %.3f - LR: %.3f - Func Evals: %0.0f - Grad Evals: %0.0f - fail: %0.0f'
                        % (i + 1, minibatch_i, len(train_loader), loss.item(),
                           lr, F_eval, G_eval, fail))
Example #9
    # model.scale2*=1.2
    options = {
        'closure': closure,
        'max_ls': 3,
        'ls_debug': False,
        'inplace': False,
        'interpolate': False,
        'eta': 3,
        'c1': 1e-4,
        'decrease_lr_on_max_ls': 0.1,
        'increase_lr_on_min_ls': 5
    }

    optimiser.zero_grad()  # zero gradients
    loss.backward()  # propagate derivatives
    loss, lr, ls_iters = optimiser.step(options=options)  # compute new loss

    # print
    gprh.optiprint(i, training_iterations, loss.item(), lr, ls_iters, model, L)

# update phi
phi, sq_lambda, L = buildPhi.getphi(model,
                                    n,
                                    train_x=train_x,
                                    dom_points=dom_points)

# now make predictions
test_f, cov_f = model(y_train=train_y,
                      phi=phi,
                      sq_lambda=sq_lambda,
                      L=L,
Example #10
    def train(self,
              train_x,
              train_y,
              n_devices,
              output_device,
              checkpoint_size,
              preconditioner_size,
              n_training_iter,
              n_restarts=1):
        likelihood = gpytorch.likelihoods.GaussianLikelihood(
            noise_constraint=gpytorch.constraints.GreaterThan(1e-3)).to(
                output_device)
        model = ExactGPModel(train_x, train_y, likelihood, n_devices,
                             output_device).to(output_device)
        model.train()
        likelihood.train()

        optimizer = FullBatchLBFGS(model.parameters(), lr=.5)
        # "Loss" for GPs - the marginal log likelihood
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)


        with gpytorch.beta_features.checkpoint_kernel(checkpoint_size), \
             gpytorch.settings.max_preconditioner_size(preconditioner_size):

            def closure():
                optimizer.zero_grad()
                output = model(train_x)
                loss = -mll(output, train_y)
                return loss

            loss = closure()
            loss.backward()

            for i in range(n_training_iter):
                options = {
                    'closure': closure,
                    'current_loss': loss,
                    'max_ls': 20
                }

                loss, _, _, _, _, _, _, fail = optimizer.step(options)

                if self.verbosity > 2:
                    print_lengthscale = [
                        "%.3f" % p.item()
                        for p in model.covar_module.module.lengthscale
                    ]
                    print(
                        f'Iter {i+1}/{n_training_iter} - Loss: {"%.3f"%loss.item()} lengthscale: {print_lengthscale}   noise: {"%.3f"%model.likelihood.noise.item()}'
                    )
                if fail:
                    for pname, p in model.named_parameters():
                        print(pname, p.grad)
                    if self.verbosity > 2:
                        print('Convergence reached!')
                    break
        if self.verbosity > 2:
            print(
                "Finished training on {0} data points using {1} GPUs.".format(
                    train_x.size(-2), n_devices))
        return model, likelihood, mll
Example #11
    options = {
        'line_search': True,
        'closure': closure2,
        'max_ls': 3,
        'ls_debug': False,
        'inplace': False,
        'interpolate': False,
        'eta': 3,
        'c1': 1e-4,
        'decrease_lr_on_max_ls': 0.1,
        'increase_lr_on_min_ls': 5
    }

    optimiser_pt.zero_grad()  # zero gradients
    loss2.backward()  # propagate derivatives
    loss2, lr, ls_step = optimiser_pt.step(options=options)  # compute new loss

    # print
    gprh.optiprint(i, training_iterations_pt, loss2.item(), lr, ls_step)

#########################################################
# joint training
#########################################################
print('\n=========Training the joint model=========')

# set appr params
dim = len(m)  # nr of latent outputs
mt = np.prod(m)  # total nr of basis functions

# buildPhi object
if integral:
Example #12
        def closure():
            optimizer.zero_grad()
            loss_fn = model()
            return loss_fn

        # perform line search step
        options = {
            'closure': closure,
            'current_loss': obj,
            'eta': 2,
            'max_ls': max_ls,
            'interpolate': interpolate,
            'inplace': False
        }
        if (line_search == 'Armijo'):
            obj, lr, backtracks, clos_evals, desc_dir, fail = optimizer.step(
                options=options)

            # compute gradient at new iterate
            obj.backward()
            grad = optimizer._gather_flat_grad()

        elif (line_search == 'Wolfe'):
            obj, grad, lr, backtracks, clos_evals, grad_evals, desc_dir, fail = optimizer.step(
                options=options)

        x_new.copy_(model.x())

        func_evals += clos_evals

        # compute quantities for checking convergence
        grad_norm = torch.norm(grad)