Example #1

# Imports inferred from usage in the code below.
from math import ceil
import datetime
import sys

import numpy as np
from joblib import Parallel, delayed
from sklearn.gaussian_process import GaussianProcessRegressor

# torch and gpytorch are needed only for the 'gpytorch' backend;
# GPy is imported lazily inside fit().
import torch
import gpytorch

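# The helpers below are referenced by GPRegressor but are not defined in this
# example. These are hedged sketches of plausible implementations, not the
# original code: tprint is assumed to be a timestamped print, parallel_predict
# a per-batch prediction helper, and GPyTorchRegressor a standard exact GP.

def tprint(string):
    # Assumed behavior: print with a timestamp prefix and flush immediately.
    sys.stdout.write('{} | {}\n'.format(datetime.datetime.now(), string))
    sys.stdout.flush()


def parallel_predict(model, X, batch_num, n_batches, verbose):
    # Assumed behavior: predict one batch with the fitted sklearn model.
    # sklearn's GaussianProcessRegressor returns a standard deviation, which
    # is squared here so that downstream code sees a variance.
    if verbose:
        tprint('Predicting batch {}/{}...'.format(batch_num + 1, n_batches))
    mean, std = model.predict(X, return_std=True)
    return mean, std ** 2


class GPyTorchRegressor(gpytorch.models.ExactGP):
    # Assumed architecture: constant mean and scaled RBF kernel, following
    # the standard GPyTorch exact-GP pattern.
    def __init__(self, train_x, train_y, likelihood):
        super(GPyTorchRegressor, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel()
        )

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
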
class GPRegressor(object):
    def __init__(
            self,
            n_restarts=0,
            kernel=None,
            normalize_y=True,
            backend='sklearn',
            # n_inducing is used by the GPy backend in fit() but was missing
            # from the original signature; the default of 1000 is assumed.
            n_inducing=1000,
            batch_size=1000,
            n_jobs=1,
            verbose=False,
    ):
        self.n_restarts_ = n_restarts
        self.kernel_ = kernel
        self.normalize_y_ = normalize_y
        self.backend_ = backend
        self.n_inducing_ = n_inducing
        self.batch_size_ = batch_size
        self.n_jobs_ = n_jobs
        self.verbose_ = verbose

    def fit(self, X, y):
        n_samples, n_features = X.shape

        if self.verbose_:
            tprint('Fitting GP model on {} data points with dimension {}...'
                   .format(*X.shape))

        # scikit-learn backend.
        if self.backend_ == 'sklearn':
            self.model_ = GaussianProcessRegressor(
                kernel=self.kernel_,
                normalize_y=self.normalize_y_,
                n_restarts_optimizer=self.n_restarts_,
                copy_X_train=False,
            ).fit(X, y)

        # GPy backend.
        elif self.backend_ == 'gpy':
            import GPy
            if self.kernel_ == 'rbf':
                kernel = GPy.kern.RBF(
                    input_dim=n_features, variance=1., lengthscale=1.
                )
            else:
                raise ValueError('Kernel value {} not supported'
                                 .format(self.kernel_))

            self.model_ = GPy.models.SparseGPRegression(
                X, y.reshape(-1, 1), kernel=kernel,
                num_inducing=min(self.n_inducing_, n_samples)
            )
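            # Remove any constraints on the inducing inputs Z so their
            # locations can be optimized freely.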
            self.model_.Z.unconstrain()
            self.model_.optimize(messages=self.verbose_)

        # GPyTorch with CUDA backend.
        elif self.backend_ == 'gpytorch':
            X = torch.Tensor(X).contiguous().cuda()
            y = torch.Tensor(y).contiguous().cuda()

            likelihood = gpytorch.likelihoods.GaussianLikelihood().cuda()
            model = GPyTorchRegressor(X, y, likelihood).cuda()

            model.train()
            likelihood.train()

            # Use the Adam optimizer (L-BFGS is another option here).
            optimizer = torch.optim.Adam([
                {'params': model.parameters()},  # Includes GaussianLikelihood parameters.
            ], lr=1.)

            # Loss for GPs is the marginal log likelihood.
            mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

            training_iterations = 100
            for i in range(training_iterations):
                optimizer.zero_grad()
                output = model(X)
                loss = -mll(output, y)
                loss.backward()
                if self.verbose_:
                    tprint('Iter {}/{} - Loss: {:.3f}'
                           .format(i + 1, training_iterations, loss.item()))
                optimizer.step()

            self.model_ = model
            self.likelihood_ = likelihood

        else:
            raise ValueError('Backend value {} not supported'
                             .format(self.backend_))

        if self.verbose_:
            tprint('Done fitting GP model.')

        return self

    def predict(self, X):
        if self.verbose_:
            tprint('Computing GP model predictions on {} data points...'
                   .format(X.shape[0]))

        if self.backend_ == 'sklearn':
            n_batches = int(ceil(float(X.shape[0]) / self.batch_size_))
            results = Parallel(n_jobs=self.n_jobs_)(
                delayed(parallel_predict)(
                    self.model_,
                    X[batch_num*self.batch_size_:(batch_num+1)*self.batch_size_],
                    batch_num, n_batches, self.verbose_
                )
                for batch_num in range(n_batches)
            )
            mean = np.concatenate([ result[0] for result in results ])
            var = np.concatenate([ result[1] for result in results ])

        elif self.backend_ == 'gpy':
            mean, var = self.model_.predict(X, full_cov=False)

        elif self.backend_ == 'gpytorch':
            X = torch.Tensor(X).contiguous().cuda()

            # Set into eval mode.
            self.model_.eval()
            self.likelihood_.eval()

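            # fast_pred_var enables LOVE-based fast predictive variances;
            # max_root_decomposition_size caps the rank of the root
            # decomposition used for prediction.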
            with torch.no_grad(), \
                 gpytorch.settings.fast_pred_var(), \
                 gpytorch.settings.max_root_decomposition_size(35):
                preds = self.model_(X)

            mean = preds.mean.detach().cpu().numpy()
            var = preds.variance.detach().cpu().numpy()

        else:
            raise ValueError('Backend value {} not supported'
                             .format(self.backend_))

        if self.verbose_:
            tprint('Done predicting with GP model.')

        self.uncertainties_ = var.flatten()
        return mean.flatten()
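

# A minimal usage sketch (assumed, not part of the original): the sklearn
# backend with an RBF kernel on synthetic data. Kernel choice, data, and
# hyperparameters here are illustrative only.
if __name__ == '__main__':
    from sklearn.gaussian_process.kernels import RBF

    X = np.random.uniform(-3., 3., size=(200, 2))
    y = np.sin(X).sum(axis=1)

    gpr = GPRegressor(kernel=RBF(), n_restarts=2, verbose=True)
    gpr.fit(X, y)

    X_test = np.random.uniform(-3., 3., size=(50, 2))
    mean = gpr.predict(X_test)
    var = gpr.uncertainties_
    tprint('Prediction means in [{:.3f}, {:.3f}], mean variance {:.3f}'
           .format(mean.min(), mean.max(), var.mean()))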