Example #1
    def predict(self, Z, return_std=False, return_cov=False):
        """Predict using the trained GPR model.

        Parameters
        ----------
        Z: list of objects or feature vectors.
            Input values of the unknown data.
        return_std: boolean
            If True, the standard deviations of the predictions at the query
            points are returned along with the mean.
        return_cov: boolean
            If True, the covariance of the predictions at the query points is
            returned along with the mean.

        Returns
        -------
        ymean: 1D array
            Mean of the predictive distribution at query points.
        std: 1D array
            Standard deviation of the predictive distribution at query points.
        cov: 2D matrix
            Covariance of the predictive distribution at query points.
        """
        if not hasattr(self, 'Kinv'):
            raise RuntimeError('Model not trained.')
        Kzc = self._gramian(None, Z, self._C)
        Fzc = Kzc @ self.Kcc_rsqrt
        Kzx = lr.dot(Fzc, self.Fxc.T)

        ymean = Kzx @ self.Ky * self._ystd + self._ymean
        if return_std is True:
            Kzz = self._gramian(self.alpha, Z, diag=True)
            std = np.sqrt(
                np.maximum(Kzz - (Kzx @ self.Kinv @ Kzx.T).diagonal(), 0))
            return (ymean, std * self._ystd)
        elif return_cov is True:
            Kzz = self._gramian(self.alpha, Z)
            cov = np.maximum(Kzz - (Kzx @ self.Kinv @ Kzx.T).todense(), 0)
            return (ymean, cov * self._ystd**2)
        else:
            return ymean
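
A minimal usage sketch (not part of the original example): it assumes `gpr` is an already trained instance of the class that owns `predict`, and `Z_query` is a placeholder list of query inputs compatible with its kernel.

    # Hypothetical names: `gpr` (a trained model) and `Z_query` (query inputs).
    ymean = gpr.predict(Z_query)                           # mean prediction only
    ymean, ystd = gpr.predict(Z_query, return_std=True)    # mean and standard deviation
    ymean, ycov = gpr.predict(Z_query, return_cov=True)    # mean and full covariance
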
Example #2
    def log_marginal_likelihood(self,
                                theta=None,
                                C=None,
                                X=None,
                                y=None,
                                eval_gradient=False,
                                clone_kernel=True,
                                verbose=False):
        """Returns the log-marginal likelihood of a given set of log-scale
        hyperparameters.

        Parameters
        ----------
        theta: array-like
            Kernel hyperparameters for which the log-marginal likelihood is
            to be evaluated. If None, the current hyperparameters will be used.
        C: list of objects or feature vectors.
            The core set that defines the subspace of low-rank approximation.
            If None, `self.C` will be used.
        X: list of objects or feature vectors.
            Input values of the training data. If None, `self.X` will be used.
        y: 1D array
            Output/target values of the training data. If None, `self.y` will
            be used.
        eval_gradient: boolean
            If True, the gradient of the log-marginal likelihood with respect
            to the kernel hyperparameters at position theta will be returned
            alongside.
        clone_kernel: boolean
            If True, the kernel is copied so that probing with theta does not
            alter the trained kernel. If False, the kernel hyperparameters will
            be modified in-place.
        verbose: boolean
            If True, the log-likelihood value and its components will be
            printed to the screen.

        Returns
        -------
        log_likelihood: float
            Log-marginal likelihood of theta for training data.
        log_likelihood_gradient: 1D array
            Gradient of the log-marginal likelihood with respect to the kernel
            hyperparameters at position theta. Only returned when eval_gradient
            is True.
        """
        theta = theta if theta is not None else self.kernel.theta
        C = C if C is not None else self._C
        X = X if X is not None else self._X
        if y is not None:
            y_mask, y = self.mask(y)
        else:
            y = self._y
            y_mask = self._y_mask

        if clone_kernel is True:
            kernel = self.kernel.clone_with_theta(theta)
        else:
            kernel = self.kernel
            kernel.theta = theta

        t_kernel = time.perf_counter()

        if eval_gradient is True:
            Kxc, d_Kxc = self._gramian(None, X, C, kernel=kernel, jac=True)
            Kcc, d_Kcc = self._gramian(self.alpha, C, kernel=kernel, jac=True)
            Kxc, d_Kxc = Kxc[y_mask, :], d_Kxc[y_mask, :, :]
        else:
            Kxc = self._gramian(None, X, C, kernel=kernel)
            Kcc = self._gramian(self.alpha, C, kernel=kernel)
            Kxc = Kxc[y_mask, :]

        t_kernel = time.perf_counter() - t_kernel

        t_linalg = time.perf_counter()

        Kcc_rsqrt = self._corespace(Kcc=Kcc)
        F = Kxc @ Kcc_rsqrt
        K = lr.dot(F, rcond=self.beta, mode='clamp')
        K_inv = K.pinv()

        logdet = K.logdet()
        Ky = K_inv @ y
        yKy = y @ Ky
        logP = yKy + logdet

        if eval_gradient is True:
            D_theta = np.zeros_like(theta)
            K_inv2 = K_inv**2
            for i, t in enumerate(theta):
                d_F = d_Kxc[:, :, i] @ Kcc_rsqrt
                d_K = lr.dot(F, d_F.T) + lr.dot(d_F, F.T) - lr.dot(
                    F @ Kcc_rsqrt.T @ d_Kcc[:, :, i], Kcc_rsqrt @ F.T)
                d_logdet = (K_inv @ d_K).trace()
                d_Kinv_part = K_inv2 @ d_K - K_inv2 @ d_K @ (K @ K_inv)
                d_Kinv = d_Kinv_part + d_Kinv_part.T - K_inv @ d_K @ K_inv
                d_yKy = d_Kinv.quadratic(y, y)
                D_theta[i] = (d_logdet + d_yKy) * np.exp(t)
            retval = (logP, D_theta)
        else:
            retval = logP

        t_linalg = time.perf_counter() - t_linalg

        if verbose:
            mprint.table(
                ('logP', '%12.5g', logP),
                ('dlogP', '%12.5g',
                 np.linalg.norm(D_theta) if eval_gradient is True else np.nan),
                ('y^T.K.y', '%12.5g', yKy),
                ('log|K| ', '%12.5g', logdet),
                ('Cond(K)', '%12.5g', K.cond()),
                ('GPU time', '%10.2g', t_kernel),
                ('CPU time', '%10.2g', t_linalg),
            )

        return retval
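
A short sketch of assumed usage (not from the source): probe the loss surface at the current hyperparameters and at a perturbed point; `gpr` is a hypothetical model that has already been given its training data via `fit`.

    logP = gpr.log_marginal_likelihood()          # evaluate at self.kernel.theta
    logP, grad = gpr.log_marginal_likelihood(
        theta=gpr.kernel.theta + 0.1,             # probe a nearby point in log scale
        eval_gradient=True,                       # also return d(logP)/d(theta)
        clone_kernel=True)                        # leave the trained kernel untouched
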
Example #3
    def predict_loocv(self, Z, z, return_std=False, method='auto'):
        """Compute the leave-one-out cross validation prediction of the given
        data.

        Parameters
        ----------
        Z: list of objects or feature vectors.
            Input values of the data to be cross-validated.
        z: 1D array
            Target values corresponding to Z.
        return_std: boolean
            If True, the standard deviations of the predictions at the query
            points are returned along with the mean.
        method: 'auto' or 'ridge-like' or 'gpr-like'
            Selects the algorithm used for fast evaluation of the leave-one-out
            cross validation without explicitly training one model per sample.
            'ridge-like' seems to be more stable with a smaller core size (that
            is not rank-deficient), while 'gpr-like' seems to be more stable
            with a larger core size. By default, the option is 'auto' and the
            function will choose a method based on an analysis of the
            eigenspectrum of the dataset.

        Returns
        -------
        ymean: 1D array
            Leave-one-out mean of the predictive distribution at query points.
        std: 1D array
            Leave-one-out standard deviation of the predictive distribution at
            query points.
        """
        assert (len(Z) == len(z))
        z = np.asarray(z)
        if self.normalize_y is True:
            z_mean, z_std = np.mean(z), np.std(z)
            z = (z - z_mean) / z_std
        else:
            z_mean, z_std = 0, 1

        if not hasattr(self, 'Kcc_rsqrt'):
            raise RuntimeError('Model not trained.')
        Kzc = self._gramian(None, Z, self._C)

        Cov = Kzc.T @ Kzc
        Cov.flat[::len(self._C) + 1] += self.alpha
        Cov_rsqrt, eigvals = powerh(Cov,
                                    -0.5,
                                    return_symmetric=False,
                                    return_eigvals=True)

        # if an eigenvalue is smaller than alpha, it would have been negative
        # in the unregularized Cov matrix
        if method == 'auto':
            if eigvals.min() > self.alpha:
                method = 'ridge-like'
            else:
                method = 'gpr-like'

        if method == 'ridge-like':
            P = Kzc @ Cov_rsqrt
            L = lr.dot(P, P.T)
            zstar = z - (z - L @ z) / (1 - L.diagonal())
            if return_std is True:
                raise NotImplementedError(
                    'LOOCV std using the ridge-like method is not ready yet.')
        elif method == 'gpr-like':
            F = Kzc @ self.Kcc_rsqrt
            Kinv = lr.dot(F, rcond=self.beta, mode='clamp').pinv()
            zstar = z - (Kinv @ z) / Kinv.diagonal()
            if return_std is True:
                std = np.sqrt(1 / np.maximum(Kinv.diagonal(), 1e-14))
        else:
            raise RuntimeError(f'Unknown method {method} for predict_loocv.')

        if return_std is True:
            return (zstar * z_std + z_mean, std * z_std)
        else:
            return zstar * z_std + z_mean
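
A hedged sketch of how this might be called (all names are placeholders, and `gpr` is assumed to be already trained): estimate the generalization error on the training set without refitting one model per left-out sample.

    z_loo = gpr.predict_loocv(X_train, y_train)
    z_loo, z_std = gpr.predict_loocv(X_train, y_train,
                                     return_std=True, method='gpr-like')
    rmse = np.sqrt(np.mean((z_loo - y_train)**2))  # leave-one-out RMSE
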
Example #4
    def fit(self,
            C,
            X,
            y,
            loss='likelihood',
            tol=1e-5,
            repeat=1,
            theta_jitter=1.0,
            verbose=False):
        """Train a low-rank approximate GPR model. If the `optimizer` argument
        was set while initializing the GPR object, the hyperparameters of the
        kernel will be optimized using the specified loss function.

        Parameters
        ----------
        C: list of objects or feature vectors.
            The core set that defines the subspace of low-rank approximation.
        X: list of objects or feature vectors.
            Input values of the training data.
        y: 1D array
            Output/target values of the training data.
        loss: 'likelihood' or 'loocv'
            The loss function to be minimized during training. Could be either
            'likelihood' (negative log-likelihood) or 'loocv' (mean-square
            leave-one-out cross validation error).
        tol: float
            Tolerance for termination.
        repeat: int
            Repeat the hyperparameter optimization by the specified number of
            times and return the best result.
        theta_jitter: float
            Standard deviation of the random noise added to the initial
            logscale hyperparameters across repeated optimization runs.
        verbose: bool
            Whether or not to print out the optimization progress and outcome.

        Returns
        -------
        self: LowRankApproximateGPR
            Returns an instance of self.
        """
        self.C = C
        self.X = X
        self.y = y
        '''hyperparameter optimization'''
        if self.optimizer:

            if loss == 'likelihood':
                objective = self.log_marginal_likelihood
            elif loss == 'loocv':
                raise NotImplementedError(
                    '(ง๑ •̀_•́)ง LOOCV training not ready yet.')

            def xgen(n):
                x0 = self.kernel.theta.copy()
                yield x0
                yield from x0 + theta_jitter * np.random.randn(n - 1, len(x0))

            opt = self._hyper_opt(method=self.optimizer,
                                  fun=lambda theta, objective=objective:
                                  objective(theta,
                                            eval_gradient=True,
                                            clone_kernel=False,
                                            verbose=verbose),
                                  xgen=xgen(repeat),
                                  tol=tol,
                                  verbose=verbose)
            if verbose:
                print(f'Optimization result:\n{opt}')

            if opt.success:
                self.kernel.theta = opt.x
            else:
                raise RuntimeError(
                    f'Training using the {loss} loss did not converge, got:\n'
                    f'{opt}')
        '''build and store GPR model'''
        self.Kcc_rsqrt = self._corespace(C=self._C)
        self.Kxc = self._gramian(None, self._X, self._C)[self._y_mask, :]
        self.Fxc = self.Kxc @ self.Kcc_rsqrt
        self.Kinv = lr.dot(self.Fxc, rcond=self.beta, mode='clamp').pinv()
        self.Ky = self.Kinv @ self._y

        return self
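
An end-to-end sketch under assumed names (`gpr`, `core_set`, `X_train`, `y_train`, `X_test` are placeholders, not from the source): pick a core set, fit with a few optimizer restarts, then predict.

    gpr.fit(core_set, X_train, y_train,
            loss='likelihood', tol=1e-4,
            repeat=3, theta_jitter=0.5,            # 3 restarts jittered around theta
            verbose=True)
    y_pred, y_std = gpr.predict(X_test, return_std=True)
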
Example #5
# Assumed imports for this standalone test snippet: `lr` refers to the
# project's low-rank linear algebra module (assumed here to be
# graphdot.linalg.low_rank).
import numpy as np
import pytest
import graphdot.linalg.low_rank as lr


def test_dot():
    X = np.random.randn(100, 10)
    Y = np.random.randn(100, 10)
    assert (isinstance(lr.dot(X, Y.T), lr.LATR))
    assert (isinstance(lr.dot(X, X.T), lr.LATR))
    assert (isinstance(lr.dot(X), lr.LLT))
    assert (isinstance(lr.dot(X, method='direct'), lr.LATR))
    assert (isinstance(lr.dot(X, method='auto'), lr.LLT))
    assert (isinstance(lr.dot(X, method='spectral'), lr.LLT))
    assert (isinstance(lr.dot(X, Y.T, method='direct'), lr.LATR))
    assert (isinstance(lr.dot(X, Y.T, method='auto'), lr.LATR))
    with pytest.raises(RuntimeError):
        lr.dot(X, Y.T, method='spectral')
    with pytest.raises(AssertionError):
        lr.dot(X, method='abc')
    with pytest.raises(AssertionError):
        lr.dot(X, Y.T, method='abc')
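
A brief sketch of the low-rank interface exercised by the test, based only on calls that appear in the snippets above (the numerical values are illustrative): `lr.dot(X)` represents `X @ X.T` in factored form and supports lazy linear algebra such as `pinv()`, `logdet()` and matrix-vector products.

    X = np.random.randn(100, 10)
    K = lr.dot(X, rcond=1e-10, mode='clamp')   # factored representation of X @ X.T
    K_inv = K.pinv()                           # pseudo-inverse, still in factored form
    w = K_inv @ np.random.randn(100)           # matrix-vector product
    logdet = K.logdet()                        # log-determinant from the factorization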