Example #1
 def test_modelrecreation(self):
     par = toy_model()
     pcopy = GPRegression(par.X.copy(),
                          par.Y.copy(),
                          kernel=par.kern.copy())
     np.testing.assert_allclose(par.param_array, pcopy.param_array)
     np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full)
     self.assertSequenceEqual(str(par), str(pcopy))
     self.assertIsNot(par.param_array, pcopy.param_array)
     self.assertIsNot(par.gradient_full, pcopy.gradient_full)
     self.assertTrue(pcopy.checkgrad())
     self.assertTrue(np.any(pcopy.gradient != 0.0))
     np.testing.assert_allclose(pcopy.param_array,
                                par.param_array,
                                atol=1e-6)
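     # Randomize the parameters, then round-trip the model through pickle; the copy must match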
     par.randomize()
     with tempfile.TemporaryFile('w+b') as f:
         par.pickle(f)
         f.seek(0)
         pcopy = pickle.load(f)
     np.testing.assert_allclose(par.param_array, pcopy.param_array)
     np.testing.assert_allclose(par.gradient_full,
                                pcopy.gradient_full,
                                atol=1e-6)
     self.assertSequenceEqual(str(par), str(pcopy))
     self.assertTrue(pcopy.checkgrad())
Example #2
    def visualize_quadratic_function(self):
        x_range = np.linspace(0., 1., 80)
        y_range = np.linspace(0., 1., 80)
        X = cartesian([x_range, y_range])

        import os
        if not os.path.exists("./pics/"):
            os.makedirs("./pics/")

        #################################
        #     TRAIN THE W_OPTIMIZER     #
        #################################

        Opt = TripathyOptimizer()

        for j in range(self.no_tries):
            print("Try number : ", j)

            W_hat = self.kernel.sample_W()
            self.kernel.update_params(
                W=W_hat,
                s=self.kernel.inner_kernel.variance,
                l=self.kernel.inner_kernel.lengthscale
            )

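            # Jointly optimize the projection W_hat and the hyperparameters (noise sn, lengthscales l, variance s)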
            W_hat, sn, l, s = Opt.run_two_step_optimization(self.kernel, self.sn, self.X, self.Y)

            # Create the gp_regression function and pass in the predictor function as f_hat
            self.kernel.update_params(W=W_hat, l=l, s=s)
            gp_reg = GPRegression(self.X, self.Y, self.kernel, noise_var=sn)

            # Ground truth on the grid vs. the GP's predicted mean at the training inputs
            y = self.function.f(np.dot(X, self.real_W).T)
            y_hat = gp_reg.predict(self.X)[0].squeeze()

            #################################
            #   END TRAIN THE W_OPTIMIZER   #
            #################################

            fig = plt.figure()
            ax = Axes3D(fig)

            # First plot the real function
            ax.scatter(X[:,0], X[:, 1], y, s=1)
            ax.scatter(self.X[:,0], self.X[:, 1], y_hat, cmap=plt.cm.jet)
            fig.savefig('./pics/Iter_' + str(j) + '.png')
            # plt.show()
            plt.close(fig)

            # Compute the loss for this run and save W_hat alongside it
            cur_loss = loss(
                self.kernel,
                W_hat,
                sn,
                s,
                l,
                self.X,
                self.Y
            )
            np.savetxt("./pics/Iter_" + str(j) + "__" + "Loss_" + str(cur_loss) + ".txt", W_hat)
Example #3
def fit_single_GP_model(X, Y, parameter_list, ard=False):
    kernel = RBF(X.shape[1],
                 ARD=parameter_list[3],
                 lengthscale=parameter_list[0],
                 variance=parameter_list[1])
    gp = GPRegression(X=X, Y=Y, kernel=kernel, noise_var=parameter_list[2])
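    # NOTE: the next line fixes the noise variance at 1e-2, overriding parameter_list[2]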
    gp.likelihood.variance.fix(1e-2)
    gp.optimize()
    return gp
Example #4
def compare_against_mmd_test():
    data = loadmat("../data/02-solar.mat")
    X = data["X"]
    y = data["y"]

    X_train, y_train, X_test, y_test, N, N_test = prepare_dataset(X, y)

    kernel = RBF(input_dim=1, variance=0.608, lengthscale=0.207)
    m = GPRegression(X_train, y_train, kernel, noise_var=0.283)
    m.optimize()
    pred_mean, pred_var = m.predict(X_test)
    pred_std = np.sqrt(pred_var)  # GPy's predict returns the predictive variance

    s = GaussianQuadraticTest(None)
    gradients = compute_gp_regression_gradients(y_test, pred_mean, pred_std)
    U_matrix, stat = s.get_statistic_multiple_custom_gradient(y_test[:, 0], gradients[:, 0])
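    # Estimate the null distribution of the statistic via bootstrap and compute a p-value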
    num_test_samples = 10000
    null_samples = bootstrap_null(U_matrix, num_bootstrap=num_test_samples)
    #     null_samples = sample_null_simulated_gp(s, pred_mean, pred_std, num_test_samples)
    p_value_ours = 1.0 - np.mean(null_samples <= stat)

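    # Baseline: quadratic-time MMD two-sample test between the observed (X, y_test)
    # pairs and (X, y_rep) pairs, where y_rep is drawn from the GP predictive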
    y_rep = np.random.randn(len(X_test)) * pred_std.flatten() + pred_mean.flatten()
    y_rep = np.atleast_2d(y_rep).T
    A = np.hstack((X_test, y_test))
    B = np.hstack((X_test, y_rep))
    feats_p = RealFeatures(A.T)
    feats_q = RealFeatures(B.T)
    width = 1
    kernel = GaussianKernel(10, width)
    mmd = QuadraticTimeMMD()
    mmd.set_kernel(kernel)
    mmd.set_p(feats_p)
    mmd.set_q(feats_q)
    mmd_stat = mmd.compute_statistic()

    # sample from null
    num_null_samples = 10000
    mmd_null_samples = np.zeros(num_null_samples)
    for i in range(num_null_samples):
        # keep y_rep fixed and redraw the other sample (the one standing in for y_test)
        y_rep2 = np.random.randn(len(X_test)) * pred_std.flatten() + pred_mean.flatten()
        y_rep2 = np.atleast_2d(y_rep2).T
        A = np.hstack((X_test, y_rep2))
        feats_p = RealFeatures(A.T)
        width = 1
        kernel = GaussianKernel(10, width)
        mmd = QuadraticTimeMMD()
        mmd.set_kernel(kernel)
        mmd.set_p(feats_p)
        mmd.set_q(feats_q)
        mmd_null_samples[i] = mmd.compute_statistic()

    p_value_mmd = 1.0 - np.mean(mmd_null_samples <= mmd_stat)

    return p_value_ours, p_value_mmd
Example #5
    def test_visualize_augmented_sinusoidal_function(self):

        self.init()

        import os
        if not os.path.exists("./pics/camelback/"):
            os.makedirs("./pics/camelback/")

        #################################
        #     TRAIN THE W_OPTIMIZER     #
        #################################

        Opt = TripathyOptimizer()

        print("Real hidden matrix is: ", self.real_W)

        for j in range(self.no_tries):
            print("Try number : ", j)

            W_hat = self.kernel.sample_W()
            self.kernel.update_params(
                W=W_hat,
                s=self.kernel.inner_kernel.variance,
                l=self.kernel.inner_kernel.lengthscale
            )

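            # Jointly optimize W_hat and the hyperparameters (noise sn, lengthscales l, variance s)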
            W_hat, sn, l, s = Opt.run_two_step_optimization(self.kernel, self.sn, self.X, self.Y)

            # Create the gp_regression function and pass in the predictor function as f_hat
            self.kernel.update_params(W=W_hat, l=l, s=s)
            gp_reg = GPRegression(self.X, self.Y, self.kernel, noise_var=sn)
            y_hat = gp_reg.predict(self.X)[0].squeeze()

            #################################
            #   END TRAIN THE W_OPTIMIZER   #
            #################################

            # Compute the loss for this run and save W_hat alongside it
            cur_loss = loss(
                self.kernel,
                W_hat,
                sn,
                s,
                l,
                self.X,
                self.Y
            )
            np.savetxt("./pics/camelback/Iter_" + str(j) + "__" + "Loss_" + str(cur_loss) + ".txt", W_hat)
Example #6
    def check_if_matrix_is_found(self):

        print("Starting to optimize stuf...")

        import os
        if not os.path.exists("./featureSelection/"):
            os.makedirs("./featureSelection/")

        #################################
        #     TRAIN THE W_OPTIMIZER     #
        #################################

        Opt = TripathyOptimizer()

        print("Real hidden matrix is: ", self.real_W)
        # Start with the approximation of the real matrix

        W_hat = self.kernel.sample_W()
        self.kernel.update_params(W=W_hat,
                                  s=self.kernel.inner_kernel.variance,
                                  l=self.kernel.inner_kernel.lengthscale)

        W_hat, sn, l, s = Opt.try_two_step_optimization_with_restarts(
            self.kernel, self.phi_X, self.Y)

        # Create the gp_regression function and pass in the predictor function as f_hat
        self.kernel.update_params(W=W_hat, l=l, s=s)
        gp_reg = GPRegression(self.phi_X, self.Y, self.kernel, noise_var=sn)

        # Maybe predict even more values? (Plot the entire surface?)
        y_hat = gp_reg.predict(self.phi_X)[0].squeeze()

        #################################
        #   END TRAIN THE W_OPTIMIZER   #
        #################################

        # Compute the final loss and save W_hat together with the real matrix
        cur_loss = loss(self.kernel, W_hat, sn, s, l, self.phi_X, self.Y)

        np.savetxt(
            config['basepath'] + "/featureSelection/" + str(cur_loss) +
            "_BestLoss.txt", W_hat)
        np.savetxt(
            config['basepath'] + "/featureSelection/" + str(cur_loss) +
            "_realMatr.txt", self.real_W)

        # Plot the predicted surface
        self.plot_3d(y_hat, title=str(cur_loss) + "_BestLoss")
Example #7
 def test_add_observer(self):
     par = toy_model()
     par.name = "original"
     par.count = 0
     par.add_observer(self, self._callback, 1)
     pcopy = GPRegression(par.X.copy(), par.Y.copy(), kernel=par.kern.copy())
     self.assertNotIn(par.observers[0], pcopy.observers)
     pcopy = par.copy()
     pcopy.name = "copy"
     self.assertTrue(par.checkgrad())
     self.assertTrue(pcopy.checkgrad())
     self.assertTrue(pcopy.kern.checkgrad())
     self.assertIn(par.observers[0], pcopy.observers)
     self.assertEqual(par.count, 3)
     self.assertEqual(pcopy.count, 6)  # 3 from each of the two checkgrad calls
Example #8
def dloss_ds(kernel, fix_W, fix_sn, s, fix_l, X, Y):
    # TODO: write some tests that check whether changing X or Y affects this derivative correctly!
    kernel.update_params(W=fix_W, s=s, l=fix_l)
    Y = Y.reshape((-1, 1))
    # Constructing the model below updates `kernel` in place (computing its gradients), so don't remove it
    gp_reg = GPRegression(X, Y, kernel, noise_var=fix_sn)
    grads = kernel.inner_kernel.variance.gradient

    return grads
Example #9
 def test_add_observer(self):
     par = toy_model()
     par.name = "original"
     par.count = 0
     par.add_observer(self, self._callback, 1)
     pcopy = GPRegression(par.X.copy(),
                          par.Y.copy(),
                          kernel=par.kern.copy())
     self.assertNotIn(par.observers[0], pcopy.observers)
     pcopy = par.copy()
     pcopy.name = "copy"
     self.assertTrue(par.checkgrad())
     self.assertTrue(pcopy.checkgrad())
     self.assertTrue(pcopy.kern.checkgrad())
     self.assertIn(par.observers[0], pcopy.observers)
     self.assertEqual(par.count, 3)
     self.assertEqual(pcopy.count, 6)  # 3 from each of the two checkgrad calls
Example #10
 def test_modelrecreation(self):
     par = toy_model()
     pcopy = GPRegression(par.X.copy(), par.Y.copy(), kernel=par.kern.copy())
     np.testing.assert_allclose(par.param_array, pcopy.param_array)
     np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full)
     self.assertSequenceEqual(str(par), str(pcopy))
     self.assertIsNot(par.param_array, pcopy.param_array)
     self.assertIsNot(par.gradient_full, pcopy.gradient_full)
     self.assertTrue(pcopy.checkgrad())
     self.assertTrue(np.any(pcopy.gradient != 0.0))
     np.testing.assert_allclose(pcopy.param_array, par.param_array, atol=1e-6)
     par.randomize()
     with tempfile.TemporaryFile('w+b') as f:
         par.pickle(f)
         f.seek(0)
         pcopy = pickle.load(f)
     np.testing.assert_allclose(par.param_array, pcopy.param_array)
     np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full, atol=1e-6)
     self.assertSequenceEqual(str(par), str(pcopy))
     self.assertTrue(pcopy.checkgrad())
Example #11
    def test_gp_regression(self):
        """
            The prediction of GPRegression should have shape (N, 1)!
        :return:
        """
        self.init()

        test_samples = 10

        Xrand = np.random.rand(test_samples, self.real_dim)

        # Check shape of GP
        gp_reg = GPRegression(self.X,
                              self.Y,
                              kernel=self.kernel,
                              noise_var=self.sn)

        # predict returns both the mean and the variance; take the mean
        y_hat = gp_reg.predict(Xrand)[0]

        assert y_hat.shape == (test_samples, 1), y_hat.shape
Example #12
def loss(kernel, W, sn, s, l, X, Y):
    """
    :param W: The orthogonal projection matrix
    :param sn: The noise-variance for the regression
    :param s: The kernel scalar hyperparameter
    :param l: The kernel lengthscales hyperparameter (array)
    :param X: The data to optimize over (observations)
    :param Y: The data to optimize over (target values)
    :return: A scalar value describing the "loss" of the given model
    """
    assert kernel.real_dim == X.shape[1]
    assert Y.shape[0] == X.shape[0]

    # Lazily reusing GPy's implementation!
    # TODO: check if the kernel inherits the correct methods
    # TODO: avoid recreating GPRegression on every call; update its parameters in place instead
    kernel.update_params(W=W, l=l, s=s)
    Y = Y.reshape((-1, 1))
    gp_reg = GPRegression(X, Y, kernel, noise_var=sn)

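    # NOTE: despite the name, this returns the GP log marginal likelihood (higher is better)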
    return gp_reg.log_likelihood()
Example #13
    def fit_all_models(self):
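        # Fit Y as a function of Z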
        num_features = self.Z.shape[1]
        kernel = RBF(num_features, ARD=False, lengthscale=1., variance=1.)
        gp_Y = GPRegression(X=self.Z, Y=self.Y, kernel=kernel, noise_var=1.)
        gp_Y.optimize()

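        # Fit Z as a function of X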
        num_features = self.X.shape[1]
        kernel = RBF(num_features, ARD=False, lengthscale=1., variance=1.)
        gp_Z = GPRegression(X=self.X, Y=self.Z, kernel=kernel)
        gp_Z.optimize()

        functions = OrderedDict([('Y', gp_Y), ('Z', gp_Z), ('X', [])])

        return functions
Example #14
    def test_if_function_is_found(self):
        """
            Replace these tests with the actual optimizer function!
        :return:
        """
        self.init()

        print("Real matrix is: ", self.real_W)

        all_tries = []
        for i in range(self.tries):
            # Initialize random guess
            W_hat = self.kernel.sample_W()

            # Find a good W!
            for _ in range(self.max_iter):
                W_hat = self.w_optimizer.optimize_stiefel_manifold(W_hat)

            print("Difference to real W is: ", (W_hat - self.real_W))

            assert W_hat.shape == self.real_W.shape
            self.kernel.update_params(
                W=W_hat,
                l=self.kernel.inner_kernel.lengthscale,
                s=self.kernel.inner_kernel.variance
            )

            # TODO: update the gaussian process with the new kernels parameters! (i.e. W_hat)

            # Create the gp_regression function and pass in the predictor function as f_hat
            gp_reg = GPRegression(self.X, self.Y, self.kernel, noise_var=self.sn)
            res = self.metrics.mean_difference_points(
                fnc=self.function._f,
                fnc_hat=gp_reg.predict,
                A=self.real_W,
                A_hat=W_hat,
                X=self.X
            )

            all_tries.append(res)

        print(all_tries)

        assert np.asarray(all_tries).any()
Example #15
    def refit_models(self, observational_samples):
        X = np.asarray(observational_samples['X'])[:, np.newaxis]
        Z = np.asarray(observational_samples['Z'])[:, np.newaxis]
        Y = np.asarray(observational_samples['Y'])[:, np.newaxis]

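        # Fit Y as a function of Z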
        num_features = Z.shape[1]
        kernel = RBF(num_features, ARD=False, lengthscale=1., variance=1.)
        gp_Y = GPRegression(X=Z, Y=Y, kernel=kernel, noise_var=1.)
        gp_Y.optimize()

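        # Fit Z as a function of X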
        num_features = X.shape[1]
        kernel = RBF(num_features, ARD=False, lengthscale=1., variance=1.)
        gp_Z = GPRegression(X=X, Y=Z, kernel=kernel)
        gp_Z.optimize()

        functions = OrderedDict([('Y', gp_Y), ('Z', gp_Z), ('X', [])])

        return functions
Example #16
def dloss_dW(kernel, W, fix_sn, fix_s, fix_l, X, Y):
    """
    The derivative of the loss functions up to a parameter "param"
    :param kernel:
    :param W:
    :param fix_sn:
    :param fix_s:
    :param fix_l:
    :param X:
    :param Y:
    :return:
    """
    kernel.update_params(W=W, l=fix_l, s=fix_s)
    Y = Y.reshape((-1, 1))
    # TODO: as in loss(), is recreating GPRegression needed here, or does constructing it just update the kernel's parameters?
    gp_reg = GPRegression(X, Y, kernel, noise_var=fix_sn)
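    # Constructing the model triggers a gradient computation, populating kernel.W_grad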

    assert kernel.W_grad.shape == W.shape

    return kernel.W_grad
Example #17
def dloss_dW(kernel, W, fix_sn, fix_s, fix_l, X, Y):
    """
    The derivative of the loss functions up to a parameter "param"
    :param kernel:
    :param W:
    :param fix_sn:
    :param fix_s:
    :param fix_l:
    :param X:
    :param Y:
    :return:
    """
    kernel.update_params(W=W, l=fix_l, s=fix_s)
    Y = Y.reshape((-1, 1))
    # TODO: as in loss(), is recreating GPRegression needed here, or does constructing it just update the kernel's parameters?
    gp_reg = GPRegression(X, Y, kernel, noise_var=fix_sn)
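    # Constructing the model triggers a gradient computation, populating kernel.W_grad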

    # return gp_reg.log_likelihood()
    # print("Gradient dictionary is: ", [ key for key, value in gp_reg.grad_dict.items() ])
    # This outputs ['dL_dK', 'dL_dthetaL', 'dL_dm']

    assert kernel.W_grad.shape == W.shape

    return kernel.W_grad
Example #18
    def visualize_augmented_sinusoidal_function(self):
        x_range = np.linspace(0., 1., 80)
        y_range = np.linspace(0., 1., 80)
        X = cartesian([x_range, y_range])

        import os
        if not os.path.exists("./pics-twostep/"):
            os.makedirs("./pics-twostep/")

        #################################
        #     TRAIN THE W_OPTIMIZER     #
        #################################

        Opt = TripathyOptimizer()

        print("Real hidden matrix is: ", self.real_W)

        W_hat = self.kernel.sample_W()
        self.kernel.update_params(
            W=W_hat,
            s=self.kernel.inner_kernel.variance,
            l=self.kernel.inner_kernel.lengthscale
        )

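        # Optimize W_hat and the hyperparameters (sn, l, s), restarting from several initializations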
        W_hat, sn, l, s = Opt.try_two_step_optimization_with_restarts(self.kernel, self.X, self.Y)

        # TODO: Check if these values are attained over multiple iterations (check if assert otherwise fails)

        # Create the gp_regression function and pass in the predictor function as f_hat
        self.kernel.update_params(W=W_hat, l=l, s=s)
        gp_reg = GPRegression(self.X, self.Y, self.kernel, noise_var=sn)

        y = self.function.f(np.dot(X, self.real_W).T)

        if self.PLOT_MEAN:
            y_hat = gp_reg.predict(X)[0].squeeze()
        else:
            y_hat = gp_reg.predict(self.X)[0].squeeze()

        #################################
        #   END TRAIN THE W_OPTIMIZER   #
        #################################

        fig = plt.figure()
        ax = Axes3D(fig)

        # First plot the real function
        ax.scatter(X[:,0], X[:, 1], y, s=1)

        if self.PLOT_MEAN:
            ax.scatter(X[:, 0], X[:, 1], y_hat, cmap=plt.cm.jet)
        else:
            ax.scatter(self.X[:,0], self.X[:, 1], y_hat, cmap=plt.cm.jet)

        # Compute the final loss and save W_hat alongside it
        cur_loss = loss(
            self.kernel,
            W_hat,
            sn,
            s,
            l,
            self.X,
            self.Y
        )

        fig.savefig('./pics-twostep/BestLoss_' + str(cur_loss) + '.png')
        plt.show()
        plt.close(fig)

        np.savetxt("./pics-twostep/BestLoss_" + str(cur_loss) + ".txt", W_hat)
Example #19
def toy_model():
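    # A minimal 1-D GP regression model: fit y = sin(x) on 50 points in [0, 1]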
    X = np.linspace(0, 1, 50)[:, None]
    Y = np.sin(X)
    m = GPRegression(X=X, Y=Y)
    return m

# NOTE: the fragment below lost its enclosing definition in the source; a
# plausible reconstruction, matching the commented-out call to
# sample_null_simulated_gp above, is:
def sample_null_simulated_gp(s, pred_mean, pred_std, num_samples):
    samples = np.zeros(num_samples)
    for i in range(num_samples):
        # Draw a replicate dataset from the GP predictive distribution
        fake_y_test = np.random.randn(len(pred_mean)) * pred_std.flatten() + pred_mean.flatten()
        fake_y_test = np.atleast_2d(fake_y_test).T
        fake_gradients = compute_gp_regression_gradients(fake_y_test, pred_mean, pred_std)
        _, samples[i] = s.get_statistic_multiple_custom_gradient(fake_y_test[:, 0], fake_gradients[:, 0])

    return samples

if __name__ == '__main__':
    data = loadmat("../data/02-solar.mat")
    X = data['X']
    y = data['y']
    
    X_train, y_train, X_test, y_test, N, N_test = prepare_dataset(X, y)
    
    print "num_train:", len(X_train)
    print "num_test:", len(X_test)
    
    kernel = RBF(input_dim=1, variance=1., lengthscale=1.)
    m = GPRegression(X_train, y_train, kernel)
    m.optimize()
    
    pred_mean, pred_var = m.predict(X_test)
    pred_std = np.sqrt(pred_var)  # GPy's predict returns the predictive variance
    plt.plot(X_test, pred_mean, 'b-')
    plt.plot(X_test, pred_mean + 2 * pred_std, 'b--')
    plt.plot(X_test, pred_mean - 2 * pred_std, 'b--')
    plt.plot(X_train, y_train, 'b.', markersize=3)
    plt.plot(X_test, y_test, 'r.', markersize=5)
    plt.grid(True)
    plt.xlabel(r"$X$")
    plt.ylabel(r"$y$")
    plt.savefig("gp_regression_data_fit.eps", bbox_inches='tight')
    plt.show()