def test_ElasticNet_friedman(self):
        """
        Checks elastic-net (LASSO) regularisation on the Friedman dataset:
        quadratic (2nd order) synthetic data containing irrelevant features.
        """
        # Friedman synthetic data: 10 dims, only the first 5 are relevant
        X, y = datasets.gen_friedman(n_observations=200, n_dim=10, noise=0.2,
                                     normalise=False, random_seed=42)
        X_train, X_test, y_train, y_test = datasets.train_test_split(
            X, y, train=0.8, random_seed=42)

        # One uniform parameter per input dimension, total-order basis
        uniform_param = Parameter(distribution='uniform', lower=-1, upper=1,
                                  order=4, endpoints='both')
        params = [uniform_param for _ in range(X.shape[1])]
        my_basis = Basis('total-order')

        # Baseline: ordinary least squares
        poly_ols = Poly(parameters=params,
                        basis=my_basis,
                        method='least-squares',
                        sampling_args={'mesh': 'user-defined',
                                       'sample-points': X_train,
                                       'sample-outputs': y_train.reshape(-1, 1)})
        poly_ols.set_model()
        _, r2_OLS = poly_ols.get_polyscore(X_test=X_test, y_test=y_test)

        # LASSO (alpha/param2 = 1.0): r2 should beat the OLS baseline
        poly_lasso = Poly(parameters=params,
                          basis=my_basis,
                          method='elastic-net',
                          sampling_args={'mesh': 'user-defined',
                                         'sample-points': X_train,
                                         'sample-outputs': y_train.reshape(-1, 1)},
                          solver_args={'param1': 0.1, 'param2': 1.0})
        poly_lasso.set_model()
        _, r2_LASSO = poly_lasso.get_polyscore(X_test=X_test, y_test=y_test)
        self.assertTrue(r2_LASSO > r2_OLS)

        # LASSO should have shrunk coefficients of the irrelevant terms:
        # keep the largest `ideal_coeffs` and check the rest are negligible.
        coeffs = poly_lasso.get_coefficients().squeeze()
        ideal_coeffs = 126  # total-order basis, order=4, 5 relevant dims -> C(5+4,4)
        idx = np.abs(coeffs).argsort()[::-1]
        irrelevent_coeffs = np.sum(np.abs(
            coeffs[idx[ideal_coeffs:]])) / np.sum(np.abs(coeffs))
        self.assertTrue(irrelevent_coeffs < 1e-5,
                        msg='irrelevent_coeffs = %.2e' % irrelevent_coeffs)
    # ------------------------- Example 2 -------------------------
    def test_friedman(self):
        """
        Exercises the gen_friedman() synthetic dataset generator together with
        the train_test_split utility, checking split shapes and exact targets.
        """
        n_obs = 200
        n_dim = 6
        X, y = datasets.gen_friedman(n_observations=n_obs, n_dim=n_dim,
                                     noise=0.0, normalise=False)

        # 75/25 shuffled split with a fixed seed
        X_train, X_test, y_train, y_test = datasets.train_test_split(
            X, y, train=0.75, shuffle=True, random_seed=42)

        # Shapes must match the requested split fractions
        np.testing.assert_equal(X_train.shape,
                                np.array([int(n_obs * 0.75), n_dim]))
        np.testing.assert_equal(y_test.shape,
                                np.array([int(n_obs * 0.25), ]))

        # With zero noise, outputs must equal the analytic Friedman function
        y_true = (10 * np.sin(np.pi * X_test[:, 0] * X_test[:, 1])
                  + 20 * (X_test[:, 2] - 0.5) ** 2
                  + 10 * X_test[:, 3]
                  + 5 * X_test[:, 4])
        np.testing.assert_array_equal(y_true, y_test)
    def test_polyuq_prescribed(self):
        """
        Tests the poly UQ routine when per-point output variances are
        prescribed via 'sample-output-variances' in sampling_args.
        """
        # Generate noiseless quartic data (variances are prescribed, not
        # estimated, so no observation noise is added here)
        dim = 1
        n = 5
        N = 100
        state = np.random.RandomState(42)

        def our_function(x):
            # Quartic test function; vectorised over numpy arrays
            return 0.3 * x**4 - 1.6 * x**3 + 0.6 * x**2 + 2.4 * x - 0.5

        X = state.uniform(-1, 1, N)
        y = our_function(X)
        X_train, X_test, y_train, y_test = datasets.train_test_split(
            X, y, train=0.7, random_seed=42)
        N_train = X_train.shape[0]

        # Array of prescribed variances at each training data point
        y_var = state.uniform(0.05, 0.2, N_train)**2

        # Fit poly with prescribed variances
        param = Parameter(distribution='Uniform', lower=-1, upper=1, order=n)
        myParameters = [param for i in range(dim)
                        ]  # one-line for loop for parameters
        myBasis = Basis('univariate')
        poly = Poly(myParameters,
                    myBasis,
                    method='least-squares',
                    sampling_args={
                        'sample-points': X_train.reshape(-1, 1),
                        'sample-outputs': y_train.reshape(-1, 1),
                        'sample-output-variances': y_var
                    })
        poly.set_model()
        y_pred, y_std = poly.get_polyfit(X_test, uq=True)

        # Regression check on the mean predictive standard deviation
        np.testing.assert_array_almost_equal(y_std.mean(),
                                             0.682095574,
                                             decimal=5,
                                             err_msg='Problem!')
    def test_polyuq_empirical(self):
        """ 
        Tests the get poly uq routine when no variance data is given, i.e. when
        estimating the empirical variance from training data.
        """
        # Generate noisy linear data with one relevant dimension
        dim = 3
        n = 1
        N = 200
        data_noise = 0.05
        X, y = datasets.gen_linear(n_observations=N,
                                   n_dim=dim,
                                   bias=0.5,
                                   n_relevent=1,
                                   noise=data_noise,
                                   random_seed=1)
        X_train, X_test, y_train, y_test = datasets.train_test_split(
            X, y, train=0.7, random_seed=42)

        # Fit poly; no 'sample-output-variances' given, so the predictive
        # variance is estimated empirically from the training residuals
        param = Parameter(distribution='Uniform', lower=-1, upper=1, order=n)
        myParameters = [param for i in range(dim)
                        ]  # one-line for loop for parameters
        myBasis = Basis('tensor-grid')
        poly = Poly(myParameters,
                    myBasis,
                    method='least-squares',
                    sampling_args={
                        'sample-points': X_train,
                        'sample-outputs': y_train.reshape(-1, 1)
                    })
        poly.set_model()
        y_pred, y_std = poly.get_polyfit(X_test, uq=True)

        # Regression check on the mean predictive standard deviation
        np.testing.assert_array_almost_equal(y_std.mean(),
                                             0.327769998,
                                             decimal=5,
                                             err_msg='Problem!')
    def test_ElasticNet_linear(self):
        """ 
        Tests elastic-net regularisation on linear (1st order) synthetic data
        with irrelevant features.
        """
        # Generate 10D linear test data with 2 relevant features
        X, y = datasets.gen_linear(n_observations=500,
                                   n_dim=10,
                                   bias=0.5,
                                   n_relevent=2,
                                   noise=0.2,
                                   random_seed=1)
        X_train, X_test, y_train, y_test = datasets.train_test_split(
            X, y, train=0.8, random_seed=42)

        # Define param and basis: one uniform parameter per input dimension
        s = Parameter(distribution='uniform',
                      lower=-1,
                      upper=1,
                      order=1,
                      endpoints='both')
        param = [s for _ in range(X.shape[1])]
        basis = Basis('total-order')

        # Fit Poly with OLS and Elastic Net (but with lambda/param1=0 so
        # effectively OLS) and check r2 scores match
        poly_OLS = Poly(parameters=param,
                        basis=basis,
                        method='least-squares',
                        sampling_args={
                            'mesh': 'user-defined',
                            'sample-points': X_train,
                            'sample-outputs': y_train.reshape(-1, 1)
                        })
        poly_OLS.set_model()
        _, r2_OLS = poly_OLS.get_polyscore(X_test=X_test, y_test=y_test)

        # NOTE: original code had a stray double assignment
        # (`poly_EN = poly = Poly(...)`) that leaked an extra `poly` binding.
        poly_EN = Poly(parameters=param,
                       basis=basis,
                       method='elastic-net',
                       sampling_args={
                           'mesh': 'user-defined',
                           'sample-points': X_train,
                           'sample-outputs': y_train.reshape(-1, 1)
                       },
                       solver_args={
                           'param1': 0.0,
                           'param2': 0.5
                       })
        poly_EN.set_model()
        _, r2_EN = poly_EN.get_polyscore(X_test=X_test, y_test=y_test)

        np.testing.assert_array_almost_equal(r2_OLS,
                                             r2_EN,
                                             decimal=4,
                                             err_msg='Problem!')

        # Now fit Poly with LASSO (alpha/param2 = 1.0) and check r2 improved
        # (it should, because of irrelevant features + noise)
        poly_LASSO = Poly(parameters=param,
                          basis=basis,
                          method='elastic-net',
                          sampling_args={
                              'mesh': 'user-defined',
                              'sample-points': X_train,
                              'sample-outputs': y_train.reshape(-1, 1)
                          },
                          solver_args={
                              'param1': 0.015,
                              'param2': 1.0
                          })
        poly_LASSO.set_model()
        _, r2_LASSO = poly_LASSO.get_polyscore(X_test=X_test, y_test=y_test)
        self.assertTrue(r2_LASSO > r2_OLS)

        # Finally, check LASSO has shrunk irrelevant Poly coefficients
        coeffs = poly_LASSO.get_coefficients().squeeze()
        ideal_coeffs = 3  # total-order basis, order=1: bias + 2 relevant dims
        idx = np.abs(coeffs).argsort()[::-1]
        irrelevent_coeffs = np.sum(np.abs(
            coeffs[idx[ideal_coeffs:]])) / np.sum(np.abs(coeffs))
        self.assertTrue(irrelevent_coeffs < 1e-5,
                        msg='irrelevent_coeffs = %.2e' % irrelevent_coeffs)