Exemplo n.º 1
0
class knockoff_logit(knockoff_net):
    """Knockoff procedure driven by an L1-penalised logistic regression."""

    def fit(self, X_lrg=None):
        """Build the knockoffs, fit the logistic path, then run the FDR steps.

        If ``X_lrg`` is given it is stored as ``self.X_lrg`` and used as is.
        Otherwise the inherited generator selected by ``self.knockoff_type``
        ('original' or 'binary') is called; any other value generates nothing.
        """
        # Augmented design: either supplied by the caller or produced by one
        # of the generators inherited from knockoff_net.
        if X_lrg is not None:
            self.X_lrg = X_lrg
        elif self.knockoff_type == 'original':
            self._original_knockoff()
        elif self.knockoff_type == 'binary':
            self._binary_knockoff()

        p = self.p

        # Set up the glmnet object and fit it on the augmented design.
        smallest_lambda_frac = min(.000001, .01 / (p**2))
        self.lognet = LogisticNet(
            alpha=1,
            n_lambdas=p * 20,
            frac_lg_lambda=smallest_lambda_frac,
        )
        self.lognet.fit(
            self.X_lrg,
            self.y,
            normalize=False,
            include_intercept=self.intercept,
        )
        net = self.lognet

        # Copy the pieces of the fit that the later steps read.
        self.lambdas = net.out_lambdas
        self.var_index_ent = np.sort(net._indices)
        self.coef_matrix = np.zeros((2 * p, net.n_lambdas))
        # NOTE(review): rows are written at the *sorted* indices but read from
        # the squeezed coefficients with the *unsorted* ones -- confirm this
        # pairing is what the glmnet wrapper expects.
        self.coef_matrix[self.var_index_ent] = (
            net._comp_coef.squeeze()[net._indices]
        )

        # Boolean mask over the 2*p columns marking those glmnet reported.
        self.var_entered = np.zeros(2 * p, dtype=bool)
        self.var_entered[self.var_index_ent] = True

        # FDR computations inherited from knockoff_net, in order.
        self._get_z()
        self._get_w()
        self._get_T()
        self._get_S()
Exemplo n.º 2
0
    def fit(self, X_lrg=None):
        """Build the knockoffs, fit the logistic path, then run the FDR steps.

        A caller-supplied ``X_lrg`` is stored on ``self.X_lrg`` and used
        directly. When it is omitted, the inherited generator matching
        ``self.knockoff_type`` ('original' or 'binary') is invoked; other
        values of ``knockoff_type`` trigger no generation.
        """
        # Choose where the augmented design comes from.
        if X_lrg is not None:
            self.X_lrg = X_lrg
        elif self.knockoff_type == 'original':
            self._original_knockoff()
        elif self.knockoff_type == 'binary':
            self._binary_knockoff()

        n_vars = self.p

        # Configure the glmnet object, then fit it.
        net_options = dict(
            alpha=1,
            n_lambdas=n_vars * 20,
            frac_lg_lambda=min(.000001, .01 / (n_vars**2)),
        )
        self.lognet = LogisticNet(**net_options)
        self.lognet.fit(self.X_lrg,
                        self.y,
                        normalize=False,
                        include_intercept=self.intercept)
        fitted = self.lognet

        # Keep the lambda path and scatter the compressed coefficients into
        # a dense (2*p, n_lambdas) matrix.
        self.lambdas = fitted.out_lambdas
        self.var_index_ent = np.sort(fitted._indices)
        self.coef_matrix = np.zeros((2 * n_vars, fitted.n_lambdas))
        # NOTE(review): destination rows use the sorted indices while the
        # source rows are picked with the unsorted ones -- verify against the
        # glmnet wrapper that this is intended.
        squeezed = fitted._comp_coef.squeeze()
        self.coef_matrix[self.var_index_ent] = squeezed[fitted._indices]

        # Flag every column index reported by the fit.
        self.var_entered = np.zeros(2 * n_vars, dtype=bool)
        self.var_entered[self.var_index_ent] = True

        # Remaining FDR steps, all inherited from knockoff_net.
        self._get_z()
        self._get_w()
        self._get_T()
        self._get_S()
Exemplo n.º 3
0
 def test_unregularized_models(self):
     '''With lambda fixed at 0 the fit must reproduce the training labels.

     Labels are the sign of a random linear score of the features, and the
     check is repeated for dense and sparse inputs at several alphas.
     '''
     dense = np.random.uniform(-1, 1, size=(50, 10))
     sparse = csc_matrix(dense)
     true_w = np.random.uniform(-1, 1, size=(10,))
     y = (np.dot(dense, true_w) >= 0).astype(int)
     # Same iteration order as nested loops: alpha outermost, matrix innermost.
     cases = [(a, m) for a in [0, .5, 1] for m in (dense, sparse)]
     for alpha, X in cases:
         lnet = LogisticNet(alpha=alpha)
         lnet.fit(X, y, lambdas=[0])
         # Threshold the predictions at .5 to get hard 0/1 labels.
         hard_labels = (lnet.predict(X) >= .5).astype(int)
         self.assertTrue(np.all(y == hard_labels))
Exemplo n.º 4
0
 def test_unregularized_models(self):
     '''A lambda=0 fit should classify its own training data perfectly.

     The labels come from thresholding a random linear score at zero; both
     the dense matrix and its CSC copy are exercised for each alpha.
     '''
     n_obs, n_feat = 50, 10
     X_dense = np.random.uniform(-1, 1, size=(n_obs, n_feat))
     X_sparse = csc_matrix(X_dense)
     weights = np.random.uniform(-1, 1, size=(n_feat, ))
     score = np.dot(X_dense, weights)
     y = (score >= 0).astype(int)
     for alpha in (0, .5, 1):
         for X in (X_dense, X_sparse):
             model = LogisticNet(alpha=alpha)
             model.fit(X, y, lambdas=[0])
             # Convert the predictions to 0/1 labels with a .5 cut-off.
             predicted = (model.predict(X) >= .5).astype(int)
             matches = (y == predicted)
             self.assertTrue(np.all(matches))
Exemplo n.º 5
0
 def test_ridge_models(self):
     '''Ridge fits (alpha=0) should recover the direction of the true weights.

     The true weights are each +0.5 or -0.5 and the labels are the sign of
     the resulting linear score.  For every lambda, on dense and sparse
     input, the fitted coefficients divided by the true weights must be
     (after scaling by their maximum) within .05 of 1.
     '''
     Xdn = np.random.uniform(-1, 1, size=(50000, 3))
     Xsp = csc_matrix(Xdn)
     w = (np.random.uniform(-1, 1, size=(3, )) >= 0).astype(int) - .5
     # The labels depend only on Xdn and w, so build them once instead of
     # repeating the 50000-row dot product on every loop iteration.
     y = (np.dot(Xdn, w) >= 0).astype(int)
     for X in (Xdn, Xsp):
         for lam in np.linspace(.1, 1, 10):
             lnet = LogisticNet(alpha=0)
             lnet.fit(X, y, lambdas=[lam])
             ratios = lnet._coefficients.ravel() / w
             # NOTE(review): scaling by the max means a uniform sign flip of
             # all coefficients would still pass -- confirm that is fine.
             norm_ratios = ratios / np.max(ratios)
             test = np.allclose(norm_ratios, 1, atol=.05)
             self.assertTrue(test)
Exemplo n.º 6
0
 def test_max_lambda(self):
     '''The Python-side max lambda must agree with the fitted lambda path.

     The path value is extrapolated one step back from entries 1 and 2 of
     ``out_lambdas`` (presumably because entry 0 is not the true maximum --
     confirm against the wrapper) and compared to ``_max_lambda`` to four
     decimal places.
     '''
     dense = np.random.uniform(-1, 1, size=(50, 10))
     sparse = csc_matrix(dense)
     true_w = np.random.uniform(-1, 1, size=(10, ))
     y = (np.dot(dense, true_w) >= 0).astype(int)
     for X in (dense, sparse):
         for alpha in [.01, .5, 1]:
             lnet = LogisticNet(alpha=alpha)
             lnet.fit(X, y)
             path = lnet.out_lambdas
             # Ratio between consecutive path entries, applied once more.
             step_ratio = path[1] / path[2]
             from_fortran = path[1] * step_ratio
             from_python = lnet._max_lambda(X, y)
             self.assertAlmostEqual(from_fortran, from_python, places=4)
Exemplo n.º 7
0
 def test_lasso_models(self):
     '''A lasso fit (alpha=1) should select exactly the active features.

     Only the first ``w_mask`` true weights are kept non-zero; the number of
     fitted coefficients whose magnitude exceeds 5% of the largest one must
     equal ``w_mask``.
     '''
     dense = np.random.uniform(-1, 1, size=(15000, 10))
     sparse = csc_matrix(dense)
     w = (np.random.uniform(-1, 1, size=(10, )) >= 0).astype(int) - .5
     for w_mask in range(1, 10):
         for X in (dense, sparse):
             # Zero every true weight from position w_mask onwards.
             w_masked = w.copy()
             w_masked[w_mask:] = 0
             score = np.dot(dense, w_masked)
             y = (score >= 0).astype(int)
             lnet = LogisticNet(alpha=1)
             lnet.fit(X, y, lambdas=[.01])
             coefs = lnet._coefficients
             largest = np.max(np.abs(coefs))
             relative_size = np.abs(coefs / largest)
             n_selected = np.sum(relative_size > .05)
             self.assertTrue(n_selected == w_mask)
Exemplo n.º 8
0
 def test_max_lambda(self):
     '''Compare the wrapper's ``_max_lambda`` with the fitted lambda path.

     The reference value is obtained by extending the path one step back
     from its entries 1 and 2 (presumably entry 0 is a placeholder -- verify
     against the wrapper); agreement is required to four decimal places.
     '''
     X_dense = np.random.uniform(-1, 1, size=(50, 10))
     X_sparse = csc_matrix(X_dense)
     weights = np.random.uniform(-1, 1, size=(10,))
     y = (np.dot(X_dense, weights) >= 0).astype(int)
     for X in (X_dense, X_sparse):
         for alpha in (.01, .5, 1):
             model = LogisticNet(alpha=alpha)
             model.fit(X, y)
             lambdas = model.out_lambdas
             # One more step of the ratio seen between entries 1 and 2.
             expected = lambdas[1] * (lambdas[1] / lambdas[2])
             actual = model._max_lambda(X, y)
             self.assertAlmostEqual(expected, actual, 4)
Exemplo n.º 9
0
 def test_ridge_models(self):
     '''Ridge fits (alpha=0) should point in the direction of the true weights.

     Each true weight is +0.5 or -0.5 and the labels are the sign of the
     linear score they produce.  For every lambda, with dense and sparse
     input, the element-wise ratio of fitted to true weights, scaled by its
     maximum, must lie within .05 of 1.
     '''
     Xdn = np.random.uniform(-1, 1, size=(50000,3))
     Xsp = csc_matrix(Xdn)
     w = (np.random.uniform(-1, 1, size=(3,)) >= 0).astype(int) - .5
     # Loop-invariant: the labels depend only on Xdn and w, so compute the
     # 50000-row dot product once rather than on each of the 20 iterations.
     y = (np.dot(Xdn, w) >= 0).astype(int)
     for X in (Xdn, Xsp):
         for lam in np.linspace(.1, 1, 10):
             lnet = LogisticNet(alpha=0)
             lnet.fit(X, y, lambdas=[lam])
             ratios = lnet._coefficients.ravel() / w
             # NOTE(review): because of the division by the max, coefficients
             # that are all sign-flipped would also pass -- confirm intended.
             norm_ratios = ratios / np.max(ratios)
             test = np.allclose(
                 norm_ratios, 1, atol=.05
             )
             self.assertTrue(test)
Exemplo n.º 10
0
 def test_lasso_models(self):
     '''Lasso (alpha=1) should keep as many coefficients as there are signals.

     For each ``w_mask`` the true weights beyond the first ``w_mask`` are
     zeroed before generating labels; the fit must then have exactly
     ``w_mask`` coefficients larger than 5% of the biggest one in magnitude.
     '''
     X_dense = np.random.uniform(-1, 1, size=(15000,10))
     X_sparse = csc_matrix(X_dense)
     signs = np.random.uniform(-1, 1, size=(10,)) >= 0
     w = signs.astype(int) - .5
     for w_mask in range(1, 10):
         for X in (X_dense, X_sparse):
             # Keep only the leading w_mask true weights.
             w_masked = w.copy()
             w_masked[w_mask:] = 0
             y = (np.dot(X_dense, w_masked) >= 0).astype(int)
             model = LogisticNet(alpha=1)
             model.fit(X, y, lambdas=[.01])
             lc = model._coefficients
             is_large = np.abs(lc / np.max(np.abs(lc))) > .05
             self.assertTrue(np.sum(is_large) == w_mask)
Exemplo n.º 11
0
 def test_edge_cases(self):
     '''Edge cases in model specification.

     Covers: predicting after one or several very large lambdas, agreement
     of the max-lambda computations when part of the design is zeroed out,
     and the ValueError expected from fitting an all-constant design.
     '''
     X = np.random.uniform(-1, 1, size=(50,10))
     w = np.random.uniform(-1, 1, size=(10,))
     y = (np.dot(X, w) >= 0).astype(int)
     # Edge case
     #    A single lambda is so big that it sets all estimated coefficients
     #    to zero.  This used to break the predict method.
     lnet = LogisticNet(alpha=1)
     lnet.fit(X, y, lambdas=[10**5])
     _ = lnet.predict(X)
     # Edge case
     #    Multiple lambdas are so big as to set all estimated coefficients
     #    to zero.  This used to break the predict method.
     lnet = LogisticNet(alpha=1)
     lnet.fit(X, y, lambdas=[10**5, 2*10**5])
     _ = lnet.predict(X)
     # Edge case:
     #    Some predictors have zero variance.  This used to break lambda
     #    max.
     # NOTE(review): the assignments below zero out *rows* 2 and 8 of a
     #    (50, 10) matrix, not columns -- confirm whether X[:, 2] and
     #    X[:, 8] were intended to obtain zero-variance predictors.
     X = np.random.uniform(-1, 1, size=(50,10))
     X[2,:] = 0
     X[8,:] = 0
     y = (np.dot(X, w) >= 0).astype(int)
     lnet = LogisticNet(alpha=.1)
     lnet.fit(X, y)
     ol = lnet.out_lambdas
     # Extend the lambda path one step back from its entries 1 and 2.
     max_lambda_from_fortran = ol[1] * (ol[1]/ol[2])
     max_lambda_from_python = lnet._max_lambda(X, y)
     self.assertAlmostEqual(
         max_lambda_from_fortran, max_lambda_from_python, 4
     )
     # Edge case.
     #     All predictors have zero variance.  This is an error in
     #     specification.
     # Only the fit call sits inside assertRaises, so a ValueError raised
     # while building the fixture or the model cannot satisfy the check.
     X = np.ones(shape=(50,10))
     lnet = LogisticNet(alpha=.1)
     with self.assertRaises(ValueError):
         lnet.fit(X, y)
Exemplo n.º 12
0
 def test_edge_cases(self):
     '''Edge cases in model specification.

     Covers: predicting after one or several very large lambdas, agreement
     of the max-lambda computations when part of the design is zeroed out,
     and the ValueError expected from fitting an all-constant design.
     '''
     X = np.random.uniform(-1, 1, size=(50, 10))
     w = np.random.uniform(-1, 1, size=(10, ))
     y = (np.dot(X, w) >= 0).astype(int)
     # Edge case
     #    A single lambda is so big that it sets all estimated coefficients
     #    to zero.  This used to break the predict method.
     lnet = LogisticNet(alpha=1)
     lnet.fit(X, y, lambdas=[10**5])
     _ = lnet.predict(X)
     # Edge case
     #    Multiple lambdas are so big as to set all estimated coefficients
     #    to zero.  This used to break the predict method.
     lnet = LogisticNet(alpha=1)
     lnet.fit(X, y, lambdas=[10**5, 2 * 10**5])
     _ = lnet.predict(X)
     # Edge case:
     #    Some predictors have zero variance.  This used to break lambda
     #    max.
     # NOTE(review): rows 2 and 8 of the (50, 10) matrix are zeroed here,
     #    not columns -- confirm whether X[:, 2] and X[:, 8] were meant so
     #    that predictors (columns) actually have zero variance.
     X = np.random.uniform(-1, 1, size=(50, 10))
     X[2, :] = 0
     X[8, :] = 0
     y = (np.dot(X, w) >= 0).astype(int)
     lnet = LogisticNet(alpha=.1)
     lnet.fit(X, y)
     ol = lnet.out_lambdas
     # Extend the lambda path one step back from its entries 1 and 2.
     max_lambda_from_fortran = ol[1] * (ol[1] / ol[2])
     max_lambda_from_python = lnet._max_lambda(X, y)
     self.assertAlmostEqual(max_lambda_from_fortran, max_lambda_from_python,
                            4)
     # Edge case.
     #     All predictors have zero variance.  This is an error in
     #     specification.
     # Fixture and model are built outside assertRaises so that only a
     # ValueError coming from fit itself can make the check pass.
     X = np.ones(shape=(50, 10))
     lnet = LogisticNet(alpha=.1)
     with self.assertRaises(ValueError):
         lnet.fit(X, y)