Пример #1
0
def slingshot(adata, start, n_pcs=5, cl=None):
    """Run the R ``slingshot`` package and store its output on ``adata``.

    The first ``n_pcs`` columns of ``adata.obsm['X_pca']`` and the labels in
    ``adata.obs[cl]`` are sent to R, a ``SlingshotDataSet`` is built from
    them and ``slingshot`` is run with ``start.clus`` set to ``start``.

    :param adata: object exposing ``obs``, ``obsm``, ``obs_names`` and ``uns``
        (presumably an AnnData object -- confirm with callers)
    :param start: starting cluster; interpolated verbatim into the R call
    :param n_pcs: number of leading ``X_pca`` columns to use
    :param cl: name of the ``adata.obs`` column holding the cluster labels
    :returns: ``adata``; one ``{cl}_lineage_{i}`` pseudotime column per
        lineage is added to ``obs`` and the lineages are stored under
        ``adata.uns['slingshot']['lineages']``
    """
    import numpy as np
    import pandas as pd
    import rpy2.robjects as ro
    from rpy2.robjects import numpy2ri, pandas2ri
    from rpy2.robjects.packages import importr

    importr('slingshot')
    numpy2ri.activate()
    pandas2ri.activate()
    try:
        ro.r.assign('pca', adata.obsm['X_pca'][:, :n_pcs])
        ro.r.assign('cl', adata.obs[cl])
        ro.reval('sds <- newSlingshotDataSet(pca, cl)')
        ro.reval(f'sce <- slingshot(sds, cl, start.clus="{start}")')
        pt = pd.DataFrame(np.asarray(ro.reval('slingPseudotime(sce)')),
                          index=adata.obs_names)
        pt.columns = [f'{cl}_lineage_{c}' for c in pt.columns]

        # Drop whichever pseudotime columns are left over from an earlier
        # run.  Dropping them all-or-nothing would keep every stale column
        # as soon as one is missing and produce duplicated column names.
        stale = [c for c in pt.columns if c in adata.obs.columns]
        if len(stale) < len(pt.columns):
            print('PT keys not dropped in obs dataframe: Not found.')
        if stale:
            adata.obs = adata.obs.drop(stale, axis=1)
        adata.obs = pd.concat([adata.obs, pt], axis=1)

        lineages = np.asarray(ro.reval('sce@lineages'))
        adata.uns['slingshot'] = {
            'lineages': {
                i: list(np.asarray(lineage))
                for i, lineage in enumerate(lineages)
            }
        }
    finally:
        # Always restore the global rpy2 conversion, even when R fails.
        numpy2ri.deactivate()
        pandas2ri.deactivate()
    return adata
Пример #2
0
def fit_curve(data,
              circle=False,
              iterations=500,
              stretch=None,
              threshold=0.00001):
    """
    Fit a principal curve to ``data`` with the R ``princurve`` package.

    :param data: numpy array, shape (n_samples, n_features), to be denoised
    :param circle: True if fitting starts with a circle, useful for denoising closed curves
    :param iterations: maximum number of iterations
    :param stretch: parameter that affects curve extrapolation; when None it
        defaults to 0 for ``circle=True`` and to 2 otherwise
    :param threshold: convergence threshold on shortest distances to the curve
    :returns: denoised data in numpy array with shape (n_samples, n_features)
    """
    # For more information see:
    # https://cran.r-project.org/web/packages/princurve/princurve.pdf
    if circle:
        smoother, default_stretch = 'periodic.lowess', 0
    else:
        smoother, default_stretch = 'smooth.spline', 2
    if stretch is None:
        stretch = default_stretch

    numpy2ri.activate()
    try:
        pc = princurve.principal_curve(data,
                                       maxit=iterations,
                                       stretch=stretch,
                                       smoother=smoother,
                                       thresh=threshold)
    finally:
        # Restore the global conversion even if the R call raises.
        numpy2ri.deactivate()
    return np.array(pc[0])
Пример #3
0
def activate():
    """Install a global converter combining numpy2ri's rules with this module's.

    A snapshot of the current ``conversion.converter`` is kept in the
    module-level ``original_converter``.  A second converter is built while
    numpy2ri is temporarily active, every rule registered in this module's
    ``py2ri``, ``ri2ro``, ``py2ro`` and ``ri2py`` (except the entry keyed on
    ``object``) is copied onto it, and it is installed as the global
    converter.  Calling this while already activated does nothing.
    """
    global original_converter
    # If module is already activated, there is nothing to do
    if original_converter is not None:
        return

    original_converter = conversion.make_converter('snapshot before pandas conversion',
                                                   template=conversion.converter)
    numpy2ri.activate()
    new_converter = conversion.make_converter('snapshot before pandas conversion',
                                              template=conversion.converter)
    numpy2ri.deactivate()

    # (module-level dispatcher, matching dispatcher of the new converter),
    # processed in the same order as before.
    rule_sets = (
        (py2ri, new_converter.py2ri),
        (ri2ro, new_converter.ri2ro),
        (py2ro, new_converter.py2ro),
        (ri2py, new_converter.ri2py),
    )
    for source, target in rule_sets:
        for key, rule in source.registry.items():
            if key is not object:
                target.register(key, rule)

    conversion.converter = new_converter
    (_name,
     conversion.ri2ro,
     conversion.py2ri,
     conversion.py2ro,
     conversion.ri2py,
     _lineage) = new_converter
Пример #4
0
def gaussian_setup(X, Y, run_CV=True):
    """
    Some calculations that can be reused by methods:

    lambda.min, lambda.1se, lambda.theory and Reid et al. estimate of noise

    :param X: 2-d array of shape (n, p)
    :param Y: response passed to ``np.std`` and, when ``run_CV`` is true, to R
    :param run_CV: when true, run ``cv.glmnet`` and the
        ``selectiveInference`` noise estimate in R
    :returns: ``(lambda.min * sqrt(n), lambda.1se * sqrt(n), l_theory,
        sigma_reid)``; with ``run_CV=False`` only ``l_theory`` is computed
        and the other three entries are ``None``
    """
    n, p = X.shape

    # Columns scaled to unit Euclidean norm.
    Xn = X / np.sqrt((X**2).sum(0))[None, :]

    # Monte Carlo average (500 draws) of max |Xn' z|, scaled by std(Y) and
    # repeated for each of the p features.
    l_theory = np.fabs(Xn.T.dot(np.random.standard_normal(
        (n, 500)))).max(1).mean() * np.ones(p) * np.std(Y)

    if not run_CV:
        return None, None, l_theory, None

    numpy2ri.activate()
    try:
        rpy.r.assign('X', X)
        rpy.r.assign('Y', Y)
        rpy.r('X=as.matrix(X)')
        rpy.r('Y=as.numeric(Y)')
        rpy.r('G = cv.glmnet(X, Y, intercept=FALSE, standardize=FALSE)')
        rpy.r(
            'sigma_reid = selectiveInference:::estimate_sigma(X, Y, coef(G, s="lambda.min")[-1]) # sigma via Reid et al.'
        )
        rpy.r("L = G[['lambda.min']]")
        rpy.r("L1 = G[['lambda.1se']]")
        L = rpy.r('L')
        L1 = rpy.r('L1')
        sigma_reid = rpy.r('sigma_reid')[0]
    finally:
        # Restore the global conversion even if one of the R calls fails.
        numpy2ri.deactivate()
    return L * np.sqrt(n), L1 * np.sqrt(n), l_theory, sigma_reid
Пример #5
0
def hugeR(X, lambda_threshold):
    """
    This function computes the covariance matrix and the corresponding sparse
    inverse covariance matrix of the numpy input matrix X, using the huge R
    package by Liu et al.

    It transforms the variables in X into the nonparanormal family which
    then allows us to use the glasso algorithm to estimate the sparse inverse
    cov matrix. The lossy pre-screening isn't used here to speed up the
    calculations because we prefilter X so n ~ p. We test 30 lambda values for
    regularisation. The best model is selected using the 'stars' stability
    approach with ``lambda_threshold`` as its threshold.

    For more details check docs and vignette:
    https://cran.r-project.org/web/packages/huge/huge.pdf
    http://r.meteo.uni.wroc.pl/web/packages/huge/vignettes/vignette.pdf

    :param X: numpy matrix handed to ``huge.npn``
    :param lambda_threshold: value passed as ``stars.thresh`` to ``huge.select``
    :returns: ``(cov, prec)`` numpy arrays built from the selected model's
        ``opt.cov`` and ``opt.icov`` entries
    """
    base = importr('base')
    huge = importr('huge')

    # this allows us to send numpy to R directly, neat
    numpy2ri.activate()
    try:
        X_npn = huge.huge_npn(X, npn_func="shrinkage")
        model = huge.huge(X_npn, nlambda=30, method='glasso', scr=False,
                          cov_output=True)
        model_stars = huge.huge_select(model, criterion="stars",
                                       stars_thresh=lambda_threshold)
        cov = np.array(base.as_matrix(model_stars.rx('opt.cov')[0]))
        prec = np.array(base.as_matrix(model_stars.rx('opt.icov')[0]))
    finally:
        # we need to turn this off once we're done, also when R raised
        numpy2ri.deactivate()
    return cov, prec
Пример #6
0
def test_ROSI_gaussian_JM():
    """Compare ROSI p-values ('JM' approximate inverse) with the R package.

    Gaussian instances are drawn until more than four variables are active;
    the Python p-values must then correlate above 0.999 with those computed
    by R's ``ROSI`` on the same data.
    """
    n, p, s = 100, 30, 15

    # R side of the comparison; kept verbatim.
    r_script = """

            y = as.numeric(y)
            n = nrow(X)
            p = ncol(X)

            penalty_factor = rep(1, p);
            soln = selectiveInference:::solve_problem_glmnet(X, 
                                                             y, 
                                                             lam/n, 
                                                             penalty_factor=penalty_factor,
                                                             family="gaussian")
            PVS = ROSI(X, 
                       y, 
                       soln, 
                       lambda=lam, 
                       penalty_factor=penalty_factor, 
                       dispersion=sigma_est^2, 
                       family="gaussian",
                       solver="QP", 
                       construct_ci=FALSE,  
                       use_debiased=TRUE)
            active_vars=PVS$active_vars - 1 # for 0-based
            pvalues = PVS$pvalues
            """

    while True:
        X, y, _, _, sigma, _ = gaussian_instance(n=n,
                                                 p=p,
                                                 s=s,
                                                 equicorrelated=False,
                                                 signal=4)

        lam = 7. * np.sqrt(n)
        X *= np.sqrt(n)
        L = ROSI.gaussian(X, y, lam, approximate_inverse='JM')
        L.sparse_inverse = True
        L.fit()

        print('here', len(L.active))
        if len(L.active) <= 4:
            # Too few selected variables for a meaningful comparison.
            continue

        S = L.summary(compute_intervals=False, dispersion=sigma**2)
        numpy2ri.activate()
        try:
            rpy.r.assign("X", X)
            rpy.r.assign("y", y)
            rpy.r.assign("sigma_est", sigma)
            rpy.r.assign("lam", lam)
            rpy.r(r_script)
            pvalues = np.asarray(rpy.r('pvalues'))
            pvalues = pvalues[~np.isnan(pvalues)]

            print(pvalues)
            print(np.asarray(S['pvalue']))

            nt.assert_true(np.corrcoef(pvalues, S['pvalue'])[0, 1] > 0.999)
        finally:
            # A failing assertion must not leave the conversion active for
            # the tests that run afterwards.
            numpy2ri.deactivate()
        break
Пример #7
0
    def select(self):
        """Run R's ``knockoff.filter`` with a custom knockoff sampler.

        ``self.knockoff_chol`` is sent to R as ``chol_k`` and an R function
        ``knockoffs`` is defined that returns ``X`` plus standard normal
        noise multiplied by ``chol_k``.  ``knockoff.filter`` is then run on
        ``self.X`` / ``self.Y`` at FDR level ``self.q``.

        :returns: two integer arrays, both holding the selected indices
            shifted to 0-based; ``([], [])`` when the filter step fails
        """
        sampler_src = '''
        knockoffs = function(X) {
           mu = rep(0, ncol(X))
           mu_k = X # sweep(X, 2, mu, "-") %*% SigmaInv_s
           X_k = mu_k + matrix(rnorm(ncol(X) * nrow(X)), nrow(X)) %*% 
            chol_k
           return(X_k)
        }
            '''

        numpy2ri.activate()
        try:
            # Errors while defining the sampler still propagate, as before.
            rpy.r.assign('chol_k', self.knockoff_chol)
            rpy.r(sampler_src)

            try:
                rpy.r.assign('X', self.X)
                rpy.r.assign('Y', self.Y)
                rpy.r.assign('q', self.q)
                rpy.r(
                    'V=knockoff.filter(X, Y, fdr=q, knockoffs=knockoffs)$selected')
                rpy.r('if (length(V) > 0) {V = V-1}')
                V = rpy.r('V')
                # ``np.int`` no longer exists in current NumPy; with the old
                # bare ``except`` that made this method return ([], [])
                # unconditionally.  The builtin ``int`` is what it aliased.
                return np.asarray(V, int), np.asarray(V, int)
            except Exception:
                # Best effort: a failing filter means "nothing selected".
                return [], []
        finally:
            # Runs on every path, so the conversion is never left active.
            numpy2ri.deactivate()
Пример #8
0
def dlsa(Sig_inv_, beta_, sample_size, fit_intercept=False):
    '''Distributed Least Squares Approximation

    Calls ``lars_lsa`` on the given matrix and coefficient vector and picks
    the coefficient rows of its solution path that minimise AIC and BIC.

    :param Sig_inv_: array-like passed to ``lars_lsa`` as its first argument
        (presumably an inverse covariance matrix -- confirm with callers)
    :param beta_: array-like of coefficient estimates; with
        ``fit_intercept=True`` its first entry is added to the fitted
        ``beta0`` path
    :param sample_size: passed to ``lars_lsa`` as ``n``
    :param fit_intercept: passed to ``lars_lsa`` as ``intercept``; when true
        the intercept is prepended to the returned coefficients
    :returns: DataFrame with columns ``beta_byAIC`` and ``beta_byBIC``
    '''
    # Converted once so that plain arrays work as well as pandas objects
    # (the intercept branch used to require a ``.to_numpy()`` method).
    beta_byOLS = np.asarray(beta_)

    numpy2ri.activate()
    try:
        dfitted = lars_lsa(np.asarray(Sig_inv_), beta_byOLS,
                           intercept=fit_intercept, n=sample_size)
    finally:
        # Restore the global conversion even if the R call raises.
        numpy2ri.deactivate()

    AIC = robjects.FloatVector(dfitted.rx2("AIC"))
    AIC_minIdx = np.argmin(AIC)
    BIC = robjects.FloatVector(dfitted.rx2("BIC"))
    BIC_minIdx = np.argmin(BIC)
    beta = np.array(robjects.FloatVector(dfitted.rx2("beta")))

    if fit_intercept:
        beta0 = np.array(robjects.FloatVector(dfitted.rx2("beta0"))) + beta_byOLS[0]

        beta_byAIC = np.hstack([beta0[AIC_minIdx], beta[AIC_minIdx, :]])
        beta_byBIC = np.hstack([beta0[BIC_minIdx], beta[BIC_minIdx, :]])
    else:
        beta_byAIC = beta[AIC_minIdx, :]
        beta_byBIC = beta[BIC_minIdx, :]

    return pd.DataFrame({"beta_byAIC": beta_byAIC, "beta_byBIC": beta_byBIC})
Пример #9
0
    def Rpval(X, Y, W, noise_scale=None):
        """Run the randomized lasso and its inference in R.

        :param X: design matrix, assigned to R as ``X``
        :param Y: response, assigned to R as ``Y``
        :param W: penalty, assigned to R as ``lam``
        :param noise_scale: optional; when given it is forwarded to
            ``randomizedLasso`` together with tighter ``kkt_tol`` /
            ``parameter_tol`` values
        :returns: tuple ``(pval, vars, rand, active, soln, ridge, cond_cov,
            cond_mean)`` read from the R ``soln`` / ``rand_inf`` objects;
            ``vars`` and ``active`` are shifted to 0-based indices
        """
        numpy2ri.activate()
        try:
            rpy.r.assign('X', X)
            rpy.r.assign('Y', Y)
            rpy.r.assign('lam', W)

            if noise_scale is not None:
                rpy.r.assign('noise_scale', noise_scale)
                rpy.r(
                    'soln = selectiveInference:::randomizedLasso(X, Y, lam, noise_scale=noise_scale, kkt_tol=1.e-8, parameter_tol=1.e-8)'
                )
            else:
                rpy.r('soln = selectiveInference:::randomizedLasso(X, Y, lam)')
            rpy.r('targets=selectiveInference:::compute_target(soln, type="full")')
            rpy.r(
                'rand_inf = selectiveInference:::randomizedLassoInf(soln, sampler="adaptMCMC", targets=targets, nsample=5000, burnin=2000)'
            )

            pval = np.asarray(rpy.r('rand_inf$pvalues'))
            # R indices are 1-based.
            active_vars = np.asarray(rpy.r('soln$active_set')) - 1
            cond_cov = np.asarray(rpy.r('soln$law$cond_cov'))
            cond_mean = np.asarray(rpy.r('soln$law$cond_mean'))
            rand = np.asarray(rpy.r('soln$perturb'))
            active = np.asarray(rpy.r('soln$active')) - 1
            soln = np.asarray(rpy.r('soln$soln'))
            ridge = rpy.r('soln$ridge_term')
        finally:
            # Restore the global conversion even if an R call fails.
            numpy2ri.deactivate()
        return pval, active_vars, rand, active, soln, ridge, cond_cov, cond_mean
Пример #10
0
    def setup(cls, feature_cov, data_generating_mechanism):
        """Store the problem description on ``cls`` and obtain ``knockoff_chol``.

        Cached factorizations are looked up in ``.knockoff_factorizations``
        (created if missing).  A cache file matches when its ``feature_cov``
        has the same shape and values and its ``method`` equals
        ``cls.factor_method``; if several match, the last one wins.  Without
        a match ``factor_knockoffs`` is called.

        :param feature_cov: feature covariance matrix
        :param data_generating_mechanism: object whose ``noise`` attribute is
            copied to ``cls.noise``
        """
        cls.feature_cov = feature_cov
        cls.data_generating_mechanism = data_generating_mechanism
        cls.noise = data_generating_mechanism.noise
        numpy2ri.activate()
        try:
            # see if we've factored this before

            have_factorization = False
            if not os.path.exists('.knockoff_factorizations'):
                os.mkdir('.knockoff_factorizations')
            for factor_file in glob.glob('.knockoff_factorizations/*npz'):
                # np.load keeps the archive open; close it deterministically.
                with np.load(factor_file) as factor:
                    feature_cov_f = factor['feature_cov']
                    if ((feature_cov_f.shape == feature_cov.shape)
                            and (factor['method'] == cls.factor_method)
                            and np.allclose(feature_cov_f, feature_cov)):
                        have_factorization = True
                        print('found factorization: %s' % factor_file)
                        cls.knockoff_chol = factor['knockoff_chol']

            if not have_factorization:
                print('doing factorization')
                cls.knockoff_chol = factor_knockoffs(feature_cov,
                                                     cls.factor_method)
        finally:
            # Restore the global conversion even if the factorization fails.
            numpy2ri.deactivate()
Пример #11
0
    def setup(cls, feature_cov):
        """Store ``feature_cov`` on ``cls`` and obtain ``cls.knockoff_chol``.

        Cached factorizations are looked up in ``.knockoff_factorizations``
        (created if missing).  A cache file matches when its ``feature_cov``
        has the same shape and values and its ``method`` equals
        ``cls.factor_method``; if several match, the last one wins.  Without
        a match ``factor_knockoffs`` is called.

        :param feature_cov: feature covariance matrix
        """
        cls.feature_cov = feature_cov
        numpy2ri.activate()
        try:
            # see if we've factored this before

            have_factorization = False
            if not os.path.exists('.knockoff_factorizations'):
                os.mkdir('.knockoff_factorizations')
            for factor_file in glob.glob('.knockoff_factorizations/*npz'):
                # np.load keeps the archive open; close it deterministically.
                with np.load(factor_file) as factor:
                    feature_cov_f = factor['feature_cov']
                    if ((feature_cov_f.shape == feature_cov.shape)
                            and (factor['method'] == cls.factor_method)
                            and np.allclose(feature_cov_f, feature_cov)):
                        have_factorization = True
                        cls.knockoff_chol = factor['knockoff_chol']

            if not have_factorization:
                cls.knockoff_chol = factor_knockoffs(feature_cov,
                                                     cls.factor_method)
        finally:
            # Restore the global conversion even if the factorization fails.
            numpy2ri.deactivate()
Пример #12
0
    def setup(cls,
              feature_cov,
              data_generating_mechanism,
              max_model_size=6,
              level=0.90):
        """Store the problem description on ``cls`` and obtain ``cls.POSI_K``.

        Cached constants are looked up in ``.POSI_data`` (created if
        missing).  A cache file matches when its ``feature_cov`` has the same
        shape and values and its ``max_model_size`` and ``level`` equal the
        arguments; if several match, the last one wins.  Without a match the
        constant is simulated with ``POSI_instance``.

        :param feature_cov: feature covariance matrix
        :param data_generating_mechanism: object whose ``noise`` attribute is
            copied to ``cls.noise``
        :param max_model_size: forwarded to ``POSI_instance`` and used to
            match cache files
        :param level: only used to match cache files
            (NOTE(review): it is not forwarded to ``POSI_instance`` --
            confirm that this is intended)
        """
        cls.feature_cov = feature_cov
        cls.data_generating_mechanism = data_generating_mechanism
        cls.noise = data_generating_mechanism.noise
        numpy2ri.activate()
        try:
            # see if we've simulated this constant before

            have_POSI_K = False
            if not os.path.exists('.POSI_data'):
                os.mkdir('.POSI_data')
            for posi_file in glob.glob('.POSI_data/*npz'):
                # np.load keeps the archive open; close it deterministically.
                with np.load(posi_file) as posi:
                    posi_f = posi['feature_cov']
                    if ((posi_f.shape == feature_cov.shape)
                            and np.allclose(posi_f, feature_cov)
                            and (posi['max_model_size'] == max_model_size)
                            and (posi['level'] == level)):
                        have_POSI_K = True
                        # Report the file name, not the NpzFile object.
                        print('found POSI instance: %s' % posi_file)
                        cls.POSI_K = float(posi['K'])

            if not have_POSI_K:
                print('simulating POSI constant')
                cls.POSI_K = float(
                    POSI_instance(feature_cov,
                                  max_model_size,
                                  n=10 * feature_cov.shape[0]))
        finally:
            # Restore the global conversion even if the simulation fails.
            numpy2ri.deactivate()
Пример #13
0
def activate():
    """Install a global converter combining numpy2ri's rules with this module's.

    The current ``conversion.converter`` is snapshotted into the module-level
    ``original_converter``.  A second converter is created while numpy2ri is
    temporarily active, the rules registered in this module's ``py2ri``,
    ``ri2ro``, ``py2ro`` and ``ri2py`` (all but the entry keyed on
    ``object``) are copied onto it, and it is installed through
    ``conversion.set_conversion``.  Does nothing when already activated.
    """
    global original_converter
    # If module is already activated, there is nothing to do
    if original_converter is not None:
        return

    snapshot_label = 'snapshot before pandas conversion'
    original_converter = conversion.Converter(snapshot_label,
                                              template=conversion.converter)
    numpy2ri.activate()
    new_converter = conversion.Converter(snapshot_label,
                                         template=conversion.converter)
    numpy2ri.deactivate()

    # (module-level dispatcher, dispatcher of the new converter)
    rule_sets = (
        (py2ri, new_converter.py2ri),
        (ri2ro, new_converter.ri2ro),
        (py2ro, new_converter.py2ro),
        (ri2py, new_converter.ri2py),
    )
    for source, target in rule_sets:
        for key, rule in source.registry.items():
            if key is not object:
                target.register(key, rule)

    conversion.set_conversion(new_converter)
Пример #14
0
def activate():
    """Install a global converter combining numpy2ri's rules with this module's.

    Emits a ``DeprecationWarning`` on every call.  When not yet activated,
    the current ``conversion.converter`` is snapshotted into the module-level
    ``original_converter``, a second converter is created while numpy2ri is
    temporarily active, the rules registered in this module's ``py2rpy`` and
    ``rpy2py`` (all but the entry keyed on ``object``) are copied onto it,
    and it is installed through ``conversion.set_conversion``.
    """
    warnings.warn(
        'The global conversion available with activate() '
        'is deprecated and will be removed in the next '
        'major release. Use a local converter.',
        category=DeprecationWarning)
    global original_converter
    # If module is already activated, there is nothing to do.
    if original_converter is not None:
        return

    snapshot_label = 'snapshot before pandas conversion'
    original_converter = conversion.Converter(snapshot_label,
                                              template=conversion.converter)
    numpy2ri.activate()
    new_converter = conversion.Converter(snapshot_label,
                                         template=conversion.converter)
    numpy2ri.deactivate()

    # (module-level dispatcher, dispatcher of the new converter)
    rule_sets = (
        (py2rpy, new_converter.py2rpy),
        (rpy2py, new_converter.rpy2py),
    )
    for source, target in rule_sets:
        for key, rule in source.registry.items():
            if key is not object:
                target.register(key, rule)

    conversion.set_conversion(new_converter)
Пример #15
0
    def fit(self, X, y, **kwargs):
        """Fit the final model with R's ``flam`` and build the plot dataframe.

        The lambda sequence and the index of the chosen lambda come from
        ``self._select_lam_by_val``; ``flam`` is then rerun on the whole
        dataset and the fit at that index is turned into
        ``self.GAM_plot_dataframe``.  All variables in the R global
        environment are removed at the end.

        :param X: features, DataFrame or numpy array
        :param y: targets, pandas object or numpy array
        :param kwargs: accepted but not used by this method
        """
        import rpy2.robjects as ro
        import rpy2.robjects.numpy2ri as n2r

        # Make everything as numpy.  X and y are converted independently so
        # that a DataFrame X combined with an array y no longer fails on
        # ``y.values``.
        if isinstance(X, pd.DataFrame):
            X = X.values
        if isinstance(y, (pd.Series, pd.DataFrame)):
            y = y.values

        min_idx, all_lambdas = self._select_lam_by_val(X, y)

        # Create final model by rerunning the whole dataset
        with Timer('Fitting the final model'):
            n2r.activate()
            try:
                r_result = ro.r['flam'](X,
                                        y,
                                        family=self.family,
                                        alpha=1.,
                                        **{
                                            'lambda.seq': all_lambdas
                                        })
            finally:
                # Restore the global conversion even if flam raises.
                n2r.deactivate()

        scores = np.asanyarray(r_result.rx2('theta.hat.list')[min_idx])
        intercept = r_result.rx2('beta0.hat.vec')[min_idx]

        self.GAM_plot_dataframe = self._create_df_from_r_result(
            X, scores, intercept)
        ro.r('rm(list = ls())')  # Remove vars
Пример #16
0
    def naive_intervals(self, active_set):
        """Confidence intervals from an ordinary ``lm`` fit in R.

        With ``self.model_target == 'selected'`` the regression uses only
        the columns in ``active_set``.  Otherwise it uses all columns and
        the rows for ``active_set`` are picked from the result; that needs
        ``n > p``, and NaN bounds are returned when it does not hold.

        :param active_set: indices of the selected variables
        :returns: ``(active_set, lower, upper)`` at level ``self.confidence``
        """
        full_model = self.model_target != 'selected'
        if full_model:
            n, p = self.X.shape
            if n <= p:
                # Decided before touching rpy2: this early return used to
                # leave numpy2ri activated.
                return (active_set, np.ones(len(active_set)) * np.nan,
                        np.ones(len(active_set)) * np.nan)
            design = self.X
        else:
            design = self.X[:, active_set]

        numpy2ri.activate()
        try:
            rpy.r.assign("X", design)
            rpy.r.assign("Y", self.Y)
            rpy.r.assign("level", self.confidence)
            rpy.r('CI = confint(lm(Y ~ X - 1), level=level)')
            CI = np.asarray(rpy.r('CI'))
        finally:
            # Restore the global conversion even if R raises.
            numpy2ri.deactivate()

        if full_model:
            CI = CI[active_set]

        return active_set, CI[:, 0], CI[:, 1]
Пример #17
0
    def generate_pvalues(self):
        """Compute lasso p-values with R's ``fixedLassoInf``.

        ``self.X``, ``self.Y``, ``self.sigma_reid`` and ``self.lagrange[0]``
        are sent to R.  The penalty is divided by ``sqrt(n)`` there; when it
        is below ``max(abs(t(x) %*% y) / n)`` the lasso coefficients at that
        penalty are passed to ``fixedLassoInf``, otherwise the R script
        reports no active variables.

        :returns: ``(active_set, pvalues)`` with 0-based indices, or
            ``([], [])`` when no variable is active
        """
        r_script = '''
    sigma_est=sigma_reid
    n = nrow(x);
    gfit = glmnet(x, y, standardize=FALSE, intercept=FALSE)
    lam = lam / sqrt(n);  # lambdas are passed a sqrt(n) free from python code
    if (lam < max(abs(t(x) %*% y) / n)) {
        beta = coef(gfit, x=x, y=y, s=lam, exact=TRUE)[-1]
        out = fixedLassoInf(x, y, beta, lam*n, sigma=sigma_est, type='full', intercept=FALSE)
        active_vars=out$vars - 1 # for 0-based
        pvalues = out$pv
    } else {
        pvalues = NULL
        active_vars = numeric(0)
    }
    '''
        numpy2ri.activate()
        try:
            rpy.r.assign('x', self.X)
            rpy.r.assign('y', self.Y)
            rpy.r('y = as.numeric(y)')
            rpy.r.assign('sigma_reid', self.sigma_reid)
            rpy.r.assign('lam', self.lagrange[0])
            rpy.r(r_script)

            pvalues = np.asarray(rpy.r('pvalues'))
            active_set = np.asarray(rpy.r('active_vars'))
        finally:
            # Restore the global conversion even if an R call fails.
            numpy2ri.deactivate()
        if len(active_set) > 0:
            return active_set, pvalues
        else:
            return [], []
def test_liu_gaussian():
    """Compare ``lasso_full`` p-values with R's ``ROSI`` (no debiasing).

    Gaussian instances are drawn until more than four variables are active;
    the Python p-values must then correlate above 0.999 with those computed
    by R on the same data.
    """
    n, p, s = 200, 100, 20

    # R side of the comparison; kept verbatim.
    r_script = """
            y = as.numeric(y)
            n = nrow(X)
            p = ncol(X)
            #sigma_est = sigma(lm(y ~ X - 1))
            penalty_factor = rep(1, p);
            soln = selectiveInference:::solve_problem_glmnet(X, 
                                                             y, 
                                                             lam/n, 
                                                             penalty_factor=penalty_factor,
                                                             family="gaussian")
            PVS = ROSI(X, 
                       y, 
                       soln, 
                       lambda=lam, 
                       penalty_factor=penalty_factor, 
                       dispersion=sigma_est^2, 
                       family="gaussian",
                       solver="QP", 
                       construct_ci=FALSE,
                       use_debiased=FALSE)
            active_vars=PVS$active_vars - 1 # for 0-based
            pvalues = PVS$pvalues
            """

    while True:

        X, y, _, _, sigma, _ = gaussian_instance(n=n,
                                                 p=p,
                                                 s=s,
                                                 equicorrelated=False,
                                                 signal=10,
                                                 sigma=1.)

        lam = 4. * np.sqrt(n)
        X *= np.sqrt(n)
        L = lasso_full.gaussian(X, y, lam)
        L.fit()
        if len(L.active) <= 4:
            # Too few selected variables for a meaningful comparison.
            continue

        S = L.summary(compute_intervals=False, dispersion=sigma**2)
        numpy2ri.activate()
        try:
            rpy.r.assign('sigma_est', sigma)
            rpy.r.assign("X", X)
            rpy.r.assign("y", y)
            rpy.r.assign("lam", lam)
            rpy.r(r_script)
            pvalues = rpy.r('pvalues')
            pvalues = pvalues[~np.isnan(pvalues)]

            print(pvalues)
            print(S['pval'])
            nt.assert_true(np.corrcoef(pvalues, S['pval'])[0, 1] > 0.999)
        finally:
            # A failing assertion must not leave the conversion active for
            # the tests that run afterwards.
            numpy2ri.deactivate()
        break
Пример #19
0
def test_ROSI_logistic_BN():
    """Compare logistic ROSI p-values ('BN' approximate inverse) with R.

    Logistic instances are drawn until more than four variables are active;
    the Python p-values must then correlate above 0.999 with those computed
    by R's ``ROSI`` on the same data.
    """
    n, p, s = 100, 120, 15

    # R side of the comparison; kept verbatim.
    r_script = """

            y = as.numeric(y)
            n = nrow(X)
            p = ncol(X)

            penalty_factor = rep(1, p);
            soln = selectiveInference:::solve_problem_glmnet(X, 
                                                             y, 
                                                             lam/n, 
                                                             penalty_factor=penalty_factor,
                                                             family="binomial")
            PVS = ROSI(X, 
                       y, 
                       soln, 
                       lambda=lam, 
                       penalty_factor=penalty_factor, 
                       dispersion=1., 
                       family="binomial", 
                       debiasing_method="BN",
                       solver="QP", 
                       construct_ci=FALSE,
                       use_debiased=TRUE)
            active_vars=PVS$active_vars - 1 # for 0-based
            pvalues = PVS$pvalues
            """

    while True:
        X, y = logistic_instance(n=n,
                                 p=p,
                                 s=s,
                                 equicorrelated=False,
                                 signal=10)[:2]

        lam = 1. * np.sqrt(n)
        X *= np.sqrt(n)
        L = ROSI.logistic(X, y, lam, approximate_inverse='BN')
        L.fit()

        if len(L.active) <= 4:
            # Too few selected variables for a meaningful comparison.
            continue

        S = L.summary(compute_intervals=False, dispersion=1.)
        numpy2ri.activate()
        try:
            rpy.r.assign("X", X)
            rpy.r.assign("y", y)
            rpy.r.assign("lam", lam)
            rpy.r(r_script)
            pvalues = rpy.r('pvalues')
            pvalues = pvalues[~np.isnan(pvalues)]

            print(pvalues)
            print(np.asarray(S['pval']))

            nt.assert_true(np.corrcoef(pvalues, S['pval'])[0, 1] > 0.999)
        finally:
            # A failing assertion must not leave the conversion active for
            # the tests that run afterwards.
            numpy2ri.deactivate()
        break
Пример #20
0
 def testActivateTwice(self):
     """Repeated activate()/deactivate() calls must leave a consistent state.

     ``rpyn.numpy2ri`` has to be the installed ``py2ri`` before and after a
     second activation, and must no longer be installed after one
     deactivation nor after a second one.
     """
     # setUp method has already activated numpy converter, so the first
     # check runs without toggling anything.
     steps = (
         (None, self.assertEqual),
         (rpyn.activate, self.assertEqual),
         (rpyn.deactivate, self.assertNotEqual),
         (rpyn.deactivate, self.assertNotEqual),
     )
     for toggle, check in steps:
         if toggle is not None:
             toggle()
         check(rpyn.numpy2ri, robjects.conversion.py2ri)
Пример #21
0
 def testActivateTwice(self):
     """Activating twice keeps numpy2ri installed; deactivating twice is safe."""

     def installed():
         # Looked up on every call: activate()/deactivate() replace it.
         return robjects.conversion.py2ri

     # setUp method has already activated numpy converter
     self.assertEqual(rpyn.numpy2ri, installed())
     rpyn.activate()
     self.assertEqual(rpyn.numpy2ri, installed())
     for _ in range(2):
         rpyn.deactivate()
         self.assertNotEqual(rpyn.numpy2ri, installed())
Пример #22
0
def BHfilter(pval, q=0.2):
    """Select p-values with R's Benjamini-Hochberg adjustment.

    :param pval: p-values, passed to R's ``p.adjust(..., method="BH")``
    :param q: threshold; adjusted p-values strictly below it are selected
    :returns: integer array with the 0-based indices of the selected p-values
    """
    numpy2ri.activate()
    try:
        rpy.r.assign('pval', pval)
        rpy.r.assign('q', q)
        rpy.r('Pval = p.adjust(pval, method="BH")')
        rpy.r('S = which((Pval < q)) - 1')
        S = rpy.r('S')
    finally:
        # Restore the global conversion even if an R call fails.
        numpy2ri.deactivate()
    # ``np.int`` no longer exists in current NumPy; the builtin ``int`` is
    # what it aliased.
    return np.asarray(S, int)
Пример #23
0
def nlshrink_covariance(X, centered=False):
    """Estimate a covariance matrix with R's ``nlshrink_cov``.

    :param X: data matrix handed to ``r_nlshrink.nlshrink_cov``
    :param centered: when false, ``X.mean()`` is subtracted from ``X`` first
    :returns: the estimate as a numpy array
    """
    LOGGER.info("computing Ledoit-Wolf non-linear shrinkage covariance")
    if not centered:
        # NOTE(review): X.mean() without an axis is a single scalar for an
        # ndarray, not a per-column mean -- confirm that this is intended.
        X = X - X.mean()

    f = nostdout(r_nlshrink.nlshrink_cov)
    numpy2ri.activate()
    try:
        cov = np.asarray(f(X))
    finally:
        # Restore the global conversion even if the R call raises.
        numpy2ri.deactivate()
    return cov
 def testActivate(self):
     """activate() must add py2ri rules and deactivate() must remove them again."""
     rpyn.deactivate()
     #FIXME: is the following still making sense ?
     self.assertNotEqual(rpyn.py2ri, conversion.py2ri)

     # State of the global registry while the module is inactive.
     size_before = len(conversion.py2ri.registry)
     keys_before = set(conversion.py2ri.registry.keys())

     rpyn.activate()
     self.assertTrue(len(conversion.py2ri.registry) > size_before)

     rpyn.deactivate()
     registry_after = conversion.py2ri.registry
     self.assertEqual(size_before, len(registry_after))
     self.assertEqual(keys_before, set(registry_after.keys()))
Пример #25
0
 def testActivate(self):
     """The py2ri registry grows on activate() and is restored by deactivate()."""

     def registry():
         # Looked up on every call: activate()/deactivate() may replace
         # conversion.py2ri.
         return conversion.py2ri.registry

     rpyn.deactivate()
     #FIXME: is the following still making sense ?
     self.assertNotEqual(rpyn.py2ri, conversion.py2ri)
     baseline_size = len(registry())
     baseline_keys = set(registry().keys())
     rpyn.activate()
     self.assertTrue(len(registry()) > baseline_size)
     rpyn.deactivate()
     self.assertEqual(baseline_size, len(registry()))
     self.assertEqual(baseline_keys, set(registry().keys()))
Пример #26
0
def spMatrixToR(x):
    """Convert a sparse matrix to an R ``Matrix::sparseMatrix``.

    :param x: object with a ``tocoo()`` method whose result exposes ``row``,
        ``col``, ``data`` and ``shape`` (presumably a scipy.sparse matrix --
        confirm with callers)
    :returns: the R sparse matrix, built with 0-based indices
        (``index1=False``)
    """
    matrix_pkg = rpackages.importr('Matrix')
    coo_matrix = x.tocoo()
    numpy2ri.activate()
    try:
        result = matrix_pkg.sparseMatrix(i=IntVector(coo_matrix.row),
                                         j=IntVector(coo_matrix.col),
                                         x=FloatVector(coo_matrix.data),
                                         dims=IntVector(coo_matrix.shape),
                                         index1=False)
    finally:
        # Restore the global conversion even if the R call raises.
        numpy2ri.deactivate()
    return result
Пример #27
0
def computeMultivariatePoissonProbability(pdn, dataset):
    """Compute log-probabilities of ``dataset`` under the model ``pdn``.

    ``dataset`` is turned into an R data frame, expected values are computed
    with ``pdnmodule.computeExpectedValues`` and passed to
    ``pdnmodule.computeXlogProb``.  Both intermediate results are printed.

    :param pdn: model object handed to ``pdnmodule.computeExpectedValues``
    :param dataset: data converted with R's ``as.data.frame``
    :returns: the result of ``pdnmodule.computeXlogProb``
    """
    numpy2ri.activate()
    try:
        df = robjects.r["as.data.frame"](dataset)

        ev = pdnmodule.computeExpectedValues(pdn, df)
        print(ev)
        logprobs = pdnmodule.computeXlogProb(df, ev)
        print(logprobs)
    finally:
        # Restore the global conversion even if an R call fails.
        numpy2ri.deactivate()
    return logprobs
Пример #28
0
 def test_activate(self):
     """activate() must add py2rpy rules and deactivate() must remove them again."""
     rpyn.deactivate()
     #FIXME: is the following still making sense ?
     assert rpyn.py2rpy != conversion.py2rpy

     # State of the global registry while the module is inactive.
     size_before = len(conversion.py2rpy.registry)
     keys_before = set(conversion.py2rpy.registry.keys())

     rpyn.activate()
     assert len(conversion.py2rpy.registry) > size_before

     rpyn.deactivate()
     registry_after = conversion.py2rpy.registry
     assert len(registry_after) == size_before
     assert set(registry_after.keys()) == keys_before
Пример #29
0
    def ipcw_weights(self, event, time):
        """Compute weights with mboost's ``IPCweights``.

        :param event: event indicator, second argument of ``survival::Surv``
        :param time: observed times, first argument of ``survival::Surv``
        :returns: numpy array with the result of ``mboost::IPCweights``
        """
        from rpy2 import robjects
        from rpy2.robjects import numpy2ri

        _mboost = robjects.packages.importr("mboost")
        _survival = robjects.packages.importr("survival")

        numpy2ri.activate()
        try:
            iw = _mboost.IPCweights(_survival.Surv(time, event))
        finally:
            # Restore the global conversion even if the R call raises.
            numpy2ri.deactivate()
        return numpy.asarray(iw)
Пример #30
0
    def generate_pvalues(self, compute_intervals=False):
        """Compute randomized-lasso p-values with R's ``selectiveInference``.

        ``self.X``, ``self.Y``, ``self.q``, ``self.lagrange[0]`` and
        ``self.randomizer_scale`` are sent to R, where ``randomizedLasso``
        and then ``randomizedLassoInf`` (partial target) are run.

        :param compute_intervals: forwarded to R as ``construct_ci``; the
            intervals themselves are not returned by this method
        :returns: ``(active_set, pvalues)`` with 0-based indices, or
            ``([], [])`` when R selects nothing.  The empty case used to
            return three empty lists, which did not match the two-element
            result of every other path.
        """
        self._fit = True
        r_script = '''
        n = nrow(X)
        p = ncol(X)
        lam = lam * sqrt(n)
        mean_diag = mean(apply(X^2, 2, sum))
        ridge_term = sqrt(mean_diag) * sd(y) / sqrt(n)
        result = randomizedLasso(X, y, lam, ridge_term=ridge_term,
                                 noise_scale = randomizer_scale * sd(y) * sqrt(n), family='gaussian')
        active_set = result$active_set
        if (length(active_set)==0){
            active_set = -1
        } else{
            sigma_est = sigma(lm(y ~ X[,active_set] - 1))
            cat("sigma est for R", sigma_est,"\n")
            targets = selectiveInference:::compute_target(result, 'partial', sigma_est = sigma_est,
                                 construct_pvalues=rep(TRUE, length(active_set)), 
                                 construct_ci=rep(compute_intervals, length(active_set)))

            out = randomizedLassoInf(result,
                                 targets=targets,
                                 sampler = "norejection",
                                 level=0.9,
                                 burnin=1000,
                                 nsample=10000)
            active_set=active_set-1
            pvalues = out$pvalues
            intervals = out$ci
        }
        '''
        numpy2ri.activate()
        try:
            rpy.r.assign('X', self.X)
            rpy.r.assign('y', self.Y)
            rpy.r('y = as.numeric(y)')
            rpy.r.assign('q', self.q)
            rpy.r.assign('lam', self.lagrange[0])
            rpy.r.assign("randomizer_scale", self.randomizer_scale)
            rpy.r.assign("compute_intervals", compute_intervals)
            rpy.r(r_script)

            # ``np.int`` no longer exists in current NumPy; the builtin
            # ``int`` is what it aliased.
            active_set = np.asarray(rpy.r('active_set'), int)
            print(active_set)

            # The R script reports "nothing selected" as the single value -1.
            if active_set[0] == -1:
                return [], []

            pvalues = np.asarray(rpy.r('pvalues'))
        finally:
            # Covers the early return and any R failure.
            numpy2ri.deactivate()
        if len(active_set) > 0:
            return active_set, pvalues
        else:
            return [], []
Пример #31
0
def deactivate():
    """Restore the conversion functions saved by a previous activation.

    ``conversion.py2ri`` and ``conversion.ri2ro`` are reset from the
    module-level ``original_py2ri`` / ``original_ri2ro``, which are then
    cleared, and numpy2ri is deactivated as well.
    """
    global original_py2ri, original_ri2ro

    # Only act when an activation is pending; if the module has never been
    # activated or is already deactivated, there is nothing to do.
    if original_py2ri:
        conversion.py2ri = original_py2ri
        conversion.ri2ro = original_ri2ro
        original_py2ri = original_ri2ro = None
        numpy2ri.deactivate()
Пример #32
0
def deactivate():
    """Undo a previous activation of this module's conversion.

    Does nothing unless ``original_py2ri`` holds a saved function.
    Otherwise the saved ``py2ri`` / ``ri2ro`` are put back on
    ``conversion``, the saved references are cleared and numpy2ri is
    deactivated.
    """
    global original_py2ri, original_ri2ro

    # If module has never been activated or already deactivated,
    # there is nothing to do
    if not original_py2ri:
        return

    saved_py2ri, saved_ri2ro = original_py2ri, original_ri2ro
    original_py2ri = original_ri2ro = None
    conversion.py2ri = saved_py2ri
    conversion.ri2ro = saved_ri2ro
    numpy2ri.deactivate()
Пример #33
0
 def select(self):
     """Run R's ``knockoff.filter`` on ``self.X`` / ``self.Y`` at FDR ``self.q``.

     :returns: two integer arrays, both holding the selected indices shifted
         to 0-based; ``([], [])`` when the R call fails
     """
     numpy2ri.activate()
     try:
         rpy.r.assign('X', self.X)
         rpy.r.assign('Y', self.Y)
         rpy.r.assign('q', self.q)
         rpy.r('V=knockoff.filter(X, Y, fdr=q)$selected')
         rpy.r('if (length(V) > 0) {V = V-1}')
         V = rpy.r('V')
         # ``np.int`` no longer exists in current NumPy; with the old bare
         # ``except`` that made this method return ([], []) unconditionally.
         # The builtin ``int`` is what it aliased.
         return np.asarray(V, int), np.asarray(V, int)
     except Exception:
         # Best effort: a failing filter means "nothing selected".
         return [], []
     finally:
         # Runs on every path, so the conversion is never left active.
         numpy2ri.deactivate()
Пример #34
0
 def get_R_theta(pi, c, Gamma, A, b, Sigma):
     """Return a R compatible list from numpy arrays

     ``c`` and ``b`` are transposed; ``Gamma``, ``A`` and ``Sigma`` have
     their first axis moved to the end (``transpose((1, 2, 0))``), so they
     must be 3-d arrays.  ``pi`` is passed through unchanged.

     :returns: ``ListVector`` with entries ``pi``, ``c``, ``Gamma``, ``A``,
         ``b`` and ``Sigma``
     """
     numpy2ri.activate()
     try:
         in_theta = ListVector(dict(
             pi=pi,
             c=c.T,
             Gamma=Gamma.transpose((1, 2, 0)),
             A=A.transpose((1, 2, 0)),
             b=b.T,
             Sigma=Sigma.transpose((1, 2, 0))
         ))
     finally:
         # Restore the global conversion even if the conversion to R fails.
         numpy2ri.deactivate()
     return in_theta
def _activate_numpy_conversion():
    """Import rpy2 lazily, switch on numpy -> R conversion, return ``(rpy, numpy2ri)``."""
    # Original author's note: this will probably not work on miller
    import rpy2.robjects as rpy
    from rpy2.robjects import numpy2ri
    rpy.conversion.py2ri = numpy2ri.numpy2ri

    numpy2ri.activate()
    return rpy, numpy2ri


def _record_selection(full_results, label, selected, active):
    """Append size (R), selections outside ``active`` (V) and screening flag under ``label``."""
    R = selected.shape[0]
    V = R - len(set(active).intersection(selected))
    screen = set(selected).issuperset(active)

    full_results.setdefault('%s_R' % label, []).append(R)
    full_results.setdefault('%s_V' % label, []).append(V)
    full_results.setdefault('%s_screen' % label, []).append(screen)


def _knockoff_selections(X, y, alpha):
    """Return the 0-based variables picked by R's knockoff filter with offset 0 and offset 1."""
    rpy, numpy2ri = _activate_numpy_conversion()
    try:
        rpy.r.assign('X', X)
        rpy.r.assign('y', y)
        rpy.r.assign('alpha', alpha)

        # R indices are 1-based, hence the "- 1".
        knockoff = np.array(rpy.r("""
        library(knockoff)
        knockoff.filter(X = X, y = y, fdr=alpha, knockoffs=create.fixed, offset=0)$selected
    """)) - 1

        knockoff_plus = np.array(rpy.r("""
        knockoff.filter(X = X, y = y, fdr=alpha, knockoffs=create.fixed, offset=1)$selected
    """)) - 1
    finally:
        # Never leave the global conversion activated, even if R fails.
        numpy2ri.deactivate()
    return knockoff, knockoff_plus


def _forward_stepwise_selection(X, y, p, bic=False):
    """Return the 0-based variables chosen by R's forward ``step`` (``k=log(nrow(X))`` if ``bic``)."""
    upper = ' + '.join(['X%d' % i for i in range(1, p+1)])
    rpy, numpy2ri = _activate_numpy_conversion()
    try:
        rpy.r.assign('X', X)
        rpy.r.assign('y', y)
        if bic:
            rpy.r('''M = step(lm(y ~ 1, 
                             data=data.frame(X, y)), 
                             scope=list(upper="~ %s"), 
                             direction="forward", 
                             k=log(nrow(X)),
                             trace=FALSE)''' % upper)
        else:
            rpy.r('''M = step(lm(y ~ 1, 
                             data=data.frame(X, y)), 
                             scope=list(upper="~ %s"), 
                             direction="forward", 
                             trace=FALSE)''' % upper)
        # Variable names look like "X<k>"; drop the letter and
        # subtract 1 for 0-based indexing
        return np.asarray([int(v[1:]) for v in rpy.r("all.vars(M$call$formula[[3]])")]) - 1
    finally:
        numpy2ri.deactivate()


def _glmnet_selections(X, y):
    """Return the 0-based nonzero glmnet coefficients at ``lambda.min`` and at ``lambda.1se``."""
    rpy, numpy2ri = _activate_numpy_conversion()
    try:
        rpy.r.assign('X', X)
        rpy.r.assign('y', y)
        rpy.r('''library(glmnet);
                 y = as.matrix(y);
                 X = as.matrix(X); 
                 CVG = cv.glmnet(X, y);
                 G = glmnet(X, y);
                 B = coef(G, s=CVG$lambda.min, exact=TRUE);
                 selected = which(B[2:length(B)] != 0);
                 B2 = coef(G, s=CVG$lambda.1se, exact=TRUE);
                 selected2 = which(B2[2:length(B2)] != 0);
                 ''')

        # subtract 1 for 0-based indexing
        at_lambda_min = np.asarray(rpy.r("selected")) - 1
        at_lambda_1se = np.asarray(rpy.r("selected2")) - 1
    finally:
        numpy2ri.deactivate()
    return at_lambda_min, at_lambda_1se


def compute_results(y, X, sigma, active,
                    full_results=None,
                    do_knockoff=False,
                    do_AIC=True,
                    do_BIC=True,
                    do_glmnet=True,
                    alpha=0.05,
                    maxstep=np.inf,
                    compute_maxT_identify=True,
                    burnin=2000,
                    ndraw=8000):
    """Run one round of p-value computation and append its summaries to ``full_results``.

    Parameters
    ----------
    y, X, sigma, active
        Forwarded to ``compute_pvalues``.  ``X`` must be 2-D (its column
        count names the R regressors ``X1..Xp``).  ``active`` is the
        reference set every selection is compared against.
    full_results : dict, optional
        Accumulator mapping result names to lists; one value per key is
        appended on each call.  Pass the same dict on every call to
        accumulate across runs.  When omitted a fresh dict is created
        (previously a single default dict was silently shared by all
        calls).
    do_knockoff, do_AIC, do_BIC, do_glmnet : bool
        Enable the corresponding R-based comparison (requires rpy2 and the
        R packages used: ``knockoff``, ``glmnet``).
    alpha : float
        Level recorded under ``'alpha'``, used as the knockoff ``fdr`` and
        passed to ``summary`` for each stopping rule.
    maxstep, compute_maxT_identify, burnin, ndraw
        Forwarded unchanged to ``compute_pvalues``.

    Returns
    -------
    (full_results, FS)
        The updated accumulator and the second value returned by
        ``compute_pvalues``.
    """
    if full_results is None:
        full_results = {}

    _, p = X.shape

    results, FS = compute_pvalues(y, X, active, sigma, maxstep=maxstep,
                                  compute_maxT_identify=compute_maxT_identify,
                                  burnin=burnin,
                                  ndraw=ndraw)
    completion_idx = completion_index(results['variable_selected'], active)
    full_results.setdefault('completion_idx', []).append(completion_idx)

    # One key per (column, step) of the p-value table, steps numbered from 1.
    for column in results.columns:
        for i in range(results.shape[0]):
            full_results.setdefault('%s_%d' % (str(column), i+1), []).append(results[column][i])

    for position, variable in enumerate(active, 1):
        full_results.setdefault('active_%d' % position, []).append(variable)

    full_results.setdefault('alpha', []).append(alpha)

    if do_knockoff:
        knockoff, knockoff_plus = _knockoff_selections(X, y, alpha)
        _record_selection(full_results, 'knockoff', knockoff, active)
        _record_selection(full_results, 'knockoff_plus', knockoff_plus, active)

    if do_AIC:
        AIC = _forward_stepwise_selection(X, y, p)
        _record_selection(full_results, 'AIC', AIC, active)

    if do_BIC:
        BIC = _forward_stepwise_selection(X, y, p, bic=True)
        _record_selection(full_results, 'BIC', BIC, active)

    if do_glmnet:
        GLMnet, GLMnet1se = _glmnet_selections(X, y)
        _record_selection(full_results, 'GLMnet', GLMnet, active)
        _record_selection(full_results, 'GLMnet1se', GLMnet1se, active)

    pvalue_columns = ['maxT_identify_pvalue',
                      'maxT_identify_unknown_pvalue',
                      'maxT_unknown_pvalue',
                      'saturated_pvalue',
                      'nominal_pvalue',
                      'nominalT_pvalue',
                      'maxT_pvalue']
    stopping_rules = [(simple_stop, 'simple'),
                      (strong_stop, 'strong'),
                      (forward_stop, 'forward')]

    # Every p-value column is summarised under every stopping rule.
    for pval, (rule, rule_name) in product(pvalue_columns, stopping_rules):
        (R,
         V_var,
         V_model,
         screen,
         FWER_model,
         FDP_model,
         FDP_var,
         S_var) = summary(np.asarray(results['variable_selected']),
                          results[pval],
                          active,
                          rule,
                          alpha)

        # Drop the trailing "_pvalue" component of the column name.
        pval_name = '_'.join(pval.split('_')[:-1])
        for stat, value in zip(['R', 'V_var', 'V_model', 'FDP_model', 'FDP_var', 'S_var', 'FWER_model', 'screen'],
                               [R, V_var, V_model, FDP_model, FDP_var, S_var, FWER_model, screen]):
            full_results.setdefault('%s_%s_%s' % (pval_name, rule_name, stat), []).append(value)

    return full_results, FS
Пример #36
0
# Report the three stopping values (defined outside this fragment).
# A single-argument print(...) produces the same output on Python 2 and 3.
print('maxT unknown: %s' % (forward_stop_U,))
print('nominal: %s' % (forward_stop_N,))
print('saturated: %s' % (forward_stop_S,))

pvals = pvals[:20]

# R pvalues

Rpval = []
model_str = ''
for i in range(pvals.shape[0]):
    # Refit the nested model with the first i+1 selected columns in R and
    # keep the p-value of the coefficient added last.
    model_str = '+'.join([' X[,%d] ' % v for v in pvals['Column number'][:(i+1)]])
    Rstr = 'summary(lm(Y ~ %s))$coef[,4]' % model_str
    Rpval.append(np.array(rpy.r(Rstr))[-1])

# Relative difference between R's p-values and the 'Nominal pvalue' column.
nominal_pvalues = pvals['Nominal pvalue']
print('checking whether nominal agrees with R: %s'
      % (np.linalg.norm(np.array(Rpval) - nominal_pvalues) / np.linalg.norm(nominal_pvalues),))

# save the HTML table

# ``open`` inside ``with`` replaces the Python-2-only ``file`` builtin and
# guarantees the handle is flushed and closed.
with open('../../tables/diabetes.html', 'w') as html_table:
    html_table.write(pvals.to_html(float_format = lambda v : '%0.2f' % v, index=False))

# ``reindex(columns=...)`` is the equivalent of ``reindex_axis(..., axis=1)``,
# which newer pandas releases no longer provide.
pvals = pvals.reindex(columns=['Step', 'Variable', 'Nominal pvalue', 'Saturated pvalue', 'MaxT pvalue'])
print(pvals)

# save the LaTeX table

with open('../../tables/diabetes.tex', 'w') as latex_table:
    latex_table.write(pvals.to_latex(float_format = lambda v : '%0.2f' % v, index=False))

numpy2ri.deactivate()
 def tearDown(self):
     """Call ``rpyn.deactivate()``; presumably undoes an activation done in setUp (not visible here)."""
     rpyn.deactivate()