def test_structlmm_assoc():
    """Check ``score_2dof_assoc`` against a stored p-value on seeded random data."""
    rng = RandomState(1)
    n_samples, n_envs = 20, 4

    # The order of the draws below determines the seeded values; keep it.
    pheno = rng.randn(n_samples, 1)
    envs = rng.randn(n_samples, n_envs)
    covariates = ones((n_samples, 1))
    # Binary column: 1.0 where the uniform draw falls below 0.2, else 0.0.
    snp = 1.0 * (rng.rand(n_samples, 1) < 0.2)

    model = StructLMM(pheno, covariates, envs, W=envs)
    model.fit(verbose=False)
    p_value = model.score_2dof_assoc(snp)

    assert_allclose([p_value], [0.8470039620073695], rtol=1e-5)
def test_structlmm_inter():
    """Check ``score_2dof_inter`` against a stored p-value on seeded random data."""
    rng = RandomState(1)
    n_samples, n_envs = 20, 4

    # The order of the draws below determines the seeded values; keep it.
    pheno = rng.randn(n_samples, 1)
    envs = rng.randn(n_samples, n_envs)
    # Binary vector: 1.0 where the uniform draw falls below 0.2, else 0.0.
    snp = 1.0 * (rng.rand(n_samples) < 0.2)

    # Two-column design: a column of ones next to the tested vector itself.
    design = stack([ones(n_samples), snp], axis=1)

    model = StructLMM(pheno, design, envs, W=envs)
    model.fit(verbose=False)
    p_value = model.score_2dof_inter(snp)

    assert_allclose([p_value], [0.6781070640353783], rtol=1e-5)
def struct_lmm(geno_df, pheno, env, covs=None, rhos=None,
               no_association_test=False, no_interaction_test=False):
    """
    Utility function to run StructLMM

    Parameters
    ----------
    geno_df : (`N`, `S`) pandas.DataFrame
        genotype matrix for `N` samples, `S` SNPs
    pheno : (`N`, 1) ndarray
        phenotype vector
    env : (`N`, `K`)
        Environmental matrix (individuals by number of environments)
    covs : (`N`, L) ndarray
        fixed effect design for covariates `N` samples and `L` covariates.
        By default, a (`N`, 1) column of ones.
    rhos : list
        list of ``rho`` values.
        ``rho=0`` corresponds to no persistent effect (only GxE);
        ``rho=1`` corresponds to only persistent effect (no GxE);
        By default, ``rho=[0, 0.2, 0.4, 0.6, 0.8, 1.]``
    no_association_test : bool
        if True the association test is not considered.
        The default value is False.
    no_interaction_test : bool
        if True the interaction test is not considered.
        The default value is False.

    Returns
    -------
    res : *:class:`pandas.DataFrame`*
        indexed by the column labels of ``geno_df``; contains
        pv of joint test in column ``pv`` (if no_association_test is False),
        pv of interaction test in column ``pv_int``
        (if no_interaction_test is False).
    """
    # Function-scope import of the NumPy routines, replacing the deprecated
    # NumPy aliases that used to be reachable through the ``scipy`` namespace.
    from numpy import hstack, ones, zeros

    run_assoc = not no_association_test
    run_inter = not no_interaction_test

    if covs is None:
        covs = ones((env.shape[0], 1))
    if rhos is None:
        rhos = [0, .2, .4, .6, .8, 1.]

    if run_assoc:
        # The null model of the association test does not depend on the SNP,
        # so it is fitted once, before the scan.
        slmm = StructLMM(pheno, env, W=env, rho_list=rhos)
        slmm.fit_null(F=covs, verbose=False)

    if run_inter:
        # The interaction model is refitted per SNP inside the loop because
        # the tested SNP is appended to its fixed effects.
        slmm_int = StructLMM(pheno, env, W=env, rho_list=[0])

    n_snps = geno_df.shape[1]
    # Materialise the genotype array once instead of on every iteration.
    genotypes = geno_df.values

    res = pd.DataFrame(data=[], index=geno_df.columns.values)
    pv = zeros(n_snps)
    pv_int = zeros(n_snps)

    # ``range`` (not the Python-2-only ``xrange``) keeps this runnable on
    # Python 3.
    for snp in range(n_snps):
        # Index with a list to keep ``x`` as an (N, 1) column.
        x = genotypes[:, [snp]]

        if run_assoc:
            # association test; the second returned value (optimal rho) is
            # not used here
            p_value, _ = slmm.score_2_dof(x)
            pv[snp] = p_value

        if run_inter:
            # interaction test: the SNP enters the null model as a covariate
            covs1 = hstack((covs, x))
            slmm_int.fit_null(F=covs1, verbose=False)
            p_value, _ = slmm_int.score_2_dof(x)
            pv_int[snp] = p_value

    # add pvalues to res
    if run_assoc:
        res['pv'] = pv
    if run_inter:
        res['pv_int'] = pv_int

    return res
def st_sscan(G, y, E, M=None, tests=None, verbose=True):
    """Mixed-model with genetic effect heterogeneity.

    Parameters
    ----------
    G : array-like
        candidate variants; after normalisation each column ``G[:, [snp]]``
        is tested in turn.
    y : array-like
        phenotype data.
    E : array-like
        environments data; handed to ``StructLMM`` both as its second
        positional argument and as ``W``.
    M : array-like, optional
        covariate design matrix. It is forwarded to ``conform_dataset``
        as given; presumably that helper supplies a default when ``M`` is
        ``None`` -- confirm against its documentation.
    tests : list, optional
        Which tests are performed. Element list values are ``'inter'`` and
        ``'assoc'``. By default, only the interaction test is considered.
    verbose : bool, optional
        passed (negated) as ``disable`` to the session helpers that report
        progress.

    Returns
    -------
    pandas.DataFrame
        one row per column of ``G`` with the columns ``pvi`` (interaction
        test) and ``pva`` (association test). The column of a test that was
        not requested is left filled with zeros.

    Notes
    -----
    The ``rho`` grid of the association test is fixed inside the function to
    ``[0, 0.1**2, 0.2**2, 0.3**2, 0.4**2, 0.5**2, 0.5, 1.]``; it is not a
    parameter.
    """
    from struct_lmm import StructLMM
    from numpy import zeros, hstack, asarray
    from pandas import DataFrame

    rho_grid = [0.0, 0.1**2, 0.2**2, 0.3**2, 0.4**2, 0.5**2, 0.5, 1.0]
    quiet = not verbose

    with session_block("struct-lmm analysis", disable=quiet):

        # NOTE(review): the extent of this inner block was reconstructed;
        # it only affects what the progress line covers -- confirm.
        with session_line("Normalising input... ", disable=quiet):
            conformed = conform_dataset(y, M, G=G, K=None)

        y, M, G = conformed["y"], conformed["M"], conformed["G"]

        if tests is None:
            tests = ["inter"]
        run_inter = "inter" in tests
        run_assoc = "assoc" in tests

        if run_inter:
            # Refitted for every variant inside the scan below.
            inter_model = StructLMM(asarray(y, float), E, W=E, rho_list=[0])

        if run_assoc:
            # The association null model is fitted once, before the scan.
            assoc_model = StructLMM(asarray(y, float), E, W=E, rho_list=rho_grid)
            assoc_model.fit_null(F=asarray(M, float), verbose=False)

        n_snps = G.shape[1]
        pv_inter = zeros(n_snps)
        pv_assoc = zeros(n_snps)

        for idx in range(n_snps):
            x = asarray(G[:, [idx]], float)

            if run_inter:
                # interaction test: the variant joins the null-model covariates
                inter_model.fit_null(F=hstack((M, x)), verbose=False)
                pv_inter[idx] = inter_model.score_2_dof(x)

            if run_assoc:
                # association test
                pv_assoc[idx] = assoc_model.score_2_dof(x)

    columns = OrderedDict([("pvi", pv_inter), ("pva", pv_assoc)])
    return DataFrame(columns)