示例#1
0
def test_lmm_lr(G, y, Z, Kbg, Covs=None):
    """
    Single-variant LMM test with an additional low-rank random effect.

    A variance decomposition of ``y`` is fitted with ``Kbg``, a linear
    kernel built from ``Z`` and a noise term (plus ``Covs`` as fixed
    effect when given). The two fitted non-noise variance components
    weight ``Kbg`` and the linear kernel into one covariance, which is
    handed to ``qtl.test_lmm`` for the association scan over ``G``.

    input:
    G    :  genotypes
    y    :  phenotype
    Z    :  features of low-rank matrix
    Kbg  :  background covariance matrix
    Covs :  fixed effect covariates (optional)

    output (in this order):
    pv        :  first row of the p-values reported by the LMM
    beta      :  first row of the SNP effect sizes reported by the LMM
    var_snps  :  beta**2 times the per-column variance of G
    var_covs  :  variance of Covs . weights, repeated per SNP (zeros
                 when Covs is None)
    var_genes :  fitted variance component of the low-rank term,
                 repeated per SNP
    """
    has_covs = Covs is not None

    # null model: [fixed effect] + background + low-rank + noise
    vd = varianceDecomposition.VarianceDecomposition(y)
    if has_covs:
        vd.addFixedEffect(Covs)
    vd.addRandomEffect(Kbg)
    Klr = utils.computeLinearKernel(Z)
    vd.addRandomEffect(Klr)
    vd.addRandomEffect(is_noise=True)
    vd.optimize()

    # combine the two structured covariances using their fitted variances
    varComps = vd.getVarianceComps()[0]
    bg_var, lr_var = varComps[0], varComps[1]
    Ktotal = bg_var * Kbg + lr_var * Klr

    # association scan against the combined covariance
    lm = qtl.test_lmm(G, y, covs=Covs, K=Ktotal)
    pv = lm.getPv()[0]
    beta = lm.getBetaSNP()[0]
    n_snps = len(beta)

    # per-SNP variance summaries
    var_snps = beta**2 * np.var(G, axis=0)
    var_genes = np.zeros(n_snps) + lr_var
    var_covs = np.zeros(n_snps)
    if has_covs:
        fixed_part = np.dot(Covs, vd.getWeights())
        var_covs += fixed_part.var()

    return pv, beta, var_snps, var_covs, var_genes
示例#2
0
    def setUp(self):
        """Prepare the dataset, the kinship matrix and the variance model.

        The dataset lives in a ``varDecomp`` directory next to this file.
        It is loaded from there unless the directory is missing or
        ``'recalc'`` appears in ``sys.argv``; in those cases the data is
        regenerated with a fixed random seed through ``genGeno`` and
        ``genPheno``. ``self.generate`` records which path was taken.
        """
        # check: do we have a csv File?
        here = os.path.dirname(__file__)
        self.dir_name = here
        self.dataset = os.path.join(here, 'varDecomp')

        on_disk = os.path.exists(self.dataset)
        if on_disk and 'recalc' not in sys.argv:
            # reuse the stored dataset and read the dimensions from it
            self.generate = False
            self.D = data.load(self.dataset)
            geno_shape = self.D['X'].shape
            self.N = geno_shape[0]
            self.S = geno_shape[1]
            self.P = self.D['Y'].shape[1]
        else:
            # (re)generate the dataset with a fixed seed
            if not on_disk:
                os.makedirs(self.dataset)
            SP.random.seed(1)
            self.N = 200
            self.S = 1000
            self.P = 2
            self.D = {}
            self.genGeno()
            self.genPheno()
            self.generate = True

        # linear kernel of X, scaled so that its diagonal has mean one
        geno = self.D['X']
        kinship = SP.dot(geno, geno.T)
        self.Kg = kinship / kinship.diagonal().mean()

        # model under test: Kg random effect + noise + fixed effect
        self.vc = VAR.VarianceDecomposition(self.D['Y'])
        self.vc.addRandomEffect(self.Kg, jitter=0)
        self.vc.addRandomEffect(is_noise=True, jitter=0)
        self.vc.addFixedEffect()
示例#3
0
# Subsample the data set for the queried phenotypes (intersection=True).
data_subsample = data.subsample_phenotypes(
    phenotype_query=phenotype_query, intersection=True)

# Phenotype table and the accompanying sample index; every entry of the
# index must be truthy.
phenotypes, sample_idx = data_subsample.getPhenotypes(
    phenotype_query=phenotype_query, intersection=True)
assert sample_idx.all()

# Covariance and positions as provided by the subsampled data object.
K = data_subsample.getCovariance()
pos = data_subsample.getPos()

# Dimensions of the phenotype table.
N, P = phenotypes.shape

# Variance component model: a fixed effect plus two random effects (one
# driven by K, one flagged as noise), both with a rank-4 'lowrank_diag'
# trait covariance.
vc = VAR.VarianceDecomposition(phenotypes.values)
vc.addFixedEffect()
for effect_kwargs in ({'K': K}, {'is_noise': True}):
    vc.addRandomEffect(trait_covar_type='lowrank_diag', rank=4,
                       **effect_kwargs)
vc.optimize()

# Trait covariance of term 0 (the K-based effect) and term 1 (the noise).
Cg, Cn = (vc.getTraitCovar(term) for term in (0, 1))

# Wrap both covariance matrices in DataFrames labelled by the phenotype
# columns, for nicer output writing.
trait_labels = phenotypes.columns
genetic_covar, noise_covar = (
    pd.DataFrame(data=covar, index=trait_labels, columns=trait_labels)
    for covar in (Cg, Cn))
示例#4
0
文件: core.py 项目: NHLBI-BCB/scLVM
    def varianceDecomposition(self,
                              K=None,
                              tech_noise=None,
                              idx=None,
                              i0=None,
                              i1=None,
                              max_iter=10,
                              verbose=False):
        """
        Fit a variance decomposition model gene by gene and cache the result.

        For every selected gene the standardized expression is modelled
        with a fixed effect, one random effect per matrix in K and two
        identity-covariance random effects (reported as 'biol_noise' and
        'tech_noise'). Optimization is restarted from random scales up to
        max_iter times; genes that never converge get zero for the K
        terms, max(0, var(y) - tech_noise) as biological noise and the
        rescaled technical noise as technical noise.

        Args:
            K:          random effect covariance (N x N), or a list of them
            tech_noise: if not None, passed to self.set_tech_noise() first
            idx:        index or indices of the genes to be considered in
                        the analysis (scalar, sequence or ndarray)
            i0:         gene index from which the analysis starts
                        (only used when idx is None)
            i1:         gene index to which the analysis stops
                        (only used when idx is None)
            max_iter:   maximum number of random restarts
            verbose:    if True, print progress

        Nothing is returned. The method sets:
            self.var        (genes x len(K)+2) variance components
            self.Ystar      list with one (N x genes) array per entry of K
            self.var_info   dict: 'gene_idx', 'col_header', 'conv', 'geneID'
            self.Ystar_info dict: 'gene_idx', 'conv', 'geneID'

        'geneID' holds self.geneID[gene] for every analysed gene, or empty
        strings when self.geneID is None.

        Raises:
            AssertionError: if no technical noise is set, K is None, or a
                matrix in K is not N x N.
        """

        if tech_noise is not None:
            self.set_tech_noise(tech_noise)
        assert self.tech_noise is not None, 'scLVM:: specify technical noise'
        assert K is not None, 'scLVM:: specify K'

        if not isinstance(K, list):
            K = [K]
        for k in K:
            assert k.shape[0] == self.N, 'scLVM:: K dimension dismatch'
            assert k.shape[1] == self.N, 'scLVM:: K dimension dismatch'

        if idx is None:
            if i0 is None or i1 is None:
                i0 = 0
                i1 = self.G
            idx = SP.arange(i0, i1)
        elif not isinstance(idx, SP.ndarray):
            # ndmin=1 turns a scalar into a length-1 array while keeping a
            # list/tuple of indices one-dimensional (wrapping it in another
            # list would yield a 2-D array and break the per-gene loop).
            idx = SP.array(idx, ndmin=1)

        n_terms = len(K)
        _G = len(idx)
        var = SP.zeros((_G, n_terms + 2))
        conv = SP.zeros(_G) == 1
        Ystar = [SP.zeros((self.N, _G)) for _ in range(n_terms)]
        # Gene labels are collected in a list and converted at the end, so
        # the string width is taken from the data. A pre-allocated
        # dtype=str array only holds one character per entry.
        known_ids = getattr(self, 'geneID', None)
        gene_labels = []

        # standardize the expression matrix and rescale the technical
        # noise accordingly
        Y_std = self.Y.std(0)
        Ystd = self.Y - self.Y.mean(
            0)  #delta optimization might be more efficient
        Ystd /= Y_std
        tech_noise = self.tech_noise / SP.array(Y_std)**2

        for count, ids in enumerate(idx):
            if verbose:
                print('.. fitting gene %d' % ids)
            # record the label of every gene, converged or not, at the same
            # row that its variance components are written to
            gene_labels.append('' if known_ids is None else known_ids[ids])
            # extract a single gene
            y = Ystd[:, ids:ids + 1]
            # build and fit variance decomposition model
            vc = VAR.VarianceDecomposition(y)
            vc.addFixedEffect()
            for k in K:
                vc.addRandomEffect(k)
            vc.addRandomEffect(SP.eye(self.N))
            vc.addRandomEffect(SP.eye(self.N))
            # NOTE(review): presumably masks the parameter of the last term
            # so that it stays at the technical-noise start value - confirm
            vc.vd.getTerm(n_terms + 1).getKcf().setParamMask(SP.zeros(1))
            # stays False when max_iter is 0, so the gene counts as
            # not converged instead of raising a NameError
            _conv = False
            for iter_i in range(max_iter):
                scales0 = y.std() * SP.randn(n_terms + 2)
                scales0[n_terms + 1] = SP.sqrt(tech_noise[ids])
                _conv = vc.optimize(scales0=scales0, n_times=2)
                if _conv:
                    break
            conv[count] = _conv
            if not _conv:
                # fallback: attribute everything beyond the technical noise
                # to biological noise; Ystar columns stay zero
                var[count, -2] = SP.maximum(0, y.var() - tech_noise[ids])
                var[count, -1] = tech_noise[ids]
                continue
            _var = vc.getVarianceComps()[0, :]
            KiY = vc.gp.agetKEffInvYCache().ravel()
            for ki, k in enumerate(K):
                Ystar[ki][:, count] = _var[ki] * SP.dot(k, KiY)
            var[count, :] = _var

        # col header
        col_header = ['hidden_%d' % i for i in range(n_terms)]
        col_header.append('biol_noise')
        col_header.append('tech_noise')
        col_header = SP.array(col_header)

        # annotate column and rows of var and Ystar
        var_info = {
            'gene_idx': idx,
            'col_header': col_header,
            'conv': conv,
            'geneID': SP.array(gene_labels, dtype=str),
        }
        Ystar_info = {
            'gene_idx': idx,
            'conv': conv,
            'geneID': SP.array(gene_labels, dtype=str),
        }

        # cache stuff
        self.var = var
        self.Ystar = Ystar
        self.var_info = var_info
        self.Ystar_info = Ystar_info
示例#5
0
            # Float copy of the phenotype Y.
            # NOTE(review): Yu is not used in the lines visible here; the
            # model below is built from Y, not Yu - confirm this is intended.
            Yu = np.array(Y, dtype='float')
            #Yu -= Yu.mean(0); Yu /= Yu.std(0)

            # Optionally standardize Xu column-wise, in place
            # (Xu and center are defined outside this fragment).
            if center:
                Xu -= Xu.mean(0)
                Xu /= Xu.std(0)
            # Linear kernel of Xu ("cis"); the "trans" kernel is what remains
            # of uKpop after subtracting the unnormalized cis kernel. Both
            # are then scaled so that their diagonals have mean one.
            uKcis = SP.dot(Xu, Xu.T)
            uKtrans = uKpop - uKcis
            uKcis /= uKcis.diagonal().mean()
            uKtrans /= uKtrans.diagonal().mean()

            #4.3 perform experiment and store results in out_gene
            out_gene = {}

            print "cis/trans/noise + covariates variance decomposition"
            # Model: fixed effect, one random effect per covariate kernel
            # held on `data`, the cis and trans kernels, and a noise term.
            vc = VAR.VarianceDecomposition(Y)
            vc.addFixedEffect()
            vc.addRandomEffect(data.kgender)
            vc.addRandomEffect(data.kreprog)
            vc.addRandomEffect(data.kmedia)
            vc.addRandomEffect(data.kuser)
            vc.addRandomEffect(data.ksentrix_id)
            vc.addRandomEffect(data.ksentrix_pos)
            vc.addRandomEffect(data.kplate)
            vc.addRandomEffect(data.kwell)
            vc.addRandomEffect(data.ktime)
            vc.addRandomEffect(data.kpassage)
            vc.addRandomEffect(uKcis)
            vc.addRandomEffect(uKtrans)
            vc.addRandomEffect(is_noise=True)
            # Return value of optimize() is kept as the convergence flag.
            conv = vc.optimize()