Пример #1
0
def fitPairwiseModel(Y,XX=None,S_XX=None,U_XX=None,verbose=False):
    """
    Fit a two-trait model for every pair of columns of Y and assemble
    P x P starting covariances from the pairwise correlations.

    Args:
        Y:              2-d phenotype array; P is taken from its second axis
        XX, S_XX, U_XX: forwarded unchanged to fitSingleTraitModel and
                        gp2kronSum
        verbose:        if True, print one progress line per fitted pair

    Returns:
        the dictionary returned by fitSingleTraitModel, extended with
        'Cg0'/'Cn0' (pairwise correlations scaled by the single-trait
        entries of RV['varST'], column 0 for Cg and column 1 for Cn),
        'conv2' (per-pair flag returned by OPT.opt_hyper),
        'Cg0_reg'/'Cn0_reg' (diagonally shifted so the smallest
        eigenvalue is at least 1e-4) and 'params0_Cg'/'params0_Cn'
        (lower Cholesky factors flattened in tril order)
    """
    _,P = Y.shape
    RV = fitSingleTraitModel(Y,XX=XX,S_XX=S_XX,U_XX=U_XX,verbose=verbose)
    Cg = covariance.freeform(2)
    Cn = covariance.freeform(2)
    gp = gp2kronSum(mean(Y[:,0:2]),Cg,Cn,XX=XX,S_XX=S_XX,U_XX=U_XX)
    conv2 = SP.ones((P,P),dtype=bool)
    rho_g = SP.ones((P,P))
    rho_n = SP.ones((P,P))
    for p1 in range(P):
        for p2 in range(p1):
            if verbose:
                # single-argument call form works on Python 2 and 3
                print('.. fitting correlation (%d,%d)'%(p1,p2))
            gp.setY(Y[:,[p1,p2]])
            # start from the single-trait scales with a tiny random off-diagonal term
            Cg_params0 = SP.array([SP.sqrt(RV['varST'][p1,0]),1e-6*SP.randn(),SP.sqrt(RV['varST'][p2,0])])
            Cn_params0 = SP.array([SP.sqrt(RV['varST'][p1,1]),1e-6*SP.randn(),SP.sqrt(RV['varST'][p2,1])])
            params0 = {'Cg':Cg_params0,'Cn':Cn_params0}
            conv2[p1,p2],info = OPT.opt_hyper(gp,params0,factr=1e3)
            Kg = Cg.K()
            Kn = Cn.K()
            rho_g[p1,p2] = Kg[0,1]/SP.sqrt(Kg.diagonal().prod())
            rho_n[p1,p2] = Kn[0,1]/SP.sqrt(Kn.diagonal().prod())
            # mirror into the upper triangle
            conv2[p2,p1] = conv2[p1,p2]
            rho_g[p2,p1] = rho_g[p1,p2]
            rho_n[p2,p1] = rho_n[p1,p2]
    sd_g = SP.sqrt(RV['varST'][:,0:1])
    sd_n = SP.sqrt(RV['varST'][:,1:2])
    RV['Cg0'] = rho_g*SP.dot(sd_g,sd_g.T)
    RV['Cn0'] = rho_n*SP.dot(sd_n,sd_n.T)
    RV['conv2'] = conv2
    # regularizes covariance matrices: shift the spectrum so the
    # smallest eigenvalue is at least 1e-4
    offset_g = abs(SP.minimum(LA.eigh(RV['Cg0'])[0].min(),0))+1e-4
    offset_n = abs(SP.minimum(LA.eigh(RV['Cn0'])[0].min(),0))+1e-4
    RV['Cg0_reg'] = RV['Cg0']+offset_g*SP.eye(P)
    RV['Cn0_reg'] = RV['Cn0']+offset_n*SP.eye(P)
    # lower=True is required: tril_indices reads the lower triangle, and
    # scipy.linalg.cholesky returns the upper factor by default, which
    # would leave every off-diagonal starting value at zero
    RV['params0_Cg']=LA.cholesky(RV['Cg0_reg'],lower=True)[SP.tril_indices(P)]
    RV['params0_Cn']=LA.cholesky(RV['Cn0_reg'],lower=True)[SP.tril_indices(P)]
    return RV
Пример #2
0
 def f1(x):
     """
     Objective and gradient at parameter vector x.

     C, SSS and Hi are taken from the enclosing scope. The value is
     delta . (Hi delta), where delta is the lower triangle of C.K()
     minus SSS; the gradient has one entry per parameter 0, 1, 2.
     """
     tri = sp.tril_indices(2)
     C.setParams(x)
     resid = C.K()[tri] - SSS
     val = (resid * sp.dot(Hi, resid)).sum()
     # lower-triangle derivative of K for each of the three parameters
     partials = [C.K_grad_i(k)[tri] for k in range(3)]
     grad = 2 * sp.array([(resid * sp.dot(Hi, d)).sum() for d in partials])
     return val, grad
Пример #3
0
 def f1(x):
     """
     Return (val, grad) for parameter vector x.

     Uses C, SSS and Hi from the enclosing scope: val is the quadratic
     form delta . (Hi delta) with delta = tril(C.K()) - SSS, and grad
     holds the three per-parameter terms 2 * delta . (Hi dK_i).
     """
     lower = sp.tril_indices(2)
     C.setParams(x)
     delta = C.K()[lower]-SSS
     val = (delta*sp.dot(Hi, delta)).sum()
     terms = []
     derivs = [C.K_grad_i(0)[lower], C.K_grad_i(1)[lower], C.K_grad_i(2)[lower]]
     for db_dx in derivs:
         terms.append((delta*sp.dot(Hi, db_dx)).sum())
     grad = 2*sp.array(terms)
     return val, grad
Пример #4
0
 def _updateLgrad(self,i):
     """
     write into self.Lgrad a lower-triangular indicator for parameter i:
     the i-th lower-triangle entry becomes 1, the others take the
     remaining values of self.zeros
     """
     lower = sp.tril_indices(self.dim)
     # self.zeros is a scratch vector: flip entry i on, copy, flip it back
     self.zeros[i] = 1
     self.Lgrad[lower] = self.zeros
     self.zeros[i] = 0
Пример #5
0
 def _updateLgrad(self, i):
     """
     fill the lower triangle of self.Lgrad with self.zeros after
     temporarily setting its i-th entry to 1
     """
     tri_rows, tri_cols = SP.tril_indices(self.P)
     self.zeros[i] = 1
     self.Lgrad[tri_rows, tri_cols] = self.zeros
     # restore the scratch vector for the next call
     self.zeros[i] = 0
Пример #6
0
 def K_grad_interParam_i(self, i):
     """
     Return a (dim, dim) matrix of zeros with ones at the position of
     the i-th lower-triangle entry and at its mirror image.
     """
     rows, cols = sp.tril_indices(self.dim)
     r, c = rows[i], cols[i]
     grad = sp.zeros((self.dim,self.dim))
     grad[r, c] = 1
     grad[c, r] = 1
     return grad
Пример #7
0
 def K_grad_interParam_i(self, i):
     """
     Indicator matrix for the i-th lower-triangle position: ones at
     (row, col) and (col, row), zeros elsewhere.
     """
     tri = sp.tril_indices(self.dim)
     row = tri[0][i]
     col = tri[1][i]
     out = sp.zeros((self.dim,self.dim))
     for a, b in ((row, col), (col, row)):
         out[a, b] = 1
     return out
Пример #8
0
 def K_ste(self):
     """
     Return a symmetric (dim, dim) matrix built from the square roots
     of the diagonal of self.getFIinv(), laid out in lower-triangle
     order, or None when self.getFIinv() is None.
     """
     if self.getFIinv() is None:
         return None
     ste = sp.zeros((self.dim, self.dim))
     ste[sp.tril_indices(self.dim)] = sp.sqrt(self.getFIinv().diagonal())
     # copy the lower triangle to the upper one without doubling the diagonal
     return ste + ste.T - sp.diag(ste.diagonal())
Пример #9
0
 def K_ste(self):
     """
     Square roots of the diagonal of self.getFIinv() arranged as a
     symmetric (dim, dim) matrix; None if self.getFIinv() is None.
     """
     if self.getFIinv() is not None:
         rows, cols = sp.tril_indices(self.dim)
         lower = sp.zeros((self.dim, self.dim))
         lower[rows, cols] = sp.sqrt(self.getFIinv().diagonal())
         # symmetrize: the diagonal would otherwise be counted twice
         R = lower + lower.T - sp.diag(lower.diagonal())
     else:
         R = None
     return R
Пример #10
0
    def getInterParams(self):
        """Return the lower triangle (diagonal included) of self.K() as a flat vector."""
        # Covariance parametrization. A variance + correlation layout
        # (self.variance followed by the strictly-lower part of
        # self.correlation) was sketched here previously but is not used.
        lower = sp.tril_indices(self.dim)
        return self.K()[lower]
Пример #11
0
    def getInterParams(self):
        """Flatten self.K() to its lower-triangle entries, diagonal included."""
        # COVARIANCES are the interpretable parameters here; an earlier,
        # disabled variant concatenated self.variance with the
        # below-diagonal entries of self.correlation instead.
        rows, cols = sp.tril_indices(self.dim)
        covariances = self.K()[rows, cols]
        return covariances
Пример #12
0
 def _initParams(self,init_method=None):
     """
     Build the starting hyperparameters and the optimisation mask.

     Args:
         init_method: only consulted when self.bgRE is set and self.P != 1:
                      'pairwise' seeds Cg/Cn from fitPairwiseModel,
                      'random' draws them with SP.randn, any other value
                      uses the lower Cholesky factor of half the
                      empirical covariance of the columns of self.Y

     Returns:
         (params0, Ifilter): params0 is a dict of parameter vectors
         ('Cg'/'Cn' with self.bgRE, 'Cr'/'Cn' without, plus 'mean' when
         self.mean.F is set and self.bgRE holds); Ifilter is None with
         self.bgRE, otherwise a dict of boolean masks that switches
         'Cr' off and 'Cn' on

     Raises:
         NotImplementedError: if self.P != 1 and self.colCovarType is
             not 'freeform'. These paths never assigned the starting
             values and used to fail later with an UnboundLocalError.
     """
     if self.P!=1 and self.colCovarType!='freeform':
         # A lowrank initialisation (eigen-decomposition of the trait
         # covariance) was sketched here at some point but never enabled.
         raise NotImplementedError(
             "_initParams only supports colCovarType='freeform' "
             "for more than one trait (got %r)"%(self.colCovarType,))
     if self.P==1:
         if self.bgRE:
             params0 = {'Cg':SP.sqrt(0.5)*SP.ones(1),'Cn':SP.sqrt(0.5)*SP.ones(1)}
             Ifilter = None
         else:
             params0 = {'Cr':1e-9*SP.ones(1),'Cn':SP.ones(1)}
             Ifilter = {'Cr':SP.zeros(1,dtype=bool),'Cn':SP.ones(1,dtype=bool)}
     elif self.bgRE:
         if init_method=='pairwise':
             _RV = fitPairwiseModel(self.Y,XX=self.XX,S_XX=self.S_XX,U_XX=self.U_XX,verbose=False)
             params0 = {'Cg':_RV['params0_Cg'],'Cn':_RV['params0_Cn']}
         elif init_method=='random':
             params0 = {'Cg':SP.randn(self.Cg.getNumberParams()),'Cn':SP.randn(self.Cn.getNumberParams())}
         else:
             # split the (jittered) empirical covariance evenly between Cg and Cn
             cov = 0.5*SP.cov(self.Y.T)+1e-4*SP.eye(self.P)
             chol = LA.cholesky(cov,lower=True)
             params = chol[SP.tril_indices(self.P)]
             params0 = {'Cg':params.copy(),'Cn':params.copy()}
         Ifilter = None
     else:
         # no background term: Cn starts at the full empirical covariance
         cov = SP.cov(self.Y.T)+1e-4*SP.eye(self.P)
         chol = LA.cholesky(cov,lower=True)
         params = chol[SP.tril_indices(self.P)]
         params0 = {'Cr':1e-9*SP.ones(self.P),'Cn':params}
         Ifilter = {'Cr':SP.zeros(self.P,dtype=bool),
                     'Cn':SP.ones(params.shape[0],dtype=bool)}
     if self.mean.F is not None and self.bgRE:
         params0['mean'] = 1e-6*SP.randn(self.mean.getParams().shape[0])
         if Ifilter is not None:
             Ifilter['mean'] = SP.ones(self.mean.getParams().shape[0],dtype=bool)
     return params0,Ifilter
Пример #13
0
def fitPairwiseModel(Y, XX=None, S_XX=None, U_XX=None, verbose=False):
    """
    Fit a two-trait model for every pair of columns of Y and assemble
    P x P starting covariances from the pairwise correlations.

    Args:
        Y:              2-d phenotype array; P is taken from its second axis
        XX, S_XX, U_XX: forwarded unchanged to fitSingleTraitModel and
                        gp2kronSum
        verbose:        if True, print one progress line per fitted pair

    Returns:
        the dictionary returned by fitSingleTraitModel, extended with
        'Cg0'/'Cn0' (pairwise correlations scaled by the single-trait
        entries of RV['varST'], column 0 for Cg and column 1 for Cn),
        'conv2' (per-pair flag returned by OPT.opt_hyper),
        'Cg0_reg'/'Cn0_reg' (diagonally shifted so the smallest
        eigenvalue is at least 1e-4) and 'params0_Cg'/'params0_Cn'
        (lower Cholesky factors flattened in tril order)
    """
    _, P = Y.shape
    RV = fitSingleTraitModel(Y, XX=XX, S_XX=S_XX, U_XX=U_XX, verbose=verbose)
    Cg = covariance.freeform(2)
    Cn = covariance.freeform(2)
    gp = gp2kronSum(mean(Y[:, 0:2]), Cg, Cn, XX=XX, S_XX=S_XX, U_XX=U_XX)
    conv2 = SP.ones((P, P), dtype=bool)
    rho_g = SP.ones((P, P))
    rho_n = SP.ones((P, P))
    for p1 in range(P):
        for p2 in range(p1):
            if verbose:
                # single-argument call form works on Python 2 and 3
                print('.. fitting correlation (%d,%d)' % (p1, p2))
            gp.setY(Y[:, [p1, p2]])
            # start from the single-trait scales with a tiny random
            # off-diagonal term
            Cg_params0 = SP.array([
                SP.sqrt(RV['varST'][p1, 0]), 1e-6 * SP.randn(),
                SP.sqrt(RV['varST'][p2, 0])
            ])
            Cn_params0 = SP.array([
                SP.sqrt(RV['varST'][p1, 1]), 1e-6 * SP.randn(),
                SP.sqrt(RV['varST'][p2, 1])
            ])
            params0 = {'Cg': Cg_params0, 'Cn': Cn_params0}
            conv2[p1, p2], info = OPT.opt_hyper(gp, params0, factr=1e3)
            Kg = Cg.K()
            Kn = Cn.K()
            rho_g[p1, p2] = Kg[0, 1] / SP.sqrt(Kg.diagonal().prod())
            rho_n[p1, p2] = Kn[0, 1] / SP.sqrt(Kn.diagonal().prod())
            # mirror into the upper triangle
            conv2[p2, p1] = conv2[p1, p2]
            rho_g[p2, p1] = rho_g[p1, p2]
            rho_n[p2, p1] = rho_n[p1, p2]
    sd_g = SP.sqrt(RV['varST'][:, 0:1])
    sd_n = SP.sqrt(RV['varST'][:, 1:2])
    RV['Cg0'] = rho_g * SP.dot(sd_g, sd_g.T)
    RV['Cn0'] = rho_n * SP.dot(sd_n, sd_n.T)
    RV['conv2'] = conv2
    # regularizes covariance matrices: shift the spectrum so the
    # smallest eigenvalue is at least 1e-4
    offset_g = abs(SP.minimum(LA.eigh(RV['Cg0'])[0].min(), 0)) + 1e-4
    offset_n = abs(SP.minimum(LA.eigh(RV['Cn0'])[0].min(), 0)) + 1e-4
    RV['Cg0_reg'] = RV['Cg0'] + offset_g * SP.eye(P)
    RV['Cn0_reg'] = RV['Cn0'] + offset_n * SP.eye(P)
    # lower=True is required: tril_indices reads the lower triangle, and
    # scipy.linalg.cholesky returns the upper factor by default, which
    # would leave every off-diagonal starting value at zero
    RV['params0_Cg'] = LA.cholesky(RV['Cg0_reg'],
                                   lower=True)[SP.tril_indices(P)]
    RV['params0_Cn'] = LA.cholesky(RV['Cn0_reg'],
                                   lower=True)[SP.tril_indices(P)]
    return RV
Пример #14
0
    def adjacency(self, min_snp2gene_obs=2,fdr_cutoff=0.3,return_genes=False):
        '''
            Return a matrix showing the number of shared HPO genes by Term.
            The diagonal of the matrix is the number of genes discovered by
            that term. The upper triangle shows the overlap between the row
            and column and the lower triangle shows the hypergeometric pval
            for the overlap between the two terms. The universe used is the
            number of unique genes in the overlap results.

            min_snp2gene_obs : int (default: 2)
                The min SNP2gene mapping observations needed to be HPO
            fdr_cutoff: float (default: 0.3)
                The FDR cutoff to be considered HPO
            return_genes : bool (default: False)
                Return the candidate gene list instead of the overlap table
                (one row per pair of distinct terms sharing at least one
                gene; the shared genes are comma-joined in sorted order)
        '''
        hpo = self.high_priority_candidates(
                fdr_cutoff=fdr_cutoff,
                min_snp2gene_obs=min_snp2gene_obs,
                original_COB_only=True)
        genes_by_term = {term: set(group.gene) for term, group in hpo.groupby('Term')}
        terms = list(genes_by_term)
        num_universe = len(self.results.gene.unique())
        adj = []
        for i, a in enumerate(terms):
            # only visit each unordered pair once (b at or after a)
            for b in terms[i:]:
                common = genes_by_term[a] & genes_by_term[b]
                num_common = len(common)
                if a != b:
                    pval = hypergeom.sf(num_common-1,num_universe,len(genes_by_term[a]),len(genes_by_term[b]))
                else:
                    # This will make the diagonal of the matrix be the number HPO genes
                    # for the element
                    pval = len(genes_by_term[a])
                adj.append((a,b,num_common,pval,','.join(sorted(common))))
        adj = pd.DataFrame(adj,columns=['Term1','Term2','num_common','pval','common'])
        # Stop early if we just want to return the lists
        if return_genes:
            adj = adj[adj.num_common>0]
            adj = adj[np.logical_not(adj.Term1==adj.Term2)]
            return adj.drop_duplicates()
        overlap = pd.pivot_table(adj,index='Term1',columns='Term2',values='num_common')
        # transposed orientation puts the p-values below the diagonal
        pvals = pd.pivot_table(adj,index='Term2',columns='Term1',values='pval')
        # Build the masked matrices as new arrays: assigning through
        # DataFrame.values is not guaranteed to modify the frame.
        # Keep only the strict upper triangle of the overlap counts ...
        overlap = pd.DataFrame(np.triu(overlap.to_numpy(dtype=float),1),
                               index=overlap.index,columns=overlap.columns)
        # ... and the diagonal plus lower triangle of the p-values
        pvals = pd.DataFrame(np.tril(pvals.to_numpy(dtype=float)),
                             index=pvals.index,columns=pvals.columns)
        return (overlap+pvals).astype(float)
Пример #15
0
 def _initParams(self, init_method=None):
     """
     Return the starting hyperparameters as {'covar': vector}.

     With self.bgRE the vector is either random (init_method=='random'),
     two copies of sqrt(0.5) for a single trait, or two copies of the
     lower Cholesky factor of half the jittered empirical covariance of
     self.Y. Without self.bgRE it is [1.] for a single trait, otherwise
     the lower Cholesky factor of the jittered empirical covariance.
     """
     if self.bgRE:
         if init_method=='random':
             return {'covar': sp.randn(self._gpNull.covar.getNumberParams())}
         if self.P==1:
             return {'covar':sp.sqrt(0.5) * sp.ones(2)}
         half_cov = 0.5*sp.cov(self.Y.T) + 1e-4*sp.eye(self.P)
         tri = la.cholesky(half_cov, lower=True)[sp.tril_indices(self.P)]
         # same start for both covariance terms
         return {'covar': sp.concatenate([tri, tri])}
     if self.P==1:
         return {'covar': sp.array([1.])}
     full_cov = sp.cov(self.Y.T) + 1e-4*sp.eye(self.P)
     return {'covar': la.cholesky(full_cov, lower=True)[sp.tril_indices(self.P)]}
Пример #16
0
 def __init__(self, dim, jitter=1e-4):
     """
     Set up a free-form covariance of size ``dim`` with all parameters at zero.

     Args:
         dim:        dimension of the free-form covariance
         jitter:     extent of diagonal offset which is added for numerical stability
                     (default value: 1e-4)
     """
     # The call order is significant: the base initialiser and
     # _calcNumberParams() run before self.dim is assigned below.
     Covariance.__init__(self, dim)
     self._K_act = True
     self._calcNumberParams()
     self.dim = dim
     # presumably self.n_params is set by _calcNumberParams() -- TODO confirm
     self.params = sp.zeros(self.n_params)
     # row / column positions of the lower triangle, diagonal included
     self.idx_r, self.idx_c = sp.tril_indices(self.dim)
     self.set_jitter(jitter)
Пример #17
0
 def __init__(self, dim, jitter=1e-4):
     """
     Initialise a ``dim``-dimensional free-form covariance with zeroed parameters.

     Args:
         dim:        dimension of the free-form covariance
         jitter:     extent of diagonal offset which is added for numerical stability
                     (default value: 1e-4)
     """
     # Keep this order: Covariance.__init__ and _calcNumberParams() are
     # called before self.dim is assigned in this method.
     Covariance.__init__(self, dim)
     self._K_act = True
     self._calcNumberParams()
     self.dim = dim
     # NOTE(review): self.n_params looks like it comes from
     # _calcNumberParams() -- confirm
     self.params = sp.zeros(self.n_params)
     # cached lower-triangle (incl. diagonal) row and column indices
     self.idx_r, self.idx_c = sp.tril_indices(self.dim)
     self.set_jitter(jitter)
Пример #18
0
 def _initParams(self, init_method=None):
     """
     internal function for params initialization

     Returns a dict with the single key "covar". Its value depends on
     self.bgRE, self.P and, when self.bgRE is set, on whether
     init_method equals "random".
     """
     single_trait = self.P == 1
     if not self.bgRE:
         if single_trait:
             start = sp.array([1.])
         else:
             emp_cov = sp.cov(self.Y.T) + 1e-4 * sp.eye(self.P)
             start = la.cholesky(emp_cov, lower=True)[sp.tril_indices(self.P)]
         return {"covar": start}
     if init_method == "random":
         start = sp.randn(self._gpNull.covar.getNumberParams())
     elif single_trait:
         start = sp.sqrt(0.5) * sp.ones(2)
     else:
         # half of the jittered empirical covariance for each of the two terms
         emp_cov = 0.5 * sp.cov(self.Y.T) + 1e-4 * sp.eye(self.P)
         tri = la.cholesky(emp_cov, lower=True)[sp.tril_indices(self.P)]
         start = sp.concatenate([tri, tri])
     return {"covar": start}
Пример #19
0
 def _initParams(self, init_method=None):
     """
     Build the initial parameter dict {'covar': vector}.

     init_method is only looked at when self.bgRE is set, where
     'random' selects a draw from sp.randn; every other case derives
     the vector from self.P and the empirical covariance of self.Y.
     """
     if self.bgRE and init_method == 'random':
         return {
             'covar': sp.randn(self._gpNull.covar.getNumberParams())
         }
     if self.P == 1:
         if self.bgRE:
             return {'covar': sp.sqrt(0.5) * sp.ones(2)}
         return {'covar': sp.array([1.])}
     if self.bgRE:
         cov = 0.5 * sp.cov(self.Y.T) + 1e-4 * sp.eye(self.P)
     else:
         cov = sp.cov(self.Y.T) + 1e-4 * sp.eye(self.P)
     flat = la.cholesky(cov, lower=True)[sp.tril_indices(self.P)]
     if self.bgRE:
         # two covariance terms share the same starting factor
         flat = sp.concatenate([flat, flat])
     return {'covar': flat}
Пример #20
0
def recover_B2_ridge( y, X, reg = 0 ):
    """Recover the symmetric matrix B2 by ridge regression of y**2 on
    the lower-triangle products x_i * x_j of every row x of X.

    reg is the ridge penalty added to the diagonal of the normal
    equations. Off-diagonal coefficients are split evenly between the
    two mirrored positions of the result.
    """
    _, D = X.shape
    tri = sc.tril_indices(D)
    targets = y**2

    # one feature row per sample: the lower triangle of outer(x, x)
    features = array( [ outer(row, row)[tri] for row in X ] )
    gram = features.T.dot(features) + reg * eye(features.shape[1])
    coeffs = inv(gram).dot( features.T ).dot( targets )

    lower = zeros((D,D))
    lower[tri] = coeffs
    return (lower + lower.T)/2
Пример #21
0
    def correlation_ste(self):
        """
        Standard errors of the correlations implied by self.K(), obtained
        with the delta method from self.getFIinv().

        For i != j the correlation is K_ij / sqrt(K_ii * K_jj); its
        variance is g' S g, where g is the gradient with respect to
        (K_ij, K_ii, K_jj) and S the matching 3x3 sub-block of
        self.getFIinv().

        NOTE(review): assumes self.getFIinv() is the covariance of the
        lower-triangle entries of K in sp.tril_indices(self.dim) order,
        and that self.K() returns the current covariance -- confirm.

        Returns:
            None if self.getFIinv() is None, otherwise a symmetric
            (dim, dim) array with zeros on the diagonal (a correlation
            of a trait with itself is fixed at 1).
        """
        FIinv = self.getFIinv()
        if FIinv is None:
            return None
        dim = self.dim
        K = self.K()
        # idx_M[i, j] = position of cov_ij in the Fisher matrix (symmetric lookup)
        idx_M = sp.zeros((dim, dim), dtype=int)
        idx_M[sp.tril_indices(dim)] = sp.arange(dim * (dim + 1) // 2)
        idx_M = idx_M + idx_M.T - sp.diag(idx_M.diagonal())
        R = sp.zeros((dim, dim))
        for i in range(dim):
            for j in range(i):
                sel = [idx_M[i, j], idx_M[i, i], idx_M[j, j]]
                scale = sp.sqrt(K[i, i] * K[j, j])
                rho = K[i, j] / scale
                grad = sp.array([1. / scale,
                                 -0.5 * rho / K[i, i],
                                 -0.5 * rho / K[j, j]])
                var = sp.dot(grad, sp.dot(FIinv[sel, :][:, sel], grad))
                # guard against tiny negative values from round-off
                R[i, j] = R[j, i] = sp.sqrt(max(var, 0.))
        return R
Пример #22
0
    def correlation_ste(self):
        """
        Delta-method standard errors for the correlations derived from
        self.K(), using self.getFIinv() as the covariance of the
        estimated covariance entries.

        For a pair i != j, rho = K_ij / sqrt(K_ii * K_jj) and
        Var(rho) = g' S g with g = d rho / d (K_ij, K_ii, K_jj) and S
        the corresponding 3x3 sub-block of self.getFIinv().

        NOTE(review): this presumes self.getFIinv() is indexed like
        sp.tril_indices(self.dim) and that self.K() is available on
        this object -- confirm.

        Returns:
            None when self.getFIinv() is None; otherwise a symmetric
            (dim, dim) array whose diagonal is zero.
        """
        fisher_inv = self.getFIinv()
        if fisher_inv is None:
            return None
        n = self.dim
        cov = self.K()
        # symmetric table: position of cov_ij inside the Fisher matrix
        idx_M = sp.zeros((n, n), dtype=int)
        idx_M[sp.tril_indices(n)] = sp.arange(n * (n + 1) // 2)
        idx_M = idx_M + idx_M.T - sp.diag(idx_M.diagonal())
        R = sp.zeros((n, n))
        for i in range(n):
            for j in range(i):
                ij = idx_M[i, j] # index of cov_ij_ste from fisher
                ii = idx_M[i, i] # index of cov_ii_ste from fisher
                jj = idx_M[j, j] # index of cov_jj_ste from fisher
                sel = [ij, ii, jj]
                norm = sp.sqrt(cov[i, i] * cov[j, j])
                rho = cov[i, j] / norm
                g = sp.array([1. / norm,
                              -0.5 * rho / cov[i, i],
                              -0.5 * rho / cov[j, j]])
                var = sp.dot(g, sp.dot(fisher_inv[sel, :][:, sel], g))
                # round-off can push a zero variance slightly negative
                R[i, j] = R[j, i] = sp.sqrt(max(var, 0.))
        return R
Пример #23
0
 def _updateL(self):
     """
     write self.params into the lower triangle (diagonal included) of
     self.L, i.e. rebuild the cholesky factor from the hyperparameters
     """
     lower = sp.tril_indices(self.dim)
     self.L[lower] = self.params
Пример #24
0
 def _updateL(self):
     """
     refresh the cholesky factor self.L: its lower-triangle entries
     (diagonal included) are overwritten with self.params
     """
     rows, cols = SP.tril_indices(self.P)
     self.L[rows, cols] = self.params
Пример #25
0
 def setCovariance(self,cov):
     """ derive the hyperparameters from a covariance matrix (flattened
     lower Cholesky factor) and pass them to setParams """
     lower_factor = LA.cholesky(cov,lower=True)
     self.setParams(lower_factor[sp.tril_indices(self.dim)])
Пример #26
0
 def _initParams(self, init_method=None):
     """
     Build the starting hyperparameters and the optimisation mask.

     Args:
         init_method: only consulted when self.bgRE is set and self.P != 1:
                      'pairwise' seeds Cg/Cn from fitPairwiseModel,
                      'random' draws them with SP.randn, any other value
                      uses the lower Cholesky factor of half the
                      empirical covariance of the columns of self.Y

     Returns:
         (params0, Ifilter): params0 is a dict of parameter vectors
         ('Cg'/'Cn' with self.bgRE, 'Cr'/'Cn' without, plus 'mean' when
         self.mean.F is set and self.bgRE holds); Ifilter is None with
         self.bgRE, otherwise a dict of boolean masks that switches
         'Cr' off and 'Cn' on

     Raises:
         NotImplementedError: if self.P != 1 and self.colCovarType is
             not 'freeform'. These paths never assigned the starting
             values and used to fail later with an UnboundLocalError.
     """
     if self.P != 1 and self.colCovarType != 'freeform':
         # A lowrank initialisation (eigen-decomposition of the trait
         # covariance) was sketched here at some point but never enabled.
         raise NotImplementedError(
             "_initParams only supports colCovarType='freeform' "
             "for more than one trait (got %r)" % (self.colCovarType, ))
     if self.P == 1:
         if self.bgRE:
             params0 = {
                 'Cg': SP.sqrt(0.5) * SP.ones(1),
                 'Cn': SP.sqrt(0.5) * SP.ones(1)
             }
             Ifilter = None
         else:
             params0 = {'Cr': 1e-9 * SP.ones(1), 'Cn': SP.ones(1)}
             Ifilter = {
                 'Cr': SP.zeros(1, dtype=bool),
                 'Cn': SP.ones(1, dtype=bool)
             }
     elif self.bgRE:
         if init_method == 'pairwise':
             _RV = fitPairwiseModel(self.Y,
                                    XX=self.XX,
                                    S_XX=self.S_XX,
                                    U_XX=self.U_XX,
                                    verbose=False)
             params0 = {
                 'Cg': _RV['params0_Cg'],
                 'Cn': _RV['params0_Cn']
             }
         elif init_method == 'random':
             params0 = {
                 'Cg': SP.randn(self.Cg.getNumberParams()),
                 'Cn': SP.randn(self.Cn.getNumberParams())
             }
         else:
             # split the (jittered) empirical covariance evenly
             # between Cg and Cn
             cov = 0.5 * SP.cov(self.Y.T) + 1e-4 * SP.eye(self.P)
             chol = LA.cholesky(cov, lower=True)
             params = chol[SP.tril_indices(self.P)]
             params0 = {'Cg': params.copy(), 'Cn': params.copy()}
         Ifilter = None
     else:
         # no background term: Cn starts at the full empirical covariance
         cov = SP.cov(self.Y.T) + 1e-4 * SP.eye(self.P)
         chol = LA.cholesky(cov, lower=True)
         params = chol[SP.tril_indices(self.P)]
         params0 = {'Cr': 1e-9 * SP.ones(self.P), 'Cn': params}
         Ifilter = {
             'Cr': SP.zeros(self.P, dtype=bool),
             'Cn': SP.ones(params.shape[0], dtype=bool)
         }
     if self.mean.F is not None and self.bgRE:
         params0['mean'] = 1e-6 * SP.randn(self.mean.getParams().shape[0])
         if Ifilter is not None:
             Ifilter['mean'] = SP.ones(self.mean.getParams().shape[0],
                                       dtype=bool)
     return params0, Ifilter
Пример #27
0
 def setCovariance(self,cov):
     """ set the hyperparameters so that they encode the given covariance:
     the lower Cholesky factor of cov, flattened in tril order """
     rows, cols = sp.tril_indices(self.dim)
     factor = LA.cholesky(cov,lower=True)
     self.setParams(factor[rows, cols])
    def getGenoSte(self, DGE, IGE, IEE, cageEffect):
        """
        Standard errors for the genetic terms, from a re-parametrised
        and pseudo-inverted Fisher information matrix.

        The flags select which terms enter the model, in the order
        genetic (DGE / IGE), environmental (scalar unless IEE), cage
        (only if cageEffect). Each term contributes a coefficient and
        the inverse of its covar_rescaling_factor.

        Returns:
            dict with 'R', a 2x2 matrix (STE_Ad and STE_As on the
            diagonal, STE_Ads off the diagonal), and 'corr_params'.
            Entries that do not apply to the chosen model are -999.
        """
        # the first call's result is discarded, the second one is used
        self._gp.covar.getFisherInf()
        fisher = self._gp.covar.getFisherInf()

        # scalar in front of each term and inverse rescaling factors;
        # ordering for geno and env is direct, covar, indirect as in
        # the fisher matrix
        aP = []
        vi = []

        def add_scalar_term(cov):
            aP.append(cov.scale)
            vi.append(1. / covar_rescaling_factor(cov.K0))

        def add_freeform_term(cov):
            for r, c in ((0, 0), (0, 1), (1, 1)):
                aP.append(cov.covff.K()[r, c])
            vi.append(1. / covar_rescaling_factor(cov._K))
            vi.append(1. / covar_rescaling_factor(cov._KZ + cov._ZK))
            vi.append(1. / covar_rescaling_factor(cov._ZKZ))

        if DGE and IGE:
            add_freeform_term(self._genoCov)
        elif DGE or IGE:
            add_scalar_term(self._genoCov)

        if IEE:
            add_freeform_term(self._envCov)
        else:
            add_scalar_term(self._envCov)

        if cageEffect:
            add_scalar_term(self._cageCov)

        # make them vectors
        aP = sp.array(aP)
        vi = sp.array(vi)
        n_terms = aP.shape[0]

        # overall variance
        # this should correspond to the one you get from sampling
        v = (aP*vi).sum()

        # fractions of variance explained by each term
        # (can be negative)
        h = (aP*vi) / v

        # jacobian
        J = sp.zeros((n_terms, n_terms))
        J[:, 0] = h / vi
        J[-1, 1:] = -v / vi[-1]
        for row in range(n_terms-1):
            J[row, row+1] = v / vi[row]

        # transformation of Fisher
        Fnew = sp.dot(J.T, sp.dot(fisher, J))

        # invert the new Fisher, keeping only eigenvalues above 1e-9
        S,U = sp.linalg.eigh(Fnew)
        keep = S>1e-9
        U = U[:,keep]
        S = S[keep]
        FI = sp.dot(U,sp.dot(sp.diag(S**(-1)),U.T))
        # reorder to have same ordering as before
        idxs = list(range(1, n_terms))
        idxs.append(0)
        FI = FI[idxs, :][:, idxs]

        # R is 2x2 matrix: STE_Ad and STE_As on diag, STE_Ads off
        R = sp.zeros((2, 2))
        if DGE and IGE:
            FI_geno = FI[:3,:][:,:3]
            # square roots of the variances, ordered as Ad, Ads, As:
            # fills the diagonal and one off-diagonal entry first
            R[sp.tril_indices(2)] = sp.sqrt(FI_geno.diagonal())
            # now fills the other off-diagonal entry
            R = R + R.T - sp.diag(R.diagonal())
            corr_param_Ad_As = FI_geno[0,2]/(sp.sqrt(FI_geno[0,0])*sp.sqrt(FI_geno[2,2]))
        else:
            # -999 marks entries that do not apply to this model
            R[:, :] = -999
            corr_param_Ad_As = -999
            if DGE:
                R[0,0] = sp.sqrt(FI[0,0])
            elif IGE:
                R[1,1] = sp.sqrt(FI[0,0])

        STE_output = {}
        STE_output['R']=R
        STE_output['corr_params']= corr_param_Ad_As

        return STE_output
Пример #29
0
    def adjacency(
        self,
        min_snp2gene_obs=2,
        fdr_cutoff=0.3,
        return_genes=False,
        second_overlap=None,
    ):
        """
            Return a matrix showing the number of shared HPO genes by Term.
            The diagonal of the matrix is the number of genes discovered by
            that term. The upper triangle shows the overlap between the row
            and column and the lower triangle shows the hypergeometric pval
            for the overlap between the two terms. The universe used is the
            number of unique genes in the overlap results.

            min_snp2gene_obs : int (default: 2)
                The min SNP2gene mapping observations needed to be HPO
            fdr_cutoff: float (default: 0.3)
                The FDR cutoff to be considered HPO
            return_genes : bool (default: False)
                Return the candidate gene list instead of the overlap table
                (one row per pair of distinct terms sharing at least one
                gene, with a 'bonferoni' flag; the shared genes are
                comma-joined in sorted order)
            second_overlap : Overlap Object (default: None)
                If specified, overlap between terms will be calculated
                between this overlap's HPO genes and the second overlap's
                HPO genes resulting in an adjacency matrix where the
                x-axis is overlap 1's terms and the y-axis is overlap
                2's terms and the values are the number of shared genes
                per term.
        """
        # p-values are only laid out below the diagonal when an overlap is
        # compared with itself; for two different overlaps the original
        # orientation of the p-value table is kept
        same_overlap = second_overlap is None or second_overlap is self
        if second_overlap is None:
            second_overlap = self
        hpo1 = self.high_priority_candidates(
            fdr_cutoff=fdr_cutoff,
            min_snp2gene_obs=min_snp2gene_obs,
            original_COB_only=True,
        )
        hpo2 = second_overlap.high_priority_candidates(
            fdr_cutoff=fdr_cutoff,
            min_snp2gene_obs=min_snp2gene_obs,
            original_COB_only=True,
        )
        x = {term: set(group.gene) for term, group in hpo1.groupby("Term")}
        y = {term: set(group.gene) for term, group in hpo2.groupby("Term")}
        num_universe = len(
            set(self.results.gene.unique()).union(
                set(second_overlap.results.gene.unique())
            )
        )
        adj = []
        for i, a in enumerate(x):
            for j, b in enumerate(y):
                # NOTE(review): skipping j < i visits each unordered pair
                # once when x and y hold the same terms; with a different
                # second_overlap it drops pairs by position -- confirm
                # that this is intended.
                if j < i:
                    continue
                num_a = len(x[a])
                num_b = len(y[b])
                common = x[a] & y[b]
                num_common = len(common)
                if a != b:
                    pval = hypergeom.sf(num_common - 1, num_universe, num_a, num_b)
                else:
                    # This will make the diagonal of the matrix be the number HPO genes
                    # for the element
                    pval = num_a
                adj.append(
                    (a, b, num_a, num_b, num_common, pval, ",".join(sorted(common)))
                )
        adj = pd.DataFrame(
            adj,
            columns=[
                "Term1",
                "Term2",
                "num_term1",
                "num_term2",
                "num_common",
                "pval",
                "common",
            ],
        )
        # Stop early if we just want to return the lists
        if return_genes:
            adj = adj[adj.num_common > 0]
            adj = adj[np.logical_not(adj.Term1 == adj.Term2)]
            adj = adj.drop_duplicates()
            adj["bonferoni"] = adj.pval <= (0.05 / (len(x) * len(y)))
            return adj
        overlap = pd.pivot_table(
            adj, index="Term1", columns="Term2", values="num_common"
        )
        if same_overlap:
            # transposed orientation puts the p-values below the diagonal
            pvals = pd.pivot_table(adj, index="Term2", columns="Term1", values="pval")
        else:
            pvals = pd.pivot_table(adj, index="Term1", columns="Term2", values="pval")
        # Build the masked matrices as new arrays: assigning through
        # DataFrame.values is not guaranteed to modify the frame.
        # Keep only the strict upper triangle of the overlap counts ...
        overlap = pd.DataFrame(
            np.triu(overlap.to_numpy(dtype=float), 1),
            index=overlap.index,
            columns=overlap.columns,
        )
        # ... and the diagonal plus lower triangle of the p-values
        pvals = pd.DataFrame(
            np.tril(pvals.to_numpy(dtype=float)),
            index=pvals.index,
            columns=pvals.columns,
        )
        return (overlap + pvals).astype(float)
Пример #30
0
def cholesky_factor(nu):
    """Return the 2x2 matrix L.dot(L.T), where L is lower-triangular
    with its three entries (row-major lower triangle) taken from nu."""
    assert len(nu) == 3
    rows, cols = sp.tril_indices(2)
    lower = sp.zeros(shape=(2, 2))
    lower[rows, cols] = nu
    return lower.dot(lower.T)