def fitPairwiseModel(Y,XX=None,S_XX=None,U_XX=None,verbose=False):
    # Fit a bivariate variance-component model for every pair of traits in Y
    # and assemble full P x P genetic (Cg0) and noise (Cn0) covariance
    # estimates from the pairwise correlations.
    #   Y  : (N, P) phenotype matrix
    #   XX / S_XX / U_XX : relatedness matrix or its eigendecomposition
    #     (exact semantics defined by gp2kronSum -- not visible here)
    # Returns the fitSingleTraitModel dict extended with 'Cg0', 'Cn0',
    # regularized versions, Cholesky parameter vectors and 'conv2' flags.
    N,P = Y.shape
    """ initializes parameters """
    # per-trait variances from the single-trait fits seed the pairwise fits
    RV = fitSingleTraitModel(Y,XX=XX,S_XX=S_XX,U_XX=U_XX,verbose=verbose)
    Cg = covariance.freeform(2)
    Cn = covariance.freeform(2)
    # one shared 2-trait GP; only its phenotype columns are swapped per pair
    gp = gp2kronSum(mean(Y[:,0:2]),Cg,Cn,XX=XX,S_XX=S_XX,U_XX=U_XX)
    conv2 = SP.ones((P,P),dtype=bool)
    rho_g = SP.ones((P,P))      # genetic correlations (diagonal stays 1)
    rho_n = SP.ones((P,P))      # noise correlations
    for p1 in range(P):
        for p2 in range(p1):
            if verbose:
                print '.. fitting correlation (%d,%d)'%(p1,p2)
            gp.setY(Y[:,[p1,p2]])
            # init at the single-trait solutions with a tiny random off-diagonal
            Cg_params0 = SP.array([SP.sqrt(RV['varST'][p1,0]),1e-6*SP.randn(),SP.sqrt(RV['varST'][p2,0])])
            Cn_params0 = SP.array([SP.sqrt(RV['varST'][p1,1]),1e-6*SP.randn(),SP.sqrt(RV['varST'][p2,1])])
            params0 = {'Cg':Cg_params0,'Cn':Cn_params0}
            conv2[p1,p2],info = OPT.opt_hyper(gp,params0,factr=1e3)
            # correlation = off-diagonal / sqrt(product of diagonal variances)
            rho_g[p1,p2] = Cg.K()[0,1]/SP.sqrt(Cg.K().diagonal().prod())
            rho_n[p1,p2] = Cn.K()[0,1]/SP.sqrt(Cn.K().diagonal().prod())
            # mirror into the upper triangle to keep the matrices symmetric
            conv2[p2,p1] = conv2[p1,p2]; rho_g[p2,p1] = rho_g[p1,p2]; rho_n[p2,p1] = rho_n[p1,p2]
    # scale correlations back to covariances using the single-trait variances
    RV['Cg0'] = rho_g*SP.dot(SP.sqrt(RV['varST'][:,0:1]),SP.sqrt(RV['varST'][:,0:1].T))
    RV['Cn0'] = rho_n*SP.dot(SP.sqrt(RV['varST'][:,1:2]),SP.sqrt(RV['varST'][:,1:2].T))
    RV['conv2'] = conv2
    #3. regularizes covariance matrices
    # shift the spectrum so both matrices are positive definite before Cholesky
    offset_g = abs(SP.minimum(LA.eigh(RV['Cg0'])[0].min(),0))+1e-4
    offset_n = abs(SP.minimum(LA.eigh(RV['Cn0'])[0].min(),0))+1e-4
    RV['Cg0_reg'] = RV['Cg0']+offset_g*SP.eye(P)
    RV['Cn0_reg'] = RV['Cn0']+offset_n*SP.eye(P)
    # NOTE(review): assumes LA.cholesky returns a lower-triangular factor
    # (numpy convention); scipy.linalg.cholesky defaults to upper -- confirm
    RV['params0_Cg']=LA.cholesky(RV['Cg0_reg'])[SP.tril_indices(P)]
    RV['params0_Cn']=LA.cholesky(RV['Cn0_reg'])[SP.tril_indices(P)]
    return RV
def optimize(self, Xr, params0=None, n_times=10, verbose=True, vmax=5, perturb=1e-3, factr=1e7): """ Optimize the model considering Xr """ # set params0 from null if params0==Null if params0 is None: if self.null is None: if verbose: print ".. fitting null model upstream" self.fitNull() if self.bgRE: params0 = { 'Cg': self.null['params0_g'], 'Cn': self.null['params0_n'] } else: params0 = {'Cn': self.null['params0_n']} if 'params_mean' in self.null: if self.null['params_mean'].shape[0] > 0: params0['mean'] = self.null['params_mean'] params_was_None = True else: params_was_None = False Xr *= SP.sqrt(self.N / (Xr**2).sum()) self.gp.set_Xr(Xr) self.gp.restart() start = TIME.time() for i in range(n_times): if params_was_None: params0['Cr'] = 1e-3 * SP.randn(self.rank_r * self.P) conv, info = OPT.opt_hyper(self.gp, params0, factr=factr) conv *= self.gp.Cr.K().diagonal().max() < vmax conv *= self.getLMLgrad() < 0.1 if conv or not params_was_None: break self.infoOpt = info if not conv: warnings.warn("not converged") # return value RV = {} if self.P > 1: RV['Cr'] = self.getCr() if self.bgRE: RV['Cg'] = self.getCg() RV['Cn'] = self.getCn() RV['time'] = SP.array([TIME.time() - start]) RV['params0'] = params0 RV['nit'] = SP.array([info['nit']]) RV['funcalls'] = SP.array([info['funcalls']]) RV['var'] = self.getVariances() RV['conv'] = SP.array([conv]) RV['NLLAlt'] = SP.array([self.getNLLAlt()]) RV['LLR'] = SP.array([self.getLLR()]) RV['LMLgrad'] = SP.array([self.getLMLgrad()]) return RV
def fitNull(self,verbose=True,cache=False,out_dir='./cache',fname=None,rewrite=False,seed=None,n_times=10,factr=1e3,init_method=None):
    """
    Fit null model

    Fits the background (no region effect) model, optionally reading or
    writing the fitted parameters from an HDF5 cache file.  The result
    dict is stored on self.null and returned.
    """
    if seed is not None:
        SP.random.seed(seed)
    read_from_file = False
    if cache:
        assert fname is not None, 'MultiTraitSetTest:: specify fname'
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        out_file = os.path.join(out_dir,fname)
        read_from_file = os.path.exists(out_file) and not rewrite
    RV = {}
    if read_from_file:
        # reload a previously cached null fit instead of re-optimizing
        f = h5py.File(out_file,'r')
        for key in f.keys():
            RV[key] = f[key][:]
        f.close()
        self.setNull(RV)
    else:
        start = TIME.time()
        # with a background random effect use the 2-kron GP, otherwise the
        # low-rank variant with a constant region term
        if self.bgRE:
            self.gpNull = gp2kronSum(self.mean,self.Cg,self.Cn,XX=self.XX,S_XX=self.S_XX,U_XX=self.U_XX)
        else:
            self.gpNull = gp2kronSumLR(self.Y,self.Cn,Xr=SP.ones((self.N,1)),F=self.F)
        # random restarts until the optimizer converges
        for i in range(n_times):
            params0,Ifilter=self._initParams(init_method=init_method)
            conv,info = OPT.opt_hyper(self.gpNull,params0,Ifilter=Ifilter,factr=factr)
            if conv:
                break
        if not conv:
            warnings.warn("not converged")
        # mean squared gradient at the optimum (convergence diagnostic)
        LMLgrad = SP.concatenate([self.gpNull.LMLgrad()[key]**2 for key in self.gpNull.LMLgrad().keys()]).mean()
        LML = self.gpNull.LML()
        if 'mean' in params0.keys():
            RV['params_mean'] = self.gpNull.mean.getParams()
        RV['params0_g'] = self.Cg.getParams()
        RV['params0_n'] = self.Cn.getParams()
        RV['Cg'] = self.Cg.K()
        RV['Cn'] = self.Cn.K()
        RV['conv'] = SP.array([conv])
        RV['time'] = SP.array([TIME.time()-start])
        RV['NLL0'] = SP.array([LML])
        RV['LMLgrad'] = SP.array([LMLgrad])
        RV['nit'] = SP.array([info['nit']])
        RV['funcalls'] = SP.array([info['funcalls']])
        if self.bgRE:
            # heritability and standard-error estimates (only defined with bgRE)
            RV['h2'] = self.gpNull.h2()
            RV['h2_ste'] = self.gpNull.h2_ste()
            RV['Cg_ste'] = self.gpNull.ste('Cg')
            RV['Cn_ste'] = self.gpNull.ste('Cn')
        self.null = RV
        if cache:
            f = h5py.File(out_file,'w')
            dumpDictHdf5(RV,f)
            f.close()
    return RV
def optimize(self, Xr, params0=None, n_times=10, verbose=True, vmax=5, perturb=1e-3, factr=1e7):
    """ Optimize the model considering Xr """
    # When no starting point is given, derive one from the null model
    # (fitting the null first if necessary).
    params_was_None = params0 is None
    if params_was_None:
        if self.null is None:
            if verbose:
                print(".. fitting null model upstream")
            self.fitNull()
        if self.bgRE:
            params0 = {'Cg': self.null['params0_g'],
                       'Cn': self.null['params0_n']}
        else:
            params0 = {'Cn': self.null['params0_n']}
        if 'params_mean' in self.null and self.null['params_mean'].shape[0] > 0:
            params0['mean'] = self.null['params_mean']
    # Rescale the region term so its mean squared entry is one
    # (in place, as in the original implementation).
    Xr *= SP.sqrt(self.N / (Xr ** 2).sum())
    self.gp.set_Xr(Xr)
    self.gp.restart()
    start = TIME.time()
    for attempt in range(n_times):
        if params_was_None:
            # fresh small random init of the region covariance parameters
            params0['Cr'] = 1e-3 * SP.randn(self.rank_r * self.P)
        conv, info = OPT.opt_hyper(self.gp, params0, factr=factr)
        # a solution only counts as converged if Cr stays bounded and the
        # likelihood gradient has essentially vanished
        conv *= self.gp.Cr.K().diagonal().max() < vmax
        conv *= self.getLMLgrad() < 0.1
        if conv or not params_was_None:
            break
    self.infoOpt = info
    if not conv:
        warnings.warn("not converged")
    # assemble the result dictionary
    RV = {}
    if self.P > 1:
        RV['Cr'] = self.getCr()
        if self.bgRE:
            RV['Cg'] = self.getCg()
        RV['Cn'] = self.getCn()
    RV['time'] = SP.array([TIME.time() - start])
    RV['params0'] = params0
    RV['nit'] = SP.array([info['nit']])
    RV['funcalls'] = SP.array([info['funcalls']])
    RV['var'] = self.getVariances()
    RV['conv'] = SP.array([conv])
    RV['NLLAlt'] = SP.array([self.getNLLAlt()])
    RV['LLR'] = SP.array([self.getLLR()])
    RV['LMLgrad'] = SP.array([self.getLMLgrad()])
    return RV
def fitPairwiseModel(Y, XX=None, S_XX=None, U_XX=None, verbose=False):
    """
    Fit a bivariate model for every pair of traits and assemble P x P
    genetic (Cg0) and noise (Cn0) covariance estimates.

    Args:
        Y:       (N, P) phenotype matrix.
        XX:      sample relatedness matrix (semantics per gp2kronSum).
        S_XX:    optional precomputed eigenvalues of XX.
        U_XX:    optional precomputed eigenvectors of XX.
        verbose: if True, print progress for each trait pair.

    Returns:
        dict: the fitSingleTraitModel results extended with
            'Cg0', 'Cn0'              pairwise covariance estimates,
            'Cg0_reg', 'Cn0_reg'      positive-definite regularized versions,
            'params0_Cg', 'params0_Cn' Cholesky-based parameter vectors,
            'conv2'                   per-pair convergence flags.
    """
    N, P = Y.shape
    # 1. single-trait variances seed every pairwise fit
    RV = fitSingleTraitModel(Y, XX=XX, S_XX=S_XX, U_XX=U_XX, verbose=verbose)
    Cg = covariance.freeform(2)
    Cn = covariance.freeform(2)
    # one shared 2-trait GP; only its phenotype columns are swapped per pair
    gp = gp2kronSum(mean(Y[:, 0:2]), Cg, Cn, XX=XX, S_XX=S_XX, U_XX=U_XX)
    conv2 = SP.ones((P, P), dtype=bool)
    rho_g = SP.ones((P, P))  # genetic correlations (diagonal stays 1)
    rho_n = SP.ones((P, P))  # noise correlations
    # 2. fit each pair of traits
    for p1 in range(P):
        for p2 in range(p1):
            if verbose:
                # print() form is valid in both Python 2 and 3
                print('.. fitting correlation (%d,%d)' % (p1, p2))
            gp.setY(Y[:, [p1, p2]])
            # init at the single-trait solutions, tiny random off-diagonal
            Cg_params0 = SP.array([SP.sqrt(RV['varST'][p1, 0]),
                                   1e-6 * SP.randn(),
                                   SP.sqrt(RV['varST'][p2, 0])])
            Cn_params0 = SP.array([SP.sqrt(RV['varST'][p1, 1]),
                                   1e-6 * SP.randn(),
                                   SP.sqrt(RV['varST'][p2, 1])])
            params0 = {'Cg': Cg_params0, 'Cn': Cn_params0}
            conv2[p1, p2], _ = OPT.opt_hyper(gp, params0, factr=1e3)
            # correlation = off-diagonal / sqrt(product of variances)
            rho_g[p1, p2] = Cg.K()[0, 1] / SP.sqrt(Cg.K().diagonal().prod())
            rho_n[p1, p2] = Cn.K()[0, 1] / SP.sqrt(Cn.K().diagonal().prod())
            # mirror into the upper triangle to keep matrices symmetric
            conv2[p2, p1] = conv2[p1, p2]
            rho_g[p2, p1] = rho_g[p1, p2]
            rho_n[p2, p1] = rho_n[p1, p2]
    # scale correlations back to covariances via the single-trait variances
    RV['Cg0'] = rho_g * SP.dot(SP.sqrt(RV['varST'][:, 0:1]),
                               SP.sqrt(RV['varST'][:, 0:1].T))
    RV['Cn0'] = rho_n * SP.dot(SP.sqrt(RV['varST'][:, 1:2]),
                               SP.sqrt(RV['varST'][:, 1:2].T))
    RV['conv2'] = conv2
    # 3. regularize: shift the spectrum so both matrices are positive definite
    offset_g = abs(SP.minimum(LA.eigh(RV['Cg0'])[0].min(), 0)) + 1e-4
    offset_n = abs(SP.minimum(LA.eigh(RV['Cn0'])[0].min(), 0)) + 1e-4
    RV['Cg0_reg'] = RV['Cg0'] + offset_g * SP.eye(P)
    RV['Cn0_reg'] = RV['Cn0'] + offset_n * SP.eye(P)
    # NOTE(review): assumes LA.cholesky is lower-triangular (numpy convention);
    # scipy.linalg.cholesky defaults to upper -- confirm which LA is imported.
    RV['params0_Cg'] = LA.cholesky(RV['Cg0_reg'])[SP.tril_indices(P)]
    RV['params0_Cn'] = LA.cholesky(RV['Cn0_reg'])[SP.tril_indices(P)]
    return RV
def fitSingleTraitModel(Y,XX=None,S_XX=None,U_XX=None,verbose=False):
    """ fit single trait model """
    # Fits an independent genetic+noise variance-component model to each of
    # the P traits in Y.  Returns {'conv1': per-trait convergence flags,
    # 'varST': (P, 2) array, column 0 genetic variance, column 1 noise}.
    N,P = Y.shape
    RV = {}
    Cg = covariance.lowrank(1)
    Cn = covariance.lowrank(1)
    # one shared single-trait GP; only its phenotype column changes per trait
    gp = gp2kronSum(mean(Y[:,0:1]),Cg,Cn,XX=XX,S_XX=S_XX,U_XX=U_XX)
    # split unit variance evenly between the two components at init
    params0 = {'Cg':SP.sqrt(0.5)*SP.ones(1),'Cn':SP.sqrt(0.5)*SP.ones(1)}
    var = SP.zeros((P,2))
    conv1 = SP.zeros(P,dtype=bool)
    for p in range(P):
        if verbose:
            print '.. fitting variance trait %d'%p
        gp.setY(Y[:,p:p+1])
        conv1[p],info = OPT.opt_hyper(gp,params0,factr=1e3)
        var[p,0] = Cg.K()[0,0]
        var[p,1] = Cn.K()[0,0]
    RV['conv1'] = conv1
    RV['varST'] = var
    return RV
def fitSingleTraitModel(Y, XX=None, S_XX=None, U_XX=None, verbose=False):
    """ fit single trait model """
    # Estimate genetic and noise variance for each of the P traits
    # independently; returns per-trait convergence flags and variances.
    N, P = Y.shape
    Cg = covariance.lowrank(1)
    Cn = covariance.lowrank(1)
    # A single GP is reused; only its phenotype column changes per trait.
    gp = gp2kronSum(mean(Y[:, 0:1]), Cg, Cn, XX=XX, S_XX=S_XX, U_XX=U_XX)
    # split unit variance evenly between the two components at init
    half = SP.sqrt(0.5)
    params0 = {'Cg': half * SP.ones(1), 'Cn': half * SP.ones(1)}
    variances = SP.zeros((P, 2))
    converged = SP.zeros(P, dtype=bool)
    for trait in range(P):
        if verbose:
            print('.. fitting variance trait %d' % trait)
        gp.setY(Y[:, trait:trait + 1])
        converged[trait], _ = OPT.opt_hyper(gp, params0, factr=1e3)
        variances[trait, 0] = Cg.K()[0, 0]   # genetic variance
        variances[trait, 1] = Cn.K()[0, 0]   # noise variance
    return {'conv1': converged, 'varST': variances}
gp.covar.setRandomParams() else: n_params = gp.covar.Cr.getNumberParams() n_params += gp.covar.Cn.getNumberParams() params1 = {'covar': sp.randn(n_params)} gp.setParams(params1) params = {} params['Cr'] = gp.covar.Cr.getParams().copy() params['Cn'] = gp.covar.Cn.getParams().copy() gp0.setParams(params) print ' .. optimization' _t0 = time.time() conv, info = gp.optimize() _t1 = time.time() conv, info = OPT.opt_hyper(gp0, gp0.getParams()) _t2 = time.time() t[ni, ri] = _t1 - _t0 t0[ni, ri] = _t2 - _t1 r[ni, ri] = t[ni, ri] / t0[ni, ri] RV = {'t': t, 't0': t0, 'r': r, 'Ns': Ns} fout = h5py.File(out_file, 'w') smartDumpDictHdf5(RV, fout) fout.close() else: R = {} fin = h5py.File(out_file, 'r') for key in fin.keys(): R[key] = fin[key][:] fin.close()
n_params = gp.covar.Cr.getNumberParams() n_params+= gp.covar.Cg.getNumberParams() n_params+= gp.covar.Cn.getNumberParams() params1 = {'covar': sp.randn(n_params)} gp.setParams(params1) params = {} params['Cr'] = gp.covar.Cr.getParams().copy() params['Cg'] = gp.covar.Cg.getParams().copy() params['Cn'] = gp.covar.Cn.getParams().copy() gp0.setParams(params) print ' .. optimization' _t0 = time.time() conv, info = gp.optimize() _t1 = time.time() conv,info = OPT.opt_hyper(gp0,gp0.getParams()) _t2 = time.time() t[ni, ri] = _t1-_t0 t0[ni, ri] = _t2-_t1 r[ni, ri] = t[ni, ri] / t0[ni, ri] RV = {'t': t, 't0': t0, 'r': r, 'Ns': Ns} fout = h5py.File(out_file, 'w') smartDumpDictHdf5(RV, fout) fout.close() else: R = {} fin = h5py.File(out_file, 'r') for key in fin.keys(): R[key] = fin[key][:] fin.close()
# define covariance matrices Cg = limix.CFreeFormCF(P) Cn = limix.CFreeFormCF(P) if 0: # generate parameters params = {} params['Cg'] = SP.randn(int(0.5*P*(P+1))) params['Cn'] = SP.randn(int(0.5*P*(P+1))) params['mean'] = 1e-2*SP.randn(mean.getParams().shape[0]) print "check gradient with gp2kronSum" gp = gp2kronSum(mean,Cg,Cn,XX) gp.setParams(params) gp.checkGradient() print "test optimization" conv,info = OPT.opt_hyper(gp,params,factr=1e3) print conv ipdb.set_trace() if 1: # generate parameters params = {} params['Cr'] = SP.randn(P) params['Cg'] = SP.randn(int(0.5*P*(P+1))) params['Cn'] = SP.randn(int(0.5*P*(P+1))) params['mean'] = 1e-2*SP.randn(mean.getParams().shape[0]) print "check gradient with gp3kronSum" gp = gp3kronSum(mean,Cg,Cn,XX,Xr=Xr) gp.setParams(params) gp.LMLgrad() gp.checkGradient()
def test_lmm_lr_speed(G, y, Z, Kbg, Covs=None, S=None, U=None):
    """
    Low-rank LMM association scan using the mtSet implementation.

    Args:
        G:    (N, F) genotype matrix; one association test per column.
        y:    (N, 1) phenotype.
        Z:    (N, R) features of the low-rank random-effect matrix.
        Kbg:  (N, N) background covariance matrix.
        Covs: optional (N, C) fixed-effect covariates.
        S, U: optional eigendecomposition of Kbg.

    Returns:
        (pv, beta): per-SNP chi2 p-values and effect-size estimates.
    """
    m = mean(y)
    # fall back to the full-rank implementation when Z is not actually low rank
    if Z.shape[1] > G.shape[0]:
        return test_lmm_lr(G, y, Z, Kbg, Covs=Covs)
    if Covs is not None:
        m.addFixedEffect(Covs)
        nCovs = Covs.shape[1]
    Cg = covariance.freeform(1)
    Cn = covariance.freeform(1)
    # NOTE: normalizes the caller's Z in place
    Z /= np.sqrt(Z.shape[1])
    gp = gp3kronSum(m, Cg, Cn, XX=Kbg, Xr=Z, S_XX=S, U_XX=U)
    # null model: small random init for all variance parameters
    params_rnd = {}
    params_rnd['Cg'] = 1e-4 * np.random.randn(1)
    params_rnd['Cn'] = 1e-4 * np.random.randn(1)
    params_rnd['Cr'] = 1e-4 * np.random.randn(1)
    if Covs is not None:
        params_rnd['mean'] = 1e-6 * np.random.randn(nCovs)
    conv, info = OPT.opt_hyper(gp, params_rnd)
    LML0 = gp.LML()
    params0 = gp.getParams()
    # warm-start each SNP test at the null optimum; the extra mean parameter
    # (the SNP effect itself) is initialized near zero
    params_rnd = params0.copy()
    if Covs is not None:
        mean0 = params0['mean']
        params_rnd['mean'] = 1e-6 * np.random.randn(nCovs + 1)
        params_rnd['mean'][:nCovs] = mean0
    else:
        params_rnd['mean'] = 1e-6 * np.random.randn(1)
    F = G.shape[1]
    LML = np.zeros(F)
    beta = np.zeros(F)
    # range behaves identically to xrange in a for loop and works on Python 3
    for f in range(F):
        m.clearFixedEffect()
        if Covs is not None:
            m.addFixedEffect(Covs)
        m.addFixedEffect(G[:, [f]])
        conv, info = OPT.opt_hyper(gp, params_rnd)
        beta[f] = m.getParams()[-1]   # last mean parameter is the SNP effect
        LML[f] = gp.LML()
    # likelihood-ratio test against the null, 1 degree of freedom
    LRT = 2 * (LML0 - LML)
    pv = stats.chi2.sf(LRT, 1)
    return pv, beta
def fitNull(self, verbose=True, cache=False, out_dir='./cache', fname=None, rewrite=False, seed=None, n_times=10, factr=1e3, init_method=None):
    """ Fit null model """
    # Fits the background (no region effect) model, optionally loading or
    # storing the result in an HDF5 cache; stores the result on self.null.
    if seed is not None:
        SP.random.seed(seed)
    read_from_file = False
    if cache:
        assert fname is not None, 'MultiTraitSetTest:: specify fname'
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        out_file = os.path.join(out_dir, fname)
        read_from_file = os.path.exists(out_file) and not rewrite
    RV = {}
    if read_from_file:
        # reload a cached null fit instead of re-optimizing
        h5 = h5py.File(out_file, 'r')
        for key in h5.keys():
            RV[key] = h5[key][:]
        h5.close()
        self.setNull(RV)
    else:
        start = TIME.time()
        # choose the GP flavor: full background RE vs low-rank variant
        if self.bgRE:
            self.gpNull = gp2kronSum(self.mean, self.Cg, self.Cn,
                                     XX=self.XX, S_XX=self.S_XX,
                                     U_XX=self.U_XX)
        else:
            self.gpNull = gp2kronSumLR(self.Y, self.Cn,
                                       Xr=SP.ones((self.N, 1)), F=self.F)
        # random restarts until the optimizer reports convergence
        for attempt in range(n_times):
            params0, Ifilter = self._initParams(init_method=init_method)
            conv, info = OPT.opt_hyper(self.gpNull, params0,
                                       Ifilter=Ifilter, factr=factr)
            if conv:
                break
        if not conv:
            warnings.warn("not converged")
        # mean squared gradient at the optimum (convergence diagnostic)
        grads = self.gpNull.LMLgrad()
        LMLgrad = SP.concatenate([grads[key] ** 2 for key in grads.keys()]).mean()
        LML = self.gpNull.LML()
        if 'mean' in params0:
            RV['params_mean'] = self.gpNull.mean.getParams()
        RV['params0_g'] = self.Cg.getParams()
        RV['params0_n'] = self.Cn.getParams()
        RV['Cg'] = self.Cg.K()
        RV['Cn'] = self.Cn.K()
        RV['conv'] = SP.array([conv])
        RV['time'] = SP.array([TIME.time() - start])
        RV['NLL0'] = SP.array([LML])
        RV['LMLgrad'] = SP.array([LMLgrad])
        RV['nit'] = SP.array([info['nit']])
        RV['funcalls'] = SP.array([info['funcalls']])
        if self.bgRE:
            # heritability and standard errors (only defined with bgRE)
            RV['h2'] = self.gpNull.h2()
            RV['h2_ste'] = self.gpNull.h2_ste()
            RV['Cg_ste'] = self.gpNull.ste('Cg')
            RV['Cn_ste'] = self.gpNull.ste('Cn')
        self.null = RV
        if cache:
            h5 = h5py.File(out_file, 'w')
            dumpDictHdf5(RV, h5)
            h5.close()
    return RV