def fitPairwiseModel(Y,XX=None,S_XX=None,U_XX=None,verbose=False):
    """
    Fit a bivariate (2-trait) variance-decomposition model for every pair of
    columns of Y and assemble full PxP genetic / noise covariance estimates.

    Args:
        Y:       (N, P) phenotype matrix, one column per trait.
        XX:      (N, N) sample-to-sample covariance (optional).
        S_XX:    eigenvalues of XX (optional alternative to XX).
        U_XX:    eigenvectors of XX (optional alternative to XX).
        verbose: if True, print progress for each trait pair.

    Returns:
        dict: the single-trait results from fitSingleTraitModel, extended with
            'Cg0', 'Cn0'             -- PxP genetic / noise covariance estimates
            'conv2'                  -- PxP convergence flags of the pairwise fits
            'Cg0_reg', 'Cn0_reg'     -- positive-definite regularized versions
            'params0_Cg','params0_Cn'-- flattened Cholesky factors (warm starts)
    """
    N,P = Y.shape
    """ initilizes parameters """
    # single-trait fits supply per-trait variances used to seed the pairwise fits
    RV = fitSingleTraitModel(Y,XX=XX,S_XX=S_XX,U_XX=U_XX,verbose=verbose)
    # 2x2 free-form covariances for the genetic (Cg) and noise (Cn) terms
    Cg = covariance.freeform(2)
    Cn = covariance.freeform(2)
    # one GP is built once and its Y is swapped per trait pair below
    gp = gp2kronSum(mean(Y[:,0:2]),Cg,Cn,XX=XX,S_XX=S_XX,U_XX=U_XX)
    conv2 = SP.ones((P,P),dtype=bool)
    rho_g = SP.ones((P,P))
    rho_n = SP.ones((P,P))
    for p1 in range(P):
        for p2 in range(p1):
            if verbose:
                print '.. fitting correlation (%d,%d)'%(p1,p2)
            gp.setY(Y[:,[p1,p2]])
            # diagonal params start at the single-trait standard deviations;
            # the off-diagonal gets a tiny random perturbation to break symmetry
            Cg_params0 = SP.array([SP.sqrt(RV['varST'][p1,0]),1e-6*SP.randn(),SP.sqrt(RV['varST'][p2,0])])
            Cn_params0 = SP.array([SP.sqrt(RV['varST'][p1,1]),1e-6*SP.randn(),SP.sqrt(RV['varST'][p2,1])])
            params0 = {'Cg':Cg_params0,'Cn':Cn_params0}
            conv2[p1,p2],info = OPT.opt_hyper(gp,params0,factr=1e3)
            # correlations from the fitted 2x2 covariance matrices
            rho_g[p1,p2] = Cg.K()[0,1]/SP.sqrt(Cg.K().diagonal().prod())
            rho_n[p1,p2] = Cn.K()[0,1]/SP.sqrt(Cn.K().diagonal().prod())
            # mirror into the upper triangle
            conv2[p2,p1] = conv2[p1,p2]; rho_g[p2,p1] = rho_g[p1,p2]; rho_n[p2,p1] = rho_n[p1,p2]
    # covariance = correlation * outer product of single-trait std devs
    RV['Cg0'] = rho_g*SP.dot(SP.sqrt(RV['varST'][:,0:1]),SP.sqrt(RV['varST'][:,0:1].T))
    RV['Cn0'] = rho_n*SP.dot(SP.sqrt(RV['varST'][:,1:2]),SP.sqrt(RV['varST'][:,1:2].T))
    RV['conv2'] = conv2
    #3. regularizes covariance matrices
    # shift the spectrum so both matrices become positive definite
    offset_g = abs(SP.minimum(LA.eigh(RV['Cg0'])[0].min(),0))+1e-4
    offset_n = abs(SP.minimum(LA.eigh(RV['Cn0'])[0].min(),0))+1e-4
    RV['Cg0_reg'] = RV['Cg0']+offset_g*SP.eye(P)
    RV['Cn0_reg'] = RV['Cn0']+offset_n*SP.eye(P)
    # NOTE(review): assumes LA.cholesky returns a lower-triangular factor
    # (numpy.linalg-style); scipy.linalg.cholesky defaults to upper -- confirm
    # which LA is imported at file top.
    RV['params0_Cg']=LA.cholesky(RV['Cg0_reg'])[SP.tril_indices(P)]
    RV['params0_Cn']=LA.cholesky(RV['Cn0_reg'])[SP.tril_indices(P)]
    return RV
def fitNull(self,verbose=True,cache=False,out_dir='./cache',fname=None,rewrite=False,seed=None,n_times=10,factr=1e3,init_method=None):
    """
    Fit the null model, optionally reading/writing an HDF5 cache.

    Args:
        verbose:     not read in this method; kept for interface compatibility.
        cache:       if True, use out_dir/fname as an HDF5 cache.
        out_dir:     cache directory; created if it does not exist.
        fname:       cache file name; required when cache is True.
        rewrite:     if True, refit even when a cache file exists.
        seed:        optional seed for SP.random.
        n_times:     maximum number of optimization restarts.
        factr:       optimizer tolerance forwarded to OPT.opt_hyper.
        init_method: forwarded to self._initParams.

    Returns:
        dict of null-model results; also stored via self.setNull / self.null.
    """
    if seed is not None:
        SP.random.seed(seed)
    read_from_file = False
    if cache:
        assert fname is not None, 'MultiTraitSetTest:: specify fname'
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        out_file = os.path.join(out_dir,fname)
        # reuse the cached fit unless the caller asked for a rewrite
        read_from_file = os.path.exists(out_file) and not rewrite
    RV = {}
    if read_from_file:
        # load every dataset stored in the cache file
        f = h5py.File(out_file,'r')
        for key in f.keys():
            RV[key] = f[key][:]
        f.close()
        self.setNull(RV)
    else:
        start = TIME.time()
        # full two-Kronecker GP when a background random effect is modelled,
        # otherwise a low-rank model with noise covariance only
        if self.bgRE:
            self.gpNull = gp2kronSum(self.mean,self.Cg,self.Cn,XX=self.XX,S_XX=self.S_XX,U_XX=self.U_XX)
        else:
            self.gpNull = gp2kronSumLR(self.Y,self.Cn,Xr=SP.ones((self.N,1)),F=self.F)
        # restart from fresh initial parameters until the optimizer converges
        for i in range(n_times):
            params0,Ifilter=self._initParams(init_method=init_method)
            conv,info = OPT.opt_hyper(self.gpNull,params0,Ifilter=Ifilter,factr=factr)
            if conv:
                break
        if not conv:
            warnings.warn("not converged")
        # mean squared gradient at the optimum (convergence diagnostic)
        LMLgrad = SP.concatenate([self.gpNull.LMLgrad()[key]**2 for key in self.gpNull.LMLgrad().keys()]).mean()
        LML = self.gpNull.LML()
        if 'mean' in params0.keys():
            RV['params_mean'] = self.gpNull.mean.getParams()
        RV['params0_g'] = self.Cg.getParams()
        RV['params0_n'] = self.Cn.getParams()
        RV['Cg'] = self.Cg.K()
        RV['Cn'] = self.Cn.K()
        RV['conv'] = SP.array([conv])
        RV['time'] = SP.array([TIME.time()-start])
        RV['NLL0'] = SP.array([LML])
        RV['LMLgrad'] = SP.array([LMLgrad])
        RV['nit'] = SP.array([info['nit']])
        RV['funcalls'] = SP.array([info['funcalls']])
        if self.bgRE:
            # heritability and standard-error estimates only exist for the
            # background-random-effect model
            RV['h2'] = self.gpNull.h2()
            RV['h2_ste'] = self.gpNull.h2_ste()
            RV['Cg_ste'] = self.gpNull.ste('Cg')
            RV['Cn_ste'] = self.gpNull.ste('Cn')
        self.null = RV
        if cache:
            f = h5py.File(out_file,'w')
            dumpDictHdf5(RV,f)
            f.close()
    return RV
def fitPairwiseModel(Y, XX=None, S_XX=None, U_XX=None, verbose=False):
    """Estimate PxP genetic and noise trait covariances from bivariate fits.

    For each unordered trait pair (i, j) a two-trait gp2kronSum model is
    optimized; the fitted genetic/noise correlations are combined with the
    single-trait variances into full covariance matrices, which are then
    regularized to be positive definite.

    Args:
        Y:       (N, P) phenotype matrix, one column per trait.
        XX:      (N, N) sample covariance (optional).
        S_XX:    eigenvalues of XX (optional alternative to XX).
        U_XX:    eigenvectors of XX (optional alternative to XX).
        verbose: if True, print progress per trait pair.

    Returns:
        dict: single-trait results plus 'Cg0', 'Cn0', 'conv2', 'Cg0_reg',
        'Cn0_reg', 'params0_Cg', 'params0_Cn'.
    """
    N, P = Y.shape
    # per-trait variance components seed the pairwise optimizations
    RV = fitSingleTraitModel(Y, XX=XX, S_XX=S_XX, U_XX=U_XX, verbose=verbose)
    Cg = covariance.freeform(2)
    Cn = covariance.freeform(2)
    # a single GP is reused; only its phenotype pair is swapped per iteration
    gp = gp2kronSum(mean(Y[:, 0:2]), Cg, Cn, XX=XX, S_XX=S_XX, U_XX=U_XX)
    conv2 = SP.ones((P, P), dtype=bool)
    corr_g = SP.ones((P, P))
    corr_n = SP.ones((P, P))
    sqrt_var = SP.sqrt(RV['varST'])
    for i in range(P):
        for j in range(i):
            if verbose:
                print('.. fitting correlation (%d,%d)' % (i, j))
            gp.setY(Y[:, [i, j]])
            # diagonals start at the single-trait std devs; the off-diagonal
            # gets a tiny random perturbation to break symmetry
            init_g = SP.array([sqrt_var[i, 0], 1e-6 * SP.randn(), sqrt_var[j, 0]])
            init_n = SP.array([sqrt_var[i, 1], 1e-6 * SP.randn(), sqrt_var[j, 1]])
            conv2[i, j], info = OPT.opt_hyper(gp, {'Cg': init_g, 'Cn': init_n}, factr=1e3)
            # correlations from the fitted 2x2 covariance matrices
            Kg = Cg.K()
            Kn = Cn.K()
            corr_g[i, j] = Kg[0, 1] / SP.sqrt(Kg.diagonal().prod())
            corr_n[i, j] = Kn[0, 1] / SP.sqrt(Kn.diagonal().prod())
            # mirror into the upper triangle
            conv2[j, i] = conv2[i, j]
            corr_g[j, i] = corr_g[i, j]
            corr_n[j, i] = corr_n[i, j]
    # covariance = correlation * outer product of std devs
    RV['Cg0'] = corr_g * SP.dot(sqrt_var[:, 0:1], sqrt_var[:, 0:1].T)
    RV['Cn0'] = corr_n * SP.dot(sqrt_var[:, 1:2], sqrt_var[:, 1:2].T)
    RV['conv2'] = conv2
    # regularize: shift the spectrum so both matrices are positive definite
    off_g = abs(SP.minimum(LA.eigh(RV['Cg0'])[0].min(), 0)) + 1e-4
    off_n = abs(SP.minimum(LA.eigh(RV['Cn0'])[0].min(), 0)) + 1e-4
    RV['Cg0_reg'] = RV['Cg0'] + off_g * SP.eye(P)
    RV['Cn0_reg'] = RV['Cn0'] + off_n * SP.eye(P)
    # flattened lower-triangle Cholesky factors, usable as warm starts
    RV['params0_Cg'] = LA.cholesky(RV['Cg0_reg'])[SP.tril_indices(P)]
    RV['params0_Cn'] = LA.cholesky(RV['Cn0_reg'])[SP.tril_indices(P)]
    return RV
def fitSingleTraitModel(Y,XX=None,S_XX=None,U_XX=None,verbose=False):
    """
    Fit a univariate variance-component model to each column of Y.

    Args:
        Y:       (N, P) phenotype matrix, one column per trait.
        XX:      (N, N) sample covariance (optional).
        S_XX:    eigenvalues of XX (optional alternative to XX).
        U_XX:    eigenvectors of XX (optional alternative to XX).
        verbose: if True, print progress per trait.

    Returns:
        dict with:
            'conv1' -- (P,) bool convergence flag per trait
            'varST' -- (P, 2) fitted variances: column 0 genetic, column 1 noise
    """
    N,P = Y.shape
    RV = {}
    # rank-1 covariances: one genetic and one noise variance per trait
    Cg = covariance.lowrank(1)
    Cn = covariance.lowrank(1)
    # a single GP is reused; only its phenotype column is swapped per trait
    gp = gp2kronSum(mean(Y[:,0:1]),Cg,Cn,XX=XX,S_XX=S_XX,U_XX=U_XX)
    # start with an even 50/50 split of the variance
    params0 = {'Cg':SP.sqrt(0.5)*SP.ones(1),'Cn':SP.sqrt(0.5)*SP.ones(1)}
    var = SP.zeros((P,2))
    conv1 = SP.zeros(P,dtype=bool)
    for p in range(P):
        if verbose:
            print '.. fitting variance trait %d'%p
        gp.setY(Y[:,p:p+1])
        conv1[p],info = OPT.opt_hyper(gp,params0,factr=1e3)
        var[p,0] = Cg.K()[0,0]
        var[p,1] = Cn.K()[0,0]
    RV['conv1'] = conv1
    RV['varST'] = var
    return RV
def fitSingleTraitModel(Y, XX=None, S_XX=None, U_XX=None, verbose=False):
    """Fit a univariate variance-component model to each trait (column) of Y.

    Args:
        Y:       (N, P) phenotype matrix, one column per trait.
        XX:      (N, N) sample covariance (optional).
        S_XX:    eigenvalues of XX (optional alternative to XX).
        U_XX:    eigenvectors of XX (optional alternative to XX).
        verbose: if True, print progress per trait.

    Returns:
        dict with 'conv1' ((P,) bool convergence flags) and 'varST'
        ((P, 2) fitted variances: genetic in column 0, noise in column 1).
    """
    N, P = Y.shape
    RV = {}
    # rank-1 covariances: a single genetic and a single noise variance
    Cg = covariance.lowrank(1)
    Cn = covariance.lowrank(1)
    # one GP is built once; only its phenotype column changes per trait
    gp = gp2kronSum(mean(Y[:, 0:1]), Cg, Cn, XX=XX, S_XX=S_XX, U_XX=U_XX)
    # initial guess: split the variance evenly between the two components
    half = SP.sqrt(0.5) * SP.ones(1)
    params0 = {'Cg': half, 'Cn': half.copy()}
    variances = SP.zeros((P, 2))
    converged = SP.zeros(P, dtype=bool)
    for trait in range(P):
        if verbose:
            print('.. fitting variance trait %d' % trait)
        gp.setY(Y[:, trait:trait + 1])
        converged[trait], info = OPT.opt_hyper(gp, params0, factr=1e3)
        variances[trait, 0] = Cg.K()[0, 0]
        variances[trait, 1] = Cn.K()[0, 0]
    RV['conv1'] = converged
    RV['varST'] = variances
    return RV
# add first fixed effect F = 1.*(SP.rand(N,3)<0.2); A = SP.ones((1,P)) mean.addFixedEffect(F=F,A=A) # define covariance matrices Cg = limix.CFreeFormCF(P) Cn = limix.CFreeFormCF(P) if 0: # generate parameters params = {} params['Cg'] = SP.randn(int(0.5*P*(P+1))) params['Cn'] = SP.randn(int(0.5*P*(P+1))) params['mean'] = 1e-2*SP.randn(mean.getParams().shape[0]) print "check gradient with gp2kronSum" gp = gp2kronSum(mean,Cg,Cn,XX) gp.setParams(params) gp.checkGradient() print "test optimization" conv,info = OPT.opt_hyper(gp,params,factr=1e3) print conv ipdb.set_trace() if 1: # generate parameters params = {} params['Cr'] = SP.randn(P) params['Cg'] = SP.randn(int(0.5*P*(P+1))) params['Cn'] = SP.randn(int(0.5*P*(P+1))) params['mean'] = 1e-2*SP.randn(mean.getParams().shape[0]) print "check gradient with gp3kronSum"
def fitNull(self, verbose=True, cache=False, out_dir='./cache', fname=None,
            rewrite=False, seed=None, n_times=10, factr=1e3, init_method=None):
    """Fit the null model, optionally backed by an HDF5 cache.

    Args:
        verbose:     not read in this method; kept for interface compatibility.
        cache:       if True, read/write the result at out_dir/fname.
        out_dir:     cache directory; created when missing.
        fname:       cache file name; required when cache is True.
        rewrite:     if True, refit and overwrite an existing cache file.
        seed:        optional seed for SP.random.
        n_times:     maximum number of optimization restarts.
        factr:       optimizer tolerance forwarded to OPT.opt_hyper.
        init_method: forwarded to self._initParams.

    Returns:
        dict of null-model results; also stored via self.setNull / self.null.
    """
    if seed is not None:
        SP.random.seed(seed)

    use_cached = False
    if cache:
        assert fname is not None, 'MultiTraitSetTest:: specify fname'
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        out_file = os.path.join(out_dir, fname)
        # reuse an existing fit unless the caller asked for a rewrite
        use_cached = os.path.exists(out_file) and not rewrite

    RV = {}
    if use_cached:
        # load every dataset stored in the cache file
        f = h5py.File(out_file, 'r')
        for key in f.keys():
            RV[key] = f[key][:]
        f.close()
        self.setNull(RV)
    else:
        t0 = TIME.time()
        # full two-Kronecker GP when a background random effect is modelled,
        # otherwise a low-rank model with noise covariance only
        if self.bgRE:
            self.gpNull = gp2kronSum(self.mean, self.Cg, self.Cn, XX=self.XX,
                                     S_XX=self.S_XX, U_XX=self.U_XX)
        else:
            self.gpNull = gp2kronSumLR(self.Y, self.Cn,
                                       Xr=SP.ones((self.N, 1)), F=self.F)
        # restart from fresh initial parameters until convergence
        for _ in range(n_times):
            params0, Ifilter = self._initParams(init_method=init_method)
            conv, info = OPT.opt_hyper(self.gpNull, params0,
                                       Ifilter=Ifilter, factr=factr)
            if conv:
                break
        if not conv:
            warnings.warn("not converged")
        # mean squared gradient at the optimum (convergence diagnostic)
        grads = self.gpNull.LMLgrad()
        LMLgrad = SP.concatenate([grads[key]**2 for key in grads.keys()]).mean()
        LML = self.gpNull.LML()
        if 'mean' in params0.keys():
            RV['params_mean'] = self.gpNull.mean.getParams()
        RV['params0_g'] = self.Cg.getParams()
        RV['params0_n'] = self.Cn.getParams()
        RV['Cg'] = self.Cg.K()
        RV['Cn'] = self.Cn.K()
        RV['conv'] = SP.array([conv])
        RV['time'] = SP.array([TIME.time() - t0])
        RV['NLL0'] = SP.array([LML])
        RV['LMLgrad'] = SP.array([LMLgrad])
        RV['nit'] = SP.array([info['nit']])
        RV['funcalls'] = SP.array([info['funcalls']])
        if self.bgRE:
            # heritability / standard-error estimates exist only for the
            # background-random-effect model
            RV['h2'] = self.gpNull.h2()
            RV['h2_ste'] = self.gpNull.h2_ste()
            RV['Cg_ste'] = self.gpNull.ste('Cg')
            RV['Cn_ste'] = self.gpNull.ste('Cn')
        self.null = RV
        if cache:
            f = h5py.File(out_file, 'w')
            dumpDictHdf5(RV, f)
            f.close()
    return RV