def analyze(options):
    """Load the inputs named on ``options`` and run ``scan``, timing the call.

    Attributes read from ``options``: ``cfile``, ``pfile``, ``trait_idx``,
    ``nfile``, ``wfile``, ``ffile``, ``i0``, ``i1``, ``perm_i``, ``resdir``,
    ``bfile`` and ``minSnps``.

    Side effects:
      * ``options.i0`` is set to 1 and ``options.i1`` to ``wnds.shape[0]``
        when they are ``None`` (the object is modified in place).
      * The output directory ``<resdir>/perm<perm_i>`` (or ``<resdir>/test``
        when ``perm_i`` is ``None``) is created if needed.
      * The result path ``<i0>_<i1>.res`` inside that directory, with both
        numbers zero-padded to the number of digits of ``wnds.shape[0]``,
        is handed to ``scan``.

    Raises:
        AssertionError: if covariates were read and their first dimension
            differs from the phenotype's.
        OSError: if the output directory cannot be created.
    """
    # A parenthesised single argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print('import data')

    # load data
    if options.cfile is None:
        cov = {'eval': None, 'evec': None}
        warnings.warn('warning: cfile not specifed, a one variance compoenent model will be considered')
    else:
        cov = readCovarianceMatrixFile(options.cfile, readCov=False)
    Y = readPhenoFile(options.pfile, idx=options.trait_idx)
    null = readNullModelFile(options.nfile)
    wnds = readWindowsFile(options.wfile)

    F = None
    if options.ffile:
        F = readCovariatesFile(options.ffile)
    if F is not None:
        assert Y.shape[0] == F.shape[0], 'dimensions mismatch'

    # Default range passed to scan: 1 .. number of rows in wnds.
    if options.i0 is None:
        options.i0 = 1
    if options.i1 is None:
        options.i1 = wnds.shape[0]

    # name of output file
    if options.perm_i is not None:
        res_dir = os.path.join(options.resdir, 'perm%d' % options.perm_i)
    else:
        res_dir = os.path.join(options.resdir, 'test')

    # Create the directory without a prior exists() check: another process
    # may create it between the check and the call, which would make
    # makedirs raise even though the directory is perfectly usable.
    try:
        os.makedirs(res_dir)
    except OSError:
        if not os.path.isdir(res_dir):
            raise

    n_digits = len(str(wnds.shape[0]))
    fname = '%s_%s.res' % (str(options.i0).zfill(n_digits),
                           str(options.i1).zfill(n_digits))
    resfile = os.path.join(res_dir, fname)

    # analysis
    t0 = time.time()
    scan(options.bfile, Y, cov, null, wnds, options.minSnps,
         options.i0, options.i1, options.perm_i, resfile, F)
    t1 = time.time()
    print('... finished in %s seconds' % (t1 - t0))
def _ensure_result_dir(path):
    """Create ``path`` (with parents) unless it already is a directory."""
    try:
        os.makedirs(path)
    except OSError:
        # Losing a creation race to another process is fine; anything else
        # (permissions, a regular file in the way, ...) is a real error.
        if not os.path.isdir(path):
            raise


def analyze(options):
    """Read the input files given on ``options`` and hand them to ``scan``.

    The covariance decomposition is read from ``options.cfile`` when given;
    otherwise a dict with ``'eval'`` and ``'evec'`` set to ``None`` is used
    and a warning is issued. Phenotype, null model and windows are read from
    ``options.pfile``, ``options.nfile`` and ``options.wfile``; covariates
    are read only when ``options.ffile`` is truthy.

    ``options.i0`` / ``options.i1`` are overwritten on the passed object with
    1 / ``wnds.shape[0]`` when they are ``None``. The result file is
    ``<resdir>/<sub>/<i0>_<i1>.res`` where ``<sub>`` is ``perm<perm_i>`` or
    ``test`` (no permutation index), and both numbers are zero-padded to the
    width of ``wnds.shape[0]``. The elapsed time of ``scan`` is printed.

    Raises:
        AssertionError: when covariates and phenotype differ in their first
            dimension.
        OSError: when the result directory cannot be created.
    """
    # print(<one argument>) gives the same output as the Python 2 print
    # statement and is also valid Python 3.
    print('import data')

    # load data
    if options.cfile is not None:
        cov = readCovarianceMatrixFile(options.cfile, readCov=False)
    else:
        cov = {'eval': None, 'evec': None}
        warnings.warn('warning: cfile not specifed, a one variance compoenent model will be considered')
    Y = readPhenoFile(options.pfile, idx=options.trait_idx)
    null = readNullModelFile(options.nfile)
    wnds = readWindowsFile(options.wfile)
    F = readCovariatesFile(options.ffile) if options.ffile else None
    if F is not None:
        assert Y.shape[0] == F.shape[0], 'dimensions mismatch'

    n_windows = wnds.shape[0]
    if options.i0 is None:
        options.i0 = 1
    if options.i1 is None:
        options.i1 = n_windows

    # name of output file
    if options.perm_i is None:
        sub_dir = 'test'
    else:
        sub_dir = 'perm%d' % options.perm_i
    res_dir = os.path.join(options.resdir, sub_dir)
    # Race-free replacement for "if not exists: makedirs", which can fail
    # when several processes write into the same results directory.
    _ensure_result_dir(res_dir)

    width = len(str(n_windows))
    fname = str(options.i0).zfill(width)
    fname += '_' + str(options.i1).zfill(width) + '.res'
    resfile = os.path.join(res_dir, fname)

    # analysis
    started = time.time()
    scan(options.bfile, Y, cov, null, wnds, options.minSnps,
         options.i0, options.i1, options.perm_i, resfile, F)
    finished = time.time()
    print('... finished in %s seconds' % (finished - started))
def preprocess(options):
    """Run the preprocessing steps that are switched on in ``options``.

    Steps, executed in this order when their flag is set:

    * ``compute_cov``: ``computeCovarianceMatrix`` followed by
      ``eighCovarianceMatrix``; requires ``cfile``.
    * ``compute_PCs > 0``: ``computePCs``; requires ``ffile``.
    * ``fit_null``: reads the phenotype (``pfile``, required), the
      covariance decomposition (``cfile``, optional) and the covariates
      (``ffile``, optional) and calls ``fit_null``. ``options.nfile`` is set
      to the basename of ``bfile`` when it is ``None``.
    * ``precompute_windows``: reads the bim file and calls ``splitGeno``,
      writing to ``wfile + '.wnd'``. ``options.wfile`` is set to
      ``'<basename of bfile>.<window_size>'`` when it is ``None``.
    * ``plot_windows``: saves a histogram of the per-window SNP counts
      returned by ``splitGeno`` to ``wfile + '.wnd.pdf'``. The counts only
      exist when ``precompute_windows`` ran in the same call; otherwise a
      warning is issued and no plot is produced.

    ``options.nfile`` and ``options.wfile`` are modified in place when they
    are defaulted. Timing and progress messages are printed.

    Raises:
        AssertionError: if ``bfile`` is missing, a file name required by an
            enabled step is missing, or the first dimensions of phenotype,
            covariance eigenvalues or covariates disagree.
    """
    # NOTE: every print below passes one pre-formatted argument, so the
    # output is the same under the Python 2 print statement and the
    # Python 3 print function.
    assert options.bfile is not None, 'Please specify a bfile.'

    # computing the covariance matrix
    if options.compute_cov:
        assert options.cfile is not None, 'Specify covariance matrix basename'
        print('Computing covariance matrix')
        t0 = time.time()
        computeCovarianceMatrix(options.plink_path, options.bfile,
                                options.cfile, options.sim_type)
        t1 = time.time()
        print('... finished in %s seconds' % (t1 - t0))
        print('Computing eigenvalue decomposition')
        t0 = time.time()
        eighCovarianceMatrix(options.cfile)
        t1 = time.time()
        print('... finished in %s seconds' % (t1 - t0))

    # computing principal components
    if options.compute_PCs > 0:
        assert options.ffile is not None, 'Specify fix effects basename for saving PCs'
        t0 = time.time()
        computePCs(options.plink_path, options.compute_PCs,
                   options.bfile, options.ffile)
        t1 = time.time()
        print('... finished in %s seconds' % (t1 - t0))

    # fitting the null model
    if options.fit_null:
        if options.nfile is None:
            options.nfile = os.path.split(options.bfile)[-1]
            warnings.warn('nfile not specifed, set to %s' % options.nfile)
        print('Fitting null model')
        assert options.pfile is not None, 'phenotype file needs to be specified'
        # read pheno
        Y = readPhenoFile(options.pfile, idx=options.trait_idx)
        # read covariance
        if options.cfile is None:
            cov = {'eval': None, 'evec': None}
            warnings.warn('cfile not specifed, a one variance compoenent model will be considered')
        else:
            cov = readCovarianceMatrixFile(options.cfile, readCov=False)
            assert Y.shape[0] == cov['eval'].shape[0], 'dimension mismatch'
        # read covariates
        F = None
        if options.ffile is not None:
            F = readCovariatesFile(options.ffile)
            assert Y.shape[0] == F.shape[0], 'dimensions mismatch'
        t0 = time.time()
        fit_null(Y, cov['eval'], cov['evec'], options.nfile, F)
        t1 = time.time()
        print('.. finished in %s seconds' % (t1 - t0))

    # precomputing the windows
    nSnps = None  # per-window SNP counts; only available after splitGeno
    if options.precompute_windows:
        if options.wfile is None:
            options.wfile = os.path.split(options.bfile)[-1] + '.%d' % options.window_size
            warnings.warn('wfile not specifed, set to %s' % options.wfile)
        print('Precomputing windows')
        t0 = time.time()
        pos = readBimFile(options.bfile)
        nWnds, nSnps = splitGeno(pos, size=options.window_size,
                                 out_file=options.wfile + '.wnd')
        # (value,) keeps %-formatting safe whatever type the value has.
        print('Number of variants: %s' % (pos.shape[0],))
        print('Number of windows: %s' % (nWnds,))
        print('Minimum number of snps: %s' % (nSnps.min(),))
        print('Maximum number of snps: %s' % (nSnps.max(),))
        t1 = time.time()
        print('.. finished in %s seconds' % (t1 - t0))

    # plot distribution of nSnps
    if options.plot_windows:
        if nSnps is None:
            # Without precompute_windows there are no counts to plot; the
            # previous code failed here on the undefined name instead.
            warnings.warn('plot_windows requires precompute_windows in the same run; skipping plot')
        else:
            print('Plotting ditribution of number of SNPs')
            plot_file = options.wfile + '.wnd.pdf'
            PL.subplot(1, 1, 1)
            PL.hist(nSnps, 30)
            PL.xlabel('Number of SNPs')
            PL.ylabel('Number of windows')
            PL.savefig(plot_file)