# Imputation set-up: load the cluster configuration, locate the external
# tools, and anchor the reference file arguments under args.ref_dir when given.

# get cluster configuration
# needed for specifying logfile names with clust_conf['log_task_id']
conf_file = os.environ['HOME']+"/picopili.conf"
configs = read_conf(conf_file)
cluster = configs['cluster']
clust_conf = read_clust_conf()

# from config
impute_ex = find_exec('impute2',key='i2loc')
shapeit_ex = find_exec('shapeit',key='shloc')

# the chunking script is addressed relative to the directory of this script
# (gives an absolute path)
rp_bin = os.path.dirname(os.path.realpath(__file__))
chunker_ex = rp_bin+'/chunk_snps.py'
test_exec(chunker_ex,'picopili chunking script')

if args.ref_dir is not None:
    # the reference directory has to exist
    assert os.path.isdir(args.ref_dir), "Failed to find imputation reference directory %s" % args.ref_dir

    # prefix each reference file argument with "<ref_dir>/"
    _ref_prefix = str(args.ref_dir) +'/'
    for _ref_attr in ('ref_maps', 'ref_haps', 'ref_legs', 'ref_samps'):
        setattr(args, _ref_attr, _ref_prefix + getattr(args, _ref_attr))

# TODO: here
# .hg19.ch.fl.bim for chunking
# imp. references
# Resolve the external tools used by this step, verify them, and print the
# start banner.
smartpcax = find_exec('smartpca', key='eloc')

# if unspecified, look the executable up through find_exec()
# (the literal string "None" is treated the same as a missing value;
#  identity comparison is the idiomatic test for the None singleton)
if args.rscript_ex is None or args.rscript_ex == "None":
    args.rscript_ex = find_exec("Rscript", key='rscloc')
if args.primus_ex is None or args.primus_ex == "None":
    args.primus_ex = find_exec("run_PRIMUS.pl", key='priloc')

# get directory containing current script
# (to get absolute path for scripts)
rp_bin = os.path.dirname(os.path.realpath(__file__))
Rplotpcax = str(rp_bin) + '/plot_pca.Rscript'

# test executables
test_exec(args.primus_ex, 'PRIMUS')
test_exec(plinkx, 'Plink')
test_exec(smartpcax, 'Eigensoft smartpca')
test_exec(args.rscript_ex, 'Rscript')

# Single-argument parenthesised prints give the same output under the
# Python 2 print statement and the Python 3 print function.
print('\n')
print('############')
print('Begin!')
print('############')

####################################
# Compute maximum unrelated set
# a) run PRIMUS
# b) verify ran successfully
####################################
#############
### read config
# The per-user configuration lives at $HOME/picopili.conf.
conf_file = os.environ['HOME']+"/picopili.conf"
configs = read_conf(conf_file)
analyst = configs['init']

# find plink
plinkx = find_exec('plink',key='p2loc')

if not args.skip_platform:
    # the platform guessing script is addressed relative to the directory
    # containing the current script
    rp_bin = os.path.dirname(os.path.realpath(__file__))
    plague_ex = rp_bin + '/plague_pico.pl'
    test_exec(plague_ex, 'Platform guessing script')
    # TODO: verify plague works properly across platforms (primary concern is Compress::Zlib loading)

# --bfile has to be a bare file stem (no directory component)
assert '/' not in args.bfile, "--bfile must specify only a file stem, not a path"

# start banner (one print per line, same output as separate print statements)
for _banner_line in ('\n', '############', 'Begin!', '############'):
    print(_banner_line)

#############
qcdir = 'qc_'+str(args.out)
print('\n...Setting up working directory (./%s)...' % qcdir)
#############
# Read the configuration, verify plink, and resolve the optional extra LD
# region file (current directory first, then <rp_dir>/lib/).
configs = read_conf(conf_file)
plinkx = configs['p2loc']+"plink"

# get directory containing current script
# (hack to help find ld region text file)
rp_bin = os.path.dirname(os.path.realpath(__file__))
rp_dir = os.path.dirname(rp_bin)

#############
print('\n...Checking dependencies...')
# check exists, executable
#############

# plink
test_exec(plinkx, 'Plink')

# ld region file, if needed
# try in rp_dir/lib/ in addition to cwd
# (identity comparison is the idiomatic test for None; the literal string
#  "None" is still treated as "not given")
if args.extra_ld_regions is not None and args.extra_ld_regions != "None":
    if os.path.isfile(args.extra_ld_regions):
        print("LD region file found: %s" % args.extra_ld_regions)
    else:
        # build the fallback location once instead of twice
        _lib_ld_file = str(rp_dir + '/lib/' + args.extra_ld_regions)
        if os.path.isfile(_lib_ld_file):
            args.extra_ld_regions = _lib_ld_file
            print("LD region file found: %s" % args.extra_ld_regions)
        else:
            raise IOError("LD region file %s not found in current directory or %s." % (args.extra_ld_regions, str(rp_dir + '/lib/')))

print('\n')
#############
# get variables from path as needed
# - Rscript (if unspecified)
# - IBD plotting script
# - PCA plotting script (optional)
# (identity comparison is the idiomatic test for None; the literal string
#  "None" is treated the same as a missing value)
if args.rscript_ex is None or args.rscript_ex == "None":
    args.rscript_ex = find_from_path('Rscript', 'Rscript')

Rplotibdx = find_from_path('plot_reap_ibd.Rscript', 'IBD plotting script')

if plot_pca:
    Rplotpcax = find_from_path('plot_pca.Rscript', 'PCA plotting script')

# verify executables
test_exec(plinkx, 'Plink')
test_exec(args.rscript_ex, 'Rscript')
test_exec(args.admixture_ex, 'ADMIXTURE')
test_exec(args.reap_ex, 'REAP')

# pca file
if plot_pca:
    assert os.path.isfile(args.plot_admix_pca), "PCA file does not exist (%r)" % args.plot_admix_pca
    # Fix: this check used to test args.target_bfile although its message is
    # about --plot-admix-pca; args.target_bfile is still validated below.
    # NOTE(review): confirm no caller relies on passing a path for the PCA file.
    assert '/' not in args.plot_admix_pca, "--plot-admix-pca must specify only a file, not a path"

# verify bfiles are files, not paths
assert '/' not in args.unrel_bfile, "--unrel-bfile must specify only a file stem, not a path"
assert '/' not in args.target_bfile, "--target-bfile must specify only a file stem, not a path"
print '--out ' + str(args.out) print '--format ' + str(args.format) print '--min-rel ' + str(args.min_rel) print '--max-gens ' + str(args.max_gens) print ' ' # verify input files exist assert os.path.isfile( args.input_ibd ), "IBD/relatedness file does not exist (%r)" % args.input_ibd assert os.path.isfile( str(args.bfile) + '.fam'), "Plink fam file does not exist (%s)" % str(args.bfile) + '.fam' # test executables test_exec(args.primus_ex, 'PRIMUS') test_exec(args.findped_ex, 'PRIMUS pedigree matching script') print ' ' # unzip relatedness file if needed if args.input_ibd.endswith('.gz'): ibd_txtfile = str(args.input_ibd) + '.txt' print 'Unzipping IBD relatedness file to %s' % ibd_txtfile ibd_out = open(ibd_txtfile, 'w') subprocess.check_call(['gunzip', '-c', str(args.input_ibd)], stdout=ibd_out) ibd_out.close() else: ibd_txtfile = str(args.input_ibd) assert os.path.isfile(
# Dependency checks for the GEE run: plink and R are tested and the R GEE
# script matching the covariate setting must exist next to this script.
# NOTE: the ricopili.conf read that precedes this step is disabled (commented
# out in the original); the executable paths used here come from args.

#############
print('\n...Checking dependencies...')
# check exists, executable
#############

# verify executables
test_exec(args.rplink_ex, 'Plink')
# NOTE(review): a guard "if not args.rserve_active:" is commented out directly
# before this call, so R is tested unconditionally - confirm this is intended.
test_exec(args.r_ex, 'R')
# TODO: find a way to test Rserve available?

# check required R scripts present
# (rp_bin is the directory containing the current script)
rp_bin = os.path.dirname(os.path.realpath(__file__))
# without covariates use the "nocov" script, otherwise the "covar" one
R_gee = rp_bin + ('/gee_logit_nocov.R' if args.covar is None else '/gee_logit_covar.R')
assert os.path.isfile(R_gee), 'Failed to find R GEE script %s' % str(R_gee)

# --bfile has to be a bare file stem (no directory component)
assert '/' not in args.bfile, "--bfile must specify only a file stem, not a path"
# Echo the settings in use, validate the inputs, and make sure an
# uncompressed copy of the IBD/relatedness file is available as ibd_txtfile.

# print settings
# (single-argument parenthesised prints give the same output under the
#  Python 2 print statement and the Python 3 print function)
print('Using settings:')
print('--input-ibd '+str(args.input_ibd))
print('--bfile '+str(args.bfile))
print('--out '+str(args.out))
print('--format '+str(args.format))
print('--min-rel '+str(args.min_rel))
print('--max-gens '+str(args.max_gens))
print(' ')

# verify input files exist
assert os.path.isfile(args.input_ibd), "IBD/relatedness file does not exist (%r)" % args.input_ibd
# Fix: '%' binds tighter than '+', so the unparenthesised form rendered the
# message as "(stem).fam" instead of "(stem.fam)".
assert os.path.isfile(str(args.bfile)+'.fam'), "Plink fam file does not exist (%s)" % (str(args.bfile)+'.fam')

# test executables
test_exec(args.primus_ex, 'PRIMUS')
test_exec(args.findped_ex, 'PRIMUS pedigree matching script')

print(' ')

# unzip relatedness file if needed
if args.input_ibd.endswith('.gz'):
    ibd_txtfile = str(args.input_ibd) + '.txt'
    print('Unzipping IBD relatedness file to %s' % ibd_txtfile)
    # 'with' closes the output handle even if gunzip exits non-zero and
    # check_call raises
    with open(ibd_txtfile, 'w') as ibd_out:
        subprocess.check_call(['gunzip','-c',str(args.input_ibd)],stdout=ibd_out)
else:
    ibd_txtfile = str(args.input_ibd)

assert os.path.isfile(ibd_txtfile), "Failed to extract IBD/relatedness file (%r)" % args.input_ibd
# Read the configuration, verify plink, and resolve the optional extra LD
# region file (current directory first, then <rp_dir>/lib/).
configs = read_conf(conf_file)
plinkx = configs['p2loc'] + "plink"

# get directory containing current script
# (hack to help find ld region text file)
rp_bin = os.path.dirname(os.path.realpath(__file__))
rp_dir = os.path.dirname(rp_bin)

#############
print('\n...Checking dependencies...')
# check exists, executable
#############

# plink
test_exec(plinkx, 'Plink')

# ld region file, if needed
# try in rp_dir/lib/ in addition to cwd
# (identity comparison is the idiomatic test for None; the literal string
#  "None" is still treated as "not given")
if args.extra_ld_regions is not None and args.extra_ld_regions != "None":
    if os.path.isfile(args.extra_ld_regions):
        print("LD region file found: %s" % args.extra_ld_regions)
    else:
        # build the fallback location once instead of twice
        _lib_ld_file = str(rp_dir + '/lib/' + args.extra_ld_regions)
        if os.path.isfile(_lib_ld_file):
            args.extra_ld_regions = _lib_ld_file
            print("LD region file found: %s" % args.extra_ld_regions)
        else:
            raise IOError(
                "LD region file %s not found in current directory or %s." %
                (args.extra_ld_regions, str(rp_dir + '/lib/')))

print('\n')
# Dependency and input checks: plink is tested, --bfile must be a bare stem,
# and every optional input file that was supplied has to exist.
# NOTE: the ricopili.conf read that precedes this step is disabled (commented
# out in the original); the plink path used here comes from args.

#############
print('\n...Checking dependencies...')
# check exists, executable
#############

# verify executables
test_exec(args.rplink_ex, 'Plink')

# --bfile has to be a bare file stem (no directory component)
assert '/' not in args.bfile, "--bfile must specify only a file stem, not a path"

# verify input files exist
# (value of the optional argument, message used when the file is missing;
#  arguments left at None are skipped)
_optional_inputs = (
    (args.keep, "ID inclusion file does not exist (%r)"),
    (args.remove, "ID exclusion file does not exist (%r)"),
    (args.extract, "SNP inclusion file does not exist (%r)"),
    (args.exclude, "SNP exclusion file does not exist (%r)"),
    (args.pheno, "Phenotype file does not exist (%r)"),
)
for _input_path, _missing_msg in _optional_inputs:
    if _input_path is not None:
        assert os.path.isfile(_input_path), _missing_msg % _input_path
# Record the analyst initials from the config, check the dependencies, and
# name the QC working directory.
analyst = configs['init']

#############
print('\n...Checking dependencies...')
# check exists, executable
#############

# verify executables
test_exec(plinkx, 'Plink')
if not args.skip_platform:
    # the platform guessing script is addressed relative to the directory
    # containing the current script
    rp_bin = os.path.dirname(os.path.realpath(__file__))
    plague_ex = rp_bin + '/plague.pl'
    test_exec(plague_ex, 'Platform guessing script')
# TODO: verify plague works properly across platforms (primary concern is Compress::Zlib loading)

# --bfile has to be a bare file stem (no directory component)
assert '/' not in args.bfile, "--bfile must specify only a file stem, not a path"

# start banner (one print per line, same output as separate print statements)
for _banner_line in ('\n', '############', 'Begin!', '############'):
    print(_banner_line)

#############
qcdir = 'qc_'+str(args.out)
# Fill in any executable that was not supplied, verify all of them, and
# locate the R GEE script matching the covariate setting.
# The literal string "None" is treated the same as a missing value.
if args.rplink_ex is None or args.rplink_ex == "None":
    args.rplink_ex = find_exec('plink', key='rplloc')

# get R if not provided
# (uses "is None" like the neighbouring checks; previously "== None")
if args.r_ex is None or args.r_ex == "None":
    args.r_ex = find_from_path('R', 'R')
# if still fail, try config
if args.r_ex is None or args.r_ex == "None":
    args.r_ex = find_exec('R', key='rloc')

if args.rserve_ex is None or args.rserve_ex == "None":
    args.rserve_ex = find_exec('Rserve', key='rservloc')

# verify executables
test_exec(args.rplink_ex, 'Plink')
test_exec(args.r_ex, 'R')
test_exec(args.rserve_ex, 'Rserve')

# check required R scripts present
rp_bin = os.path.dirname(os.path.realpath(__file__))  # use location of current script to get rp_bin
if args.covar is None:
    R_gee = rp_bin + '/gee_logit_nocov.R'
else:
    R_gee = rp_bin + '/gee_logit_covar.R'
assert os.path.isfile(R_gee), 'Failed to find R GEE script %s' % str(R_gee)

# verify bfiles are files, not paths
assert '/' not in args.bfile, "--bfile must specify only a file stem, not a path"
#############
# Single-argument parenthesised prints give the same output under the
# Python 2 print statement and the Python 3 print function.
print('\n...Checking dependencies...')
# check exists, executable
#############

# find required files
# (identity comparison is the idiomatic test for None; the literal string
#  "None" is treated the same as a missing value)
if args.rscript_ex is None or args.rscript_ex == "None":
    args.rscript_ex = find_from_path("Rscript", 'Rscript')

Rplotpcax = find_from_path("plot_pca.Rscript", 'PCA plotting script')

# test executables
test_exec(args.primus_ex, 'PRIMUS')
test_exec(plinkx, 'Plink')
test_exec(smartpcax, 'Eigensoft smartpca')
test_exec(args.rscript_ex, 'Rscript')

print('\n')
print('############')
print('Begin!')
print('############')

####################################
# Compute maximum unrelated set
# a) run PRIMUS
# b) verify ran successfully
#############
# get variables from path as needed
# - Rscript (if unspecified)
# - IBD plotting script
# - PCA plotting script (optional)
# (identity comparison is the idiomatic test for None; the literal string
#  "None" is treated the same as a missing value)
if args.rscript_ex is None or args.rscript_ex == "None":
    args.rscript_ex = find_from_path('Rscript', 'Rscript')

Rplotibdx = find_from_path('plot_reap_ibd.Rscript', 'IBD plotting script')

if plot_pca:
    Rplotpcax = find_from_path('plot_pca.Rscript', 'PCA plotting script')

# verify executables
test_exec(plinkx, 'Plink')
test_exec(args.rscript_ex, 'Rscript')
test_exec(args.admixture_ex, 'ADMIXTURE')
test_exec(args.reap_ex, 'REAP')

# pca file
if plot_pca:
    assert os.path.isfile(args.plot_admix_pca), "PCA file does not exist (%r)" % args.plot_admix_pca
    # Fix: this check used to test args.target_bfile although its message is
    # about --plot-admix-pca; args.target_bfile is still validated below.
    # NOTE(review): confirm no caller relies on passing a path for the PCA file.
    assert '/' not in args.plot_admix_pca, "--plot-admix-pca must specify only a file, not a path"

# verify bfiles are files, not paths
assert '/' not in args.unrel_bfile, "--unrel-bfile must specify only a file stem, not a path"
assert '/' not in args.target_bfile, "--target-bfile must specify only a file stem, not a path"