def main():
    """Build per-region LD files for each r2 threshold and hand them to LocusZoom.

    For every r2 threshold a combined ``all_regions_r2_<r2>.ld`` file is
    started with a header row.  Each region from ``REGION_LOC`` is then run
    through ``plink_ld``, its LD output is passed to ``copy_to_tot`` together
    with the combined file's path, and the ``ld_for_lz`` result is forwarded
    to ``main_lz``.  Region ``6p21.32`` is skipped unless the MHC switch
    below is turned on.
    """
    # Run switches (all off by default).
    render_plots = False    # forwarded to main_lz as its last argument
    summarize_only = False  # forwarded to plink_ld as sum_only
    include_mhc = False     # when False, region 6p21.32 is skipped

    region_list = pc_toolbox.create_region_list(REGION_LOC)
    plink_bfile = BFILE
    # Hard-coded locations are in use here; the LDFOLDER / OUTFOLDER module
    # constants were the commented-out alternatives.
    ld_root = '/home/jkb4y/ubs/work/results/Intersection_06022014/eurmeta_LD/'
    out_root = '/home/jkb4y/ubs/work/results/Intersection_06022014/CAnalysis_LD/'
    assoc_root = ASSOC_FOLDER

    header_line = '\t'.join(
        ['CHR_A', 'BP_A', 'SNP_A', 'CHR_B', 'BP_B', 'SNP_B', 'R2']) + '\n'

    # Only the r2 = '0' threshold is active; earlier runs iterated over
    # subsets of '0', '0.2', '0.4', '0.6', '0.8'.
    for r2 in ('0',):
        combined_path = os.path.join(
            ld_root, 'all_regions_r2_{0}.ld'.format(r2))

        # Start the combined file with just the header; it is closed again
        # before the per-region step, which is given the path, not a handle.
        with open(combined_path, mode="w") as combined:
            combined.write(header_line)

        for region in region_list:
            band = region.ID
            chrom = region.chro
            plot_dir = pc_toolbox.chr_folder(out_root, chrom)
            assoc_dir = pc_toolbox.chr_folder(assoc_root, chrom)
            if band == '6p21.32' and not include_mhc:
                continue

            ld_path, snp_path = plink_ld(
                region, plink_bfile, ld_root, assoc_dir, r2, summarize_only)
            copy_to_tot(combined_path, ld_path)
            lz_ld_path = ld_for_lz(ld_path)
            main_lz(region, plot_dir, assoc_dir, snp_path,
                    lz_ld_path, r2, render_plots)
def main():
    """Call ``main_lz`` for every region using LD files already on disk.

    For each region from ``REGION_LOC`` this builds the path of the
    ``<band>_r2_0.8_lz.ld`` file and of the ``<band>_~leastP_SNPs.txt`` SNP
    list, both inside the chromosome sub-folder of the LD directory, and
    passes them to ``main_lz`` with the chromosome sub-folder of the output
    directory.
    """
    ld_root = '/home/jkb4y/work/results/2012Feb1/hg18/LD/'
    out_root = '/home/jkb4y/work/results/2012Feb1/hg18/CAnalysis_Test/'

    for region in pc_toolbox.create_region_list(REGION_LOC):
        band = region.band
        chrom = region.chro

        plot_dir = pc_toolbox.chr_folder(out_root, chrom)
        ld_dir = pc_toolbox.chr_folder(ld_root, chrom)

        # The SNP list is read from the same chromosome folder as the LD file.
        ld_path = os.path.join(ld_dir, band + '_r2_0.8_lz.ld')
        snp_path = os.path.join(ld_dir, '{0}_~leastP_SNPs.txt'.format(band))

        main_lz(region, plot_dir, snp_path, ld_path)
def plink_ld(region, bfile, ldfolder, assoc_folder, r2, sum_only=False):
    """Run PLINK's pairwise-LD (``--r2``) report for one region.

    Parameters:
        region: object providing ``chro``, ``ID``, ``start`` and ``end``;
            ``start``/``end`` are handed to ``--from-mb``/``--to-mb``.
        bfile: value for PLINK's ``--bfile`` option.
        ldfolder: root LD folder; output goes into the sub-folder returned
            by ``pc_toolbox.chr_folder(ldfolder, region.chro)``.
        assoc_folder: folder holding ``<ID>_~leastP_SNPs.txt``, which is
            passed to PLINK as ``--ld-snp-list``.
        r2: value for ``--ld-window-r2``; also embedded in the output name.
        sum_only: when true, PLINK is not run and only the paths are
            returned.

    Returns:
        ``(ld_path, snp_loc)`` where ``ld_path`` is the ``--out`` prefix
        plus ``'.ld'`` and ``snp_loc`` is the SNP-list path.
    """
    import sys  # local import: only needed for the exit-status warning

    chromosome = region.chro
    chrband = region.ID

    ld_sub = pc_toolbox.chr_folder(ldfolder, chromosome)
    ld = os.path.join(ld_sub, chrband + '_r2_{0}'.format(r2))
    snp_loc = os.path.join(
        assoc_folder, '{0}_~leastP_SNPs.txt'.format(chrband))
    ld_path = ld + '.ld'

    if sum_only:
        return ld_path, snp_loc

    # Popen only accepts strings in its argument list, so the region-derived
    # values are coerced in case they arrive as numbers.
    plink_args = ['plink', '--noweb', '--bfile', bfile,
                  '--chr', str(chromosome),
                  '--from-mb', str(region.start),
                  '--to-mb', str(region.end),
                  '--r2', '--ld-window-r2', str(r2),
                  '--ld-window', '999999',
                  '--ld-window-kb', '99999',
                  '--ld-snp-list', snp_loc,
                  '--out', ld,
                  '--filter-controls', '--maf', '0.005']

    p = subprocess.Popen(plink_args, bufsize=0, close_fds=False, shell=False)
    status = p.wait()
    if status != 0:
        # Keep going as before (callers still receive the paths), but do not
        # hide the failure: the .ld file may be missing or incomplete.
        sys.stderr.write(
            'WARNING: plink exited with status {0} for region {1}\n'.format(
                status, chrband))

    return ld_path, snp_loc
def main(argv):
    """Run the pc_workhorse loop for one region.

    ``argv`` is handed to ``cl_arguments``; the settings used afterwards
    (``chromosome``, ``range_start_bp``, ``condition_list``, ``interrupt``,
    ...) are read from module globals -- presumably ``cl_arguments``
    populates them; confirm against its definition.

    Steps visible in this function:
      1. Build ``label`` from the range label, phenotype tag, ``out_flag``
         and the condition-list indicator, and write a start banner.
      2. Change into the chromosome sub-folder of ``output_folder``.
      3. While ``loopcount < maxloops`` and ``loopagain`` is True: write a
         script via ``write_script``, run it via ``plink``, then either
         process the supplied condition list (first pass only) or call
         ``find_and_record``; in both cases call ``locuszoom`` and
         ``fix_pdf`` on the result.
      4. Print a termination banner.

    NOTE(review): this block was reconstructed from whitespace-collapsed
    source.  Block nesting follows the most plausible reading, and the
    banner string literals are kept exactly as they appeared (on single
    lines); both should be checked against the original file.
    """
    global loopcount, output_folder, plink_test, chromosome, refgene
    global maxloops, loopagain, SNP_loc, range_start_bp, range_end_bp
    global out_flag, condition_list, firstloop, pheno_tag, c_interval
    global chrband, interrupt
    # Reset loop state before the command line is processed.
    firstloop = True
    loopagain = True
    loopcount = 0
    cl_arguments(argv)
    label = ""
    mb_start = convert_bp_to_mb(range_start_bp)
    mb_end = convert_bp_to_mb(range_end_bp)
    cl_indicator = ""
    # rgene / cband are display copies; they fall back to "NA" for the banner.
    rgene = refgene
    cband = chrband
    ptag = ""
    if not pheno_tag == "":
        ptag = "_" + pheno_tag
    if chrband is None:
        rangelabel = range_label(chromosome, refgene, range_start_bp, range_end_bp)
        cband = "NA"
    else:
        rangelabel = chrband
    if condition_list is not None:
        # NOTE(review): the pairing of this else is ambiguous in the
        # collapsed source; it is attached to `if interrupt` here -- confirm.
        if interrupt:
            if "~" in condition_list:
                cl_indicator = "~"
        else:
            cl_indicator = "~"
    if refgene is None:
        rgene = "NA"
    # label prefixes every file name generated below.
    label = rangelabel.replace(":", "_") + ptag + out_flag + "_" + cl_indicator
    of = "--"
    if not out_flag == "":
        # Takes the second "_"-separated piece of out_flag; raises IndexError
        # if out_flag is non-empty but contains no "_".
        of_pretty_list = out_flag.split("_")
        of = of_pretty_list[1]
    sys.stdout.write(
        """ ****************************************************************************** *********************************************************** Beginning pc_workhorse.py on {0} at {1} $$$ {2} {3} {4} {5} {6} {7} *********************************************************** ****************************************************************************** """.format(
            label, time.strftime("%a,%c"), cband, chromosome, rgene, mb_start, mb_end, of + ptag
        )
    )
    sys.stdout.flush()
    # All later relative paths resolve inside the chromosome folder.
    output_folder = str(pc_toolbox.chr_folder(output_folder, chromosome))
    os.chdir(output_folder)
    assoc_extension = determine_extension(plink_test)
    SNP_loc = os.path.join(output_folder, label + "leastP_SNPs.txt")
    ## if condition_list is not None:
    ##     ref_snp = given_condition_list(SNP_loc, condition_list)
    while loopcount < maxloops and loopagain == True:
        print_snp_list(SNP_loc)
        # ref_snp = None
        if interrupt:
            # Restart path: announce the restart in the log.
            ref_snp, part_counter = given_condition_list(SNP_loc, condition_list, interrupt)
            print (
                """ 
****************************************************** <<<PROCESS INTERRUPTED AFTER IDENTIFYING CONDITIONAL SNP: {0}>>> PROCESS RESTARTED and LOGFILE APPENDED TO. ****************************************************** """.format(
                    loopcount
                )
            )
            # NOTE(review): whether the next two statements sit inside this
            # `if` cannot be determined from the collapsed source -- confirm.
            print loopcount
            interrupt = False
        assoc_file_basename = label + str(loopcount) + "_SNPsOut"
        print assoc_file_basename
        assoc_file_name = assoc_file_basename + assoc_extension
        script_name = label + "script_" + str(loopcount) + ".txt"
        write_script(assoc_file_basename, script_name)
        plink(script_name)
        if loopcount == 0 and condition_list is not None:
            # First pass with a supplied condition list: the plot is drawn
            # with ref_snp and the counter jumps to part_counter.
            ref_snp, part_counter = given_condition_list(SNP_loc, condition_list, interrupt)
            title = (
                make_lz_title(rangelabel, pheno_tag, assoc_file_basename, ref_snp)
                .replace("_z", "")
                .replace("_a", "")
                .replace("_b", "")
            )
            pdf_loc = locuszoom(
                assoc_file_name, chromosome, range_start_bp, range_end_bp, assoc_file_basename, title, ref_snp
            )
            fix_pdf(pdf_loc)
            ref_snp = None
            loopcount = part_counter
        else:
            # Ordinary pass. loopagain is not assigned anywhere in this loop,
            # so find_and_record presumably updates it -- confirm.
            find_and_record(assoc_file_name, SNP_loc, condition_list, plink_test, c_interval)
            sys.stdout.flush()
            title = make_lz_title(rangelabel, pheno_tag, assoc_file_basename)
            pdf_loc = locuszoom(assoc_file_name, chromosome, range_start_bp, range_end_bp, assoc_file_basename, title)
            fix_pdf(pdf_loc)
            # NOTE(review): the increment is placed inside this else branch;
            # it may instead follow the if/else -- confirm.
            loopcount = loopcount + 1
    print (
        """ ***************************************************************** **************************************************** Termination condition reached, and pc_workhorse ended at {0} for region {1} at {2}. **************************************************** ****************************************************************** """.format(
            time.strftime("%a,%c"), cband, rangelabel
        )
    )