def RunBICTest(chr, beginSeqName, fileName, globalOrLocalTest, organism):
    """
    Runs all BIC test calculations.

    chr: Current chromosome number.
    beginSeqName: Beginning of the sequence name of the queried chromosome. (e.g. "Gene")
    fileName: Output file name.
    globalOrLocalTest: String identifying whether to use global or local Mt values.
                       Must be "Global" or "Local"; any other value only prints an
                       error message and no calculation is performed.
    organism: Organism's name.

    Returns None. Progress and error messages are printed to stdout.
    """
    print("BIC")

    if globalOrLocalTest in ("Global", "Local"):
        # Both modes run exactly the same pipeline; the mode only selects which
        # input directory/file names are read.
        def calcPath(quantity, symmetry):
            # Calculations_<organism>, then <quantity><mode>, then
            # <quantity><symmetry><chr>.txt, joined with the backslash
            # separator the original file layout uses.
            return "Calculations_%s\\%s%s\\%s%s%d.txt" % (
                organism, quantity, globalOrLocalTest, quantity, symmetry, chr)

        PalphaGammaHatAsym = printAndParseFiles.parsePalphaGamma(
            calcPath("PalphaGammaHat", "Asymmetric"), beginSeqName)
        PalphaGammaHatSym = printAndParseFiles.parsePalphaGamma(
            calcPath("PalphaGammaHat", "Symmetric"), beginSeqName)
        LgammaAsym = printAndParseFiles.parseValueFile(
            calcPath("Lgamma", "Asymmetric"))
        LgammaSym = printAndParseFiles.parseValueFile(
            calcPath("Lgamma", "Symmetric"))

        BICAsym = BIC.findBIC(PalphaGammaHatAsym, LgammaAsym, "A")
        BICSym = BIC.findBIC(PalphaGammaHatSym, LgammaSym, "S")
        printAndParseFiles.compareAndPrintBICs(BICAsym, BICSym, fileName)
    else:
        print("Error with Global or Local choice. Please type 'Global' or 'Local'")

    # Printed on every path, including the error path, as before.
    print("DONE WITH CHR %d" % chr)
def output(I, forward, reverse):
    """Print the best model for interval I and plot it over strand coverage.

    I: interval object; its ``start`` and ``stop`` attributes bound the
       positions that are kept, and it is passed to ``BIC.get_best_model``.
    forward, reverse: iterables of tab-separated lines with four fields
       (chrom, start, stop, coverage). Reading of each stops at the first
       line whose midpoint lies past ``I.stop``.

    Relies on the module-level names ``penality`` and ``diff_threshold``.
    Prints ``model.k`` and every entry of ``model.rvs``, then shows a figure
    with the forward histogram drawn upwards and the reverse histogram drawn
    downwards, each overlaid with the model density. Returns None.
    """
    D = ([], [])
    model = BIC.get_best_model(I, penality, diff_threshold)
    print(model.k)
    for rv in model.rvs:
        print(rv)

    for i, FH in enumerate((forward, reverse)):
        for line in FH:
            chrom, start, stop, cov = line.strip("\n").split("\t")
            pos = (float(stop) + float(start)) / 2.
            if pos > I.stop:
                break
            elif I.start <= pos <= I.stop:
                D[i].append((pos, float(cov)))

    positions = [x for d in D for x, y in d]
    minX, maxX = min(positions), max(positions)
    xs = np.linspace(0, (maxX - minX) / 100., 1000)
    bins = 500

    def strandHistogram(points):
        # Coverage-weighted histogram of rescaled positions, returned as
        # (heights, bin centres). ``density=True`` replaces the removed
        # ``normed`` keyword; the two agree for equal-width bins.
        counts, edges = np.histogram(
            [(x - minX) / 100. for x, y in points],
            weights=[y for x, y in points],
            bins=bins,
            density=True,
        )
        return counts, (edges[:-1] + edges[1:]) / 2.

    counts, centers = strandHistogram(D[0])
    plt.figure(figsize=(15, 10))
    plt.bar(centers, counts, width=(centers[-1] - centers[0]) / bins, alpha=0.5)
    # Lists rather than map(): on Python 3 map() is a lazy iterator.
    plt.plot(xs, [model.pdf(x, 1) for x in xs], linewidth=2.)

    counts, centers = strandHistogram(D[1])
    plt.bar(centers, -counts, width=(centers[-1] - centers[0]) / bins,
            color="red", alpha=0.5)
    plt.plot(xs, [-model.pdf(x, -1) for x in xs], linewidth=2.)
    plt.show()
def output(I, G):
    """Collect the parameters of accepted components of I's best model into G.

    I: interval object handed to ``BIC.get_best_model``.
    G: mapping whose "mu", "si", "l" and "pi" entries support ``append``;
       one value per accepted component is appended to each.

    Relies on the module-level names ``penality``, ``diff_threshold``,
    ``si_thresh``, ``l_thresh``, ``w_thresh`` and ``pi_thresh``.
    Returns None.
    """
    best_fit = BIC.get_best_model(I, penality, diff_threshold)

    # Filter first, then record, so every component is checked before G changes.
    accepted = []
    for component in best_fit.rvs:
        if check_bidir_component(component, si_thresh=si_thresh,
                                 l_thresh=l_thresh, w_thresh=w_thresh,
                                 pi_thresh=pi_thresh):
            accepted.append(component)

    for component in accepted:
        for field in ("mu", "si", "l", "pi"):
            G[field].append(getattr(component, field))
def ouput(I, FHW):
    """Write the accepted components of I's best model, with binned signal, to FHW.

    I: interval object; ``chrom``, ``start`` and ``stop`` form the header line.
    FHW: object with a ``write`` method that receives the text.

    Nothing is written when no component passes ``check_bidir_component``.
    Otherwise a "#chrom:start-stop" header is written, then for each accepted
    component its string form followed by one comma-separated line per entry
    returned by ``bin_ChIP_signal``.

    Relies on the module-level names ``penality``, ``diff_threshold``,
    ``si_thresh``, ``l_thresh``, ``w_thresh`` and ``pi_thresh``.
    Returns None.
    """
    fitted = BIC.get_best_model(I, penality, diff_threshold)

    hits = []
    for component in fitted.rvs:
        if check_bidir_component(component, si_thresh=si_thresh,
                                 l_thresh=l_thresh, w_thresh=w_thresh,
                                 pi_thresh=pi_thresh):
            hits.append(component)
    if not hits:
        return

    header = "".join(["#", I.chrom, ":", str(I.start), "-", str(I.stop), "\n"])
    FHW.write(header)

    for component in hits:
        signal_bins = bin_ChIP_signal(component, I)
        FHW.write(component.__str__() + "\n")
        for pairs, data_type, peak in signal_bins:
            prefix = data_type + "," + str(peak) + ","
            ranges = ",".join([str(lo) + "-" + str(hi) for lo, hi in pairs])
            FHW.write(prefix + ranges + "\n")
def output(I, FHW, penality, diff_threshold):
    """Write the best model for interval I to FHW.

    I: interval object; ``chrom``, ``start``, ``stop`` and ``annotation_N``
       form the header line, and it is passed to ``BIC.get_best_model``.
    FHW: object with a ``write`` method that receives the text.
    penality, diff_threshold: forwarded unchanged to ``BIC.get_best_model``.

    Output is a "#chrom:start-stop,annotation_N" header followed by one line
    per entry of the model's ``rvs``. Returns None.
    """
    best = BIC.get_best_model(I, penality, diff_threshold)

    header = "".join([
        "#", I.chrom, ":", str(I.start), "-", str(I.stop),
        ",", str(I.annotation_N), "\n",
    ])
    FHW.write(header)

    for component in best.rvs:
        FHW.write(component.__str__() + "\n")
def run(root):
    """Run whichever analyses are switched on by the flags at the top of the body.

    root: path prefix; only the ``parameters`` branch uses it, to locate the
          EMG model-fit file.

    The four boolean flags are hard-coded. As written only ``correlation_BO``
    is enabled, which loads one bidirectional-hits BED file and passes the
    result to ``correlations.parameters_dist``. All input paths are absolute,
    machine-specific locations. Returns None.
    """
    display_fits = False
    parameters = False
    correlation = False
    correlation_BO = True

    if correlation_BO:
        DIR = "/Users/joazofeifa/Lab/gro_seq_files/HCT116/EMG_out_files/"
        DMSO1hr101911 = "DMSO1hr101911_model_fits/EMG-4_bidirectional_hits_intervals.bed"
        DMSO1027 = "DMSO1027_1212_model_fits/EMG-3_bidirectional_hits_intervals.bed"
        Ma6_NoIndex = "Ma6_NoIndex_L008_R1_001/EMG-6_bidirectional_hits_intervals.bed"
        DMSO2_3 = "Allen2014_DMSO2_3-2_bidirectional_hits_intervals.bed"
        Nutlin2_3 = "Nutlin2_3_model_fits/EMG-2_bidirectional_hits_intervals.bed"
        RefSeq = "/Users/joazofeifa/Lab/genome_files/RefSeqHG19.txt"
        # Two candidate files; the second assignment is the one that takes effect.
        ChIP_p53 = "/Users/joazofeifa/Lab/ACM_IEEE_Paper_analysis/files/bedFiles/Atleast7of7.bedbothstrands.bed_norefgene.bed"
        ChIP_p53 = "/Users/joazofeifa/Lab/nutlin_bidirectional_hits_intervals_091715.bed.count.bed.h.bed.namescoreDMSObi.resSig.txt.bed.txt"

        DMSO_forward = "/Users/joazofeifa/Lab/gro_seq_files/HCT116/bed_graph_files/DMSO2_3.pos.BedGraph"
        DMSO_reverse = "/Users/joazofeifa/Lab/gro_seq_files/HCT116/bed_graph_files/DMSO2_3.neg.BedGraph"
        Nutlin_forward = "/Users/joazofeifa/Lab/gro_seq_files/HCT116/bed_graph_files/Nutlin2_3.sorted.pos.BedGraph"
        Nutlin_reverse = "/Users/joazofeifa/Lab/gro_seq_files/HCT116/bed_graph_files/Nutlin2_3.sorted.neg.BedGraph"

        # Most of the names above are referenced only by the disabled calls
        # below; they are kept so those calls can be re-enabled as-is.
        # DMSO1hr101911_L,DMSO1hr101911_G = load.load_model_fits_bed_file(DIR+DMSO1hr101911)
        # DMSO1027_L,DMSO1027_G = load.load_model_fits_bed_file(DIR+DMSO1027)
        # Ma6_NoIndex_L,Ma6_NoIndex_G = load.load_model_fits_bed_file(DIR+Ma6_NoIndex)
        DMSO2_3_L, DMSO2_3_G = load.load_model_fits_bed_file(DIR + DMSO2_3)
        correlations.parameters_dist(DMSO2_3_L)
        # Nutlin2_3_L,Nutlin2_3_G = load.load_model_fits_bed_file(DIR+Nutlin2_3)
        # density_plots.insert_bedgraph(DMSO2_3_L,(DMSO_forward,DMSO_reverse ))
        # density_plots.insert_bedgraph(Nutlin2_3_L,(Nutlin_forward,Nutlin_reverse ))
        # overlaps = correlations.match_UP(Ma6_NoIndex_L, DMSO2_3_L)
        # density_plots.plot_density(overlaps)
        # correlations.p53_binding(Nutlin2_3_L, DMSO2_3_L, overlaps)
        # correlations.label_p53(overlaps, attr="lam", LOG=True )
        # correlations.promoter_differences_test((DMSO2_3_L,Nutlin2_3_L))
        # correlations.p53_differences_test((Nutlin2_3_L,))
        # correlations.si_lam(overlaps)
        # correlations.run(overlaps, attr="si", LOG=False )
        # correlations.run_all(overlaps)

    if correlation:
        DIR = "/Users/joazofeifa/Lab/gro_seq_files/HCT116/EMG_out_files/"
        DMSO1hr101911 = "DMSO1hr101911_model_fits/model_fits.txt"
        DMSO1027 = "DMSO1027_1212_model_fits/model_fits.txt"
        Ma6_NoIndex = "Ma6_NoIndex_L008_R1_001/model_fits.txt"
        DMSO2_3 = "DMSO2_3_model_fits/model_fits.txt"
        Nutlin2_3 = "Nutlin2_3_model_fits/model_fits.txt"

        DMSO1hr101911_L, DMSO1hr101911_G = load.load_model_fits_bed_file(DIR + DMSO1hr101911)
        DMSO1027_L, DMSO1027_G = load.load_model_fits_bed_file(DIR + DMSO1027)
        # The original unpacked both results into Ma6_NoIndex_L, discarding
        # the first one; unpack into _L/_G like the sibling loads.
        Ma6_NoIndex_L, Ma6_NoIndex_G = load.load_model_fits_bed_file(DIR + Ma6_NoIndex)
        DMSO2_3_L, DMSO2_3_G = load.load_model_fits_bed_file(DIR + DMSO2_3)
        Nutlin2_3_L, Nutlin2_3_G = load.load_model_fits_bed_file(DIR + Nutlin2_3)

        # NOTE(review): each _L value is passed twice; confirm against
        # correlations.run whether the _G values were intended instead.
        correlations.run(DMSO2_3_L, DMSO2_3_L, Ma6_NoIndex_L, Ma6_NoIndex_L)

    if display_fits:
        out_dir = "/Users/joeyazo/Desktop/Lab/gro_seq_files/HCT116/EMG_out_files/"
        model_file = out_dir + "model_fits_out_all_4"
        data_file = out_dir + "test_file_2.tsv"
        intervals = load.EMG_out(model_file)
        load.insert_data(data_file, intervals)
        dmf.display(intervals, bins=300)

    if parameters:
        EMG_out_FILE = root + "gro_seq_files/HCT116/EMG_out_files/EMG_model_fits_all_0"
        # Sub-switches for this branch; this rebinds the outer `parameters` flag.
        parameters = False
        BIC_analysis = True
        # only supports loading one at a time
        fits = load.EMG_out(EMG_out_FILE)
        if parameters:
            lap.run(fits, spec=None, weight_thresh=0.1, retry_tresh=0, converged=True)
        if BIC_analysis:
            BIC.run(fits)