def test_entry_creation(self):
    """Build four two-entry BED files whose first intervals overlap, then print the fragments of their union."""
    # Each replicate pairs a variable high-signal interval with a fixed (80, 900) low-signal one.
    primary_coords = [(8000, 9000), (8500, 9000), (7500, 9000), (5000, 8999)]
    replicates = []
    for start, end in primary_coords:
        primary = bed.BedEntry('X', start, end)
        primary.signalValue = 10
        secondary = bed.BedEntry('X', 80, 900)
        secondary.signalValue = 3
        replicates.append(bed.BedFile([primary, secondary]))
    # Union with a minimum of 2 supporting entries per fragment.
    unions = union(replicates, 2)
    for fragment in unions[0]:
        print(fragment)
def test_single_chrom(self):
    """Run the rank-product pipeline on chr17 of three MED replicates and check that
    connecting nearby entries changes the entry count."""
    # Load the chr17 slice of each replicate's broadPeak file.
    med1 = bed.BedFile("test/data/med/med1_peaks.broadPeak", "Peaks").getChrom("chr17")
    med2 = bed.BedFile("test/data/med/med2_peaks.broadPeak", "Peaks").getChrom("chr17")
    med3 = bed.BedFile("test/data/med/med3_peaks.broadPeak", "Peaks").getChrom("chr17")
    bedf = [med1, med2, med3]
    minentries = 2
    # First create intersection and rank the entries in each replicate and return the rankproduct values
    ranks = rankreps(bedf, minentries, rankmethod='signalValue')
    # Calculate rank product for each entry that contributes to a union entry
    # Calculate the pvalues of the rank product values
    print('Calculating rank product probabilities...')
    rpb_up = rankprodbounds(ranks[1], len(ranks[1]), len(bedf), 'geometric')
    print('Calculating binomial threshold...')
    # Calculate rpb and binomial intersection point
    Pks = thresholdCalc(rpb_up, k=len(bedf) - (minentries - 1))
    if len(Pks[2]) != 0:
        binomAlpha = round(min(Pks[2]), 3)
    else:
        print('No binomial convergence, defaulting to 0.1')
        binomAlpha = 0.1
    # NOTE(review): binomAlpha is computed above but never used in this test — confirm intent.
    # Perform multiple hypothesis testing correction upon the pvals
    fdr = multipletesting.fdrcorrection(rpb_up)
    # Determine whether to remove entries that are called significant
    print('Cleaning up output...')
    # Annotate each union entry with a score derived from its rank-product p-value.
    # (v is unused; entries are accessed by index i.)
    for i, v in enumerate(ranks[0][0]):
        p = rpb_up[i]
        if p != 0.0:
            # Score scales with -log2(p), capped at the BED maximum of 1000.
            ranks[0][0][i].addOption(name='TBD', score=min([abs(int(125 * math.log2(rpb_up[i]))), 1000]), strand='.', pValue=rpb_up[i], qValue=fdr[1][i])
        else:
            # Zero p-values get the cap score and a tiny placeholder p/q value.
            ranks[0][0][i].addOption(name='TBD', score=1000, strand='.', pValue=2.5e-20, qValue=2.5e-20)
    collapsed = bed.BedFile(ranks[0][0], 'IDR')
    # Connecting entries within 20 bp should merge some, changing the count.
    connected = connect_entries(collapsed, bedf, 20, True)
    self.assertNotEqual(len(collapsed), len(connected))
def joinChIA(chia):
    """Flatten a collection of ChIA-PET replicates into a single BEDPE BedFile."""
    pooled = [interaction for replicate in chia for interaction in replicate]
    return bed.BedFile(pooled, 'bedpe')
def run_1(self, args):
    """Echo the input file names, load each input as a Peaks BedFile, and run the rank-product analysis."""
    for handle in args.input:
        print(str(handle.name))
    print('Processing Input...')
    peak_files = [bed.BedFile(str(handle.name), 'Peaks') for handle in args.input]
    rankprod.performrankprod(peak_files, args.minentries, args.rankmethod, 'all',
                             args.duphandling, args.random_seed, args.alpha,
                             args.output, args.size, False, args.fragment)
def test_from_bed(self):
    """Run performrankprod on three MED replicates, once with fragmenting enabled and once without."""
    replicates = [
        bed.BedFile("test/data/med/med1_peaks.broadPeak", "Peaks"),
        bed.BedFile("test/data/med/med2_peaks.broadPeak", "Peaks"),
        bed.BedFile("test/data/med/med3_peaks.broadPeak", "Peaks"),
    ]
    # Identical parameters apart from the output name and the fragment flag.
    for out_name, do_fragment in (("test_fragments_true", True), ("test_fragments_false", False)):
        performrankprod(replicates, minentries=2, rankmethod="pvalue", specifyMax=None,
                        duphandling='average', random_seed=0.5, alpha=0.05,
                        filename=out_name, default_min_peak=20,
                        print_pvals=True, fragment=do_fragment)
def ChIAreproducibility(chia, chip, minentries=None, rank='signalValue', threshold='all', alpha=0.05, filename='NA'):
    """Assess reproducibility of ChIA-PET interactions against ChIP-seq peak replicates.

    With one replicate of each, verifies directly; otherwise pools the ChIA-PET
    files, runs the rank-product analysis on the ChIP replicates, reloads the
    thresholded peak file selected by `threshold` ('binom', 'alpha' or 'all'),
    and verifies the pooled interactions against it. Returns None for any other
    threshold keyword.
    """
    if minentries is None:
        minentries = len(chip)
    if len(chia) == 1 and len(chip) == 1:
        # Single replicate on both sides: verify directly, no rank product needed.
        return verifyChIAPeaks(chia[0], chip[0], filename=filename, alpha=alpha)
    try:
        # Indexing succeeds when multiple chiapet files are entered; pool them.
        chia[0]
        inters = joinChIA(chia)
    except TypeError:
        inters = chia
    rankprod.performrankprod(chip, minentries=minentries, rankmethod=rank, alpha=alpha, filename=filename + '.bed')
    # Map the threshold keyword to the prefix performrankprod uses for its output files.
    prefix_by_threshold = {'binom': 'T2_', 'alpha': 'T1_', 'all': 'ALL_'}
    prefix = prefix_by_threshold.get(threshold)
    if prefix is None:
        # Unknown threshold keyword: same implicit-None outcome as the original branch chain.
        return None
    RP = bed.BedFile(prefix + filename + '.bed', 'Peaks')
    return verifyChIAPeaks(inters, RP, filename=filename, alpha=alpha)
def test_bed(self):
    """Smoke-test BedFile loading, generation, iteration, membership, overlap and closest lookups."""
    peaks = bed.BedFile("test/data/med/med1_peaks.broadPeak", "Peaks")
    print(peaks.chroms.keys())
    chr1_entries = peaks.generate('chr1')
    for _ in range(3):
        print(next(chr1_entries))
    # Print at most the first 100 entries, numbered from 1.
    for count, entry in enumerate(peaks, start=1):
        print(str(count) + '\t' + str(entry))
        if count == 100:
            break
    membership_probe = bed.BedEntry('chrX', 3858266, 3858530)
    print(membership_probe in peaks)
    overlap_probe = bed.BedEntry('chrX', 10047550, 10067694)
    for hit in peaks.getOverlap(overlap_probe):
        print(hit)
    closest_probe = bed.BedEntry('chr9', 102699903, 102700167)
    for hit in peaks.getClosest(closest_probe):
        print(hit)
        for member in hit:
            print(member)
def verifyChIAPeaks(chiaData, chipData, filename, alpha):
    """Score ChIA-PET interactions by the ChIP-seq peaks supporting both anchors.

    Every interaction with more than two PETs whose anchors each overlap at
    least one peak gets a combined anchor p-value (Stouffer, weighted by
    signalValue). Interactions are then ranked by descending PET count and a
    running binomial CDF converts the combined p-values into per-interaction
    probabilities, which are FDR-corrected and written out as BEDPE/BED12.

    Parameters
    ----------
    chiaData : interaction BedFile (BED12 input is converted to BEDPE first).
    chipData : ChIP-seq peak BedFile used for anchor support.
    filename : basename for the .bedpe / .bed output files.
    alpha    : FDR level passed to the multiple-testing correction.

    Returns
    -------
    (ainters, fdr, allinters, PETs, dists, brobs, qvalues)
    """
    listinters = []   # interactions with peak support at both anchors
    PETs = []         # PET count per supported interaction
    probs = []        # combined anchor p-value per supported interaction
    inters = {}       # interaction -> supporting peak pairs
    PETgroups = {}    # PET count -> interactions with that count
    prs = []
    fdr = []
    if getattr(chiaData, 'format') == 'BED12':
        chiaData = bed.BED12toBEDPE(chiaData)
    for link in chiaData:
        if link.PETs <= 2:
            continue
        o1 = chipData.getOverlap(link.partner1)
        o2 = chipData.getOverlap(link.partner2)
        try:
            PETgroups[link.PETs].append(link)
        except KeyError:
            PETgroups[link.PETs] = [link]
        try:
            if len(o1) > 0 and len(o2) > 0:
                for peaks1, peaks2 in zip(o1, o2):
                    # Create entries in dictionary with peaks as support.
                    try:
                        inters[link].append([peaks1, peaks2])
                    except KeyError:
                        inters[link] = [peaks1, peaks2]
        except TypeError:
            # getOverlap yielded something without len() for an anchor; skip.
            continue
    dists = []
    for k, v in inters.items():
        pvals = []
        rs = []
        for peak in v:
            try:
                pvals.append(peak.pValue)
                rs.append(peak.signalValue)
            except AttributeError:
                # BUGFIX: was `type(peak) == 'list'` — comparing a type object
                # to a string is always False, so nested [peak1, peak2] support
                # pairs were silently ignored.
                if isinstance(peak, list):
                    for p in peak:
                        try:
                            pvals.append(p.pValue)
                            # BUGFIX: was `peak.signalValue` (the list itself),
                            # which raised after pvals had already been extended
                            # and desynchronised the p-value/weight lists.
                            rs.append(p.signalValue)
                        except AttributeError:
                            continue
                else:
                    continue
        if len(pvals) != 0:
            # Weighted Stouffer combination of the supporting peak p-values.
            combined = scipy.stats.combine_pvalues(pvals, 'stouffer', rs)
            probs.append(combined[1])
            listinters.append(k)
            PETs.append(k.PETs)
            dists.append(k.getDistance())
    # Rank all supported interactions by descending PET count.
    PETs, listinters, probcor, dists = (list(x) for x in zip(
        *sorted(zip(PETs, listinters, probs, dists), key=lambda pair: pair[0], reverse=True)))
    brobs = []
    ints = []
    pets2 = []
    ainters = []
    t = sum(PETs)
    tags = 0
    for i, p in enumerate(zip(probcor, PETs)):
        tags += p[1]
        # Binomial CDF of the cumulative tag count under the combined p-value.
        b = scipy.stats.binom.cdf(tags, t, 1 - p[0])
        print(p[0], p[1])
        brobs.append(b)
        pets2.append(p[1])
        ints.append(listinters[i])
    corrected = multipletesting.fdrcorrection(brobs, alpha)
    thresh = rankprod.thresholdCalc(brobs)
    try:
        binomAlpha = round(min(thresh[2]), 3)
        if binomAlpha == 0:
            binomAlpha = 0.05
    except ValueError:
        # No convergence point found by thresholdCalc; fall back to 0.05.
        binomAlpha = 0.05
    print(binomAlpha)
    for i, p in zip(ints, corrected[1]):
        i.addOption(pValue=p)
        prs.append(p)
        ainters.append(i)
        # NOTE(review): hard-coded 0.05 cutoff — neither `alpha` nor the
        # computed binomAlpha is used here; confirm intent.
        if p <= 0.05:
            if p == 0:
                i.addOption(pValue=0.00000000000001)
            fdr.append(i)
    fdr = bed.BedFile(fdr, "BEDPE")
    allinters = bed.BedFile(ainters, "BEDPE")
    print(len(fdr), 'Interactions pass FDR threshold')
    # NOTE(review): both writes target the same .bedpe path, so the FDR subset
    # overwrites the full interaction set — confirm whether distinct output
    # names were intended.
    bed.writeBedFile(allinters, filename + '.bedpe', format='BEDPE')
    bed.writeBedFile(fdr, filename + '.bedpe', format="BEDPE")
    intersBED12 = bed.BEDPEtoBED12(allinters)
    bed.writeBedFile(intersBED12, filename + '.bed', format='BED12')
    return ainters, fdr, allinters, PETs, dists, brobs, corrected[1]
def verifyChIAPeaks(chiaData, chipData, filename, alpha):
    """Score ChIA-PET interactions by the ChIP-seq peaks supporting both anchors.

    NOTE(review): this re-defines verifyChIAPeaks and shadows the earlier
    definition of the same name in this file — confirm which copy is intended
    to survive.

    Interactions with more than two PETs whose anchors each overlap a peak get
    a combined anchor p-value (Stouffer, weighted by signalValue); interactions
    are ranked by descending PET count, converted to probabilities via a
    running binomial CDF, FDR-corrected, and written out as BEDPE/BED12.

    Parameters
    ----------
    chiaData : interaction BedFile (BED12 input is converted to BEDPE first).
    chipData : ChIP-seq peak BedFile used for anchor support.
    filename : basename for the .bedpe / .bed output files.
    alpha    : FDR level passed to the multiple-testing correction.

    Returns
    -------
    (ainters, fdr, allinters, PETs, dists, brobs, qvalues)
    """
    listinters = []   # interactions with peak support at both anchors
    PETs = []         # PET count per supported interaction
    probs = []        # combined anchor p-value per supported interaction
    inters = {}       # interaction -> supporting peak pairs
    PETgroups = {}    # PET count -> interactions with that count
    prs = []
    fdr = []
    if getattr(chiaData, 'format') == 'BED12':
        chiaData = bed.BED12toBEDPE(chiaData)
    for link in chiaData:
        if link.PETs <= 2:
            continue
        o1 = chipData.getOverlap(link.partner1)
        o2 = chipData.getOverlap(link.partner2)
        try:
            PETgroups[link.PETs].append(link)
        except KeyError:
            PETgroups[link.PETs] = [link]
        try:
            if len(o1) > 0 and len(o2) > 0:
                for peaks1, peaks2 in zip(o1, o2):
                    # Create entries in dictionary with peaks as support.
                    try:
                        inters[link].append([peaks1, peaks2])
                    except KeyError:
                        inters[link] = [peaks1, peaks2]
        except TypeError:
            # getOverlap yielded something without len() for an anchor; skip.
            continue
    dists = []
    for k, v in inters.items():
        pvals = []
        rs = []
        for peak in v:
            try:
                pvals.append(peak.pValue)
                rs.append(peak.signalValue)
            except AttributeError:
                # BUGFIX: was `type(peak) == 'list'` — comparing a type object
                # to a string is always False, so nested [peak1, peak2] support
                # pairs were silently ignored.
                if isinstance(peak, list):
                    for p in peak:
                        try:
                            pvals.append(p.pValue)
                            # BUGFIX: was `peak.signalValue` (the list itself),
                            # which raised after pvals had already been extended
                            # and desynchronised the p-value/weight lists.
                            rs.append(p.signalValue)
                        except AttributeError:
                            continue
                else:
                    continue
        if len(pvals) != 0:
            # Weighted Stouffer combination of the supporting peak p-values.
            combined = scipy.stats.combine_pvalues(pvals, 'stouffer', rs)
            probs.append(combined[1])
            listinters.append(k)
            PETs.append(k.PETs)
            dists.append(k.getDistance())
    # Rank all supported interactions by descending PET count.
    PETs, listinters, probcor, dists = (list(x) for x in zip(
        *sorted(zip(PETs, listinters, probs, dists), key=lambda pair: pair[0], reverse=True)))
    brobs = []
    ints = []
    pets2 = []
    ainters = []
    t = sum(PETs)
    tags = 0
    for i, p in enumerate(zip(probcor, PETs)):
        tags += p[1]
        # Binomial CDF of the cumulative tag count under the combined p-value.
        b = scipy.stats.binom.cdf(tags, t, 1 - p[0])
        print(p[0], p[1])
        brobs.append(b)
        pets2.append(p[1])
        ints.append(listinters[i])
    corrected = multipletesting.fdrcorrection(brobs, alpha)
    thresh = rankprod.thresholdCalc(brobs)
    try:
        binomAlpha = round(min(thresh[2]), 3)
        if binomAlpha == 0:
            binomAlpha = 0.05
    except ValueError:
        # No convergence point found by thresholdCalc; fall back to 0.05.
        binomAlpha = 0.05
    print(binomAlpha)
    for i, p in zip(ints, corrected[1]):
        i.addOption(pValue=p)
        prs.append(p)
        ainters.append(i)
        # NOTE(review): hard-coded 0.05 cutoff — neither `alpha` nor the
        # computed binomAlpha is used here; confirm intent.
        if p <= 0.05:
            if p == 0:
                i.addOption(pValue=0.00000000000001)
            fdr.append(i)
    fdr = bed.BedFile(fdr, "BEDPE")
    allinters = bed.BedFile(ainters, "BEDPE")
    print(len(fdr), 'Interactions pass FDR threshold')
    # NOTE(review): both writes target the same .bedpe path, so the FDR subset
    # overwrites the full interaction set — confirm whether distinct output
    # names were intended.
    bed.writeBedFile(allinters, filename + '.bedpe', format='BEDPE')
    bed.writeBedFile(fdr, filename + '.bedpe', format="BEDPE")
    intersBED12 = bed.BEDPEtoBED12(allinters)
    bed.writeBedFile(intersBED12, filename + '.bed', format='BED12')
    return ainters, fdr, allinters, PETs, dists, brobs, corrected[1]