def footprints(self, withCutoff=-30, merge=1):
    """
    Return a GenomicIntervalSet of the footprints scoring below a cutoff.

    Positions are read from ``self.scores`` and the footprint length at each
    position from ``self.lengths``. Summits are picked greedily (lowest score
    first); each summit becomes an interval centred on it, half the footprint
    length wide on either side, clipped to the bounds of the score array.

    Args:
        withCutoff: only positions whose score is strictly below this value
            are reported. Masked positions are set to 1, so the selection
            loop is only guaranteed to terminate for cutoffs <= 1.
        merge: when truthy, a footprint is dropped if its span contains the
            right-hand end of a better-scoring footprint.

    Returns:
        A ``pyDNase.GenomicIntervalSet`` holding one "+" strand interval per
        footprint, with the summit score stored as the interval score. The
        set is empty when nothing scores below the cutoff.
    """
    ranges = []
    tempMLE, templogProb = np.array(self.lengths), np.array(self.scores)

    # Greedy summit selection: repeatedly take the global minimum, record the
    # footprint around it, then mask the footprint and its flanks so that the
    # next iteration finds a different summit.
    # The size check avoids calling min() on an empty array, which raises.
    while templogProb.size and templogProb.min() < withCutoff:
        minimapos = templogProb.argmin()
        minimascore = templogProb[minimapos]
        minimafplen = int(tempMLE[minimapos])
        # Floor division keeps the bounds integral on Python 3 as well
        # (true division would yield floats, which are invalid slice indices).
        minimaphalffplen = minimafplen // 2
        lbound = max(minimapos - minimaphalffplen, 0)
        rbound = min(minimapos + minimaphalffplen, len(templogProb))
        ranges.append((lbound, rbound, minimascore, minimafplen))
        # Mask one footprint length either side of the called footprint
        mask_start = max(lbound - minimafplen, 0)
        mask_end = min(rbound + minimafplen, len(templogProb))
        templogProb[mask_start:mask_end] = 1

    if merge:
        # Order the ranges so that the best (lowest) score is last. The sort
        # must be in place: sorted() returns a new list and leaves its
        # argument untouched. Filtering below preserves this order, so one
        # sort is enough.
        ranges.sort(key=lambda x: -x[2])
        merged_ranges = []
        while ranges:
            # Take the best remaining footprint...
            best = ranges.pop()
            merged_ranges.append(best)
            # ...and discard every remaining range that contains its right end
            ranges = [r for r in ranges if not r[0] <= best[1] <= r[1]]
    else:
        merged_ranges = ranges

    # Create the GenomicIntervalSet and add the footprints to it
    returnSet = pyDNase.GenomicIntervalSet()
    for i in merged_ranges:
        rstartbp = self.interval.startbp + i[0]
        # We must add one to the end base of the footprint to account for the BED file format
        rendbp = self.interval.startbp + i[1] + 1
        region = pyDNase.GenomicInterval(self.interval.chromosome, rstartbp, rendbp, strand="+", score=i[2])
        returnSet += region
    return returnSet
#Call footprints
import sys
import pyDNase
import pyDNase.footprinting as fp

# Command line arguments (all positional):
#   1: regions file loaded into a GenomicIntervalSet
#   2: reads file opened with BAMHandler
#   3: output file; footprints are appended to it
#   4: integer footprint score cutoff
#   5: 'singleEnd' selects wellington1D, anything else selects wellington
if len(sys.argv) < 6:
    sys.exit("usage: %s <regions> <reads.bam> <output> <cutoff> <singleEnd|other>" % sys.argv[0])

# Both modes run the same pipeline and differ only in the footprinter class
footprinter_class = fp.wellington1D if sys.argv[5] == 'singleEnd' else fp.wellington
cutoff = int(sys.argv[4])

regions = pyDNase.GenomicIntervalSet(sys.argv[1])
reads = pyDNase.BAMHandler(sys.argv[2])

# Open the output once (still in append mode) instead of once per region
with open(sys.argv[3], "a") as bedout:
    # Visit every region: the previous range(len(regions) - 1) silently
    # skipped the last one. Indexing is kept so the regions are visited in
    # the same order as before.
    for x in range(len(regions)):
        footprinter = footprinter_class(regions[x], reads)
        footprints = footprinter.footprints(withCutoff=cutoff)
        bedout.write(str(footprints))
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import argparse
import pyDNase
from clint.textui import progress

parser = argparse.ArgumentParser(
    description='writes a BED file with the FOS for the interval specified as the score')
parser.add_argument("-A", action="store_true",
                    help="ATAC-seq mode (default: False)", default=False)
parser.add_argument(
    "regions",
    help="BED file of the regions you want to generate the average profile for")
parser.add_argument("reads", help="The BAM file containing the DNase-seq data")
parser.add_argument("output", help="filename to write the output to")
args = parser.parse_args()

reads = pyDNase.BAMHandler(args.reads, ATAC=args.A)
regions = pyDNase.GenomicIntervalSet(args.regions)

# The context manager guarantees the output is flushed and closed, even if
# scoring one of the regions raises.
with open(args.output, "w") as outfile:
    for i in progress.bar(regions):
        # Store the FOS computed from the reads as the interval's score
        i.score = reads.FOS(i)
        # Same output as the Python 2 statement `print >> outfile, i`
        # (str(i) followed by a newline), but valid on Python 3 too.
        outfile.write(str(i) + "\n")
def test_footprinting(self):
    """Test footprinting

    Runs ``wellington`` on the first example region with the example reads
    and compares the footprinter's ``scores`` (to 3 decimal places) and
    ``lengths`` (exactly) against the reference values stored below.
    """
    #Load test data
    reads = pyDNase.BAMHandler(pyDNase.example_reads())
    regions = pyDNase.GenomicIntervalSet(pyDNase.example_regions())
    footprinter = wellington(regions[0], reads)
    #Note - we only check the accuracy of the footprinting to 3 decimal places to allow for differences in floating point numbers
    # Reference scores, one value per position of the region
    numpy.testing.assert_array_almost_equal(footprinter.scores, [
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.8505197962574915, -0.7522459055434079, -0.6405956238609599, -0.35029217770692905, -0.19445213824845226, -0.04510918998207078, -0.013127544708030047, -0.019434755711449096, -0.017813062409838532, -0.4899192539679181, -0.7366170062412767, -1.160234291218491, -1.4932241116142613, -2.528451574312211, -2.9873463332686545, -4.0789439624702215, -4.608073840135845, -4.6080738401358445, -5.46591166889954, -6.317058518040485, -7.846849141309235, -8.70970430615968, -7.84684914298093, -10.57133857477595, -9.524456623200592, -8.450720744685238, -7.351088844276472, -6.227879918162327, -5.085807684913266, -1.412414402021511, -3.461932293846784, -3.6968244901998126, -3.6968244901997713, -3.9374380500569046, -3.9374380500569046, -3.502106381128687, -3.968687434788506, -3.968687434788506, -3.9686874347885044, -4.210084222760481, -4.708248147109799, -4.481083945460659, -4.614616491048433, -6.331304868565458, -6.7188196319447515, -7.805240790859276, -10.096125803164037, -10.096125904865069, -9.804317009970552, -10.942957174739428, -10.831197056706369, -9.451636014876547, -9.271803479479166, -10.547425524609011, -11.356756808330887, -10.173763450595242, -17.266997956146163, -24.135650052599853, -26.79974412054261, -24.068532700189742, -20.83033463447785, -17.442306072203564, -3.3271869067645095, -1.552524387513255, -1.2303389949451933, -1.116146321342096, -0.7241346073398854,
        -0.8217741198401821, -0.5077397193727583, -0.4619110913457732, -0.22648726483418524, -0.08368942693734599, -0.04662652321248819, -0.10740322088702083, -0.1600382576388667, -0.09849358892510252, -0.2996877100052051, -0.4956516466712493, -0.8286771565689258, -0.7441816651207845, -0.5312102440124086, -6.089145200199429, -54.524611990632465, -55.11290166247622, -53.73358776712574, -56.37380673644542, -59.597668457279916, -63.142121596069494, -69.8245790871056, -76.97479986221292, -83.6326531975367, -88.05928977864403, -87.62205344847811, -90.7846299628178, -94.85120273316905, -90.09506169785546, -85.09363194018195, -90.25622681870428, -80.40916250197246, -84.41195387381595, -96.25001089840575, -105.99203665518576, -109.60076099775432, -116.04973655820825, -124.40507207962382, -120.71820677125163, -121.99289957155713, -121.7696295849731, -128.86709184814546, -130.00197395916774, -138.7286574562139, -150.07398897152254, -141.58993458465335, -134.33745073269844, -134.76596995468543, -106.6912682602024, -96.02214212537493, -85.8950778423277, -73.04392809450209, -54.85091731066348, -44.010732916962205, -31.573437293391223, -23.59371038683095, -18.62378346291484, -3.2863459020700057, -1.8733702431391752, -0.492074167081423, -0.27948577530733343, -0.27948577530733343, -0.07138091975833981, -0.09972653646891905, -0.05418579937724513, -0.024132554170139438, -0.021842812415429565, -0.9566534364564785, -6.932360951667957, -11.187077720714367, -13.553355643835602, -14.21631406001477, -14.983929833667665, -15.422758574896921, -18.32278174888965, -18.2834926735795, -17.265359820713286, -16.13035610465361, -14.086076680349992, -13.521427957090859, -12.515293283803214, -11.480271740126698, -9.92078604101271, -8.797191973771438, -6.985510255611701, -5.426767915467293, -5.183152081566609, -3.7475983370968295, -1.9153547972282414, -0.0006083021245538324, -13.64272847695586, -10.286808471857325, -15.63569341874549, -20.86940117070692, -22.928591109686124, -30.496433497261098,
        -26.10052633266505, -29.221144392666716, -24.0276270737085, -21.301001754269702, -20.97154340860586, -15.798224427435104, -17.780912132981612, -24.823354886252613, -24.604927499889286, -24.955334454941635, -38.74241644973382, -43.782982787325366, -46.80273522972689, -46.08571305295883, -47.92277577875605, -41.4868217475951, -37.915322367616675, -34.16174895135005, -33.58267055798403, -32.06130865601216, -34.094574908150825, -39.695727106225405, -40.120719852615196, -41.05121481573844, -42.01796136083251, -39.75209693618059, -35.73339613779332, -34.731089314533676, -32.694583271242884, -29.577625993685, -28.026659577292953, -25.215089099008644, -25.174202473704753, -21.952113990014446, -17.028869764873075, -15.578727453806595, -16.1579750791396, -12.974390056172448, -8.418484753962995, -5.7847304546785905, -2.2267773783077134, -1.4570520375724902, -1.543691534890984, -1.575957362444019, -0.7176800307627448, -0.7968619556272615, -4.841045489929452, -5.248527604937139, -1.0472142687516643, -1.0630763089203221, -2.185755905394793, -3.8307492546267254, -4.993169872339857, -7.2764872801107385, -6.792829090234741, -6.452991771598523, -6.952945781664499, -8.215168486202954, -6.613961853070211, -22.150574756810474, -28.514525290020345, -27.33821547951633, -29.034538366843996, -33.82258103970177, -41.26481032907057, -40.912839794048644, -48.684226156049405, -49.44508720397513, -61.863467137712874, -70.11156862148243, -82.93974699146762, -91.62613467860213, -91.54466150389183, -73.5404690802315, -75.77506886003911, -78.05398228595476, -84.42906672420139, -93.01020782082938, -89.65901048860756, -109.20614016921928, -121.0826042903611, -120.2996268556599, -117.38782641714545, -128.50467987996305, -128.9595101418021, -133.14841986541902, -136.82233726671367, -133.94746637928725, -154.5649504690748, -164.11983575086742, -159.85307484109336, -151.89784688535133, -153.56557629402886, -146.72984757341305, -135.04501822595842, -127.92055598311715, -126.08111294376953,
        -120.03403862241993, -99.25696665821185, -71.19178328684012, -64.94518489350295, -59.98207339614661, -54.12991577221696, -43.206052468123545, -29.456860663206527, -6.411526985333728, -6.44709453786988, -6.215828945120546, -5.762898291384889, -4.3769156224166315, -3.2727915503830047, -2.616087927600661, -2.313254659995694, -1.8641066899878078, -1.8186414374916933, -0.8008712043775049, -0.6426129783652371, -0.5224073311989104, -0.2710345166975603, -0.43819657644966853, -1.2626459311104576, -1.9408301832235342, -3.9812039032702886, -3.9812039032702886, -2.861605777578473, -3.2137507785013066, -3.2137507785013066, -2.9669916392942004, -3.2617340566815645, -3.9686874347885044, -3.54350638697767, -3.54350638697767, -3.1070679887817896, -2.8384054421005627, -2.2611557931086583, -2.9566374983191013, -2.2617270920463315, -2.5370237970085574, -3.2091208219605813, -3.0532448758817448, -1.6966894030794892, -2.2744775410764126, -2.729866824495538, -3.080565957210189, -2.808261821233711, -3.251159821714309, -2.1636899060453407, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
    ], decimal=3)
    # Reference footprint lengths; these must match exactly
    numpy.testing.assert_array_equal(footprinter.lengths, [
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 11, 15, 13, 11, 15, 13, 25, 25, 11, 13, 15, 17, 19, 21, 23, 25, 25, 11, 13, 15, 17, 15, 21, 19, 17, 15, 13, 11, 21, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 23, 25, 15, 17, 19, 19, 17, 15, 13, 19, 25, 25, 23, 25, 11, 11, 13, 15, 13, 11, 11, 25, 25, 15, 15, 19, 17, 15, 13, 11, 25, 25, 25, 11, 15, 17, 19, 15, 23, 11, 25, 25, 25, 25, 25, 25, 21, 23, 25, 25, 25, 25, 25, 23, 25, 25, 25, 21, 23, 25, 25, 23, 25, 25, 21, 19, 19, 21, 25, 25, 25, 23, 25, 23, 21, 19, 19, 15, 15, 11, 11, 11, 23, 25, 25, 25, 25, 25, 25, 25, 25, 25, 11, 11, 13, 15, 11, 11, 21, 17, 15, 13, 15,
        13, 25, 25, 23, 21, 19, 19, 13, 13, 13, 11, 25, 11, 13, 15, 17, 11, 13, 15, 17, 15, 13, 11, 25, 15, 17, 19, 19, 23, 25, 25, 21, 23, 21, 19, 17, 13, 25, 25, 25, 25, 25, 25, 25, 23, 21, 19, 25, 25, 23, 11, 11, 15, 15, 13, 15, 13, 11, 19, 15, 13, 11, 11, 11, 11, 11, 15, 15, 19, 21, 23, 25, 25, 23, 25, 25, 15, 11, 13, 15, 17, 19, 21, 23, 25, 25, 25, 23, 25, 25, 25, 25, 25, 25, 25, 25, 25, 21, 23, 25, 25, 25, 25, 25, 25, 21, 23, 25, 25, 25, 25, 23, 25, 25, 25, 25, 23, 21, 19, 15, 15, 13, 11, 13, 25, 25, 25, 25, 25, 25, 25, 23, 25, 25, 25, 25, 11, 11, 11, 13, 11, 11, 15, 17, 17, 21, 23, 25, 25, 25, 25, 23, 17, 19, 17, 15, 13, 11, 19, 11, 13, 15, 15, 13, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    ])
def test_BED_reading(self):
    """Check the string form of the example regions loaded from BED."""
    # Expected serialisation of the interval set built from the example file
    expected_bed = 'chr6\t170863142\t170863532\t0\t0.0\t+\n'
    loaded = pyDNase.GenomicIntervalSet(pyDNase.example_regions())
    self.assertEqual(str(loaded), expected_bed)
# Sanity check parameters from the user
try:
    args.footprint_sizes = xrange_from_string(args.footprint_sizes)
except ValueError:
    raise RuntimeError("Footprint sizes must be supplied as from,to,step")
# Raise explicitly instead of using assert: assertions are removed when Python
# runs with -O, which would let invalid values through unchecked.
if not 0 < args.FDR_cutoff < 1:
    raise RuntimeError("FDR must be between 0 and 1")
if not args.FDR_limit < 0:
    raise RuntimeError("FDR limit must be less than 0")

# Treatment
reads2 = pyDNase.BAMHandler(args.treatment_bam, caching=0, ATAC=args.A)
# Control
reads1 = pyDNase.BAMHandler(args.control_bam, caching=0, ATAC=args.A)
# Regions of Interest
regions = pyDNase.GenomicIntervalSet(args.bedsites)
# Output (buffering=1 requests line buffering)
treatment_output = open(args.treatment_only_output, "w", buffering=1)
control_output = open(args.control_only_output, "w", buffering=1)

# Determine Number of CPUs to use: the user's choice if given, otherwise all of them
if args.processes:
    CPUs = args.processes
else:
    CPUs = mp.cpu_count()
# NOTE: This roughly scales at about 450mb per 300 regions held in memory
max_regions_cached_in_memory = 50 * CPUs
p = mp.Pool(CPUs)

# Parenthesised single-argument form prints the same text on Python 2 and 3
print("Performing differential footprinting...")
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import matplotlib.pyplot as plt
import pyDNase
from pyDNase.footprinting import wellington

# Open the reads and regions returned by pyDNase's example helpers
reads = pyDNase.BAMHandler(pyDNase.example_reads())
regions = pyDNase.GenomicIntervalSet(pyDNase.example_regions())

# Look up the cut counts of the first region, one lookup per strand
forward_cuts = reads[regions[0]]["+"]
reverse_cuts = reads[regions[0]]["-"]

# Draw "+" strand cuts in red and the negated "-" strand cuts in blue
plt.plot(forward_cuts, c="red")
plt.plot([-count for count in reverse_cuts], c="blue")

# Footprint the same region and overlay its scores in black
footprinter = wellington(regions[0], reads)
plt.plot(footprinter.scores, c="black")

plt.show()
# Remaining positional arguments: DHS sets and read files for the two
# conditions, plus the output file name.
parser.add_argument("treat_dhs", help="The DHSs belonging to the Treatment")
parser.add_argument("control_dhs", help="The DHSs belonging to the control")
parser.add_argument(
    "reads_treat", help="The BAM file containing the Treatment DNase-seq data")
parser.add_argument("reads_control", help="The BAM file containing the Control DNase-seq data")
parser.add_argument("output", help="filename to write the output to")
args = parser.parse_args()

# Caching is switched off in both handlers when args.l is set
reads_treat = pyDNase.BAMHandler(args.reads_treat, caching=not args.l, ATAC=args.A)
reads_control = pyDNase.BAMHandler(args.reads_control, caching=not args.l, ATAC=args.A)
treat_dhs = pyDNase.GenomicIntervalSet(args.treat_dhs)
control_dhs = pyDNase.GenomicIntervalSet(args.control_dhs)
regions = pyDNase.GenomicIntervalSet(args.regions)

# Running totals of cuts and of interval lengths for each condition
treat_total_cuts = 0
control_total_cuts = 0
treat_base_pairs = 0
control_base_pairs = 0

puts("Calculating enrichment for Treatment")
for i in progress.bar(treat_dhs):
    # Sum every value of the mapping returned for this interval
    # (presumably one cut-count array per strand - confirm against BAMHandler)
    treat_total_cuts += sum([sum(j) for j in list(reads_treat[i].values())])
    treat_base_pairs += len(i)

puts("Calculating enrichment for Control")
for i in progress.bar(control_dhs):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import matplotlib.pyplot as plt
import pyDNase
from pyDNase.footprinting import wellington

# Open the reads and regions from files in the working directory
reads = pyDNase.BAMHandler("example.bam")
regions = pyDNase.GenomicIntervalSet("example.bed")

# Look up the cut counts of the first region, one lookup per strand
forward_cuts = reads[regions[0]]["+"]
reverse_cuts = reads[regions[0]]["-"]

# Draw "+" strand cuts in red and the negated "-" strand cuts in blue
plt.plot(forward_cuts, c="red")
plt.plot(-reverse_cuts, c="blue")

# Footprint the same region and overlay its scores in black
footprinter = wellington(regions[0], reads)
plt.plot(footprinter.scores, c="black")

plt.show()
bamFileName = sys.argv[2]
outputFileName = sys.argv[3]

# Parameters
cutoff = -30
footprintSizes = range(6, 40, 1)
to_remove = []

# Creating new region file name with the first three columns only
newRegionFileName = outputFileName + "regions.bed"
# The columns are extracted in Python rather than by shelling out to
# `cut -f 1,2,3`: a command line built by string concatenation breaks (or runs
# unintended commands) when a path contains spaces or shell metacharacters.
# As with cut, a line with fewer than three tab-separated fields is kept whole.
with open(regionFileName) as regionFile, open(newRegionFileName, "w") as newRegionFile:
    for line in regionFile:
        columns = line.rstrip("\n").split("\t")[:3]
        newRegionFile.write("\t".join(columns) + "\n")
to_remove.append(newRegionFileName)

# Execution
regions = pyDNase.GenomicIntervalSet(newRegionFileName)
reads = pyDNase.BAMHandler(bamFileName)
# The context manager closes the output even if footprinting a region fails
with open(outputFileName, "w") as outputFile:
    for region in regions:
        footprinter = fp.wellington(region, reads, shoulder_sizes=range(35, 36),
                                    footprint_sizes=footprintSizes, FDR=0, bonferroni=0)
        footprints = footprinter.footprints(withCutoff=cutoff)
        # One tab-separated line per footprint
        for e in footprints:
            fields = [e.chromosome, e.startbp, e.endbp, e.label, e.score, e.strand]
            outputFile.write("\t".join([str(k) for k in fields]) + "\n")
# Remaining command line arguments: input BAM, output TSV and the window size
parser.add_argument('bam_file', type = str, help = 'BAM file containing reads to plot')
parser.add_argument('outfile', type = str, help = 'Output file (.tsv)')
parser.add_argument('-w', '--window', dest = 'w', type = int, default = 200, help = 'Window size to plot. Default = 200bp')
args = parser.parse_args()

#################################################################################################################################

# Read BAM file
reads = pyDNase.BAMHandler(args.bam_file)
# Calculate the distance to extend footprints by (window size / 2)
# NOTE(review): args.w is an int, so `/` floors on Python 2 (ceil is then a
# no-op) but is true division on Python 3, where odd windows round up -
# confirm which behaviour is intended.
extend = int(math.ceil(args.w / 2))
# Get regions from BED file
regions = pyDNase.GenomicIntervalSet(args.bed_file)
# Keep track of number of forward and reverse reads
fwd_cut_tracking = dict()
rev_cut_tracking = dict()

sys.stderr.write('Counting cuts in regions...\n')
for site in progress.bar(regions):
    # Get chromosome, strand, start and end positions for site
    chrom = site.chromosome
    start = site.startbp
    end = site.endbp
    strand = site.strand
    # Calculate the center position for the site