Exemplo n.º 1
0
    def footprints(self, withCutoff=-30, merge=1):
        """
        Return a GenomicIntervalSet of the footprints scoring below a cutoff.

        Candidate footprints are picked greedily from ``self.scores``: the
        lowest remaining score is taken as a summit, a window of
        ``self.lengths[summit]`` positions centred on it is recorded, and the
        scores around that window are masked before the next summit is chosen.

        :param withCutoff: only positions whose score is strictly below this
            value can seed a footprint (default -30).
        :param merge: if true, candidates are visited from lowest to highest
            score and any remaining candidate whose span contains the end
            coordinate of the chosen one is dropped; if false, every candidate
            is returned.
        :return: a ``pyDNase.GenomicIntervalSet`` with one "+" strand interval
            per footprint, scored with the summit score. Empty if nothing is
            below the cutoff.
        """
        ranges = []
        # np.array() copies, so the masking below never touches self.scores
        tempMLE, templogProb = np.array(self.lengths), np.array(self.scores)

        # The size check avoids calling min() on an empty array, which raises
        while templogProb.size and templogProb.min() < withCutoff:
            minimapos = templogProb.argmin()
            minimafplen = tempMLE[minimapos]
            # Floor division keeps the bounds integral on Python 2 and 3, so
            # they remain valid slice indices and base-pair offsets
            minimaphalffplen = int(minimafplen) // 2
            lbound = max(minimapos - minimaphalffplen, 0)
            rbound = min(minimapos + minimaphalffplen, len(templogProb))
            ranges.append((lbound, rbound, templogProb.min(), minimafplen))
            # Mask the footprint plus one footprint length on either side so
            # that this neighbourhood cannot seed another summit
            mask_start = max(lbound - minimafplen, 0)
            mask_end = min(rbound + minimafplen, len(templogProb))
            templogProb[mask_start:mask_end] = 1

        returnSet = pyDNase.GenomicIntervalSet()
        if ranges:
            if merge:
                merged_ranges = []
                # Sort in place, highest score first, so that pop() always
                # yields the lowest (best) remaining score. Filtering below
                # preserves this order, so a single sort is enough.
                ranges.sort(key=lambda x: -x[2])
                while ranges:
                    best = ranges.pop()
                    merged_ranges.append(best)
                    # Drop every remaining candidate whose span contains the
                    # end coordinate of the footprint just accepted.
                    # NOTE(review): this is not a full interval-overlap test;
                    # confirm that checking only best's end is intended.
                    ranges = [
                        candidate for candidate in ranges
                        if not candidate[0] <= best[1] <= candidate[1]
                    ]
            else:
                merged_ranges = ranges
            # Convert array offsets into genomic coordinates
            for i in merged_ranges:
                rstartbp = self.interval.startbp + i[0]
                # We must add one to the end base of the footprint to account for the BED file format
                rendbp = self.interval.startbp + i[1] + 1
                region = pyDNase.GenomicInterval(self.interval.chromosome,
                                                 rstartbp,
                                                 rendbp,
                                                 strand="+",
                                                 score=i[2])
                returnSet += region
        return returnSet
Exemplo n.º 2
0
#Call footprints
import sys
import pyDNase
import pyDNase.footprinting as fp

# Command line: <regions.bed> <reads.bam> <output.bed> <cutoff> <mode>
# The mode only decides which footprinting class is used: 'singleEnd' selects
# wellington1D, any other value selects wellington.
if sys.argv[5] == 'singleEnd':
    footprinter_class = fp.wellington1D
else:
    footprinter_class = fp.wellington

regions = pyDNase.GenomicIntervalSet(sys.argv[1])
reads = pyDNase.BAMHandler(sys.argv[2])
cutoff = int(sys.argv[4])

# Open the output once (append mode, as before) instead of once per region.
with open(sys.argv[3], "a") as bedout:
    # Visit every region; range(len(regions) - 1) left the last one out.
    for x in range(len(regions)):
        footprinter = footprinter_class(regions[x], reads)
        footprints = footprinter.footprints(withCutoff=cutoff)
        bedout.write(str(footprints))
Exemplo n.º 3
0
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import argparse
import pyDNase
from clint.textui import progress

# Command-line interface: an optional ATAC-seq switch and three positional paths
parser = argparse.ArgumentParser(
    description=
    'writes a BED file with the FOS for the interval specified as the score')
parser.add_argument("-A",
                    action="store_true",
                    help="ATAC-seq mode (default: False)",
                    default=False)
parser.add_argument(
    "regions",
    help="BED file of the regions you want to generate the average profile for"
)
parser.add_argument("reads", help="The BAM file containing the DNase-seq data")
parser.add_argument("output", help="filename to write the output to")
args = parser.parse_args()

reads = pyDNase.BAMHandler(args.reads, ATAC=args.A)
regions = pyDNase.GenomicIntervalSet(args.regions)

# The context manager guarantees the output is flushed and closed, even if
# scoring a region raises part-way through.
with open(args.output, "w") as outfile:
    for i in progress.bar(regions):
        # Store the FOS returned by the BAM handler as the interval's score
        i.score = reads.FOS(i)
        # One interval per line; write() behaves the same on Python 2 and 3,
        # unlike the Python 2 only "print >> file" form.
        outfile.write(str(i) + "\n")
Exemplo n.º 4
0
 def test_footprinting(self):
     """Check wellington scores and footprint lengths for the example region.

     Footprints the first region of the bundled example data and compares
     ``footprinter.scores`` (to 3 decimal places) and ``footprinter.lengths``
     (exactly) against the hard-coded reference values below.
     """
     # Load the example reads and regions that ship with pyDNase
     reads = pyDNase.BAMHandler(pyDNase.example_reads())
     regions = pyDNase.GenomicIntervalSet(pyDNase.example_regions())
     footprinter = wellington(regions[0], reads)
     # Note - we only check the accuracy of the footprinting to 3 decimal places to allow for differences in floating point numbers
     # Reference scores, one value per position; both ends of the trace are 0.0
     numpy.testing.assert_array_almost_equal(footprinter.scores, [
         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
         0.0, -0.8505197962574915, -0.7522459055434079, -0.6405956238609599,
         -0.35029217770692905, -0.19445213824845226, -0.04510918998207078,
         -0.013127544708030047, -0.019434755711449096,
         -0.017813062409838532, -0.4899192539679181, -0.7366170062412767,
         -1.160234291218491, -1.4932241116142613, -2.528451574312211,
         -2.9873463332686545, -4.0789439624702215, -4.608073840135845,
         -4.6080738401358445, -5.46591166889954, -6.317058518040485,
         -7.846849141309235, -8.70970430615968, -7.84684914298093,
         -10.57133857477595, -9.524456623200592, -8.450720744685238,
         -7.351088844276472, -6.227879918162327, -5.085807684913266,
         -1.412414402021511, -3.461932293846784, -3.6968244901998126,
         -3.6968244901997713, -3.9374380500569046, -3.9374380500569046,
         -3.502106381128687, -3.968687434788506, -3.968687434788506,
         -3.9686874347885044, -4.210084222760481, -4.708248147109799,
         -4.481083945460659, -4.614616491048433, -6.331304868565458,
         -6.7188196319447515, -7.805240790859276, -10.096125803164037,
         -10.096125904865069, -9.804317009970552, -10.942957174739428,
         -10.831197056706369, -9.451636014876547, -9.271803479479166,
         -10.547425524609011, -11.356756808330887, -10.173763450595242,
         -17.266997956146163, -24.135650052599853, -26.79974412054261,
         -24.068532700189742, -20.83033463447785, -17.442306072203564,
         -3.3271869067645095, -1.552524387513255, -1.2303389949451933,
         -1.116146321342096, -0.7241346073398854, -0.8217741198401821,
         -0.5077397193727583, -0.4619110913457732, -0.22648726483418524,
         -0.08368942693734599, -0.04662652321248819, -0.10740322088702083,
         -0.1600382576388667, -0.09849358892510252, -0.2996877100052051,
         -0.4956516466712493, -0.8286771565689258, -0.7441816651207845,
         -0.5312102440124086, -6.089145200199429, -54.524611990632465,
         -55.11290166247622, -53.73358776712574, -56.37380673644542,
         -59.597668457279916, -63.142121596069494, -69.8245790871056,
         -76.97479986221292, -83.6326531975367, -88.05928977864403,
         -87.62205344847811, -90.7846299628178, -94.85120273316905,
         -90.09506169785546, -85.09363194018195, -90.25622681870428,
         -80.40916250197246, -84.41195387381595, -96.25001089840575,
         -105.99203665518576, -109.60076099775432, -116.04973655820825,
         -124.40507207962382, -120.71820677125163, -121.99289957155713,
         -121.7696295849731, -128.86709184814546, -130.00197395916774,
         -138.7286574562139, -150.07398897152254, -141.58993458465335,
         -134.33745073269844, -134.76596995468543, -106.6912682602024,
         -96.02214212537493, -85.8950778423277, -73.04392809450209,
         -54.85091731066348, -44.010732916962205, -31.573437293391223,
         -23.59371038683095, -18.62378346291484, -3.2863459020700057,
         -1.8733702431391752, -0.492074167081423, -0.27948577530733343,
         -0.27948577530733343, -0.07138091975833981, -0.09972653646891905,
         -0.05418579937724513, -0.024132554170139438, -0.021842812415429565,
         -0.9566534364564785, -6.932360951667957, -11.187077720714367,
         -13.553355643835602, -14.21631406001477, -14.983929833667665,
         -15.422758574896921, -18.32278174888965, -18.2834926735795,
         -17.265359820713286, -16.13035610465361, -14.086076680349992,
         -13.521427957090859, -12.515293283803214, -11.480271740126698,
         -9.92078604101271, -8.797191973771438, -6.985510255611701,
         -5.426767915467293, -5.183152081566609, -3.7475983370968295,
         -1.9153547972282414, -0.0006083021245538324, -13.64272847695586,
         -10.286808471857325, -15.63569341874549, -20.86940117070692,
         -22.928591109686124, -30.496433497261098, -26.10052633266505,
         -29.221144392666716, -24.0276270737085, -21.301001754269702,
         -20.97154340860586, -15.798224427435104, -17.780912132981612,
         -24.823354886252613, -24.604927499889286, -24.955334454941635,
         -38.74241644973382, -43.782982787325366, -46.80273522972689,
         -46.08571305295883, -47.92277577875605, -41.4868217475951,
         -37.915322367616675, -34.16174895135005, -33.58267055798403,
         -32.06130865601216, -34.094574908150825, -39.695727106225405,
         -40.120719852615196, -41.05121481573844, -42.01796136083251,
         -39.75209693618059, -35.73339613779332, -34.731089314533676,
         -32.694583271242884, -29.577625993685, -28.026659577292953,
         -25.215089099008644, -25.174202473704753, -21.952113990014446,
         -17.028869764873075, -15.578727453806595, -16.1579750791396,
         -12.974390056172448, -8.418484753962995, -5.7847304546785905,
         -2.2267773783077134, -1.4570520375724902, -1.543691534890984,
         -1.575957362444019, -0.7176800307627448, -0.7968619556272615,
         -4.841045489929452, -5.248527604937139, -1.0472142687516643,
         -1.0630763089203221, -2.185755905394793, -3.8307492546267254,
         -4.993169872339857, -7.2764872801107385, -6.792829090234741,
         -6.452991771598523, -6.952945781664499, -8.215168486202954,
         -6.613961853070211, -22.150574756810474, -28.514525290020345,
         -27.33821547951633, -29.034538366843996, -33.82258103970177,
         -41.26481032907057, -40.912839794048644, -48.684226156049405,
         -49.44508720397513, -61.863467137712874, -70.11156862148243,
         -82.93974699146762, -91.62613467860213, -91.54466150389183,
         -73.5404690802315, -75.77506886003911, -78.05398228595476,
         -84.42906672420139, -93.01020782082938, -89.65901048860756,
         -109.20614016921928, -121.0826042903611, -120.2996268556599,
         -117.38782641714545, -128.50467987996305, -128.9595101418021,
         -133.14841986541902, -136.82233726671367, -133.94746637928725,
         -154.5649504690748, -164.11983575086742, -159.85307484109336,
         -151.89784688535133, -153.56557629402886, -146.72984757341305,
         -135.04501822595842, -127.92055598311715, -126.08111294376953,
         -120.03403862241993, -99.25696665821185, -71.19178328684012,
         -64.94518489350295, -59.98207339614661, -54.12991577221696,
         -43.206052468123545, -29.456860663206527, -6.411526985333728,
         -6.44709453786988, -6.215828945120546, -5.762898291384889,
         -4.3769156224166315, -3.2727915503830047, -2.616087927600661,
         -2.313254659995694, -1.8641066899878078, -1.8186414374916933,
         -0.8008712043775049, -0.6426129783652371, -0.5224073311989104,
         -0.2710345166975603, -0.43819657644966853, -1.2626459311104576,
         -1.9408301832235342, -3.9812039032702886, -3.9812039032702886,
         -2.861605777578473, -3.2137507785013066, -3.2137507785013066,
         -2.9669916392942004, -3.2617340566815645, -3.9686874347885044,
         -3.54350638697767, -3.54350638697767, -3.1070679887817896,
         -2.8384054421005627, -2.2611557931086583, -2.9566374983191013,
         -2.2617270920463315, -2.5370237970085574, -3.2091208219605813,
         -3.0532448758817448, -1.6966894030794892, -2.2744775410764126,
         -2.729866824495538, -3.080565957210189, -2.808261821233711,
         -3.251159821714309, -2.1636899060453407, 0.0, 0.0, 0.0, 0.0, 0.0,
         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
     ],
                                             decimal=3)
     # Reference footprint lengths, compared exactly; entries are 0 where the
     # reference scores above are 0.0 at both ends of the trace
     numpy.testing.assert_array_equal(footprinter.lengths, [
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 11, 15,
         13, 11, 15, 13, 25, 25, 11, 13, 15, 17, 19, 21, 23, 25, 25, 11, 13,
         15, 17, 15, 21, 19, 17, 15, 13, 11, 21, 25, 25, 25, 25, 25, 25, 25,
         25, 25, 25, 25, 25, 23, 25, 15, 17, 19, 19, 17, 15, 13, 19, 25, 25,
         23, 25, 11, 11, 13, 15, 13, 11, 11, 25, 25, 15, 15, 19, 17, 15, 13,
         11, 25, 25, 25, 11, 15, 17, 19, 15, 23, 11, 25, 25, 25, 25, 25, 25,
         21, 23, 25, 25, 25, 25, 25, 23, 25, 25, 25, 21, 23, 25, 25, 23, 25,
         25, 21, 19, 19, 21, 25, 25, 25, 23, 25, 23, 21, 19, 19, 15, 15, 11,
         11, 11, 23, 25, 25, 25, 25, 25, 25, 25, 25, 25, 11, 11, 13, 15, 11,
         11, 21, 17, 15, 13, 15, 13, 25, 25, 23, 21, 19, 19, 13, 13, 13, 11,
         25, 11, 13, 15, 17, 11, 13, 15, 17, 15, 13, 11, 25, 15, 17, 19, 19,
         23, 25, 25, 21, 23, 21, 19, 17, 13, 25, 25, 25, 25, 25, 25, 25, 23,
         21, 19, 25, 25, 23, 11, 11, 15, 15, 13, 15, 13, 11, 19, 15, 13, 11,
         11, 11, 11, 11, 15, 15, 19, 21, 23, 25, 25, 23, 25, 25, 15, 11, 13,
         15, 17, 19, 21, 23, 25, 25, 25, 23, 25, 25, 25, 25, 25, 25, 25, 25,
         25, 21, 23, 25, 25, 25, 25, 25, 25, 21, 23, 25, 25, 25, 25, 23, 25,
         25, 25, 25, 23, 21, 19, 15, 15, 13, 11, 13, 25, 25, 25, 25, 25, 25,
         25, 23, 25, 25, 25, 25, 11, 11, 11, 13, 11, 11, 15, 17, 17, 21, 23,
         25, 25, 25, 25, 23, 17, 19, 17, 15, 13, 11, 19, 11, 13, 15, 15, 13,
         11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
     ])
Exemplo n.º 5
0
 def test_BED_reading(self):
     """The example regions file, rendered with str(), equals one expected BED line."""
     expected_bed = 'chr6\t170863142\t170863532\t0\t0.0\t+\n'
     example_path = pyDNase.example_regions()
     loaded = pyDNase.GenomicIntervalSet(example_path)
     self.assertEqual(str(loaded), expected_bed)
Exemplo n.º 6
0
# Sanity check parameters from the user

# Convert the "from,to,step" string supplied by the user into a range
try:
    args.footprint_sizes = xrange_from_string(args.footprint_sizes)
except ValueError:
    raise RuntimeError("Footprint sizes must be supplied as from,to,step")

# Explicit checks rather than assert statements: asserts are removed when
# Python runs with -O, which would let invalid values through unnoticed.
if not 0 < args.FDR_cutoff < 1:
    raise RuntimeError("FDR must be between 0 and 1")
if not args.FDR_limit < 0:
    raise RuntimeError("FDR limit must be less than 0")

# Treatment
reads2 = pyDNase.BAMHandler(args.treatment_bam, caching=0, ATAC=args.A)
# Control
reads1 = pyDNase.BAMHandler(args.control_bam, caching=0, ATAC=args.A)
# Regions of Interest
regions = pyDNase.GenomicIntervalSet(args.bedsites)
# Output (buffering=1 requests line buffering for these text files)
treatment_output = open(args.treatment_only_output, "w", buffering=1)
control_output = open(args.control_only_output, "w", buffering=1)

# Determine Number of CPUs to use: the user's choice, else every available core
CPUs = args.processes if args.processes else mp.cpu_count()
# NOTE: This roughly scales at about 450mb per 300 regions held in memory
max_regions_cached_in_memory = 50 * CPUs
p = mp.Pool(CPUs)

# Parenthesised single-argument form prints the same on Python 2 and Python 3
print("Performing differential footprinting...")
Exemplo n.º 7
0
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import matplotlib.pyplot as plt
import pyDNase
from pyDNase.footprinting import wellington

# Load the example reads and regions bundled with pyDNase
reads = pyDNase.BAMHandler(pyDNase.example_reads())
regions = pyDNase.GenomicIntervalSet(pyDNase.example_regions())
first_region = regions[0]

# Plot the cut data: "+" strand as-is in red, "-" strand negated in blue
forward_cuts = reads[first_region]["+"]
reverse_cuts = reads[first_region]["-"]
plt.plot(forward_cuts, c="red")
plt.plot([-count for count in reverse_cuts], c="blue")

# Footprint the same region and overlay the score trace in black
footprinter = wellington(first_region, reads)
plt.plot(footprinter.scores, c="black")

plt.show()
Exemplo n.º 8
0
# Remaining positional arguments: DHS files, BAM files and the output path
parser.add_argument("treat_dhs", help="The DHSs belonging to the Treatment")
parser.add_argument("control_dhs", help="The DHSs belonging to the control")
parser.add_argument("reads_treat",
                    help="The BAM file containing the Treatment DNase-seq data")
parser.add_argument("reads_control",
                    help="The BAM file containing the Control DNase-seq data")
parser.add_argument("output", help="filename to write the output to")
args = parser.parse_args()

# Both BAM handlers are built with the same caching / ATAC options
handler_options = {"caching": not args.l, "ATAC": args.A}
reads_treat = pyDNase.BAMHandler(args.reads_treat, **handler_options)
reads_control = pyDNase.BAMHandler(args.reads_control, **handler_options)

treat_dhs = pyDNase.GenomicIntervalSet(args.treat_dhs)
control_dhs = pyDNase.GenomicIntervalSet(args.control_dhs)
regions = pyDNase.GenomicIntervalSet(args.regions)

# Running totals of cuts and of covered length for each condition
treat_total_cuts = control_total_cuts = 0
treat_base_pairs = control_base_pairs = 0

puts("Calculating enrichment for Treatment")
for i in progress.bar(treat_dhs):
    # Sum every value stored for this interval (presumably one cut array
    # per strand -- verify against BAMHandler) into the treatment total
    cut_arrays = reads_treat[i].values()
    treat_total_cuts += sum(sum(cuts) for cuts in cut_arrays)
    treat_base_pairs += len(i)

puts("Calculating enrichment for Control")
for i in progress.bar(control_dhs):
Exemplo n.º 9
0
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import matplotlib.pyplot as plt
import pyDNase
from pyDNase.footprinting import wellington

# Load reads and regions from the example files in the working directory
reads = pyDNase.BAMHandler("example.bam")
regions = pyDNase.GenomicIntervalSet("example.bed")
target = regions[0]

# Plot the cut data: "+" strand in red, negated "-" strand in blue
plus_strand = reads[target]["+"]
minus_strand = reads[target]["-"]
plt.plot(plus_strand, c="red")
plt.plot(-minus_strand, c="blue")

# Footprint the same region and draw the score trace in black
footprinter = wellington(target, reads)
plt.plot(footprinter.scores, c="black")

plt.show()
Exemplo n.º 10
0
bamFileName = sys.argv[2]
outputFileName = sys.argv[3]

# Parameters
cutoff = -30
footprintSizes = range(6, 40, 1)
to_remove = []

# Creating new region file name with the first three columns only.
# Done in Python rather than through a shell "cut" command, so file names
# containing spaces or shell metacharacters are handled safely and a missing
# input file raises immediately instead of being ignored.
newRegionFileName = outputFileName + "regions.bed"
with open(regionFileName, "rb") as regionFile, \
        open(newRegionFileName, "wb") as newRegionFile:
    for line in regionFile:
        # Keep at most the first three tab-separated fields of each line
        columns = line.rstrip(b"\n").split(b"\t")[:3]
        newRegionFile.write(b"\t".join(columns) + b"\n")
to_remove.append(newRegionFileName)

# Execution
regions = pyDNase.GenomicIntervalSet(newRegionFileName)
reads = pyDNase.BAMHandler(bamFileName)
# The context manager closes the output even if footprinting a region fails
with open(outputFileName, "w") as outputFile:
    for region in regions:
        footprinter = fp.wellington(region,
                                    reads,
                                    shoulder_sizes=range(35, 36),
                                    footprint_sizes=footprintSizes,
                                    FDR=0,
                                    bonferroni=0)
        footprints = footprinter.footprints(withCutoff=cutoff)
        # One tab-separated line per footprint
        for e in footprints:
            outputFile.write("\t".join([
                str(k) for k in
                [e.chromosome, e.startbp, e.endbp, e.label, e.score, e.strand]
            ]) + "\n")
Exemplo n.º 11
0
parser.add_argument('bam_file', type=str, help='BAM file containing reads to plot')
parser.add_argument('outfile', type=str, help='Output file (.tsv)')
parser.add_argument('-w', '--window', dest='w', type=int, default=200,
                    help='Window size to plot. Default = 200bp')

args = parser.parse_args()

#################################################################################################################################

# Read BAM file
reads = pyDNase.BAMHandler(args.bam_file)

# Calculate the distance to extend footprints by: half the window size,
# rounded up. Pure integer arithmetic gives the same result on Python 2 and
# Python 3, whereas math.ceil(args.w / 2) rounded down under Python 2's
# integer division.
extend = (args.w + 1) // 2

# Get regions from BED file
regions = pyDNase.GenomicIntervalSet(args.bed_file)

# Keep track of number of forward and reverse reads
fwd_cut_tracking = dict()
rev_cut_tracking = dict()

sys.stderr.write('Counting cuts in regions...\n')

for site in progress.bar(regions):
	# Get chromosome, strand, start and end positions for site
	chrom = site.chromosome
	start = site.startbp
	end = site.endbp
	strand = site.strand

	# Calculate the center position for the site