def main():

    snps = IntervalFile("bedtools/tests/data/snps.hg18.chr21.bed")
    rmsk = IntervalFile("bedtools/tests/data/rmsk.hg18.chr21.bed")

    # find snps that overlap with repeat annotations
    for a in snps:
        for hit in rmsk.search(a.chrom, a.start, a.end):
            print a.chrom, a.start, a.end, a.name,
            print hit.chrom, hit.start, hit.end, hit.name
def main():

    bam  = Samfile("bedtools/tests/data/NA18152.bam", "rb")
    rmsk = IntervalFile("bedtools/tests/data/rmsk.hg18.chr21.bed")
    
    for al in bam:
        chrom = bam.getrname(al.rname)
        start = al.pos
        end   = al.aend
        name  = al.qname 
        for hit in rmsk.search(chrom, start, end):
            print chrom, start, end, name,
            print hit.chrom, hit.start, hit.end, hit.name
示例#3
0
def main():

    bam = Samfile("bedtools/tests/data/NA18152.bam", "rb")
    rmsk = IntervalFile("bedtools/tests/data/rmsk.hg18.chr21.bed")

    # Example 1:
    #    Method: IntervalFile.all_hits()
    #    Report _all_ of the rmsk features that overlap with the BAM alignment
    for al in bam:
        strand = "+"
        if al.is_reverse: strand = "-"
        i = Interval(bam.getrname(al.rname), al.pos, al.aend, strand)

        for hit in rmsk.all_hits(i, same_strand=True, ovlp_pct=0.75):
            print "\t".join(str(x) for x in [i, hit])
def main():

    bam  = Samfile("bedtools/tests/data/NA18152.bam", "rb")
    rmsk = IntervalFile("bedtools/tests/data/rmsk.hg18.chr21.bed")
    
    # Example 1:
    #    Method: IntervalFile.all_hits()
    #    Report _all_ of the rmsk features that overlap with the BAM alignment
    for al in bam:
        strand = "+"
        if al.is_reverse: strand = "-"
        i = Interval(bam.getrname(al.rname), al.pos, al.aend, strand)
        
        for hit in rmsk.all_hits(i, same_strand=True, ovlp_pct=0.75):
            print "\t".join(str(x) for x in [i,hit])
示例#5
0
def main():
    """
    Examples of printing each interval in an interval file.
     - Works with BED, GTF and VCF files.
     - Can be uncompressed or GZIP compressed.
    """

    # 0.1 Each interval in a BED file
    for exon in IntervalFile("bedtools/tests/data/exons.hg18.chr21.bed"):
        print exon

    # 0.2 Each gene in a GTF file
    for gene in IntervalFile("bedtools/tests/data/genes.hg18.chr21.gtf"):
        print gene

    # 0.3 Each gene in a _compressed_ GTF file
    for gene in IntervalFile("bedtools/tests/data/genes.hg18.chr21.gtf.gz"):
        print gene
def main(args):
	"""
	Examples of printing each interval in an interval file.
	- Works with BED, GTF and VCF files.
	- Can be uncompressed or GZIP compressed.
	"""

	##########################################################
	# ex1. Report the coordinates of overlap b/w exons and rmsk
	#
	# Equivalent to: intersectBed -a exons -b rmsk
	# Uses:           IntervalFile.all_hits()
	##########################################################
	genes = IntervalFile(args.genefile)
	peaks  = IntervalFile(args.peakfile)

	for gene in genes:
		for peak_hit in peaks.all_hits(gene):
			print "\t".join(str(f) for f in [gene.chrom, peak_hit.o_start, peak_hit.o_end])
示例#7
0
class IntervalFileTest(unittest.TestCase):
    file = "data/rmsk.hg18.chr21.bed"
    def setUp(self):
        self.file = os.path.join(PATH, self.file)
        self.bed = IntervalFile(self.file)

    def testOverlaps(self):
        hits = self.bed.search("chr21", 9719768, 9739768)
        print len(hits)
        self.assertEqual(len(hits), 8)
        for hit in hits:
            self.assert_(hit.start <= 9739768 and hit.end >= 9719768)

    def testStrands(self):
        hits = self.bed.search("chr21", 9719768, 9739768, "+")
        for hit in hits:
            self.assert_(hit.strand == '+')
        hits = self.bed.search("chr21", 9719768, 9739768, "-")
        for hit in hits:
            self.assert_(hit.strand == '-')
示例#8
0
def main():
    """
    """

    ##########################################################
    # ex1. Report the coordinates of overlap b/w exons and rmsk
    #
    # Equivalent to: intersectBed -a exons -b rmsk 
    # Uses:           IntervalFile.all_hits()
    ##########################################################
    exons  = IntervalFile("bedtools/tests/data/exons.hg18.chr21.bed")
    rmsk   = IntervalFile("bedtools/tests/data/rmsk.hg18.chr21.bed")
    
    # allow 1kb of "slop" on each side of the exon 
    # when looking for hits
    window = 1000
    for exon in exons:
        # add the slop and search
        exon_slop = Interval(exon.chrom, exon.start-window, exon.end + window, exon.strand)
        for rmsk_hit in rmsk.all_hits(exon_slop):
            print "\t".join(str(f) for f in [exon, rmsk_hit])
示例#9
0
def main():
    """
    """

    ##########################################################
    # ex1. Report the coordinates of overlap b/w exons and rmsk
    #
    # Equivalent to: intersectBed -a exons -b rmsk
    # Uses:           IntervalFile.all_hits()
    ##########################################################
    exons = IntervalFile("bedtools/tests/data/exons.hg18.chr21.bed")
    rmsk = IntervalFile("bedtools/tests/data/rmsk.hg18.chr21.bed")

    # allow 1kb of "slop" on each side of the exon
    # when looking for hits
    window = 1000
    for exon in exons:
        # add the slop and search
        exon_slop = Interval(exon.chrom, exon.start - window,
                             exon.end + window, exon.strand)
        for rmsk_hit in rmsk.all_hits(exon_slop):
            print "\t".join(str(f) for f in [exon, rmsk_hit])
示例#10
0
class IntervalFileTest(unittest.TestCase):
    file = "data/rmsk.hg18.chr21.bed"
    def setUp(self):
        self.file = os.path.join(PATH, self.file)
        self.bed = IntervalFile(self.file)

    def testOverlaps(self):
        i    = Interval("chr21", 9719768, 9739768)
        hits = self.bed.all_hits(i)
        print len(hits)
        self.assertEqual(len(hits), 8)
        for hit in hits:
            self.assert_(hit.start <= 9739768 and hit.end >= 9719768)

    def testStrands(self):
        i = Interval("chr21", 9719768, 9739768, "+")
        hits = self.bed.all_hits(i, same_strand=True)
        for hit in hits:
            self.assert_(hit.strand == '+')

        i = Interval("chr21", 9719768, 9739768, "-")
        hits = self.bed.all_hits(i, same_strand=True)
        for hit in hits:
            self.assert_(hit.strand == '-')
示例#11
0
def Main():
    """Count reads per interval for every BAM file and write a table.

    Options come from ParseArg(); the attributes read here are ``bams``,
    ``name``, ``normalize``, ``output``, ``interval``, ``len`` and
    ``fragmentL``. The result is written to "<args.output>_count.txt" as a
    tab-separated table: chr, start, end, name, score, then one column per
    BAM file holding the value returned by Count_num().

    When ``args.normalize`` is set, the mapped reads of each BAM file are
    counted first and that total is handed to Count_num(); otherwise the
    total passed is 0.
    """
    args=ParseArg()

    #store bam files and count information:
    bams={}
    total_reads=np.zeros(len(args.bams))
    try:
        for i, bam_path in enumerate(args.bams):
            temp_name=args.name[i]
            print >> sys.stderr, "\nReading bam file:"+temp_name+"..."
            bams[temp_name]=pysam.Samfile(bam_path,'rb')
            if args.normalize:
                for b in bams[temp_name]:
                    if b.is_unmapped:
                        continue
                    total_reads[i]+=1
                    # Progress is reported only when the mapped-read count
                    # actually advances; checking it for unmapped reads too
                    # would repeat the message while the count sits at 0 or
                    # at a multiple of 10000.
                    if total_reads[i]%10000==0:
                        print >> sys.stderr, "  reading %d reads..\r"%(total_reads[i]),

        with open(args.output+"_count.txt",'w') as output:
            #read interval regions:
            intervals=IntervalFile(args.interval)
            header='\t'.join(['chr','start','end','name','score']) + '\t' + '\t'.join(str(f) for f in args.name)
            output.write(header+'\n')

            print >> sys.stderr,"\n\n Start counting reads for intervals..."
            for interval in intervals:
                if 'random' in interval.chrom: continue
                row=[interval.chrom,interval.start,interval.end,interval.name,interval.score]
                # total_reads has one entry per BAM file, so zip() visits
                # exactly the names that were opened above.
                for name, total in zip(args.name, total_reads):
                    row.append(Count_num(bams[name],interval,args.len,args.fragmentL,total))
                output.write('\t'.join(str(f) for f in row)+'\n')
    finally:
        #close files
        for handle in bams.values():
            handle.close()
示例#12
0
def main():
    # setup a reverse_complement translation
    rev_table = string.maketrans('ACGTacgt', 'TGCAtgca')

    def revcomp(seq, rev_table):
        return seq.translate(rev_table)

    # open your fasta file
    fasta = Fastafile("bedtools/tests/data/chr21.fa")
    # open your bed file
    bed = IntervalFile("bedtools/tests/data/rmsk.hg18.chr21.bed")

    # for each bed, grab the the DNA in that interval
    for b in bed:
        # grab the seq, rev. comp if necessary
        seq = fasta.fetch(b.chrom, b.start, b.end)
        if b.strand == "-":
            seq = revcomp(seq, rev_table)
        # print the interval and the seq
        print b.chrom, b.start, b.end, b.strand, seq
示例#13
0
def main():
    """
    """

    ##########################################################
    # ex1. Report the coordinates of overlap b/w exons and rmsk
    #
    # Equivalent to: intersectBed -a exons -b rmsk
    # Uses:           IntervalFile.all_hits()
    ##########################################################
    exons = IntervalFile("bedtools/tests/data/exons.hg18.chr21.bed")
    rmsk = IntervalFile("bedtools/tests/data/rmsk.hg18.chr21.bed")
    for exon in exons:
        for rmsk_hit in rmsk.all_hits(exon):
            print "\t".join(
                str(f) for f in [exon.chrom, rmsk_hit.o_start, rmsk_hit.o_end])

    ##########################################################
    # ex2. Report the original features for overlapping
    #    exons and rmsk
    #
    # Equivalent to: intersectBed -a exons -b rmsk -wa -wb
    # Uses:           IntervalFile.all_hits()
    ##########################################################
    exons = IntervalFile("bedtools/tests/data/exons.hg18.chr21.bed")
    rmsk = IntervalFile("bedtools/tests/data/rmsk.hg18.chr21.bed")
    for exon in exons:
        for rmsk_hit in rmsk.all_hits(exon):
            print "\t".join(
                str(f) for f in [
                    exon.chrom, exon.start, exon.end, exon.name, exon.score,
                    exon.strand, rmsk_hit.chrom, rmsk_hit.start, rmsk_hit.end,
                    rmsk_hit.name, rmsk_hit.score, rmsk_hit.strand
                ])

    ##########################################################
    # ex3. Report the count of rmsk overlapping each exon
    #
    # Equivalent to: intersectBed -a exons -b rmsk -c
    # Uses:           IntervalFile.count_hits()
    ##########################################################
    exons = IntervalFile("bedtools/tests/data/exons.hg18.chr21.bed")
    rmsk = IntervalFile("bedtools/tests/data/rmsk.hg18.chr21.bed")
    for exon in exons:
        # get the number of hits in rmsk
        num_hits = rmsk.count_hits(exon)
        print "\t".join(
            str(f) for f in [
                exon.chrom, exon.start, exon.end, exon.name, exon.score,
                exon.strand, num_hits
            ])

    ##########################################################
    # ex4. Report exons that overlap at least one rmsk
    #
    # Equivalent to: intersectBed -a exons -b rmsk -u
    # Uses:           IntervalFile.any_hits()
    ##########################################################
    exons = IntervalFile("bedtools/tests/data/exons.hg18.chr21.bed")
    rmsk = IntervalFile("bedtools/tests/data/rmsk.hg18.chr21.bed")
    for exon in exons:
        # does this exon overlap any rmsk?
        if rmsk.any_hits(exon):
            print "\t".join(
                str(f) for f in [
                    exon.chrom, exon.start, exon.end, exon.name, exon.score,
                    exon.strand
                ])

    ##########################################################
    # ex5. Report exons that DO NOT overlap at least one rmsk
    #
    # Equivalent to: intersectBed -a exons -b rmsk -v
    # Uses:           IntervalFile.any_hits()
    ##########################################################
    exons = IntervalFile("bedtools/tests/data/exons.hg18.chr21.bed")
    rmsk = IntervalFile("bedtools/tests/data/rmsk.hg18.chr21.bed")
    for exon in exons:
        # does this exon overlap any rmsk?
        if not rmsk.any_hits(exon):
            print "\t".join(
                str(f) for f in [
                    exon.chrom, exon.start, exon.end, exon.name, exon.score,
                    exon.strand
                ])

    ##########################################################
    # ex6. Report overlap b/w exons and rmsk on the same strand
    #
    # Equivalent to: intersectBed -a exons -b rmsk -s
    # Uses:           IntervalFile.all_hits(same_strand=True)
    ##########################################################
    exons = IntervalFile("bedtools/tests/data/exons.hg18.chr21.bed")
    rmsk = IntervalFile("bedtools/tests/data/rmsk.hg18.chr21.bed")
    for exon in exons:
        # use "same_strand" to enforce, well, same strand.
        for rmsk_hit in rmsk.all_hits(exon, same_strand=True):
            print "\t".join(
                str(f) for f in [
                    exon.chrom, exon.start, exon.end, exon.strand, rmsk_hit.
                    chrom, rmsk_hit.start, rmsk_hit.end, rmsk_hit.strand
                ])

    ##########################################################
    # ex7. Report overlap b/w exons and rmsk where the rmsk
    #    feature covers at least 50% of the exon.
    #
    # Equivalent to: intersectBed -a exons -b rmsk -f 0.50
    # Uses:           IntervalFile.all_hits()
    ##########################################################
    exons = IntervalFile("bedtools/tests/data/exons.hg18.chr21.bed")
    rmsk = IntervalFile("bedtools/tests/data/rmsk.hg18.chr21.bed")
    for exon in exons:
        # use "ovlp_pct" to enforce the faction of overlap w.r.t to exon
        for rmsk_hit in rmsk.all_hits(exon, ovlp_pct=0.50):
            print "\t".join(
                str(f) for f in [
                    exon.chrom, exon.start, exon.end, exon.strand, rmsk_hit.
                    chrom, rmsk_hit.start, rmsk_hit.end, rmsk_hit.strand
                ])

    ##########################################################
    # ex8. Report overlap b/w exons and rmsk where the rmsk
    #    feature covers at least 50% of the exon.
    #
    # Equivalent to: intersectBed -a exons -b rmsk -s -f 0.50
    # Uses:           IntervalFile.all_hits()
    ##########################################################
    exons = IntervalFile("bedtools/tests/data/exons.hg18.chr21.bed")
    rmsk = IntervalFile("bedtools/tests/data/rmsk.hg18.chr21.bed")
    for exon in exons:
        # use "same_strand" to enforce, well, same strand.
        for rmsk_hit in rmsk.all_hits(exon, same_strand=True, ovlp_pct=0.50):
            print "\t".join(
                str(f) for f in [
                    exon.chrom, exon.start, exon.end, exon.strand, rmsk_hit.
                    chrom, rmsk_hit.start, rmsk_hit.end, rmsk_hit.strand
                ])
示例#14
0
                        ["help", "interval", "folder", "ovlp_pct", "suffix"])
# Defaults used when the corresponding option is not supplied.
folder = "./"
percent = 0.5
# NOTE(review): `interval_file` and `suffix` get no default in the visible
# lines; unless they are set earlier, omitting -i or -s will raise a
# NameError further down -- confirm against the part of the script above.
for o, a in opts:
    if o in ("-h", "--help"):
        show_help()
        exit(0)
    elif o in ("-i", "--interval"):
        interval_file = a
    elif o in ("-f", "--folder"):
        folder = a
    elif o in ("-p", "--ovlp_pct"):
        percent = float(a)
    elif o in ("-s", "--suffix"):
        suffix = a

# Open one IntervalFile per entry of `restlist` and build the header row.
bedlist = []
name = 'chr\tstart\tend'
for i in restlist:
    i = i.strip()
    # The path is plain string concatenation, so `folder` must already end
    # with a path separator.
    bedlist.append(IntervalFile(folder + i + suffix))
    # Column label: the part of the entry after its last underscore.
    name = name + '\t' + i.split("_")[-1]
print name
# One output row per query interval: its coordinates followed by the
# count_hits() result from each opened file, in `restlist` order.
intervals = IntervalFile(interval_file)
for i in intervals:
    line = i.chrom + '\t' + str(i.start) + '\t' + str(i.end)
    for j in bedlist:
        num = j.count_hits(i, ovlp_pct=percent)
        line = line + '\t' + str(num)
    print line
示例#15
0
from bedtools import Interval, IntervalFile

# Input: three positional command-line arguments.
#   argv[1] - tab-separated file whose first three columns are read below
#             as chromosome, start and end
#   argv[2] - tab-separated file whose 4th column is parsed as a float; it
#             is also opened as an IntervalFile
#   argv[3] - output file path
inFileName = sys.argv[1]
inFileName2 = sys.argv[2]
outFileName = sys.argv[3]

# First pass over the second file: find a value 0.01 below its smallest
# 4th-column number (capped at the 1000000 starting value). It is used
# below as the starting score for every query.
min_value = 1000000
inFile = open(inFileName2, "r")
for line in inFile:
    line = line.strip("\n")
    ll = line.split("\t")
    min_value = min(min_value, float(ll[3]) - 0.01)
inFile.close()

footprints = IntervalFile(inFileName2)
inFile = open(inFileName, "r")
outFile = open(outFileName, "w")
# NOTE(review): nothing is written to outFile and neither file is closed in
# the visible lines -- the loop body presumably continues beyond this view.
for line in inFile:
    line = line.strip("\n")
    ll = line.split("\t")

    # NOTE(review): `chr` shadows the builtin of the same name.
    chr = ll[0]
    pos1 = int(ll[1])
    pos2 = int(ll[2])

    query = Interval(chr, pos1, pos2)

    # Keep the largest name-as-float among the hits for this query,
    # starting from min_value.
    score = min_value
    for h in footprints.search(query):
        score = max(score, float(h.name))
示例#16
0
##########################################################################
# Add footprint column to features file
##########################################################################

# The new features file lives under outLoc and is registered in toRemove.
newFeaturesFileName = outLoc + "newFeaturesFile.bed"
toRemove.append(newFeaturesFileName)

# First pass over the footprint file: find a value 0.01 below its smallest
# 4th-column number (capped at the 1000000 starting value). It is used
# below as the starting score for every feature.
min_value = 1000000
inFile = open(bitInsideFootFileName, "r")
for line in inFile:
    line = line.strip("\n")
    ll = line.split("\t")
    min_value = min(min_value, float(ll[3]) - 0.01)
inFile.close()

footprints = IntervalFile(bitInsideFootFileName)
inFile = open(featuresFileName, "r")
outFile = open(newFeaturesFileName, "w")
# NOTE(review): nothing is written to outFile and neither file is closed in
# the visible lines -- the loop body presumably continues beyond this view.
for line in inFile:
    line = line.strip("\n")
    ll = line.split("\t")

    # NOTE(review): `chr` shadows the builtin of the same name.
    chr = ll[0]
    pos1 = int(ll[1])
    pos2 = int(ll[2])

    query = Interval(chr, pos1, pos2)

    # Keep the largest name-as-float among the hits for this feature,
    # starting from min_value.
    score = min_value
    for h in footprints.search(query):
        score = max(score, float(h.name))
示例#17
0
 def setUp(self):
     """Resolve self.file against PATH and open it as an IntervalFile."""
     fixture_path = os.path.join(PATH, self.file)
     self.file = fixture_path
     self.bed = IntervalFile(fixture_path)
示例#18
0
 def setUp(self):
     """Point self.file at the fixture under PATH and load it into self.bed."""
     resolved = os.path.join(PATH, self.file)
     self.file = resolved
     self.bed = IntervalFile(resolved)