Пример #1
0
    def get_overlaping_clusters(self, region, overlap=1):
        clusters = []
        bam_tell, read_start = self.get_bam_tell(region)
        print "TELL", bam_tell, read_start
        if bam_tell or region.start < LINEAR_SIZE*4:
            r = BamReader(self.bam_path, self.logger, bam_tell, read_start, self.chr_dict_inv)
            for line in r:
                c = Cluster(read=SAM, cached=False, read_half_open=self.read_half_open, rounding=self.rounding)
                try:
                    c.read_line(line)
                except InvalidLine:
                    print "Invalid line, .bam or .bai corrupt"
                    break

                if c.overlap(region) >= overlap:                
                    clusters.append(c)
                elif c.start > region.end or c.name != region.name:
                    break

        if len(clusters) > 0:
            print "Num clusters", len(clusters)
            print "first:", clusters[0].start, clusters[0].end
            if len(clusters) > 1: print "end:", clusters[-1].start, clusters[-1].end
        else:
            print "No clusters found!"
        print

        return clusters
Пример #2
0
    def test_overlap(self):
        # Checks the value returned by Cluster.overlap() for several pairs of
        # clusters parsed from BED and PK formatted lines.
        def parse(fmt, text, **options):
            # Create a Cluster for the given format and load one line into it.
            parsed = Cluster(read=fmt, **options)
            parsed.read_line(text)
            return parsed

        chr1_head = parse(BED, 'chr1 1 100 hola 666 +')
        chr1_shifted = parse(BED, 'chr1 51 200 hola 666 +')
        chr3_peak = parse(PK, 'chr3 1 100 100:1')

        # The three chr5 lines are only parsed; no assertion below uses them.
        parse(BED, 'chr5 1 1000 hola 666 +')
        parse(PK, 'chr5 1 300 300:1')
        parse(BED, 'chr5 100 900 hola 666 +')

        discarded = parse(BED, 'chrX	61836251	61836287	id:105282	1000	+', read_half_open=True)
        satellite = parse(BED, 'chrX	61836270	61837703	Satellite')

        # (left, right, expected value of left.overlap(right))
        expectations = (
            (discarded, satellite, 0.5),
            (chr1_head, chr3_peak, 0),
            (chr1_head, chr1_head, 1),
            (chr1_head, chr1_shifted, 0.5),
        )
        for left, right, expected in expectations:
            self.assertEqual(left.overlap(right), expected)
Пример #3
0
    def get_overlaping_clusters(self, region, overlap=1):
        """Return the reads that ``samtools view`` reports for ``region`` and that overlap it.

        ``samtools view <bam_path> <name>:<start>-<end>`` is executed and each
        non-empty output line is parsed into a ``Cluster``. Parsing stops at
        the first invalid line, or at the first read that does not overlap
        enough and either starts after ``region.end`` or has a different
        ``name`` than the region.

        :param region: object exposing ``name``, ``start`` and ``end``; it is
            also passed to ``Cluster.overlap``.
        :param overlap: minimum value ``Cluster.overlap(region)`` must reach
            for a read to be kept.
        :return: list of the ``Cluster`` objects that were kept, in output
            order. The list is empty when samtools produces no output or
            cannot be started.
        """
        clusters = []
        self.logger.debug('Launching Samtools for %s...'%region)
        # The command is passed as an argument list and run without a shell,
        # so a BAM path or region name containing spaces or shell
        # metacharacters reaches samtools as a single, literal argument.
        command = [
            "samtools",
            "view",
            "%s" % self.bam_path,
            "%s:%s-%s" % (region.name, region.start, region.end),
        ]
        try:
            proc = subprocess.Popen(command, stdout=subprocess.PIPE)
        except OSError as launch_error:
            # Under a shell a missing samtools binary just yielded no output;
            # keep returning an empty result, but leave a trace of the cause.
            # NOTE(review): assumes self.logger offers error() like
            # logging.Logger does - confirm.
            self.logger.error('Could not launch samtools: %s' % launch_error)
            return clusters

        # stderr is not piped, so only the first element of the pair is useful.
        out = proc.communicate()[0]
        self.logger.debug('... done')
        lines = [text for text in out.split("\n") if text]
        self.logger.debug('Numlines in %s: %s'%(region, len(lines)))
        for line in lines:
            c = Cluster(read=SAM, cached=False, read_half_open=self.read_half_open, rounding=self.rounding)
            try:
                c.read_line(line)
            except InvalidLine:
                print("Invalid line, .bam or .bai corrupt")
                break

            if c.overlap(region) >= overlap:
                clusters.append(c)
            elif c.start > region.end or c.name != region.name:
                # Reads have moved past the region (or onto another name).
                break

        return clusters