Пример #1
0
    def get_overlaping_clusters(self, region, overlap=1):
        clusters = []
        bam_tell, read_start = self.get_bam_tell(region)
        print "TELL", bam_tell, read_start
        if bam_tell or region.start < LINEAR_SIZE*4:
            r = BamReader(self.bam_path, self.logger, bam_tell, read_start, self.chr_dict_inv)
            for line in r:
                c = Cluster(read=SAM, cached=False, read_half_open=self.read_half_open, rounding=self.rounding)
                try:
                    c.read_line(line)
                except InvalidLine:
                    print "Invalid line, .bam or .bai corrupt"
                    break

                if c.overlap(region) >= overlap:                
                    clusters.append(c)
                elif c.start > region.end or c.name != region.name:
                    break

        if len(clusters) > 0:
            print "Num clusters", len(clusters)
            print "first:", clusters[0].start, clusters[0].end
            if len(clusters) > 1: print "end:", clusters[-1].start, clusters[-1].end
        else:
            print "No clusters found!"
        print

        return clusters
Пример #2
0
 def test_extend_bug(self):
     """The sum of two BED reads, each extended to 100, must have length 200."""
     reads = []
     for bed_line in ('chr3 1 35 666 noname +', 'chr3 156 200 666 noname -'):
         read = Cluster(read=BED, write=PK)
         read.read_line(bed_line)
         reads.append(read)

     for read in reads:
         read.extend(100)

     forward, reverse = reads
     merged = forward + reverse
     self.assertEqual(200, len(merged))
Пример #3
0
 def test_split_subtract_result(self):
     """absolute_split(threshold=0) must return exactly the expected pieces.

     The profile mixes positive, zero and negative levels; each returned
     cluster is compared through its ``write_line()`` output.
     """
     sub_result = Cluster(write_half_open=True, cached=True)
     sub_result.read_line('chr4 1 300 20:1|40:0|20:3|20:0.3|10:-6|80:1|10:0')
     clusters = sub_result.absolute_split(threshold=0)
     expected = [
         'chr4\t0\t20\t20:1.00\t1.0\t.\t10\t20.0\n',
         'chr4\t60\t100\t20:3.00|20:0.30\t3.0\t.\t70\t66.0\n',
         'chr4\t110\t190\t80:1.00\t1.0\t.\t150\t80.0\n',
     ]
     # Without this check an empty or truncated split result would make
     # the comparison loop pass without asserting anything.
     self.assertEqual(len(clusters), len(expected))
     for piece, expected_line in zip(clusters, expected):
         self.assertEqual(piece.write_line(), expected_line)
Пример #4
0
    def setUp(self):
        """Create the clusters shared by the tests of this case."""
        first_tag = Cluster('chr1', 1, 10, read=PK)
        second_tag = Cluster('chr1', 5, 14, read=PK)
        self.tag_list_short = [first_tag, second_tag]

        short = Cluster(read=PK, rounding=True)
        short.read_line('chr1 1 14 4:1|6:2|4:1')
        self.cluster_short = short

        pk = Cluster(read=PK, write=PK, rounding=True)
        pk.read_line('chr1 1 15 4:1|1:2|2:1|3:4|2:5|2:2|1:1')
        self.pk_cluster = pk

        bed = Cluster(rounding=True, read=BED, write=BED)
        bed.read_line('chr1 1 100 hola 666 +')
        self.bed_tag = bed

        # Two empty PK clusters for tests to fill in themselves.
        self.cluster1 = Cluster(read=PK, rounding=True)
        self.cluster2 = Cluster(read=PK, rounding=True)
Пример #5
0
    def test_split(self):
        """The first three clusters returned by split(0.01) must match the expected ones."""
        source = Cluster(rounding=True)
        source.read_line('chr1  100  215  5:1|10:5|5:7|5:80|5:1|5:40|15:1|10:2|5:3|5:8|10:6|10:5|10:4|10:3|6:2')

        pieces = source.split(0.01)

        expected_lines = (
            'chr1    100      125      5:1|10:5|5:7|5:80|2:1',
            'chr1    128      141      2:1|5:40|7:1',
            'chr1    143      215      7:1|10:2|5:3|5:8|10:6|10:5|10:4|10:3|6:2',
        )
        expected = []
        for pk_line in expected_lines:
            wanted = Cluster(rounding=True)
            wanted.read_line(pk_line)
            expected.append(wanted)

        # Both sides are compared through their written representation.
        for index, wanted in enumerate(expected):
            self.assertEqual(pieces[index].write_line(), wanted.write_line())
Пример #6
0
 def test_wig_read_write(self):
     """WIG lines read as half-open are written back unchanged, then with shifted starts.

     NOTE(review): the second argument of ``write_as`` presumably disables
     half-open output -- confirm against Cluster.write_as.
     """
     wig = Cluster(read=WIG, write=WIG, read_half_open=True, write_half_open=True, rounding=True)
     for wig_line in ('chr2 1 10 1', 'chr2 10 16 2', 'chr2 16 26 1'):
         wig.read_line(wig_line)

     before_switch = 'chr2\t1\t10\t1\nchr2\t10\t16\t2\nchr2\t16\t26\t1\n'
     self.assertEqual(wig.write_line(), before_switch)

     wig.write_as(WIG, False)
     after_switch = 'chr2\t2\t10\t1\nchr2\t11\t16\t2\nchr2\t17\t26\t1\n'
     self.assertEqual(wig.write_line(), after_switch)
Пример #7
0
    def test_simple_ucsc_representation(self):
        """A single half-open BED read is written as one half-open WIG line.

        Confirmed visually at the UCSC browser with:

        track name=simple_read visibility=full
        chr3 101 200 noname 555 +
        track type=wiggle_0 name=the_test visibility=full
        chr3 101 200 1
        """
        options = dict(
            read=BED,
            write=WIG,
            read_half_open=True,
            write_half_open=True,
            rounding=True,
            cached=True,
        )
        read = Cluster(**options)
        read.read_line('chr3 101 200 noname 555 +')

        written = read.write_line()
        self.assertEqual(written, 'chr3\t101\t200\t1\n')
Пример #8
0
 def test_eq(self):
     """Clusters read from the same line are equal; one with a shorter profile is not."""
     full_line = 'chr1 1 15 4:1|1:2|2:1|3:4|2:5|2:2|1:1'
     left, right, truncated = Cluster(read=PK), Cluster(read=PK), Cluster(read=PK)

     left.read_line(full_line)
     right.read_line(full_line)
     # Same coordinates, but the last profile entry is missing.
     truncated.read_line('chr1 1 15 4:1|1:2|2:1|3:4|2:5|2:2')

     self.assertEqual(left, right)
     self.assertNotEqual(left, truncated)
Пример #9
0
 def test_add2(self):
     """Reading four BED lines into one cluster piles them up into a single profile."""
     piled = Cluster(read=BED)
     repeated_line = 'chr1 1 20000 666 hola +'
     for _ in range(3):
         piled.read_line(repeated_line)
     # The fourth read starts 1000 positions later than the first three.
     piled.read_line('chr1 1001 20000 666 hola +')

     expected = 'chr1\t1\t20000\t1000:3.00|19000:4.00\t4.0\t+\t10500\t79000.0\n'
     self.assertEqual(piled.write_line(), expected)
Пример #10
0
    def read_and_extend(self, cluster, line, extension):
        """Read ``line`` as a BED tag, extend it and accumulate it into ``cluster``.

        An empty ``cluster`` reads and extends the line itself; otherwise the
        line goes into a temporary cluster that is extended and added in place.

        :param cluster: accumulator cluster.
        :param line: text line passed to ``read_line``.
        :param extension: value passed to ``extend``.
        :returns: the accumulated cluster.
        """
        scratch = Cluster(read=BED, write=BED, read_half_open=True, write_half_open=True, rounding=True)
        if not cluster.is_empty():
            scratch.read_line(line)
            scratch.extend(extension)
            cluster += scratch
            return cluster

        cluster.read_line(line)
        cluster.extend(extension)
        return cluster
Пример #11
0
    def test_bed_to_half_open_wig(self):
        """Two overlapping half-open BED reads are written as three half-open WIG lines.

        Confirmed visually at the UCSC browser with:

        track name=simple_cluster visibility=full
        chr1 1 100 hola 666  +
        chr1 10 130 hola 666 +
        track type=wiggle_0 name=the_test  visibility=full
        chr1    1       10      1
        chr1    10      100     2
        chr1    100     130     1
        """
        pileup = Cluster(
            read=BED,
            write=WIG,
            read_half_open=True,
            write_half_open=True,
            rounding=True,
            cached=True,
        )
        for bed_line in ('chr1 1 100 hola 666  +', 'chr1 10 130 hola 666 +'):
            pileup.read_line(bed_line)

        expected = 'chr1\t1\t10\t1\nchr1\t10\t100\t2\nchr1\t100\t130\t1\n'
        self.assertEqual(pileup.write_line(), expected)
Пример #12
0
 def test_is_significant(self):
     """Check is_significant against several thresholds, with and without "numreads"."""
     peak = Cluster(rounding=True)
     # The profile below has an area of 35 (sum of length * height).
     peak.read_line('chr1 1 15 4:1|1:2|2:1|3:4|2:5|2:2|1:1')

     for threshold in (5, 34):
         self.assertTrue(peak.is_significant(threshold, "numreads"))
     self.assertFalse(peak.is_significant(36, "numreads"))

     self.assertFalse(peak.is_significant(20))
     for threshold in (1, 5):
         self.assertTrue(peak.is_significant(threshold))
Пример #13
0
 def test_comparison(self):
     """Ordering operators work between a Region and BED clusters."""
     def bed_cluster(bed_line):
         # Small local factory: a BED cluster holding one line.
         parsed = Cluster(read=BED)
         parsed.read_line(bed_line)
         return parsed

     region = Region("chr1", 1, 100)
     same_span = bed_cluster("chr1 1 100")
     on_chr4 = bed_cluster("chr4 1000 1010")
     on_chr5 = bed_cluster("chr5 3 103")

     self.assertTrue(region < on_chr4)
     self.assertTrue(on_chr4 < on_chr5)
     self.assertFalse(region > on_chr5)
     # Same coordinates: not strictly smaller, but smaller-or-equal.
     self.assertFalse(region < same_span)
     self.assertTrue(region <= same_span)
Пример #14
0
 def test_bug_contiguous_peaks(self):
     """A peak ending at 850408 and one starting at 850408 can be added and intersect."""
     options = dict(rounding=True, read=PK, write=PK)
     upstream = Cluster(**options)
     downstream = Cluster(**options)
     upstream.read_line('chr1    849917  850408  8:2|10:4|80:5|23:6|29:7|8:5|10:3|39:2|12:3|29:4|5:3|18:4|41:3|30:4|15:5|12:4|34:3|59:2|30:1')
     downstream.read_line('chr1    850408  850648  66:2|25:3|59:4|66:2|25:1        +')

     # The sum is not inspected; the addition only has to complete.
     _ = upstream + downstream
     self.assertTrue(upstream.intersects(downstream))
Пример #15
0
 def test_sub_and_print(self):
     """Subtracting two clusters and writing the result gives the three expected lines."""
     minuend = Cluster()
     subtrahend = Cluster(write_half_open=False)
     minuend.read_line('chr1    1  1000  10:2|10:4|80:5|500:7|100:7|100:5')
     subtrahend.read_line('chr1    11  1000  10:4|80:5|500:6|100:7|99:5|1:4.99')

     difference = minuend - subtrahend

     expected = (
         'chr1\t1\t10\t10:2.00\t2.0\t.\t5\t20.0\n'
         'chr1\t101\t600\t500:1.00\t1.0\t.\t350\t500.0\n'
         'chr1\t800\t800\t1:0.01\t0.01\t.\t800\t0.01\n'
     )
     self.assertEqual(difference.write_line(), expected)
Пример #16
0
 def test_is_contiguous_wig(self):
     """A half-open WIG cluster ending where the next one starts is contiguous with it."""
     first = Cluster(read=WIG, read_half_open=True)
     first.read_line('chr1    1599888 1599949 1.77')

     following = Cluster(read=WIG, write=WIG, read_half_open=True)
     following.read_line('chr1    1599949 1600001 2.65')

     self.assertTrue(first.is_contiguous(following))
Пример #17
0
    def test_normalized_counts(self):
        """Check Region.normalized_counts for raw counts, RPKM and pseudocounts.

        Five identical reads are added to a plain region and to a region
        created with ``exome_size=200``. Non-integer results are compared with
        ``assertAlmostEqual`` so the test does not depend on the exact
        rounding of the last floating point digit.
        """
        total_number_reads = 1e7
        region = Region("chr1", 1, 300)
        region_bed12 = Region("chr1", 1, 300, exome_size=200)
        c = Cluster(read=BED)
        for _ in range(5):
            c.read_line("chr1 1 100")
            region.add_tags(c, True)
            region_bed12.add_tags(c, True)
            # Reuse the same cluster object for the next read.
            c.clear()

        # Simple counts.
        self.assertEqual(region.normalized_counts(), 5.)
        # RPKM.
        self.assertAlmostEqual(
            region.normalized_counts(region_norm=True, total_n_norm=True, total_reads=total_number_reads),
            1.666666666666667,
            places=12,
        )
        # RPKM for the region created with exome_size.
        self.assertAlmostEqual(
            region_bed12.normalized_counts(region_norm=True, total_n_norm=True, total_reads=total_number_reads),
            2.5,
            places=12,
        )

        # With pseudocounts.
        self.assertEqual(region.normalized_counts(pseudocount=True), 6.)
        self.assertAlmostEqual(
            region.normalized_counts(
                region_norm=True,
                total_n_norm=True,
                total_reads=total_number_reads,
                regions_analyzed=10000,
                pseudocount=True,
            ),
            1.998001998001998,
            places=12,
        )
Пример #18
0
 def test_sub_fast(self):
     """In-place subtraction of a control cluster leaves the expected experiment profile."""
     # Input lines in the style of random/experiment.pk
     treated = Cluster(rounding=True)
     treated.read_line("chr1   1     1107     101:2|7:1  2.0     .       263     238.0")
     background = Cluster()
     background.read_line("chr1   46    1222      47:1|54:2|47:1 2.0     .       71331   202.0")

     treated -= background

     expected = 'chr1\t1\t92\t45:2|47:1\t2.0\t.\t23\t137.0\n'
     self.assertEqual(treated.write_line(), expected)
Пример #19
0
 def test_intersects(self):
     """intersects() is true for touching/overlapping clusters with the same name only."""
     touching_left = Cluster('chr1', 1, 10)
     touching_right = Cluster('chr1', 10, 14)
     self.assertTrue(touching_left.intersects(touching_right))

     # Overlapping coordinates but different names.
     on_chr2 = Cluster('chr2', 1, 10)
     on_chr1 = Cluster('chr1', 4, 14)
     self.assertFalse(on_chr2.intersects(on_chr1))

     overlap_left = Cluster('chr1', 1, 10)
     overlap_right = Cluster('chr1', 4, 14)
     self.assertTrue(overlap_left.intersects(overlap_right))

     head = Cluster(read=PK, rounding=True)
     head.read_line('chr1  1 100 100:1')
     tail = Cluster(read=PK, rounding=True)
     tail.read_line('chr1 100 199 100:1')
     # The sum is not inspected; the addition only has to complete.
     _ = head + tail
     self.assertTrue(head.intersects(tail))
Пример #20
0
 def test_add_pk(self):
     """Adding two PK clusters merges their profiles level by level."""
     longer, shorter = Cluster(read=PK), Cluster(read=PK)
     longer.read_line('chr1\t1\t145\t9:2.00|41:3.00|50:2.00|45:1.00\n')
     shorter.read_line('chr1\t1\t125\t9:4.00|41:3.00|30:2.00|45:1.00\n')

     total = longer + shorter

     expected = 'chr1\t1\t145\t50:6.00|30:4.00|20:3.00|25:2.00|20:1.00\t6.0\t.\t25\t550.0\n'
     self.assertEqual(total.write_line(), expected)
Пример #21
0
 def test_subtract_with_gaps(self):
     """In-place subtraction with zero-height stretches in the subtrahend gives the expected profile."""
     minuend, subtrahend = Cluster(), Cluster()
     minuend.read_line("chr2 1 100 30:1|50:2|40:1|3000:3")
     subtrahend.read_line("chr2 1 100 30:1|50:0|40:1|200:0|5000:1")

     minuend -= subtrahend

     expected_profile = [[50, 2.0], [40, 0.0], [200, 3.0], [2800, 2.0]]
     self.assertEqual(minuend._profile, expected_profile)
Пример #22
0
    def get_overlaping_clusters(self, region, overlap=1):
        """Return the reads of the BAM file that overlap ``region``.

        Runs ``samtools view`` on ``self.bam_path`` restricted to
        ``name:start-end`` of the region and parses every non-empty output
        line as a SAM read.

        The command is handed to ``subprocess.Popen`` as an argument list
        (no shell), so a BAM path or region name containing spaces or shell
        metacharacters reaches samtools verbatim instead of being
        re-interpreted by the shell.

        :param region: queried region; its ``name``, ``start`` and ``end``
            attributes are used.
        :param overlap: minimum value ``Cluster.overlap(region)`` has to reach
            for a read to be kept.
        :returns: list with the ``Cluster`` objects that were kept; empty when
            samtools could not be started or produced no output.
        """
        clusters = []
        self.logger.debug('Launching Samtools for %s...'%region)
        command = [
            "samtools",
            "view",
            self.bam_path,
            "%s:%s-%s" % (region.name, region.start, region.end),
        ]
        try:
            proc = subprocess.Popen(command, stdout=subprocess.PIPE)
        except OSError as exc:
            # The shell-based call used to end up with empty output when
            # samtools was missing; keep returning an empty result.
            print("samtools could not be executed: %s" % exc)
            return clusters
        # stderr is not piped, so only the stdout half of communicate() carries data.
        out = proc.communicate()[0]
        self.logger.debug('... done')
        lines = [text for text in out.split("\n") if text]
        self.logger.debug('Numlines in %s: %s'%(region, len(lines)))
        for line in lines:
            c = Cluster(read=SAM, cached=False, read_half_open=self.read_half_open, rounding=self.rounding)
            try:
                c.read_line(line)
            except InvalidLine:
                # Single parenthesised argument: prints the same text under Python 2.
                print("Invalid line, .bam or .bai corrupt")
                break

            if c.overlap(region) >= overlap:
                clusters.append(c)
            elif c.start > region.end or c.name != region.name:
                # Reads have moved past the region or onto another name.
                break

        return clusters
Пример #23
0
    def test_get_profile(self):
        """Tags added to a region show up in the metacluster levels, gap included."""
        region = Region(start=1, end=1999)

        first_tag = Cluster(read=BED)
        first_tag.read_line('chr4 1 40')
        region.add_tags(first_tag, True)

        second_tag = Cluster(read=BED, read_half_open=True)
        second_tag.read_line('chr4 400 500')
        region.add_tags(second_tag, True)

        metacluster = region.get_metacluster()
        expected_levels = [[40, 1.0], [360, 0.0], [100, 1.0]]
        self.assertEqual(metacluster._levels, expected_levels)
Пример #24
0
 def test_FDR(self):
     """get_FDR_clusters returns one cluster for 50 staggered tags plus one far-away tag."""
     region = Region('', 1, 1999)

     staggered = []
     for offset in range(50):
         tag = Cluster()
         tag.read_line('chr4 %s %s 20:1'%(offset, offset+50))
         staggered.append(tag)
     region.add_tags(staggered, True)

     # One extra tag located far from the others.
     distant = Cluster()
     distant.read_line('chr4 55555 55558 7:1')
     region.add_tags(distant)

     significant = region.get_FDR_clusters()
     self.assertEqual(len(significant), 1)
Пример #25
0
 def test_is_empty(self):
     """Clusters are empty until a line is read, and again after subtracting an identical one."""
     pk_line = 'chr1 1 15 4:1|1:2|2:1|3:4|2:5|2:2|1:1'

     from_bed = Cluster(read=BED)
     self.assertEqual(from_bed.is_empty(), True)
     from_bed.read_line('chr1 10 130 666 hola +')
     self.assertEqual(from_bed.is_empty(), False)

     from_pk = Cluster(read=PK)
     self.assertEqual(from_pk.is_empty(), True)
     from_pk.read_line(pk_line)
     self.assertEqual(from_pk.is_empty(), False)

     # Subtracting a cluster read from the same line must leave nothing.
     twin = Cluster(read=PK)
     twin.read_line(pk_line)
     difference = from_pk - twin
     self.assertEqual(difference.is_empty(), True)
Пример #26
0
 def DifferentChromosome(self):
     """Add two PK clusters read from chr1 and chr2 lines.

     NOTE(review): not a test by itself (no ``test`` prefix); presumably
     passed to ``assertRaises`` elsewhere -- confirm against the caller.
     """
     pk_profile = '1 15 4:1|1:2|2:10|3:4|2:15|2:2|1:1'
     on_chr1 = Cluster(read=PK)
     on_chr1.read_line('chr1 ' + pk_profile)
     on_chr2 = Cluster(read=PK)
     on_chr2.read_line('chr2 ' + pk_profile)
     # Only the addition itself matters; its result is discarded.
     _ = on_chr1 + on_chr2
Пример #27
0
 def read_invalid_lines(self):
     """Feed the same PK-formatted line 100 times to a cluster that reads SAM.

     NOTE(review): not a test by itself (no ``test`` prefix); presumably
     passed to ``assertRaises`` elsewhere -- confirm against the caller.
     """
     sam_reader = Cluster(read=SAM)
     pk_line = 'chr1 1 15 4:1|1:2|2:10|3:4|2:15|2:2|1:1'
     for _ in range(100):
         sam_reader.read_line(pk_line)
Пример #28
0
 def test_cluster_pk(self):
     """Reading two PK lines into the same cluster accumulates their profiles."""
     piled = Cluster(read=PK)
     pk_lines = (
         'chr1\t1\t145\t9:2.00|41:3.00|50:2.00|45:1.00\n',
         'chr1\t1\t125\t9:4.00|41:3.00|30:2.00|45:1.00\n',
     )
     for pk_line in pk_lines:
         piled.read_line(pk_line)

     expected = 'chr1\t1\t145\t50:6.00|30:4.00|20:3.00|25:2.00|20:1.00\t6.0\t.\t25\t550.0\n'
     self.assertEqual(piled.write_line(), expected)
Пример #29
0
    def test_add_bed(self):
        """Adding BED clusters with different overlaps gives the expected written profiles."""
        bed_lines = (
            'chr1 1 100 666 hola +',
            'chr1 1 100 666 hola +',
            'chr1 1 50 666 hola +',
            'chr1 10 145 666 hola +',
            'chr1 45 95 666 hola +',
            'chr1 1 200 666 hola +',
        )
        # All six clusters are created first and filled afterwards.
        operands = [Cluster(read=BED) for _ in bed_lines]
        for operand, bed_line in zip(operands, bed_lines):
            operand.read_line(bed_line)
        base, same, half, shifted, inner, wider = operands

        same_sum = base + same
        half_sum = base + half
        shifted_sum = shifted + base
        inner_sum = base + inner
        # NOTE(review): this sum is computed but never asserted on.
        wider_sum = base + wider

        self.assertEqual(same_sum.write_line(), 'chr1\t1\t100\t100:2.00\t2.0\t+\t50\t200.0\n')
        self.assertEqual(half_sum.write_line(), 'chr1\t1\t100\t50:2.00|50:1.00\t2.0\t+\t25\t150.0\n')
        self.assertEqual(shifted_sum.write_line(), 'chr1\t1\t145\t9:1.00|91:2.00|45:1.00\t2.0\t+\t55\t236.0\n')
        self.assertEqual(inner_sum.write_line(), 'chr1\t1\t100\t44:1.00|51:2.00|5:1.00\t2.0\t+\t70\t151.0\n')
Пример #30
0
 def test_max_height(self):
     """max_height() returns the highest level of the profile (15 here)."""
     peak = Cluster(read=PK)
     peak.read_line('chr1 1 15 4:1|1:2|2:10|3:4|2:15|2:2|1:1')
     highest = peak.max_height()
     self.assertEqual(highest, 15)