示例#1
0
    def test_negative_strand(self):
        """
        Check ``split`` implicit handling for a read with flag 16.

        The annotation is derived from ``self.gtf_data`` by overwriting the
        seventh field of every record with '-'. A single 30-base read with
        flag 16 is then mapped, and the report is expected to list 0.5 for
        both 'CDS-intergenic' and 'intergenic-CDS', with no explicit hits.
        """
        # Rebuild every annotation record with '-' in its seventh field.
        gtf_neg_data = []
        for record in intervals_to_list(self.gtf_data):
            gtf_neg_data.append(record[:6] + ['-'] + record[7:])
        gtf_neg = make_file_from_list(gtf_neg_data)

        # Segment layout: (qname, flag, refname, pos, mapq, cigar, tags)
        reads = [
            ('name2:rbc:CCCC', 16, 0, 549, 255, [(0, 30)], {'NH': 1}),
        ]
        bam = make_bam_file({'chromosomes': [('1', 1000)], 'segments': reads})

        rnamaps.run(bam, gtf_neg, self.out, self.strange, self.cross_tr,
                    mismatches=1, implicit_handling='split')
        observed = make_list_from_file(self.out)

        header = ['RNAmap', 'type', 'position', 'all', 'explicit']
        expected = [
            header,
            ['CDS-intergenic', '20', '0.5', '0'],
            ['intergenic-CDS', '-80', '0.5', '0'],
        ]
        self.assertEqual(expected, observed)
示例#2
0
    def test_explicit_whole_in(self):
        """
        Reads lie within one transcript and span the exon-intron landmark.

        Because the landmark is crossed, the hits count as explicit. Three
        reads are supplied at two distinct cross-link positions; the second
        position is covered by two different randomers, so it is expected
        to score 2 while the first scores 1.
        """
        # Segment layout: (qname, flag, refname, pos, mapq, cigar, tags)
        reads = [
            ('name2:rbc:CCCC', 0, 0, 140, 255, [(0, 50)], {'NH': 1}),
            ('name2:rbc:AAAA', 0, 0, 142, 255, [(0, 50)], {'NH': 1}),
            ('name2:rbc:CCCC', 0, 0, 142, 255, [(0, 50)], {'NH': 1}),
        ]
        bam = make_bam_file({'chromosomes': [('1', 1000)], 'segments': reads})

        rnamaps.run(bam, self.gtf, self.out, self.strange, self.cross_tr,
                    mismatches=1)
        observed = make_list_from_file(self.out)

        header = ['RNAmap', 'type', 'position', 'all', 'explicit']
        expected = [
            header,
            ['UTR5-intron', '-10', '1', '1'],
            ['UTR5-intron', '-8', '2', '2'],
        ]
        self.assertEqual(expected, observed)
示例#3
0
    def test_implicit_exons(self):
        """
        A lone read sits entirely inside one segment of one transcript.

        That segment is of EXON_TYPE and is a "middle" segment of the
        transcript. With ``implicit_handling='split'`` the report is
        expected to contain four rows, each with 0.25 in the 'all' column
        and 0 explicit hits.
        """
        # Segment layout: (qname, flag, refname, pos, mapq, cigar, tags)
        reads = [
            ('name2:rbc:CCCC', 0, 0, 205, 255, [(0, 20)], {'NH': 1}),
        ]
        bam = make_bam_file({'chromosomes': [('1', 1000)], 'segments': reads})

        rnamaps.run(bam, self.gtf, self.out, self.strange, self.cross_tr,
                    mismatches=1, implicit_handling='split')
        observed = make_list_from_file(self.out)

        header = ['RNAmap', 'type', 'position', 'all', 'explicit']
        expected = [
            header,
            ['CDS-UTR3', '-25', '0.25', '0'],
            ['CDS-intron', '-25', '0.25', '0'],
            ['UTR5-CDS', '5', '0.25', '0'],
            ['intron-CDS', '5', '0.25', '0'],
        ]
        self.assertEqual(expected, observed)
示例#4
0
    def test_implicit_intergenic(self):
        """
        The whole read falls in an intergenic region.

        With ``implicit_handling='split'`` the report is expected to list
        0.5 for both 'CDS-intergenic' and 'intergenic-CDS', with no
        explicit hits.
        """
        # Segment layout: (qname, flag, refname, pos, mapq, cigar, tags)
        reads = [
            ('name2:rbc:CCCC', 0, 0, 530, 255, [(0, 30)], {'NH': 1}),
        ]
        bam = make_bam_file({'chromosomes': [('1', 1000)], 'segments': reads})

        rnamaps.run(bam, self.gtf, self.out, self.strange, self.cross_tr,
                    mismatches=1, implicit_handling='split')
        observed = make_list_from_file(self.out)

        header = ['RNAmap', 'type', 'position', 'all', 'explicit']
        expected = [
            header,
            ['CDS-intergenic', '30', '0.5', '0'],
            ['intergenic-CDS', '-70', '0.5', '0'],
        ]
        self.assertEqual(expected, observed)
示例#5
0
    def test_implicit_whole_in(self):
        """
        Reads lie inside a single "middle" segment of a single transcript.

        Three reads are supplied at two distinct cross-link positions; the
        second position is covered by two different randomers. The report
        is expected to score 1 and 2 in the 'all' column with no explicit
        hits.
        """
        # Segment layout: (qname, flag, refname, pos, mapq, cigar, tags)
        reads = [
            ('name2:rbc:CCCC', 0, 0, 160, 255, [(0, 30)], {'NH': 1}),
            ('name2:rbc:CCCC', 0, 0, 163, 255, [(0, 30)], {'NH': 1}),
            ('name2:rbc:GGGG', 0, 0, 163, 255, [(0, 30)], {'NH': 1}),
        ]
        bam = make_bam_file({'chromosomes': [('1', 1000)], 'segments': reads})

        rnamaps.run(bam, self.gtf, self.out, self.strange, self.cross_tr,
                    mismatches=1)
        observed = make_list_from_file(self.out)

        header = ['RNAmap', 'type', 'position', 'all', 'explicit']
        expected = [
            header,
            ['UTR5-intron', '10', '1', '0'],
            ['UTR5-intron', '13', '2', '0'],
        ]
        self.assertEqual(expected, observed)
示例#6
0
    def test_cross_transcript_read(self):
        """
        The read is half in a transcript region and half in intergenic.

        Unlike the RNA-map report checks, this test inspects the file
        written to ``self.cross_tr`` and expects a single record for the
        read.
        """
        # Segment layout: (qname, flag, refname, pos, mapq, cigar, tags)
        reads = [
            ('name2:rbc:CCCC', 0, 0, 235, 255, [(0, 50)], {'NH': 1}),
        ]
        bam = make_bam_file({'chromosomes': [('1', 1000)], 'segments': reads})

        rnamaps.run(bam, self.gtf, self.out, self.strange, self.cross_tr,
                    mismatches=1)
        observed = make_list_from_file(self.cross_tr)

        header = [
            'chrom', 'strand', 'xlink', 'second-start', 'end-position',
            'read_len',
        ]
        expected = [
            header,
            ['1', '+', '234', '0', '284', '50'],
        ]
        self.assertEqual(expected, observed)
示例#7
0
    def test_explicit_intergenic_right(self):
        """
        The read is half in a transcript region and half in intergenic.

        The report is expected to hold one 'CDS-intergenic' row at
        position -20 that is counted both in 'all' and in 'explicit'.
        """
        # Segment layout: (qname, flag, refname, pos, mapq, cigar, tags)
        reads = [
            ('name2:rbc:CCCC', 0, 0, 480, 255, [(0, 50)], {'NH': 1}),
        ]
        bam = make_bam_file({'chromosomes': [('1', 1000)], 'segments': reads})

        rnamaps.run(bam, self.gtf, self.out, self.strange, self.cross_tr,
                    mismatches=1)
        observed = make_list_from_file(self.out)

        header = ['RNAmap', 'type', 'position', 'all', 'explicit']
        expected = [
            header,
            ['CDS-intergenic', '-20', '1', '1'],
        ]
        self.assertEqual(expected, observed)
示例#8
0
    def test_implicit_inter_tr(self):
        """
        The read lies in one segment of one transcript, next to intergenic.

        The whole read is inside a single segment, but that segment borders
        on an intergenic region. With ``implicit_handling='split'`` the
        report is expected to contain three rows, each with 0.3333 in the
        'all' column and 0 explicit hits.
        """
        # Segment layout: (qname, flag, refname, pos, mapq, cigar, tags)
        reads = [
            ('name2:rbc:CCCC', 0, 0, 610, 255, [(0, 30)], {'NH': 1}),
        ]
        bam_spec = {'chromosomes': [('1', 1000)], 'segments': reads}
        # This test passes an explicit random seed to the BAM builder.
        bam = make_bam_file(bam_spec, rnd_seed=0)

        rnamaps.run(bam, self.gtf, self.out, self.strange, self.cross_tr,
                    mismatches=1, implicit_handling='split')
        observed = make_list_from_file(self.out)

        header = ['RNAmap', 'type', 'position', 'all', 'explicit']
        expected = [
            header,
            ['CDS-CDS', '-40', '0.3333', '0'],
            ['CDS-intron', '-40', '0.3333', '0'],
            ['intergenic-CDS', '10', '0.3333', '0'],
        ]
        self.assertEqual(expected, observed)
示例#9
0
    def test_run(self):
        """
        Run ``rnamaps.run`` on a landmarks file and check its outputs exist.

        Eight landmarks and six cross-link sites are written to temporary
        files. After the run, the output directory must exist and, for
        every type in ``rnamaps.RNAMAP_TYPES``, a '.tsv' and a '.png' file
        larger than one byte must be present.
        """
        # BED-like rows: chrom, start, end, name, score, strand.
        landmark_rows = [
            ['chr1', '210', '211', 'gene-start;A', '.', '+'],
            ['chr1', '270', '271', 'translation-start;A', '.', '+'],
            ['chr1', '299', '300', 'noncoding-gene-end;B', '.', '-'],
            ['chr1', '330', '331', 'exon-intron;A', '.', '+'],
            ['chr1', '490', '491', 'intron-exon;A', '.', '+'],
            ['chr1', '550', '551', 'translation-end;A', '.', '+'],
            ['chr1', '749', '750', 'noncoding-gene-start;B', '.', '-'],
            ['chr1', '760', '761', 'gene-end;A', '.', '+'],
        ]
        landmarks = make_file_from_list(sort=True, data=landmark_rows)

        site_rows = [
            ['chr1', '220', '221', '.', '1', '+'],
            ['chr1', '350', '351', '.', '1', '+'],
            ['chr1', '350', '351', '.', '1', '-'],
            ['chr1', '550', '551', '.', '1', '+'],
            ['chr1', '740', '741', '.', '1', '+'],
            ['chr1', '750', '751', '.', '1', '-'],
        ]
        sites = make_file_from_list(site_rows)

        rnamaps.run(sites, landmarks, outdir=self.outdir)

        self.assertTrue(os.path.isdir(self.outdir))

        # Output files are named '<sites name>_<map type><extension>'.
        sites_name = remove_extension(sites, ['.bed', '.bed.gz'])
        name_template = '{}_{}'
        for maptype in rnamaps.RNAMAP_TYPES:
            stem = name_template.format(sites_name, maptype)
            basename = os.path.join(self.outdir, stem)
            # Only '.tsv' and '.png' are checked; the variant that also
            # checks '_plot_data.txt' is disabled.
            for extension in ['.tsv', '.png']:
                fname = basename + extension
                self.assertTrue(os.path.isfile(fname))
                self.assertGreater(os.path.getsize(fname), 1)
示例#10
0
    def test_run(self):
        """
        Run ``rnamaps.run`` on a regions file and check its outputs exist.

        Twelve GTF-like region records (seven on '+', five on '-') and six
        cross-link sites are written to temporary files. After the run, the
        output directory must exist and, for every type in
        ``rnamaps.MAP_TYPES``, a '.tsv' and a '.png' file larger than one
        byte must be present.
        """
        # Compact description of each region:
        # (feature, start, end, strand, gene name).
        region_specs = [
            ('intergenic', '1', '210', '+', 'None'),
            ('UTR5', '211', '270', '+', 'A'),
            ('CDS', '271', '330', '+', 'A'),
            ('intron', '331', '490', '+', 'A'),
            ('CDS', '491', '550', '+', 'A'),
            ('UTR3', '551', '760', '+', 'A'),
            ('intergenic', '761', '1100', '+', 'None'),
            ('intergenic', '1', '300', '-', 'None'),
            ('ncRNA', '301', '500', '-', 'B'),
            ('intron', '501', '600', '-', 'B'),
            ('ncRNA', '601', '750', '-', 'B'),
            ('intergenic', '751', '1000', '-', 'None'),
        ]
        # Expand each spec into a nine-column record on chromosome 'chr1'.
        region_rows = []
        for feature, start, end, strand, gene in region_specs:
            attributes = 'gene_name "{}";'.format(gene)
            region_rows.append([
                'chr1', '.', feature, start, end, '.', strand, '.',
                attributes,
            ])
        regions = make_file_from_list(sort=True, data=region_rows)

        # BED-like rows: chrom, start, end, name, score, strand.
        site_rows = [
            ['chr1', '220', '221', '.', '1', '+'],
            ['chr1', '350', '351', '.', '1', '+'],
            ['chr1', '350', '351', '.', '1', '-'],
            ['chr1', '550', '551', '.', '1', '+'],
            ['chr1', '740', '741', '.', '1', '+'],
            ['chr1', '750', '751', '.', '1', '-'],
        ]
        sites = make_file_from_list(site_rows)

        rnamaps.run(sites, regions, outdir=self.outdir)

        self.assertTrue(os.path.isdir(self.outdir))

        # Output files are named '<sites name>_<map type><extension>'.
        sites_name = remove_extension(sites, ['.bed', '.bed.gz'])
        name_template = '{}_{}'
        for maptype in rnamaps.MAP_TYPES:
            stem = name_template.format(sites_name, maptype)
            basename = os.path.join(self.outdir, stem)
            # Only '.tsv' and '.png' are checked; the variant that also
            # checks '_plot_data.txt' is disabled.
            for extension in ['.tsv', '.png']:
                fname = basename + extension
                self.assertTrue(os.path.isfile(fname))
                self.assertGreater(os.path.getsize(fname), 1)