Пример #1
0
    def test_indexstar_and_mapstar(self):
        """Run ``iCount indexstar`` followed by ``iCount mapstar`` with minimal arguments.

        Both commands are expected to exit with status 0.
        """
        fasta = make_fasta_file(num_sequences=2, seq_len=1000, rnd_seed=0)
        reads = make_fastq_file(genome=fasta, rnd_seed=0)

        # Suppress messages below ERROR level in both commands.
        quiet_opts = ['-S', '40']

        # Index step: the index is written to self.dir.
        index_cmd = ['iCount', 'indexstar', fasta, self.dir] + quiet_opts
        self.assertEqual(subprocess.call(index_cmd), 0)

        # Mapping step: reads the index from self.dir, gets self.dir2 as its second directory.
        map_cmd = ['iCount', 'mapstar', reads, self.dir, self.dir2] + quiet_opts
        self.assertEqual(subprocess.call(map_cmd), 0)
Пример #2
0
 def setUp(self):
     """Prepare temp directories, a two-sequence genome, reads and a small annotation."""
     self.dir = get_temp_dir()
     self.index_dir = get_temp_dir()
     self.genome = make_fasta_file(num_sequences=2, seq_len=1000)
     self.reads = make_fastq_file(genome=self.genome)

     # Every annotation record covers the same interval (sequence '1', 10-20,
     # '+' strand); only the feature type and the attribute string differ.
     features = [
         ('gene', 'gene_id "A";'),
         ('transcript', 'gene_id "A"; transcript_id "AA";'),
         ('exon', 'gene_id "A"; transcript_id "AA"; exon_number "1";'),
     ]
     records = [
         ['1', '.', feature, '10', '20', '.', '+', '.', attributes]
         for feature, attributes in features
     ]
     self.annotation = make_file_from_list(records)

     warnings.simplefilter("ignore", ResourceWarning)
Пример #3
0
    def test_indexstar_and_mapstar_full(self):
        """Run ``iCount indexstar`` and ``iCount mapstar`` with their optional arguments set.

        Both commands are expected to exit with status 0.
        """
        fasta = make_fasta_file(num_sequences=2, seq_len=1000, rnd_seed=0)
        reads = make_fastq_file(genome=fasta, rnd_seed=0)

        # Option groups shared by both commands.
        annotation_opts = ['--annotation', self.gtf]
        # Single thread; '-S 40' suppresses messages below ERROR level.
        trailing_opts = ['--threads', '1', '-S', '40']

        index_cmd = ['iCount', 'indexstar', fasta, self.dir]
        index_cmd += annotation_opts
        index_cmd += ['--overhang', '100', '--overhang_min', '8']
        index_cmd += trailing_opts
        self.assertEqual(subprocess.call(index_cmd), 0)

        map_cmd = ['iCount', 'mapstar', reads, self.dir, self.dir2]
        map_cmd += annotation_opts
        map_cmd += ['--multimax', '50', '--mismatches', '2']
        map_cmd += trailing_opts
        self.assertEqual(subprocess.call(map_cmd), 0)
Пример #4
0
    def setUp(self):
        """Create the fixture files: fai index, FASTA genome, GTF annotation and FASTQ reads.

        The statements are deliberately kept in this order: several helpers take
        a ``rnd_seed`` argument and the seeds themselves are drawn from the
        module-level random generator, so reordering could change the data.
        """
        warnings.simplefilter("ignore", (ResourceWarning, ImportWarning))

        # Chromosome sizes, written to a .fai file:
        self.fai_data = {'1': 1000}
        self.fai = make_file_from_list(list(self.fai_data.items()), bedtool=False, extension='fai')

        random.seed(0)  # pylint:disable=no-member
        # NOTE(review): the number of seeds is len(self.fai) (whatever
        # make_file_from_list returned), not len(self.fai_data). zip() below stops
        # at the shorter input, so surplus seeds are ignored. Confirm whether this
        # is intended; changing it may alter the values drawn later on.
        chrom_seeds = random.randint(10**5, size=len(self.fai))  # pylint:disable=no-member
        self.seqs = {}
        for (chrom_name, chrom_size), chrom_seed in zip(self.fai_data.items(), chrom_seeds):
            self.seqs[chrom_name] = make_sequence(chrom_size, rnd_seed=chrom_seed)
        self.fasta = make_fasta_file(sequences=self.seqs.values(), headers=self.seqs.keys())

        def row(feature, start, end, strand, attributes):
            """Return one annotation record on chromosome '1' with the constant columns filled."""
            return ['1', '.', feature, start, end, '.', strand, '.', attributes]

        # Annotation records:
        gtf_rows = [
            # Gene #1, positive strand:
            row('gene', '100', '499', '+', attrs(gid='G1')),
            # Transcript #1
            row('transcript', '100', '249', '+', attrs(gid='G1', tid='T1')),
            row('exon', '100', '149', '+', attrs(gid='G1', tid='T1', exn=1)),
            row('exon', '200', '229', '+', attrs(gid='G1', tid='T1', exn=2)),
            row('CDS', '200', '229', '+', attrs(gid='G1', tid='T1', exn=2)),
            row('exon', '240', '249', '+', attrs(gid='G1', tid='T1', exn=3)),
            # Transcript #2
            row('transcript', '240', '499', '+', attrs(gid='G1', tid='T2')),
            row('exon', '240', '299', '+', attrs(gid='G1', tid='T2', exn=1)),
            row('CDS', '240', '299', '+', attrs(gid='G1', tid='T2', exn=1)),
            row('exon', '400', '499', '+', attrs(gid='G1', tid='T2', exn=2)),
            row('CDS', '400', '499', '+', attrs(gid='G1', tid='T2', exn=2)),

            # Gene #2, positive strand:
            row('gene', '600', '799', '+', attrs(gid='G2')),
            # Transcript #3
            row('transcript', '600', '799', '+', attrs(gid='G2', tid='T3')),
            row('exon', '600', '649', '+', attrs(gid='G2', tid='T3', exn=1)),
            row('CDS', '600', '649', '+', attrs(gid='G2', tid='T3', exn=1)),
            row('exon', '750', '799', '+', attrs(gid='G2', tid='T3', exn=2)),
            row('CDS', '750', '799', '+', attrs(gid='G2', tid='T3', exn=2)),

            # Gene #3, negative strand:
            row('gene', '800', '899', '-', 'gene_id "G3";'),
            # Transcript #4
            row('transcript', '800', '899', '-', attrs(gid='G3', tid='T4')),
            row('exon', '800', '899', '-', attrs(gid='G3', tid='T4', exn=1)),
            row('CDS', '800', '899', '-', attrs(gid='G3', tid='T4', exn=1)),
        ]
        self.gtf = make_file_from_list(extension='gtf', data=gtf_rows)

        # Define positions of cross-links:
        self.read_data = [
            # [name, chromosome, sequence_len, read_start, strand]
            ('name01:rbc:CCCC', '1', 50, 80, '+'),
            ('name02:rbc:CCCC', '1', 50, 140, '+'),
            ('name03:rbc:AAAA', '1', 50, 142, '+'),
            ('name04:rbc:CCCC', '1', 50, 142, '+'),
            ('name05:rbc:CCCC', '1', 30, 160, '+'),
            ('name06:rbc:CCCC', '1', 30, 163, '+'),
            ('name07:rbc:GGGG', '1', 30, 163, '+'),
            ('name08:rbc:CCCC', '1', 20, 205, '+'),
            ('name09:rbc:CCCC', '1', 50, 235, '+'),
            ('name10:rbc:CCCC', '1', 50, 480, '+'),
            ('name11:rbc:CCCC', '1', 30, 530, '+'),
            ('name12:rbc:CCCC', '1', 30, 610, '+'),
            ('name13:rbc:CCCC', '1', 30, 549, '-'),
        ]

        # Make reads from the genome, pointing to the pre-determined cross-link positions:
        read_seeds = random.randint(10**5, size=len(self.read_data))  # pylint:disable=no-member
        self.reads = get_temp_file_name(extension='fastq')
        complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}
        with open(self.reads, 'wt') as ofile:

            def write_record(read_name, sequence, min_chr, seed):
                """Write one four-line FASTQ record to the reads file."""
                ofile.write('@' + read_name + '\n' + sequence + '\n' + '+' + '\n')
                quality = make_quality_scores(
                    len(sequence), min_chr=min_chr, max_chr=73, rnd_seed=seed)
                ofile.write(quality + '\n')

            for (name, chrom, length, xlink, strand), rseed in zip(self.read_data, read_seeds):
                chrom_seq = self.seqs[chrom]
                if strand == '-':
                    # Take the `length` bases ending at xlink and reverse-complement them.
                    upstream = chrom_seq[xlink - length: xlink]
                    seq = ''.join(complement[nuc] for nuc in reversed(upstream))
                else:
                    seq = chrom_seq[xlink: xlink + length]
                write_record(name, seq, 65, rseed)

            # Make a "strange" read by joining two non-adjacent genome segments.
            # It reuses `chrom` and `rseed` as left over from the last loop iteration.
            # pylint: disable=undefined-loop-variable
            strange_seq = self.seqs[chrom][250: 295] + self.seqs[chrom][310: 380]
            write_record('name_strange:rbc:GGGG', strange_seq, 70, rseed)