Пример #1
0
    def test_barcode_size_diff(self):
        """Extract reads when the barcodes have different lengths (6 and 7).

        The first read carries ``ACA`` where barcode ``NAAANN`` has ``AAA``
        (a single mismatch, tolerated because ``mismatches=1``); the second
        read agrees with ``NNCCCNN`` on every fixed position. For each yielded
        record the test checks the barcode index, the randomer, and that the
        barcode-length prefix was removed from both sequence and quality.
        """
        barcodes = ['NAAANN', 'NNCCCNN']
        adapter = 'CCCCCC'
        data = [
            '@header1',
            'TACATT' + adapter + make_sequence(40, rnd_seed=0),
            '+',
            make_quality_scores(50, rnd_seed=0) + '!J',
            '@header2',
            'AACCCTT' + adapter + make_sequence(39, rnd_seed=0),
            '+',
            make_quality_scores(50, rnd_seed=0) + '!J',
        ]
        fq_fname = get_temp_file_name(extension='fq')
        fq_file = iCount.files.fastq.FastqFile(fq_fname, 'wt')
        try:
            fq_file.write(iCount.files.fastq.FastqEntry(*data[:4]))
            fq_file.write(iCount.files.fastq.FastqEntry(*data[4:]))
        finally:
            # Close the handle even when a write fails, so it is never leaked.
            fq_file.close()

        handle = demultiplex._extract(fq_fname, barcodes, mismatches=1)

        # First record: 6-character barcode at index 0.
        read1, exp_id1, randomer1 = next(handle)
        self.assertEqual(exp_id1, 0)
        self.assertEqual(randomer1, 'TTT')
        self.assertEqual(read1.id, data[0])
        self.assertEqual(read1.seq, data[1][6:])
        self.assertEqual(read1.plus, '+')
        self.assertEqual(read1.qual, data[3][6:])

        # Second record: 7-character barcode at index 1.
        read2, exp_id2, randomer2 = next(handle)
        self.assertEqual(exp_id2, 1)
        self.assertEqual(randomer2, 'AATT')
        self.assertEqual(read2.id, data[4])
        self.assertEqual(read2.seq, data[5][7:])
        self.assertEqual(read2.plus, '+')
        self.assertEqual(read2.qual, data[7][7:])
Пример #2
0
    def test_barcode_size_diff(self):
        """Extract two reads whose trimmed barcode prefixes differ in size.

        The first read is expected to lose a 6-character prefix and the second
        an 8-character prefix, from both the sequence and the quality string.
        The expected barcodes are ``self.barcodes5[0]`` and
        ``self.barcodes5[2]``.
        """
        barcodes = demultiplex.prepare_barcodes(self.barcodes5, None)
        entry1 = FastqEntry(
            '@header1',
            'TTAATT' + make_sequence(40),
            '+',
            # 6 barcode characters + 40 bases: quality must cover all 46
            # positions so that the FASTQ record is well formed.
            make_quality_scores(46),
        )
        entry2 = FastqEntry(
            '@header2',
            'GGACGAGG' + make_sequence(40),
            '+',
            make_quality_scores(48),
        )
        fq_fname = self.create_fq_file([entry1, entry2])

        handle = demultiplex._extract(fq_fname,
                                      barcodes,
                                      mismatches=1,
                                      minimum_length=15)

        # (written entry, expected barcode, expected randomer, prefix length)
        expectations = (
            (entry1, self.barcodes5[0], 'TTT', 6),
            (entry2, self.barcodes5[2], 'GGGG', 8),
        )
        for entry, barcode, expected_randomer, prefix_len in expectations:
            read, wbrc, randomer = next(handle)
            self.assertEqual(wbrc, barcode)
            self.assertEqual(randomer, expected_randomer)
            self.assertEqual(read.id, entry.id)
            self.assertEqual(read.seq, entry.seq[prefix_len:])
            self.assertEqual(read.plus, entry.plus)
            self.assertEqual(read.qual, entry.qual[prefix_len:])
Пример #3
0
    def test_extract_mismatch(self):
        """A read with too many barcode mismatches is reported as 'nomatch'.

        With ``mismatches=0`` the single read is expected to come back with
        the ``'nomatch'`` label, an empty randomer and every field of the
        read identical to the entry that was written.
        """
        barcodes = demultiplex.prepare_barcodes(self.barcodes5, None)
        entry = FastqEntry(
            '@header1',
            'TTTTTT' + make_sequence(40),
            '+',
            make_quality_scores(46),
        )
        fq_fname = self.create_fq_file([entry])

        # Materialise the generator: asserting inside a ``for`` loop would
        # pass silently if nothing were yielded at all.
        records = list(demultiplex._extract(fq_fname,
                                            barcodes,
                                            mismatches=0,
                                            minimum_length=15))
        self.assertEqual(len(records), 1)

        read, wbrc, randomer = records[0]
        self.assertEqual(wbrc, 'nomatch')
        self.assertEqual(randomer, '')
        self.assertEqual(read.id, entry.id)
        self.assertEqual(read.seq, entry.seq)
        self.assertEqual(read.plus, entry.plus)
        self.assertEqual(read.qual, entry.qual)
Пример #4
0
    def test_extract_ok_2(self):
        """A read is assigned to ``self.barcodes5[1]`` with ``mismatches=1``.

        The single read is expected to be labelled with the second barcode,
        to yield the randomer ``'TTT'`` and to have its first 6 characters
        removed from both the sequence and the quality string.
        """
        barcodes = demultiplex.prepare_barcodes(self.barcodes5, None)
        entry = FastqEntry(
            '@header1',
            'TTAGTT' + make_sequence(40),
            '+',
            make_quality_scores(46),
        )
        fq_fname = self.create_fq_file([entry])

        # Materialise the generator: asserting inside a ``for`` loop would
        # pass silently if nothing were yielded at all.
        records = list(demultiplex._extract(fq_fname,
                                            barcodes,
                                            mismatches=1,
                                            minimum_length=15))
        self.assertEqual(len(records), 1)

        read, wbrc, randomer = records[0]
        self.assertEqual(wbrc, self.barcodes5[1])
        self.assertEqual(randomer, 'TTT')
        self.assertEqual(read.id, entry.id)
        self.assertEqual(read.seq, entry.seq[6:])
        self.assertEqual(read.plus, entry.plus)
        self.assertEqual(read.qual, entry.qual[6:])
Пример #5
0
    def test_extract_mismatch(self):
        """A read with too many barcode mismatches gets experiment id -1.

        The read ``TTACTT...`` has ``ACT`` at the fixed barcode positions,
        which equals none of ``AAA``, ``CCT`` or ``ACG``. With
        ``mismatches=0`` the record is therefore expected to come back with
        id ``-1``, an empty randomer and the read exactly as written.
        """
        barcodes = ['NNAAAN', 'NNCCTN', 'NNACGN']
        adapter = 'CCCCCC'
        data = [
            '@header1',
            'TTACTT' + adapter + make_sequence(40, rnd_seed=0),
            '+',
            make_quality_scores(50, rnd_seed=0) + '!J',
        ]
        fq_fname = get_temp_file_name(extension='fq')
        fq_file = iCount.files.fastq.FastqFile(fq_fname, 'wt')
        try:
            fq_file.write(iCount.files.fastq.FastqEntry(*data))
        finally:
            # Close the handle even when the write fails, so it is not leaked.
            fq_file.close()

        # Materialise the generator: asserting inside a ``for`` loop would
        # pass silently if nothing were yielded at all.
        records = list(demultiplex._extract(fq_fname,
                                            barcodes,
                                            mismatches=0))
        self.assertEqual(len(records), 1)

        read, exp_id, randomer = records[0]
        self.assertEqual(exp_id, -1)
        self.assertEqual(randomer, '')
        self.assertEqual(read.id, data[0])
        self.assertEqual(read.seq, data[1])
        self.assertEqual(read.plus, '+')
        self.assertEqual(read.qual, data[3])
Пример #6
0
    def test_barcode3(self):
        """Extract barcodes located at the end of the read.

        Uses the ``'barcodes3'`` entry prepared for ``self.barcodes5[2]``.
        Both reads are expected to lose their last 5 characters from the
        sequence and the quality string, and to be labelled with
        ``self.barcodes3[2]`` and ``self.barcodes3[3]`` respectively.
        """
        five_prime = self.barcodes5 + [self.barcodes5[2]]
        prepared = demultiplex.prepare_barcodes(five_prime, self.barcodes3)
        three_prime = prepared[self.barcodes5[2]]['barcodes3']

        first = FastqEntry('@header1', make_sequence(40) + 'TTGGG', '+',
                           make_quality_scores(45))
        second = FastqEntry('@header2', make_sequence(40) + 'TCAAA', '+',
                            make_quality_scores(45))
        fq_fname = self.create_fq_file([first, second])

        records = demultiplex._extract(
            fq_fname, three_prime, mismatches=1, minimum_length=15)

        # (written entry, index into self.barcodes3, expected randomer)
        expectations = ((first, 2, 'TT'), (second, 3, 'TC'))
        for source, barcode_index, expected_randomer in expectations:
            read, wbrc, randomer = next(records)
            self.assertEqual(wbrc, self.barcodes3[barcode_index])
            self.assertEqual(randomer, expected_randomer)
            self.assertEqual(read.id, source.id)
            self.assertEqual(read.seq, source.seq[:-5])
            self.assertEqual(read.plus, source.plus)
            self.assertEqual(read.qual, source.qual[:-5])