Exemplo n.º 1
0
    def test_illumina_writer(self):
        # 01234
        # 1234567890
        # CCTGATTT-A
        # TAACGA
        #   -  C -A
        vcf = '''##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
ref 1 . C T 10 PASS .
ref 2 . CT CA,C 10 PASS .
ref 3 . T A 10 PASS .
ref 4 . G C 10 PASS .
ref 5 . A G 10 PASS .
ref 6 . T A,C 10 PASS .
ref 7 . TT T 10 PASS .
ref 8 . T TA 10 PASS .
ref 10 . A C 10 PASS .
'''

        vcf = vcf.replace(' ', '\t')
        vcf_fhand = NamedTemporaryFile(suffix='.vcf')
        vcf_fhand.write(vcf)
        vcf_fhand.flush()
        vcf_compressed = NamedTemporaryFile(suffix='.vcf.gz')
        compress_with_bgzip(vcf_fhand, vcf_compressed)
        index_vcf_with_tabix(vcf_compressed.name)

        ref_fhand = NamedTemporaryFile(suffix='.fasta')
        ref_fhand.write('>ref\nACTGATTTA\n')
        ref_fhand.flush()

        out_fhand1 = StringIO()

        writer = IlluminaWriter(ref_fhand.name, out_fhand1, min_length=0,
                                vcf_fpath=vcf_compressed.name)

        for snp in Reader(filename=vcf_compressed.name):
            writer.write(snp)

        # With no SNPs converted to IUPAC around
        out_fhand2 = StringIO()
        writer = IlluminaWriter(ref_fhand.name, out_fhand2, min_length=0)
        for snp in Reader(filename=vcf_compressed.name):
            writer.write(snp)

        remove(vcf_compressed.name + '.tbi')
        expected = u'CHROM\tPOS\tID\tseq\n'
        expected += u'ref\t1\t.\t[C/T]*WSRHT-^A\n'
        expected += u'ref\t2\t.\tY[CT/CA/C]SRHT-^A\n'
        expected += u'ref\t3\t.\tYC[T/A]SRHT-^A\n'
        expected += u'ref\t4\t.\tY*W[G/C]RHT-^A\n'
        expected += u'ref\t5\t.\tY*WS[A/G]HT-^A\n'
        expected += u'ref\t6\t.\tY*WSR[T/A/C]T-^A\n'
        expected += u'ref\t7\t.\tY*WSRH[TT/T]A\n'
        expected += u'ref\t8\t.\tY*WSRHT[T/TA]A\n'
        expected += u'ref\t10\t.\tY*WSRHT-^A[A/C]\n'
        assert expected == out_fhand1.getvalue()

        expected = u'CHROM\tPOS\tID\tseq\nref\t1\t.\t[C/T]'
        assert expected in out_fhand1.getvalue()
Exemplo n.º 2
0
    def test_errors(self):
        # 01234
        # 1234567890
        # CCTGATTT-A
        # TAACGA
        #   -  C -A
        vcf = '''##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
ref 1 . C T 10 PASS .
ref 2 . CT CA,C 10 PASS .
ref 3 . T A 10 PASS .
ref 4 . G C 10 PASS .
ref 5 . A G 10 PASS .
ref 6 . T A,C 10 PASS .
ref 7 . TT T 10 PASS .
ref 8 . T TA 10 PASS .
ref 10 . A C 10 PASS .
'''

        vcf = vcf.replace(' ', '\t')
        vcf_fhand = NamedTemporaryFile(suffix='.vcf')
        vcf_fhand.write(vcf)
        vcf_fhand.flush()
        vcf_compressed = NamedTemporaryFile(suffix='.vcf.gz')
        compress_with_bgzip(vcf_fhand, vcf_compressed)
        index_vcf_with_tabix(vcf_compressed.name)

        ref_fhand = NamedTemporaryFile(suffix='.fasta')
        ref_fhand.write('>ref\nACTGATTTA\n')
        ref_fhand.flush()

        out_fhand = StringIO()
        writer = IlluminaWriter(ref_fhand.name, out_fhand,
                                vcf_fpath=vcf_compressed.name)
        snps = Reader(filename=vcf_compressed.name)
        snp = snps.next()
        try:
            writer.write(snp)
            self.fail('NotEnoughAdjacentSequenceError expected')
        except IlluminaWriter.NotEnoughAdjacentSequenceError:
            pass
Exemplo n.º 3
0
    def test_errors(self):
        # 01234
        # 1234567890
        # CCTGATTT-A
        # TAACGA
        #   -  C -A
        vcf = '''##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
ref 1 . C T 10 PASS .
ref 2 . CT CA,C 10 PASS .
ref 3 . T A 10 PASS .
ref 4 . G C 10 PASS .
ref 5 . A G 10 PASS .
ref 6 . T A,C 10 PASS .
ref 7 . TT T 10 PASS .
ref 8 . T TA 10 PASS .
ref 10 . A C 10 PASS .
'''

        vcf = vcf.replace(' ', '\t')
        vcf_fhand = NamedTemporaryFile(suffix='.vcf')
        vcf_fhand.write(vcf)
        vcf_fhand.flush()
        vcf_compressed = NamedTemporaryFile(suffix='.vcf.gz')
        compress_with_bgzip(vcf_fhand, vcf_compressed)
        index_vcf_with_tabix(vcf_compressed.name)

        ref_fhand = NamedTemporaryFile(suffix='.fasta')
        ref_fhand.write('>ref\nACTGATTTA\n')
        ref_fhand.flush()

        out_fhand = StringIO()
        writer = IlluminaWriter(ref_fhand.name, out_fhand,
                                vcf_fpath=vcf_compressed.name)
        snps = Reader(filename=vcf_compressed.name)
        snp = snps.next()
        try:
            writer.write(snp)
            self.fail('NotEnoughAdjacentSequenceError expected')
        except IlluminaWriter.NotEnoughAdjacentSequenceError:
            pass
Exemplo n.º 4
0
    def test_illumina_writer(self):
        # 01234
        # 1234567890
        # CCTGATTT-A
        # TAACGA
        #   -  C -A
        vcf = '''##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
ref 1 . C T 10 PASS .
ref 2 . CT CA,C 10 PASS .
ref 3 . T A 10 PASS .
ref 4 . G C 10 PASS .
ref 5 . A G 10 PASS .
ref 6 . T A,C 10 PASS .
ref 7 . TT T 10 PASS .
ref 8 . T TA 10 PASS .
ref 10 . A C 10 PASS .
'''

        vcf = vcf.replace(' ', '\t')
        vcf_fhand = NamedTemporaryFile(suffix='.vcf')
        vcf_fhand.write(vcf)
        vcf_fhand.flush()
        vcf_compressed = NamedTemporaryFile(suffix='.vcf.gz')
        compress_with_bgzip(vcf_fhand, vcf_compressed)
        index_vcf_with_tabix(vcf_compressed.name)

        ref_fhand = NamedTemporaryFile(suffix='.fasta')
        ref_fhand.write('>ref\nACTGATTTA\n')
        ref_fhand.flush()

        out_fhand1 = StringIO()

        writer = IlluminaWriter(ref_fhand.name, out_fhand1, min_length=0,
                                vcf_fpath=vcf_compressed.name)

        for snp in Reader(filename=vcf_compressed.name):
            writer.write(snp)

        # With no SNPs converted to IUPAC around
        out_fhand2 = StringIO()
        writer = IlluminaWriter(ref_fhand.name, out_fhand2, min_length=0)
        for snp in Reader(filename=vcf_compressed.name):
            writer.write(snp)

        remove(vcf_compressed.name + '.tbi')
        expected = u'CHROM\tPOS\tID\tseq\n'
        expected += u'ref\t1\t.\t[C/T]*WSRHT-^A\n'
        expected += u'ref\t2\t.\tY[CT/CA/C]SRHT-^A\n'
        expected += u'ref\t3\t.\tYC[T/A]SRHT-^A\n'
        expected += u'ref\t4\t.\tY*W[G/C]RHT-^A\n'
        expected += u'ref\t5\t.\tY*WS[A/G]HT-^A\n'
        expected += u'ref\t6\t.\tY*WSR[T/A/C]T-^A\n'
        expected += u'ref\t7\t.\tY*WSRH[TT/T]A\n'
        expected += u'ref\t8\t.\tY*WSRHT[T/TA]A\n'
        expected += u'ref\t10\t.\tY*WSRHT-^A[A/C]\n'
        assert expected == out_fhand1.getvalue()

        expected = u'CHROM\tPOS\tID\tseq\nref\t1\t.\t[C/T]'
        assert expected in out_fhand1.getvalue()