示例#1
0
    def test_haplotypes(self):
        """Compare a haplotype-bearing set with a rebuilt and a regrouped copy.

        The set is rebuilt from its own iterator and then from a different
        haplotype grouping of the same edits; every diff is expected to be
        empty.
        """
        # REF      TGG   TT    G-
        #          2093  2099  3200
        # varset1  CCC   GG
        #                CC    GG
        #          v1   v2,v3   v4
        edit_ccc = factory.from_edit('chr24', 2093, 'TGG', 'CCC')
        edit_gg = factory.from_edit('chr24', 2098, 'TT', 'GG')
        edit_cc = factory.from_edit('chr24', 2098, 'TT', 'CC')
        edit_ins = factory.from_edit('chr24', 3200, 'G', 'GG')

        # One haplotype plus two free-standing variants.
        original = VariantSet.from_variants([
            variant.Haplotype.from_variants([edit_ccc, edit_gg]),
            edit_cc,
            edit_ins,
        ])
        # Round-trip through the set's own variant iterator.
        rebuilt = VariantSet.from_variants(original.iter_vrt())

        found = list(rebuilt.find_vrt('chr24', 2090, 2095, expand=True))
        self.assertEqual(len(found), 2)
        self.assertEqual(rebuilt.nof_unit_vrt(), 8)
        self.assertEqual(original.diff(rebuilt).nof_unit_vrt(), 0)
        leftover = list(rebuilt.diff_vrt(original).iter_vrt())
        self.assertEqual(len(leftover), 0)

        # Same four edits, now grouped into two haplotypes.
        regrouped = VariantSet.from_variants([
            variant.Haplotype.from_variants([edit_ccc, edit_gg]),
            variant.Haplotype.from_variants([edit_cc, edit_ins]),
        ])

        self.assertEqual(regrouped.diff(rebuilt).nof_unit_vrt(), 0)
        self.assertEqual(rebuilt.diff(regrouped).nof_unit_vrt(), 0)
示例#2
0
    def test_diff_vrt(self):
        """Check size and boundary members of the difference s1 - s2."""
        # REF    G     TTGG         C
        #        1206   2093        10044
        # s1     C      CCC         T
        #        G      CCC         G
        #                           C
        #              2092         10044
        # s2           TT           T
        #              AC           T

        edits_s1 = [
            ('chr24', 1206, 'G', 'C'),
            ('chr24', 2093, 'TG', 'CC'),
            ('chr24', 2095, 'G', 'C'),
            ('chr24', 10044, 'C', 'T'),
            ('chr24', 10044, 'C', 'G'),
        ]
        edits_s2 = [
            ('chr24', 2092, 'TT', 'AC'),
            ('chr24', 10044, 'C', 'T'),
        ]

        s1 = VariantSet.from_variants(
            [factory.from_edit(*edit) for edit in edits_s1])
        s2 = VariantSet.from_variants(
            [factory.from_edit(*edit) for edit in edits_s2])

        difference = s1.diff(s2)
        self.assertEqual(difference.nof_unit_vrt(), 4)

        # Unpacking doubles as a check that exactly four variants come out.
        first, _second, _third, last = list(difference.iter_vrt())

        self.assertEqual([first.start, first.ref, first.alt],
                         [1206, 'G', 'C'])
        self.assertEqual([last.start, last.ref, last.alt],
                         [10044, 'C', 'G'])
示例#3
0
    def test_diff_of_ambig_indel(self):
        """Diff two deletions given at different positions of a T-run.

        With default options the diff currently keeps one variant.
        """
        #     9945
        #     CTTTTTCAT
        # s1  CTT--TCAT
        # s2  C--TTTCAT
        del_right = factory.from_edit('chr24', 9947, 'TT', 'T')
        del_left = factory.from_edit('chr24', 9945, 'CTT', 'C')
        s1 = VariantSet.from_variants([del_right])
        s2 = VariantSet.from_variants([del_left])

        # TODO fix/reconsider: the stricter check below is disabled.
        # diff1 = s1.diff(s2, match_partial=False).iter_vrt()
        # self.assertEqual(len(list(diff1)), 0)
        remaining = list(s1.diff(s2).iter_vrt())
        self.assertEqual(len(remaining), 1)  # TODO fix/reconsider
示例#4
0
 def test_ambig_difference_snp_in_locus(self):
     """Expect no common variants between an insertion and nearby edits.

     s1 holds one insertion; s2 holds a substitution and an insertion at
     10044. With ``match_ambig=True`` the intersection must be empty.
     """
     #         10043
     # Ref     TC-ACA--G
     # v1 s1         CA
     # v1 s2    G
     # v2 s2     T
     fac = variant.VariantFactory(reference=self.chr24, normindel=True)
     ins_ca = fac.from_edit('chr24', 10047, 'A', 'ACA')
     subst_g = fac.from_edit('chr24', 10044, 'C', 'G')
     ins_t = fac.from_edit('chr24', 10044, 'C', 'CT')

     s1 = VariantSet.from_variants([ins_ca])
     s2 = VariantSet.from_variants([subst_g, ins_t])

     shared = list(s1.comm(s2, match_ambig=True).iter_vrt())
     self.assertEqual(len(shared), 0)
示例#5
0
    def test_match_with_haplotypes(self):
        """Match a query haplotype against a set made of two haplotypes.

        Exactly one match entry is expected. Its key must be the key of the
        query's lowest-start variant, and its base must be edit-equal to the
        lowest-start variant of ``hap1``.
        """
        # REF      TGG   TT    G|
        #          2093  2099  3200
        # varset1  CCC   GG
        #                CC    GG
        #          r1   r2,r3   r4
        r1 = factory.from_edit('chr24', 2093, 'TGG', 'CCC')
        r2 = factory.from_edit('chr24', 2098, 'TT', 'GG')
        r3 = factory.from_edit('chr24', 2098, 'TT', 'CC')
        r4 = factory.from_edit('chr24', 3200, 'GG', 'G')

        def by_start(vrt):
            return vrt.start

        hap1 = Haplotype.from_variants([r1, r2])
        # min() returns the first minimal element, like sorted(...)[0].
        ccc1 = min(hap1.variants, key=by_start)
        hap2 = Haplotype.from_variants([r3, r4])
        s1 = VariantSet.from_variants([hap1, hap2])

        query = Haplotype.from_variants([r1, r4])
        ccc2 = min(query.variants, key=by_start)

        match = s1.match(query)
        self.assertEqual(len(match), 1)

        # Reduce each matched object to its ``base`` before comparing.
        found = {}
        for key, hits in match.items():
            found[key] = [hit.base for hit in hits]
        expected = {ccc2.key: [ccc1]}

        self.assertEqual(len(found), len(expected))
        self.assertEqual(list(found), list(expected))
        for key, bases in found.items():
            pairs = zip(bases, expected[key])
            self.assertTrue(all([a.edit_equal(b) for a, b in pairs]))
示例#6
0
    def test_variant_cluster2(self):
        """Expect three common variants when one set adds an insertion."""

        def snp_trio():
            # Fresh objects per call so the two sets do not share instances.
            return [
                variant.SNP('chr1', 13366968, 'A'),
                variant.SNP('chr1', 13366969, 'T'),
                variant.SNP('chr1', 13366970, 'G'),
            ]

        vs1 = VariantSet.from_variants(snp_trio())
        vs2 = VariantSet.from_variants(
            snp_trio() + [variant.Ins('chr1', 13366970, 'TTT')])

        shared = list(vs1.comm(vs2, match_partial=False).iter_vrt())
        self.assertEqual(len(shared), 3)
示例#7
0
    def test_from_variants_vcf(self):
        """Write a set to VCF with two INFO spec forms and read it back.

        The INFO specification is passed once as a list of tuples and once
        as the ``dtype['info']`` object of the source set. For each form the
        written header must contain the DP4 line, and the re-read variants
        must match the originals in edit and in ``NSV``.
        """
        vs0 = varset.VariantSet.from_vcf(
            pkg_file('genomvar.test', 'data/example1.vcf'), parse_info=True)
        variants1 = sorted(vs0.iter_vrt(), key=lambda v: v.key)
        vs = VariantSet.from_variants(variants1)

        _desc = 'Test for multinumber field'
        expected_header = \
            '##INFO=<ID=DP4,Number=4,Type=Integer,Description="{}">'\
            .format(_desc)
        info_spec_tuples = [('DP4', 4, 'Integer', _desc),
                            ('NSV', 1, 'Integer')]
        info_spec_dict = vs0.dtype['info']

        for info_spec in (info_spec_tuples, info_spec_dict):
            # The context manager closes and removes the temporary file
            # deterministically instead of relying on garbage collection.
            with tempfile.NamedTemporaryFile(suffix='.vcf') as tf:
                with open(tf.name, 'wt') as fh:
                    vs.to_vcf(fh, info_spec=info_spec)

                with open(tf.name, 'rt') as fh:
                    self.assertIn(expected_header, fh.read().splitlines())

                variants2 = sorted(
                    VCFReader(tf.name).iter_vrt(parse_info=True),
                    key=lambda v: v.key)
                self.assertEqual(len(variants1), len(variants2))
                for v1, v2 in zip(variants1, variants2):
                    self.assertTrue(v1.edit_equal(v2))
                    self.assertEqual(v1.attrib['info']['NSV'],
                                     v2.attrib['info']['NSV'])
示例#8
0
    def test_from_variants_to_vcf_with_sampdata(self):
        """Write sample data (GT and AD) to VCF and read it back.

        The written header must contain the AD FORMAT line, and the re-read
        variants must match the originals in edit and in the ``AD`` value of
        sample ``SAMP1``.
        """
        vcf_path = pkg_file('genomvar.test', 'data/example3.vcf')
        variants1 = sorted(VCFReader(vcf_path).iter_vrt(parse_samples=True),
                           key=lambda v: v.key)
        vs = VariantSet.from_variants(variants1)
        expected_header = ('##FORMAT=<ID=AD,Number=R,Type=Integer,'
                           'Description="">')

        # The context manager closes and removes the temporary file
        # deterministically instead of relying on garbage collection.
        with tempfile.NamedTemporaryFile(suffix='.vcf') as tf:
            with open(tf.name, 'wt') as fh:
                vs.to_vcf(
                    fh,
                    format_spec=[RESERVED_FORMAT.GT,
                                 ('AD', 'R', 'Integer', '')],
                    samples=['SAMP1'])

            with open(tf.name, 'rt') as fh:
                self.assertIn(expected_header, fh.read().splitlines())

            variants2 = sorted(
                VCFReader(tf.name).iter_vrt(parse_samples=True),
                key=lambda v: v.key)
            self.assertEqual(len(variants1), len(variants2))
            for v1, v2 in zip(variants1, variants2):
                self.assertTrue(v1.edit_equal(v2))
                self.assertEqual(v1.attrib['samples']['SAMP1']['AD'],
                                 v2.attrib['samples']['SAMP1']['AD'])
示例#9
0
    def test_mnp_com_split(self):
        """Compare a two-base edit with the same change split in two.

        The first common variant must start at 22 with ref ``AG``, and the
        difference vset1 - vset2 must be empty.
        """
        #                           23
        #    TTCACTTAGCATAATGTCTTCAAGATT
        # v1                       TT -single
        # v2                       TT -splitted
        single = factory.from_edit('chr24', 22, 'AG', 'TT')
        vset1 = VariantSet.from_variants([single])

        split_edits = [
            factory.from_edit('chr24', 22, 'A', 'T'),
            factory.from_edit('chr24', 23, 'G', 'T'),
        ]
        vset2 = VariantSet.from_variants(split_edits)

        common = list(vset1.comm(vset2).iter_vrt())
        head = common[0]
        self.assertEqual([head.start, head.ref], [22, 'AG'])

        only_in_first = list(vset1.diff(vset2).iter_vrt())
        self.assertFalse(only_in_first)
示例#10
0
    def test_strip_order_dependent_Ambig(self):
        """Diff two CA insertions given at different positions of a repeat.

        The diff must be empty with ``match_ambig=True`` and must keep one
        variant with ``match_ambig=False``.
        """
        #    10043
        # R  T--CA--CAG
        # v1 TCACA--CAG
        # v2 T--CACACAG
        reference = pkg_file('genomvar.test', 'data/chr24.fna')
        factory = variant.VariantFactory(reference=reference, normindel=True)

        s1 = VariantSet.from_variants(
            [factory.from_edit('chr24', 10043, 'T', 'TCA')])
        s2 = VariantSet.from_variants(
            [factory.from_edit('chr24', 10045, 'A', 'ACA')])

        ambig_diff = list(s1.diff(s2, match_ambig=True).iter_vrt())
        self.assertEqual(len(ambig_diff), 0)

        strict_diff = list(s1.diff(s2, match_ambig=False).iter_vrt())
        self.assertEqual(len(strict_diff), 1)
示例#11
0
 def test_ambig_difference_different_ambig(self):
     """Expect an empty ambiguity-aware diff for shifted indel pairs.

     Each set holds one CA insertion and one CA deletion; the two sets give
     them at opposite ends of the repeat (see the diagram).
     """
     #         10043
     # Ref     T--CA--CA--G
     # v1 s1   T--CA--CACAG ins CA right
     # v2 s1   T------CA--G del CA left
     # v1 s2   TCACA--CA--G ins CA left
     # v2 s2   T--CA------G del CA right
     fac = variant.VariantFactory(reference=self.chr24, normindel=True)

     s1_variants = [
         fac.from_edit('chr24', 10047, 'A', 'ACA'),
         fac.from_edit('chr24', 10043, 'TCA', 'T'),
     ]
     s1 = VariantSet.from_variants(s1_variants)

     s2_variants = [
         fac.from_edit('chr24', 10043, 'T', 'TCA'),
         fac.from_edit('chr24', 10045, 'ACA', 'A'),
     ]
     s2 = VariantSet.from_variants(s2_variants)

     remaining = list(s1.diff(s2, match_ambig=True).iter_vrt())
     self.assertEqual(len(remaining), 0)
示例#12
0
    def test_from_variants(self):
        """Build a set from file-backed variants and check reference errors.

        ``from_variants`` must raise ``ValueError`` when a reference is given
        and a variant lies beyond the reference bounds or on a chromosome
        absent from the reference.
        """
        vfset = VariantSetFromFile(
            pkg_file('genomvar.test', 'data/example1.vcf'))
        vset = VariantSet.from_variants(list(vfset.iter_vrt()))
        vrt = list(vset.find_vrt('chr24', 1200, 1210))
        self.assertEqual(len(vrt), 2)

        # Test error on out of reference bounds
        with self.assertRaises(ValueError):
            VariantSet.from_variants(list(vfset.iter_vrt()) +
                                     [variant.SNP('chr24', 10000000, 'C')],
                                     reference=self.chr24)

        # Test error on chromosome not in reference
        # (the result is not bound to a name: the call is expected to raise)
        with self.assertRaises(ValueError):
            VariantSet.from_variants(list(vfset.iter_vrt()) +
                                     [variant.SNP('chr2', 10, 'C')],
                                     reference=self.chr24)
示例#13
0
    def test_in_the_middle(self):
        """Diff a three-base edit against an edit of its middle base.

        By default the two flanking single-base changes must remain; with
        ``match_partial=False`` one variant must remain.
        """
        #      2
        #    TTCACTTAGCAT
        # v1   GGG
        # v2    G
        wide = factory.from_edit('chr24', 2, 'CAC', 'GGG')
        middle = factory.from_edit('chr24', 3, 'A', 'G')
        s1 = VariantSet.from_variants([wide])
        s2 = VariantSet.from_variants([middle])

        remaining = list(s1.diff(s2).iter_vrt())
        self.assertEqual(len(remaining), 2)

        left, right = remaining
        self.assertEqual([left.start, left.ref, left.alt], [2, 'C', 'G'])
        self.assertEqual([right.start, right.ref, right.alt], [4, 'C', 'G'])

        whole = list(s1.diff(s2, match_partial=False).iter_vrt())
        self.assertEqual(len(whole), 1)
示例#14
0
    def test_VariantSet_cmp2(self):
        """Compare a set with a copy built from its variants in reverse.

        The diff must be empty, and the intersection must have as many unit
        variants as the source set, in both directions.
        """
        s1 = VariantSet.from_vcf(
            pkg_file('genomvar.test', 'data/example3.vcf'))
        backwards = reversed(list(s1.iter_vrt()))
        s2 = VariantSet.from_variants(backwards)

        self.assertEqual(s1.diff(s2).nof_unit_vrt(), 0)

        n_common = s1.comm(s2).nof_unit_vrt()
        self.assertEqual(n_common, s1.nof_unit_vrt())
        self.assertEqual(s2.comm(s1).nof_unit_vrt(), n_common)
示例#15
0
    def test_to_vcf_no_ref(self):
        """Round-trip variants built without ref alleles through VCF text.

        The VCF is written to an in-memory buffer with a reference supplied;
        both variants must be common to the original and re-read sets.
        """
        source_variants = [
            variant.Del("chr24", 23, 24),
            variant.SNP("chr24", 1206, "C"),
        ]
        vs1 = VariantSet.from_variants(source_variants)

        buf = io.StringIO()
        vs1.to_vcf(buf, reference=self.chr24)
        buf.seek(0)  # rewind so the reader starts at the header
        vs2 = VariantSet.from_vcf(buf)

        n_common = vs1.comm(vs2).nof_unit_vrt()
        self.assertEqual(n_common, 2)
示例#16
0
    def test_many_chroms_shuffled(self):
        """Compare a set with a copy built from its variants reordered.

        comm must keep every unit variant and diff must be empty, in both
        directions.
        """
        vs1 = VariantSet.from_vcf(
            pkg_file('genomvar.test', 'data/example3.vcf'))
        in_order = list(vs1.iter_vrt())
        # Fixed interleaving of the first eight variants.
        shuffled = []
        for idx in (0, 7, 1, 6, 2, 5, 3, 4):
            shuffled.append(in_order[idx])
        vs2 = VariantSet.from_variants(shuffled)

        for left, right in ((vs1, vs2), (vs2, vs1)):
            self.assertEqual(left.comm(right).nof_unit_vrt(),
                             left.nof_unit_vrt())
            self.assertEqual(left.diff(right).nof_unit_vrt(), 0)
示例#17
0
    def test_ovlp(self):
        """Check ``ovlp`` of a one-insertion set against two queries.

        An edit at the following position must give no overlaps; the same
        insertion with ``match_ambig=False`` must give one.
        """
        #                        23
        # TTCACTTAGCATAATGTCTTCAAG|ATT
        #                         G
        #                          C <- not interfering

        ins = factory.from_edit(chrom='chr24', start=23, ref='G', alt='GG')
        s1 = VariantSet.from_variants([ins])

        neighbour = factory.from_edit(
            chrom='chr24', start=24, ref='A', alt='C')
        self.assertEqual(len(s1.ovlp(neighbour)), 0)

        same_ins = factory.from_edit(
            chrom='chr24', start=23, ref='G', alt='GG')
        hits = s1.ovlp(same_ins, match_ambig=False)
        self.assertEqual(len(hits), 1)
示例#18
0
    def test_match2(self):
        """Check ``match`` counts for plain and ambiguous deletions.

        Each case is (query, keyword arguments, expected number of matches),
        run first against a set holding ``del1`` and then against a set
        holding ``adel1``.
        """
        #       1
        # R     TCACAG
        # del1  T--CAG
        # del2  TCA--G
        del1 = variant.Del('chrom', 1, 3)
        adel1 = variant.AmbigDel('chrom', (1, 1), (5, 3), 'CA', '')
        del2 = variant.Del('chrom', 2, 4)
        adel2 = variant.AmbigDel('chrom', (1, 2), (5, 4), 'AC', '')

        vs = VariantSet.from_variants([del1])
        cases_plain = [
            (adel1, {}, 1),
            (adel2, {}, 0),
            (adel2, {'match_ambig': True}, 1),
        ]
        for query, options, expected in cases_plain:
            self.assertEqual(len(vs.match(query, **options)), expected)

        vs2 = VariantSet.from_variants([adel1])
        cases_ambig = [
            (adel1, {}, 1),
            (del1, {}, 1),
            (adel2, {}, 0),
            (adel2, {'match_ambig': True}, 1),
            (del2, {'match_ambig': True}, 1),
            (del2, {'match_ambig': False}, 0),
        ]
        for query, options, expected in cases_ambig:
            self.assertEqual(len(vs2.match(query, **options)), expected)
示例#19
0
    def test_variant_cluster(self):
        """Diff three adjacent SNPs against indels and against one MNP.

        All three SNPs must survive a diff against the deletion/insertion
        set. Against the MNP the diff must be empty in both directions with
        ``match_partial=True``; with ``match_partial=False`` it must keep
        three and one variants respectively.
        """
        vs1 = VariantSet.from_variants([
            variant.SNP('chr1', 13366968, 'A'),
            variant.SNP('chr1', 13366969, 'T'),
            variant.SNP('chr1', 13366970, 'G'),
        ])
        vs2 = VariantSet.from_variants([
            variant.Del('chr1', 13366967, 13366969),
            variant.Ins('chr1', 13366971, 'TG'),
        ])

        remaining = list(vs1.diff(vs2).iter_vrt())
        self.assertEqual(len(remaining), 3)

        vs3 = VariantSet.from_variants(
            [variant.MNP("chr1", 13366968, 'ATG')])

        def count_diff(left, right, partial):
            # Number of variants left after ``left - right``.
            result = left.diff(right, match_partial=partial)
            return len(list(result.iter_vrt()))

        self.assertEqual(count_diff(vs1, vs3, True), 0)
        self.assertEqual(count_diff(vs3, vs1, True), 0)
        self.assertEqual(count_diff(vs1, vs3, False), 3)
        self.assertEqual(count_diff(vs3, vs1, False), 1)
示例#20
0
    def test_from_variants_with_attributes(self):
        """Check INFO and id attributes on variants of a VCF-derived set.

        Attributes are checked both on variants found by position and on the
        first record returned by ``to_records``.
        """
        vcf_path = pkg_file('genomvar.test', 'data/example1.vcf')
        parsed = list(VCFReader(vcf_path).iter_vrt(parse_info=True))
        vset = VariantSet.from_variants(parsed)

        hits = list(vset.find_vrt('chr24', 1200, 1210))
        self.assertEqual(len(hits), 2)

        first = hits[0]
        self.assertEqual(first.attrib['info']['NSV'], 1)
        self.assertEqual(first.attrib['id'], '5')

        second = hits[1]
        self.assertEqual(second.attrib['id'], None)

        records = vset.to_records()
        first_info = records[0]['attrib']['info']
        self.assertEqual(first_info['NSV'], 2)
示例#21
0
    def test_consistency_of_diff_and_com(self):
        """Check that comm and diff partition each set consistently.

        With ``match_ambig=True`` the two intersections must have the same
        number of unit variants, and each set's count must equal its
        intersection count plus its difference count.
        """
        # REF    G     TTGG         CACAGTTC---CA-C
        #        1206   2093        10044
        # s1     C      CCC         T       CC  G
        #        C      CCC         G       CC
        #              2092         10044
        # s2           TT           T        CC
        #              AC           T           G

        vfac = VariantFactory(reference=self.chr24, normindel=True)
        variants1 = [('chr24', 1206, 'G', 'C'), ('chr24', 2093, 'TG', 'CC'),
                     ('chr24', 2095, 'G', 'C'), ('chr24', 10044, 'C', 'T'),
                     ('chr24', 10044, 'C', 'G'), ('chr24', 10051, 'C', 'CCC')]
        variants2 = [('chr24', 2092, 'TT', 'AC'), ('chr24', 10044, 'C', 'T'),
                     ('chr24', 10052, 'C', 'CCC'), ('chr24', 10053, 'A', 'AG')]

        s1 = VariantSet.from_variants([vfac.from_edit(*v) for v in variants1])
        s2 = VariantSet.from_variants([vfac.from_edit(*v) for v in variants2])

        com1 = s1.comm(s2, match_ambig=True)
        com2 = s2.comm(s1, match_ambig=True)
        s1_s2 = s1.diff(s2, match_ambig=True)
        s2_s1 = s2.diff(s1, match_ambig=True)

        nof = {
            'com1': com1.nof_unit_vrt(),
            # Count com2 itself; counting com1 here would make the symmetry
            # assertion below compare a value with itself.
            'com2': com2.nof_unit_vrt(),
            's1': s1.nof_unit_vrt(),
            's2': s2.nof_unit_vrt(),
            's1_s2': s1_s2.nof_unit_vrt(),
            's2_s1': s2_s1.nof_unit_vrt(),
        }

        self.assertEqual(nof['com1'], 3)
        self.assertEqual(nof['com2'], nof['com1'])
        self.assertEqual(nof['com1'] + nof['s1_s2'], nof['s1'])
        self.assertEqual(nof['com1'] + nof['s2_s1'], nof['s2'])
示例#22
0
 def test_from_variants_to_records(self):
     """Check shape and field names of ``to_records`` output.

     The set holds two indels and a haplotype of two edits; four records
     with the listed fields are expected.
     """
     fac = variant.VariantFactory(reference=self.chr24, normindel=True)
     hap_members = [
         fac.from_edit('chr24', 1207, 'G', 'C'),
         fac.from_edit('chr24', 1207, 'G', 'T'),
     ]
     hap = Haplotype.from_variants(hap_members)

     insertion = fac.from_edit('chr24', 10043, 'T', 'TCA')
     deletion = fac.from_edit('chr24', 10045, 'ACA', 'A')
     vs = VariantSet.from_variants([insertion, deletion, hap])

     recs = vs.to_records()
     self.assertEqual(recs.shape, (4, ))

     expected_fields = [
         'chrom', 'start', 'end', 'ref', 'alt', 'vartype', 'phase_group',
         'attrib'
     ]
     self.assertEqual(list(recs.dtype.fields), expected_fields)
示例#23
0
    def test_from_variants_to_vcf_with_info(self):
        """Write INFO fields to VCF, reject bad specs, and read the file back.

        Each invalid INFO spec must raise ``ValueError`` with a message
        mentioning ``'INFO spec'``. With valid specs the header must contain
        the NSV line, and the re-read variants must match the originals in
        edit and in ``NSV``.
        """
        variants1 = sorted(VCFReader(
            pkg_file('genomvar.test',
                     'data/example1.vcf')).iter_vrt(parse_info=True),
                           key=lambda v: v.key)
        vs = VariantSet.from_variants(variants1)

        # Test Invalid specs
        invalid_specs = [('NSV', ), ('NSV', 1, 'Integedr'),
                         ('NSV', 'C', 'Integer', 'Number of Simple Variants')]
        _buf = io.StringIO()
        for spec in invalid_specs:
            with self.assertRaises(ValueError) as cm:
                vs.to_vcf(_buf, info_spec=[spec])
            self.assertIn('INFO spec', cm.exception.args[0])

        expected_header = ('##INFO=<ID=NSV,Number=1,Type=Integer,'
                           'Description="Number of Simple Variants">')

        # The context manager closes and removes the temporary file
        # deterministically instead of relying on garbage collection.
        with tempfile.NamedTemporaryFile(suffix='.vcf') as tf:
            with open(tf.name, 'wt') as fh:
                vs.to_vcf(fh,
                          info_spec=[
                              ('NSV', 1, 'Integer',
                               'Number of Simple Variants'),
                              ('AF', 'A', 'Float', '', 'source', 'version')
                          ])

            with open(tf.name, 'rt') as fh:
                self.assertIn(expected_header, fh.read().splitlines())

            variants2 = sorted(VCFReader(tf.name).iter_vrt(parse_info=True),
                               key=lambda v: v.key)
            self.assertEqual(len(variants1), len(variants2))
            for v1, v2 in zip(variants1, variants2):
                self.assertTrue(v1.edit_equal(v2))
                self.assertEqual(v1.attrib['info']['NSV'],
                                 v2.attrib['info']['NSV'])
示例#24
0
 def test_diff_del(self):
     """Two sets holding the same deletion: empty diff, one common."""
     deletion = variant.Del(chrom="chr1", start=6751613, end=6751627)
     vs1 = VariantSet.from_variants([deletion])
     vs2 = VariantSet.from_variants([deletion])

     only_in_first = list(vs1.diff(vs2).iter_vrt())
     self.assertEqual(len(only_in_first), 0)

     shared = list(vs1.comm(vs2).iter_vrt())
     self.assertEqual(len(shared), 1)
示例#25
0
 def test_diff_ins(self):
     """Two sets holding the same insertion: empty diff, one common."""
     insertion = variant.Ins(chrom="chr1", start=6751613, alt='AGTC')
     vs1 = VariantSet.from_variants([insertion])
     vs2 = VariantSet.from_variants([insertion])

     only_in_first = list(vs1.diff(vs2).iter_vrt())
     self.assertEqual(len(only_in_first), 0)

     shared = list(vs1.comm(vs2).iter_vrt())
     self.assertEqual(len(shared), 1)