Пример #1
0
    def test_from_vcf_to_records(self):
        """Check the field names of ``to_records`` output, nested and flat."""
        vcf_path = pkg_file('genomvar.test', 'data/example1.vcf')
        vs = VariantSet.from_vcf(vcf_path, parse_info=True,
                                 parse_samples=True)

        self.assertEqual(vs._samples, ['SAMP1'])

        # Nested layout: INFO and sample values sit in sub-records.
        nested = vs.to_records(nested=True)
        expected_top = [
            'chrom', 'start', 'end', 'ref', 'alt', 'vartype', 'phase_group',
            'info', 'SAMPLES'
        ]
        expected_info = [
            'NSV', 'AF', 'DP4', 'ECNT', 'pl', 'mt', 'RECN', 'STR'
        ]
        self.assertEqual(list(nested.dtype.fields), expected_top)
        self.assertEqual(list(nested['info'].dtype.fields), expected_info)
        sample_block = nested['SAMPLES']
        self.assertEqual(list(sample_block.dtype.fields), ['SAMP1'])
        self.assertEqual(list(sample_block['SAMP1'].dtype.fields), ['GT'])

        # Flat layout: every value is a top-level, prefixed field.
        flat = vs.to_records(nested=False)
        expected_flat = [
            'chrom', 'start', 'end', 'ref', 'alt', 'vartype', 'phase_group',
            'info_NSV', 'info_AF', 'info_DP4', 'info_ECNT', 'info_pl',
            'info_mt', 'info_RECN', 'info_STR', 'SAMPLES_SAMP1_GT'
        ]
        self.assertEqual(list(flat.dtype.fields), expected_flat)
Пример #2
0
    def test_asterisk_variant(self):
        """Count variants found in a chr1 window of the asterisk example VCF."""
        vcf_path = pkg_file('genomvar.test',
                            'data/example_with_asterisk.vcf.gz')
        vset = VariantSet.from_vcf(vcf_path, parse_info=True)

        hits = list(vset.find_vrt('chr1', 995507, 995515))
        self.assertEqual(len(hits), 3)
Пример #3
0
    def test_from_vcf_with_attr(self):
        """Check INFO values on variants loaded with ``parse_info=True``."""
        s = VariantSet.from_vcf(pkg_file('genomvar.test', 'data/example1.vcf'),
                                parse_info=True)

        def non_null(start, end):
            # Variants in the chr24 window that are not variant.Null.
            return [v for v in s.find_vrt('chr24', start, end)
                    if not v.is_variant_instance(variant.Null)]

        found = list(s.find_vrt('chr24', 150, 160))
        self.assertEqual(len(found), 1)
        only = found[0]
        self.assertEqual(only.attrib['info']['AF'], 1.0)

        # Multiallelic locus: every non-Null variant carries AF 0.5
        found = list(s.find_vrt('chr24', 20, 30))
        self.assertEqual(len(found), 2)
        for candidate in found:
            if candidate.is_variant_instance(variant.Null):
                continue
            self.assertEqual(candidate.attrib['info']['AF'], 0.5)

        # Looking up an INFO field that is not there raises ValueError
        found = non_null(450, 460)
        self.assertEqual(len(found), 1)
        only = found[0]
        with self.assertRaises(ValueError):
            only.attrib['info']['Randomfields']

        found = non_null(4750, 4760)
        self.assertEqual(len(found), 1)
        only = found[0]
        self.assertEqual(only.attrib['info']['STR'], True)
Пример #4
0
 def test_empty_vcf(self):
     """A buffer holding only the leading '#' lines has zero unit variants."""
     buf = io.StringIO()
     with open(pkg_file('genomvar.test', 'data/example1.vcf')) as fh:
         # Copy the leading run of '#' lines and stop at the first other line.
         header_lines = itertools.takewhile(
             lambda text: text.startswith('#'), fh)
         buf.writelines(header_lines)
     buf.seek(0)
     header_only = VariantSet.from_vcf(buf)
     self.assertEqual(header_only.nof_unit_vrt(), 0)
Пример #5
0
 def test_sv_types(self):
     """Loading example4.vcf.gz records warnings and yields 100 unit variants.

     Asserts that more than one warning is recorded and that the last
     recorded warning message mentions 'Structural'.
     """
     with warnings.catch_warnings(record=True) as wrn:
         # Install the filter before running the code under test; set
         # afterwards it has no effect on warnings already raised, and
         # the filters active at that time may drop some of them.
         warnings.simplefilter('always')
         vs = VariantSet.from_vcf(
             pkg_file('genomvar.test', 'data/example4.vcf.gz'))
         self.assertEqual(vs.nof_unit_vrt(), 100)
         self.assertGreater(len(wrn), 1)
         self.assertIn('Structural', str(wrn[-1].message))
Пример #6
0
    def test_sort_chroms(self):
        """Check chromosome order after default and key-based sorting."""
        vcf_path = pkg_file('genomvar.test', 'data/example2.vcf.gz')
        vs = VariantSet.from_vcf(vcf_path)

        vs.sort_chroms()
        self.assertEqual(list(vs.get_chroms()), ['chr23', 'chr24'])

        def chr24_first(chrom):
            # Rank chr24 ahead of every other chromosome.
            if chrom == 'chr24':
                return 1
            return 2

        vs.sort_chroms(key=chr24_first)
        self.assertEqual(list(vs.get_chroms()), ['chr24', 'chr23'])
Пример #7
0
 def test_from_string_buffer(self):
     """Load a variant set from an in-memory text buffer and query it."""
     buf = io.StringIO()
     with open(pkg_file('genomvar.test', 'data/example1.vcf'), 'rt') as fh:
         buf.writelines(fh)
     buf.seek(0)
     vs = VariantSet.from_vcf(buf)

     n_150_160 = len(list(vs.find_vrt('chr24', 150, 160)))
     self.assertEqual(n_150_160, 1)
     n_20_30 = len(list(vs.find_vrt('chr24', 20, 30)))
     self.assertEqual(n_20_30, 2)
Пример #8
0
    def test_from_vcf_with_samples(self):
        """Check the SAMP1 genotype on a variant loaded with sample parsing."""
        vcf_path = pkg_file('genomvar.test', 'data/example1.vcf')
        vset = VariantSet.from_vcf(vcf_path, parse_samples=True)

        hits = list(vset.find_vrt('chr24', 1200, 1210))
        self.assertEqual(len(hits), 2)
        first, _second = hits
        genotype = first.attrib['samples']['SAMP1']['GT']
        self.assertEqual(genotype, (1, 0))
Пример #9
0
    def test_from_vcf_problematic(self):
        """Check the parsed 'PH' INFO value of one variant in example5.vcf.gz."""
        vcf_path = pkg_file('genomvar.test', 'data/example5.vcf.gz')
        vset = VariantSet.from_vcf(vcf_path, parse_info=True)

        hits = list(vset.find_vrt('1', 160109406, 160109409))

        self.assertEqual(len(hits), 1)
        only = hits[0]
        self.assertEqual(only.attrib['info']['PH'], ('.', ))
Пример #10
0
    def test_VariantSet_cmp2(self):
        """Compare a set with a copy built from its variants in reverse order."""
        original = VariantSet.from_vcf(
            pkg_file('genomvar.test', 'data/example3.vcf'))
        all_variants = list(original.iter_vrt())
        mirrored = VariantSet.from_variants(reversed(all_variants))

        # Nothing should be left over in the difference.
        only_in_original = original.diff(mirrored)
        self.assertEqual(only_in_original.nof_unit_vrt(), 0)

        # The intersection covers the whole set, in either direction.
        shared = original.comm(mirrored)
        self.assertEqual(shared.nof_unit_vrt(), original.nof_unit_vrt())
        self.assertEqual(mirrored.comm(original).nof_unit_vrt(),
                         shared.nof_unit_vrt())
Пример #11
0
    def test_from_vcf_with_info(self):
        """Check alt allele and NSV INFO value of a variant with parsed INFO."""
        vcf_path = pkg_file('genomvar.test', 'data/example1.vcf')
        vset = VariantSet.from_vcf(vcf_path, parse_info=True)

        hits = list(vset.find_vrt('chr24', 1200, 1210))
        self.assertEqual(len(hits), 2)
        first, _second = hits
        self.assertEqual(first.alt, 'C')
        self.assertEqual(first.attrib['info']['NSV'], 1)
Пример #12
0
    def test_VariantSet_cmp(self):
        """Check diff and comm between the example1 and example2 sets."""
        s1 = VariantSet.from_vcf(
            pkg_file('genomvar.test', 'data/example1.vcf'),
            parse_info=True, parse_samples=True)
        s2 = VariantSet.from_vcf(
            pkg_file('genomvar.test', 'data/example2.vcf.gz'))

        in_memory_diff = s1.diff(s2)
        self.assertEqual(in_memory_diff.nof_unit_vrt(), 14)

        # Same difference, this time via diff_vrt() and iteration
        streamed_total = sum(
            v.nof_unit_vrt() for v in s1.diff_vrt(s2).iter_vrt())
        self.assertEqual(streamed_total, 14)

        shared = s1.comm(s2)
        self.assertEqual(len(list(shared.iter_vrt())), 4)
        by_key = sorted(shared.iter_vrt(), key=lambda v: v.key)
        first, _second = by_key[:2]
        self.assertEqual(first.attrib['info']['NSV'], 1)
        self.assertEqual(first.attrib['samples']['SAMP1']['GT'], (0, 1))

        self.assertEqual(s2.comm(s1).nof_unit_vrt(), shared.nof_unit_vrt())
Пример #13
0
    def test_to_vcf_no_ref(self):
        """Write variants to a VCF buffer with a reference and read them back."""
        deletion = variant.Del("chr24", 23, 24)
        snp = variant.SNP("chr24", 1206, "C")
        written = VariantSet.from_variants([deletion, snp])

        buf = io.StringIO()
        written.to_vcf(buf, reference=self.chr24)
        buf.seek(0)

        reloaded = VariantSet.from_vcf(buf)
        shared = written.comm(reloaded)
        self.assertEqual(shared.nof_unit_vrt(), 2)
Пример #14
0
    def test_many_chroms_shuffled(self):
        """Compare a set with a copy built from its variants in shuffled order."""
        vs1 = VariantSet.from_vcf(
            pkg_file('genomvar.test', 'data/example3.vcf'))
        in_order = list(vs1.iter_vrt())
        shuffled_order = (0, 7, 1, 6, 2, 5, 3, 4)
        vs2 = VariantSet.from_variants(
            [in_order[idx] for idx in shuffled_order])

        # Both directions: everything is shared, nothing differs.
        for left, right in ((vs1, vs2), (vs2, vs1)):
            self.assertEqual(left.comm(right).nof_unit_vrt(),
                             left.nof_unit_vrt())
            self.assertEqual(left.diff(right).nof_unit_vrt(), 0)
Пример #15
0
    def test_find_vrt(self):
        """Indexed file-backed and in-memory sets give equal unit counts."""
        vcf_path = pkg_file('genomvar.test', 'data/example2.vcf.gz')
        ivfs = VariantSetFromFile(vcf_path, index=True)
        vs = VariantSet.from_vcf(
            pkg_file('genomvar.test', 'data/example2.vcf.gz'))

        def unit_total(variants):
            # Sum of nof_unit_vrt() over the given variants.
            return sum(v.nof_unit_vrt() for v in variants)

        # Query restricted to a single chromosome
        self.assertEqual(unit_total(ivfs.find_vrt('chr24')),
                         unit_total(vs.find_vrt('chr24')))

        # Full iteration
        self.assertEqual(unit_total(ivfs.iter_vrt()),
                         unit_total(vs.iter_vrt()))
Пример #16
0
 def test_match(self):
     """Check ``match`` result counts, with and without ``match_partial``."""
     # REF      TGG   TT
     #          2093  2099
     # vs1      CCC   GG
     # vs2            CG
     #          r1   r2,r3
     vs1 = VariantSet.from_vcf(
         pkg_file('genomvar.test', 'data/example1.vcf'))
     vrt_CG = factory.from_edit('chr24', 2098, 'TT', 'CG')
     vrt_CC = factory.from_edit('chr24', 2098, 'TT', 'CC')

     # (query variant, extra keyword arguments, expected number of matches)
     cases = (
         (vrt_CG, {}, 1),
         (vrt_CG, {'match_partial': False}, 0),
         (vrt_CC, {}, 0),
         (vrt_CC, {'match_partial': False}, 0),
     )
     for query, options, expected in cases:
         self.assertEqual(len(vs1.match(query, **options)), expected)
Пример #17
0
    def test_from_vcf(self):
        """Load example1.vcf with ``reference`` and ``normindel=True`` set."""
        vcf_path = pkg_file('genomvar.test', 'data/example1.vcf')
        vset = VariantSet.from_vcf(vcf_path,
                                   reference=self.chr24,
                                   normindel=True)

        hits = list(vset.find_vrt('chr24', 1200, 1210))
        self.assertEqual(len(hits), 2)

        # The window holds two variants; the non-Null ones carry id '1'
        hits = list(vset.find_vrt('chr24', 20, 25))
        self.assertEqual(len(hits), 2)
        for candidate in hits:
            if candidate.is_variant_instance(variant.Null):
                continue
            self.assertEqual(candidate.attrib['id'], '1')
Пример #18
0
 def test_drop_duplicates(self):
     """Duplicate every record line, then check ``drop_duplicates`` halves it."""
     buf = io.StringIO()
     with open(pkg_file('genomvar.test', 'data/example1.vcf')) as fh:
         for line in fh:
             # '#' lines are copied once, every other line twice.
             copies = 1 if line.startswith('#') else 2
             for _ in range(copies):
                 buf.write(line)
     buf.seek(0)

     vs = VariantSet.from_vcf(buf, parse_info=True, parse_samples=True)
     deduplicated, dropped = vs.drop_duplicates(return_dropped=True)
     self.assertEqual(vs.nof_unit_vrt() / 2, deduplicated.nof_unit_vrt())
     dropped_units = sum(v.nof_unit_vrt() for v in dropped)
     self.assertEqual(deduplicated.nof_unit_vrt(), dropped_units)
Пример #19
0
    def test_from_to_vcf(self):
        """Round-trip example1.vcf through ``to_vcf`` and compare variants.

        Asserts that the written file contains the DP4 INFO header line and
        that the variants read back pair up with the originals (sorted by
        key) on edit equality and on the NSV INFO value.
        """
        fl = pkg_file('genomvar.test', 'data/example1.vcf')
        variants1 = sorted(VCFReader(fl).iter_vrt(parse_info=True),
                           key=lambda v: v.key)
        vs = VariantSet.from_vcf(fl, parse_info=True)
        # The context manager closes and removes the temporary file even
        # when an assertion below fails; everything that reads the file
        # stays inside the block.
        with tempfile.NamedTemporaryFile(suffix='.vcf') as tf:
            with open(tf.name, 'wt') as fh:
                vs.to_vcf(fh)

            with open(tf.name, 'rt') as fh:
                self.assertIn(
                    '##INFO=<ID=DP4,Number=4,Type=Integer,Description="Test for multinumber field">',
                    fh.read().splitlines())
            variants2 = sorted(VCFReader(tf.name).iter_vrt(parse_info=True),
                               key=lambda v: v.key)
            self.assertEqual(len(variants1), len(variants2))
            for v1, v2 in zip(variants1, variants2):
                self.assertTrue(v1.edit_equal(v2))
                self.assertEqual(v1.attrib['info']['NSV'],
                                 v2.attrib['info']['NSV'])
Пример #20
0
    def test_from_vcf(self):
        """Load example1.vcf with default options and check counts and attributes."""
        vcf_path = pkg_file('genomvar.test', 'data/example1.vcf')
        vset = VariantSet.from_vcf(vcf_path)

        self.assertEqual(vset.nof_unit_vrt(), 18)
        self.assertEqual(list(vset.chroms), ['chr24'])

        # Window with two variants; the first is a SNP with known attributes
        hits = list(vset.find_vrt('chr24', 1200, 1210))
        self.assertEqual(len(hits), 2)
        snp, _other = hits
        self.assertTrue(snp.is_variant_instance(variant.SNP))
        self.assertEqual(snp.attrib['vcf_notation']['ref'], 'G')
        self.assertEqual(snp.attrib['vcf_notation']['start'], 1206)
        self.assertEqual(snp.attrib['vcf_notation']['row'], 4)
        self.assertEqual(snp.attrib['id'], '5')
        self.assertEqual(snp.attrib['filter'], 'PASS')
        self.assertEqual(snp.attrib['qual'], 100)

        # Window with a single insertion
        hits = list(vset.find_vrt('chr24', 154, 156))
        self.assertEqual(len(hits), 1)
        insertion = hits[0]
        self.assertTrue(insertion.is_variant_instance(variant.Ins))

        hits = list(vset.find_vrt('chr24', 20, 25))
        self.assertEqual(len(hits), 2)

        total_variants = len(list(vset.iter_vrt()))
        self.assertEqual(total_variants, 16)
Пример #21
0
 def test_find_vrt_chrom_only(self):
     """``find_vrt`` given only a chromosome name returns four variants."""
     vcf_path = pkg_file('genomvar.test', 'data/example2.vcf.gz')
     s1 = VariantSet.from_vcf(vcf_path,
                              parse_info=True,
                              parse_samples=True)
     on_chr24 = list(s1.find_vrt('chr24'))
     self.assertEqual(len(on_chr24), 4)
Пример #22
0
 def test_VariantSet_no_common(self):
     """``comm`` of the example1 and gnomad example sets has no unit variants."""
     s1 = VariantSet.from_vcf(
         pkg_file('genomvar.test', 'data/example1.vcf'))
     s2 = VariantSet.from_vcf(
         pkg_file('genomvar.test', 'data/example_gnomad_1.vcf.gz'))
     shared = s1.comm(s2)
     self.assertEqual(shared.nof_unit_vrt(), 0)
Пример #23
0
 def test_from_bcf(self):
     """``from_vcf`` given the example3.bcf file yields eight variants."""
     bcf_path = pkg_file('genomvar.test', 'data/example3.bcf')
     vset = VariantSet.from_vcf(bcf_path)
     n_variants = len(list(vset.iter_vrt()))
     self.assertEqual(n_variants, 8)
Пример #24
0
 def test_random_sample(self):
     """``sample(5)`` returns an object of length five."""
     vcf_path = pkg_file('genomvar.test', 'data/example1.vcf')
     vs = VariantSet.from_vcf(vcf_path)
     drawn = vs.sample(5)
     self.assertEqual(len(drawn), 5)