def test_to_vcf_row_instantiated_variant_numeric_chrom(self):
    # An edit created with an integer chromosome (1, not '1') must show its
    # REF allele both in the text of the row produced by self.writer and in
    # the variant's own string form.
    edit = variant.VariantFactory().from_edit(1, 2093, 'TGG', 'CCC')
    row_text = str(self.writer.get_row(edit))
    variant_text = str(edit)
    for rendered in (row_text, variant_text):
        self.assertIn('TGG', rendered)
def test_get_vcf_row_instantiated_variant(self):
    # Exercises self.writer.get_row on variants built by a factory that has
    # no reference: row fields, tab-separated text, attributes supplied via
    # GenomVariant, and the error raised for an edit that cannot be written.
    make_edit = variant.VariantFactory().from_edit

    # An edit starting at 2093 is expected at POS 2094 in the row.
    substitution = make_edit('chr24', 2093, 'TGG', 'CCC')
    plain_row = self.writer.get_row(substitution)
    self.assertEqual(plain_row.REF, 'TGG')
    self.assertEqual(plain_row.POS, 2094)
    self.assertEqual(str(plain_row), 'chr24\t2094\t.\tTGG\tCCC\t.\t.\t.')

    # id, qual and filter attributes should appear in the rendered row.
    attributes = {'id': 'vrtid', 'filter': 'LOWQUAL', 'qual': 100}
    annotated = GenomVariant(substitution, attrib=attributes)
    self.assertEqual(
        str(self.writer.get_row(annotated)),
        'chr24\t2094\tvrtid\tTGG\tCCC\t100\tLOWQUAL\t.')

    shortening = make_edit('chr20', 1253922, 'TGT', 'G')
    self.assertEqual(
        str(self.writer.get_row(shortening)),
        'chr20\t1253923\t.\tTGT\tG\t.\t.\t.')

    # The factory above was created without a reference; for this edit the
    # writer is expected to fail with a "Reference is required" ValueError.
    unwritable = make_edit('chr1', 13957, 'TCCCCCA', 'TCCCCA')
    with self.assertRaises(ValueError) as caught:
        self.writer.get_row(unwritable)
    self.assertIn('Reference is required', caught.exception.args[0])
def test_haplotype_edit_equality(self):
    # Two haplotypes assembled from the same edits must be edit_equal;
    # replacing one member with a different ALT at the same position must
    # make edit_equal return a false value.
    make_edit = variant.VariantFactory().from_edit
    shared = make_edit('chr24', 2093, 'TGG', 'CCC')
    alt_gg = make_edit('chr24', 2098, 'TT', 'GG')
    alt_cc = make_edit('chr24', 2098, 'TT', 'CC')

    build = variant.Haplotype.from_variants
    baseline = build([shared, alt_gg])
    twin = build([shared, alt_gg])
    other = build([shared, alt_cc])

    self.assertTrue(baseline.edit_equal(twin))
    self.assertFalse(baseline.edit_equal(other))
def test_ambig_difference_snp_in_locus(self):
    #          10043
    # Ref      TC-ACA--G
    # v1 s1          CA
    # v1 s2     G
    # v2 s2      T
    #
    # With match_ambig=True, comm() between the two sets is expected to
    # return no variants.
    make_edit = variant.VariantFactory(
        reference=self.chr24, normindel=True).from_edit

    first = VariantSet.from_variants([
        make_edit('chr24', 10047, 'A', 'ACA'),
    ])
    second = VariantSet.from_variants([
        make_edit('chr24', 10044, 'C', 'G'),
        make_edit('chr24', 10044, 'C', 'CT'),
    ])

    common = list(first.comm(second, match_ambig=True).iter_vrt())
    self.assertEqual(len(common), 0)
def test_from_variants_to_records(self):
    # to_records() on a set built from two plain edits plus one haplotype
    # of two edits must report shape (4,) and exactly the dtype fields
    # listed below, in this order.
    make_edit = variant.VariantFactory(
        reference=self.chr24, normindel=True).from_edit

    haplotype = Haplotype.from_variants([
        make_edit('chr24', 1207, 'G', 'C'),
        make_edit('chr24', 1207, 'G', 'T'),
    ])
    members = [
        make_edit('chr24', 10043, 'T', 'TCA'),
        make_edit('chr24', 10045, 'ACA', 'A'),
        haplotype,
    ]
    records = VariantSet.from_variants(members).to_records()

    expected_fields = [
        'chrom', 'start', 'end', 'ref', 'alt',
        'vartype', 'phase_group', 'attrib',
    ]
    self.assertEqual(records.shape, (4, ))
    self.assertEqual(list(records.dtype.fields), expected_fields)
def test_strip_order_dependent_Ambig(self):
    #     10043
    # R   T--CA--CAG
    # v1  TCACA--CAG
    # v2  T--CACACAG
    #
    # The two edits differ in position; diff() is expected to be empty
    # when match_ambig=True and to keep one variant when match_ambig=False.
    reference_path = pkg_file('genomvar.test', 'data/chr24.fna')
    make_edit = variant.VariantFactory(
        reference=reference_path, normindel=True).from_edit

    left_edit = make_edit('chr24', 10043, 'T', 'TCA')
    right_edit = make_edit('chr24', 10045, 'A', 'ACA')
    left_set = VariantSet.from_variants([left_edit])
    right_set = VariantSet.from_variants([right_edit])

    # (match_ambig flag, expected number of variants left in the diff)
    for flag, expected in ((True, 0), (False, 1)):
        remaining = left_set.diff(right_set, match_ambig=flag)
        self.assertEqual(len(list(remaining.iter_vrt())), expected)
def test_ambig_difference_different_ambig(self):
    #        10043
    # Ref    T--CA--CA--G
    # v1 s1  T--CA--CACAG  ins CA right
    # v2 s1  T------CA--G  del CA left
    # v1 s2  TCACA--CA--G  ins CA left
    # v2 s2  T--CA------G  del CA right
    #
    # With match_ambig=True, nothing is expected to remain in s1.diff(s2).
    make_edit = variant.VariantFactory(
        reference=self.chr24, normindel=True).from_edit

    first = VariantSet.from_variants([
        make_edit('chr24', 10047, 'A', 'ACA'),
        make_edit('chr24', 10043, 'TCA', 'T'),
    ])
    second = VariantSet.from_variants([
        make_edit('chr24', 10043, 'T', 'TCA'),
        make_edit('chr24', 10045, 'ACA', 'A'),
    ])

    leftover = list(first.diff(second, match_ambig=True).iter_vrt())
    self.assertEqual(len(leftover), 0)
def test_no_ovlp(self):
    # Expected grouping from no_ovlp, in input order:
    #   1. the TGG>CCC edit at 2093 on chromosome '1', alone;
    #   2. the two alternative edits of TT at 2098 (GG and CG), together;
    #   3. the G>GG edit at 3200 on chromosome '2', alone.
    factory = variant.VariantFactory()
    variants = [
        factory.from_edit('1', 2093, 'TGG', 'CCC'),
        factory.from_edit('1', 2098, 'TT', 'GG'),
        factory.from_edit('1', 2098, 'TT', 'CG'),
        factory.from_edit('2', 3200, 'G', 'GG'),
    ]

    # Compare the whole sequence of chunk sizes at once. Asserting only
    # inside a loop over the chunks would let the test pass when no_ovlp
    # yields nothing, too few chunks, or unexpected extra chunks.
    chunk_sizes = [len(chunk) for chunk in no_ovlp(variants)]
    self.assertEqual(chunk_sizes, [1, 2, 1])
def setUp(self):
    # Fixtures for each test: the chr24 reference loaded from this
    # package's data directory and two variant factories.
    fasta_path = pkg_file(__name__, 'data/chr24.fna')
    reference = Reference(fasta_path)
    self.chr24 = reference
    # Factory bound to the reference, created with normindel=True.
    self.nvf = variant.VariantFactory(reference, normindel=True)
    # Factory created with no arguments (not normalizing indels).
    self.svf = variant.VariantFactory()
def test_min_mnps(self):
    # Three edits of the same AAA at position 1 with different ALTs;
    # nof_snp_vrt is expected to report 5 for them.
    # NOTE(review): presumably 5 is the count of distinct single-base
    # changes (two at the first base, one at the second, two at the
    # third) -- confirm against nof_snp_vrt.
    make_edit = variant.VariantFactory().from_edit
    alternatives = ('GGG', 'GGC', 'CGG')
    edits = [make_edit('1', 1, 'AAA', alt) for alt in alternatives]
    self.assertEqual(nof_snp_vrt(edits), 5)
from genomvar import OverlappingHaplotypeVars,\ UnsortedVariantFileError,VCFSampleMismatch,NoIndexFoundError from genomvar.vcf import VCFRow, VCFReader, RESERVED_FORMAT from genomvar import variant from genomvar.test import MyTestCase # Representation of an example used here a lot (example1.vcf) # # REF AG T| T| C G TGG TT G| T T CACAGTTCCAC # 22 154 165 453 1206 2093 2099 3200 4754 6145 10044 # varset1 A TT TG CT C CCC GG GG TCG C T---------- # AGG TT TG T CCC GG GG T G G---------- # AG -------- ------ phased # FILT h h;n n n factory = variant.VariantFactory() class TestVariantSetCase(MyTestCase): def test_empty_vcf(self): buf = io.StringIO() with open(pkg_file('genomvar.test', 'data/example1.vcf')) as fh: for line in itertools.takewhile(lambda l: l.startswith('#'), fh): buf.write(line) buf.seek(0) vs = VariantSet.from_vcf(buf) self.assertEqual(vs.nof_unit_vrt(), 0) def test_random_sample(self): vs = VariantSet.from_vcf(pkg_file('genomvar.test', 'data/example1.vcf'))