def test_exclude_duplicates(self):
    """ check that exclude_duplicates() merges repeated entries for a variant
    """

    # a variant which overlaps two genes
    two_gene_snv = create_variant("F", "missense_variant|missense_variant",
        "TEST1|TEST2")

    # a different variant, in another gene on another chromosome. Entries
    # for distinct variants should come back unmerged.
    other_snv = create_variant("F", "missense_variant", "OTHER1", chrom="2")
    distinct = [
        (two_gene_snv, ["single_variant"], ["Monoallelic"], ["TEST1"]),
        (other_snv, ["single_variant"], ["Monoallelic"], ["OTHER1"]),
    ]
    deduplicated = self.finder.exclude_duplicates(distinct)
    self.assertEqual(sorted(deduplicated), sorted(distinct))

    # the same variant and gene, reported once as a single variant and twice
    # as a compound het, should collapse into one entry which lists both
    # the check types and both the inheritance modes
    same_gene = [
        (two_gene_snv, ["single_variant"], ["Monoallelic"], ["TEST1"]),
        (two_gene_snv, ["compound_het"], ["Biallelic"], ["TEST1"]),
        (two_gene_snv, ["compound_het"], ["Biallelic"], ["TEST1"]),
    ]
    collapsed = self.finder.exclude_duplicates(same_gene)
    self.assertEqual(collapsed, [
        (two_gene_snv, ["single_variant", "compound_het"],
            ["Monoallelic", "Biallelic"], ["TEST1"]),
    ])

    # the same variant passing for two gene symbols should be collapsed
    # into a single entry, where the entry contains a list of all the gene
    # symbols
    per_gene = [
        (two_gene_snv, ["single_variant"], ["Monoallelic"], ["TEST1"]),
        (two_gene_snv, ["single_variant"], ["Monoallelic"], ["TEST2"]),
    ]
    combined = self.finder.exclude_duplicates(per_gene)
    self.assertEqual(combined, [
        (two_gene_snv, ["single_variant"], ["Monoallelic"],
            ["TEST1", "TEST2"]),
    ])
def test_create_gene_dict(self):
    """ check that create_gene_dict() groups variants under their genes
    """

    # one variant spanning two genes, one sharing the first of those genes,
    # and one in an unrelated gene
    spanning = create_variant("F", "missense_variant|missense_variant",
        "TEST1|TEST2")
    sharing = create_variant("F", "missense_variant", "TEST1")
    unrelated = create_variant("F", "missense_variant", "OTHER1")

    by_gene = self.finder.create_gene_dict([spanning, sharing, unrelated])

    # each gene symbol should index the list of variants found in that gene
    expected = {
        "TEST1": [spanning, sharing],
        "TEST2": [spanning],
        "OTHER1": [unrelated],
    }
    self.assertEqual(by_gene, expected)
def test_find_variants(self):
    """ check that find_variants() returns the expected candidate tuples
    """

    # The trio is defined so that the affected status of the parents is
    # available. The child has to be added and set too, since the child ID
    # is needed for logging.
    trio = Family("famID")
    trio.add_child("child_id", 'dad_id', 'mom_id', 'f', '2', "/vcf/path")
    trio.add_father("dad_id", '0', '0', 'm', '1', "/vcf/path")
    trio.add_mother("mom_id", '0', '0', 'f', '1', "/vcf/path")
    trio.set_child()

    # one variant for each scenario being checked
    two_gene = create_variant("F", "missense_variant|missense_variant",
        "TEST1|TEST2")
    part_functional = create_variant("F",
        "missense_variant|synonymous_variant", "OTHER1|OTHER2")
    no_symbol = create_variant("F", "missense_variant", "")
    chrx = create_variant("F", "missense_variant", "TESTX", chrom="X")

    known = {symbol: {"inh": ["Monoallelic"]}
        for symbol in ("TEST1", "OTHER1", "OTHER2")}
    known["TESTX"] = {"inh": ["X-linked dominant"]}
    self.finder.known_genes = known

    # the simplest case, a variant in a known gene
    in_known_gene = self.finder.find_variants([two_gene], "TEST1", trio)
    self.assertEqual(in_known_gene,
        [(two_gene, ["single_variant"], ["Monoallelic"], ["TEST1"])])

    rejected = (
        # a gene which is not among the known genes does not pass
        (two_gene, "TEST2"),
        # the gene is known, but the consequence for that gene is not
        # functional, so the variant does not pass
        (part_functional, "OTHER2"),
        # intergenic variants (which lack HGNC symbols) do not pass
        (no_symbol, None),
    )
    for variant, symbol in rejected:
        self.assertEqual(
            self.finder.find_variants([variant], symbol, trio), [])

    # a variant on chrX passes through the allosomal instance
    on_chrx = self.finder.find_variants([chrx], "TESTX", trio)
    self.assertEqual(on_chrx,
        [(chrx, ["single_variant"], ["X-linked dominant"], ["TESTX"])])

    # without any known genes, variants in unknown genes pass
    self.finder.known_genes = None
    unrestricted = sorted(
        self.finder.find_variants([two_gene], "TEST2", trio))
    self.assertEqual(unrestricted, [
        (two_gene, ["single_variant"], ["Monoallelic"], ["TEST2"]),
        (two_gene, ["single_variant"], ["Mosaic"], ["TEST2"]),
    ])

    # but variants without gene symbols are still excluded
    self.assertEqual(
        self.finder.find_variants([no_symbol], None, trio), [])
def test_create_gene_dict(self):
    """ check that create_gene_dict() indexes variants by gene symbol
    """

    # variants which share genes, or not
    in_both = create_variant("F", "missense_variant|missense_variant",
        "TEST1|TEST2")
    in_first = create_variant("F", "missense_variant", "TEST1")
    in_other = create_variant("F", "missense_variant", "OTHER1")
    variants = [in_both, in_first, in_other]

    grouped = self.finder.create_gene_dict(variants)

    # variants which share a gene should sit together in the list stored
    # under that gene's key
    self.assertEqual(grouped, {
        "TEST1": [in_both, in_first],
        "TEST2": [in_both],
        "OTHER1": [in_other],
    })
def test_exclude_duplicates(self):
    """ check that exclude_duplicates() collapses repeated variant entries
    """

    # a variant that lies within two genes
    shared = create_variant("F", "missense_variant|missense_variant",
        "TEST1|TEST2")

    # a variant in a different gene on a different chromosome, whose entry
    # should not be merged with the entry for the first variant
    separate = create_variant("F", "missense_variant", "OTHER1", chrom="2")
    unrelated = [
        (shared, ["single_variant"], ["Monoallelic"], ["TEST1"]),
        (separate, ["single_variant"], ["Monoallelic"], ["OTHER1"]),
    ]
    untouched = self.finder.exclude_duplicates(unrelated)
    self.assertEqual(sorted(untouched), sorted(unrelated))

    # one variant in one gene, which passed as a single variant once and as
    # a compound het twice. A single entry is expected back, holding both
    # check types and both inheritance modes in the order asserted below.
    repeated = [
        (shared, ["single_variant"], ["Monoallelic"], ["TEST1"]),
        (shared, ["compound_het"], ["Biallelic"], ["TEST1"]),
        (shared, ["compound_het"], ["Biallelic"], ["TEST1"]),
    ]
    merged = self.finder.exclude_duplicates(repeated)
    self.assertEqual(merged, [
        (shared, ["compound_het", "single_variant"],
            ["Biallelic", "Monoallelic"], ["TEST1"]),
    ])

    # the same variant passing for two gene symbols should be collapsed
    # into a single entry, where the entry contains a list of all the gene
    # symbols
    two_symbols = [
        (shared, ["single_variant"], ["Monoallelic"], ["TEST1"]),
        (shared, ["single_variant"], ["Monoallelic"], ["TEST2"]),
    ]
    pooled = self.finder.exclude_duplicates(two_symbols)
    self.assertEqual(pooled, [
        (shared, ["single_variant"], ["Monoallelic"], ["TEST1", "TEST2"]),
    ])
def test_find_variants(self):
    """ check that find_variants() passes and rejects the right variants
    """

    # Set up the trio, which is needed to know whether the parents are
    # affected. The child is included and set so that the child ID can be
    # obtained for logging purposes.
    fam = Family("famID")
    fam.add_child("child_id", 'dad_id', 'mom_id', 'f', '2', "/vcf/path")
    fam.add_father("dad_id", '0', '0', 'm', '1', "/vcf/path")
    fam.add_mother("mom_id", '0', '0', 'f', '1', "/vcf/path")
    fam.set_child()

    def candidates(variant, symbol):
        """ run find_variants() on a lone variant for the given symbol
        """
        return self.finder.find_variants([variant], symbol, fam)

    # variants that cover the various scenarios
    missense_pair = create_variant("F", "missense_variant|missense_variant",
        "TEST1|TEST2")
    mixed_pair = create_variant("F", "missense_variant|synonymous_variant",
        "OTHER1|OTHER2")
    intergenic = create_variant("F", "missense_variant", "")
    x_linked = create_variant("F", "missense_variant", "TESTX", chrom="X")

    self.finder.known_genes = {
        "TEST1": {"inh": ["Monoallelic"]},
        "OTHER1": {"inh": ["Monoallelic"]},
        "OTHER2": {"inh": ["Monoallelic"]},
        "TESTX": {"inh": ["X-linked dominant"]},
    }

    # simplest case: the variant is in a known gene
    self.assertEqual(candidates(missense_pair, "TEST1"),
        [(missense_pair, ["single_variant"], ["Monoallelic"], ["TEST1"])])

    # a gene missing from the known genes does not pass
    self.assertEqual(candidates(missense_pair, "TEST2"), [])

    # a known gene does not pass when the consequence for that gene is not
    # functional
    self.assertEqual(candidates(mixed_pair, "OTHER2"), [])

    # intergenic variants (which lack HGNC symbols) do not pass
    self.assertEqual(candidates(intergenic, None), [])

    # a variant on chrX passes through the allosomal instance
    self.assertEqual(candidates(x_linked, "TESTX"),
        [(x_linked, ["single_variant"], ["X-linked dominant"], ["TESTX"])])

    # drop the known genes, so that variants in unknown genes pass
    self.finder.known_genes = None
    passing = sorted(candidates(missense_pair, "TEST2"))
    self.assertEqual(passing, [
        (missense_pair, ["single_variant"], ["Monoallelic"], ["TEST2"]),
        (missense_pair, ["single_variant"], ["Mosaic"], ["TEST2"]),
    ])

    # variants without gene symbols are excluded even so
    self.assertEqual(candidates(intergenic, None), [])