Exemplo n.º 1
0
 def test_exclude_duplicates(self):
     """ test that exclude_duplicates() merges repeated entries for a variant
     """

     # one variant annotated with two gene symbols, plus a second variant
     # with a different gene symbol on another chromosome
     shared_snv = create_variant("F", "missense_variant|missense_variant", "TEST1|TEST2")
     other_snv = create_variant("F", "missense_variant", "OTHER1", chrom="2")

     # entries for two different variants should come back unmerged
     distinct = [
         (shared_snv, ["single_variant"], ["Monoallelic"], ["TEST1"]),
         (other_snv, ["single_variant"], ["Monoallelic"], ["OTHER1"]),
     ]
     observed = self.finder.exclude_duplicates(distinct)
     self.assertEqual(sorted(observed), sorted(distinct))

     # the same variant listed for one gene symbol under different check
     # types and inheritance labels, with one of the entries repeated
     repeated = [
         (shared_snv, ["single_variant"], ["Monoallelic"], ["TEST1"]),
         (shared_snv, ["compound_het"], ["Biallelic"], ["TEST1"]),
         (shared_snv, ["compound_het"], ["Biallelic"], ["TEST1"]),
     ]
     observed = self.finder.exclude_duplicates(repeated)
     merged = (shared_snv, ["single_variant", "compound_het"],
         ["Monoallelic", "Biallelic"], ["TEST1"])
     self.assertEqual(observed, [merged])

     # the same variant listed for two different gene symbols should be
     # collapsed into a single entry that lists all of the gene symbols
     per_symbol = [
         (shared_snv, ["single_variant"], ["Monoallelic"], ["TEST1"]),
         (shared_snv, ["single_variant"], ["Monoallelic"], ["TEST2"]),
     ]
     observed = self.finder.exclude_duplicates(per_symbol)
     collapsed = (shared_snv, ["single_variant"], ["Monoallelic"], ["TEST1", "TEST2"])
     self.assertEqual(observed, [collapsed])
Exemplo n.º 2
0
 def test_create_gene_dict(self):
     """ test that create_gene_dict() groups variants by gene symbol
     """

     # the first variant carries two gene symbols, the second shares one of
     # them, and the third has a gene symbol of its own
     two_gene_snv = create_variant("F", "missense_variant|missense_variant", "TEST1|TEST2")
     test1_snv = create_variant("F", "missense_variant", "TEST1")
     other_snv = create_variant("F", "missense_variant", "OTHER1")

     observed = self.finder.create_gene_dict([two_gene_snv, test1_snv, other_snv])

     # each gene symbol should index the list of variants annotated with it
     expected = {
         "TEST1": [two_gene_snv, test1_snv],
         "TEST2": [two_gene_snv],
         "OTHER1": [other_snv],
     }
     self.assertEqual(observed, expected)
 def test_find_variants(self):
     """ test that find_variants() returns the expected entries per gene
     """

     # build a trio (child, father, mother) and set the child, since
     # find_variants() takes the family as an argument
     trio = Family("famID")
     trio.add_child("child_id", 'dad_id', 'mom_id', 'f', '2', "/vcf/path")
     trio.add_father("dad_id", '0', '0', 'm', '1', "/vcf/path")
     trio.add_mother("mom_id", '0', '0', 'f', '1', "/vcf/path")
     trio.set_child()

     # variants for the scenarios below: two gene symbols, a mixed pair of
     # consequences, an empty gene symbol, and a chrX variant
     two_gene_snv = create_variant("F", "missense_variant|missense_variant", "TEST1|TEST2")
     mixed_snv = create_variant("F", "missense_variant|synonymous_variant", "OTHER1|OTHER2")
     no_symbol_snv = create_variant("F", "missense_variant", "")
     chrx_snv = create_variant("F", "missense_variant", "TESTX", chrom="X")

     # restrict the finder to a set of known genes (TEST2 is left out)
     known = {symbol: {"inh": ["Monoallelic"]}
         for symbol in ("TEST1", "OTHER1", "OTHER2")}
     known["TESTX"] = {"inh": ["X-linked dominant"]}
     self.finder.known_genes = known

     # the simplest case: a variant in a known gene passes
     observed = self.finder.find_variants([two_gene_snv], "TEST1", trio)
     self.assertEqual(observed,
         [(two_gene_snv, ["single_variant"], ["Monoallelic"], ["TEST1"])])

     rejected = [
         # a gene symbol missing from the known genes does not pass
         (two_gene_snv, "TEST2"),
         # a known gene whose consequence is synonymous does not pass
         (mixed_snv, "OTHER2"),
         # a variant without a gene symbol does not pass
         (no_symbol_snv, None),
     ]
     for variant, symbol in rejected:
         self.assertEqual(self.finder.find_variants([variant], symbol, trio), [])

     # a chrX variant in a known gene passes with that gene's inheritance
     observed = self.finder.find_variants([chrx_snv], "TESTX", trio)
     self.assertEqual(observed,
         [(chrx_snv, ["single_variant"], ["X-linked dominant"], ["TESTX"])])

     # without a known-genes set, the previously rejected gene symbol passes
     self.finder.known_genes = None
     observed = sorted(self.finder.find_variants([two_gene_snv], "TEST2", trio))
     self.assertEqual(observed,
         [(two_gene_snv, ["single_variant"], ["Monoallelic"], ["TEST2"]),
         (two_gene_snv, ["single_variant"], ["Mosaic"], ["TEST2"])])

     # a variant without a gene symbol is still excluded
     self.assertEqual(self.finder.find_variants([no_symbol_snv], None, trio), [])
    def test_create_gene_dict(self):
        """ test that create_gene_dict() groups variants by gene symbol
        """

        # the first variant carries two gene symbols, the second shares one
        # of them, and the third has a gene symbol of its own
        two_gene_snv = create_variant(
            "F", "missense_variant|missense_variant", "TEST1|TEST2")
        test1_snv = create_variant("F", "missense_variant", "TEST1")
        other_snv = create_variant("F", "missense_variant", "OTHER1")

        observed = self.finder.create_gene_dict(
            [two_gene_snv, test1_snv, other_snv])

        # each gene symbol should index the list of variants annotated with it
        expected = {
            "TEST1": [two_gene_snv, test1_snv],
            "TEST2": [two_gene_snv],
            "OTHER1": [other_snv],
        }
        self.assertEqual(observed, expected)
    def test_exclude_duplicates(self):
        """ test that exclude_duplicates() merges repeated entries for a variant
        """

        # one variant annotated with two gene symbols, plus a second variant
        # with a different gene symbol on another chromosome
        shared_snv = create_variant(
            "F", "missense_variant|missense_variant", "TEST1|TEST2")
        other_snv = create_variant(
            "F", "missense_variant", "OTHER1", chrom="2")

        # entries for two different variants should come back unmerged
        distinct = [
            (shared_snv, ["single_variant"], ["Monoallelic"], ["TEST1"]),
            (other_snv, ["single_variant"], ["Monoallelic"], ["OTHER1"]),
        ]
        observed = self.finder.exclude_duplicates(distinct)
        self.assertEqual(sorted(observed), sorted(distinct))

        # the same variant listed for one gene symbol under different check
        # types and inheritance labels, with one of the entries repeated
        repeated = [
            (shared_snv, ["single_variant"], ["Monoallelic"], ["TEST1"]),
            (shared_snv, ["compound_het"], ["Biallelic"], ["TEST1"]),
            (shared_snv, ["compound_het"], ["Biallelic"], ["TEST1"]),
        ]
        observed = self.finder.exclude_duplicates(repeated)
        merged = (
            shared_snv,
            ["compound_het", "single_variant"],
            ["Biallelic", "Monoallelic"],
            ["TEST1"],
        )
        self.assertEqual(observed, [merged])

        # the same variant listed for two different gene symbols should be
        # collapsed into a single entry that lists all of the gene symbols
        per_symbol = [
            (shared_snv, ["single_variant"], ["Monoallelic"], ["TEST1"]),
            (shared_snv, ["single_variant"], ["Monoallelic"], ["TEST2"]),
        ]
        observed = self.finder.exclude_duplicates(per_symbol)
        collapsed = (
            shared_snv,
            ["single_variant"],
            ["Monoallelic"],
            ["TEST1", "TEST2"],
        )
        self.assertEqual(observed, [collapsed])
    def test_find_variants(self):
        """ test that find_variants() returns the expected entries per gene
        """

        # build a trio (child, father, mother) and set the child, since
        # find_variants() takes the family as an argument
        trio = Family("famID")
        trio.add_child("child_id", 'dad_id', 'mom_id', 'f', '2', "/vcf/path")
        trio.add_father("dad_id", '0', '0', 'm', '1', "/vcf/path")
        trio.add_mother("mom_id", '0', '0', 'f', '1', "/vcf/path")
        trio.set_child()

        # variants for the scenarios below: two gene symbols, a mixed pair
        # of consequences, an empty gene symbol, and a chrX variant
        two_gene_snv = create_variant(
            "F", "missense_variant|missense_variant", "TEST1|TEST2")
        mixed_snv = create_variant(
            "F", "missense_variant|synonymous_variant", "OTHER1|OTHER2")
        no_symbol_snv = create_variant("F", "missense_variant", "")
        chrx_snv = create_variant(
            "F", "missense_variant", "TESTX", chrom="X")

        # restrict the finder to a set of known genes (TEST2 is left out)
        known = {
            symbol: {"inh": ["Monoallelic"]}
            for symbol in ("TEST1", "OTHER1", "OTHER2")
        }
        known["TESTX"] = {"inh": ["X-linked dominant"]}
        self.finder.known_genes = known

        # the simplest case: a variant in a known gene passes
        observed = self.finder.find_variants([two_gene_snv], "TEST1", trio)
        expected = [
            (two_gene_snv, ["single_variant"], ["Monoallelic"], ["TEST1"]),
        ]
        self.assertEqual(observed, expected)

        rejected = [
            # a gene symbol missing from the known genes does not pass
            (two_gene_snv, "TEST2"),
            # a known gene whose consequence is synonymous does not pass
            (mixed_snv, "OTHER2"),
            # a variant without a gene symbol does not pass
            (no_symbol_snv, None),
        ]
        for variant, symbol in rejected:
            observed = self.finder.find_variants([variant], symbol, trio)
            self.assertEqual(observed, [])

        # a chrX variant in a known gene passes with that gene's inheritance
        observed = self.finder.find_variants([chrx_snv], "TESTX", trio)
        expected = [
            (chrx_snv, ["single_variant"], ["X-linked dominant"], ["TESTX"]),
        ]
        self.assertEqual(observed, expected)

        # without a known-genes set, the previously rejected gene symbol
        # passes
        self.finder.known_genes = None
        observed = sorted(
            self.finder.find_variants([two_gene_snv], "TEST2", trio))
        expected = [
            (two_gene_snv, ["single_variant"], ["Monoallelic"], ["TEST2"]),
            (two_gene_snv, ["single_variant"], ["Mosaic"], ["TEST2"]),
        ]
        self.assertEqual(observed, expected)

        # a variant without a gene symbol is still excluded
        observed = self.finder.find_variants([no_symbol_snv], None, trio)
        self.assertEqual(observed, [])