Пример #1
0
 def test_make_variant_panel8(self):
     ag = AlleleGenerator("src/mykrobe/data/NC_000962.3.fasta")
     gene = self.gm.get_gene("eis")
     variants = list(
         self.gm.get_variant_names("eis", "TG-1T",
                                   protein_coding_var=False))
     assert len(variants) == 1
     var = variants[0]
     ref, start, alt = split_var_name(var)
     assert ref == 'CA'
     assert start == 2715332
     assert alt == 'A'
     v = Variant.create(variant_sets=self.variant_sets,
                        reference=self.reference_id,
                        reference_bases=ref,
                        start=start,
                        alternate_bases=[alt])
     panel = ag.create(v)
     assert len(panel.alts) == 1
     alt = panel.alts[0]
     # the panel ref/alt seqs go past the end of the gene,
     # so can't comparie against gene sequence. Need to get
     # subseq from the reference seq
     panel_ref_start = self.reference_seq.find(panel.refs[0])
     assert panel_ref_start < start < panel_ref_start + len(panel.refs[0])
     seq = str(self.reference_seq[panel_ref_start:panel_ref_start +
                                  len(panel.refs[0])])
     assert seq == panel.refs[0]
     print(alt, seq[:31] + seq[31:])
     assert alt == seq[:30] + seq[31:]
     DB.drop_database('mykrobe-test')
Пример #2
0
 def test_make_variant_panel6(self):
     ag = AlleleGenerator(f"{DATA_DIR}/NC_000962.3.fasta", kmer=31)
     gene = self.gm.get_gene("pncA")
     variants = list(
         self.gm.get_variant_names("pncA",
                                   "CAG28TAA",
                                   protein_coding_var=False))
     assert len(variants) == 1
     var = variants[0]
     ref, start, alt = split_var_name(var)
     assert ref == "CTG"
     assert start == 2289212
     assert alt == "TTA"
     v = Variant.create(
         variant_sets=self.variant_sets,
         reference=self.reference_id,
         reference_bases=ref,
         start=start,
         alternate_bases=[alt],
     )
     panel = ag.create(v)
     assert len(panel.alts) == 1
     alt = panel.alts[0]
     # the panel ref/alt seqs go past the end of the gene,
     # so can't comparie against gene sequence. Need to get
     # subseq from the reference seq
     panel_ref_start = self.reference_seq.find(panel.refs[0])
     assert panel_ref_start < start < panel_ref_start + len(panel.refs[0])
     seq = str(self.reference_seq[panel_ref_start:panel_ref_start +
                                  len(panel.refs[0])])
     assert seq == panel.refs[0]
     assert alt == seq[:30] + "TTA" + seq[33:]
     DB.drop_database("mykrobe-test")
Пример #3
0
 def test_make_variant_panel5(self):
     ag = AlleleGenerator("src/mykrobe/data/NC_000962.3.fasta")
     gene = self.gm.get_gene("gyrA")
     for var in self.gm.get_variant_names("gyrA", "D94X"):
         ref, start, alt = split_var_name(var)
         v = Variant.create(variant_sets=self.variant_sets,
                            reference=self.reference_id,
                            reference_bases=ref,
                            start=start,
                            alternate_bases=[alt])
         panel = ag.create(v)
         for alt in panel.alts:
             seq = copy.copy(str(gene.seq))
             seq = seq.replace(panel.refs[0], alt)
             assert Seq(seq).translate()[93] != "D"
     DB.drop_database('mykrobe-test')
Пример #4
0
 def test_make_variant_panel4(self):
     ag = AlleleGenerator("src/mykrobe/data/NC_000962.3.fasta")
     gene = self.gm.get_gene("katG")
     for var in self.gm.get_variant_names("katG", "W90R"):
         ref, start, alt = split_var_name(var)
         v = Variant.create(variant_sets=self.variant_sets,
                            reference=self.reference_id,
                            reference_bases=ref,
                            start=start,
                            alternate_bases=[alt])
         panel = ag.create(v)
         for alt in panel.alts:
             seq = copy.copy(str(gene.seq.reverse_complement()))
             seq = seq.replace(panel.refs[0], alt)
             assert seq != str(gene.seq)
             assert Seq(seq).reverse_complement().translate()[89] == "R"
     DB.drop_database('mykrobe-test')
Пример #5
0
 def test_make_variant_panel7(self):
     # Test DNA change upstream of a gene on the reverse
     # strand. The variant G-10A is in "gene space", ie
     # 10 bases upstream of eis is the nucleotide G on the
     # reverse strand. That position is 2715342 in the genome,
     # and is C on the forwards strand.
     # Here's a diagram:
     #             | <- This C is at -10 in "gene space", so variant G-10A has ref=G
     #             |    ref coord is 2715342, and variant in "ref space" is C2715342T
     # CACAGAATCCGACTGTGGCATATGCCGC
     #   |
     #   | <- C = last nucleotide of gene, at 2715332
     ag = AlleleGenerator(f"{DATA_DIR}/NC_000962.3.fasta", kmer=31)
     gene = self.gm.get_gene("eis")
     variants = list(
         self.gm.get_variant_names("eis", "G-10A",
                                   protein_coding_var=False))
     assert len(variants) == 1
     var = variants[0]
     ref, start, alt = split_var_name(var)
     assert ref == "C"
     assert start == 2715342
     assert alt == "T"
     v = Variant.create(
         variant_sets=self.variant_sets,
         reference=self.reference_id,
         reference_bases=ref,
         start=start,
         alternate_bases=[alt],
     )
     panel = ag.create(v)
     assert len(panel.alts) == 1
     alt = panel.alts[0]
     # the panel ref/alt seqs go past the end of the gene,
     # so can't comparie against gene sequence. Need to get
     # subseq from the reference seq
     panel_ref_start = self.reference_seq.find(panel.refs[0])
     assert panel_ref_start < start < panel_ref_start + len(panel.refs[0])
     seq = str(self.reference_seq[panel_ref_start:panel_ref_start +
                                  len(panel.refs[0])])
     assert seq == panel.refs[0]
     assert alt == seq[:30] + "T" + seq[31:]
     DB.drop_database("mykrobe-test")
Пример #6
0
 def test_make_variant_panel1(self):
     ag = AlleleGenerator(f"{DATA_DIR}/NC_000962.3.fasta")
     gene = self.gm.get_gene("rpoB")
     for var in self.gm.get_variant_names("rpoB", "D3A"):
         ref, start, alt = split_var_name(var)
         v = Variant.create(
             variant_sets=self.variant_sets,
             reference=self.reference_id,
             reference_bases=ref,
             start=start,
             alternate_bases=[alt])
         panel = ag.create(v)
         for alt in panel.alts:
             seq = copy.copy(str(gene.seq))
             assert Seq(seq).translate()[2] == "D"
             seq = seq.replace(panel.refs[0][25:], alt[24:])
             assert seq != str(gene.seq)
             assert Seq(seq).translate()[2] == "A"
     DB.drop_database('mykrobe-test')
Пример #7
0
 def test_make_variant_panel2(self):
     ag = AlleleGenerator(f"{DATA_DIR}/NC_000962.3.fasta", kmer=31)
     gene = self.gm.get_gene("katG")
     for var in self.gm.get_variant_names("katG", "E3A"):
         ref, start, alt = split_var_name(var)
         v = Variant.create(
             variant_sets=self.variant_sets,
             reference=self.reference_id,
             reference_bases=ref,
             start=start,
             alternate_bases=[alt],
         )
         panel = ag.create(v)
         for alt in panel.alts:
             seq = copy.copy(str(gene.seq.reverse_complement()))
             seq = seq.replace(panel.refs[0][:39],
                               alt[:39 + len(alt) - len(panel.refs[0])])
             assert seq != str(gene.seq)
             assert Seq(seq).reverse_complement().translate()[2] == "A"
     DB.drop_database("mykrobe-test")
Пример #8
0
class TestINDELAlleleGenerator():
    def setup(self):
        DB.drop_database('mykrobe-test')

        self.pg = AlleleGenerator(
            reference_filepath=f"{DATA_DIR}/BX571856.1.fasta")
        self.pg2 = AlleleGenerator(
            reference_filepath=f"{DATA_DIR}/NC_000962.3.fasta")
        self.reference_set = ReferenceSet().create_and_save(name="ref_set")
        self.variant_set = VariantSet.create_and_save(
            name="this_vcf_file", reference_set=self.reference_set)
        self.variant_sets = [self.variant_set]
        self.reference = Reference().create_and_save(
            name="ref", md5checksum="sre", reference_sets=[self.reference_set])

    def test_simple_deletion1(self):
        v = Variant.create(variant_sets=self.variant_sets,
                           reference=self.reference,
                           reference_bases="AA",
                           start=31,
                           alternate_bases=["A"])
        assert v.is_indel
        assert v.is_deletion
        panel = self.pg.create(v)
        assert_no_overlapping_kmers(panel)
        assert "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG" in panel.refs
        assert self.pg._calculate_length_delta_from_indels(v, []) == 1
        assert panel.alts == [
            "GATTAAAGATAGAAATACACGATGCGAGCATCAAATTTCATAACATCACCATGAGTTTG"
        ]

    def test_simple_deletion2(self):
        v = Variant.create(variant_sets=self.variant_sets,
                           reference=self.reference,
                           reference_bases="AT",
                           start=32,
                           alternate_bases=["A"])
        panel = self.pg.create(v)
        assert_no_overlapping_kmers(panel)
        assert "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGA" in panel.refs
        assert panel.alts == [
            "ATTAAAGATAGAAATACACGATGCGAGCAACAAATTTCATAACATCACCATGAGTTTGAT"
        ]

    def test_simple_deletion3(self):
        v = Variant.create(variant_sets=self.variant_sets,
                           reference=self.reference,
                           reference_bases="AT",
                           start=2902618,
                           alternate_bases=["T"])
        panel = self.pg.create(v)
        assert_no_overlapping_kmers(panel)
        assert "TTTATACTACTGCTCAATTTTTTTACTTTTATNNNNNNNNNNNNNNNNNNNNNNNNNNNNN" in panel.refs
        assert panel.alts == [
            "TTTATACTACTGCTCAATTTTTTTACTTTTTNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"
        ]

    def test_simple_deletion4(self):
        v = Variant.create(variant_sets=self.variant_sets,
                           reference=self.reference,
                           reference_bases="ATC",
                           start=32,
                           alternate_bases=["A"])
        panel = self.pg.create(v)
        assert_no_overlapping_kmers(panel)
        assert "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGA" in panel.refs
        assert panel.alts == [
            "ATTAAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGAT"
        ]

    def test_simple_insertion1(self):
        v = Variant.create(variant_sets=self.variant_sets,
                           reference=self.reference,
                           reference_bases="C",
                           start=1,
                           alternate_bases=["TTTC"])
        panel = self.pg.create(v)
        #        assert_no_overlapping_kmers(panel)### Skip this test for vars in first k bases of ref
        assert v.is_indel
        assert v.is_insertion
        assert "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG" in panel.refs
        assert panel.alts == ["TTTCGATTAAAGATAGAAATACACGATGCGAGC"]

    def test_simple_insertion2(self):
        v = Variant.create(variant_sets=self.variant_sets,
                           reference=self.reference,
                           reference_bases="C",
                           start=1,
                           alternate_bases=["CTTT"])
        panel = self.pg.create(v)
        #        assert_no_overlapping_kmers(panel)### Skip this test for vars in first k bases of ref
        assert "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG" in panel.refs
        assert panel.alts == ["CTTTGATTAAAGATAGAAATACACGATGCGAGCA"]

    def test_simple_insertion3(self):
        v = Variant.create(variant_sets=self.variant_sets,
                           reference=self.reference,
                           reference_bases="A",
                           start=31,
                           alternate_bases=["ATTT"])
        panel = self.pg.create(v)
        assert_no_overlapping_kmers(panel)
        assert "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG" in panel.refs
        assert panel.alts == [
            "GATTAAAGATAGAAATACACGATGCGAGCATTTATCAAATTTCATAACATCACCATGAGTTTG"
        ]

    def test_simple_insertion4(self):
        v = Variant.create(variant_sets=self.variant_sets,
                           reference=self.reference,
                           reference_bases="A",
                           start=32,
                           alternate_bases=["AGGGG"])
        panel = self.pg.create(v)
        assert_no_overlapping_kmers(panel)
        assert "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGA" in panel.refs
        assert panel.alts == [
            "ATTAAAGATAGAAATACACGATGCGAGCAAGGGGTCAAATTTCATAACATCACCATGAGTTTGA"
        ]

    def test_simple_insertion5(self):
        v = Variant.create(variant_sets=self.variant_sets,
                           reference=self.reference,
                           reference_bases="A",
                           start=2902618,
                           alternate_bases=["ATGC"])
        panel = self.pg.create(v)
        assert_no_overlapping_kmers(panel)
        assert "TTTATACTACTGCTCAATTTTTTTACTTTTATNNNNNNNNNNNNNNNNNNNNNNNNNNNNN" in panel.refs
        assert panel.alts == [
            "TATACTACTGCTCAATTTTTTTACTTTTATGCTNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"
        ]

    def test_double_insertion(self):
        v = Variant.create(variant_sets=self.variant_sets,
                           reference=self.reference,
                           reference_bases="A",
                           start=4021408,
                           alternate_bases=["ACGCTGGCGGGCG"])
        v1 = Variant.create(variant_sets=self.variant_sets,
                            reference=self.reference,
                            reference_bases="AGA",
                            start=4021406,
                            alternate_bases=["CGG"])
        context = [v1]
        assert self.pg2._remove_overlapping_contexts(v, [v1]) == []
        panel = self.pg2.create(v, context=context)
        assert_no_overlapping_kmers(panel)
        assert "ATCTAGCCGCAAGGGCGCGAGCAGACGCAGAATCGCATGATTTGAGCTCAAATCATGCGAT" in panel.refs
        assert panel.alts == [
            "TCTAGCCGCAAGGGCGCGAGCAGACGCAGACGCTGGCGGGCGATCGCATGATTTGAGCTCAAATCATGCGAT"
        ]

    def test_double_indel_fail(self):
        v = Variant.create(variant_sets=self.variant_sets,
                           reference=self.reference,
                           reference_bases="CCA",
                           start=2288851,
                           alternate_bases=["A"])
        v1 = Variant.create(variant_sets=self.variant_sets,
                            reference=self.reference,
                            reference_bases="A",
                            start=2288850,
                            alternate_bases=["ACC"])
        context = [v1]
        panel = self.pg2.create(v, context=context)
        assert "GGCGCACACAATGATCGGTGGCAATACCGACCACATCGACCTCATCGACGCCGCGTTGCCG" in panel.refs
        assert "GGCGCACACAATGATCGGTGGCAATACCGACCACATCGACCTCATCGACGCCGCGTTGCCG" not in panel.alts

    def test_large_insertion(self):
        v = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="CCGCCGGCCCCGCCGTTT",
            start=1636155,
            alternate_bases=[
                "CTGCCGGCCCCGCCGGCGCCGCCCAATCCACCGAAGCCCCTCCCTTCGGTGGGGTCGCTGCCGCCGTCGCCGCCGTCACCGCCCTTGCCGCCGGCCCCGCCGTCGCCGCCGGCTCCGGCGGTGCCGTCGCCGCCCTGGCCGCCGGCCCCGCCGTTTCCG"
            ])
        panel = self.pg2.create(v, context=[])
        assert_no_overlapping_kmers(panel)
        assert "AGACCTAGCAGGGTGCCGGCGCCGCCCTTGCCGCCGGCCCCGCCGTTTCCGCCGCCGCCAT" in panel.refs
        assert panel.alts == [
            "GACCTAGCAGGGTGCCGGCGCCGCCCTTGCTGCCGGCCCCGCCGGCGCCGCCCAATCCACCGAAGCCCCTCCCTTCGGTGGGGTCGCTGCCGCCGTCGCCGCCGTCACCGCCCTTGCCGCCGGCCCCGCCGTCGCCGCCGGCTCCGGCGGTGCCGTCGCCGCCCTGGCCGCCGGCCCCGCCGTTTCCGCCGCCGCCGCCATCGCCGATGATGTTTTCC"
        ]
Пример #9
0
class TestINDELandSNPSAlleleGenerator():

    def setup(self):
        DB.drop_database('mykrobe-test')
        self.pg = AlleleGenerator(
            reference_filepath="src/mykrobe/data/BX571856.1.fasta")
        self.pg2 = AlleleGenerator(
            reference_filepath="src/mykrobe/data/NC_000962.3.fasta")
        self.reference_set = ReferenceSet().create_and_save(name="ref_set")
        self.variant_set = VariantSet.create_and_save(
            name="this_vcf_file",
            reference_set=self.reference_set)
        self.variant_sets = [self.variant_set]
        self.reference = Reference().create_and_save(
            name="ref",
            md5checksum="sre",
            reference_sets=[
                self.reference_set])

    def teardown(self):
        DB.drop_database('mykrobe-test')



    def test_ins_with_SNP_context(self):
        
        v = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="A",
            start=31,
            alternate_bases=["ATTT"])
        v2 = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="A",
            start=32,
            alternate_bases=["T"])
        panel = self.pg.create(v, context=[v2])
        #assert_no_overlapping_kmers(panel)  ### This test seems to fail sometimes, and pass othertimes...
        assert "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG" in panel.refs
        assert sorted(
            panel.alts) == sorted(
            [
                "GATTAAAGATAGAAATACACGATGCGAGCATTTATCAAATTTCATAACATCACCATGAGTTTG",
                "TTAAAGATAGAAATACACGATGCGAGCATTTTTCAAATTTCATAACATCACCATGAGTTTG"])

    def test_del_with_SNP_context1(self):
        v = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="AA",
            start=31,
            alternate_bases=["A"])
        v2 = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="T",
            start=33,
            alternate_bases=["A"])
        panel = self.pg.create(v, context=[v2])
        assert_no_overlapping_kmers(panel)  
        assert "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG" in panel.refs
        assert sorted(
            panel.alts) == sorted(
            [
                "ATTAAAGATAGAAATACACGATGCGAGCAACAAATTTCATAACATCACCATGAGTTTGA",
                "GATTAAAGATAGAAATACACGATGCGAGCATCAAATTTCATAACATCACCATGAGTTTG"])

    def test_del_with_SNP_context2(self):
        v = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="AA",
            start=31,
            alternate_bases=["A"])
        v2 = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="A",
            start=32,
            alternate_bases=["T"])
        panel = self.pg.create(v, context=[v2])
        assert_no_overlapping_kmers(panel)  
        assert self.pg._remove_overlapping_contexts(v, [v2]) == []
        assert "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG" in panel.refs
        assert sorted(
            panel.alts) == sorted(
            ["GATTAAAGATAGAAATACACGATGCGAGCATCAAATTTCATAACATCACCATGAGTTTG"])

    def test_del_with_ins_context1(self):
        v = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="AAT",
            start=31,
            alternate_bases=["A"])
        v2 = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="T",
            start=4,
            alternate_bases=["TTTT"])
        panel = self.pg.create(v, context=[v2])
        assert_no_overlapping_kmers(panel)  
        assert self.pg._remove_overlapping_contexts(v, [v2]) == [v2]
        assert "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG" in panel.refs
        assert sorted(
            panel.alts) == sorted(
            [
                "GATTAAAGATAGAAATACACGATGCGAGCACAAATTTCATAACATCACCATGAGTTTGAT",
                "TTTTAAAGATAGAAATACACGATGCGAGCACAAATTTCATAACATCACCATGAGTTTG"])

    def test_del_with_ins_context2(self):
        v = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="ATC",
            start=32,
            alternate_bases=["A"])
        v2 = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="C",
            start=1,
            alternate_bases=["CTTT"])
        panel = self.pg.create(v, context=[v2])
        assert_no_overlapping_kmers(panel)  
        assert self.pg._remove_overlapping_contexts(v, [v2]) == [v2]
        assert self.pg._remove_contexts_not_within_k(v, [v2]) == []
        assert "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGA" in panel.refs
        assert sorted(
            panel.alts) == sorted(
            ["ATTAAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGAT"])

    def test_del_with_ins_context3(self):
        v = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="ATC",
            start=32,
            alternate_bases=["A"])
        v2 = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="T",
            start=5,
            alternate_bases=["TT"])
        panel = self.pg.create(v, context=[v2])
        assert_no_overlapping_kmers(panel)  
        assert self.pg._remove_overlapping_contexts(v, [v2]) == [v2]
        assert "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGA" in panel.refs
        assert sorted(
            panel.alts) == sorted(
            [
                "ATTAAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGAT",
                "TTTAAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGAT"])

    def test_del_with_ins_context4(self):
        v = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="ATC",
            start=32,
            alternate_bases=["A"])
        v2 = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="T",
            start=5,
            alternate_bases=["TT"])
        v3 = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="T",
            start=5,
            alternate_bases=["TG"])
        panel = self.pg.create(v, context=[v2, v3])
        assert_no_overlapping_kmers(panel)  
        assert self.pg._remove_overlapping_contexts(v, [v2, v3]) == [v2, v3]
        assert "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGA" in panel.refs
        assert sorted(
            panel.alts) == sorted(
            [
                "ATTAAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGAT",
                "TTTAAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGAT",
                "TTGAAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGAT"])

    def test_del_with_ins_context5(self):
        v = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="ATC",
            start=32,
            alternate_bases=["A"])
        v2 = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="T",
            start=5,
            alternate_bases=["TT"])
        v3 = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="A",
            start=6,
            alternate_bases=["AG"])
        panel = self.pg.create(v, context=[v2, v3])
        assert_no_overlapping_kmers(panel)  
        assert self.pg._remove_overlapping_contexts(v, [v2, v3]) == [v2, v3]
        assert "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGA" in panel.refs
        assert sorted(
            panel.alts) == sorted(
            [
                "TTAGAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGA",
                "TTAGAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGAT",
                "TTTAAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGAT",
                "ATTAAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGAT"])

    def test_del_with_ins_context_where_base_is_deleted1(self):
        v = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="ATC",
            start=32,
            alternate_bases=["A"])
        v2 = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="T",
            start=33,
            alternate_bases=["C"])
        panel = self.pg.create(v, context=[v2])
        assert_no_overlapping_kmers(panel)  
        assert self.pg._remove_overlapping_contexts(v, [v2]) == []
        assert "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGA" in panel.refs
        assert sorted(
            panel.alts) == sorted(
            ["ATTAAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGAT"])

    def test_del_with_ins_context_where_base_is_deleted2(self):
        v = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="ATC",
            start=32,
            alternate_bases=["A"])
        v2 = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="TAAA",
            start=5,
            alternate_bases=["T"])
        v3 = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="A",
            start=7,
            alternate_bases=["AG"])
        panel = self.pg.create(v, context=[v2, v3])
        assert_no_overlapping_kmers(panel)  
        assert "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGA" in panel.refs
        assert sorted(
            panel.alts) == sorted(
            [
                "ATTAAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGAT",
                "CGATTGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGATC",
                "TTAAGAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGAT"])

        panel = self.pg.create(v, context=[v3, v2])
        assert_no_overlapping_kmers(panel)  
        assert "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGA" in panel.refs
        assert sorted(
            panel.alts) == sorted(
            [
                "ATTAAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGAT",
                "CGATTGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGATC",
                "TTAAGAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGAT"])

    def test_snp_with_replace_context(self):
        v = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="G",
            start=2338961,
            alternate_bases=["A"])
        v1 = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="GGATG",
            start=2338990,
            alternate_bases=["CGATA"])
        panel = self.pg2.create(v, context=[v1])
        assert_no_overlapping_kmers(panel)  
        assert "CGACTAGCCACCATCGCGCATCAGTGCGAGGTCAAAAGCGACCAAAGCGAGCAAGTCGCGG" in panel.refs

        assert set(panel.alts) == \
            set(["CGACTAGCCACCATCGCGCATCAGTGCGAGATCAAAAGCGACCAAAGCGAGCAAGTCGCCG",
             "CGACTAGCCACCATCGCGCATCAGTGCGAGATCAAAAGCGACCAAAGCGAGCAAGTCGCGG"])

    def test_indel_snp_indel_context(self):
        v = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="TCGCGTGGC",
            start=4021459,
            alternate_bases=["GCGAGCAGA"])
        v1 = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="A",
            start=4021455,
            alternate_bases=["ATCTAGCCGCAAG"])
        v2 = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="T",
            start=4021489,
            alternate_bases=["G"])
        panel = self.pg2.create(v)  # , context = [v1, v2])
        assert_no_overlapping_kmers(panel)  
        assert "ATCATGCGATTCTGCGTCTGCTCGCGAGGCTCGCGTGGCCGCCGGCGCTGGCGGGCGATCT" in panel.refs

        panel = self.pg2.create(v, context=[v1, v2])
        assert_no_overlapping_kmers(panel)  
        assert sorted(
            panel.alts) == sorted(
            [
                "ATCATGCGATTCTGCGTCTGCTCGCGAGGCGCGAGCAGACGCCGGCGCTGGCGGGCGATCG",
                "ATCATGCGATTCTGCGTCTGCTCGCGAGGCGCGAGCAGACGCCGGCGCTGGCGGGCGATCT",
                "TGCGTCTGCTCGCGATCTAGCCGCAAGGGCGCGAGCAGACGCCGGCGCTGGCGGGCGATCG",
                "TGCGTCTGCTCGCGATCTAGCCGCAAGGGCGCGAGCAGACGCCGGCGCTGGCGGGCGATCT"])

    def test_complex_context(self):
        v = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="ATTT",
            start=1503643,
            alternate_bases=["A"])
        v1 = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="CCT",
            start=1503615,
            alternate_bases=["C"])
        v2 = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="A",
            start=1503655,
            alternate_bases=["ATGCCGCCGCC"])
        panel = self.pg2.create(v, context=[v1, v2])
        assert_no_overlapping_kmers(panel)                             
        assert "ATCCTGGAGCCCACCAGCGGAAACACCGGCATTTCGCTGGCGATGGCGGCCCGGTTGAAGG" in panel.refs
        assert set(panel.alts) == set([
            "CCATCGGAGCCCACCAGCGGAAACACCGGCACGCTGGCGATGGCGGCCCGGTTGAAGGGGT",
            "TCCTGGAGCCCACCAGCGGAAACACCGGCACGCTGGCGATGGCGGCCCGGTTGAAGGGG",            
            "ATCGGAGCCCACCAGCGGAAACACCGGCACGCTGGCGATGCCGCCGCCTGGCGGCCCGG",
            "TCCTGGAGCCCACCAGCGGAAACACCGGCACGCTGGCGATGCCGCCGCCTGGCGGCCCGG",
            ])
Пример #10
0
class TestSNPAlleleGenerator():
    def setup(self):
        DB.drop_database('mykrobe-test')
        self.pg = AlleleGenerator(
            reference_filepath="src/mykrobe/data/BX571856.1.fasta")
        self.reference_set = ReferenceSet().create_and_save(name="ref_set")
        self.variant_set = VariantSet.create_and_save(
            name="this_vcf_file", reference_set=self.reference_set)
        self.variant_sets = [self.variant_set]
        self.reference = Reference().create_and_save(
            name="ref", md5checksum="sre", reference_sets=[self.reference_set])

    def test_panel_generator(self):
        pg = AlleleGenerator(
            reference_filepath="src/mykrobe/data/BX571856.1.fasta")
        assert pg.ref is not None

    def test_simple_variant(self):
        v = Variant.create(variant_sets=self.variant_sets,
                           reference=self.reference,
                           reference_bases="A",
                           start=31,
                           alternate_bases=["T"])
        panel = self.pg.create(v)
        assert panel.refs == [
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGAT"
        ]
        assert panel.alts == [
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTGAT"
        ]
        assert self.pg._calculate_length_delta_from_indels(v, []) == 0
        assert v.is_indel is False

    def test_simple_variant2(self):
        v = Variant.create(variant_sets=self.variant_sets,
                           reference=self.reference,
                           reference_bases="A",
                           start=32,
                           alternate_bases=["T"])
        panel = self.pg.create(v)
        assert panel.refs == [
            "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGATC"
        ]
        assert panel.alts == [
            "GATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTGATC"
        ]

    def test_simple_variant_invalid(self):
        with pytest.raises(ValueError) as cm:
            v = Variant.create(variant_sets=self.variant_sets,
                               reference=self.reference,
                               reference_bases="T",
                               start=31,
                               alternate_bases=["T"])
            panel = self.pg.create(v)

    def test_simple_variant_start(self):
        v = Variant.create(variant_sets=self.variant_sets,
                           reference=self.reference,
                           reference_bases="C",
                           start=1,
                           alternate_bases=["T"])
        panel = self.pg.create(v)
        assert panel.refs == [
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGAT"
        ]
        assert panel.alts == [
            "TGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGAT"
        ]

    def test_simple_variant_end(self):
        v = Variant.create(variant_sets=self.variant_sets,
                           reference=self.reference,
                           reference_bases="A",
                           start=2902618,
                           alternate_bases=["T"])
        panel = self.pg.create(v)
        assert panel.refs == [
            "TAACAAAATCCTTTTTATAACGCAAGTTCATTTTATACTACTGCTCAATTTTTTTACTTTTAT"
        ]
        assert panel.alts == [
            "TAACAAAATCCTTTTTATAACGCAAGTTCATTTTATACTACTGCTCAATTTTTTTACTTTTTT"
        ]

        v = Variant.create(variant_sets=self.variant_sets,
                           reference=self.reference,
                           reference_bases="T",
                           start=2902616,
                           alternate_bases=["C"])
        panel = self.pg.create(v)
        assert panel.refs == [
            "TAACAAAATCCTTTTTATAACGCAAGTTCATTTTATACTACTGCTCAATTTTTTTACTTTTAT"
        ]
        assert panel.alts == [
            "TAACAAAATCCTTTTTATAACGCAAGTTCATTTTATACTACTGCTCAATTTTTTTACTTCTAT"
        ]

    def test_simple_variant_with_nearby_snp(self):
        v = Variant.create(variant_sets=self.variant_sets,
                           reference=self.reference,
                           reference_bases="A",
                           start=31,
                           alternate_bases=["T"])
        v2 = Variant.create(variant_sets=self.variant_sets,
                            reference=self.reference,
                            reference_bases="A",
                            start=32,
                            alternate_bases=["T"])
        panel = self.pg.create(v, context=[v2])
        assert panel.refs == [
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTGAT"
        ]
        assert panel.alts == [
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTGAT"
        ]

    def test_simple_variant_with_multiple_nearby_snps(self):
        v = Variant.create(variant_sets=self.variant_sets,
                           reference=self.reference,
                           reference_bases="A",
                           start=31,
                           alternate_bases=["T"])
        v2 = Variant.create(variant_sets=self.variant_sets,
                            reference=self.reference,
                            reference_bases="A",
                            start=32,
                            alternate_bases=["T"])
        v3 = Variant.create(variant_sets=self.variant_sets,
                            reference=self.reference,
                            reference_bases="C",
                            start=30,
                            alternate_bases=["G"])

        panel = self.pg.create(v, context=[v2, v3])
        assert panel.refs == [
            'CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGAT',
            'CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTGAT',
            'CGATTAAAGATAGAAATACACGATGCGAGGAATCAAATTTCATAACATCACCATGAGTTTGAT',
            'CGATTAAAGATAGAAATACACGATGCGAGGATTCAAATTTCATAACATCACCATGAGTTTGAT'
        ]
        assert panel.alts == [
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGTATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGTTTCAAATTTCATAACATCACCATGAGTTTGAT"
        ]

    def test_simple_variant_with_multiple_nearby_snps2(self):
        v = Variant.create(variant_sets=self.variant_sets,
                           reference=self.reference,
                           reference_bases="A",
                           start=31,
                           alternate_bases=["T"])
        v2 = Variant.create(variant_sets=self.variant_sets,
                            reference=self.reference,
                            reference_bases="A",
                            start=32,
                            alternate_bases=["T"])
        v3 = Variant.create(variant_sets=self.variant_sets,
                            reference=self.reference,
                            reference_bases="C",
                            start=30,
                            alternate_bases=["G"])
        v4 = Variant.create(variant_sets=self.variant_sets,
                            reference=self.reference,
                            reference_bases="C",
                            start=30,
                            alternate_bases=["T"])
        v5 = Variant.create(variant_sets=self.variant_sets,
                            reference=self.reference,
                            reference_bases="C",
                            start=30,
                            alternate_bases=["A"])
        assert sorted(self.pg._split_context([v, v3, v4])) == sorted([[v, v4],
                                                                      [v, v3]])
        assert (self.pg._split_context([v3, v4])) == [[v4], [v3]]
        assert (self.pg._split_context([v, v3, v4, v5])) == [[v, v4, v5],
                                                             [v, v3, v5]]
        panel = self.pg.create(v, context=[v2, v3, v4, v5])
        assert sorted(panel.refs) == sorted([
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGAATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGATTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGTAATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGTATTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGAAATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGAATTCAAATTTCATAACATCACCATGAGTTTGAT"
        ])
        assert sorted(panel.alts) == sorted([
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGTATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGTTTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGTTATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGTTTTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGATATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGATTTCAAATTTCATAACATCACCATGAGTTTGAT"
        ])

    def test_simple_variant_with_multiple_nearby_snps(self):
        v = Variant.create(variant_sets=self.variant_sets,
                           reference=self.reference,
                           reference_bases="A",
                           start=31,
                           alternate_bases=["T"])
        v2 = Variant.create(variant_sets=self.variant_sets,
                            reference=self.reference,
                            reference_bases="A",
                            start=32,
                            alternate_bases=["T"])
        v5 = Variant.create(variant_sets=self.variant_sets,
                            reference=self.reference,
                            reference_bases="A",
                            start=32,
                            alternate_bases=["G"])
        v3 = Variant.create(variant_sets=self.variant_sets,
                            reference=self.reference,
                            reference_bases="C",
                            start=30,
                            alternate_bases=["G"])
        v4 = Variant.create(variant_sets=self.variant_sets,
                            reference=self.reference,
                            reference_bases="C",
                            start=30,
                            alternate_bases=["T"])
        panel = self.pg.create(v, context=[v2, v3, v4, v5])
        assert sorted(panel.refs) == sorted([
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGAATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGATTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGTAATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGTATTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCAGTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGAGTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGTAGTCAAATTTCATAACATCACCATGAGTTTGAT"
        ])
        assert sorted(panel.alts) == sorted([
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGTATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGTTTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGTTATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGTTTTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCTGTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGTGTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGTTGTCAAATTTCATAACATCACCATGAGTTTGAT"
        ])
Пример #11
0
class TestSNPAlleleGenerator:
    def setup(self):
        DB.drop_database("mykrobe-test")
        self.pg = AlleleGenerator(
            reference_filepath=f"{DATA_DIR}/BX571856.1.fasta", kmer=31)
        self.reference_set = ReferenceSet().create_and_save(name="ref_set")
        self.variant_set = VariantSet.create_and_save(
            name="this_vcf_file", reference_set=self.reference_set)
        self.variant_sets = [self.variant_set]
        self.reference = Reference().create_and_save(
            name="ref", md5checksum="sre", reference_sets=[self.reference_set])

    def test_panel_generator(self):
        pg = AlleleGenerator(reference_filepath=f"{DATA_DIR}/BX571856.1.fasta",
                             kmer=31)
        assert pg.ref is not None

    def test_simple_snp_variant(self):
        v = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="A",
            start=31,
            alternate_bases=["T"],
        )
        panel = self.pg.create(v)
        assert panel.refs[0][:31] != panel.alts[0][:31]
        assert panel.refs[0][-32:] != panel.alts[0][-32:]
        assert panel.refs[0][-31:] != panel.alts[0][-31:]

        assert_no_overlapping_kmers(panel)

        assert panel.refs == [
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG"
        ]
        assert panel.alts == [
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTG"
        ]
        assert self.pg._calculate_length_delta_from_indels(v, []) == 0
        assert v.is_indel is False

    def test_simple_variant2(self):
        v = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="A",
            start=32,
            alternate_bases=["T"],
        )
        panel = self.pg.create(v)
        assert_no_overlapping_kmers(panel)

        assert panel.refs == [
            "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGA"
        ]
        assert panel.alts == [
            "GATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTGA"
        ]

    def test_simple_variant_invalid(self):
        with pytest.raises(ValueError) as cm:
            v = Variant.create(
                variant_sets=self.variant_sets,
                reference=self.reference,
                reference_bases="T",
                start=31,
                alternate_bases=["T"],
            )
            panel = self.pg.create(v)

    def test_simple_variant_start(self):
        v = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="C",
            start=1,
            alternate_bases=["T"],
        )
        panel = self.pg.create(v)
        #        assert_no_overlapping_kmers(panel) ## Will have overlapping kmers only if the SNP is in the i
        assert panel.refs == [
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG"
        ]
        assert panel.alts == [
            "TGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG"
        ]

    def test_simple_variant_end(self):
        v = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="A",
            start=2902618,
            alternate_bases=["T"],
        )
        panel = self.pg.create(v)
        assert_no_overlapping_kmers(panel)

        assert panel.refs == [
            "TTTATACTACTGCTCAATTTTTTTACTTTTATNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"
        ]
        assert panel.alts == [
            "TTTATACTACTGCTCAATTTTTTTACTTTTTTNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"
        ]

        v = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="T",
            start=2902616,
            alternate_bases=["C"],
        )
        panel = self.pg.create(v)
        assert panel.refs == [
            "ATTTTATACTACTGCTCAATTTTTTTACTTTTATNNNNNNNNNNNNNNNNNNNNNNNNNNN"
        ]
        assert panel.alts == [
            "ATTTTATACTACTGCTCAATTTTTTTACTTCTATNNNNNNNNNNNNNNNNNNNNNNNNNNN"
        ]

    def test_simple_variant_with_nearby_snp(self):
        v = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="A",
            start=31,
            alternate_bases=["T"],
        )
        v2 = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="A",
            start=32,
            alternate_bases=["T"],
        )
        panel = self.pg.create(v, context=[v2])
        assert_no_overlapping_kmers(panel)

        assert set(panel.refs) == set([
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTG",
        ])
        assert set(panel.alts) == set([
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTG",
        ])

    def test_simple_variant_with_multiple_nearby_snps(self):
        v = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="A",
            start=31,
            alternate_bases=["T"],
        )
        v2 = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="A",
            start=32,
            alternate_bases=["T"],
        )
        v3 = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="C",
            start=30,
            alternate_bases=["G"],
        )

        panel = self.pg.create(v, context=[v2, v3])
        assert_no_overlapping_kmers(panel)

        assert panel.refs == [
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGAATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGATTCAAATTTCATAACATCACCATGAGTTTGAT",
        ]
        assert panel.alts == [
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGTATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGTTTCAAATTTCATAACATCACCATGAGTTTGAT",
        ]

    def test_simple_variant_with_multiple_nearby_snps2(self):
        v = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="A",
            start=31,
            alternate_bases=["T"],
        )
        v2 = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="A",
            start=32,
            alternate_bases=["T"],
        )
        v3 = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="C",
            start=30,
            alternate_bases=["G"],
        )
        v4 = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="C",
            start=30,
            alternate_bases=["T"],
        )
        v5 = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="C",
            start=30,
            alternate_bases=["A"],
        )

        assert sorted(self.pg._split_context([v, v3, v4])) == sorted([[v, v4],
                                                                      [v, v3]])
        assert (self.pg._split_context([v3, v4])) == [[v4], [v3]]
        assert (self.pg._split_context([v, v3, v4, v5])) == [[v, v4, v5],
                                                             [v, v3, v5]]
        panel = self.pg.create(v, context=[v2, v3, v4, v5])
        assert_no_overlapping_kmers(panel)
        assert sorted(panel.refs) == sorted([
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGAATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGATTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGTAATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGTATTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGAAATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGAATTCAAATTTCATAACATCACCATGAGTTTG",
        ])
        assert sorted(panel.alts) == sorted([
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGTATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGTTTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGTTATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGTTTTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGATATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGATTTCAAATTTCATAACATCACCATGAGTTTG",
        ])

    def test_simple_variant_with_multiple_nearby_snps(self):
        v = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="A",
            start=31,
            alternate_bases=["T"],
        )
        v2 = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="A",
            start=32,
            alternate_bases=["T"],
        )
        v5 = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="A",
            start=32,
            alternate_bases=["G"],
        )
        v3 = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="C",
            start=30,
            alternate_bases=["G"],
        )
        v4 = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="C",
            start=30,
            alternate_bases=["T"],
        )
        panel = self.pg.create(v, context=[v2, v3, v4, v5])
        assert_no_overlapping_kmers(panel)
        assert sorted(panel.refs) == sorted([
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGAATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGATTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGTAATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGTATTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGCAGTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGAGTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGTAGTCAAATTTCATAACATCACCATGAGTTTG",
        ])
        assert sorted(panel.alts) == sorted([
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGTATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGTTTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGTTATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGTTTTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGCTGTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGTGTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGTTGTCAAATTTCATAACATCACCATGAGTTTG",
        ])
Пример #12
0
class TestLargeINDELAlleleGenerator():
    def setup(self):
        DB.drop_database('mykrobe-test')
        self.pg = AlleleGenerator(
            reference_filepath=f"{DATA_DIR}/NC_000962.3.fasta")
        self.reference_set = ReferenceSet().create_and_save(name="ref_set")
        self.variant_set = VariantSet.create_and_save(
            name="this_vcf_file", reference_set=self.reference_set)
        self.variant_sets = [self.variant_set]
        self.reference = Reference().create_and_save(
            name="ref", md5checksum="sre", reference_sets=[self.reference_set])

    def test_large_variant1(self):
        v = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases=
            "AACGCCCGGTATCTGAGGATCTGTGTTCTCACCCAATACAAGTCGCATTCACT",
            start=1355983,
            alternate_bases=[
                "ACCGCCCGGTATCTGAGGATTGGTTTTCCACCCAAATACAAGTCGCATTCGCG"
            ])
        panel = self.pg.create(v)
        assert_no_overlapping_kmers(panel)
        assert "TCGTCAACGCCCGGTATCTGAGGATCTGTGTTCTCACCCAATACAAGTCGCATTCACTGGA" in panel.refs
        assert panel.alts == [
            "TCGTCACCGCCCGGTATCTGAGGATTGGTTTTCCACCCAAATACAAGTCGCATTCGCGGGA"
        ]

    def test_large_variant2(self):
        v = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases=
            "AACGCCCGGTATCTGAGGATCTGTGTTCTCACCCAATACAAGTCGCATTCAC",
            start=1355983,
            alternate_bases=[
                "ACCGCCCGGTATCTGAGGATTGGTTTTCCACCCAAATACAAGTCGCATTCGC"
            ])
        panel = self.pg.create(v)
        assert_no_overlapping_kmers(panel)
        assert "TCGTCAACGCCCGGTATCTGAGGATCTGTGTTCTCACCCAATACAAGTCGCATTCACTGGA" in panel.refs
        assert panel.alts == [
            "TCGTCACCGCCCGGTATCTGAGGATTGGTTTTCCACCCAAATACAAGTCGCATTCGCTGGA"
        ]

    # def test_large_variant3(self):
    #     v = Variant.create(
    #         variant_sets=self.variant_sets,
    #         reference=self.reference,
    #         reference_bases="TCGTCAACGCCCGGTATCTGAGGATCTGTGTTCTCACCCAATACAAGTCGCATTCACTGGA",
    #         start=1355978,
    #         alternate_bases=["TCGTCAACGCCCGGTATCTGAGGATCGGTGTTCTCACCCAATACAAGTCGCATTCACTGGA"])
    #     panel = self.pg.create(v)
    #     assert_no_overlapping_kmers(panel)
    #     assert "CAAACCTCGTCAACGCCCGGTATCTGAGGATCTGTGTTCTCACCCAATACAAGTCGCATTCACTGGACCGCCA" in panel.refs
    #     assert panel.alts == [
    #         "CAAACCTCGTCAACGCCCGGTATCTGAGGATCGGTGTTCTCACCCAATACAAGTCGCATTCACTGGACCGCCA"]

    # def test_very_large_variant3(self):
    #     v = Variant.create(
    #         variant_sets=self.variant_sets,
    #         reference=self.reference,
    #         reference_bases="TCGTCAACGCCCGGTATCTGAGGATCTGTGTTCTCACCCAATACAAGTCGCATTCACTGGACCGCCAT",
    #         start=1355978,
    #         alternate_bases=["TCGTCAACGCCCGGTATCTGAGGATCGGTGTTCACCCAATACAAGTCGCATTCACTGGACCGCCAT"])
    #     panel = self.pg.create(v)
    #     assert "AAACCTCGTCAACGCCCGGTATCTGAGGATCTGTGTTCTCACCCAATACAAGTCGCATTCACTGGACCGCCATATCTCG" in panel.refs
    #     assert panel.alts == [
    #         "CAAACCTCGTCAACGCCCGGTATCTGAGGATCGGTGTTCACCCAATACAAGTCGCATTCACTGGACCGCCATATCTCGC"]

    def test_large_insertion(self):
        v = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="C",
            start=2352065,
            alternate_bases=[
                "CCTCGCCTGGGCTGGCGAGCAGACGCAAAATCCCCCGCACGCCCGGCGTGTCGGGGGATTTTGCGTCTG"
            ])
        panel = self.pg.create(v)
        assert_no_overlapping_kmers(panel)
        assert "AGCTCGGCCAGCTCAGTCACGTCGCCGCCGCCTCGCCAGTTGACCGCGCCCGCTCGCGGCT" in panel.refs
        assert panel.alts == [
            "CCAGCTCAGTCACGTCGCCGCCGCCTCGCCTGGGCTGGCGAGCAGACGCAAAATCCCCCGCACGCCCGGCGTGTCGGGGGATTTTGCGTCTGCTCGCCAGTTGACCGCGCCCGCTCGCGGCT"
        ]

    def test_large_var1(self):
        v = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases=
            "CGCGGGAGTAGAACGATCGCCAAGTGGTCGGTCTTGGCTGCCCACTTCATCCCCGGCGCCACCGGCAGGTCTCGCGGTCATCTCGACCAACGGAGGGCCGTCGGTGGTTCGTATCCGGCCAAGAACGGCGAGAACGGTTTGTGCCTCTATGCCAGGGTGAATGTCTCATCTCCCAGGCGGACGGTGATATCCAGTTCTCCGCCAAGAGCGGACACGTATTTGCGCAGTGTGTTGACCTGTGCGGAGCCGATGTCGCCGTTCTCGATGCTGGATACCCGGCTCTGCCGGATGTGCGCCAGCGCAGCCACCTGGACCTGGGTGAGTGACTGAGCCGCGCGCAGCTCCCGGAGCCGGAATGCCCGCACTTCATCGCGCATTCGTGCCTTGTGCCGGTCCACCGCCTCCCGGTTAACGGGACGTACGGCGTCCATGTCCCGTAGTGTCATCGCCATCGTGCCACTTACCCTTTCTTGCGCTTGCGCCTCTTTGGCTTCGTGTCCTCGAACTGTGCGAGATGTTCGGCAAACATCTCATCGGCCGCTTTGATCTTCTCGTCGTACCACTGGGTCCACCGCCCGGCCTTGTTACCGGCGGCCAGCATGATCGCCTGCCGCGCCGGGTCGAAGGCGAACAGAATGCGGACCTCGGACCGCCCTTGTGATCCTGGACGCAGCTCCTTCATGTTCTTGTGGCGCGACCCACGCACCGTGTCCACCAGAGGACAGCCAAGTGCGGGGCCCTCTTCCTCGAGAACCTCGATAGCTGCGAACACCAATTCGTAGGTCTCTCGGTCCAAGCCGTTGAGCCAGGCGGAGATGCGCTCCACATCCGCCGTCCACCCCACAGAGTCGCAGAGTAGCGCGATACGCGATATCACACAAGGGTGATATTCCTCCGGGTAAGAGCAGCGGGCGACGGGGCTACCGTCGAGGAAATGCCGGCAGGCGAGGACGGACTCTGCGCACCCGGGCCGTTGAAACAGTAGCCTGTGCCAGGCCGAGAATTCATCCCCACGTATGAGGCAGTACAGTGCGCCGCCGTGCGCGTTCTCCCATGGAACGTTCACGGGCTCCCGTGGATGACAGGCGTTTCATGAACGCCAGCGCCGCCGCAACCCGACCGAAAGCGGTTGACCCCAAGGAGAGCTGGAAGTCGAGGCCACCACCTTCGCCGCGGAGTTGCTCATGCCCGAGAGCGAGACTCGTCCCGAAATACGCCGGCTCGATTTCGGCAAGTTGCTCGAACTGAAGCGGGAATGGGCGTCGACCCGCTCGACCAGCCCCAGCCGGGTGACCAGCCCCAGCCGGGTGACCAGCCGATGCACCGCGGCGATCCCACCGAAGCCGGTGGCATCGATGTTGGCGCCGACCTCGTAGCGCACCGCGCCCGAACCCAGCATCGGCCTGGGCTGCGCCGCCCAGCGTCCAGCCCGCGCGTGCCGCGCCGCCACCCTGCGCCCTCGGCGTGTGATGTTTCGCCGACTCTGTTCATGGGTTATCTTCTTCACCACAAAGGCCTTTCCTGCTGGGCTGTGTTGAGGTCGCAAACCCAGCCAGGGTAAGGCCTTTGGCCTCTCCTACCCGGCCGACACGCTTACTGAAGGCCTAGTCTAGGCAGGCCATTCAATCTGCGGAATCGAAAAATTCGGTTCCAGCCTGCTCGTTTCCTTTCCGACAGCGATCTGACGTTGCGTAACGTCATTTGTACGGACTCTTTTAGCGGCATTGATTTCAGATGCCAACGCCGTCTGTGCTGTAGCGCCGATTGGCCGAAACTGTAAATTTGTATGATTATTTAAATCTTTGACGAACACGCGCCACAAACGTACTATCTCTTTGGCAAAGTCCACCGGCATCTCATTCAACGGTTTTGTTTGCGCGTGGTCGTCATATGTTGGTAACTGTGTAACCGGCCGCCTATCTTGCGCGTGCATCATATGACTATGAATCGGCCTTCTCCAGTGAAATTGATACAAGATCGATCCGATAAGCGGTACCTTGTACACAGTGCAATTGTAGTAATTCGCGTTTTGTCCTACGCTTGTATTCTGCGTGAAGAATTCA",
            start=2266659,
            alternate_bases=[
                "CACGCGAGTTGTAGATGATCGTTGAGTGGTCTTGCTTGGACTTCCATTTCATCTTTTCGACGCGCCAGGTCTCGCGGTCCTCCGGATCTGCGCCCGGTTTGAGTTGCACATCAAGGGGATACGGCTTGACCGACTCGTAGCCGACATGTAAGTCGGCTAGTTTCCGGCCGGCGCTGGCGAGCTGGTCGAAGCGTTCGCGGGTCTCCGGTGTTGGGATGTGCGGGAGCATCTTCTTGAGGTCAGCGGCGTATTTTGTGCGGTAGGCGGGGTCATGCAGCAGGCCGTAGACGTAGTAGAAGATGTCGTCTTTGGTGACTTGGTCGCCGATCGTGTCGCGGTAGAGCTTGAGGATGACGCCGGTGATGTTGTCGACGCGGCGGTAGCCGTGGTCGTCTACTTCGGCGTTGGTGGTGGACTCGAAATCGAGTTCGCCGTCACGTGGTTCGGTCTTCTCGTAGGTCCAGCGCGGGAAGAATTGACCGTTGCTTGAGCCCCAGAATGCGAGATCGGGGATAGCGTTTAGCATCAGACACGAGAAGGGCTTGTCTGAGCCCATGCCAACCACGTAGTAACCGACATTCCCGTGCTCCGGCGTCGGAAACATCGACGGAAGCTGGTAGGTACAGTTGTTGAGCTGCTGGTTGGGGTCGAGGTAGGCGTGCTCTTTCGTAAATGGTCGGTACGTGCCGAGCCGCATTCCCGCGGGAGCGAATTCGATGCGAATGCCTTGTGCCACTTGCCGCTTGTTGATGCGGTCCCAGCTGAACTTGGCCGAGTCCACGGTAATGAGGGCGTCAACCGGCGGGGTCTTGGCGTCCCTTCCGCGGATCTCGTTGATCCGGTCGACCTCCGAGTTGTAGAAGTCGATCGTGCGTCCGATGTTGGCCTCGAGCGCACCACGTGAAAAGTTGTAACACCACGCATCCCGGCTGGTCTTCAAGCCCGCGGAATAGTTCGCGAAGACACGTGTCACGTCAAGAGCAGCCTTCTTGTCGCCGATAACCGGCCACGCGCTGAACGCGTCGTCGCGTTGGTTGACCCAGTCACCGTGCAAGTTGGGTGTGACTGTCTGCCATTCCACCGTGTCGAGGTAGCCGTCGCCGACGATCCGCAACTTCTCCTCGCGACTCAGGTAATCGCCGATGTCGCGGTAAAGGACATCGCATGGCCCGCTGTGCTTCGGATCCTTGATGCCAAGGAAGATCGCCACCGTGTTGCGACTCCCCCCGCCAAAGACCTTGCCGCCTTCCTGGCGTGAGAGTTCCCCAGCTGTGCGCTGGTTCCCCCGCAGGTTGTACACATATACCGCCGCGTAGTCGTCGGCGAGCGACAACCGCATGCCGTCTGCCGTGTTGCCGTCTATGTACCCACCATTGGAGACGAATCCGACAACACCGTTGTCACCAATGCGGTCGGTCGCCCACCGGAACGCGCGAATATACGAGTCGTACAGGCTGTTCTTCAGCTGCGCCGTCGACCGCTTCGCGTACGTCTGCTCAATCCGCCCGTCCAACGTCGGATACTTCACGTTGGCGTTCAGGTCGTTCGCGCTGCTCTGCCCCACCGAGTACGGCGGATTCCCGATGATCACGCTGATCGGCGTCGCCAGCTGTCGCAAGATCCGAGCGTTGTTGTACGGGAACATGATCGCGTCCATCGAGTCCCCGGCTTCGGAAATCTGGAACGTGTCGGCCAGCGCCATCCCGGGGAACGGCTCATAGGCGTCGGCGTCGGCGGTCTTGCCCGCCAAAGCATGGTAGGTCGACTCGATGTTCACCGCGGCGATGTAGTACGCCAGCAGCATGATCTCGTTGGCGTGCAGCTCTTGCGAGTACTTTCGGGTGAGGTCGGCGGCCGTGATCAGGTCGGACTGCAGCAGCCGGGTAATGAATGTGCCCGTCCCGGCGAAGCCGTCCAGAATATGCACGCCCTCGTCGGTCAGCCCGCGCCCGAAATGCTTGCGCGACACGAAATCAGCCGCCCGCACAATGAAGTCCACGACCTCGACCGGCGTGTACACGATCCCCAGCGCCTCGGCCTGCTTCTTGAAGCCGATGCGGAAGAACTTCTCGTACAGCTCGGCGATCACCTGCTGCTTGCCCTCGGCGCTGGTGACCTCGCCGGCGCGCCGTCGCACCGATTCGTAAAAGCCTTCCAACCGAGCGGTTTCGGCCTCCAGGCCGGCACCCCCGACGGTGTCGACCATCTTCTGCATGGCCCGCGACACCGGGTTGTGCGACGCGAAGTCATGCCCGGCGAACAGCGCGTCGAACACCGGCTTGGTGATCAGGTGCTGCGAGAGCATGCTGATCGCGTCATCGGGGGTGATCGAGTCATTGAGGTTATCGCGCAGCCCGGCCAGGAACTGCTCGAACGCCGCCGCCGCCGTAGCGTCGGCGCCGCCGAGCAGGGCGTGGATACGGGTGGTCAGCGTCGCGGCGATGTCGGCGACATCGGCGGCCCACTGCTCCCAATAGGTCCGGGTGCCAACCTTGTCGACGATGCGCGCGTAGATCGCTTCCTGCCACTGCGACAACGAGAACATCGCCAACTGCTCCGCGACGGCGGGTCCCGCCTCGTCGGAGGTCGGCCCGATGTGACCGCCCAACAGCTTGTCGCTGCCTTCACCGGTCTTCGTCGGCTTCACGTTCAGCGCAATGCTGTTCACCATCGCGTCGAAGCGCTCGTCGTGCGACCGCAACGCGTTGAGGACCTGCCACACCACCTTGAACCGTTTGTTGTCGGCCAACGCGGCAGACGGCTCGACACCCTCGGGCACCGCCACCGGCAAGATGACGTACCCGTAGTCCTTGCCGGGCGACTTGCGCATCACCCGACCGACCGACTGCACCACGTCGACGATGGAATTGCGCGGATTCAGGAACAGCACCGCGTCCAGCGCGGGCACGTCGACCCCTTCGGAGAGGCAGCGGGCGTTGGACAGGATGCGGCATTCATCCTCGGCGACCACGCCTTTGAGCCAGGCCAGCTGTTCGTTGCGGACCAGCGCGTTGAACGTCCCGTCCACGTGGCGCACCG"
            ])
        panel = self.pg.create(v)
        assert_no_overlapping_kmers(panel)
        assert "TGGTGACGCGGGAGTAGAACGATCGCCAAGTGGTCGGTCTTGGCTGCCCACTTCATCCCCGGCGCCACCGGCAGGTCTCGCGGTCATCTCGACCAACGGAGGGCCGTCGGTGGTTCGTATCCGGCCAAGAACGGCGAGAACGGTTTGTGCCTCTATGCCAGGGTGAATGTCTCATCTCCCAGGCGGACGGTGATATCCAGTTCTCCGCCAAGAGCGGACACGTATTTGCGCAGTGTGTTGACCTGTGCGGAGCCGATGTCGCCGTTCTCGATGCTGGATACCCGGCTCTGCCGGATGTGCGCCAGCGCAGCCACCTGGACCTGGGTGAGTGACTGAGCCGCGCGCAGCTCCCGGAGCCGGAATGCCCGCACTTCATCGCGCATTCGTGCCTTGTGCCGGTCCACCGCCTCCCGGTTAACGGGACGTACGGCGTCCATGTCCCGTAGTGTCATCGCCATCGTGCCACTTACCCTTTCTTGCGCTTGCGCCTCTTTGGCTTCGTGTCCTCGAACTGTGCGAGATGTTCGGCAAACATCTCATCGGCCGCTTTGATCTTCTCGTCGTACCACTGGGTCCACCGCCCGGCCTTGTTACCGGCGGCCAGCATGATCGCCTGCCGCGCCGGGTCGAAGGCGAACAGAATGCGGACCTCGGACCGCCCTTGTGATCCTGGACGCAGCTCCTTCATGTTCTTGTGGCGCGACCCACGCACCGTGTCCACCAGAGGACAGCCAAGTGCGGGGCCCTCTTCCTCGAGAACCTCGATAGCTGCGAACACCAATTCGTAGGTCTCTCGGTCCAAGCCGTTGAGCCAGGCGGAGATGCGCTCCACATCCGCCGTCCACCCCACAGAGTCGCAGAGTAGCGCGATACGCGATATCACACAAGGGTGATATTCCTCCGGGTAAGAGCAGCGGGCGACGGGGCTACCGTCGAGGAAATGCCGGCAGGCGAGGACGGACTCTGCGCACCCGGGCCGTTGAAACAGTAGCCTGTGCCAGGCCGAGAATTCATCCCCACGTATGAGGCAGTACAGTGCGCCGCCGTGCGCGTTCTCCCATGGAACGTTCACGGGCTCCCGTGGATGACAGGCGTTTCATGAACGCCAGCGCCGCCGCAACCCGACCGAAAGCGGTTGACCCCAAGGAGAGCTGGAAGTCGAGGCCACCACCTTCGCCGCGGAGTTGCTCATGCCCGAGAGCGAGACTCGTCCCGAAATACGCCGGCTCGATTTCGGCAAGTTGCTCGAACTGAAGCGGGAATGGGCGTCGACCCGCTCGACCAGCCCCAGCCGGGTGACCAGCCCCAGCCGGGTGACCAGCCGATGCACCGCGGCGATCCCACCGAAGCCGGTGGCATCGATGTTGGCGCCGACCTCGTAGCGCACCGCGCCCGAACCCAGCATCGGCCTGGGCTGCGCCGCCCAGCGTCCAGCCCGCGCGTGCCGCGCCGCCACCCTGCGCCCTCGGCGTGTGATGTTTCGCCGACTCTGTTCATGGGTTATCTTCTTCACCACAAAGGCCTTTCCTGCTGGGCTGTGTTGAGGTCGCAAACCCAGCCAGGGTAAGGCCTTTGGCCTCTCCTACCCGGCCGACACGCTTACTGAAGGCCTAGTCTAGGCAGGCCATTCAATCTGCGGAATCGAAAAATTCGGTTCCAGCCTGCTCGTTTCCTTTCCGACAGCGATCTGACGTTGCGTAACGTCATTTGTACGGACTCTTTTAGCGGCATTGATTTCAGATGCCAACGCCGTCTGTGCTGTAGCGCCGATTGGCCGAAACTGTAAATTTGTATGATTATTTAAATCTTTGACGAACACGCGCCACAAACGTACTATCTCTTTGGCAAAGTCCACCGGCATCTCATTCAACGGTTTTGTTTGCGCGTGGTCGTCATATGTTGGTAACTGTGTAACCGGCCGCCTATCTTGCGCGTGCATCATATGACTATGAATCGGCCTTCTCCAGTGAAATTGATACAAGATCGATCCGATAAGCGGTACCTTGTACACAGTGCAATTGTAGTAATTCGCGTTTTGTCCTACGCTTGTATTCTGCGTGAAGAATTCAAACA" in panel.refs
        assert panel.alts == [
            "GGCCTCGTCGGGAATGCCGGCGATGGTGACACGCGAGTTGTAGATGATCGTTGAGTGGTCTTGCTTGGACTTCCATTTCATCTTTTCGACGCGCCAGGTCTCGCGGTCCTCCGGATCTGCGCCCGGTTTGAGTTGCACATCAAGGGGATACGGCTTGACCGACTCGTAGCCGACATGTAAGTCGGCTAGTTTCCGGCCGGCGCTGGCGAGCTGGTCGAAGCGTTCGCGGGTCTCCGGTGTTGGGATGTGCGGGAGCATCTTCTTGAGGTCAGCGGCGTATTTTGTGCGGTAGGCGGGGTCATGCAGCAGGCCGTAGACGTAGTAGAAGATGTCGTCTTTGGTGACTTGGTCGCCGATCGTGTCGCGGTAGAGCTTGAGGATGACGCCGGTGATGTTGTCGACGCGGCGGTAGCCGTGGTCGTCTACTTCGGCGTTGGTGGTGGACTCGAAATCGAGTTCGCCGTCACGTGGTTCGGTCTTCTCGTAGGTCCAGCGCGGGAAGAATTGACCGTTGCTTGAGCCCCAGAATGCGAGATCGGGGATAGCGTTTAGCATCAGACACGAGAAGGGCTTGTCTGAGCCCATGCCAACCACGTAGTAACCGACATTCCCGTGCTCCGGCGTCGGAAACATCGACGGAAGCTGGTAGGTACAGTTGTTGAGCTGCTGGTTGGGGTCGAGGTAGGCGTGCTCTTTCGTAAATGGTCGGTACGTGCCGAGCCGCATTCCCGCGGGAGCGAATTCGATGCGAATGCCTTGTGCCACTTGCCGCTTGTTGATGCGGTCCCAGCTGAACTTGGCCGAGTCCACGGTAATGAGGGCGTCAACCGGCGGGGTCTTGGCGTCCCTTCCGCGGATCTCGTTGATCCGGTCGACCTCCGAGTTGTAGAAGTCGATCGTGCGTCCGATGTTGGCCTCGAGCGCACCACGTGAAAAGTTGTAACACCACGCATCCCGGCTGGTCTTCAAGCCCGCGGAATAGTTCGCGAAGACACGTGTCACGTCAAGAGCAGCCTTCTTGTCGCCGATAACCGGCCACGCGCTGAACGCGTCGTCGCGTTGGTTGACCCAGTCACCGTGCAAGTTGGGTGTGACTGTCTGCCATTCCACCGTGTCGAGGTAGCCGTCGCCGACGATCCGCAACTTCTCCTCGCGACTCAGGTAATCGCCGATGTCGCGGTAAAGGACATCGCATGGCCCGCTGTGCTTCGGATCCTTGATGCCAAGGAAGATCGCCACCGTGTTGCGACTCCCCCCGCCAAAGACCTTGCCGCCTTCCTGGCGTGAGAGTTCCCCAGCTGTGCGCTGGTTCCCCCGCAGGTTGTACACATATACCGCCGCGTAGTCGTCGGCGAGCGACAACCGCATGCCGTCTGCCGTGTTGCCGTCTATGTACCCACCATTGGAGACGAATCCGACAACACCGTTGTCACCAATGCGGTCGGTCGCCCACCGGAACGCGCGAATATACGAGTCGTACAGGCTGTTCTTCAGCTGCGCCGTCGACCGCTTCGCGTACGTCTGCTCAATCCGCCCGTCCAACGTCGGATACTTCACGTTGGCGTTCAGGTCGTTCGCGCTGCTCTGCCCCACCGAGTACGGCGGATTCCCGATGATCACGCTGATCGGCGTCGCCAGCTGTCGCAAGATCCGAGCGTTGTTGTACGGGAACATGATCGCGTCCATCGAGTCCCCGGCTTCGGAAATCTGGAACGTGTCGGCCAGCGCCATCCCGGGGAACGGCTCATAGGCGTCGGCGTCGGCGGTCTTGCCCGCCAAAGCATGGTAGGTCGACTCGATGTTCACCGCGGCGATGTAGTACGCCAGCAGCATGATCTCGTTGGCGTGCAGCTCTTGCGAGTACTTTCGGGTGAGGTCGGCGGCCGTGATCAGGTCGGACTGCAGCAGCCGGGTAATGAATGTGCCCGTCCCGGCGAAGCCGTCCAGAATATGCACGCCCTCGTCGGTCAGCCCGCGCCCGAAATGCTTGCGCGACACGAAATCAGCCGCCCGCACAATGAAGTCCACGACCTCGACCGGCGTGTACACGATCCCCAGCGCCTCGGCCTGCTTCTTGAAGCCGATGCGGAAGAACTTCTCGTACAGCTCGGCGATCACCTGCTGCTTGCCCTCGGCGCTGGTGACCTCGCCGGCGCGCCGTCGCACCGATTCGTAAAAGCCTTCCAACCGAGCGGTTTCGGCCTCCAGGCCGGCACCCCCGACGGTGTCGACCATCTTCTGCATGGCCCGCGACACCGGGTTGTGCGACGCGAAGTCATGCCCGGCGAACAGCGCGTCGAACACCGGCTTGGTGATCAGGTGCTGCGAGAGCATGCTGATCGCGTCATCGGGGGTGATCGAGTCATTGAGGTTATCGCGCAGCCCGGCCAGGAACTGCTCGAACGCCGCCGCCGCCGTAGCGTCGGCGCCGCCGAGCAGGGCGTGGATACGGGTGGTCAGCGTCGCGGCGATGTCGGCGACATCGGCGGCCCACTGCTCCCAATAGGTCCGGGTGCCAACCTTGTCGACGATGCGCGCGTAGATCGCTTCCTGCCACTGCGACAACGAGAACATCGCCAACTGCTCCGCGACGGCGGGTCCCGCCTCGTCGGAGGTCGGCCCGATGTGACCGCCCAACAGCTTGTCGCTGCCTTCACCGGTCTTCGTCGGCTTCACGTTCAGCGCAATGCTGTTCACCATCGCGTCGAAGCGCTCGTCGTGCGACCGCAACGCGTTGAGGACCTGCCACACCACCTTGAACCGTTTGTTGTCGGCCAACGCGGCAGACGGCTCGACACCCTCGGGCACCGCCACCGGCAAGATGACGTACCCGTAGTCCTTGCCGGGCGACTTGCGCATCACCCGACCGACCGACTGCACCACGTCGACGATGGAATTGCGCGGATTCAGGAACAGCACCGCGTCCAGCGCGGGCACGTCGACCCCTTCGGAGAGGCAGCGGGCGTTGGACAGGATGCGGCATTCATCCTCGGCGACCACGCCTTTGAGCCAGGCCAGCTGTTCGTTGCGGACCAGCGCGTTGAACGTCCCGTCCACGTGGCGCACCGAACACGCCAGGCCCGGGCCGTCGTCAACCA"
        ]