Пример #1
0
def test_double_deletion_with_snp_inside_first_deletiod_and_false_deletion_path():
    """Pin the edges produced for two deletions around a repeated sequence.

    Nodes 5 and 6 hold the same sequence and nodes 3/4 are single-base
    alternatives after node 2. Two DELETION variants are handed to
    ``DummyNodeAdder`` and the edge lists of the graph returned by
    ``create_new_graph_with_dummy_nodes`` are checked node by node.
    """
    repeated_sequence = "AGGTCCCAGGTCCATCT"
    node_sequences = {
        1: "TTTT",
        2: "AGGTCC",
        3: "C",
        4: "A",
        5: repeated_sequence,
        6: repeated_sequence,
    }
    node_edges = {
        1: [2, 5, 6],
        2: [3, 4],
        3: [5, 6],
        4: [5, 6],
        5: [6],
    }
    # Third argument is presumably the linear reference path -- confirm
    # against Graph.from_dicts.
    graph = Graph.from_dicts(node_sequences, node_edges, [1, 2, 3, 5, 6])

    first_deletion = VcfVariant(1, 4, "TAGGTCCC", "T", type="DELETION")
    second_deletion = VcfVariant(
        1, 11, "CAGGTCCCAGGTCCATCT", "C", type="DELETION")
    variants = VcfVariants([first_deletion, second_deletion])

    new_graph = DummyNodeAdder(
        graph, variants).create_new_graph_with_dummy_nodes()
    print(new_graph)

    # Checked in the same order as the original assertions; nodes 8 and 9
    # do not exist in the input graph.
    expected_edges = {
        1: [2, 8],
        2: [3, 4],
        3: [5, 9],
        4: [5, 9],
        9: [6],
        8: [5, 9],
    }
    for node, targets in expected_edges.items():
        assert list(new_graph.get_edges(node)) == targets
Пример #2
0
def test_double_deletion_with_snp_inside_first_deletion():
    """Smoke-run ``DummyNodeAdder`` on two deletions with an SNP in between.

    The test only builds the graph, creates the new graph with dummy nodes
    and prints it; it makes no assertions, so it fails only if an exception
    is raised.
    """
    node_sequences = {
        1: "ACTG",
        2: "A",
        3: "C",
        4: "T",
        5: "AAA",
        6: "G",
    }
    node_edges = {
        1: [2, 5, 6],
        2: [3, 4],
        3: [5, 6],
        4: [5, 6],
        5: [6],
    }
    graph = Graph.from_dicts(node_sequences, node_edges, [1, 2, 4, 6])

    deletions = [
        VcfVariant(1, 4, "GAT", "G", type="DELETION"),
        VcfVariant(1, 6, "TAAA", "T", type="DELETION"),
    ]
    variants = VcfVariants(deletions)

    adder = DummyNodeAdder(graph, variants)
    print(adder.create_new_graph_with_dummy_nodes())
Пример #3
0
def test_overlapping_deletions():
    """Check dummy-node edges and variant nodes for two deletions plus an SNP.

    After ``create_new_graph_with_dummy_nodes`` the test pins the edges of
    nodes 1, 8, 2 and 9 and verifies that the second variant resolves to
    reference node 3 and variant node 9.
    """
    node_sequences = {
        1: "AA",
        2: "TCTG",
        3: "TCT",
        4: "G",
        5: "A",
        6: "GG",
    }
    node_edges = {
        1: [2, 3],
        2: [3, 6],
        3: [4, 5],
        4: [6],
        5: [6],
    }
    graph = Graph.from_dicts(node_sequences, node_edges, [1, 2, 3, 5, 6])

    variants = VcfVariants([
        VcfVariant(1, 2, "ATCTG", "A", type="DELETION"),
        VcfVariant(1, 6, "GTCTA", "T", type="DELETION"),
        VcfVariant(1, 10, "A", "G", type="SNP"),
    ])
    new_graph = DummyNodeAdder(
        graph, variants).create_new_graph_with_dummy_nodes()

    # Nodes 8 and 9 are not present in the input graph.
    for node, targets in {1: [2, 8], 8: [3, 9], 2: [3, 9], 9: [6]}.items():
        assert list(new_graph.get_edges(node)) == targets

    reference_node, variant_node = new_graph.get_variant_nodes(variants[1])
    assert reference_node == 3
    assert variant_node == 9
    print(new_graph)
Пример #4
0
def test_insertion_with_identical_false_path():
    """Check an insertion whose inserted sequence equals the following node.

    Nodes 2 and 3 both hold "TCTG". After dummy nodes are added the test
    expects node 1 to point at nodes 2 and 6, both of which lead to node 3,
    and the variant to resolve to reference node 6 / variant node 2.
    """
    node_sequences = {
        1: "AA",
        2: "TCTG",
        3: "TCTG",
        4: "GG",
    }
    node_edges = {
        1: [2, 3],
        2: [3],
        3: [4],
    }
    graph = Graph.from_dicts(node_sequences, node_edges, [1, 3, 4])

    insertion = VcfVariant(1, 2, "A", "ATCTG", type="INSERTION")
    variants = VcfVariants([insertion])

    new_graph = DummyNodeAdder(
        graph, variants).create_new_graph_with_dummy_nodes()
    print(new_graph)

    for node, targets in {1: [2, 6], 6: [3], 2: [3]}.items():
        assert list(new_graph.get_edges(node)) == targets

    reference_node, variant_node = new_graph.get_variant_nodes(variants[0])
    assert reference_node == 6
    assert variant_node == 2
Пример #5
0
def test_simple():
    """Merge two four-node graphs and check offsets, edges and start nodes.

    Both input graphs share the same topology and differ only in the
    sequence of node 1. After ``merge_graphs`` the test checks lookups by
    reference offset, offsets by node, the chromosome start nodes, and
    finally writes the merged graph to "merged_graph.npz" and reads it back.
    """

    def build_graph(first_node_sequence):
        # Fresh dicts per call so the two graphs share no mutable state.
        return Graph.from_dicts(
            {1: first_node_sequence, 2: "A", 3: "C", 4: "ACT"},
            {1: [2, 3], 2: [4], 3: [4]},
            [1, 2, 4],
        )

    graph1 = build_graph("ACTG")
    graph2 = build_graph("AAAA")

    merged_graph = merge_graphs([graph1, graph2])

    print(merged_graph)
    assert list(merged_graph.get_edges(1)) == [2, 3]

    # Lookups by offset on the merged linear reference.
    assert merged_graph.get_node_at_ref_offset(0) == 1
    node_at_8 = merged_graph.get_node_at_ref_offset(8)
    assert merged_graph.get_node_sequence(node_at_8) == "AAAA"
    node_at_4 = merged_graph.get_node_at_ref_offset(4)
    assert merged_graph.get_node_sequence(node_at_4) == "A"
    assert len(merged_graph.get_edges(
        merged_graph.get_node_at_ref_offset(8))) == 2
    assert len(merged_graph.get_edges(
        merged_graph.get_node_at_ref_offset(11))) == 2

    # Reverse direction: node id -> reference offset.
    assert merged_graph.get_ref_offset_at_node(6) == 8
    assert 7 in merged_graph.linear_ref_nodes()
    assert merged_graph.get_ref_offset_at_node(7) == 12

    assert list(merged_graph.chromosome_start_nodes) == [1, 6]

    # Round trip through disk; the reloaded graph is not inspected further.
    npz_path = "merged_graph.npz"
    merged_graph.to_file(npz_path)
    reloaded_graph = Graph.from_file(npz_path)
Пример #6
0
def test_find_insertion_nodes():
    """Check that ``get_variant_nodes`` resolves an insertion's two nodes.

    The graph has an empty node 4 parallel to node 2 ("AA"); for the
    insertion A -> AAA the test expects reference node 4 and variant node 2.
    """
    node_sequences = {
        1: "CTACCA",
        2: "AA",
        3: "TAAATAA",
        4: "",
    }
    node_edges = {
        1: [2, 4],
        2: [3],
        4: [3],
    }
    g = Graph.from_dicts(node_sequences, node_edges, [1, 3])
    insertion = VcfVariant(1, 6, "A", "AAA", "", "INSERTION")

    found_ref_node, found_variant_node = g.get_variant_nodes(insertion)
    assert found_ref_node == 4
    assert found_variant_node == 2
Пример #7
0
def test_indel_graph2():
    """Smoke-run ``SnpKmerFinder`` with k=31 on an indel graph.

    Node 100 is an empty node parallel to node 2. The test only runs
    ``find_kmers`` and prints ``kmers_found``; it makes no assertions.
    """
    node_sequences = {
        1: "gggggaggcttgtggttagcagagagtgggtggaagacagaggtttgag",
        2: "ga",
        3: "gagagagacccaggggagaaaaccagctgcagaggcaggaggggtccagggcagcccgaggccagagatgggcgtcttccttacagccacctgtggtccc",
        100: "",
    }
    node_edges = {
        1: [2, 100],
        2: [3],
        100: [3],
    }
    graph = Graph.from_dicts(node_sequences, node_edges, [1, 2, 3])

    kmer_finder = SnpKmerFinder(graph, k=31)
    # The returned flat kmers are not inspected; the call is what is tested.
    flat_kmers = kmer_finder.find_kmers()
    print(kmer_finder.kmers_found)
Пример #8
0
def test_simple_insertion():
    """Check the edges produced for a single one-base insertion.

    After dummy nodes are added, node 1 must have edges to nodes 2 and 5,
    and both node 2 and node 5 must have an edge to node 3.
    """
    node_sequences = {
        1: "ACTG",
        2: "C",
        3: "AAAA",
    }
    node_edges = {
        1: [2, 3],
        2: [3],
    }
    graph = Graph.from_dicts(node_sequences, node_edges, [1, 3])

    insertion = VcfVariant(1, 4, "G", "GC", type="INSERTION")
    variants = VcfVariants([insertion])
    new_graph = DummyNodeAdder(
        graph, variants).create_new_graph_with_dummy_nodes()

    # Node 5 is not present in the input graph.
    for node, targets in ((5, [3]), (1, [2, 5]), (2, [3])):
        assert new_graph.node_has_edges(node, targets)
def simple_test():
    """Run ``UniqueVariantKmersFinder`` on a small deletion graph.

    Prints the graph's ``ref_offset_to_node`` and the size of node 3, then
    searches for unique kmers with k=4. Nothing is asserted.

    NOTE(review): the name lacks the ``test_`` prefix used by the other
    tests here, so default pytest collection will presumably skip it.
    """
    node_sequences = {
        1: "CTACCA",
        2: "AA",
        3: "TAAATAA",
        4: "",
    }
    node_edges = {
        1: [2, 4],
        2: [3],
        4: [3],
    }
    g = Graph.from_dicts(node_sequences, node_edges, [1, 2, 3])
    print(g.ref_offset_to_node)
    print(g.get_node_size(3))

    k = 4
    # NOTE(review): other VcfVariant calls in this file pass a chromosome as
    # the first positional argument; this one starts with 6. Confirm against
    # the VcfVariant signature whether a leading chromosome is missing.
    deletion = VcfVariant(6, "AAA", "A", "", "DELETION")
    variants = VcfVariants([deletion])
    reference_kmers = ReferenceKmerIndex.from_sequence("CTACCAAATAAATAA", k)
    finder = UniqueVariantKmersFinder(g, reference_kmers, variants, k)
    finder.find_unique_kmers()
Пример #10
0
def test_from_dicts():
    """Check node sizes, edges and sequences of a graph built by ``from_dicts``."""
    node_sequences = {
        1: "ACTG",
        2: "A",
        3: "G",
        4: "AAA",
    }
    node_edges = {
        1: [2, 3],
        2: [4],
        3: [4],
    }
    g = Graph.from_dicts(node_sequences, node_edges, [1, 2, 4])

    # Expected size of each node, checked in node order.
    for node, expected_size in ((1, 4), (2, 1), (3, 1), (4, 3)):
        assert g.get_node_size(node) == expected_size

    assert list(g.get_edges(1)) == [2, 3]

    assert g.get_node_sequence(2) == "A"
Пример #11
0
def test_indel_graph():
    """Check kmer index hits across an indel (empty node 3 parallel to node 2).

    Builds flat kmers with k=3, turns them into a ``KmerIndex`` and looks up
    the kmers "GTA" and "GAT". The second element of each lookup result is
    compared against the expected node ids.
    """
    graph = Graph.from_dicts({
        1: "ACTG",
        2: "A",
        3: "",
        4: "TAAT"
    }, {
        1: [2, 3],
        2: [4],
        3: [4]
    }, [1, 2, 4])
    kmer_finder = SnpKmerFinder(graph, k=3)
    flat_kmers = kmer_finder.find_kmers()
    print(kmer_finder.kmers_found)

    index = KmerIndex.from_flat_kmers(flat_kmers)

    hits = index.get(sequence_to_kmer_hash("GTA"))
    # Convert to a list first, then compare. The previous form,
    # list(hits[1] == [...]), only asserted that the comparison result was
    # a non-empty list, so it could never detect wrong node ids.
    assert list(hits[1]) == [1, 3, 4]
    print(hits)

    hits = index.get(sequence_to_kmer_hash("GAT"))
    assert list(hits[1]) == [1, 2, 4]
    print(hits)
Пример #12
0
def test_tricky_case_nested_deletions():
    """Check dummy-node edges for two deletions and an SNP in one region.

    After ``create_new_graph_with_dummy_nodes`` node 1 must point at nodes
    2 and 11, node 2 at nodes 3 and 12, and both nodes 11 and 12 must lead
    to node 6.
    """
    node_sequences = {
        1: "TATAT",
        2: "AT",
        3: "A",
        4: "T",
        5: "A",
        6: "A",
        7: "T",
        8: "A",
        9: "GG",
    }
    node_edges = {
        1: [2, 6],
        2: [3, 6],
        3: [4, 5],
        4: [6],
        5: [6],
        6: [7, 8],
        7: [9],
        8: [9],
    }
    graph = Graph.from_dicts(
        node_sequences, node_edges, [1, 2, 3, 5, 6, 8, 9])

    variants = VcfVariants([
        VcfVariant(1, 5, "TATAA", "T", type="DELETION"),
        VcfVariant(1, 7, "TAA", "T", type="DELETION"),
        VcfVariant(1, 5, "A", "T", type="SNP"),
    ])

    new_graph = DummyNodeAdder(
        graph, variants).create_new_graph_with_dummy_nodes()
    print(new_graph)

    # Nodes 11 and 12 are not present in the input graph.
    expected_edges = {
        1: [2, 11],
        2: [3, 12],
        11: [6],
        12: [6],
    }
    for node, targets in expected_edges.items():
        assert list(new_graph.get_edges(node)) == targets
Пример #13
0
def test_insertion_with_multiple_paths():
    """Check dummy-node edges for an insertion followed by an SNP bubble.

    After dummy nodes are added, node 1 must point at nodes 2 and 8, and
    node 8 must point at node 3.
    """
    node_sequences = {
        1: "AAAG",
        2: "GAGT",
        3: "GA",
        4: "C",
        5: "G",
        6: "T",
    }
    node_edges = {
        1: [2, 3],
        2: [3],
        3: [4, 5],
        4: [6],
        5: [6],
    }
    graph = Graph.from_dicts(node_sequences, node_edges, [1, 3, 5, 6])

    insertion = VcfVariant(1, 4, "G", "GGAGT", type="INSERTION")
    variants = VcfVariants([insertion])
    new_graph = DummyNodeAdder(
        graph, variants).create_new_graph_with_dummy_nodes()

    # Node 8 is not present in the input graph.
    for node, targets in ((1, [2, 8]), (8, [3])):
        assert list(new_graph.get_edges(node)) == targets
    print(new_graph)
Пример #14
0
def test_simple_snp_graph():
    """Check which kmers ``SnpKmerFinder`` reports on a single-SNP graph.

    Runs ``find_kmers`` with k=3, prints the finder's ``kmers_found`` and
    the flat kmers' internal arrays for debugging, then asserts via
    ``has_kmer`` that four kmers were found with the given node sets.
    """
    node_sequences = {
        1: "ACTG",
        2: "A",
        3: "G",
        4: "AAAT",
    }
    node_edges = {
        1: [2, 3],
        2: [4],
        3: [4],
    }
    graph = Graph.from_dicts(node_sequences, node_edges, [1, 2, 4])

    kmer_finder = SnpKmerFinder(graph, k=3)
    flat_kmers = kmer_finder.find_kmers()

    # Debug output only; none of these values are asserted on.
    print(kmer_finder.kmers_found)
    print(flat_kmers._ref_offsets)
    print(flat_kmers._nodes)
    print(flat_kmers._hashes)

    expected_kmers = (
        ("ACT", {1}),
        ("GAA", {1, 2, 4}),
        ("GGA", {1, 3, 4}),
        ("AAT", {4}),
    )
    for kmer, nodes in expected_kmers:
        assert kmer_finder.has_kmer(kmer, nodes)