Exemplo n.º 1
0
    def test_assemble_left_double_fork(self, left_double_fork_structure):
        # assemble entire contig + branch points b/c of labels; start from end
        graph, contig, L, HDN, R, branch = left_double_fork_structure
        lh = khmer._GraphLabels(graph)
        asm = khmer.SimpleLabeledAssembler(lh)

        # first try without the labels
        paths = asm.assemble(contig[-K:])

        assert len(paths) == 1
        # without labels, should get the beginning of the HDN thru the end
        assert paths[0] == contig[HDN.pos:]

        # now add labels and check that we get two full length paths
        hdn = graph.find_high_degree_nodes(contig)
        hdn += graph.find_high_degree_nodes(branch)
        print(list(hdn))
        lh.label_across_high_degree_nodes(contig, hdn, 1)
        lh.label_across_high_degree_nodes(branch, hdn, 2)
        print(lh.get_tag_labels(list(hdn)[0]))

        paths = asm.assemble(contig[-K:])

        assert len(paths) == 2

        assert any(utils._equals_rc(path, contig) for path in paths)
        assert any(utils._equals_rc(path, branch) for path in paths)
Exemplo n.º 2
0
    def test_hash_as_seed(self, linear_structure):
        graph, contig = linear_structure
        lh = khmer.GraphLabels(graph)
        asm = khmer.SimpleLabeledAssembler(lh)

        left = graph.hash(contig[:K])
        assert utils._equals_rc(asm.assemble(left).pop(), contig)
Exemplo n.º 3
0
    def test_assemble_tandem_repeats(self, tandem_repeat_structure):
        # assemble one copy of a tandem repeat
        graph, repeat, tandem_repeats = tandem_repeat_structure
        lh = khmer._GraphLabels(graph)
        asm = khmer.SimpleLabeledAssembler(lh)
        paths = asm.assemble(repeat[:K])

        assert len(paths) == 1
        # There are K-1 k-mers spanning the junction between
        # the beginning and end of the repeat
        assert len(paths[0]) == len(repeat) + K - 1
Exemplo n.º 4
0
    def test_assemble_snp_bubble_single(self, snp_bubble_structure):
        # assemble entire contig + one of two paths through a bubble
        graph, wildtype, mutant, HDN_L, HDN_R = snp_bubble_structure
        lh = khmer._GraphLabels(graph)
        asm = khmer.SimpleLabeledAssembler(lh)

        hdn = graph.find_high_degree_nodes(wildtype)
        assert len(hdn) == 2
        lh.label_across_high_degree_nodes(wildtype, hdn, 1)

        paths = asm.assemble(wildtype[:K])

        assert len(paths) == 1
        assert utils._equals_rc(paths[0], wildtype)
Exemplo n.º 5
0
    def test_beginning_to_end_across_tip(self, right_tip_structure):
        # assemble entire contig, ignoring branch point b/c of labels
        graph, contig, L, HDN, R, tip = right_tip_structure
        lh = khmer._GraphLabels(graph)
        asm = khmer.SimpleLabeledAssembler(lh)
        hdn = graph.find_high_degree_nodes(contig)
        # L, HDN, and R will be labeled with 1
        lh.label_across_high_degree_nodes(contig, hdn, 1)

        path = asm.assemble(contig[:K])

        assert len(path) == 1, "there should only be one path"
        path = path[0]  # @CTB

        assert len(path) == len(contig)
        assert utils._equals_rc(path, contig)
Exemplo n.º 6
0
    def test_assemble_snp_bubble_both(self, snp_bubble_structure):
        # assemble entire contig + both paths
        graph, wildtype, mutant, HDN_L, HDN_R = snp_bubble_structure
        lh = khmer._GraphLabels(graph)
        asm = khmer.SimpleLabeledAssembler(lh)

        hdn = graph.find_high_degree_nodes(wildtype)
        hdn += graph.find_high_degree_nodes(mutant)
        assert len(hdn) == 2
        lh.label_across_high_degree_nodes(wildtype, hdn, 1)
        lh.label_across_high_degree_nodes(mutant, hdn, 2)

        paths = asm.assemble(wildtype[:K])

        assert len(paths) == 2

        assert any(utils._contains_rc(wildtype, path) for path in paths)
        assert any(utils._contains_rc(mutant, path) for path in paths)
Exemplo n.º 7
0
    def test_assemble_right_double_fork(self, right_double_fork_structure):
        # assemble two contigs from a double forked structure
        graph, contig, L, HDN, R, branch = right_double_fork_structure
        lh = khmer._GraphLabels(graph)
        asm = khmer.SimpleLabeledAssembler(lh)

        hdn = graph.find_high_degree_nodes(contig)
        hdn += graph.find_high_degree_nodes(branch)
        print(list(hdn))
        lh.label_across_high_degree_nodes(contig, hdn, 1)
        lh.label_across_high_degree_nodes(branch, hdn, 2)
        print(lh.get_tag_labels(list(hdn)[0]))

        paths = asm.assemble(contig[:K])
        print('Path lengths', [len(x) for x in paths])

        assert len(paths) == 2

        assert any(utils._equals_rc(path, contig) for path in paths)
        assert any(utils._equals_rc(path, branch) for path in paths)
Exemplo n.º 8
0
    def test_assemble_snp_bubble_stopbf(self, snp_bubble_structure):
        # assemble one side of bubble, blocked with stop_bf,
        # when labels on both branches
        # stop_bf should trip a filter failure, negating the label spanning
        graph, wildtype, mutant, HDN_L, HDN_R = snp_bubble_structure
        stop_bf = khmer.Nodegraph(K, 1e5, 4)
        lh = khmer._GraphLabels(graph)
        asm = khmer.SimpleLabeledAssembler(lh)

        hdn = graph.find_high_degree_nodes(wildtype)
        hdn += graph.find_high_degree_nodes(mutant)
        assert len(hdn) == 2
        lh.label_across_high_degree_nodes(wildtype, hdn, 1)
        lh.label_across_high_degree_nodes(mutant, hdn, 2)

        # do the labeling, but block the mutant with stop_bf
        stop_bf.count(mutant[HDN_L.pos + 1:HDN_L.pos + K + 1])
        paths = asm.assemble(wildtype[:K], stop_bf)

        assert len(paths) == 1
        assert any(utils._equals_rc(path, wildtype) for path in paths)
Exemplo n.º 9
0
    def test_assemble_right_triple_fork(self, right_triple_fork_structure):
        # assemble three contigs from a trip fork
        (graph, contig, L, HDN, R, top_sequence,
         bottom_sequence) = right_triple_fork_structure
        lh = khmer._GraphLabels(graph)
        asm = khmer.SimpleLabeledAssembler(lh)

        hdn = graph.find_high_degree_nodes(contig)
        hdn += graph.find_high_degree_nodes(top_sequence)
        hdn += graph.find_high_degree_nodes(bottom_sequence)
        print(list(hdn))
        lh.label_across_high_degree_nodes(contig, hdn, 1)
        lh.label_across_high_degree_nodes(top_sequence, hdn, 2)
        lh.label_across_high_degree_nodes(bottom_sequence, hdn, 3)
        print(lh.get_tag_labels(list(hdn)[0]))

        paths = asm.assemble(contig[:K])
        print([len(x) for x in paths])

        assert len(paths) == 3

        assert any(utils._equals_rc(path, contig) for path in paths)
        assert any(utils._equals_rc(path, top_sequence) for path in paths)
        assert any(utils._equals_rc(path, bottom_sequence) for path in paths)