Пример #1
0
def test_add_asm_graph():
    """
    Add the 'gfa-1' assembly graph to an empty GFAGraph.

    Checks that no paths are created and that the graph's edges match the
    expected edge records exactly (same keys, same values).
    """
    # Load the assembly graph.
    sg_edges_list = os.path.join(
        helpers.get_test_data_dir(), 'gfa-1', 'sg_edges_list')
    utg_data = os.path.join(helpers.get_test_data_dir(), 'gfa-1', 'utg_data')
    ctg_paths = os.path.join(helpers.get_test_data_dir(), 'gfa-1', 'ctg_paths')
    asm_graph = AsmGraph(sg_edges_list, utg_data, ctg_paths)

    # Add the graph to GFA.
    gfa_graph = mod.GFAGraph()
    gfa_graph.add_asm_graph(asm_graph)

    assert len(gfa_graph.paths.keys()) == 0

    expected = {
        ('000000016:B', '000000027:B'): ['000000016:B', '000000027:B', '*', 1540, 99.94, 449, 0, None, None, None, None],
        ('000000005:B', '000000016:B'): ['000000005:B', '000000016:B', '*', 1487, 99.93, 502, 0, None, None, None, None],
        ('000000016:B', '000000025:B'): ['000000016:B', '000000025:B', '*', 1540, 99.94, 449, 0, None, None, None, None],
        ('000000007:B', '000000005:B'): ['000000007:B', '000000005:B', '*', 1980, 99.95, 9, 0, None, None, None, None],
        ('000000018:B', '000000004:B'): ['000000018:B', '000000004:B', '*', 1963, 99.95, 26, 0, None, None, None, None],
        ('000000025:B', '000000018:B'): ['000000025:B', '000000018:B', '*', 1978, 99.95, 11, 0, None, None, None, None]
    }

    assert len(gfa_graph.edges.keys()) == len(expected.keys())

    # items() is available on both Python 2 and Python 3, whereas
    # iteritems() exists only on Python 2.
    for key, edge in gfa_graph.edges.items():
        assert key in expected
        assert expected[key] == edge
Пример #2
0
def test_deserialize():
    """
    Round-trip a small graph through serialize_gfa() and deserialize_gfa().

    The deserialized graph must compare equal to the source graph on its
    nodes, edges and paths.
    """
    source_graph = mod.GFAGraph()
    source_graph.add_node('node1', 7, 'ACTGAAA', tags={}, labels={})
    source_graph.add_node('node2', 10, 'AAACCCGGGT', tags={}, labels={})
    source_graph.add_edge('edge1', 'node1', '+', 'node2', '+', 4, 7, 0, 3, '*',
                          tags={}, labels={})
    source_graph.add_path('000000F', ['node1', 'node2'], ['4M', '7M'],
                          tags={}, labels={'label1': 'test'})

    # Serialize to a string, then read it back through a file-like object.
    serialized = mod.serialize_gfa(source_graph)
    restored = mod.deserialize_gfa(StringIO(serialized))

    assert restored.nodes == source_graph.nodes
    assert restored.edges == source_graph.edges
    assert restored.paths == source_graph.paths
Пример #3
0
def test_add_edge_1():
    """
    Normal usage: add a single edge between two previously added nodes.
    """
    graph = mod.GFAGraph()
    graph.add_node('node1', 4, 'ACTG', tags={}, labels={})
    graph.add_node('node2', 1000, '*', tags={}, labels={})

    # Positional order: edge name, source, source orientation, sink,
    # sink orientation, source start/end, sink start/end, CIGAR.
    graph.add_edge('edge1', 'node1', '+', 'node2', '+', 4, 4, 0, 0, '*',
                   tags={}, labels={})

    assert len(graph.edges.keys()) == 1
Пример #4
0
def test_add_or_update_edge():
    """
    add_or_update_edge() first inserts the edge, and a second call with the
    same endpoints fills in the fields that were previously None.
    """
    graph = mod.GFAGraph()
    key = ('123:B', '456:E')

    # First call: the edge does not exist yet; the last four fields are None.
    graph.add_or_update_edge('123:B', '456:E', '*', 10000, 99.9, 0, 9000,
                             None, None, None, None)
    assert len(graph.edges.keys()) == 1
    assert key in graph.edges
    assert graph.edges[key] == [
        '123:B', '456:E', '*', 10000, 99.9, 0, 9000, None, None, None, None
    ]

    # Second call: same edge, now with values for the last four fields.
    # The edge count must stay at one and the None values must be replaced.
    graph.add_or_update_edge('123:B', '456:E', '*', 10000, 99.9, 0, 9000,
                             'N', 'OP', '000000F', 'P')
    assert len(graph.edges.keys()) == 1
    assert key in graph.edges
    assert graph.edges[key] == [
        '123:B', '456:E', '*', 10000, 99.9, 0, 9000, 'N', 'OP', '000000F', 'P'
    ]
Пример #5
0
def test_write_gfa_v1():
    """
    Write a graph with two nodes, one edge and one path via write_gfa_v1()
    and compare the produced text with the expected GFA-1 output.
    """
    graph = mod.GFAGraph()
    graph.add_node('node1', 7, 'ACTGAAA', tags={}, labels={})
    graph.add_node('node2', 10, 'AAACCCGGGT', tags={}, labels={})
    graph.add_edge('edge1', 'node1', '+', 'node2', '+', 4, 7, 0, 3, '*',
                   tags={}, labels={})
    graph.add_path('000000F', ['node1', 'node2'], ['4M', '7M'],
                   tags={}, labels={})

    out_buffer = StringIO()
    graph.write_gfa_v1(out_buffer)
    written = out_buffer.getvalue()

    # The fields of each expected line are separated by literal tabs.
    expected = """H	VN:Z:1.0
S	node1	ACTGAAA	LN:i:7
S	node2	AAACCCGGGT	LN:i:10
L	node1	+	node2	+	3M
P	000000F	node1,node2	4M,7M
"""
    assert written == expected
Пример #6
0
def test_write_gfa_v2():
    """
    Write a graph with two nodes, one edge and one path via write_gfa_v2()
    and compare the produced text with the expected GFA-2 output.
    """
    graph = mod.GFAGraph()
    graph.add_node('node1', 7, 'ACTGAAA', tags={}, labels={})
    graph.add_node('node2', 10, 'AAACCCGGGT', tags={}, labels={})
    graph.add_edge('edge1', 'node1', '+', 'node2', '+', 4, 7, 0, 3, '*',
                   tags={}, labels={})
    graph.add_path('000000F', ['node1', 'node2'], ['4M', '7M'],
                   tags={}, labels={})

    out_buffer = StringIO()
    graph.write_gfa_v2(out_buffer)
    written = out_buffer.getvalue()

    # The fields of each expected line are separated by literal tabs.
    expected = """H	VN:Z:2.0
S	node1	7	ACTGAAA
S	node2	10	AAACCCGGGT
E	edge1	node1+	node2+	4	7$	0	3	*
"""

    assert written == expected
Пример #7
0
def test_add_read_from_node():
    """
    add_read_from_node() records the read ID (the part before ':') once per
    read, and raises for node names it cannot handle.
    """
    graph = mod.GFAGraph()

    # (node name to add, read IDs expected in read_in_graph afterwards).
    # Re-adding '123:B' and adding '123:E' must not create new entries.
    steps = [
        ('123:B', ['123']),
        ('456:', ['123', '456']),
        ('123:B', ['123', '456']),
        ('123:E', ['123', '456']),
    ]
    for node_name, expected_reads in steps:
        graph.add_read_from_node(node_name)
        assert len(graph.read_in_graph) == len(expected_reads)
        for read_id in expected_reads:
            assert read_id in graph.read_in_graph

    # A name without ':', None, and the empty string must all raise.
    for bad_name in ('123', None, ''):
        with pytest.raises(Exception):
            graph.add_read_from_node(bad_name)
Пример #8
0
def test_format_gfa_v1_link_line():
    """
    format_gfa_v1_link_line() renders an edge record as a GFA-1 'L' line.

    Each edge is a list of: v, w, cigar, overlap_len, overlap_idt,
    overlap_begin, overlap_end, cross_phase, src_graph, ctg_name, type_.
    """
    graph = mod.GFAGraph()

    cases = [
        # An edge with None information.
        (['123:B', '456:E', '*', 10000, 99.9, 0, 9000,
          None, None, None, None],
         'L\t123\t-\t456\t+\t*\tol:i:10000\toi:f:99.9\tob:i:0\toe:i:9000\tci:Z:NA-NA'),
        # An edge with full information.
        (['456:B', '789:E', '*', 10000, 99.9, 0, 9000,
          'N', 'OP', '000000F', 'P'],
         'L\t456\t-\t789\t+\t*\tol:i:10000\toi:f:99.9\tob:i:0\toe:i:9000\tsg:Z:OP\tcp:Z:N\tci:Z:000000F-P'),
        # Integer values of 0 should still be present in the custom fields
        # of the output. Only None values should be skipped.
        (['123:B', '456:E', '*', 0, 0, 0, 0,
          None, None, None, None],
         'L\t123\t-\t456\t+\t*\tol:i:0\toi:f:0.0\tob:i:0\toe:i:0\tci:Z:NA-NA'),
    ]

    for edge_record, expected_line in cases:
        formatted = graph.format_gfa_v1_link_line(edge_record)
        assert formatted == expected_line
Пример #9
0
def test_update_edge():
    """
    update_edge() fills in None fields of an existing edge, keeps values that
    were already set, and raises when the edge does not exist.
    """
    graph = mod.GFAGraph()

    def check_edge(key, edge_count, expected_record):
        # Shared assertions: total edge count, presence, stored record.
        assert len(graph.edges.keys()) == edge_count
        assert key in graph.edges
        assert graph.edges[key] == expected_record

    # First, add an edge whose last four fields are None.
    first_key = ('123:B', '456:E')
    graph.add_edge('123:B', '456:E', '*', 10000, 99.9, 0, 9000,
                   None, None, None, None)
    check_edge(first_key, 1, [
        '123:B', '456:E', '*', 10000, 99.9, 0, 9000, None, None, None, None
    ])

    # Update the None values and check if they changed.
    graph.update_edge('123:B', '456:E', '*', 10000, 99.9, 0, 9000,
                      'N', 'OP', '000000F', 'P')
    check_edge(first_key, 1, [
        '123:B', '456:E', '*', 10000, 99.9, 0, 9000, 'N', 'OP', '000000F', 'P'
    ])

    # Add a new edge.
    second_key = ('456:B', '789:E')
    graph.add_edge('456:B', '789:E', '*', 10000, 99.9, 0, 9000,
                   None, None, None, None)
    check_edge(second_key, 2, [
        '456:B', '789:E', '*', 10000, 99.9, 0, 9000, None, None, None, None
    ])

    # Update with different overlap numbers: the non-None values already
    # stored must remain the same, only the None fields get filled in.
    graph.update_edge('456:B', '789:E', '*', 10001, 99.8, 0, 9001,
                      'N', 'OP', '000000F', 'P')
    check_edge(second_key, 2, [
        '456:B', '789:E', '*', 10000, 99.9, 0, 9000, 'N', 'OP', '000000F', 'P'
    ])

    # Degenerate case, update an edge which does not exist.
    with pytest.raises(Exception):
        graph.update_edge('4567:B', '789:E', '*', 10001, 99.8, 0, 9001,
                          'N', 'OP', '000000F', 'P')
Пример #10
0
def test_add_nx_string_graph():
    """
    Build a string graph from the 'gfa-1' assembly graph, then add the string
    graph stored in 'sg.gexf' to a GFAGraph.

    The test contains no assertions; it passes when none of the calls raise.
    """
    # Load the assembly graph.
    sg_edges_list = os.path.join(helpers.get_test_data_dir(), 'gfa-1',
                                 'sg_edges_list')
    utg_data = os.path.join(helpers.get_test_data_dir(), 'gfa-1', 'utg_data')
    ctg_paths = os.path.join(helpers.get_test_data_dir(), 'gfa-1', 'ctg_paths')
    asm_graph = AsmGraph(sg_edges_list, utg_data, ctg_paths)

    # The following block is taken from Unzip, graphs_to_h_tigs.py.
    # NOTE(review): the graph built here is never passed to GFAGraph; only
    # the graph loaded from sg.gexf further down is. The construction is kept
    # because it still exercises AsmGraph -- confirm whether it was meant to
    # be added to the GFA as well.
    built_sg = nx.DiGraph()
    arid_to_phase = {}
    for ctg_id in asm_graph.ctg_data.keys():
        ctg_G = asm_graph.get_sg_for_ctg(ctg_id)
        for v, w in ctg_G.edges():
            vrid = v[:9]
            wrid = w[:9]
            edge_data = asm_graph.sg_edges[(v, w)]
            # Only edges whose last field is "G" are used.
            if edge_data[-1] != "G":
                continue

            # arid_to_phase is empty here, so every read gets the default.
            vphase = arid_to_phase.get(vrid, (-1, 0))
            wphase = arid_to_phase.get(wrid, (-1, 0))
            if vphase[0] == wphase[0] and vphase[1] != wphase[1]:
                cross_phase = "Y"
            else:
                cross_phase = "N"

            built_sg.add_node(v,
                              label="%d_%d" % vphase,
                              phase="%d_%d" % vphase,
                              src="P")

            built_sg.add_node(w,
                              label="%d_%d" % wphase,
                              phase="%d_%d" % wphase,
                              src="P")

            built_sg.add_edge(v, w, src="OP", cross_phase=cross_phase)

            # We need to add the complementary edges as the ctg_graph does
            # not contain the dual edges.
            rv = reverse_end(v)
            rw = reverse_end(w)
            built_sg.add_node(rv,
                              label="%d_%d" % vphase,
                              phase="%d_%d" % vphase,
                              src="P")
            built_sg.add_node(rw,
                              label="%d_%d" % wphase,
                              phase="%d_%d" % wphase,
                              src="P")
            built_sg.add_edge(rw, rv, src="OP", cross_phase=cross_phase)

    # Add the string graph loaded from disk to the GFA. It gets its own name
    # so it no longer silently replaces the graph built above.
    gfa_graph = mod.GFAGraph()
    gexf_file = os.path.join(helpers.get_test_data_dir(), 'gfa-1', 'sg.gexf')
    loaded_sg = nx.read_gexf(gexf_file)
    gfa_graph.add_nx_string_graph(loaded_sg)
Пример #11
0
def test_add_node_1():
    """
    Normal usage: two valid nodes are added and both end up in the graph.
    """
    graph = mod.GFAGraph()

    # (name, length, sequence); '*' is passed as the second node's sequence.
    for name, length, seq in (('node1', 4, 'ACTG'), ('node2', 1000, '*')):
        graph.add_node(name, length, seq, tags={}, labels={})

    assert len(graph.nodes) == 2
Пример #12
0
def test_format_gfa_v1_path_line():
    """
    format_gfa_v1_path_line() renders a tiling path as a GFA-1 'P' line.

    Covers three situations: no sequence-length map (all CIGARs are '*'),
    a sequence-length map with a fixed length for every read, and a None
    path (an empty string is expected).
    """
    gfa_graph = mod.GFAGraph()

    # Load tiling paths from file.
    p_ctg_tiling_path_file = os.path.join(helpers.get_test_data_dir(),
                                          'p_ctg_tiling_path_1')
    p_paths, p_edge_to_ctg = gen_gfa_v1.load_tiling_paths(
        p_ctg_tiling_path_file, 'P')

    # If seq_len_map is None, all CIGAR operations should be '*'.
    expected = {
        '000000F':
        'P\t000000F\t000092122-,000081654-,000034462-,000061403-,000021348-,000062240-,000083779-,000019819+,000063672+,000026565+,000050047-\t*,*,*,*,*,*,*,*,*,*,*',
        '000001F':
        'P\t000001F\t000070651+,000018109+,000068978+,000100559+,000010548-,000006846-,000065052-,000071922+,000076878+,000000861+,000001755-\t*,*,*,*,*,*,*,*,*,*,*',
        '000002F':
        'P\t000002F\t000088930+,000008918+,000100248-,000085315-,000071965+,000082497+\t*,*,*,*,*,*',
        '000003F': 'P\t000003F\t000084518+,000011674+,000057445-\t*,*,*',
        '000004F': 'P\t000004F\t000014727+,000024020+,000060868+\t*,*,*',
    }
    seq_len_map = None
    # items() works on both Python 2 and Python 3; iteritems() is
    # Python 2 only.
    for ctg_id, path in p_paths.items():
        path_line = gfa_graph.format_gfa_v1_path_line(ctg_id, path,
                                                      seq_len_map)
        assert path_line == expected[ctg_id]

    # The seq_len_map dict is only used for the first read in the path,
    # because it needs to be included completely. The other CIGAR operations
    # are determined directly from the edges.
    expected = {
        '000000F':
        'P\t000000F\t000092122-,000081654-,000034462-,000061403-,000021348-,000062240-,000083779-,000019819+,000063672+,000026565+,000050047-\t10000M,33726M,10123M,1352M,9924M,5834M,862M,5562M,1384M,473M,2171M',
        '000001F':
        'P\t000001F\t000070651+,000018109+,000068978+,000100559+,000010548-,000006846-,000065052-,000071922+,000076878+,000000861+,000001755-\t10000M,10077M,3766M,2648M,2421M,2089M,18168M,2723M,2451M,666M,15088M',
        '000002F':
        'P\t000002F\t000088930+,000008918+,000100248-,000085315-,000071965+,000082497+\t10000M,15215M,3113M,4851M,1857M,6035M',
        '000003F':
        'P\t000003F\t000084518+,000011674+,000057445-\t10000M,9432M,23096M',
        '000004F':
        'P\t000004F\t000014727+,000024020+,000060868+\t10000M,5238M,3235M',
    }
    for ctg_id, path in p_paths.items():
        # Initialize all reads to a fixed value, just to be safe.
        seq_len_map = {}
        for edge in path:
            v, w = edge[0], edge[1]
            seq_len_map[v.split(':')[0]] = 10000
            seq_len_map[w.split(':')[0]] = 10000
        path_line = gfa_graph.format_gfa_v1_path_line(ctg_id, path,
                                                      seq_len_map)
        assert path_line == expected[ctg_id]

    # Test a degenerate case where path is None.
    path_line = gfa_graph.format_gfa_v1_path_line('', None, None)
    assert path_line == ''
Пример #13
0
def test_add_path_1():
    """
    Normal usage: one path with five nodes and five CIGARs is stored.
    """
    graph = mod.GFAGraph()

    graph.add_path(
        '000000F',
        ['node1', 'node2', 'node3', 'node4', 'node5'],
        ['500M', '400M', '300M', '200M', '100M'])

    assert len(graph.paths.keys()) == 1
Пример #14
0
def wrap_write_gfa_v1_test(use_sg, use_nx, use_tp, write_reads, write_contigs,
                           min_p_len, min_a_len, expected_path):
    """
    Build a GFAGraph from the selected 'gfa-1' inputs, write it as GFA-1 and
    compare the written text with the file at expected_path.

    use_sg -- add the assembly graph (sg_edges_list, utg_data, ctg_paths).
    use_tp -- add the p_ctg/a_ctg tiling paths through
              gen_gfa_v1.add_tiling_paths_to_gfa().
    use_nx -- add the string graph read from sg.gexf.
    write_reads, write_contigs -- forwarded to GFAGraph.write_gfa_v1().
    min_p_len, min_a_len -- forwarded to add_tiling_paths_to_gfa().
    expected_path -- handed to helpers.assert_filecmp() together with the
                     written text.
    """
    def data_file(name):
        # All inputs live in the 'gfa-1' subdirectory of the test data dir.
        return os.path.join(helpers.get_test_data_dir(), 'gfa-1', name)

    graph = mod.GFAGraph()

    # Inputs needed by the writer regardless of which sources are enabled.
    preads_fasta = data_file('preads4falcon.fasta')
    p_ctg_fa = data_file('p_ctg.fa')
    a_ctg_fa = data_file('a_ctg.fa')

    if use_sg:
        # Load the assembly graph and add it to the GFA.
        assembly = AsmGraph(data_file('sg_edges_list'),
                            data_file('utg_data'),
                            data_file('ctg_paths'))
        graph.add_asm_graph(assembly)

    if use_tp:
        p_tiling = data_file('p_ctg_tiling_path')
        a_tiling = data_file('a_ctg_tiling_path')
        gen_gfa_v1.add_tiling_paths_to_gfa(p_ctg_fa, a_ctg_fa, p_tiling,
                                           a_tiling, min_p_len, min_a_len,
                                           graph)

    if use_nx:
        string_graph = nx.read_gexf(data_file('sg.gexf'))
        graph.add_nx_string_graph(string_graph)

    # Run the unit under test.
    out_buffer = StringIO()
    graph.write_gfa_v1(out_buffer, preads_fasta, [p_ctg_fa, a_ctg_fa],
                       write_reads, write_contigs)

    # Compare results.
    helpers.assert_filecmp(out_buffer.getvalue(), expected_path)
Пример #15
0
def test_add_node_2():
    """
    Tests that exceptions get raised if parameters are not correct.
    """
    graph = mod.GFAGraph()

    # (positional args, keyword args) for each call that must raise.
    bad_calls = [
        (('', 4, 'ACTG'), {'tags': {}, 'labels': {}}),        # empty name
        (('node1', -1, 'ACTG'), {'tags': {}, 'labels': {}}),  # negative length
        (('node1', 4, ''), {'tags': {}, 'labels': {}}),       # empty sequence
        (('node1', 4, 'ACTG'), {'tags': [], 'labels': {}}),   # tags is a list
        (('node1', 4, 'ACTG'), {'tags': {}, 'labels': []}),   # labels is a list
    ]

    for args, kwargs in bad_calls:
        with pytest.raises(Exception):
            graph.add_node(*args, **kwargs)
Пример #16
0
def test_add_path_2():
    """
    Tests that exceptions get raised if parameters are not correct.
    """
    graph = mod.GFAGraph()

    five_nodes = ['node1', 'node2', 'node3', 'node4', 'node5']
    five_cigars = ['500M', '400M', '300M', '200M', '100M']

    # (path name, nodes, cigars, extra keyword args) for each failing call.
    # Fresh list copies are used so that no two cases share a list object.
    bad_calls = [
        # Empty path name.
        ('', list(five_nodes), list(five_cigars), {}),
        # Fewer nodes than CIGARs.
        ('000000F', ['node1'], list(five_cigars), {}),
        # Fewer CIGARs than nodes.
        ('000000F', list(five_nodes), ['500M'], {}),
        # No nodes at all.
        ('000000F', [], ['500M'], {}),
        # No CIGARs at all.
        ('000000F', ['node1'], [], {}),
        # tags is a list.
        ('000000F', list(five_nodes), list(five_cigars),
         {'tags': [], 'labels': {}}),
        # labels is a list.
        ('000000F', list(five_nodes), list(five_cigars),
         {'tags': {}, 'labels': []}),
    ]

    for path_name, path_nodes, path_cigars, extra in bad_calls:
        with pytest.raises(Exception):
            graph.add_path(path_name, path_nodes, path_cigars, **extra)
Пример #17
0
def test_add_tiling_path():
    """
    Add every tiling path loaded from 'p_ctg_tiling_path_1' to a GFAGraph and
    check that the graph stores each of them unchanged under its contig ID.
    """
    # Load the tiling path. These methods are tested in test_gen_gfa_v1.py.
    p_ctg_tiling_path_file = os.path.join(
        helpers.get_test_data_dir(), 'p_ctg_tiling_path_1')
    p_paths, p_edge_to_ctg = gen_gfa_v1.load_tiling_paths(
        p_ctg_tiling_path_file, 'P')

    # Create a new GFA graph.
    gfa_graph = mod.GFAGraph()

    # Add the tiling paths. items() works on both Python 2 and Python 3;
    # iteritems() is Python 2 only.
    for ctg_id, path in p_paths.items():
        gfa_graph.add_tiling_path(path, ctg_id)

    # Check if we have the correct number of tiling paths.
    assert len(gfa_graph.paths.keys()) == len(p_paths.keys())

    # They should be same as loaded.
    for ctg_id, path in p_paths.items():
        assert ctg_id in gfa_graph.paths
        assert gfa_graph.paths[ctg_id] == path
Пример #18
0
def test_add_edge():
    """
    add_edge() stores the edge and registers both reads; adding the very same
    edge a second time must not create a multiedge.
    """
    graph = mod.GFAGraph()

    v, w = '123:B', '456:E'
    # v, w, cigar, overlap len/identity/begin/end, cross_phase, src_graph,
    # ctg_name, type_.
    edge_args = (v, w, '*', 10000, 99.9, 0, 9000, 'N', 'OP', '000000F', 'P')

    # The second iteration repeats the identical call; the counts must be
    # the same after both.
    for _ in range(2):
        graph.add_edge(*edge_args)
        assert len(graph.read_in_graph) == 2
        assert len(graph.edges.keys()) == 1
        assert (v, w) in graph.edges

    # Reads are keyed by the part of the node name before ':'.
    assert v.split(':')[0] in graph.read_in_graph
    assert w.split(':')[0] in graph.read_in_graph
Пример #19
0
def test_serialize():
    """
    serialize_gfa() must produce exactly the expected JSON text for a graph
    with two nodes, one edge and one labelled path.
    """
    graph = mod.GFAGraph()
    graph.add_node('node1', 7, 'ACTGAAA', tags={}, labels={})
    graph.add_node('node2', 10, 'AAACCCGGGT', tags={}, labels={})
    graph.add_edge('edge1', 'node1', '+', 'node2', '+', 4, 7, 0, 3, '*',
                   tags={}, labels={})
    graph.add_path('000000F', ['node1', 'node2'], ['4M', '7M'],
                   tags={}, labels={'label1': 'test'})

    # The comparison is an exact string match, so key order matters.
    expected = '{"paths": {"000000F": {"labels": {"label1": "test"}, "nodes": ["node1", "node2"], "tags": {}, "name": "000000F", "cigars": ["4M", "7M"]}}, "nodes": {"node1": {"labels": {}, "seq": "ACTGAAA", "name": "node1", "len": 7, "tags": {}}, "node2": {"labels": {}, "seq": "AAACCCGGGT", "name": "node2", "len": 10, "tags": {}}}, "edges": {"(\'node1\', \'node2\')": {"labels": {}, "v_orient": "+", "tags": {}, "v_start": 4, "cigar": "*", "w_end": 3, "w_start": 0, "w_orient": "+", "name": "edge1", "v_end": 7, "w": "node2", "v": "node1"}}}'

    serialized = mod.serialize_gfa(graph)
    assert serialized == expected
Пример #20
0
def test_write_gfa_v1_2():
    """
    write_gfa_v1() must raise when the graph contains a read that is not
    present in the preads4falcon.fasta file.
    """
    # Create a GFA graph.
    gfa_graph = mod.GFAGraph()

    # Load the p_ctg tiling paths.
    p_ctg_tiling_path_file = os.path.join(
        helpers.get_test_data_dir(), 'gfa-1', 'p_ctg_tiling_path')
    p_paths, p_edge_to_ctg = gen_gfa_v1.load_tiling_paths(
        p_ctg_tiling_path_file, 'P')
    # Add the tiling paths to the GFA. items() works on both Python 2 and
    # Python 3; iteritems() is Python 2 only.
    for ctg_id, path in p_paths.items():
        gfa_graph.add_tiling_path(path, ctg_id)

    # Init paths to other input files.
    preads_file = os.path.join(
        helpers.get_test_data_dir(), 'gfa-1', 'preads4falcon.fasta')
    p_ctg_fasta = os.path.join(
        helpers.get_test_data_dir(), 'gfa-1', 'p_ctg.fa')
    a_ctg_fasta = os.path.join(
        helpers.get_test_data_dir(), 'gfa-1', 'a_ctg.fa')

    write_reads = False
    write_contigs = False

    fp_out = StringIO()

    # Add a node which does not exist in the preads4falcon.fasta file.
    gfa_graph.add_read_from_node('12345:B')

    # Run the unit under test; the unknown read must make it fail.
    with pytest.raises(Exception):
        gfa_graph.write_gfa_v1(fp_out, preads_file,
                               [p_ctg_fasta, a_ctg_fasta],
                               write_reads, write_contigs)
Пример #21
0
def test_gfa_graph():
    """
    Smoke test: constructing an empty GFAGraph must not raise.
    """
    mod.GFAGraph()
Пример #22
0
def wrap_write_gfa_v1_test(use_sg, use_nx, use_tp, write_reads, write_contigs, min_p_len, min_a_len, expected_path):
    """
    Build a GFAGraph from the selected 'gfa-1' inputs, write it as GFA-1 and
    compare the output line by line with the file at expected_path.

    use_sg -- add the assembly graph (sg_edges_list, utg_data, ctg_paths).
    use_tp -- add the p_ctg and a_ctg tiling paths whose contig length is at
              least min_p_len / min_a_len respectively.
    use_nx -- add the string graph read from sg.gexf.
    write_reads, write_contigs -- forwarded to GFAGraph.write_gfa_v1().
    expected_path -- file holding the expected output; its lines are
                     stripped of surrounding whitespace before comparison.
    """
    # Create a GFA graph.
    gfa_graph = mod.GFAGraph()

    if use_sg:
        # Load the assembly graph.
        sg_edges_list = os.path.join(
            helpers.get_test_data_dir(), 'gfa-1', 'sg_edges_list')
        utg_data = os.path.join(
            helpers.get_test_data_dir(), 'gfa-1', 'utg_data')
        ctg_paths = os.path.join(
            helpers.get_test_data_dir(), 'gfa-1', 'ctg_paths')
        asm_graph = AsmGraph(sg_edges_list, utg_data, ctg_paths)
        # Add the string graph to the GFA.
        gfa_graph.add_asm_graph(asm_graph)

    if use_tp:
        # Load the p_ctg tiling paths.
        p_ctg_tiling_path_file = os.path.join(
            helpers.get_test_data_dir(), 'gfa-1', 'p_ctg_tiling_path')
        p_paths, p_edge_to_ctg = gen_gfa_v1.load_tiling_paths(
            p_ctg_tiling_path_file, 'P')
        # Add the tiling paths to the GFA. items() works on both Python 2
        # and Python 3; iteritems() is Python 2 only.
        for ctg_id, path in p_paths.items():
            _, contig_len = gen_gfa_v1.calc_node_coords(path)
            if contig_len >= min_p_len:
                gfa_graph.add_tiling_path(path, ctg_id)
        # Load the a_ctg tiling paths.
        # NOTE(review): these are loaded with type 'P' as well -- confirm
        # that this is intended for the a_ctg file.
        a_ctg_tiling_path_file = os.path.join(
            helpers.get_test_data_dir(), 'gfa-1', 'a_ctg_tiling_path')
        a_paths, a_edge_to_ctg = gen_gfa_v1.load_tiling_paths(
            a_ctg_tiling_path_file, 'P')
        # Add the tiling paths to the GFA.
        for ctg_id, path in a_paths.items():
            _, contig_len = gen_gfa_v1.calc_node_coords(path)
            if contig_len >= min_a_len:
                gfa_graph.add_tiling_path(path, ctg_id)

    if use_nx:
        gexf_file = os.path.join(
            helpers.get_test_data_dir(), 'gfa-1', 'sg.gexf')
        nx_sg = nx.read_gexf(gexf_file)
        gfa_graph.add_nx_string_graph(nx_sg)

    # Init paths to other input files.
    preads_file = os.path.join(
        helpers.get_test_data_dir(), 'gfa-1', 'preads4falcon.fasta')
    p_ctg_fasta = os.path.join(
        helpers.get_test_data_dir(), 'gfa-1', 'p_ctg.fa')
    a_ctg_fasta = os.path.join(
        helpers.get_test_data_dir(), 'gfa-1', 'a_ctg.fa')

    fp_out = StringIO()
    # Run the unit under test.
    gfa_graph.write_gfa_v1(fp_out, preads_file, [
                           p_ctg_fasta, a_ctg_fasta], write_reads, write_contigs)

    # Compare results.
    result = fp_out.getvalue()
    result = result.splitlines()
    # Use a context manager so the expected-output file is always closed.
    with open(expected_path) as fp_expected:
        expected = [line.strip() for line in fp_expected]
    assert result == expected
Пример #23
0
def test_add_edge_2():
    """
    Checks that add_edge raises for every malformed argument combination.

    Every entry in the table below is one complete add_edge call: the ten
    positional arguments in call order, followed by the values passed as the
    `tags` and `labels` keyword arguments. Each call must raise an exception.
    """

    graph = mod.GFAGraph()

    invalid_calls = [
        # Empty string in the first, second and fourth position.
        (('', 'node1', '+', 'node2', '+', 4, 4, 0, 0, '*'), {}, {}),
        (('edge1', '', '+', 'node2', '+', 4, 4, 0, 0, '*'), {}, {}),
        (('edge1', 'node1', '+', '', '+', 4, 4, 0, 0, '*'), {}, {}),
        # Something other than '+' in the third and in the fifth position
        # (presumably the orientation fields -- confirm against add_edge).
        (('edge1', 'node1', '1', 'node2', '+', 4, 4, 0, 0, '*'), {}, {}),
        (('edge1', 'node1', '+', 'node2', 'z', 4, 4, 0, 0, '*'), {}, {}),
        # A negative value in each of the four integer positions in turn.
        (('edge1', 'node1', '+', 'node2', '+', -1, 4, 0, 0, '*'), {}, {}),
        (('edge1', 'node1', '+', 'node2', '+', 4, -1, 0, 0, '*'), {}, {}),
        (('edge1', 'node1', '+', 'node2', '+', 4, 4, -1, 0, '*'), {}, {}),
        (('edge1', 'node1', '+', 'node2', '+', 4, 4, 0, -1, '*'), {}, {}),
        # Empty string instead of '*' in the last positional argument.
        (('edge1', 'node1', '+', 'node2', '+', 4, 4, 0, 0, ''), {}, {}),
        # Decreasing integer pairs: (4, 3) for the first pair and (5, 0) for
        # the second (presumably start > end -- confirm against add_edge).
        (('edge1', 'node1', '+', 'node2', '+', 4, 3, 0, 0, '*'), {}, {}),
        (('edge1', 'node1', '+', 'node2', '+', 4, 4, 5, 0, '*'), {}, {}),
        # A list instead of a dict for tags, then for labels.
        (('edge1', 'node1', '+', 'node2', '+', 4, 4, 0, 0, '*'), [], {}),
        (('edge1', 'node1', '+', 'node2', '+', 4, 4, 0, 0, '*'), {}, []),
    ]

    for positional, tags_value, labels_value in invalid_calls:
        with pytest.raises(Exception):
            graph.add_edge(*positional, tags=tags_value, labels=labels_value)
Пример #24
0
def test_write_bandage_csv():
    """
    Builds a seven-node graph covered by four two-node paths and checks the
    text that write_bandage_csv produces for it.
    """
    graph = mod.GFAGraph()

    # (name, length, sequence) for every node, in insertion order.
    node_specs = [
        ('node1', 7, 'ACTGAAA'),
        ('node2', 10, 'AAACCCGGGT'),
        ('node3', 7, 'ACTGAAA'),
        ('node4', 10, 'AAACCCGGGT'),
        ('node5', 7, 'ACTGAAA'),
        ('node6', 10, 'AAACCCGGGT'),
        ('node7', 14, 'AACCCGGGTACTGG'),
    ]
    for node_name, node_len, node_seq in node_specs:
        graph.add_node(node_name, node_len, node_seq, tags={}, labels={})

    # Positional arguments of each add_edge call, in insertion order.
    edge_specs = [
        ('edge1', 'node1', '+', 'node2', '+', 4, 7, 0, 3, '*'),
        ('edge2', 'node3', '+', 'node4', '+', 4, 7, 0, 3, '*'),
        ('edge3', 'node5', '+', 'node6', '+', 4, 7, 0, 3, '*'),
        ('edge4', 'node6', '+', 'node7', '+', 1, 10, 0, 9, '*'),
    ]
    for edge_args in edge_specs:
        graph.add_edge(*edge_args, tags={}, labels={})

    # (path name, node list, CIGAR list) for each path, in insertion order.
    # node6 appears in both 000002F and 000003F, so its row is expected to
    # list both contigs and to carry the color #FFFF00 (yellow).
    path_specs = [
        ('000000F', ['node1', 'node2'], ['3M', '7M']),
        ('000001F', ['node3', 'node4'], ['3M', '7M']),
        ('000002F', ['node5', 'node6'], ['3M', '7M']),
        ('000003F', ['node6', 'node7'], ['9M', '5M']),
    ]
    for path_name, path_nodes, path_cigars in path_specs:
        graph.add_path(path_name, path_nodes, path_cigars,
                       tags={},
                       labels={})

    # Run the unit under test against an in-memory buffer.
    csv_out = StringIO()
    graph.write_bandage_csv(csv_out)

    expected = """\
Node name,Contig,Color,OrdinalID
node1,000000F,#1CE6FF,0
node2,000000F,#1CE6FF,1
node3,000001F,#FF34FF,0
node4,000001F,#FF34FF,1
node5,000002F,#FF4A46,0
node6,000002F;000003F,#FFFF00,0
node7,000003F,#008941,1
"""
    assert csv_out.getvalue() == expected