def test_tiling_path_5(tmpdir):
    """
    Loading tiling paths from a file on disk must give the same result as
    loading the identical text from an in-memory stream.
    """
    # Put the shared fixture text into a temporary file.
    path_on_disk = tmpdir.join('test_tiling_path')
    path_on_disk.write(test_1_input_tp_as_text)

    from_file = mod.load_tiling_paths(
        str(path_on_disk), contig_lens=None, whitelist_seqs=None)
    from_stream = mod.load_tiling_paths_from_stream(
        StringIO(test_1_input_tp_as_text), contig_lens=None, whitelist_seqs=None)

    # Same contig names, listed in the same order.
    assert list(from_file.keys()) == list(from_stream.keys())

    # Every per-contig attribute must agree between the two loaders.
    for name in from_file:
        actual_tp = from_file[name]
        wanted_tp = from_stream[name]
        assert actual_tp.coords == wanted_tp.coords
        assert actual_tp.dump_as_split_lines() == wanted_tp.dump_as_split_lines()
        assert actual_tp.contig_len == wanted_tp.contig_len
        assert actual_tp.v_to_edge == wanted_tp.v_to_edge
        assert actual_tp.w_to_edge == wanted_tp.w_to_edge
def test_tiling_path_3():
    """
    This is a normal test case.
    Test the whitelist filter.

    Only the whitelisted contigs that occur in the input are expected in the
    result; the whitelisted key that does not occur in the input is expected
    to be absent. Contig lengths are supplied, so the expected coordinates
    are shifted by (contig length - path length).
    """
    fp_in = StringIO(test_1_input_tp_as_text)

    # Run test.
    contig_lens = {'000000F': 50000, '000001F': 40000, '000002F': 10000}
    whitelist_seqs = set(['000000F', '000002F', 'key_that_doesnt_exist'])
    result = mod.load_tiling_paths_from_stream(
        fp_in, contig_lens=contig_lens, whitelist_seqs=whitelist_seqs)

    # Validate.
    assert sorted(result.keys()) == ['000000F', '000002F']

    # Each loaded path must dump back to the split lines of its input text.
    for key in result:
        lines = test_1_lines[key]
        sl = [line.strip().split() for line in lines.splitlines()]
        assert result[key].dump_as_split_lines() == sl

    # Check the coordinates.
    offset = contig_lens['000000F'] - 45201
    assert result['000000F'].coords == {
        '000092122:B': 0 + offset,
        '000081654:B': 33726 + offset,
        '000034462:B': 43849 + offset,
        '000061403:B': 45201 + offset,
    }
    offset = contig_lens['000002F'] - 8473
    assert result['000002F'].coords == {
        '000014727:E': 0 + offset,
        '000024020:E': 5238 + offset,
        '000060868:E': 8473 + offset,
    }

    # items() works on both Python 2 and 3; iteritems() exists only on
    # Python 2 and raises AttributeError on Python 3.
    for key, tp in result.items():
        assert tp.contig_len == contig_lens[key]

    assert result['000000F'].v_to_edge == {'000092122:B': 0, '000081654:B': 1, '000034462:B': 2}
    assert result['000000F'].w_to_edge == {'000081654:B': 0, '000034462:B': 1, '000061403:B': 2}
    assert result['000002F'].v_to_edge == {'000014727:E': 0, '000024020:E': 1}
    assert result['000002F'].w_to_edge == {'000024020:E': 0, '000060868:E': 1}
def test_calc_node_coords_1():
    """
    Run a normal test.

    With first_node_offset=0, calc_node_coords is expected to return the
    unshifted node coordinates of each path together with the path length.
    """
    fp_in = StringIO(test_1_input_tp_as_text)

    # Run test.
    # The calc_node_coords requires a list of TilingEdge objects.
    # Parsing of tiling paths and loading of TilingEdge objects is covered
    # by the test_tiling_path_* tests, so here we just reuse the mechanism
    # to get tp.edges.
    expected_coord_map = {}
    expected_coord_map['000000F'] = {'000092122:B': 0, '000081654:B': 33726, '000034462:B': 43849, '000061403:B': 45201}
    expected_coord_map['000001F'] = {'000092122:B': 0, '000081654:B': 33726}
    expected_coord_map['000002F'] = {'000014727:E': 0, '000024020:E': 5238, '000060868:E': 8473}

    expected_contig_len = {}
    expected_contig_len['000000F'] = 45201
    expected_contig_len['000001F'] = 33726
    expected_contig_len['000002F'] = 8473

    contig_lens = expected_contig_len
    whitelist_seqs = None
    tps = mod.load_tiling_paths_from_stream(
        fp_in, contig_lens=contig_lens, whitelist_seqs=whitelist_seqs)

    # items() works on both Python 2 and 3; iteritems() exists only on
    # Python 2 and raises AttributeError on Python 3.
    for key, tp in tps.items():
        coord_map, contig_len = mod.calc_node_coords(tp.edges, first_node_offset=0)
        assert coord_map == expected_coord_map[key]
        assert contig_len == expected_contig_len[key]
def test_tiling_path_5(tmpdir):
    """
    File-based loading check: the tiling paths read from a temporary file
    must match those read from a stream over the same text.
    """
    tmp_tp = tmpdir.join('test_tiling_path')
    tmp_tp.write(test_1_input_tp_as_text)

    loaded = mod.load_tiling_paths(
        str(tmp_tp), contig_lens=None, whitelist_seqs=None)

    stream = StringIO(test_1_input_tp_as_text)
    reference = mod.load_tiling_paths_from_stream(
        stream, contig_lens=None, whitelist_seqs=None)

    # Both loaders must report the same contig keys.
    assert loaded.keys() == reference.keys()

    for contig in loaded:
        got = loaded[contig]
        ref = reference[contig]
        assert got.coords == ref.coords
        assert got.dump_as_split_lines() == ref.dump_as_split_lines()
        assert got.contig_len == ref.contig_len
        assert got.v_to_edge == ref.v_to_edge
        assert got.w_to_edge == ref.w_to_edge
def test_tiling_path_3():
    """
    Whitelist filter test.

    Of the three whitelisted names, only the two that occur in the input are
    expected in the result, with coordinates shifted by the difference
    between the supplied contig length and the path length.
    """
    stream = StringIO(test_1_input_tp_as_text)
    contig_lens = {'000000F': 50000, '000001F': 40000, '000002F': 10000}
    whitelist_seqs = {'000000F', '000002F', 'key_that_doesnt_exist'}

    result = mod.load_tiling_paths_from_stream(
        stream, contig_lens=contig_lens, whitelist_seqs=whitelist_seqs)

    assert sorted(result.keys()) == ['000000F', '000002F']

    # Round trip: each loaded path dumps back to its tokenised input lines.
    for name in result:
        tokens = [row.strip().split() for row in test_1_lines[name].splitlines()]
        assert result[name].dump_as_split_lines() == tokens

    # (contig, path length, unshifted node coordinates)
    coord_cases = (
        ('000000F', 45201, {'000092122:B': 0, '000081654:B': 33726,
                            '000034462:B': 43849, '000061403:B': 45201}),
        ('000002F', 8473, {'000014727:E': 0, '000024020:E': 5238,
                           '000060868:E': 8473}),
    )
    for name, path_len, unshifted in coord_cases:
        shift = contig_lens[name] - path_len
        shifted = {node: pos + shift for node, pos in unshifted.items()}
        assert result[name].coords == shifted

    for name, tiling in result.items():
        assert tiling.contig_len == contig_lens[name]

    # (contig, expected v_to_edge, expected w_to_edge)
    edge_cases = (
        ('000000F',
         {'000092122:B': 0, '000081654:B': 1, '000034462:B': 2},
         {'000081654:B': 0, '000034462:B': 1, '000061403:B': 2}),
        ('000002F',
         {'000014727:E': 0, '000024020:E': 1},
         {'000024020:E': 0, '000060868:E': 1}),
    )
    for name, v_map, w_map in edge_cases:
        assert result[name].v_to_edge == v_map
        assert result[name].w_to_edge == w_map
def test_find_a_ctg_placement_1():
    """
    Normal test case.
    The find_a_ctg_placement method expects a valid primary contig tiling path
    dict and a valid associate contig tiling path dict as inputs.
    Validation of the construction of TilingPath objects is performed in
    the test_tiling_path_* tests; here we take these objects for granted.

    Every placement returned is compared against test_placement_1_expected.
    """
    fp_in = StringIO(test_placement_1_p_path_as_text)
    p_paths = mod.load_tiling_paths_from_stream(fp_in, contig_lens=None, whitelist_seqs=None)

    fp_in = StringIO(test_placement_1_a_path_as_text)
    a_paths = mod.load_tiling_paths_from_stream(fp_in, contig_lens=None, whitelist_seqs=None)

    result = mod.find_a_ctg_placement(p_paths, a_paths)

    # items() works on both Python 2 and 3; iteritems() exists only on
    # Python 2 and raises AttributeError on Python 3.
    for key, placement in result.items():
        assert placement == test_placement_1_expected[key]
def test_find_a_ctg_placement_2():
    """
    With no associate contig paths supplied, no placements are expected.
    """
    primary_paths = mod.load_tiling_paths_from_stream(
        StringIO(test_placement_1_p_path_as_text),
        contig_lens=None, whitelist_seqs=None)
    no_a_paths = {}

    placements = mod.find_a_ctg_placement(primary_paths, no_a_paths)

    assert len(placements.keys()) == 0
def test_find_a_ctg_placement_3():
    """
    Empty primary contig paths.
    Placing associate contigs without their primary contig is expected to
    raise an exception.
    """
    no_p_paths = {}
    associate_paths = mod.load_tiling_paths_from_stream(
        StringIO(test_placement_1_a_path_as_text),
        contig_lens=None, whitelist_seqs=None)

    with pytest.raises(Exception):
        mod.find_a_ctg_placement(no_p_paths, associate_paths)
def test_find_a_ctg_placement_1():
    """
    Regular placement test.

    find_a_ctg_placement is given a primary-contig tiling path dict and an
    associate-contig tiling path dict, both loaded from fixture text.
    Construction of the TilingPath objects is exercised by other tests and
    is taken for granted here.
    """
    primary_stream = StringIO(test_placement_1_p_path_as_text)
    primary_paths = mod.load_tiling_paths_from_stream(
        primary_stream, contig_lens=None, whitelist_seqs=None)

    associate_stream = StringIO(test_placement_1_a_path_as_text)
    associate_paths = mod.load_tiling_paths_from_stream(
        associate_stream, contig_lens=None, whitelist_seqs=None)

    placements = mod.find_a_ctg_placement(primary_paths, associate_paths)

    # Each returned placement must equal its expected fixture entry.
    for name, found in placements.items():
        wanted = test_placement_1_expected[name]
        assert found == wanted
def test_calc_node_coords_3():
    """
    calc_node_coords with a non-zero first_node_offset.

    Every node coordinate is expected to be shifted by the offset, and the
    returned contig length is expected to equal the padded contig length.
    """
    # calc_node_coords takes a list of TilingEdge objects; loading the
    # fixture through load_tiling_paths_from_stream is just a convenient
    # way to obtain tp.edges.
    path_lens = {'000000F': 45201, '000001F': 33726, '000002F': 8473}
    expected_contig_len = {'000000F': 50000, '000001F': 40000, '000002F': 10000}

    # Offset of the first node = padded contig length - path length.
    first_node_offset = {
        name: expected_contig_len[name] - path_lens[name]
        for name in path_lens
    }

    unshifted = {
        '000000F': {'000092122:B': 0, '000081654:B': 33726,
                    '000034462:B': 43849, '000061403:B': 45201},
        '000001F': {'000092122:B': 0, '000081654:B': 33726},
        '000002F': {'000014727:E': 0, '000024020:E': 5238,
                    '000060868:E': 8473},
    }
    expected_coord_map = {
        name: {node: pos + first_node_offset[name] for node, pos in nodes.items()}
        for name, nodes in unshifted.items()
    }

    stream = StringIO(test_1_input_tp_as_text)
    tiling_paths = mod.load_tiling_paths_from_stream(
        stream, contig_lens=expected_contig_len, whitelist_seqs=None)

    for name, tiling in tiling_paths.items():
        coords, length = mod.calc_node_coords(
            tiling.edges, first_node_offset=first_node_offset[name])
        assert coords == expected_coord_map[name]
        assert length == expected_contig_len[name]
def test_find_a_ctg_placement_2():
    """
    An empty associate contig path dict is expected to yield no placements.
    """
    p_stream = StringIO(test_placement_1_p_path_as_text)
    primaries = mod.load_tiling_paths_from_stream(
        p_stream, contig_lens=None, whitelist_seqs=None)

    placements = mod.find_a_ctg_placement(primaries, {})

    placed_names = list(placements.keys())
    assert len(placed_names) == 0
def test_get_subpath_8():
    """
    An end coordinate that is not greater than the start coordinate is
    expected to be rejected with an exception.
    """
    tiling = mod.load_tiling_paths_from_stream(
        StringIO(test_placement_1_p_path_as_text),
        contig_lens=None, whitelist_seqs=None)['000000F']

    begin, finish = 10, 7  # finish < begin on purpose
    with pytest.raises(Exception):
        tiling.get_subpath(begin, finish)
def test_get_subpath_1():
    """
    Asking for the span from 0 to the contig length is expected to return
    the complete tiling path.
    """
    paths = mod.load_tiling_paths_from_stream(
        StringIO(test_placement_1_p_path_as_text),
        contig_lens=None, whitelist_seqs=None)
    tiling = paths['000000F']

    begin = 0
    finish = tiling.contig_len

    # Expected result shape: (subpath, start_in_subpath, end_in_subpath).
    result = tiling.get_subpath(begin, finish)

    every_edge = [edge.get_split_line() for edge in tiling.edges]
    assert result == (every_edge, begin, finish)
def test_get_subpath_4():
    """
    Regular case: both coordinates fall inside the path, away from its ends.
    """
    paths = mod.load_tiling_paths_from_stream(
        StringIO(test_placement_1_p_path_as_text),
        contig_lens=None, whitelist_seqs=None)
    tiling = paths['000000F']

    # The expected subpath starts at edges[2]; 43849 is the coordinate the
    # in-subpath positions are measured from.
    subpath_origin = 43849
    begin = subpath_origin + 10   # 10 bases into the third edge
    finish = 68767 + 100          # inside the ninth edge

    result = tiling.get_subpath(begin, finish)
    # Unpacking also verifies that exactly three values came back:
    # (subpath, start_in_subpath, end_in_subpath).
    subpath, start_in_subpath, end_in_subpath = result

    wanted_edges = [edge.get_split_line() for edge in tiling.edges[2:9]]
    assert result == (wanted_edges, 10, finish - subpath_origin)
def test_get_subpath_7():
    """
    Both coordinates negative: only the first edge is expected back, and the
    coordinates are expected to be returned unchanged.
    """
    paths = mod.load_tiling_paths_from_stream(
        StringIO(test_placement_1_p_path_as_text),
        contig_lens=None, whitelist_seqs=None)
    tiling = paths['000000F']

    begin, finish = -100, -10

    result = tiling.get_subpath(begin, finish)
    # Unpacking also verifies that exactly three values came back:
    # (subpath, start_in_subpath, end_in_subpath).
    subpath, start_in_subpath, end_in_subpath = result

    first_edge_only = [edge.get_split_line() for edge in tiling.edges[0:1]]
    assert result == (first_edge_only, begin, finish)
def test_get_subpath_2():
    """
    Negative start coordinate (can happen for an improper contig): the whole
    path is expected back, with the coordinates returned unchanged.
    """
    paths = mod.load_tiling_paths_from_stream(
        StringIO(test_placement_1_p_path_as_text),
        contig_lens=None, whitelist_seqs=None)
    tiling = paths['000000F']

    begin = -100
    finish = tiling.contig_len

    result = tiling.get_subpath(begin, finish)
    # Unpacking also verifies that exactly three values came back:
    # (subpath, start_in_subpath, end_in_subpath).
    subpath, start_in_subpath, end_in_subpath = result

    every_edge = [edge.get_split_line() for edge in tiling.edges]
    assert result == (every_edge, begin, finish)
def test_get_subpath_6():
    """
    The end coordinate reaches a single base into the next edge; that edge
    is expected to be included in the output in full.
    """
    paths = mod.load_tiling_paths_from_stream(
        StringIO(test_placement_1_p_path_as_text),
        contig_lens=None, whitelist_seqs=None)
    tiling = paths['000000F']

    # The expected subpath starts at edges[2]; 43849 is the coordinate the
    # in-subpath positions are measured from.
    subpath_origin = 43849
    begin = subpath_origin + 0    # exactly at the start of the third edge
    finish = 69240 + 1            # one base past 69240, so edges[9] is included

    result = tiling.get_subpath(begin, finish)
    # Unpacking also verifies that exactly three values came back:
    # (subpath, start_in_subpath, end_in_subpath).
    subpath, start_in_subpath, end_in_subpath = result

    wanted_edges = [edge.get_split_line() for edge in tiling.edges[2:10]]
    assert result == (wanted_edges, 0, finish - subpath_origin)
def test_tiling_path_4_expect_crash():
    """
    This test attempts to create a tiling path from edges which are out of order.
    An exception should be raised when initializing the TilingPath object.

    An extra edge starting at node '000062240:B' is appended to the text of
    contig 000000F before loading.
    NOTE(review): the original docstring said `calc_node_coords` should raise
    for node '000021348:B', but that node does not appear in this test -
    confirm which node actually triggers the exception.
    """
    test_3_lines = {}
    test_3_lines['000000F'] = test_1_lines['000000F'] + """\n000000F 000062240:B 000083779:B 000083779 862 0 30696 99.79"""
    test_3_lines['000001F'] = test_1_lines['000001F']
    test_3_lines['000002F'] = test_1_lines['000002F']

    # values() works on both Python 2 and 3; the previous iteritems() call
    # exists only on Python 2 and raises AttributeError on Python 3.
    test_3_input_tp_as_text = '\n'.join(test_3_lines.values())
    fp_in = StringIO(test_3_input_tp_as_text)

    contig_lens = None
    whitelist_seqs = None
    with pytest.raises(Exception):
        mod.load_tiling_paths_from_stream(
            fp_in, contig_lens=contig_lens, whitelist_seqs=whitelist_seqs)
def test_get_subpath_3():
    """
    End coordinate beyond the contig length (possible for an improper
    contig, though discouraged): the whole path is expected back, with the
    coordinates returned unchanged.
    """
    paths = mod.load_tiling_paths_from_stream(
        StringIO(test_placement_1_p_path_as_text),
        contig_lens=None, whitelist_seqs=None)
    tiling = paths['000000F']

    begin = -100
    finish = tiling.contig_len + 1000

    result = tiling.get_subpath(begin, finish)
    # Unpacking also verifies that exactly three values came back:
    # (subpath, start_in_subpath, end_in_subpath).
    subpath, start_in_subpath, end_in_subpath = result

    every_edge = [edge.get_split_line() for edge in tiling.edges]
    assert result == (every_edge, begin, finish)
def test_get_subpath_5():
    """
    Boundary case: both coordinates sit exactly on the ends of tiling path
    edges.
    """
    paths = mod.load_tiling_paths_from_stream(
        StringIO(test_placement_1_p_path_as_text),
        contig_lens=None, whitelist_seqs=None)
    tiling = paths['000000F']

    # The expected subpath starts at edges[2]; 43849 is the coordinate the
    # in-subpath positions are measured from.
    subpath_origin = 43849
    begin = subpath_origin + 0    # exactly at the start of the third edge
    finish = 69240 + 0            # expected to still end with the ninth edge

    result = tiling.get_subpath(begin, finish)
    # Unpacking also verifies that exactly three values came back:
    # (subpath, start_in_subpath, end_in_subpath).
    subpath, start_in_subpath, end_in_subpath = result

    wanted_edges = [edge.get_split_line() for edge in tiling.edges[2:9]]
    assert result == (wanted_edges, 0, finish - subpath_origin)
def test_tiling_path_1():
    """
    This is a normal test case.

    Loads the shared tiling path fixture without contig lengths and without
    a whitelist, then checks the parsed edges, node coordinates, contig
    lengths and the v/w node-to-edge lookup tables of every contig.
    """
    fp_in = StringIO(test_1_input_tp_as_text)

    # Run test.
    contig_lens = None
    whitelist_seqs = None
    result = mod.load_tiling_paths_from_stream(
        fp_in, contig_lens=contig_lens, whitelist_seqs=whitelist_seqs)

    # Validate.
    assert sorted(result.keys()) == ['000000F', '000001F', '000002F']

    # This checks several methods:
    #   - The entire TilingPathEdge: the values are directly parsed via constructor
    #     called from the TilingPath class. The correctness is tested by calling
    #     the dump_as_split_lines() which should reconstruct the original input line.
    #   - TilingPath has a method `dump_as_split_lines` which returns all such
    #     split lines.
    # items() works on both Python 2 and 3; iteritems() exists only on
    # Python 2 and raises AttributeError on Python 3.
    for key, lines in test_1_lines.items():
        sl = [line.strip().split() for line in lines.splitlines()]
        assert result[key].dump_as_split_lines() == sl

    # Check the coordinates.
    assert result['000000F'].coords == {'000092122:B': 0, '000081654:B': 33726, '000034462:B': 43849, '000061403:B': 45201}
    assert result['000001F'].coords == {'000092122:B': 0, '000081654:B': 33726}
    assert result['000002F'].coords == {'000014727:E': 0, '000024020:E': 5238, '000060868:E': 8473}

    # Without contig_lens, the contig length is expected to be the path length.
    assert result['000000F'].contig_len == 45201
    assert result['000001F'].contig_len == 33726
    assert result['000002F'].contig_len == 8473

    assert result['000000F'].v_to_edge == {'000092122:B': 0, '000081654:B': 1, '000034462:B': 2}
    assert result['000000F'].w_to_edge == {'000081654:B': 0, '000034462:B': 1, '000061403:B': 2}
    assert result['000001F'].v_to_edge == {'000092122:B': 0}
    assert result['000001F'].w_to_edge == {'000081654:B': 0}
    assert result['000002F'].v_to_edge == {'000014727:E': 0, '000024020:E': 1}
    assert result['000002F'].w_to_edge == {'000024020:E': 0, '000060868:E': 1}
def test_get_subpath_1():
    """
    Extract the span [0, contig_len]; the entire path is expected back.
    """
    stream = StringIO(test_placement_1_p_path_as_text)
    loaded = mod.load_tiling_paths_from_stream(
        stream, contig_lens=None, whitelist_seqs=None)
    primary = loaded['000000F']

    lo = 0
    hi = primary.contig_len

    # get_subpath is expected to return
    # (subpath, start_in_subpath, end_in_subpath).
    result = primary.get_subpath(lo, hi)

    full_path = [e.get_split_line() for e in primary.edges]
    expected = (full_path, lo, hi)
    assert result == expected
def test_get_subpath_4():
    """
    Ordinary extraction between two internal coordinates of the path.
    """
    stream = StringIO(test_placement_1_p_path_as_text)
    loaded = mod.load_tiling_paths_from_stream(
        stream, contig_lens=None, whitelist_seqs=None)
    primary = loaded['000000F']

    # In-subpath positions are measured from 43849, where edges[2] begins
    # according to the expected values below.
    origin = 43849
    lo = origin + 10     # 10 bases into the third edge
    hi = 68767 + 100     # inside the ninth edge

    result = primary.get_subpath(lo, hi)
    # The three-way unpack doubles as a check on the result's arity:
    # (subpath, start_in_subpath, end_in_subpath).
    subpath, start_in_subpath, end_in_subpath = result

    edges_3_to_9 = [e.get_split_line() for e in primary.edges[2:9]]
    expected = (edges_3_to_9, 10, hi - origin)
    assert result == expected
def test_get_subpath_2():
    """
    Start coordinate below zero, which an improper contig can produce.
    The full path is expected, with both coordinates passed through as-is.
    """
    stream = StringIO(test_placement_1_p_path_as_text)
    loaded = mod.load_tiling_paths_from_stream(
        stream, contig_lens=None, whitelist_seqs=None)
    primary = loaded['000000F']

    lo = -100
    hi = primary.contig_len

    result = primary.get_subpath(lo, hi)
    # The three-way unpack doubles as a check on the result's arity:
    # (subpath, start_in_subpath, end_in_subpath).
    subpath, start_in_subpath, end_in_subpath = result

    full_path = [e.get_split_line() for e in primary.edges]
    expected = (full_path, lo, hi)
    assert result == expected
def test_get_subpath_7():
    """
    Start and end are both below zero; the output is expected to consist of
    the first edge alone.
    """
    stream = StringIO(test_placement_1_p_path_as_text)
    loaded = mod.load_tiling_paths_from_stream(
        stream, contig_lens=None, whitelist_seqs=None)
    primary = loaded['000000F']

    lo = -100
    hi = -10

    result = primary.get_subpath(lo, hi)
    # The three-way unpack doubles as a check on the result's arity:
    # (subpath, start_in_subpath, end_in_subpath).
    subpath, start_in_subpath, end_in_subpath = result

    head_edge = [e.get_split_line() for e in primary.edges[0:1]]
    expected = (head_edge, lo, hi)
    assert result == expected
def test_tiling_path_2():
    """
    This is a normal test case.
    Run with a dict specifying contig lengths. This should offset the coordinates.

    Each expected coordinate is the unshifted node coordinate plus
    (supplied contig length - path length).
    """
    fp_in = StringIO(test_1_input_tp_as_text)

    # Run test.
    contig_lens = {'000000F': 50000, '000001F': 40000, '000002F': 10000}
    whitelist_seqs = None
    result = mod.load_tiling_paths_from_stream(
        fp_in, contig_lens=contig_lens, whitelist_seqs=whitelist_seqs)

    # Validate.
    assert sorted(result.keys()) == ['000000F', '000001F', '000002F']

    # items() works on both Python 2 and 3; iteritems() exists only on
    # Python 2 and raises AttributeError on Python 3.
    for key, lines in test_1_lines.items():
        sl = [line.strip().split() for line in lines.splitlines()]
        assert result[key].dump_as_split_lines() == sl

    # Check the coordinates.
    offset = contig_lens['000000F'] - 45201
    assert result['000000F'].coords == {
        '000092122:B': 0 + offset,
        '000081654:B': 33726 + offset,
        '000034462:B': 43849 + offset,
        '000061403:B': 45201 + offset,
    }
    offset = contig_lens['000001F'] - 33726
    assert result['000001F'].coords == {
        '000092122:B': 0 + offset,
        '000081654:B': 33726 + offset,
    }
    offset = contig_lens['000002F'] - 8473
    assert result['000002F'].coords == {
        '000014727:E': 0 + offset,
        '000024020:E': 5238 + offset,
        '000060868:E': 8473 + offset,
    }

    # The loaded contig length must be the supplied one, not the path length.
    for key, tp in result.items():
        assert tp.contig_len == contig_lens[key]

    assert result['000000F'].v_to_edge == {'000092122:B': 0, '000081654:B': 1, '000034462:B': 2}
    assert result['000000F'].w_to_edge == {'000081654:B': 0, '000034462:B': 1, '000061403:B': 2}
    assert result['000001F'].v_to_edge == {'000092122:B': 0}
    assert result['000001F'].w_to_edge == {'000081654:B': 0}
    assert result['000002F'].v_to_edge == {'000014727:E': 0, '000024020:E': 1}
    assert result['000002F'].w_to_edge == {'000024020:E': 0, '000060868:E': 1}
def test_tiling_path_4_expect_crash():
    """
    Loading a tiling path whose edges are out of order is expected to raise
    while the TilingPath object is being built.

    An extra edge starting at node '000062240:B' is appended to the text of
    contig 000000F before loading.
    NOTE(review): earlier documentation named node '000021348:B' as the one
    `calc_node_coords` rejects; that node does not appear in this test -
    confirm.
    """
    extra_edge = """\n000000F 000062240:B 000083779:B 000083779 862 0 30696 99.79"""
    broken_lines = {
        '000000F': test_1_lines['000000F'] + extra_edge,
        '000001F': test_1_lines['000001F'],
        '000002F': test_1_lines['000002F'],
    }

    # Join the per-contig texts in insertion order into a single input.
    broken_text = '\n'.join(text for _, text in broken_lines.items())
    stream = StringIO(broken_text)

    with pytest.raises(Exception):
        mod.load_tiling_paths_from_stream(
            stream, contig_lens=None, whitelist_seqs=None)
def test_get_subpath_3():
    """
    End coordinate past the contig length. Like a negative start, this can
    occur for an improper contig but is discouraged. The full path is
    expected, with both coordinates passed through as-is.
    """
    stream = StringIO(test_placement_1_p_path_as_text)
    loaded = mod.load_tiling_paths_from_stream(
        stream, contig_lens=None, whitelist_seqs=None)
    primary = loaded['000000F']

    lo = -100
    hi = primary.contig_len + 1000

    result = primary.get_subpath(lo, hi)
    # The three-way unpack doubles as a check on the result's arity:
    # (subpath, start_in_subpath, end_in_subpath).
    subpath, start_in_subpath, end_in_subpath = result

    full_path = [e.get_split_line() for e in primary.edges]
    expected = (full_path, lo, hi)
    assert result == expected
def test_get_subpath_5():
    """
    Edge case where the requested coordinates coincide with the ends of
    tiling path edges.
    """
    stream = StringIO(test_placement_1_p_path_as_text)
    loaded = mod.load_tiling_paths_from_stream(
        stream, contig_lens=None, whitelist_seqs=None)
    primary = loaded['000000F']

    # In-subpath positions are measured from 43849, where edges[2] begins
    # according to the expected values below.
    origin = 43849
    lo = origin + 0      # start of the third edge
    hi = 69240 + 0       # the ninth edge is expected to be the last one kept

    result = primary.get_subpath(lo, hi)
    # The three-way unpack doubles as a check on the result's arity:
    # (subpath, start_in_subpath, end_in_subpath).
    subpath, start_in_subpath, end_in_subpath = result

    edges_3_to_9 = [e.get_split_line() for e in primary.edges[2:9]]
    expected = (edges_3_to_9, 0, hi - origin)
    assert result == expected
def test_get_subpath_6():
    """
    The end coordinate lies just one base inside a further edge, and that
    whole edge is expected in the output.
    """
    stream = StringIO(test_placement_1_p_path_as_text)
    loaded = mod.load_tiling_paths_from_stream(
        stream, contig_lens=None, whitelist_seqs=None)
    primary = loaded['000000F']

    # In-subpath positions are measured from 43849, where edges[2] begins
    # according to the expected values below.
    origin = 43849
    lo = origin + 0      # start of the third edge
    hi = 69240 + 1       # one base past 69240, which pulls in edges[9]

    result = primary.get_subpath(lo, hi)
    # The three-way unpack doubles as a check on the result's arity:
    # (subpath, start_in_subpath, end_in_subpath).
    subpath, start_in_subpath, end_in_subpath = result

    edges_3_to_10 = [e.get_split_line() for e in primary.edges[2:10]]
    expected = (edges_3_to_10, 0, hi - origin)
    assert result == expected
def test_tiling_path_2():
    """
    Load with an explicit contig-length dict.

    The node coordinates of each path are expected to be shifted by the
    difference between the supplied contig length and the path length.
    """
    stream = StringIO(test_1_input_tp_as_text)
    contig_lens = {'000000F': 50000, '000001F': 40000, '000002F': 10000}

    result = mod.load_tiling_paths_from_stream(
        stream, contig_lens=contig_lens, whitelist_seqs=None)

    assert sorted(result.keys()) == ['000000F', '000001F', '000002F']

    # Round trip: each loaded path dumps back to its tokenised input lines.
    for name, text in test_1_lines.items():
        tokens = [row.strip().split() for row in text.splitlines()]
        assert result[name].dump_as_split_lines() == tokens

    # (contig, path length, unshifted node coordinates)
    coord_cases = (
        ('000000F', 45201, {'000092122:B': 0, '000081654:B': 33726,
                            '000034462:B': 43849, '000061403:B': 45201}),
        ('000001F', 33726, {'000092122:B': 0, '000081654:B': 33726}),
        ('000002F', 8473, {'000014727:E': 0, '000024020:E': 5238,
                           '000060868:E': 8473}),
    )
    for name, path_len, unshifted in coord_cases:
        shift = contig_lens[name] - path_len
        shifted = {node: pos + shift for node, pos in unshifted.items()}
        assert result[name].coords == shifted

    for name, tiling in result.items():
        assert tiling.contig_len == contig_lens[name]

    # (contig, expected v_to_edge, expected w_to_edge)
    edge_cases = (
        ('000000F',
         {'000092122:B': 0, '000081654:B': 1, '000034462:B': 2},
         {'000081654:B': 0, '000034462:B': 1, '000061403:B': 2}),
        ('000001F',
         {'000092122:B': 0},
         {'000081654:B': 0}),
        ('000002F',
         {'000014727:E': 0, '000024020:E': 1},
         {'000024020:E': 0, '000060868:E': 1}),
    )
    for name, v_map, w_map in edge_cases:
        assert result[name].v_to_edge == v_map
        assert result[name].w_to_edge == w_map
def test_tiling_path_1():
    """
    Baseline load: no contig lengths, no whitelist.

    Checks the parsed edges, node coordinates, contig lengths and the v/w
    node-to-edge tables of all three contigs in the fixture.
    """
    stream = StringIO(test_1_input_tp_as_text)
    result = mod.load_tiling_paths_from_stream(
        stream, contig_lens=None, whitelist_seqs=None)

    assert sorted(result.keys()) == ['000000F', '000001F', '000002F']

    # dump_as_split_lines() should reproduce the tokenised input lines. This
    # exercises the edge parsing done while the TilingPath is constructed as
    # well as the dump method itself.
    for name, text in test_1_lines.items():
        tokens = [row.strip().split() for row in text.splitlines()]
        assert result[name].dump_as_split_lines() == tokens

    # Node coordinates, unshifted because no contig lengths were supplied.
    coord_cases = (
        ('000000F', {'000092122:B': 0, '000081654:B': 33726,
                     '000034462:B': 43849, '000061403:B': 45201}),
        ('000001F', {'000092122:B': 0, '000081654:B': 33726}),
        ('000002F', {'000014727:E': 0, '000024020:E': 5238,
                     '000060868:E': 8473}),
    )
    for name, coords in coord_cases:
        assert result[name].coords == coords

    for name, length in (('000000F', 45201), ('000001F', 33726), ('000002F', 8473)):
        assert result[name].contig_len == length

    # (contig, expected v_to_edge, expected w_to_edge)
    edge_cases = (
        ('000000F',
         {'000092122:B': 0, '000081654:B': 1, '000034462:B': 2},
         {'000081654:B': 0, '000034462:B': 1, '000061403:B': 2}),
        ('000001F',
         {'000092122:B': 0},
         {'000081654:B': 0}),
        ('000002F',
         {'000014727:E': 0, '000024020:E': 1},
         {'000024020:E': 0, '000060868:E': 1}),
    )
    for name, v_map, w_map in edge_cases:
        assert result[name].v_to_edge == v_map
        assert result[name].w_to_edge == w_map