def setUp(self):
    """Build a linear ten-node graph and a second graph over the same nodes."""
    super().setUp()
    nodes = {node_id: obg.Block(100) for node_id in range(1, 11)}

    # Simple chain 1 -> 2 -> ... -> 10.
    chain_edges = {src: [src + 1] for src in range(1, 10)}
    self.lin_graph = obg.GraphWithReversals(nodes, chain_edges)

    # Nodes 1..4 each get two targets.
    # NOTE(review): for src in 1..4, (src + 5) % 5 + 1 also evaluates to
    # src + 1, so both targets coincide -- confirm this is intended.
    double_edges = {}
    for src in range(1, 5):
        double_edges[src] = [src + 1, (src + 5) % 5 + 1]
    self.double_graph = obg.GraphWithReversals(nodes, double_edges)
def setUp(self):
    """Create a random graph of 100 length-3 blocks with at most 150 edges."""
    from collections import defaultdict

    self.n_blocks = 100
    self.n_edges = self.n_blocks + 50

    blocks_list = list(range(1, self.n_blocks + 1))
    blocks = {block_id: obg.Block(3) for block_id in blocks_list}

    # Random edges
    edge_dict = defaultdict(list)
    for _ in range(self.n_edges):
        start = blocks_list[randrange(0, len(blocks_list))]
        end = blocks_list[randrange(0, len(blocks_list))]

        # Randomly flip the direction of each endpoint.
        if randrange(0, 2) == 1:
            start = -start
        if randrange(0, 2) == 1:
            end = -end

        # Skip edges between a node and itself (in either direction).
        if abs(end) == abs(start):
            continue

        targets = edge_dict[start]
        if end not in targets:
            targets.append(end)

    self.graph = obg.GraphWithReversals(blocks, edge_dict)
def test_from_intervals2(self):
    """Check the per-node data of a SparsePileup built from five intervals."""
    blocks = {node_id: obg.Block(10) for node_id in (1, 2, 3)}
    graph = obg.GraphWithReversals(blocks, {1: [2], 2: [3]})
    intervals = [
        obg.Interval(1, 5, [1]),
        obg.Interval(3, 7, [1]),
        obg.Interval(5, 3, [1, 2]),
        obg.Interval(5, 6, [2]),
        obg.Interval(8, 8, [1, 2, 3]),
    ]

    data = SparsePileup.from_intervals(graph, intervals).data

    expected_start_values = {1: False, 2: 2, 3: 1}
    for node_id, start_value in expected_start_values.items():
        self.assertEqual(data[node_id].start_value, start_value)

    expected_indexes = {1: [1, 3, 5, 7, 8], 2: [3, 5, 6], 3: [8]}
    expected_values = {1: [1, 2, 2, 1, 2], 2: [1, 2, 1], 3: [0]}
    for node_id in (1, 2, 3):
        self.assertTrue(
            np.all(expected_indexes[node_id] == data[node_id].indexes))
        self.assertTrue(
            np.all(expected_values[node_id] == data[node_id].values))
def test_long_hole():
    """HolesCleaner with limit 56 must return a pileup equal to the input."""
    n_nodes = 100
    blocks = {node_id: obg.Block(1) for node_id in range(1, n_nodes + 1)}
    chain = {node_id: [node_id + 1] for node_id in range(1, n_nodes)}
    graph = obg.GraphWithReversals(blocks, chain)

    pileup = SparseValues([0, 5, 95], [True, False, True])
    cleaned = HolesCleaner(graph, pileup, 56).run()
    assert cleaned == pileup
def test_internals():
    """Run HolesCleaner (limit 10) on a single 100-long node and compare."""
    single_node = obg.GraphWithReversals({101: obg.Block(100)}, {101: []})
    before = SparseValues([0, 10, 19, 30, 41, 50], [1, 0, 1, 0, 1, 0])
    expected = SparseValues([0, 30, 41, 50], [1, 0, 1, 0])

    after = HolesCleaner(single_node, before, 10).run()
    assert after == expected
def setUp(self):
    """Create a four-node chain graph and an empty SparsePileup on it."""
    blocks = {node_id: obg.Block(10) for node_id in range(1, 5)}
    chain = {node_id: [node_id + 1] for node_id in range(1, 4)}
    self.graph = obg.GraphWithReversals(blocks, chain)
    self.pileup = SparsePileup(self.graph)
def offsetgraph():
    """Return a graph on nodes 101..110 backed by a BlockArray with offset 100."""
    # Index 0 is a zero-sized placeholder; the other ten entries have size 10.
    sizes = 10 * np.ones(11)
    sizes[0] = 0
    blocks = obg.BlockArray(sizes)
    blocks.node_id_offset = 100

    # A bubble 101 -> {102, 103} -> 104 followed by a chain 104 -> ... -> 110.
    edges = {101: [102, 103], 102: [104], 103: [104]}
    for src in range(104, 110):
        edges[src] = [src + 1]
    return obg.GraphWithReversals(blocks, edges)
def test_holes_cleaner():
    """Run HolesCleaner (limit 10) on a ten-node chain and print the result.

    This test makes no assertion; it only exercises the code path.
    """
    indices = np.array(
        [80, 100, 180, 220, 240, 250, 300, 400, 500, 520, 610, 810])
    # Alternating 0, 1, 0, 1, ... with one value per index.
    values = np.array([position % 2 for position in range(len(indices))])
    pileup = SparseValues(indices, values)

    blocks = {node_id: obg.Block(100) for node_id in range(1, 11)}
    chain = {node_id: [node_id + 1] for node_id in range(1, 10)}
    graph = obg.GraphWithReversals(blocks, chain)

    cleaned = HolesCleaner(graph, pileup, 10).run()
    print(cleaned)
def complicated_graph():
    """Return a graph of ten length-2 blocks (ids 1..10) with branching edges."""
    blocks = {node_id: obg.Block(2) for node_id in range(1, 11)}

    # (source, target) pairs, grouped per source in insertion order.
    edge_pairs = [
        (1, 2), (1, 3),
        (2, 4),
        (3, 4),
        (4, 5), (4, 6),
        (5, 7),
        (6, 7),
        (7, 8), (7, 9),
        (9, 10),
    ]
    adjacency = {}
    for src, dst in edge_pairs:
        adjacency.setdefault(src, []).append(dst)
    return obg.GraphWithReversals(blocks, adjacency)
def complicated_offset():
    """Return a graph of ten length-2 blocks (ids 101..110) with branching edges."""
    blocks = {node_id: obg.Block(2) for node_id in range(101, 111)}

    # (source, target) pairs, grouped per source in insertion order.
    edge_pairs = [
        (101, 102), (101, 103),
        (102, 104),
        (103, 104),
        (104, 105), (104, 106),
        (105, 107),
        (106, 107),
        (107, 108), (107, 109),
        (109, 110),
    ]
    adjacency = {}
    for src, dst in edge_pairs:
        adjacency.setdefault(src, []).append(dst)
    return obg.GraphWithReversals(blocks, adjacency)
def setUp(self):
    """Create an eleven-node graph (block length 3) that includes reversed edges."""
    blocks = {node_id: obg.Block(3) for node_id in range(1, 12)}

    # (source, target) pairs; negative ids denote reversed nodes.
    edge_pairs = [
        (-1, -11),
        (1, 2), (1, 3), (1, 7),
        (2, 4), (2, 5),
        (3, 4), (3, 5),
        (4, -5), (4, 6), (4, 8),
        (5, 7),
        (6, -7), (6, 9),
        (7, 9),
        (9, 10),
    ]
    adjacency = {}
    for src, dst in edge_pairs:
        adjacency.setdefault(src, []).append(dst)
    self.graph = obg.GraphWithReversals(blocks, adjacency)
def get_offset_based_graph(self):
    """Convert this graph's nodes and edges to an offsetbasedgraph graph.

    An edge leaving from the start of its source node gets a negated source
    id; an edge entering at the end of its target node gets a negated
    target id.

    :return: offsetbasedgraph.GraphWithReversals built from ``self.nodes``
        and ``self.edges``.
    """
    edge_map = defaultdict(list)
    for edge in self.edges:
        src = -edge.from_node if edge.from_start else edge.from_node
        dst = -edge.to_node if edge.to_end else edge.to_node
        edge_map[src].append(dst)

    block_map = {block.id: block.to_obg() for block in self.nodes}
    return offsetbasedgraph.GraphWithReversals(block_map, edge_map)
def test_intervals_to_start_and_ends(self):
    """Compare per-node starts and ends for four intervals on a two-node graph."""
    blocks = {node_id: obg.Block(10) for node_id in (1, 2)}
    graph = obg.GraphWithReversals(blocks, {1: [2]})
    intervals = [
        obg.Interval(1, 5, [1]),
        obg.Interval(3, 7, [1]),
        obg.Interval(5, 3, [1, 2]),
        obg.Interval(5, 6, [2]),
    ]
    correct_starts = {1: [1, 3, 5], 2: [0, 5]}
    correct_ends = {1: [5, 7, 10], 2: [3, 6]}

    starts, ends = intervals_to_start_and_ends(graph, intervals)

    for node_id in graph.blocks:
        starts_match = np.all(correct_starts[node_id] == starts[node_id])
        self.assertTrue(starts_match)
        ends_match = np.all(correct_ends[node_id] == ends[node_id])
        self.assertTrue(ends_match)
def json_file_to_obg_numpy_graph(json_file_name, n_nodes=0):
    """
    Faster method not using Graph class.
    Directly converts to a numpy-backed Offset Based Graph.

    The file is read as one JSON object per line. Each object may carry a
    "node" list (dicts with "id" and "sequence") and an "edge" list (dicts
    with "from", "to" and optional "from_start"/"to_end" flags). The file is
    read twice: first to find the node id range, then to fill in node sizes
    and adjacency lists.

    :param json_file_name: Path of the line-delimited JSON file.
    :param n_nodes: Unused; kept so existing callers keep working.
    :return: An obg.GraphWithReversals whose blocks have
        ``node_id_offset`` set to ``min_node_id - 1``.
    :raises ValueError: If the file contains no nodes.
    """
    logging.info("Creating ob graph from json file")
    adj_list = defaultdict(list)
    rev_adj_list = defaultdict(list)

    # First pass: find min and max node id.
    min_node_id = None
    max_node_id = 0
    has_warned_about_int = False
    with open(json_file_name) as f:
        for line in f:
            json_obj = json.loads(line)
            if "node" not in json_obj:
                continue
            for node in json_obj["node"]:
                node_id = node["id"]
                if not isinstance(node_id, int):
                    if not has_warned_about_int:
                        logging.warning(
                            "Node id %s is not int. "
                            "Converting to int when creating graph.",
                            node_id)
                        has_warned_about_int = True
                    node_id = int(node_id)
                if min_node_id is None or node_id < min_node_id:
                    min_node_id = node_id
                if node_id > max_node_id:
                    max_node_id = node_id

    if min_node_id is None:
        # Without any node there is no id range to size the array with.
        raise ValueError("No nodes found in %s" % json_file_name)

    logging.info("Min node: %d, Max node: %d", min_node_id, max_node_id)

    # Index 0 is left unused; node ``min_node_id`` is stored at index 1.
    # NOTE(review): uint16 caps a stored sequence length at 65535 --
    # confirm that is sufficient for the graphs read here.
    nodes = np.zeros((max_node_id - min_node_id) + 2, dtype=np.uint16)

    # Second pass: node sizes and edges.
    logging.info("Reading from json")
    with open(json_file_name) as f:
        for line in f:
            json_obj = json.loads(line)
            if "node" in json_obj:
                for node in json_obj["node"]:
                    nodes[int(node["id"]) - min_node_id + 1] = len(
                        node["sequence"])

            if "edge" in json_obj:
                for edge in json_obj["edge"]:
                    from_start = bool(edge.get("from_start"))
                    to_end = bool(edge.get("to_end"))
                    if from_start and to_end:
                        # new in vg 1.27, this is a normal edge from end
                        # to start
                        from_node = int(edge["to"])
                        to_node = int(edge["from"])
                        assert from_node >= 0 and to_node >= 0
                    else:
                        # Always convert to int so that string ids end up
                        # as ints in the adjacency lists too.
                        from_node = int(edge["from"])
                        if from_start:
                            from_node = -from_node
                        to_node = int(edge["to"])
                        if to_end:
                            to_node = -to_node

                    adj_list[from_node].append(to_node)
                    rev_adj_list[-to_node].append(-from_node)

    logging.info("Creating numpy adj lists")
    adj_list = obg.graph.AdjListAsNumpyArrays.create_from_edge_dict(adj_list)
    rev_adj_list = obg.graph.AdjListAsNumpyArrays.create_from_edge_dict(
        rev_adj_list)

    graph = obg.GraphWithReversals(nodes, adj_list,
                                   rev_adj_list=rev_adj_list,
                                   create_reverse_adj_list=False)
    graph.blocks.node_id_offset = min_node_id - 1
    return graph
def setUp(self):
    """Build the graph, dense pileups, peaks, scores and max paths for the tests."""
    nodes = {node_id: obg.Block(10) for node_id in range(1, 6)}
    edges = {1: [2, 3], 2: [4], 3: [-4], 4: [5], -4: [5]}
    self.graph = obg.GraphWithReversals(nodes, edges)

    def flat(start_value, length):
        # ValuedIndexes without any index/value pairs.
        return ValuedIndexes(np.array([], dtype="int"), np.array([]),
                             start_value, length)

    # Pileup with four index/value pairs per node, converted to dense form.
    steps = range(1, 5)
    data = {}
    for node_id in nodes:
        data[node_id] = ValuedIndexes(
            np.array([2 * i for i in steps]),
            np.array([2 * i + 10 * node_id for i in steps]),
            node_id * 10, 10)
    sparse = SparsePileup(self.graph)
    sparse.data = data
    self.pileup = DensePileup.create_from_old_sparsepileup(sparse)

    # Pileup without index/value pairs, converted to dense form.
    flat_sparse = SparsePileup(self.graph)
    flat_sparse.data = {node_id: flat(node_id * 10, 10) for node_id in nodes}
    self.flat_pileup = DensePileup.create_from_old_sparsepileup(flat_sparse)

    # First peak.
    peak = BinaryContinousAreas(self.graph)
    for node_id in (2, 4, 3):
        peak.add_full(node_id)
    peak.add_start(5, 5)
    peak.add_start(-1, 5)
    self.peak = peak

    indexes = np.array([2, 4, 6, 8])
    values = np.array([2, 4, 6, 8])

    scores = {}
    for node_id in (2, 3, 4):
        base = 10 * node_id
        scores[node_id] = ValuedIndexes(indexes, values + base, base, 10)
        scores[-node_id] = ValuedIndexes(indexes, values + base, base, 10)
    scores[5] = ValuedIndexes(np.array([2, 4]), np.array([52, 54]), 50, 5)
    scores[-1] = ValuedIndexes(np.array([1, 3]), np.array([16, 18]), 14, 5)
    self.scores = scores

    flat_scores = {}
    for sign in (1, -1):
        for node_id in (2, 3, 4):
            flat_scores[sign * node_id] = flat(10 * node_id, 10)
    flat_scores[5] = flat(50, 5)
    flat_scores[-1] = flat(10, 5)
    self.flat_scores = flat_scores

    # Second peak.
    peak2 = BinaryContinousAreas(self.graph)
    peak2.add_full(2)
    peak2.add_start(-3, 7)
    peak2.add_full(4)
    peak2.add_start(5, 5)
    self.peak2 = peak2

    self.scores2 = {
        2: ValuedIndexes(indexes, values + 20, 20, 10),
        -3: ValuedIndexes(np.array([1, 3, 5]), np.array([34, 36, 38]),
                          32, 7),
        4: ValuedIndexes(indexes, values + 40, 40, 10),
        5: ValuedIndexes(np.array([2, 4]), np.array([52, 54]), 50, 5),
    }

    self.scored_peak = ScoredPeak(self.peak, self.scores)
    self.flat_scored_peak = ScoredPeak(self.peak, self.flat_scores)
    self.scored_peak2 = ScoredPeak(self.peak2, self.scores2)

    self.max_path = obg.DirectedInterval(
        5, 5, [1, 3, -4, 5], graph=self.graph)
    self.max_path2 = obg.DirectedInterval(
        3, 5, [3, -4, 5], graph=self.graph)
import pytest

# Compare the major version numerically: comparing the version strings
# lexicographically would treat e.g. "10.0.0" as older than "3.0.0".
if int(pytest.__version__.split(".")[0]) < 3:
    pytest.skip()

import numpy as np
import unittest
import offsetbasedgraph as obg
# from test_snarls import snarl_graph2
# from graph_peak_caller.control.linearsnarls import \
#     UnmappedIndices, LinearPileup
# from graph_peak_caller.control.snarlmaps import LinearSnarlMap

# Module-level graph of four unconnected blocks.
graph = obg.GraphWithReversals(
    {
        3: obg.Block(20),
        5: obg.Block(10),
        12: obg.Block(20),
        13: obg.Block(21),
    },
    {})


@pytest.mark.skip()
class TestSnarlMap(unittest.TestCase):
    """Fixture for snarl-map tests; the whole class is marked as skipped.

    NOTE(review): ``LinearSnarlMap`` and ``snarl_graph2`` are only named in
    the commented-out imports above, so ``setUp`` would raise ``NameError``
    if the skip marker were removed without restoring them.
    """

    def setUp(self):
        """Create the snarl map plus matching graph and linear positions."""
        self.snarl_map = LinearSnarlMap.from_snarl_graph(snarl_graph2, graph)
        self.graph_positions = [
            obg.Position(5, 4),
            obg.Position(3, 4),
            obg.Position(12, 4),
            obg.Position(13, 4),
        ]
        self.linear_positions = [4, 31/20*4, 10+21/20*4, 14]
        self.graph_interval = obg.DirectedInterval(
            self.graph_positions[0], self.graph_positions[2])
def setUp(self):
    """Create a ten-node graph where nodes 1..5 each have two outgoing edges."""
    blocks = {node_id: obg.Block(10) for node_id in range(1, 11)}

    # NOTE(review): node 5 gets an edge to node 11, which is not among the
    # blocks created above -- confirm this is intended.
    adjacency = {}
    for src in range(1, 6):
        adjacency[src] = [src + 1, src + 6]
    self.graph = obg.GraphWithReversals(blocks, adjacency)
import unittest
from graph_peak_caller.areas import BinaryContinousAreas, BCACollection
import offsetbasedgraph as obg

# Shared chain graph 1 -> 2 -> ... -> 9 with blocks of length 10.
nodes = {node_id: obg.Block(10) for node_id in range(1, 10)}
graph = obg.GraphWithReversals(
    nodes, {node_id: [node_id + 1] for node_id in range(1, 9)})


class TestBinaryContinousAreas(unittest.TestCase):
    """Tests for BinaryContinousAreas serialisation and interval filling."""

    def setUp(self):
        """Create areas with full nodes, start areas and an internal interval."""
        areas = BinaryContinousAreas(graph)
        areas.full_areas = {1: 1, 2: 1}
        areas.starts = {3: 2, 4: 5}
        areas.internal_intervals = {5: [2, 7]}
        self.areas = areas

    def test_file_in_out(self):
        """Round-trip areas through a file line and a collection through a file."""
        serialized = self.areas.to_file_line()
        print(serialized)
        restored = BinaryContinousAreas.from_file_line(serialized, graph)
        self.assertEqual(restored, self.areas)

        file_name = "tmp.subgraphs"
        collection = BCACollection([self.areas, self.areas])
        collection.to_file(file_name)
        BCACollection.from_file(file_name, graph)

    def test_filled_interval(self):
        """Fill an interval over nodes 2, 3, 4 and sanitize; no assertion is made."""
        areas = BinaryContinousAreas(graph)
        areas.filled_interval(obg.DirectedInterval(4, 4, [2, 3, 4]))
        areas.sanitize()
import offsetbasedgraph as obg
from graph_peak_caller.postprocess.maxpaths import SparseMaxPaths
from graph_peak_caller.sparsediffs import SparseValues
from graph_peak_caller.peakcollection import Peak
import numpy as np

# Module-level graph: blocks 1..10 of length 10 in a chain, except that
# node 1 branches to 2 and 3, which both continue to 4.
nodes = {i + 1: obg.Block(10) for i in range(10)}
edges = {i: [i + 1] for i in range(1, 10)}
edges[1] = [2, 3]
edges[2] = [4]
edges[3] = [4]
graph = obg.GraphWithReversals(nodes, edges)


def complicated_offset():
    """Return a graph of ten length-2 blocks (ids 101..110) with branching edges."""
    nodes = {i: obg.Block(2) for i in range(101, 111)}
    edges = {
        101: [102, 103],
        102: [104],
        103: [104],
        104: [105, 106],
        105: [107],
        106: [107],
        107: [108, 109],
        109: [110]
    }
    return obg.GraphWithReversals(nodes, edges)


def offsetgraph():
def sorted_wierd_graph(a, b):
    """Return a four-node diamond graph (1 -> {2, 3} -> 4), blocks of length 100.

    Both ``a`` and ``b`` are accepted but not used.
    """
    blocks = {node_id: obg.Block(100) for node_id in range(1, 5)}
    adjacency = {}
    for src, dst in ((1, 2), (1, 3), (2, 4), (3, 4)):
        adjacency.setdefault(src, []).append(dst)
    return obg.GraphWithReversals(blocks, adjacency)
def small_graph():
    """Return a six-node graph (ids 101..106, block length 10) with one bubble."""
    blocks = {node_id: obg.Block(10) for node_id in range(101, 107)}

    # 101 -> 102 -> {103, 104} -> 105 -> 106
    edge_pairs = [
        (101, 102),
        (102, 103), (102, 104),
        (103, 105),
        (104, 105),
        (105, 106),
    ]
    adjacency = {}
    for src, dst in edge_pairs:
        adjacency.setdefault(src, []).append(dst)
    return obg.GraphWithReversals(blocks, adjacency)
def test_end_hole():
    """HolesCleaner with limit 4 must return a pileup equal to the input."""
    single_node = obg.GraphWithReversals({1: obg.Block(12)}, {1: []})
    pileup = SparseValues([0, 5, 10], [False, True, False])

    cleaned = HolesCleaner(single_node, pileup, 4).run()
    assert cleaned == pileup