def __init__(self, graph=None, debug=0):
    """
    Attach this object to a backing graph and register it as a node.

    ``graph`` is the graph to use; when it is ``None`` a new ``Graph()`` is
    created. ``debug`` is stored on the instance unchanged.
    """
    backing = Graph() if graph is None else graph
    self.graphident = self
    self.graph = backing
    self.debug = debug
    self.indent = 0
    # The instance itself is inserted as a node, with no node data.
    backing.add_node(self, None)
def test_nodes(self):
    """Check node insertion, data lookup, hiding and restoring on a Graph."""
    graph = Graph()
    self.assertEqual(graph.node_list(), [])

    first_payload = object()
    rejected_payload = object()
    second_payload = object()
    node_ids = (1, 2, 3)

    graph.add_node(1, first_payload)
    # Node 1 is added a second time; the data check below expects the
    # payload from the first call to still be in place.
    graph.add_node(1, rejected_payload)
    graph.add_node(2, second_payload)
    graph.add_node(3)

    # A list is not accepted as a node id.
    self.assertRaises(TypeError, graph.add_node, [])

    self.assertTrue(graph.node_data(1) is first_payload)
    self.assertTrue(graph.node_data(2) is second_payload)
    self.assertTrue(graph.node_data(3) is None)

    for node_id in node_ids:
        self.assertTrue(node_id in graph)

    self.assertEqual(graph.number_of_nodes(), 3)
    self.assertEqual(graph.number_of_hidden_nodes(), 0)
    self.assertEqual(graph.hidden_node_list(), [])
    self.assertEqual(list(sorted(graph)), [1, 2, 3])

    for node_id in node_ids:
        graph.hide_node(node_id)

    self.assertEqual(graph.number_of_nodes(), 0)
    self.assertEqual(graph.number_of_hidden_nodes(), 3)
    self.assertEqual(list(sorted(graph.hidden_node_list())), [1, 2, 3])

    for node_id in node_ids:
        self.assertFalse(node_id in graph)

    # Adding an id that is currently hidden must not make it visible.
    graph.add_node(1)
    self.assertFalse(1 in graph)

    graph.restore_node(1)
    self.assertTrue(1 in graph)
    self.assertFalse(2 in graph)
    self.assertFalse(3 in graph)

    graph.restore_all_nodes()
    for node_id in node_ids:
        self.assertTrue(node_id in graph)

    self.assertEqual(list(sorted(graph.node_list())), [1, 2, 3])

    description = graph.describe_node(1)
    self.assertEqual(description, (1, first_payload, [], []))
def test_connected(self):
    """connected() only turns true once the four nodes form a cycle."""
    graph = Graph()
    for node in (1, 2, 3, 4):
        graph.add_node(node)
    self.assertFalse(graph.connected())

    # Two separate pairs: still not connected.
    for source, target in ((1, 2), (3, 4)):
        graph.add_edge(source, target)
    self.assertFalse(graph.connected())

    # Joining the pairs in both directions closes the ring 1-2-3-4-1.
    for source, target in ((2, 3), (4, 1)):
        graph.add_edge(source, target)
    self.assertTrue(graph.connected())
def test_bfs_subgraph_does_not_reverse_egde_direction(self):
    """
    BFS subgraphs must keep edge direction.

    For the chain A -> B -> C, the backward subgraph from 'C' and the forward
    subgraph from 'A' are both expected to give the same forward topological
    sort result as the full graph.
    """
    graph = Graph()
    graph.add_node('A')
    graph.add_node('B')
    graph.add_node('C')
    graph.add_edge('A', 'B')
    graph.add_edge('B', 'C')

    whole_graph = graph.forw_topo_sort()

    # assertEqual replaces the deprecated assertEquals alias, which no longer
    # exists in Python 3.12+.
    subgraph_backward = graph.back_bfs_subgraph('C')
    subgraph_backward = subgraph_backward.forw_topo_sort()
    self.assertEqual(whole_graph, subgraph_backward)

    subgraph_forward = graph.forw_bfs_subgraph('A')
    subgraph_forward = subgraph_forward.forw_topo_sort()
    self.assertEqual(whole_graph, subgraph_forward)
def copy_to_graph(session):
    """
    Build a ``Graph`` linking each artist to its two heaviest outgoing edges.

    Every ``LastFmArtist`` returned by ``session`` that has a non-empty
    ``graph_out`` becomes a node keyed by ``artist.id``. The targets of its
    two highest-``weight`` edges are added as nodes too, and each of those
    edges is stored as edge data. Artists without outgoing edges are skipped
    with a warning.

    Returns the populated graph.
    """
    graph = Graph()
    count = 0
    for artist in session.query(LastFmArtist).all():
        if not artist.graph_out:
            # Logger.warn is a deprecated alias (removed in Python 3.13);
            # warning() is the supported spelling.
            LOG.warning('Discarding unconnected {0}.'.format(artist.name))
            continue

        graph.add_node(artist.id, artist)
        strongest = sorted(
            artist.graph_out, key=lambda e: e.weight, reverse=True
        )[:2]
        for edge in strongest:
            target = edge.to_
            graph.add_node(target.id, target)
            # Both endpoints were added above, so node creation is disabled.
            graph.add_edge(artist.id, target.id, edge, create_nodes=False)

        # Progress is reported before the increment, i.e. at 0, 100, 200, ...
        if not count % 100:
            LOG.info('Added {0} nodes.'.format(count))
        count += 1
    return graph
class Workflow(Process):
    """
    A process made of steps whose ports are wired together by data links.

    Construction builds ``self.graph`` with one node per step (wrapped in an
    ``AppNode``), one node per workflow input and output, and one edge per
    unique data link.
    """

    def __init__(self, process_id, inputs, outputs, requirements, hints,
                 label, description, steps, context, data_links=None):
        """
        Build the workflow graph from ``steps`` and ``data_links``.

        The first seven arguments are forwarded unchanged to
        ``Process.__init__``. ``context`` supplies the executor used by
        ``run``. ``data_links`` is an optional list of dicts with ``source``
        and ``destination`` keys plus an optional ``position``; the list is
        copied, so the caller's list is not modified.

        Raises ``ValidationError`` for a duplicate node id and ``RabixError``
        for a data link that matches no known port.
        """
        super(Workflow, self).__init__(
            process_id, inputs, outputs, requirements, hints, label,
            description
        )
        self.graph = Graph()
        self.executor = context.executor
        self.steps = steps
        # Copy: move_connect_to_datalink appends to this list, and those
        # appends must not leak into the list object the caller passed in.
        self.data_links = list(data_links or [])
        self.context = context
        self.port_step_index = {}

        for step in steps:
            node = AppNode(step.app, {})
            self.add_node(step.id, node)
            for inp in step.inputs:
                self.port_step_index[inp.id] = step.id
                self.move_connect_to_datalink(inp)
                if inp.value:
                    node.inputs[inp.id] = inp.value

            for out in step.outputs:
                self.port_step_index[out.id] = step.id

        # NOTE(review): self.inputs / self.outputs (and self._inputs /
        # self._outputs below) are presumably set by Process.__init__, which
        # is not visible here.
        for inp in self.inputs:
            self.add_node(inp.id, inp)

        for out in self.outputs:
            self.move_connect_to_datalink(out)
            self.add_node(out.id, out)

        # Dedupe links while keeping first-seen order, so that edges are
        # always added to the graph in the same order for the same input.
        seen = set()
        unique_links = []
        for dl in self.data_links:
            key = tuple(dl.items())
            if key not in seen:
                seen.add(key)
                unique_links.append(dict(dl))
        self.data_links = unique_links

        for dl in self.data_links:
            dst = dl['destination'].lstrip('#')
            src = dl['source'].lstrip('#')
            if src in self.port_step_index and dst in self.port_step_index:
                # Step port to step port: the edge joins the owning steps.
                rel = Relation(src, dst, dl.get('position', 0))
                src = self.port_step_index[src]
                dst = self.port_step_index[dst]
            elif src in self._inputs:
                # Workflow input to step port.
                rel = InputRelation(dst, dl.get('position', 0))
                dst = self.port_step_index[dst]
            elif dst in self._outputs:
                # Step port to workflow output.
                rel = OutputRelation(src, dl.get('position', 0))
                src = self.port_step_index[src]
            else:
                raise RabixError("invalid data link %s" % dl)

            self.graph.add_edge(src, dst, rel)

        if not self.graph.connected():
            # Connectivity is checked but deliberately not enforced.
            pass
            # raise ValidationError('Graph is not connected')

    def move_connect_to_datalink(self, port):
        """
        Turn every entry of ``port.source`` into a data link ending at ``port``.

        The new links are appended to ``self.data_links`` and ``port.source``
        is emptied in place afterwards.
        """
        for src in port.source:
            self.data_links.append({
                'source': src,
                'destination': '#' + port.id
            })
        del port.source[:]

    # Graph.add_node silently fails if node already exists
    def add_node(self, node_id, node):
        """Add ``node`` under ``node_id``; raise ``ValidationError`` on reuse."""
        if node_id in self.graph.nodes:
            raise ValidationError('Duplicate node ID: %s' % node_id)
        self.graph.add_node(node_id, node)

    def run(self, job):
        """
        Execute ``job`` through an ``ExecutionGraph`` and return its outputs.

        Each ready job is handed to ``self.executor.execute`` together with
        the execution graph's ``job_done`` callback and the job's id.
        """
        eg = ExecutionGraph(self, job)
        while eg.has_next():
            next_id, next_job = eg.next_job()
            self.executor.execute(next_job, eg.job_done, next_id)
        return eg.outputs

    def to_dict(self, context):
        """Return the parent's dict form extended with ``class`` and ``steps``."""
        d = super(Workflow, self).to_dict(context)
        d.update({
            "class": "Workflow",
            'steps': [step.to_dict(context) for step in self.steps]
        })
        return d

    @classmethod
    def from_dict(cls, context, d):
        """
        Build a ``Workflow`` from its dict form.

        ``steps`` entries go through ``Step.from_dict``; every other value is
        converted with ``context.from_dict``. Inputs and outputs are then
        rebuilt as ``InputParameter`` / ``WorkflowOutput`` objects.
        """
        converted = {}
        for k, v in six.iteritems(d):
            if k == 'steps':
                converted[k] = [Step.from_dict(context, s) for s in v]
            else:
                converted[k] = context.from_dict(v)

        kwargs = Process.kwarg_dict(converted)
        kwargs.update({
            'steps': converted['steps'],
            'data_links': converted.get('dataLinks'),
            'context': context,
            'inputs': [
                InputParameter.from_dict(context, i)
                for i in converted['inputs']
            ],
            'outputs': [
                WorkflowOutput.from_dict(context, o)
                for o in converted['outputs']
            ]
        })
        return cls(**kwargs)
class Workflow(Process):
    """
    A process made of steps whose ports are wired together by data links.

    Construction builds ``self.graph`` with one node per step (wrapped in an
    ``AppNode``), one node per workflow input and output, and one edge per
    data link.
    """

    def __init__(self, process_id, inputs, outputs, requirements, hints,
                 label, description, steps, context, data_links=None):
        """
        Build the workflow graph from ``steps`` and ``data_links``.

        The first seven arguments are forwarded unchanged to
        ``Process.__init__``. ``context`` supplies the executor used by
        ``run``. ``data_links`` is an optional list of dicts with ``source``
        and ``destination`` keys plus an optional ``position``; the list is
        copied, so the caller's list is not extended.

        Raises ``ValidationError`` for a duplicate node id and ``RabixError``
        for a data link that matches no known port.
        """
        super(Workflow, self).__init__(
            process_id, inputs, outputs, requirements, hints, label,
            description
        )
        self.graph = Graph()
        self.executor = context.executor
        self.steps = steps
        # Copy: move_connect_to_datalink appends to this list, and those
        # appends must not leak into the list object the caller passed in
        # (building two workflows from one list would duplicate links).
        self.data_links = list(data_links or [])
        self.context = context
        self.port_step_index = {}

        for step in steps:
            node = AppNode(step.app, {})
            self.add_node(step.id, node)
            for inp in step.inputs:
                self.port_step_index[inp.id] = step.id
                self.move_connect_to_datalink(inp)
                if inp.value:
                    node.inputs[inp.id] = inp.value

            for out in step.outputs:
                self.port_step_index[out.id] = step.id

        # NOTE(review): self.inputs / self.outputs (and self._inputs /
        # self._outputs below) are presumably set by Process.__init__, which
        # is not visible here.
        for inp in self.inputs:
            self.add_node(inp.id, inp)

        for out in self.outputs:
            self.move_connect_to_datalink(out)
            self.add_node(out.id, out)

        for dl in self.data_links:
            dst = dl['destination'].lstrip('#')
            src = dl['source'].lstrip('#')
            if src in self.port_step_index and dst in self.port_step_index:
                # Step port to step port: the edge joins the owning steps.
                rel = Relation(src, dst, dl.get('position', 0))
                src = self.port_step_index[src]
                dst = self.port_step_index[dst]
            elif src in self._inputs:
                # Workflow input to step port.
                rel = InputRelation(dst, dl.get('position', 0))
                dst = self.port_step_index[dst]
            elif dst in self._outputs:
                # Step port to workflow output.
                rel = OutputRelation(src, dl.get('position', 0))
                src = self.port_step_index[src]
            else:
                raise RabixError("invalid data link %s" % dl)

            self.graph.add_edge(src, dst, rel)

        if not self.graph.connected():
            # Connectivity is checked but deliberately not enforced.
            pass
            # raise ValidationError('Graph is not connected')

    def move_connect_to_datalink(self, port):
        """
        Move the link dicts in ``port.connect`` into ``self.data_links``.

        Each dict gets its ``destination`` set to ``'#' + port.id`` before it
        is appended; ``port.connect`` is emptied in place afterwards.
        """
        for dl in port.connect:
            dl['destination'] = '#'+port.id
            self.data_links.append(dl)
        del port.connect[:]

    # Graph.add_node silently fails if node already exists
    def add_node(self, node_id, node):
        """Add ``node`` under ``node_id``; raise ``ValidationError`` on reuse."""
        if node_id in self.graph.nodes:
            raise ValidationError('Duplicate node ID: %s' % node_id)
        self.graph.add_node(node_id, node)

    def hide_nodes(self, type):
        """Hide every graph node whose data is an instance of ``type``."""
        for node_id in self.graph.node_list():
            node = self.graph.node_data(node_id)
            if isinstance(node, type):
                self.graph.hide_node(node_id)

    def run(self, job):
        """
        Execute ``job`` through an ``ExecutionGraph`` and return its outputs.

        Each ready job is handed to ``self.executor.execute`` together with
        the execution graph's ``job_done`` callback and the job's id.
        """
        eg = ExecutionGraph(self, job)
        while eg.has_next():
            next_id, next_job = eg.next_job()
            self.executor.execute(next_job, eg.job_done, next_id)
        return eg.outputs

    def to_dict(self, context):
        """Return the parent's dict form extended with ``class`` and ``steps``."""
        d = super(Workflow, self).to_dict(context)
        d.update({
            "class": "Workflow",
            'steps': [step.to_dict(context) for step in self.steps]
        })
        return d

    @classmethod
    def from_dict(cls, context, d):
        """
        Build a ``Workflow`` from its dict form.

        ``steps`` entries go through ``Step.from_dict``; every other value is
        converted with ``context.from_dict``. Inputs and outputs are then
        rebuilt as ``InputParameter`` / ``WorkflowOutput`` objects.
        """
        converted = {}
        for k, v in six.iteritems(d):
            if k == 'steps':
                converted[k] = [Step.from_dict(context, s) for s in v]
            else:
                converted[k] = context.from_dict(v)

        kwargs = Process.kwarg_dict(converted)
        kwargs.update({
            'steps': converted['steps'],
            'data_links': converted.get('dataLinks'),
            'context': context,
            'inputs': [InputParameter.from_dict(context, i)
                       for i in converted['inputs']],
            'outputs': [WorkflowOutput.from_dict(context, o)
                        for o in converted['outputs']]
        })
        return cls(**kwargs)
def test_edges(self):
    """
    Exercise the edge API of Graph.

    Covers adding edges, hiding and restoring them, lookups by id and by
    endpoints, edge data, neighbour and degree queries, and the effect of
    hiding a node on edge lookup. The assertions depend on the cumulative
    state of the graph, so statement order matters.
    """
    graph = Graph()
    graph.add_node(1)
    graph.add_node(2)
    graph.add_node(3)
    graph.add_node(4)
    graph.add_node(5)

    self.assertTrue(isinstance(graph.edge_list(), list))

    graph.add_edge(1, 2)
    graph.add_edge(4, 5, 'a')

    # 'a' and 'b' were never added as nodes; with create_nodes=False the
    # call is expected to fail rather than create them.
    self.assertRaises(GraphError, graph.add_edge, 'a', 'b', create_nodes=False)

    self.assertEqual(graph.number_of_hidden_edges(), 0)
    self.assertEqual(graph.number_of_edges(), 2)

    # Hide a single edge: counts shift and lookup by endpoints gives None.
    e = graph.edge_by_node(1, 2)
    self.assertTrue(isinstance(e, int))
    graph.hide_edge(e)
    self.assertEqual(graph.number_of_hidden_edges(), 1)
    self.assertEqual(graph.number_of_edges(), 1)
    e2 = graph.edge_by_node(1, 2)
    self.assertTrue(e2 is None)

    # Restoring brings the edge back under the same id.
    graph.restore_edge(e)
    e2 = graph.edge_by_node(1, 2)
    self.assertEqual(e, e2)
    self.assertEqual(graph.number_of_hidden_edges(), 0)
    self.assertEqual(graph.number_of_edges(), 2)

    # Hide everything, then restore everything in one call.
    e1 = graph.edge_by_node(1, 2)
    e2 = graph.edge_by_node(4, 5)
    graph.hide_edge(e1)
    graph.hide_edge(e2)
    self.assertEqual(graph.number_of_edges(), 0)
    graph.restore_all_edges()
    self.assertEqual(graph.number_of_edges(), 2)

    self.assertEqual(graph.edge_by_id(e1), (1,2))
    # An id built to differ from both existing edge ids must be rejected.
    self.assertRaises(GraphError, graph.edge_by_id, (e1+1)*(e2+1)+1)

    self.assertEqual(list(sorted(graph.edge_list())), [e1, e2])

    # The edge added without explicit data is expected to carry the value 1.
    self.assertEqual(graph.describe_edge(e1), (e1, 1, 1, 2))
    self.assertEqual(graph.describe_edge(e2), (e2, 'a', 4, 5))

    self.assertEqual(graph.edge_data(e1), 1)
    self.assertEqual(graph.edge_data(e2), 'a')

    # For the edge added as (4, 5): head() is expected to be 4, tail() 5.
    self.assertEqual(graph.head(e2), 4)
    self.assertEqual(graph.tail(e2), 5)

    graph.add_edge(1, 3)
    graph.add_edge(1, 5)
    graph.add_edge(4, 1)

    self.assertEqual(list(sorted(graph.out_nbrs(1))), [2, 3, 5])
    self.assertEqual(list(sorted(graph.inc_nbrs(1))), [4])
    self.assertEqual(list(sorted(graph.inc_nbrs(5))), [1, 4])
    self.assertEqual(list(sorted(graph.all_nbrs(1))), [2, 3, 4, 5])

    # Node 1 is now both a predecessor and a successor of 5, yet it is
    # expected only once in all_nbrs(5).
    graph.add_edge(5, 1)
    self.assertEqual(list(sorted(graph.all_nbrs(5))), [1, 4])

    self.assertEqual(graph.out_degree(1), 3)
    self.assertEqual(graph.inc_degree(2), 1)
    self.assertEqual(graph.inc_degree(5), 2)
    self.assertEqual(graph.all_degree(5), 3)

    # Outgoing edges are checked by their first endpoint.
    v = graph.out_edges(4)
    self.assertTrue(isinstance(v, list))
    self.assertEqual(graph.edge_by_id(v[0]), (4, 5))

    v = graph.out_edges(1)
    for e in v:
        self.assertEqual(graph.edge_by_id(e)[0], 1)

    # Incoming edges are checked by their second endpoint.
    v = graph.inc_edges(1)
    self.assertTrue(isinstance(v, list))
    self.assertEqual(graph.edge_by_id(v[0]), (4, 1))

    v = graph.inc_edges(5)
    for e in v:
        self.assertEqual(graph.edge_by_id(e)[1], 5)

    # all_edges(5) may touch node 5 at either end.
    v = graph.all_edges(5)
    for e in v:
        self.assertTrue(graph.edge_by_id(e)[1] == 5 or graph.edge_by_id(e)[0] == 5)

    # With an endpoint hidden, lookup by endpoints is expected to raise;
    # restoring the node brings the edge back under the same id.
    e1 = graph.edge_by_node(1, 2)
    self.assertTrue(isinstance(e1, int))
    graph.hide_node(1)
    self.assertRaises(GraphError, graph.edge_by_node, 1, 2)
    graph.restore_node(1)
    e2 = graph.edge_by_node(1, 2)
    self.assertEqual(e1, e2)
def test_iterdata(self):
    """
    iterdata() must yield node data in the expected order in both directions.

    The same tree is built twice: once with parent -> child edges and walked
    with forward=True, once with child -> parent edges and walked with
    forward=False. Both walks are expected to produce identical results.
    """
    labelled_nodes = (
        ("1", "I"),
        ("1.1", "I.I"),
        ("1.2", "I.II"),
        ("1.3", "I.III"),
        ("1.1.1", "I.I.I"),
        ("1.1.2", "I.I.II"),
        ("1.2.1", "I.II.I"),
        ("1.2.2", "I.II.II"),
        ("1.2.2.1", "I.II.II.I"),
        ("1.2.2.2", "I.II.II.II"),
        ("1.2.2.3", "I.II.II.III"),
    )
    # (parent, child) pairs, listed in the order the edges are inserted.
    tree_links = (
        ("1", "1.1"),
        ("1", "1.2"),
        ("1", "1.3"),
        ("1.1", "1.1.1"),
        ("1.1", "1.1.2"),
        ("1.2", "1.2.1"),
        ("1.2", "1.2.2"),
        ("1.2.2", "1.2.2.1"),
        ("1.2.2", "1.2.2.2"),
        ("1.2.2", "1.2.2.3"),
    )

    expected_full = [
        'I', 'I.III', 'I.II',
        'I.II.II', 'I.II.II.III', 'I.II.II.II', 'I.II.II.I',
        'I.II.I', 'I.I', 'I.I.II', 'I.I.I'
    ]
    expected_until_end = [
        'I', 'I.III', 'I.II',
        'I.II.II', 'I.II.II.III', 'I.II.II.II', 'I.II.II.I',
        'I.II.I'
    ]
    expected_filtered = [
        'I', 'I.III', 'I.II',
        'I.I', 'I.I.I'
    ]

    for forward in (True, False):
        graph = Graph()
        for node_id, payload in labelled_nodes:
            graph.add_node(node_id, payload)
        for parent, child in tree_links:
            if forward:
                graph.add_edge(parent, child)
            else:
                # Reverse option: edges point from child to parent.
                graph.add_edge(child, parent)

        walked = list(graph.iterdata("1", forward=forward))
        self.assertEqual(walked, expected_full)

        walked = list(graph.iterdata("1", end="1.2.1", forward=forward))
        self.assertEqual(walked, expected_until_end)

        walked = list(graph.iterdata(
            "1", condition=lambda n: len(n) < 6, forward=forward))
        self.assertEqual(walked, expected_filtered)
def test_toposort(self):
    """
    Forward and backward topological sorts must respect edge direction.

    A five-node tree is sorted both ways, then rebuilt with every edge
    reversed (which swaps the roles of the two sorts), and finally a cycle
    is introduced so both sorts report failure.
    """
    def check_sort(sort_method, ordered_pairs):
        # Run one sort, require success and all five nodes in the result,
        # then check each (earlier, later) pair is in that relative order.
        ok, result = sort_method()
        self.assertTrue(ok)
        for idx in range(1, 6):
            self.assertTrue(idx in result)
        for earlier, later in ordered_pairs:
            self.assertTrue(result.index(earlier) < result.index(later))

    root_first = ((1, 2), (1, 3), (2, 4), (3, 5))
    leaves_first = ((2, 1), (3, 1), (4, 2), (5, 3))

    graph = Graph()
    for node in (1, 2, 3, 4, 5):
        graph.add_node(node)
    for source, target in root_first:
        graph.add_edge(source, target)

    check_sort(graph.forw_topo_sort, root_first)
    check_sort(graph.back_topo_sort, leaves_first)

    # Same tree with every edge reversed: back_topo_sort now has to give
    # the root-first ordering and forw_topo_sort the leaves-first one.
    graph = Graph()
    for node in (1, 2, 3, 4, 5):
        graph.add_node(node)
    for source, target in leaves_first:
        graph.add_edge(source, target)

    check_sort(graph.back_topo_sort, root_first)
    check_sort(graph.forw_topo_sort, leaves_first)

    # Close a cycle; neither sort may report success any more.
    graph.add_edge(1, 5)
    ok, result = graph.forw_topo_sort()
    self.assertFalse(ok)
    ok, result = graph.back_topo_sort()
    self.assertFalse(ok)
class WorkflowApp(App):
    """
    An app whose behaviour is defined by a graph of steps.

    The constructor builds ``self.graph`` from ``steps``: one ``AppNode`` per
    step, extra nodes for workflow inputs and outputs discovered while
    wiring, and edges carrying ``Relation`` / ``InputRelation`` /
    ``OutputRelation`` objects.
    """

    def __init__(self, app_id, steps, context, inputs=None, outputs=None,
                 to=None, app_description=None, annotations=None,
                 platform_features=None):
        """
        Build the step graph, then initialise the ``App`` base.

        ``inputs``, ``outputs`` and ``to`` are used as given when truthy and
        are extended while the graph is wired. The base class receives a
        ``JsonSchema`` assembled from the collected inputs.
        """
        self.graph = Graph()
        self.inputs = inputs or []
        self.outputs = outputs or []
        self.executor = context.executor
        self.steps = steps
        self.to = to or {}
        self.context = context

        # First pass: every step gets its node, so that the second pass can
        # add edges between any two steps regardless of their order.
        for step in steps:
            self.add_node(step.id, AppNode(step.app, {}))

        for step in steps:
            # inputs
            for input_port, input_val in six.iteritems(step.inputs):
                inp = wrap_in_list(input_val)
                for item in inp:
                    self.add_edge_or_input(step, input_port, item)

            # outputs
            if step.outputs:
                for output_port, output_val in six.iteritems(step.outputs):
                    # Maps the workflow-level output id back to the step port.
                    self.to[output_val['$to']] = output_port
                    if isinstance(step.app, WorkflowApp):
                        # Nested workflow: go through its own ``to`` mapping.
                        output_node = step.app.get_output(
                            step.app.to.get(output_port)
                        )
                    else:
                        output_node = step.app.get_output(output_port)
                    output_id = output_val['$to']
                    self.add_node(output_id, output_node)
                    self.graph.add_edge(
                        step.id, output_id, OutputRelation(output_port)
                    )
                    # output_node.id = output_val['$to']
                    self.outputs.append(output_node)

        if not self.graph.connected():
            # Connectivity is checked but deliberately not enforced.
            pass
            # raise ValidationError('Graph is not connected')

        # Input schema for the whole workflow: one property per collected
        # input, listed as required when the input says so.
        schema = {
            "@type": "JsonSchema",
            "type": "object",
            "properties": {},
            "required": []
        }

        for inp in self.inputs:
            schema['properties'][inp.id] = inp.validator.schema
            if inp.required:
                schema['required'].append(inp.id)

        super(WorkflowApp, self).__init__(
            app_id, JsonSchema(context, schema), self.outputs,
            app_description=app_description,
            annotations=annotations,
            platform_features=platform_features
        )

    def add_edge_or_input(self, step, input_name, input_val):
        """
        Wire one input value of ``step`` into the graph.

        A dict with a ``$from`` key describes connections: a source containing
        ``'.'`` is read as ``node.output`` and becomes a step-to-step edge;
        any other source is treated as a workflow input. Every other value is
        stored directly on the step's node as the value of ``input_name``.
        """
        node_id = step.id
        if isinstance(input_val, dict) and '$from' in input_val:
            frm = wrap_in_list(input_val['$from'])
            for inp in frm:
                if '.' in inp:
                    # NOTE(review): unpacking into two names assumes exactly
                    # one '.' in the source string.
                    node, outp = inp.split('.')
                    self.graph.add_edge(node, node_id, Relation(outp, input_name))
                else:
                    # TODO: merge input schemas if one input goes to different apps
                    input = step.app.get_input(input_name)
                    if inp not in self.graph.nodes:
                        self.add_node(inp, input)
                    self.graph.add_edge(
                        inp, node_id, InputRelation(input_name)
                    )
                    # The workflow-level input is a copy renamed to the
                    # source id, leaving the app's own input untouched.
                    wf_input = copy.deepcopy(input)
                    wf_input.id = inp
                    self.inputs.append(wf_input)
        else:
            self.graph.node_data(node_id).inputs[input_name] = input_val

    # Graph.add_node silently fails if node already exists
    def add_node(self, node_id, node):
        """Add ``node`` under ``node_id``; raise ``ValidationError`` on reuse."""
        if node_id in self.graph.nodes:
            raise ValidationError('Duplicate node ID: %s' % node_id)
        self.graph.add_node(node_id, node)

    def hide_nodes(self, type):
        """Hide every graph node whose data is an instance of ``type``."""
        for node_id in self.graph.node_list():
            node = self.graph.node_data(node_id)
            if isinstance(node, type):
                self.graph.hide_node(node_id)

    def run(self, job):
        """
        Execute ``job`` through an ``ExecutionGraph`` and return its outputs.

        Each ready job is handed to ``self.executor.execute`` together with
        the execution graph's ``job_done`` callback and the job's id.
        """
        eg = ExecutionGraph(self, job)
        while eg.has_next():
            next_id, next = eg.next_job()
            self.executor.execute(next, eg.job_done, next_id)
        return eg.outputs

    def to_dict(self, context):
        """Return the parent's dict form extended with ``@type`` and ``steps``."""
        d = super(WorkflowApp, self).to_dict(context)
        d.update({
            "@type": "Workflow",
            'steps': [step.to_dict(context) for step in self.steps]
        })
        return d

    @classmethod
    def from_dict(cls, context, d):
        """
        Build a ``WorkflowApp`` from its dict form.

        Each entry of ``d['steps']`` becomes a ``Step``. The app id is taken
        from ``'@id'``, falling back to a freshly generated UUID string.
        """
        steps = [Step(
            step['id'], context.from_dict(step['app']),
            step['inputs'], step.get('outputs')
        ) for step in d['steps']]

        return cls(
            d.get('@id', six.text_type(uuid4())),
            steps, context
        )